xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision a536ee5124e62c9a0051a252f7833dc8f50f44c9)
1 /*	$NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non localized version of strerror, that avoids bringing in
169  * stdio and the locale code. All the malloc messages are in English
170  * so why bother?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %u", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 #ifdef __i386__
220 #  define QUANTUM_2POW_MIN	4
221 #  define SIZEOF_PTR_2POW	2
222 #  define USE_BRK
223 #endif
224 #ifdef __ia64__
225 #  define QUANTUM_2POW_MIN	4
226 #  define SIZEOF_PTR_2POW	3
227 #endif
228 #ifdef __alpha__
229 #  define QUANTUM_2POW_MIN	4
230 #  define SIZEOF_PTR_2POW	3
231 #  define NO_TLS
232 #endif
233 #ifdef __sparc64__
234 #  define QUANTUM_2POW_MIN	4
235 #  define SIZEOF_PTR_2POW	3
236 #  define NO_TLS
237 #endif
238 #ifdef __amd64__
239 #  define QUANTUM_2POW_MIN	4
240 #  define SIZEOF_PTR_2POW	3
241 #endif
242 #ifdef __arm__
243 #  define QUANTUM_2POW_MIN	3
244 #  define SIZEOF_PTR_2POW	2
245 #  define USE_BRK
246 #  define NO_TLS
247 #endif
248 #ifdef __powerpc__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	2
251 #  define USE_BRK
252 #endif
253 #if defined(__sparc__) && !defined(__sparc64__)
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #endif
258 #ifdef __vax__
259 #  define QUANTUM_2POW_MIN	4
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #endif
263 #ifdef __sh__
264 #  define QUANTUM_2POW_MIN	4
265 #  define SIZEOF_PTR_2POW	2
266 #  define USE_BRK
267 #endif
268 #ifdef __m68k__
269 #  define QUANTUM_2POW_MIN	4
270 #  define SIZEOF_PTR_2POW	2
271 #  define USE_BRK
272 #endif
273 #ifdef __mips__
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __hppa__
279 #  define QUANTUM_2POW_MIN     4
280 #  define SIZEOF_PTR_2POW      2
281 #  define USE_BRK
282 #endif
283 
284 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
285 
286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
287 #ifndef SIZEOF_INT_2POW
288 #  define SIZEOF_INT_2POW	2
289 #endif
290 
291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
292 #if (!defined(PIC) && !defined(NO_TLS))
293 #  define NO_TLS
294 #endif
295 
296 /*
297  * Size and alignment of memory chunks that are allocated by the OS's virtual
298  * memory system.
299  */
300 #define	CHUNK_2POW_DEFAULT	20
301 
302 /*
303  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
304  * so over-estimates are okay (up to a point), but under-estimates will
305  * negatively affect performance.
306  */
307 #define	CACHELINE_2POW		6
308 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
309 
310 /* Smallest size class to support. */
311 #define	TINY_MIN_2POW		1
312 
313 /*
314  * Maximum size class that is a multiple of the quantum, but not (necessarily)
315  * a power of 2.  Above this size, allocations are rounded up to the nearest
316  * power of 2.
317  */
318 #define	SMALL_MAX_2POW_DEFAULT	9
319 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
320 
321 /*
322  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
323  * as small as possible such that this setting is still honored, without
324  * violating other constraints.  The goal is to make runs as small as possible
325  * without exceeding a per run external fragmentation threshold.
326  *
327  * We use binary fixed point math for overhead computations, where the binary
328  * point is implicitly RUN_BFP bits to the left.
329  *
330  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
331  * honored for some/all object sizes, since there is one bit of header overhead
332  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
333  * that are so small that the per-region overhead is greater than:
334  *
335  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
336  */
337 #define RUN_BFP			12
338 /*                              \/   Implicit binary fixed point. */
339 #define RUN_MAX_OVRHD		0x0000003dU
340 #define RUN_MAX_OVRHD_RELAX	0x00001800U
341 
342 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
343 #define RUN_MAX_SMALL_2POW	15
344 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
345 
346 /******************************************************************************/
347 
348 #ifdef __FreeBSD__
349 /*
350  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
351  * they require malloc()ed memory.
352  */
353 typedef struct {
354 	spinlock_t	lock;
355 } malloc_mutex_t;
356 
357 /* Set to true once the allocator has been initialized. */
358 static bool malloc_initialized = false;
359 
360 /* Used to avoid initialization races. */
361 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
362 #else
363 #define	malloc_mutex_t	mutex_t
364 
365 /* Set to true once the allocator has been initialized. */
366 static bool malloc_initialized = false;
367 
368 /* Used to avoid initialization races. */
369 static mutex_t init_lock = MUTEX_INITIALIZER;
370 #endif
371 
372 /******************************************************************************/
373 /*
374  * Statistics data structures.
375  */
376 
377 #ifdef MALLOC_STATS
378 
379 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
380 struct malloc_bin_stats_s {
381 	/*
382 	 * Number of allocation requests that corresponded to the size of this
383 	 * bin.
384 	 */
385 	uint64_t	nrequests;
386 
387 	/* Total number of runs created for this bin's size class. */
388 	uint64_t	nruns;
389 
390 	/*
391 	 * Total number of runs reused by extracting them from the runs tree for
392 	 * this bin's size class.
393 	 */
394 	uint64_t	reruns;
395 
396 	/* High-water mark for this bin. */
397 	unsigned long	highruns;
398 
399 	/* Current number of runs in this bin. */
400 	unsigned long	curruns;
401 };
402 
403 typedef struct arena_stats_s arena_stats_t;
404 struct arena_stats_s {
405 	/* Number of bytes currently mapped. */
406 	size_t		mapped;
407 
408 	/* Per-size-category statistics. */
409 	size_t		allocated_small;
410 	uint64_t	nmalloc_small;
411 	uint64_t	ndalloc_small;
412 
413 	size_t		allocated_large;
414 	uint64_t	nmalloc_large;
415 	uint64_t	ndalloc_large;
416 };
417 
418 typedef struct chunk_stats_s chunk_stats_t;
419 struct chunk_stats_s {
420 	/* Number of chunks that were allocated. */
421 	uint64_t	nchunks;
422 
423 	/* High-water mark for number of chunks allocated. */
424 	unsigned long	highchunks;
425 
426 	/*
427 	 * Current number of chunks allocated.  This value isn't maintained for
428 	 * any other purpose, so keep track of it in order to be able to set
429 	 * highchunks.
430 	 */
431 	unsigned long	curchunks;
432 };
433 
434 #endif /* #ifdef MALLOC_STATS */
435 
436 /******************************************************************************/
437 /*
438  * Chunk data structures.
439  */
440 
441 /* Tree of chunks. */
442 typedef struct chunk_node_s chunk_node_t;
443 struct chunk_node_s {
444 	/* Linkage for the chunk tree. */
445 	RB_ENTRY(chunk_node_s) link;
446 
447 	/*
448 	 * Pointer to the chunk that this tree node is responsible for.  In some
449 	 * (but certainly not all) cases, this data structure is placed at the
450 	 * beginning of the corresponding chunk, so this field may point to this
451 	 * node.
452 	 */
453 	void	*chunk;
454 
455 	/* Total chunk size. */
456 	size_t	size;
457 };
458 typedef struct chunk_tree_s chunk_tree_t;
459 RB_HEAD(chunk_tree_s, chunk_node_s);
460 
461 /******************************************************************************/
462 /*
463  * Arena data structures.
464  */
465 
466 typedef struct arena_s arena_t;
467 typedef struct arena_bin_s arena_bin_t;
468 
469 typedef struct arena_chunk_map_s arena_chunk_map_t;
470 struct arena_chunk_map_s {
471 	/* Number of pages in run. */
472 	uint32_t	npages;
473 	/*
474 	 * Position within run.  For a free run, this is POS_FREE for the first
475 	 * and last pages.  The POS_FREE special value makes it possible to
476 	 * quickly coalesce free runs.
477 	 *
478 	 * This is the limiting factor for chunksize; there can be at most 2^31
479 	 * pages in a run.
480 	 */
481 #define POS_FREE ((uint32_t)0xffffffffU)
482 	uint32_t	pos;
483 };
484 
485 /* Arena chunk header. */
486 typedef struct arena_chunk_s arena_chunk_t;
487 struct arena_chunk_s {
488 	/* Arena that owns the chunk. */
489 	arena_t *arena;
490 
491 	/* Linkage for the arena's chunk tree. */
492 	RB_ENTRY(arena_chunk_s) link;
493 
494 	/*
495 	 * Number of pages in use.  This is maintained in order to make
496 	 * detection of empty chunks fast.
497 	 */
498 	uint32_t pages_used;
499 
500 	/*
501 	 * Every time a free run larger than this value is created/coalesced,
502 	 * this value is increased.  The only way that the value decreases is if
503 	 * arena_run_alloc() fails to find a free run as large as advertised by
504 	 * this value.
505 	 */
506 	uint32_t max_frun_npages;
507 
508 	/*
509 	 * Every time a free run that starts at an earlier page than this value
510 	 * is created/coalesced, this value is decreased.  It is reset in a
511 	 * similar fashion to max_frun_npages.
512 	 */
513 	uint32_t min_frun_ind;
514 
515 	/*
516 	 * Map of pages within chunk that keeps track of free/large/small.  For
517 	 * free runs, only the map entries for the first and last pages are
518 	 * kept up to date, so that free runs can be quickly coalesced.
519 	 */
520 	arena_chunk_map_t map[1]; /* Dynamically sized. */
521 };
522 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
523 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
524 
525 typedef struct arena_run_s arena_run_t;
526 struct arena_run_s {
527 	/* Linkage for run trees. */
528 	RB_ENTRY(arena_run_s) link;
529 
530 #ifdef MALLOC_DEBUG
531 	uint32_t	magic;
532 #  define ARENA_RUN_MAGIC 0x384adf93
533 #endif
534 
535 	/* Bin this run is associated with. */
536 	arena_bin_t	*bin;
537 
538 	/* Index of first element that might have a free region. */
539 	unsigned	regs_minelm;
540 
541 	/* Number of free regions in run. */
542 	unsigned	nfree;
543 
544 	/* Bitmask of in-use regions (0: in use, 1: free). */
545 	unsigned	regs_mask[1]; /* Dynamically sized. */
546 };
547 typedef struct arena_run_tree_s arena_run_tree_t;
548 RB_HEAD(arena_run_tree_s, arena_run_s);
549 
550 struct arena_bin_s {
551 	/*
552 	 * Current run being used to service allocations of this bin's size
553 	 * class.
554 	 */
555 	arena_run_t	*runcur;
556 
557 	/*
558 	 * Tree of non-full runs.  This tree is used when looking for an
559 	 * existing run when runcur is no longer usable.  We choose the
560 	 * non-full run that is lowest in memory; this policy tends to keep
561 	 * objects packed well, and it can also help reduce the number of
562 	 * almost-empty chunks.
563 	 */
564 	arena_run_tree_t runs;
565 
566 	/* Size of regions in a run for this bin's size class. */
567 	size_t		reg_size;
568 
569 	/* Total size of a run for this bin's size class. */
570 	size_t		run_size;
571 
572 	/* Total number of regions in a run for this bin's size class. */
573 	uint32_t	nregs;
574 
575 	/* Number of elements in a run's regs_mask for this bin's size class. */
576 	uint32_t	regs_mask_nelms;
577 
578 	/* Offset of first region in a run for this bin's size class. */
579 	uint32_t	reg0_offset;
580 
581 #ifdef MALLOC_STATS
582 	/* Bin statistics. */
583 	malloc_bin_stats_t stats;
584 #endif
585 };
586 
587 struct arena_s {
588 #ifdef MALLOC_DEBUG
589 	uint32_t		magic;
590 #  define ARENA_MAGIC 0x947d3d24
591 #endif
592 
593 	/* All operations on this arena require that mtx be locked. */
594 	malloc_mutex_t		mtx;
595 
596 #ifdef MALLOC_STATS
597 	arena_stats_t		stats;
598 #endif
599 
600 	/*
601 	 * Tree of chunks this arena manages.
602 	 */
603 	arena_chunk_tree_t	chunks;
604 
605 	/*
606 	 * In order to avoid rapid chunk allocation/deallocation when an arena
607 	 * oscillates right on the cusp of needing a new chunk, cache the most
608 	 * recently freed chunk.  This caching is disabled by opt_hint.
609 	 *
610 	 * There is one spare chunk per arena, rather than one spare total, in
611 	 * order to avoid interactions between multiple threads that could make
612 	 * a single spare inadequate.
613 	 */
614 	arena_chunk_t *spare;
615 
616 	/*
617 	 * bins is used to store rings of free regions of the following sizes,
618 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
619 	 *
620 	 *   bins[i] | size |
621 	 *   --------+------+
622 	 *        0  |    2 |
623 	 *        1  |    4 |
624 	 *        2  |    8 |
625 	 *   --------+------+
626 	 *        3  |   16 |
627 	 *        4  |   32 |
628 	 *        5  |   48 |
629 	 *        6  |   64 |
630 	 *           :      :
631 	 *           :      :
632 	 *       33  |  496 |
633 	 *       34  |  512 |
634 	 *   --------+------+
635 	 *       35  | 1024 |
636 	 *       36  | 2048 |
637 	 *   --------+------+
638 	 */
639 	arena_bin_t		bins[1]; /* Dynamically sized. */
640 };
641 
642 /******************************************************************************/
643 /*
644  * Data.
645  */
646 
647 /* Number of CPUs. */
648 static unsigned		ncpus;
649 
650 /* VM page size. */
651 static size_t		pagesize;
652 static size_t		pagesize_mask;
653 static int		pagesize_2pow;
654 
655 /* Various bin-related settings. */
656 static size_t		bin_maxclass; /* Max size class for bins. */
657 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
658 static unsigned		nqbins; /* Number of quantum-spaced bins. */
659 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
660 static size_t		small_min;
661 static size_t		small_max;
662 
663 /* Various quantum-related settings. */
664 static size_t		quantum;
665 static size_t		quantum_mask; /* (quantum - 1). */
666 
667 /* Various chunk-related settings. */
668 static size_t		chunksize;
669 static size_t		chunksize_mask; /* (chunksize - 1). */
670 static int		chunksize_2pow;
671 static unsigned		chunk_npages;
672 static unsigned		arena_chunk_header_npages;
673 static size_t		arena_maxclass; /* Max size class for arenas. */
674 
675 /********/
676 /*
677  * Chunks.
678  */
679 
680 /* Protects chunk-related data structures. */
681 static malloc_mutex_t	chunks_mtx;
682 
683 /* Tree of chunks that are stand-alone huge allocations. */
684 static chunk_tree_t	huge;
685 
686 #ifdef USE_BRK
687 /*
688  * Try to use brk for chunk-size allocations, due to address space constraints.
689  */
690 /*
691  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
692  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
693  * could cause recursive lock acquisition).
694  */
695 static malloc_mutex_t	brk_mtx;
696 /* Result of first sbrk(0) call. */
697 static void		*brk_base;
698 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
699 static void		*brk_prev;
700 /* Current upper limit on brk addresses. */
701 static void		*brk_max;
702 #endif
703 
704 #ifdef MALLOC_STATS
705 /* Huge allocation statistics. */
706 static uint64_t		huge_nmalloc;
707 static uint64_t		huge_ndalloc;
708 static uint64_t		huge_nralloc;
709 static size_t		huge_allocated;
710 #endif
711 
712 /*
713  * Tree of chunks that were previously allocated.  This is used when allocating
714  * chunks, in an attempt to re-use address space.
715  */
716 static chunk_tree_t	old_chunks;
717 
718 /****************************/
719 /*
720  * base (internal allocation).
721  */
722 
723 /*
724  * Current pages that are being used for internal memory allocations.  These
725  * pages are carved up in cacheline-size quanta, so that there is no chance of
726  * false cache line sharing.
727  */
728 static void		*base_pages;
729 static void		*base_next_addr;
730 static void		*base_past_addr; /* Addr immediately past base_pages. */
731 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
732 static malloc_mutex_t	base_mtx;
733 #ifdef MALLOC_STATS
734 static size_t		base_mapped;
735 #endif
736 
737 /********/
738 /*
739  * Arenas.
740  */
741 
742 /*
743  * Arenas that are used to service external requests.  Not all elements of the
744  * arenas array are necessarily used; arenas are created lazily as needed.
745  */
746 static arena_t		**arenas;
747 static unsigned		narenas;
748 static unsigned		next_arena;
749 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
750 
751 #ifndef NO_TLS
752 /*
753  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
754  * for allocations.
755  */
756 static __thread arena_t	*arenas_map;
757 #define	get_arenas_map()	(arenas_map)
758 #define	set_arenas_map(x)	(arenas_map = x)
759 #else
760 static thread_key_t arenas_map_key;
761 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
762 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
763 #endif
764 
765 #ifdef MALLOC_STATS
766 /* Chunk statistics. */
767 static chunk_stats_t	stats_chunks;
768 #endif
769 
770 /*******************************/
771 /*
772  * Runtime configuration options.
773  */
774 const char	*_malloc_options;
775 
776 #ifndef MALLOC_PRODUCTION
777 static bool	opt_abort = true;
778 static bool	opt_junk = true;
779 #else
780 static bool	opt_abort = false;
781 static bool	opt_junk = false;
782 #endif
783 static bool	opt_hint = false;
784 static bool	opt_print_stats = false;
785 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
786 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
787 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
788 static bool	opt_utrace = false;
789 static bool	opt_sysv = false;
790 static bool	opt_xmalloc = false;
791 static bool	opt_zero = false;
792 static int32_t	opt_narenas_lshift = 0;
793 
794 typedef struct {
795 	void	*p;
796 	size_t	s;
797 	void	*r;
798 } malloc_utrace_t;
799 
800 #define	UTRACE(a, b, c)							\
801 	if (opt_utrace) {						\
802 		malloc_utrace_t ut;					\
803 		ut.p = a;						\
804 		ut.s = b;						\
805 		ut.r = c;						\
806 		xutrace(&ut, sizeof(ut));				\
807 	}
808 
809 /******************************************************************************/
810 /*
811  * Begin function prototypes for non-inline static functions.
812  */
813 
814 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
815 		const char *p4);
816 #ifdef MALLOC_STATS
817 static void	malloc_printf(const char *format, ...);
818 #endif
819 static char	*size_t2s(size_t x, char *s);
820 static bool	base_pages_alloc(size_t minsize);
821 static void	*base_alloc(size_t size);
822 static chunk_node_t *base_chunk_node_alloc(void);
823 static void	base_chunk_node_dealloc(chunk_node_t *node);
824 #ifdef MALLOC_STATS
825 static void	stats_print(arena_t *arena);
826 #endif
827 static void	*pages_map(void *addr, size_t size);
828 static void	*pages_map_align(void *addr, size_t size, int align);
829 static void	pages_unmap(void *addr, size_t size);
830 static void	*chunk_alloc(size_t size);
831 static void	chunk_dealloc(void *chunk, size_t size);
832 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
833 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
834 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
835 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
836 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
837 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
838 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
839 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
840 static void	*arena_malloc(arena_t *arena, size_t size);
841 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
842     size_t alloc_size);
843 static size_t	arena_salloc(const void *ptr);
844 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
845 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
846 static bool	arena_new(arena_t *arena);
847 static arena_t	*arenas_extend(unsigned ind);
848 static void	*huge_malloc(size_t size);
849 static void	*huge_palloc(size_t alignment, size_t size);
850 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
851 static void	huge_dalloc(void *ptr);
852 static void	*imalloc(size_t size);
853 static void	*ipalloc(size_t alignment, size_t size);
854 static void	*icalloc(size_t size);
855 static size_t	isalloc(const void *ptr);
856 static void	*iralloc(void *ptr, size_t size);
857 static void	idalloc(void *ptr);
858 static void	malloc_print_stats(void);
859 static bool	malloc_init_hard(void);
860 
861 /*
862  * End function prototypes.
863  */
864 /******************************************************************************/
865 /*
866  * Begin mutex.
867  */
868 
869 #ifdef __NetBSD__
870 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
871 #define	malloc_mutex_lock(m)	mutex_lock(m)
872 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
873 #else	/* __NetBSD__ */
874 static inline void
875 malloc_mutex_init(malloc_mutex_t *a_mutex)
876 {
877 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
878 
879 	a_mutex->lock = lock;
880 }
881 
882 static inline void
883 malloc_mutex_lock(malloc_mutex_t *a_mutex)
884 {
885 
886 	if (__isthreaded)
887 		_SPINLOCK(&a_mutex->lock);
888 }
889 
890 static inline void
891 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
892 {
893 
894 	if (__isthreaded)
895 		_SPINUNLOCK(&a_mutex->lock);
896 }
897 #endif	/* __NetBSD__ */
898 
899 /*
900  * End mutex.
901  */
902 /******************************************************************************/
903 /*
904  * Begin Utility functions/macros.
905  */
906 
907 /* Return the chunk address for allocation address a. */
908 #define	CHUNK_ADDR2BASE(a)						\
909 	((void *)((uintptr_t)(a) & ~chunksize_mask))
910 
911 /* Return the chunk offset of address a. */
912 #define	CHUNK_ADDR2OFFSET(a)						\
913 	((size_t)((uintptr_t)(a) & chunksize_mask))
914 
915 /* Return the smallest chunk multiple that is >= s. */
916 #define	CHUNK_CEILING(s)						\
917 	(((s) + chunksize_mask) & ~chunksize_mask)
918 
919 /* Return the smallest cacheline multiple that is >= s. */
920 #define	CACHELINE_CEILING(s)						\
921 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
922 
923 /* Return the smallest quantum multiple that is >= a. */
924 #define	QUANTUM_CEILING(a)						\
925 	(((a) + quantum_mask) & ~quantum_mask)
926 
927 /* Return the smallest pagesize multiple that is >= s. */
928 #define	PAGE_CEILING(s)							\
929 	(((s) + pagesize_mask) & ~pagesize_mask)
930 
931 /* Compute the smallest power of 2 that is >= x. */
932 static inline size_t
933 pow2_ceil(size_t x)
934 {
935 
936 	x--;
937 	x |= x >> 1;
938 	x |= x >> 2;
939 	x |= x >> 4;
940 	x |= x >> 8;
941 	x |= x >> 16;
942 #if (SIZEOF_PTR == 8)
943 	x |= x >> 32;
944 #endif
945 	x++;
946 	return (x);
947 }
948 
949 static void
950 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
951 {
952 
953 	write(STDERR_FILENO, p1, strlen(p1));
954 	write(STDERR_FILENO, p2, strlen(p2));
955 	write(STDERR_FILENO, p3, strlen(p3));
956 	write(STDERR_FILENO, p4, strlen(p4));
957 }
958 
959 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
960 	    const char *p4) = wrtmessage;
961 
962 #ifdef MALLOC_STATS
963 /*
964  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
965  */
966 static void
967 malloc_printf(const char *format, ...)
968 {
969 	char buf[4096];
970 	va_list ap;
971 
972 	va_start(ap, format);
973 	vsnprintf(buf, sizeof(buf), format, ap);
974 	va_end(ap);
975 	_malloc_message(buf, "", "", "");
976 }
977 #endif
978 
979 /*
980  * We don't want to depend on vsnprintf() for production builds, since that can
981  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
982  * integer printing functionality, so that malloc_printf() use can be limited to
983  * MALLOC_STATS code.
984  */
985 #define UMAX2S_BUFSIZE	21
986 static char *
987 size_t2s(size_t x, char *s)
988 {
989 	unsigned i;
990 
991 	/* Make sure UMAX2S_BUFSIZE is large enough. */
992 	/* LINTED */
993 	assert(sizeof(size_t) <= 8);
994 
995 	i = UMAX2S_BUFSIZE - 1;
996 	s[i] = '\0';
997 	do {
998 		i--;
999 		s[i] = "0123456789"[(int)x % 10];
1000 		x /= (uintmax_t)10LL;
1001 	} while (x > 0);
1002 
1003 	return (&s[i]);
1004 }
1005 
1006 /******************************************************************************/
1007 
1008 static bool
1009 base_pages_alloc(size_t minsize)
1010 {
1011 	size_t csize = 0;
1012 
1013 #ifdef USE_BRK
1014 	/*
1015 	 * Do special brk allocation here, since base allocations don't need to
1016 	 * be chunk-aligned.
1017 	 */
1018 	if (brk_prev != (void *)-1) {
1019 		void *brk_cur;
1020 		intptr_t incr;
1021 
1022 		if (minsize != 0)
1023 			csize = CHUNK_CEILING(minsize);
1024 
1025 		malloc_mutex_lock(&brk_mtx);
1026 		do {
1027 			/* Get the current end of brk. */
1028 			brk_cur = sbrk(0);
1029 
1030 			/*
1031 			 * Calculate how much padding is necessary to
1032 			 * chunk-align the end of brk.  Don't worry about
1033 			 * brk_cur not being chunk-aligned though.
1034 			 */
1035 			incr = (intptr_t)chunksize
1036 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1037 			assert(incr >= 0);
1038 			if ((size_t)incr < minsize)
1039 				incr += csize;
1040 
1041 			brk_prev = sbrk(incr);
1042 			if (brk_prev == brk_cur) {
1043 				/* Success. */
1044 				malloc_mutex_unlock(&brk_mtx);
1045 				base_pages = brk_cur;
1046 				base_next_addr = base_pages;
1047 				base_past_addr = (void *)((uintptr_t)base_pages
1048 				    + incr);
1049 #ifdef MALLOC_STATS
1050 				base_mapped += incr;
1051 #endif
1052 				return (false);
1053 			}
1054 		} while (brk_prev != (void *)-1);
1055 		malloc_mutex_unlock(&brk_mtx);
1056 	}
1057 	if (minsize == 0) {
1058 		/*
1059 		 * Failure during initialization doesn't matter, so avoid
1060 		 * falling through to the mmap-based page mapping code.
1061 		 */
1062 		return (true);
1063 	}
1064 #endif
1065 	assert(minsize != 0);
1066 	csize = PAGE_CEILING(minsize);
1067 	base_pages = pages_map(NULL, csize);
1068 	if (base_pages == NULL)
1069 		return (true);
1070 	base_next_addr = base_pages;
1071 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1072 #ifdef MALLOC_STATS
1073 	base_mapped += csize;
1074 #endif
1075 	return (false);
1076 }
1077 
1078 static void *
1079 base_alloc(size_t size)
1080 {
1081 	void *ret;
1082 	size_t csize;
1083 
1084 	/* Round size up to nearest multiple of the cacheline size. */
1085 	csize = CACHELINE_CEILING(size);
1086 
1087 	malloc_mutex_lock(&base_mtx);
1088 
1089 	/* Make sure there's enough space for the allocation. */
1090 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1091 		if (base_pages_alloc(csize)) {
1092 			ret = NULL;
1093 			goto RETURN;
1094 		}
1095 	}
1096 
1097 	/* Allocate. */
1098 	ret = base_next_addr;
1099 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1100 
1101 RETURN:
1102 	malloc_mutex_unlock(&base_mtx);
1103 	return (ret);
1104 }
1105 
1106 static chunk_node_t *
1107 base_chunk_node_alloc(void)
1108 {
1109 	chunk_node_t *ret;
1110 
1111 	malloc_mutex_lock(&base_mtx);
1112 	if (base_chunk_nodes != NULL) {
1113 		ret = base_chunk_nodes;
1114 		/* LINTED */
1115 		base_chunk_nodes = *(chunk_node_t **)ret;
1116 		malloc_mutex_unlock(&base_mtx);
1117 	} else {
1118 		malloc_mutex_unlock(&base_mtx);
1119 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1120 	}
1121 
1122 	return (ret);
1123 }
1124 
1125 static void
1126 base_chunk_node_dealloc(chunk_node_t *node)
1127 {
1128 
1129 	malloc_mutex_lock(&base_mtx);
1130 	/* LINTED */
1131 	*(chunk_node_t **)node = base_chunk_nodes;
1132 	base_chunk_nodes = node;
1133 	malloc_mutex_unlock(&base_mtx);
1134 }
1135 
1136 /******************************************************************************/
1137 
1138 #ifdef MALLOC_STATS
1139 static void
1140 stats_print(arena_t *arena)
1141 {
1142 	unsigned i;
1143 	int gap_start;
1144 
1145 	malloc_printf(
1146 	    "          allocated/mapped            nmalloc      ndalloc\n");
1147 
1148 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1149 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1150 	    arena->stats.ndalloc_small);
1151 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1152 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1153 	    arena->stats.ndalloc_large);
1154 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1155 	    arena->stats.allocated_small + arena->stats.allocated_large,
1156 	    arena->stats.mapped,
1157 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1158 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1159 
1160 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1161 	    "    reruns maxruns curruns\n");
1162 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1163 		if (arena->bins[i].stats.nrequests == 0) {
1164 			if (gap_start == -1)
1165 				gap_start = i;
1166 		} else {
1167 			if (gap_start != -1) {
1168 				if (i > gap_start + 1) {
1169 					/* Gap of more than one size class. */
1170 					malloc_printf("[%u..%u]\n",
1171 					    gap_start, i - 1);
1172 				} else {
1173 					/* Gap of one size class. */
1174 					malloc_printf("[%u]\n", gap_start);
1175 				}
1176 				gap_start = -1;
1177 			}
1178 			malloc_printf(
1179 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1180 			    " %9llu %7lu %7lu\n",
1181 			    i,
1182 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1183 			    arena->bins[i].reg_size,
1184 			    arena->bins[i].nregs,
1185 			    arena->bins[i].run_size >> pagesize_2pow,
1186 			    arena->bins[i].stats.nrequests,
1187 			    arena->bins[i].stats.nruns,
1188 			    arena->bins[i].stats.reruns,
1189 			    arena->bins[i].stats.highruns,
1190 			    arena->bins[i].stats.curruns);
1191 		}
1192 	}
1193 	if (gap_start != -1) {
1194 		if (i > gap_start + 1) {
1195 			/* Gap of more than one size class. */
1196 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1197 		} else {
1198 			/* Gap of one size class. */
1199 			malloc_printf("[%u]\n", gap_start);
1200 		}
1201 	}
1202 }
1203 #endif
1204 
1205 /*
1206  * End Utility functions/macros.
1207  */
1208 /******************************************************************************/
1209 /*
1210  * Begin chunk management functions.
1211  */
1212 
1213 #ifndef lint
1214 static inline int
1215 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1216 {
1217 
1218 	assert(a != NULL);
1219 	assert(b != NULL);
1220 
1221 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1222 		return (-1);
1223 	else if (a->chunk == b->chunk)
1224 		return (0);
1225 	else
1226 		return (1);
1227 }
1228 
1229 /* Generate red-black tree code for chunks. */
1230 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1231 #endif
1232 
1233 static void *
1234 pages_map_align(void *addr, size_t size, int align)
1235 {
1236 	void *ret;
1237 
1238 	/*
1239 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1240 	 * of existing mappings, and we only want to create new mappings.
1241 	 */
1242 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1243 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1244 	assert(ret != NULL);
1245 
1246 	if (ret == MAP_FAILED)
1247 		ret = NULL;
1248 	else if (addr != NULL && ret != addr) {
1249 		/*
1250 		 * We succeeded in mapping memory, but not in the right place.
1251 		 */
1252 		if (munmap(ret, size) == -1) {
1253 			char buf[STRERROR_BUF];
1254 
1255 			STRERROR_R(errno, buf, sizeof(buf));
1256 			_malloc_message(getprogname(),
1257 			    ": (malloc) Error in munmap(): ", buf, "\n");
1258 			if (opt_abort)
1259 				abort();
1260 		}
1261 		ret = NULL;
1262 	}
1263 
1264 	assert(ret == NULL || (addr == NULL && ret != addr)
1265 	    || (addr != NULL && ret == addr));
1266 	return (ret);
1267 }
1268 
1269 static void *
1270 pages_map(void *addr, size_t size)
1271 {
1272 
1273 	return pages_map_align(addr, size, 0);
1274 }
1275 
1276 static void
1277 pages_unmap(void *addr, size_t size)
1278 {
1279 
1280 	if (munmap(addr, size) == -1) {
1281 		char buf[STRERROR_BUF];
1282 
1283 		STRERROR_R(errno, buf, sizeof(buf));
1284 		_malloc_message(getprogname(),
1285 		    ": (malloc) Error in munmap(): ", buf, "\n");
1286 		if (opt_abort)
1287 			abort();
1288 	}
1289 }
1290 
1291 static void *
1292 chunk_alloc(size_t size)
1293 {
1294 	void *ret, *chunk;
1295 	chunk_node_t *tchunk, *delchunk;
1296 
1297 	assert(size != 0);
1298 	assert((size & chunksize_mask) == 0);
1299 
1300 	malloc_mutex_lock(&chunks_mtx);
1301 
1302 	if (size == chunksize) {
1303 		/*
1304 		 * Check for address ranges that were previously chunks and try
1305 		 * to use them.
1306 		 */
1307 
1308 		/* LINTED */
1309 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1310 		while (tchunk != NULL) {
1311 			/* Found an address range.  Try to recycle it. */
1312 
1313 			chunk = tchunk->chunk;
1314 			delchunk = tchunk;
1315 			/* LINTED */
1316 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1317 
1318 			/* Remove delchunk from the tree. */
1319 			/* LINTED */
1320 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1321 			base_chunk_node_dealloc(delchunk);
1322 
1323 #ifdef USE_BRK
1324 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1325 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1326 				/* Re-use a previously freed brk chunk. */
1327 				ret = chunk;
1328 				goto RETURN;
1329 			}
1330 #endif
1331 			if ((ret = pages_map(chunk, size)) != NULL) {
1332 				/* Success. */
1333 				goto RETURN;
1334 			}
1335 		}
1336 	}
1337 
1338 	/*
1339 	 * Try to over-allocate, but allow the OS to place the allocation
1340 	 * anywhere.  Beware of size_t wrap-around.
1341 	 */
1342 	if (size + chunksize > size) {
1343 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1344 		    != NULL) {
1345 			goto RETURN;
1346 		}
1347 	}
1348 
1349 #ifdef USE_BRK
1350 	/*
1351 	 * Try to create allocations in brk, in order to make full use of
1352 	 * limited address space.
1353 	 */
1354 	if (brk_prev != (void *)-1) {
1355 		void *brk_cur;
1356 		intptr_t incr;
1357 
1358 		/*
1359 		 * The loop is necessary to recover from races with other
1360 		 * threads that are using brk for something other than malloc.
1361 		 */
1362 		malloc_mutex_lock(&brk_mtx);
1363 		do {
1364 			/* Get the current end of brk. */
1365 			brk_cur = sbrk(0);
1366 
1367 			/*
1368 			 * Calculate how much padding is necessary to
1369 			 * chunk-align the end of brk.
1370 			 */
1371 			incr = (intptr_t)size
1372 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1373 			if (incr == (intptr_t)size) {
1374 				ret = brk_cur;
1375 			} else {
1376 				ret = (void *)((intptr_t)brk_cur + incr);
1377 				incr += size;
1378 			}
1379 
1380 			brk_prev = sbrk(incr);
1381 			if (brk_prev == brk_cur) {
1382 				/* Success. */
1383 				malloc_mutex_unlock(&brk_mtx);
1384 				brk_max = (void *)((intptr_t)ret + size);
1385 				goto RETURN;
1386 			}
1387 		} while (brk_prev != (void *)-1);
1388 		malloc_mutex_unlock(&brk_mtx);
1389 	}
1390 #endif
1391 
1392 	/* All strategies for allocation failed. */
1393 	ret = NULL;
1394 RETURN:
1395 	if (ret != NULL) {
1396 		chunk_node_t key;
1397 		/*
1398 		 * Clean out any entries in old_chunks that overlap with the
1399 		 * memory we just allocated.
1400 		 */
1401 		key.chunk = ret;
1402 		/* LINTED */
1403 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1404 		while (tchunk != NULL
1405 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1406 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1407 			delchunk = tchunk;
1408 			/* LINTED */
1409 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1410 			/* LINTED */
1411 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1412 			base_chunk_node_dealloc(delchunk);
1413 		}
1414 
1415 	}
1416 #ifdef MALLOC_STATS
1417 	if (ret != NULL) {
1418 		stats_chunks.nchunks += (size / chunksize);
1419 		stats_chunks.curchunks += (size / chunksize);
1420 	}
1421 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1422 		stats_chunks.highchunks = stats_chunks.curchunks;
1423 #endif
1424 	malloc_mutex_unlock(&chunks_mtx);
1425 
1426 	assert(CHUNK_ADDR2BASE(ret) == ret);
1427 	return (ret);
1428 }
1429 
1430 static void
1431 chunk_dealloc(void *chunk, size_t size)
1432 {
1433 	chunk_node_t *node;
1434 
1435 	assert(chunk != NULL);
1436 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1437 	assert(size != 0);
1438 	assert((size & chunksize_mask) == 0);
1439 
1440 	malloc_mutex_lock(&chunks_mtx);
1441 
1442 #ifdef USE_BRK
1443 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1444 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1445 		void *brk_cur;
1446 
1447 		malloc_mutex_lock(&brk_mtx);
1448 		/* Get the current end of brk. */
1449 		brk_cur = sbrk(0);
1450 
1451 		/*
1452 		 * Try to shrink the data segment if this chunk is at the end
1453 		 * of the data segment.  The sbrk() call here is subject to a
1454 		 * race condition with threads that use brk(2) or sbrk(2)
1455 		 * directly, but the alternative would be to leak memory for
1456 		 * the sake of poorly designed multi-threaded programs.
1457 		 */
1458 		if (brk_cur == brk_max
1459 		    && (void *)((uintptr_t)chunk + size) == brk_max
1460 		    && sbrk(-(intptr_t)size) == brk_max) {
1461 			malloc_mutex_unlock(&brk_mtx);
1462 			if (brk_prev == brk_max) {
1463 				/* Success. */
1464 				brk_prev = (void *)((intptr_t)brk_max
1465 				    - (intptr_t)size);
1466 				brk_max = brk_prev;
1467 			}
1468 		} else {
1469 			size_t offset;
1470 
1471 			malloc_mutex_unlock(&brk_mtx);
1472 			madvise(chunk, size, MADV_FREE);
1473 
1474 			/*
1475 			 * Iteratively create records of each chunk-sized
1476 			 * memory region that 'chunk' is comprised of, so that
1477 			 * the address range can be recycled if memory usage
1478 			 * increases later on.
1479 			 */
1480 			for (offset = 0; offset < size; offset += chunksize) {
1481 				node = base_chunk_node_alloc();
1482 				if (node == NULL)
1483 					break;
1484 
1485 				node->chunk = (void *)((uintptr_t)chunk
1486 				    + (uintptr_t)offset);
1487 				node->size = chunksize;
1488 				/* LINTED */
1489 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1490 			}
1491 		}
1492 	} else {
1493 #endif
1494 		pages_unmap(chunk, size);
1495 
1496 		/*
1497 		 * Make a record of the chunk's address, so that the address
1498 		 * range can be recycled if memory usage increases later on.
1499 		 * Don't bother to create entries if (size > chunksize), since
1500 		 * doing so could cause scalability issues for truly gargantuan
1501 		 * objects (many gigabytes or larger).
1502 		 */
1503 		if (size == chunksize) {
1504 			node = base_chunk_node_alloc();
1505 			if (node != NULL) {
1506 				node->chunk = (void *)(uintptr_t)chunk;
1507 				node->size = chunksize;
1508 				/* LINTED */
1509 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1510 			}
1511 		}
1512 #ifdef USE_BRK
1513 	}
1514 #endif
1515 
1516 #ifdef MALLOC_STATS
1517 	stats_chunks.curchunks -= (size / chunksize);
1518 #endif
1519 	malloc_mutex_unlock(&chunks_mtx);
1520 }
1521 
1522 /*
1523  * End chunk management functions.
1524  */
1525 /******************************************************************************/
1526 /*
1527  * Begin arena.
1528  */
1529 
1530 /*
1531  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1532  *
1533  * We maintain at least one block of arenas.  Usually there are more.
1534  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1535  * amongst threads.  To accomplish this, next_arena advances only in
1536  * ncpu steps.
1537  */
1538 static __noinline arena_t *
1539 choose_arena_hard(void)
1540 {
1541 	unsigned i, curcpu;
1542 	arena_t **map;
1543 
1544 	/* Initialize the current block of arenas and advance to next. */
1545 	malloc_mutex_lock(&arenas_mtx);
1546 	assert(next_arena % ncpus == 0);
1547 	assert(narenas % ncpus == 0);
1548 	map = &arenas[next_arena];
1549 	set_arenas_map(map);
1550 	for (i = 0; i < ncpus; i++) {
1551 		if (arenas[next_arena] == NULL)
1552 			arenas_extend(next_arena);
1553 		next_arena = (next_arena + 1) % narenas;
1554 	}
1555 	malloc_mutex_unlock(&arenas_mtx);
1556 
1557 	/*
1558 	 * If we were unable to allocate an arena above, then default to
1559 	 * the first arena, which is always present.
1560 	 */
1561 	curcpu = thr_curcpu();
1562 	if (map[curcpu] != NULL)
1563 		return map[curcpu];
1564 	return arenas[0];
1565 }
1566 
1567 static inline arena_t *
1568 choose_arena(void)
1569 {
1570 	unsigned curcpu;
1571 	arena_t **map;
1572 
1573 	map = get_arenas_map();
1574 	curcpu = thr_curcpu();
1575 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1576 		return map[curcpu];
1577 
1578         return choose_arena_hard();
1579 }
1580 
1581 #ifndef lint
1582 static inline int
1583 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1584 {
1585 
1586 	assert(a != NULL);
1587 	assert(b != NULL);
1588 
1589 	if ((uintptr_t)a < (uintptr_t)b)
1590 		return (-1);
1591 	else if (a == b)
1592 		return (0);
1593 	else
1594 		return (1);
1595 }
1596 
1597 /* Generate red-black tree code for arena chunks. */
1598 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1599 #endif
1600 
1601 #ifndef lint
1602 static inline int
1603 arena_run_comp(arena_run_t *a, arena_run_t *b)
1604 {
1605 
1606 	assert(a != NULL);
1607 	assert(b != NULL);
1608 
1609 	if ((uintptr_t)a < (uintptr_t)b)
1610 		return (-1);
1611 	else if (a == b)
1612 		return (0);
1613 	else
1614 		return (1);
1615 }
1616 
1617 /* Generate red-black tree code for arena runs. */
1618 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1619 #endif
1620 
1621 static inline void *
1622 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1623 {
1624 	void *ret;
1625 	unsigned i, mask, bit, regind;
1626 
1627 	assert(run->magic == ARENA_RUN_MAGIC);
1628 	assert(run->regs_minelm < bin->regs_mask_nelms);
1629 
1630 	/*
1631 	 * Move the first check outside the loop, so that run->regs_minelm can
1632 	 * be updated unconditionally, without the possibility of updating it
1633 	 * multiple times.
1634 	 */
1635 	i = run->regs_minelm;
1636 	mask = run->regs_mask[i];
1637 	if (mask != 0) {
1638 		/* Usable allocation found. */
1639 		bit = ffs((int)mask) - 1;
1640 
1641 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1642 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1643 		    + (bin->reg_size * regind));
1644 
1645 		/* Clear bit. */
1646 		mask ^= (1 << bit);
1647 		run->regs_mask[i] = mask;
1648 
1649 		return (ret);
1650 	}
1651 
1652 	for (i++; i < bin->regs_mask_nelms; i++) {
1653 		mask = run->regs_mask[i];
1654 		if (mask != 0) {
1655 			/* Usable allocation found. */
1656 			bit = ffs((int)mask) - 1;
1657 
1658 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1659 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1660 			    + (bin->reg_size * regind));
1661 
1662 			/* Clear bit. */
1663 			mask ^= (1 << bit);
1664 			run->regs_mask[i] = mask;
1665 
1666 			/*
1667 			 * Make a note that nothing before this element
1668 			 * contains a free region.
1669 			 */
1670 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1671 
1672 			return (ret);
1673 		}
1674 	}
1675 	/* Not reached. */
1676 	/* LINTED */
1677 	assert(0);
1678 	return (NULL);
1679 }
1680 
1681 static inline void
1682 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1683 {
1684 	/*
1685 	 * To divide by a number D that is not a power of two we multiply
1686 	 * by (2^21 / D) and then right shift by 21 positions.
1687 	 *
1688 	 *   X / D
1689 	 *
1690 	 * becomes
1691 	 *
1692 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1693 	 */
1694 #define SIZE_INV_SHIFT 21
1695 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1696 	static const unsigned size_invs[] = {
1697 	    SIZE_INV(3),
1698 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1699 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1700 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1701 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1702 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1703 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1704 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1705 #if (QUANTUM_2POW_MIN < 4)
1706 	    ,
1707 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1708 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1709 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1710 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1711 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1712 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1713 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1714 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1715 #endif
1716 	};
1717 	unsigned diff, regind, elm, bit;
1718 
1719 	/* LINTED */
1720 	assert(run->magic == ARENA_RUN_MAGIC);
1721 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1722 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1723 
1724 	/*
1725 	 * Avoid doing division with a variable divisor if possible.  Using
1726 	 * actual division here can reduce allocator throughput by over 20%!
1727 	 */
1728 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1729 	if ((size & (size - 1)) == 0) {
1730 		/*
1731 		 * log2_table allows fast division of a power of two in the
1732 		 * [1..128] range.
1733 		 *
1734 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1735 		 */
1736 		static const unsigned char log2_table[] = {
1737 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1738 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1739 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1740 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1741 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1742 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1743 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1744 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1745 		};
1746 
1747 		if (size <= 128)
1748 			regind = (diff >> log2_table[size - 1]);
1749 		else if (size <= 32768)
1750 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1751 		else {
1752 			/*
1753 			 * The page size is too large for us to use the lookup
1754 			 * table.  Use real division.
1755 			 */
1756 			regind = (unsigned)(diff / size);
1757 		}
1758 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1759 	    << QUANTUM_2POW_MIN) + 2) {
1760 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1761 		regind >>= SIZE_INV_SHIFT;
1762 	} else {
1763 		/*
1764 		 * size_invs isn't large enough to handle this size class, so
1765 		 * calculate regind using actual division.  This only happens
1766 		 * if the user increases small_max via the 'S' runtime
1767 		 * configuration option.
1768 		 */
1769 		regind = (unsigned)(diff / size);
1770 	};
1771 	assert(diff == regind * size);
1772 	assert(regind < bin->nregs);
1773 
1774 	elm = regind >> (SIZEOF_INT_2POW + 3);
1775 	if (elm < run->regs_minelm)
1776 		run->regs_minelm = elm;
1777 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1778 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1779 	run->regs_mask[elm] |= (1 << bit);
1780 #undef SIZE_INV
1781 #undef SIZE_INV_SHIFT
1782 }
1783 
1784 static void
1785 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1786 {
1787 	arena_chunk_t *chunk;
1788 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1789 	unsigned i;
1790 
1791 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1792 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1793 	    >> pagesize_2pow);
1794 	total_pages = chunk->map[run_ind].npages;
1795 	need_pages = (unsigned)(size >> pagesize_2pow);
1796 	assert(need_pages <= total_pages);
1797 	rem_pages = total_pages - need_pages;
1798 
1799 	/* Split enough pages from the front of run to fit allocation size. */
1800 	map_offset = run_ind;
1801 	for (i = 0; i < need_pages; i++) {
1802 		chunk->map[map_offset + i].npages = need_pages;
1803 		chunk->map[map_offset + i].pos = i;
1804 	}
1805 
1806 	/* Keep track of trailing unused pages for later use. */
1807 	if (rem_pages > 0) {
1808 		/* Update map for trailing pages. */
1809 		map_offset += need_pages;
1810 		chunk->map[map_offset].npages = rem_pages;
1811 		chunk->map[map_offset].pos = POS_FREE;
1812 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1813 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1814 	}
1815 
1816 	chunk->pages_used += need_pages;
1817 }
1818 
1819 static arena_chunk_t *
1820 arena_chunk_alloc(arena_t *arena)
1821 {
1822 	arena_chunk_t *chunk;
1823 
1824 	if (arena->spare != NULL) {
1825 		chunk = arena->spare;
1826 		arena->spare = NULL;
1827 
1828 		/* LINTED */
1829 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1830 	} else {
1831 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1832 		if (chunk == NULL)
1833 			return (NULL);
1834 #ifdef MALLOC_STATS
1835 		arena->stats.mapped += chunksize;
1836 #endif
1837 
1838 		chunk->arena = arena;
1839 
1840 		/* LINTED */
1841 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1842 
1843 		/*
1844 		 * Claim that no pages are in use, since the header is merely
1845 		 * overhead.
1846 		 */
1847 		chunk->pages_used = 0;
1848 
1849 		chunk->max_frun_npages = chunk_npages -
1850 		    arena_chunk_header_npages;
1851 		chunk->min_frun_ind = arena_chunk_header_npages;
1852 
1853 		/*
1854 		 * Initialize enough of the map to support one maximal free run.
1855 		 */
1856 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1857 		    arena_chunk_header_npages;
1858 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1859 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1860 		    arena_chunk_header_npages;
1861 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1862 	}
1863 
1864 	return (chunk);
1865 }
1866 
1867 static void
1868 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1869 {
1870 
1871 	/*
1872 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1873 	 * will be cached, so that the arena does not use it.
1874 	 */
1875 	/* LINTED */
1876 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1877 
1878 	if (opt_hint == false) {
1879 		if (arena->spare != NULL) {
1880 			chunk_dealloc((void *)arena->spare, chunksize);
1881 #ifdef MALLOC_STATS
1882 			arena->stats.mapped -= chunksize;
1883 #endif
1884 		}
1885 		arena->spare = chunk;
1886 	} else {
1887 		assert(arena->spare == NULL);
1888 		chunk_dealloc((void *)chunk, chunksize);
1889 #ifdef MALLOC_STATS
1890 		arena->stats.mapped -= chunksize;
1891 #endif
1892 	}
1893 }
1894 
1895 static arena_run_t *
1896 arena_run_alloc(arena_t *arena, size_t size)
1897 {
1898 	arena_chunk_t *chunk;
1899 	arena_run_t *run;
1900 	unsigned need_npages, limit_pages, compl_need_npages;
1901 
1902 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1903 	    pagesize_2pow)));
1904 	assert((size & pagesize_mask) == 0);
1905 
1906 	/*
1907 	 * Search through arena's chunks in address order for a free run that is
1908 	 * large enough.  Look for the first fit.
1909 	 */
1910 	need_npages = (unsigned)(size >> pagesize_2pow);
1911 	limit_pages = chunk_npages - arena_chunk_header_npages;
1912 	compl_need_npages = limit_pages - need_npages;
1913 	/* LINTED */
1914 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1915 		/*
1916 		 * Avoid searching this chunk if there are not enough
1917 		 * contiguous free pages for there to possibly be a large
1918 		 * enough free run.
1919 		 */
1920 		if (chunk->pages_used <= compl_need_npages &&
1921 		    need_npages <= chunk->max_frun_npages) {
1922 			arena_chunk_map_t *mapelm;
1923 			unsigned i;
1924 			unsigned max_frun_npages = 0;
1925 			unsigned min_frun_ind = chunk_npages;
1926 
1927 			assert(chunk->min_frun_ind >=
1928 			    arena_chunk_header_npages);
1929 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1930 				mapelm = &chunk->map[i];
1931 				if (mapelm->pos == POS_FREE) {
1932 					if (mapelm->npages >= need_npages) {
1933 						run = (arena_run_t *)
1934 						    ((uintptr_t)chunk + (i <<
1935 						    pagesize_2pow));
1936 						/* Update page map. */
1937 						arena_run_split(arena, run,
1938 						    size);
1939 						return (run);
1940 					}
1941 					if (mapelm->npages >
1942 					    max_frun_npages) {
1943 						max_frun_npages =
1944 						    mapelm->npages;
1945 					}
1946 					if (i < min_frun_ind) {
1947 						min_frun_ind = i;
1948 						if (i < chunk->min_frun_ind)
1949 							chunk->min_frun_ind = i;
1950 					}
1951 				}
1952 				i += mapelm->npages;
1953 			}
1954 			/*
1955 			 * Search failure.  Reset cached chunk->max_frun_npages.
1956 			 * chunk->min_frun_ind was already reset above (if
1957 			 * necessary).
1958 			 */
1959 			chunk->max_frun_npages = max_frun_npages;
1960 		}
1961 	}
1962 
1963 	/*
1964 	 * No usable runs.  Create a new chunk from which to allocate the run.
1965 	 */
1966 	chunk = arena_chunk_alloc(arena);
1967 	if (chunk == NULL)
1968 		return (NULL);
1969 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1970 	    pagesize_2pow));
1971 	/* Update page map. */
1972 	arena_run_split(arena, run, size);
1973 	return (run);
1974 }
1975 
1976 static void
1977 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1978 {
1979 	arena_chunk_t *chunk;
1980 	unsigned run_ind, run_pages;
1981 
1982 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1983 
1984 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1985 	    >> pagesize_2pow);
1986 	assert(run_ind >= arena_chunk_header_npages);
1987 	assert(run_ind < (chunksize >> pagesize_2pow));
1988 	run_pages = (unsigned)(size >> pagesize_2pow);
1989 	assert(run_pages == chunk->map[run_ind].npages);
1990 
1991 	/* Subtract pages from count of pages used in chunk. */
1992 	chunk->pages_used -= run_pages;
1993 
1994 	/* Mark run as deallocated. */
1995 	assert(chunk->map[run_ind].npages == run_pages);
1996 	chunk->map[run_ind].pos = POS_FREE;
1997 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1998 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1999 
2000 	/*
2001 	 * Tell the kernel that we don't need the data in this run, but only if
2002 	 * requested via runtime configuration.
2003 	 */
2004 	if (opt_hint)
2005 		madvise(run, size, MADV_FREE);
2006 
2007 	/* Try to coalesce with neighboring runs. */
2008 	if (run_ind > arena_chunk_header_npages &&
2009 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2010 		unsigned prev_npages;
2011 
2012 		/* Coalesce with previous run. */
2013 		prev_npages = chunk->map[run_ind - 1].npages;
2014 		run_ind -= prev_npages;
2015 		assert(chunk->map[run_ind].npages == prev_npages);
2016 		assert(chunk->map[run_ind].pos == POS_FREE);
2017 		run_pages += prev_npages;
2018 
2019 		chunk->map[run_ind].npages = run_pages;
2020 		assert(chunk->map[run_ind].pos == POS_FREE);
2021 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2022 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2023 	}
2024 
2025 	if (run_ind + run_pages < chunk_npages &&
2026 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2027 		unsigned next_npages;
2028 
2029 		/* Coalesce with next run. */
2030 		next_npages = chunk->map[run_ind + run_pages].npages;
2031 		run_pages += next_npages;
2032 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2033 		    next_npages);
2034 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2035 
2036 		chunk->map[run_ind].npages = run_pages;
2037 		chunk->map[run_ind].pos = POS_FREE;
2038 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2039 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2040 	}
2041 
2042 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2043 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2044 	if (run_ind < chunk->min_frun_ind)
2045 		chunk->min_frun_ind = run_ind;
2046 
2047 	/* Deallocate chunk if it is now completely unused. */
2048 	if (chunk->pages_used == 0)
2049 		arena_chunk_dealloc(arena, chunk);
2050 }
2051 
2052 static arena_run_t *
2053 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2054 {
2055 	arena_run_t *run;
2056 	unsigned i, remainder;
2057 
2058 	/* Look for a usable run. */
2059 	/* LINTED */
2060 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2061 		/* run is guaranteed to have available space. */
2062 		/* LINTED */
2063 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2064 #ifdef MALLOC_STATS
2065 		bin->stats.reruns++;
2066 #endif
2067 		return (run);
2068 	}
2069 	/* No existing runs have any space available. */
2070 
2071 	/* Allocate a new run. */
2072 	run = arena_run_alloc(arena, bin->run_size);
2073 	if (run == NULL)
2074 		return (NULL);
2075 
2076 	/* Initialize run internals. */
2077 	run->bin = bin;
2078 
2079 	for (i = 0; i < bin->regs_mask_nelms; i++)
2080 		run->regs_mask[i] = UINT_MAX;
2081 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2082 	if (remainder != 0) {
2083 		/* The last element has spare bits that need to be unset. */
2084 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2085 		    - remainder));
2086 	}
2087 
2088 	run->regs_minelm = 0;
2089 
2090 	run->nfree = bin->nregs;
2091 #ifdef MALLOC_DEBUG
2092 	run->magic = ARENA_RUN_MAGIC;
2093 #endif
2094 
2095 #ifdef MALLOC_STATS
2096 	bin->stats.nruns++;
2097 	bin->stats.curruns++;
2098 	if (bin->stats.curruns > bin->stats.highruns)
2099 		bin->stats.highruns = bin->stats.curruns;
2100 #endif
2101 	return (run);
2102 }
2103 
2104 /* bin->runcur must have space available before this function is called. */
2105 static inline void *
2106 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2107 {
2108 	void *ret;
2109 
2110 	assert(run->magic == ARENA_RUN_MAGIC);
2111 	assert(run->nfree > 0);
2112 
2113 	ret = arena_run_reg_alloc(run, bin);
2114 	assert(ret != NULL);
2115 	run->nfree--;
2116 
2117 	return (ret);
2118 }
2119 
2120 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2121 static void *
2122 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2123 {
2124 
2125 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2126 	if (bin->runcur == NULL)
2127 		return (NULL);
2128 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2129 	assert(bin->runcur->nfree > 0);
2130 
2131 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2132 }
2133 
2134 /*
2135  * Calculate bin->run_size such that it meets the following constraints:
2136  *
2137  *   *) bin->run_size >= min_run_size
2138  *   *) bin->run_size <= arena_maxclass
2139  *   *) bin->run_size <= RUN_MAX_SMALL
2140  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2141  *
2142  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2143  * also calculated here, since these settings are all interdependent.
2144  */
2145 static size_t
2146 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2147 {
2148 	size_t try_run_size, good_run_size;
2149 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2150 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2151 
2152 	assert(min_run_size >= pagesize);
2153 	assert(min_run_size <= arena_maxclass);
2154 	assert(min_run_size <= RUN_MAX_SMALL);
2155 
2156 	/*
2157 	 * Calculate known-valid settings before entering the run_size
2158 	 * expansion loop, so that the first part of the loop always copies
2159 	 * valid settings.
2160 	 *
2161 	 * The do..while loop iteratively reduces the number of regions until
2162 	 * the run header and the regions no longer overlap.  A closed formula
2163 	 * would be quite messy, since there is an interdependency between the
2164 	 * header's mask length and the number of regions.
2165 	 */
2166 	try_run_size = min_run_size;
2167 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2168 	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2169 	do {
2170 		try_nregs--;
2171 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2172 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2173 		try_reg0_offset = (unsigned)(try_run_size -
2174 		    (try_nregs * bin->reg_size));
2175 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2176 	    > try_reg0_offset);
2177 
2178 	/* run_size expansion loop. */
2179 	do {
2180 		/*
2181 		 * Copy valid settings before trying more aggressive settings.
2182 		 */
2183 		good_run_size = try_run_size;
2184 		good_nregs = try_nregs;
2185 		good_mask_nelms = try_mask_nelms;
2186 		good_reg0_offset = try_reg0_offset;
2187 
2188 		/* Try more aggressive settings. */
2189 		try_run_size += pagesize;
2190 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2191 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2192 		do {
2193 			try_nregs--;
2194 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2195 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2196 			    1 : 0);
2197 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2198 			    bin->reg_size));
2199 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2200 		    (try_mask_nelms - 1)) > try_reg0_offset);
2201 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2202 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2203 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2204 
2205 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2206 	    <= good_reg0_offset);
2207 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2208 
2209 	/* Copy final settings. */
2210 	bin->run_size = good_run_size;
2211 	bin->nregs = good_nregs;
2212 	bin->regs_mask_nelms = good_mask_nelms;
2213 	bin->reg0_offset = good_reg0_offset;
2214 
2215 	return (good_run_size);
2216 }
2217 
2218 static void *
2219 arena_malloc(arena_t *arena, size_t size)
2220 {
2221 	void *ret;
2222 
2223 	assert(arena != NULL);
2224 	assert(arena->magic == ARENA_MAGIC);
2225 	assert(size != 0);
2226 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2227 
2228 	if (size <= bin_maxclass) {
2229 		arena_bin_t *bin;
2230 		arena_run_t *run;
2231 
2232 		/* Small allocation. */
2233 
2234 		if (size < small_min) {
2235 			/* Tiny. */
2236 			size = pow2_ceil(size);
2237 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2238 			    1)))];
2239 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2240 			/*
2241 			 * Bin calculation is always correct, but we may need
2242 			 * to fix size for the purposes of assertions and/or
2243 			 * stats accuracy.
2244 			 */
2245 			if (size < (1 << TINY_MIN_2POW))
2246 				size = (1 << TINY_MIN_2POW);
2247 #endif
2248 		} else if (size <= small_max) {
2249 			/* Quantum-spaced. */
2250 			size = QUANTUM_CEILING(size);
2251 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2252 			    - 1];
2253 		} else {
2254 			/* Sub-page. */
2255 			size = pow2_ceil(size);
2256 			bin = &arena->bins[ntbins + nqbins
2257 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2258 		}
2259 		assert(size == bin->reg_size);
2260 
2261 		malloc_mutex_lock(&arena->mtx);
2262 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2263 			ret = arena_bin_malloc_easy(arena, bin, run);
2264 		else
2265 			ret = arena_bin_malloc_hard(arena, bin);
2266 
2267 		if (ret == NULL) {
2268 			malloc_mutex_unlock(&arena->mtx);
2269 			return (NULL);
2270 		}
2271 
2272 #ifdef MALLOC_STATS
2273 		bin->stats.nrequests++;
2274 		arena->stats.nmalloc_small++;
2275 		arena->stats.allocated_small += size;
2276 #endif
2277 	} else {
2278 		/* Large allocation. */
2279 		size = PAGE_CEILING(size);
2280 		malloc_mutex_lock(&arena->mtx);
2281 		ret = (void *)arena_run_alloc(arena, size);
2282 		if (ret == NULL) {
2283 			malloc_mutex_unlock(&arena->mtx);
2284 			return (NULL);
2285 		}
2286 #ifdef MALLOC_STATS
2287 		arena->stats.nmalloc_large++;
2288 		arena->stats.allocated_large += size;
2289 #endif
2290 	}
2291 
2292 	malloc_mutex_unlock(&arena->mtx);
2293 
2294 	if (opt_junk)
2295 		memset(ret, 0xa5, size);
2296 	else if (opt_zero)
2297 		memset(ret, 0, size);
2298 	return (ret);
2299 }
2300 
2301 static inline void
2302 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2303     unsigned npages)
2304 {
2305 	unsigned i;
2306 
2307 	assert(npages > 0);
2308 
2309 	/*
2310 	 * Modifiy the map such that arena_run_dalloc() sees the run as
2311 	 * separately allocated.
2312 	 */
2313 	for (i = 0; i < npages; i++) {
2314 		chunk->map[pageind + i].npages = npages;
2315 		chunk->map[pageind + i].pos = i;
2316 	}
2317 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2318 	    pagesize_2pow)), npages << pagesize_2pow);
2319 }
2320 
2321 /* Only handles large allocations that require more than page alignment. */
2322 static void *
2323 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2324 {
2325 	void *ret;
2326 	size_t offset;
2327 	arena_chunk_t *chunk;
2328 	unsigned pageind, i, npages;
2329 
2330 	assert((size & pagesize_mask) == 0);
2331 	assert((alignment & pagesize_mask) == 0);
2332 
2333 	npages = (unsigned)(size >> pagesize_2pow);
2334 
2335 	malloc_mutex_lock(&arena->mtx);
2336 	ret = (void *)arena_run_alloc(arena, alloc_size);
2337 	if (ret == NULL) {
2338 		malloc_mutex_unlock(&arena->mtx);
2339 		return (NULL);
2340 	}
2341 
2342 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2343 
2344 	offset = (uintptr_t)ret & (alignment - 1);
2345 	assert((offset & pagesize_mask) == 0);
2346 	assert(offset < alloc_size);
2347 	if (offset == 0) {
2348 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2349 		    pagesize_2pow);
2350 
2351 		/* Update the map for the run to be kept. */
2352 		for (i = 0; i < npages; i++) {
2353 			chunk->map[pageind + i].npages = npages;
2354 			assert(chunk->map[pageind + i].pos == i);
2355 		}
2356 
2357 		/* Trim trailing space. */
2358 		arena_palloc_trim(arena, chunk, pageind + npages,
2359 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2360 	} else {
2361 		size_t leadsize, trailsize;
2362 
2363 		leadsize = alignment - offset;
2364 		ret = (void *)((uintptr_t)ret + leadsize);
2365 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2366 		    pagesize_2pow);
2367 
2368 		/* Update the map for the run to be kept. */
2369 		for (i = 0; i < npages; i++) {
2370 			chunk->map[pageind + i].npages = npages;
2371 			chunk->map[pageind + i].pos = i;
2372 		}
2373 
2374 		/* Trim leading space. */
2375 		arena_palloc_trim(arena, chunk,
2376 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2377 		    (unsigned)(leadsize >> pagesize_2pow));
2378 
2379 		trailsize = alloc_size - leadsize - size;
2380 		if (trailsize != 0) {
2381 			/* Trim trailing space. */
2382 			assert(trailsize < alloc_size);
2383 			arena_palloc_trim(arena, chunk, pageind + npages,
2384 			    (unsigned)(trailsize >> pagesize_2pow));
2385 		}
2386 	}
2387 
2388 #ifdef MALLOC_STATS
2389 	arena->stats.nmalloc_large++;
2390 	arena->stats.allocated_large += size;
2391 #endif
2392 	malloc_mutex_unlock(&arena->mtx);
2393 
2394 	if (opt_junk)
2395 		memset(ret, 0xa5, size);
2396 	else if (opt_zero)
2397 		memset(ret, 0, size);
2398 	return (ret);
2399 }
2400 
2401 /* Return the size of the allocation pointed to by ptr. */
2402 static size_t
2403 arena_salloc(const void *ptr)
2404 {
2405 	size_t ret;
2406 	arena_chunk_t *chunk;
2407 	arena_chunk_map_t *mapelm;
2408 	unsigned pageind;
2409 
2410 	assert(ptr != NULL);
2411 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2412 
2413 	/*
2414 	 * No arena data structures that we query here can change in a way that
2415 	 * affects this function, so we don't need to lock.
2416 	 */
2417 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2418 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2419 	    pagesize_2pow);
2420 	mapelm = &chunk->map[pageind];
2421 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2422 	    pagesize_2pow)) {
2423 		arena_run_t *run;
2424 
2425 		pageind -= mapelm->pos;
2426 
2427 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2428 		    pagesize_2pow));
2429 		assert(run->magic == ARENA_RUN_MAGIC);
2430 		ret = run->bin->reg_size;
2431 	} else
2432 		ret = mapelm->npages << pagesize_2pow;
2433 
2434 	return (ret);
2435 }
2436 
2437 static void *
2438 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2439 {
2440 	void *ret;
2441 
2442 	/* Avoid moving the allocation if the size class would not change. */
2443 	if (size < small_min) {
2444 		if (oldsize < small_min &&
2445 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2446 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2447 			goto IN_PLACE;
2448 	} else if (size <= small_max) {
2449 		if (oldsize >= small_min && oldsize <= small_max &&
2450 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2451 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2452 			goto IN_PLACE;
2453 	} else {
2454 		/*
2455 		 * We make no attempt to resize runs here, though it would be
2456 		 * possible to do so.
2457 		 */
2458 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2459 			goto IN_PLACE;
2460 	}
2461 
2462 	/*
2463 	 * If we get here, then size and oldsize are different enough that we
2464 	 * need to use a different size class.  In that case, fall back to
2465 	 * allocating new space and copying.
2466 	 */
2467 	ret = arena_malloc(choose_arena(), size);
2468 	if (ret == NULL)
2469 		return (NULL);
2470 
2471 	/* Junk/zero-filling were already done by arena_malloc(). */
2472 	if (size < oldsize)
2473 		memcpy(ret, ptr, size);
2474 	else
2475 		memcpy(ret, ptr, oldsize);
2476 	idalloc(ptr);
2477 	return (ret);
2478 IN_PLACE:
2479 	if (opt_junk && size < oldsize)
2480 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2481 	else if (opt_zero && size > oldsize)
2482 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2483 	return (ptr);
2484 }
2485 
2486 static void
2487 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2488 {
2489 	unsigned pageind;
2490 	arena_chunk_map_t *mapelm;
2491 	size_t size;
2492 
2493 	assert(arena != NULL);
2494 	assert(arena->magic == ARENA_MAGIC);
2495 	assert(chunk->arena == arena);
2496 	assert(ptr != NULL);
2497 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2498 
2499 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2500 	    pagesize_2pow);
2501 	mapelm = &chunk->map[pageind];
2502 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2503 	    pagesize_2pow)) {
2504 		arena_run_t *run;
2505 		arena_bin_t *bin;
2506 
2507 		/* Small allocation. */
2508 
2509 		pageind -= mapelm->pos;
2510 
2511 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2512 		    pagesize_2pow));
2513 		assert(run->magic == ARENA_RUN_MAGIC);
2514 		bin = run->bin;
2515 		size = bin->reg_size;
2516 
2517 		if (opt_junk)
2518 			memset(ptr, 0x5a, size);
2519 
2520 		malloc_mutex_lock(&arena->mtx);
2521 		arena_run_reg_dalloc(run, bin, ptr, size);
2522 		run->nfree++;
2523 
2524 		if (run->nfree == bin->nregs) {
2525 			/* Deallocate run. */
2526 			if (run == bin->runcur)
2527 				bin->runcur = NULL;
2528 			else if (bin->nregs != 1) {
2529 				/*
2530 				 * This block's conditional is necessary because
2531 				 * if the run only contains one region, then it
2532 				 * never gets inserted into the non-full runs
2533 				 * tree.
2534 				 */
2535 				/* LINTED */
2536 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2537 			}
2538 #ifdef MALLOC_DEBUG
2539 			run->magic = 0;
2540 #endif
2541 			arena_run_dalloc(arena, run, bin->run_size);
2542 #ifdef MALLOC_STATS
2543 			bin->stats.curruns--;
2544 #endif
2545 		} else if (run->nfree == 1 && run != bin->runcur) {
2546 			/*
2547 			 * Make sure that bin->runcur always refers to the
2548 			 * lowest non-full run, if one exists.
2549 			 */
2550 			if (bin->runcur == NULL)
2551 				bin->runcur = run;
2552 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2553 				/* Switch runcur. */
2554 				if (bin->runcur->nfree > 0) {
2555 					/* Insert runcur. */
2556 					/* LINTED */
2557 					RB_INSERT(arena_run_tree_s, &bin->runs,
2558 					    bin->runcur);
2559 				}
2560 				bin->runcur = run;
2561 			} else {
2562 				/* LINTED */
2563 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2564 			}
2565 		}
2566 #ifdef MALLOC_STATS
2567 		arena->stats.allocated_small -= size;
2568 		arena->stats.ndalloc_small++;
2569 #endif
2570 	} else {
2571 		/* Large allocation. */
2572 
2573 		size = mapelm->npages << pagesize_2pow;
2574 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2575 
2576 		if (opt_junk)
2577 			memset(ptr, 0x5a, size);
2578 
2579 		malloc_mutex_lock(&arena->mtx);
2580 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2581 #ifdef MALLOC_STATS
2582 		arena->stats.allocated_large -= size;
2583 		arena->stats.ndalloc_large++;
2584 #endif
2585 	}
2586 
2587 	malloc_mutex_unlock(&arena->mtx);
2588 }
2589 
2590 static bool
2591 arena_new(arena_t *arena)
2592 {
2593 	unsigned i;
2594 	arena_bin_t *bin;
2595 	size_t prev_run_size;
2596 
2597 	malloc_mutex_init(&arena->mtx);
2598 
2599 #ifdef MALLOC_STATS
2600 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2601 #endif
2602 
2603 	/* Initialize chunks. */
2604 	RB_INIT(&arena->chunks);
2605 	arena->spare = NULL;
2606 
2607 	/* Initialize bins. */
2608 	prev_run_size = pagesize;
2609 
2610 	/* (2^n)-spaced tiny bins. */
2611 	for (i = 0; i < ntbins; i++) {
2612 		bin = &arena->bins[i];
2613 		bin->runcur = NULL;
2614 		RB_INIT(&bin->runs);
2615 
2616 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2617 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2618 
2619 #ifdef MALLOC_STATS
2620 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2621 #endif
2622 	}
2623 
2624 	/* Quantum-spaced bins. */
2625 	for (; i < ntbins + nqbins; i++) {
2626 		bin = &arena->bins[i];
2627 		bin->runcur = NULL;
2628 		RB_INIT(&bin->runs);
2629 
2630 		bin->reg_size = quantum * (i - ntbins + 1);
2631 /*
2632 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2633 */
2634 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2635 
2636 #ifdef MALLOC_STATS
2637 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2638 #endif
2639 	}
2640 
2641 	/* (2^n)-spaced sub-page bins. */
2642 	for (; i < ntbins + nqbins + nsbins; i++) {
2643 		bin = &arena->bins[i];
2644 		bin->runcur = NULL;
2645 		RB_INIT(&bin->runs);
2646 
2647 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2648 
2649 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2650 
2651 #ifdef MALLOC_STATS
2652 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2653 #endif
2654 	}
2655 
2656 #ifdef MALLOC_DEBUG
2657 	arena->magic = ARENA_MAGIC;
2658 #endif
2659 
2660 	return (false);
2661 }
2662 
2663 /* Create a new arena and insert it into the arenas array at index ind. */
2664 static arena_t *
2665 arenas_extend(unsigned ind)
2666 {
2667 	arena_t *ret;
2668 
2669 	/* Allocate enough space for trailing bins. */
2670 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2671 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2672 	if (ret != NULL && arena_new(ret) == false) {
2673 		arenas[ind] = ret;
2674 		return (ret);
2675 	}
2676 	/* Only reached if there is an OOM error. */
2677 
2678 	/*
2679 	 * OOM here is quite inconvenient to propagate, since dealing with it
2680 	 * would require a check for failure in the fast path.  Instead, punt
2681 	 * by using arenas[0].  In practice, this is an extremely unlikely
2682 	 * failure.
2683 	 */
2684 	_malloc_message(getprogname(),
2685 	    ": (malloc) Error initializing arena\n", "", "");
2686 	if (opt_abort)
2687 		abort();
2688 
2689 	return (arenas[0]);
2690 }
2691 
2692 /*
2693  * End arena.
2694  */
2695 /******************************************************************************/
2696 /*
2697  * Begin general internal functions.
2698  */
2699 
2700 static void *
2701 huge_malloc(size_t size)
2702 {
2703 	void *ret;
2704 	size_t csize;
2705 	chunk_node_t *node;
2706 
2707 	/* Allocate one or more contiguous chunks for this request. */
2708 
2709 	csize = CHUNK_CEILING(size);
2710 	if (csize == 0) {
2711 		/* size is large enough to cause size_t wrap-around. */
2712 		return (NULL);
2713 	}
2714 
2715 	/* Allocate a chunk node with which to track the chunk. */
2716 	node = base_chunk_node_alloc();
2717 	if (node == NULL)
2718 		return (NULL);
2719 
2720 	ret = chunk_alloc(csize);
2721 	if (ret == NULL) {
2722 		base_chunk_node_dealloc(node);
2723 		return (NULL);
2724 	}
2725 
2726 	/* Insert node into huge. */
2727 	node->chunk = ret;
2728 	node->size = csize;
2729 
2730 	malloc_mutex_lock(&chunks_mtx);
2731 	RB_INSERT(chunk_tree_s, &huge, node);
2732 #ifdef MALLOC_STATS
2733 	huge_nmalloc++;
2734 	huge_allocated += csize;
2735 #endif
2736 	malloc_mutex_unlock(&chunks_mtx);
2737 
2738 	if (opt_junk)
2739 		memset(ret, 0xa5, csize);
2740 	else if (opt_zero)
2741 		memset(ret, 0, csize);
2742 
2743 	return (ret);
2744 }
2745 
2746 /* Only handles large allocations that require more than chunk alignment. */
2747 static void *
2748 huge_palloc(size_t alignment, size_t size)
2749 {
2750 	void *ret;
2751 	size_t alloc_size, chunk_size, offset;
2752 	chunk_node_t *node;
2753 
2754 	/*
2755 	 * This allocation requires alignment that is even larger than chunk
2756 	 * alignment.  This means that huge_malloc() isn't good enough.
2757 	 *
2758 	 * Allocate almost twice as many chunks as are demanded by the size or
2759 	 * alignment, in order to assure the alignment can be achieved, then
2760 	 * unmap leading and trailing chunks.
2761 	 */
2762 	assert(alignment >= chunksize);
2763 
2764 	chunk_size = CHUNK_CEILING(size);
2765 
2766 	if (size >= alignment)
2767 		alloc_size = chunk_size + alignment - chunksize;
2768 	else
2769 		alloc_size = (alignment << 1) - chunksize;
2770 
2771 	/* Allocate a chunk node with which to track the chunk. */
2772 	node = base_chunk_node_alloc();
2773 	if (node == NULL)
2774 		return (NULL);
2775 
2776 	ret = chunk_alloc(alloc_size);
2777 	if (ret == NULL) {
2778 		base_chunk_node_dealloc(node);
2779 		return (NULL);
2780 	}
2781 
2782 	offset = (uintptr_t)ret & (alignment - 1);
2783 	assert((offset & chunksize_mask) == 0);
2784 	assert(offset < alloc_size);
2785 	if (offset == 0) {
2786 		/* Trim trailing space. */
2787 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2788 		    - chunk_size);
2789 	} else {
2790 		size_t trailsize;
2791 
2792 		/* Trim leading space. */
2793 		chunk_dealloc(ret, alignment - offset);
2794 
2795 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2796 
2797 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2798 		if (trailsize != 0) {
2799 		    /* Trim trailing space. */
2800 		    assert(trailsize < alloc_size);
2801 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2802 			trailsize);
2803 		}
2804 	}
2805 
2806 	/* Insert node into huge. */
2807 	node->chunk = ret;
2808 	node->size = chunk_size;
2809 
2810 	malloc_mutex_lock(&chunks_mtx);
2811 	RB_INSERT(chunk_tree_s, &huge, node);
2812 #ifdef MALLOC_STATS
2813 	huge_nmalloc++;
2814 	huge_allocated += chunk_size;
2815 #endif
2816 	malloc_mutex_unlock(&chunks_mtx);
2817 
2818 	if (opt_junk)
2819 		memset(ret, 0xa5, chunk_size);
2820 	else if (opt_zero)
2821 		memset(ret, 0, chunk_size);
2822 
2823 	return (ret);
2824 }
2825 
2826 static void *
2827 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2828 {
2829 	void *ret;
2830 
2831 	/* Avoid moving the allocation if the size class would not change. */
2832 	if (oldsize > arena_maxclass &&
2833 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2834 		if (opt_junk && size < oldsize) {
2835 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2836 			    - size);
2837 		} else if (opt_zero && size > oldsize) {
2838 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2839 			    - oldsize);
2840 		}
2841 		return (ptr);
2842 	}
2843 
2844 	if (CHUNK_ADDR2BASE(ptr) == ptr
2845 #ifdef USE_BRK
2846 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2847 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2848 #endif
2849 	    ) {
2850 		chunk_node_t *node, key;
2851 		void *newptr;
2852 		size_t oldcsize;
2853 		size_t newcsize;
2854 
2855 		newcsize = CHUNK_CEILING(size);
2856 		oldcsize = CHUNK_CEILING(oldsize);
2857 		assert(oldcsize != newcsize);
2858 		if (newcsize == 0) {
2859 			/* size_t wrap-around */
2860 			return (NULL);
2861 		}
2862 
2863 		/*
2864 		 * Remove the old region from the tree now.  If mremap()
2865 		 * returns the region to the system, other thread may
2866 		 * map it for same huge allocation and insert it to the
2867 		 * tree before we acquire the mutex lock again.
2868 		 */
2869 		malloc_mutex_lock(&chunks_mtx);
2870 		key.chunk = __DECONST(void *, ptr);
2871 		/* LINTED */
2872 		node = RB_FIND(chunk_tree_s, &huge, &key);
2873 		assert(node != NULL);
2874 		assert(node->chunk == ptr);
2875 		assert(node->size == oldcsize);
2876 		RB_REMOVE(chunk_tree_s, &huge, node);
2877 		malloc_mutex_unlock(&chunks_mtx);
2878 
2879 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2880 		    MAP_ALIGNED(chunksize_2pow));
2881 		if (newptr == MAP_FAILED) {
2882 			/* We still own the old region. */
2883 			malloc_mutex_lock(&chunks_mtx);
2884 			RB_INSERT(chunk_tree_s, &huge, node);
2885 			malloc_mutex_unlock(&chunks_mtx);
2886 		} else {
2887 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2888 
2889 			/* Insert new or resized old region. */
2890 			malloc_mutex_lock(&chunks_mtx);
2891 			node->size = newcsize;
2892 			node->chunk = newptr;
2893 			RB_INSERT(chunk_tree_s, &huge, node);
2894 #ifdef MALLOC_STATS
2895 			huge_nralloc++;
2896 			huge_allocated += newcsize - oldcsize;
2897 			if (newcsize > oldcsize) {
2898 				stats_chunks.curchunks +=
2899 				    (newcsize - oldcsize) / chunksize;
2900 				if (stats_chunks.curchunks >
2901 				    stats_chunks.highchunks)
2902 					stats_chunks.highchunks =
2903 					    stats_chunks.curchunks;
2904 			} else {
2905 				stats_chunks.curchunks -=
2906 				    (oldcsize - newcsize) / chunksize;
2907 			}
2908 #endif
2909 			malloc_mutex_unlock(&chunks_mtx);
2910 
2911 			if (opt_junk && size < oldsize) {
2912 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2913 				    newcsize - size);
2914 			} else if (opt_zero && size > oldsize) {
2915 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2916 				    size - oldsize);
2917 			}
2918 			return (newptr);
2919 		}
2920 	}
2921 
2922 	/*
2923 	 * If we get here, then size and oldsize are different enough that we
2924 	 * need to use a different size class.  In that case, fall back to
2925 	 * allocating new space and copying.
2926 	 */
2927 	ret = huge_malloc(size);
2928 	if (ret == NULL)
2929 		return (NULL);
2930 
2931 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2932 		/* The old allocation is a chunk. */
2933 		if (size < oldsize)
2934 			memcpy(ret, ptr, size);
2935 		else
2936 			memcpy(ret, ptr, oldsize);
2937 	} else {
2938 		/* The old allocation is a region. */
2939 		assert(oldsize < size);
2940 		memcpy(ret, ptr, oldsize);
2941 	}
2942 	idalloc(ptr);
2943 	return (ret);
2944 }
2945 
2946 static void
2947 huge_dalloc(void *ptr)
2948 {
2949 	chunk_node_t key;
2950 	chunk_node_t *node;
2951 
2952 	malloc_mutex_lock(&chunks_mtx);
2953 
2954 	/* Extract from tree of huge allocations. */
2955 	key.chunk = ptr;
2956 	/* LINTED */
2957 	node = RB_FIND(chunk_tree_s, &huge, &key);
2958 	assert(node != NULL);
2959 	assert(node->chunk == ptr);
2960 	/* LINTED */
2961 	RB_REMOVE(chunk_tree_s, &huge, node);
2962 
2963 #ifdef MALLOC_STATS
2964 	huge_ndalloc++;
2965 	huge_allocated -= node->size;
2966 #endif
2967 
2968 	malloc_mutex_unlock(&chunks_mtx);
2969 
2970 	/* Unmap chunk. */
2971 #ifdef USE_BRK
2972 	if (opt_junk)
2973 		memset(node->chunk, 0x5a, node->size);
2974 #endif
2975 	chunk_dealloc(node->chunk, node->size);
2976 
2977 	base_chunk_node_dealloc(node);
2978 }
2979 
2980 static void *
2981 imalloc(size_t size)
2982 {
2983 	void *ret;
2984 
2985 	assert(size != 0);
2986 
2987 	if (size <= arena_maxclass)
2988 		ret = arena_malloc(choose_arena(), size);
2989 	else
2990 		ret = huge_malloc(size);
2991 
2992 	return (ret);
2993 }
2994 
2995 static void *
2996 ipalloc(size_t alignment, size_t size)
2997 {
2998 	void *ret;
2999 	size_t ceil_size;
3000 
3001 	/*
3002 	 * Round size up to the nearest multiple of alignment.
3003 	 *
3004 	 * This done, we can take advantage of the fact that for each small
3005 	 * size class, every object is aligned at the smallest power of two
3006 	 * that is non-zero in the base two representation of the size.  For
3007 	 * example:
3008 	 *
3009 	 *   Size |   Base 2 | Minimum alignment
3010 	 *   -----+----------+------------------
3011 	 *     96 |  1100000 |  32
3012 	 *    144 | 10100000 |  32
3013 	 *    192 | 11000000 |  64
3014 	 *
3015 	 * Depending on runtime settings, it is possible that arena_malloc()
3016 	 * will further round up to a power of two, but that never causes
3017 	 * correctness issues.
3018 	 */
3019 	ceil_size = (size + (alignment - 1)) & (-alignment);
3020 	/*
3021 	 * (ceil_size < size) protects against the combination of maximal
3022 	 * alignment and size greater than maximal alignment.
3023 	 */
3024 	if (ceil_size < size) {
3025 		/* size_t overflow. */
3026 		return (NULL);
3027 	}
3028 
3029 	if (ceil_size <= pagesize || (alignment <= pagesize
3030 	    && ceil_size <= arena_maxclass))
3031 		ret = arena_malloc(choose_arena(), ceil_size);
3032 	else {
3033 		size_t run_size;
3034 
3035 		/*
3036 		 * We can't achieve sub-page alignment, so round up alignment
3037 		 * permanently; it makes later calculations simpler.
3038 		 */
3039 		alignment = PAGE_CEILING(alignment);
3040 		ceil_size = PAGE_CEILING(size);
3041 		/*
3042 		 * (ceil_size < size) protects against very large sizes within
3043 		 * pagesize of SIZE_T_MAX.
3044 		 *
3045 		 * (ceil_size + alignment < ceil_size) protects against the
3046 		 * combination of maximal alignment and ceil_size large enough
3047 		 * to cause overflow.  This is similar to the first overflow
3048 		 * check above, but it needs to be repeated due to the new
3049 		 * ceil_size value, which may now be *equal* to maximal
3050 		 * alignment, whereas before we only detected overflow if the
3051 		 * original size was *greater* than maximal alignment.
3052 		 */
3053 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3054 			/* size_t overflow. */
3055 			return (NULL);
3056 		}
3057 
3058 		/*
3059 		 * Calculate the size of the over-size run that arena_palloc()
3060 		 * would need to allocate in order to guarantee the alignment.
3061 		 */
3062 		if (ceil_size >= alignment)
3063 			run_size = ceil_size + alignment - pagesize;
3064 		else {
3065 			/*
3066 			 * It is possible that (alignment << 1) will cause
3067 			 * overflow, but it doesn't matter because we also
3068 			 * subtract pagesize, which in the case of overflow
3069 			 * leaves us with a very large run_size.  That causes
3070 			 * the first conditional below to fail, which means
3071 			 * that the bogus run_size value never gets used for
3072 			 * anything important.
3073 			 */
3074 			run_size = (alignment << 1) - pagesize;
3075 		}
3076 
3077 		if (run_size <= arena_maxclass) {
3078 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3079 			    run_size);
3080 		} else if (alignment <= chunksize)
3081 			ret = huge_malloc(ceil_size);
3082 		else
3083 			ret = huge_palloc(alignment, ceil_size);
3084 	}
3085 
3086 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3087 	return (ret);
3088 }
3089 
3090 static void *
3091 icalloc(size_t size)
3092 {
3093 	void *ret;
3094 
3095 	if (size <= arena_maxclass) {
3096 		ret = arena_malloc(choose_arena(), size);
3097 		if (ret == NULL)
3098 			return (NULL);
3099 		memset(ret, 0, size);
3100 	} else {
3101 		/*
3102 		 * The virtual memory system provides zero-filled pages, so
3103 		 * there is no need to do so manually, unless opt_junk is
3104 		 * enabled, in which case huge_malloc() fills huge allocations
3105 		 * with junk.
3106 		 */
3107 		ret = huge_malloc(size);
3108 		if (ret == NULL)
3109 			return (NULL);
3110 
3111 		if (opt_junk)
3112 			memset(ret, 0, size);
3113 #ifdef USE_BRK
3114 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3115 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3116 			/*
3117 			 * This may be a re-used brk chunk.  Therefore, zero
3118 			 * the memory.
3119 			 */
3120 			memset(ret, 0, size);
3121 		}
3122 #endif
3123 	}
3124 
3125 	return (ret);
3126 }
3127 
3128 static size_t
3129 isalloc(const void *ptr)
3130 {
3131 	size_t ret;
3132 	arena_chunk_t *chunk;
3133 
3134 	assert(ptr != NULL);
3135 
3136 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3137 	if (chunk != ptr) {
3138 		/* Region. */
3139 		assert(chunk->arena->magic == ARENA_MAGIC);
3140 
3141 		ret = arena_salloc(ptr);
3142 	} else {
3143 		chunk_node_t *node, key;
3144 
3145 		/* Chunk (huge allocation). */
3146 
3147 		malloc_mutex_lock(&chunks_mtx);
3148 
3149 		/* Extract from tree of huge allocations. */
3150 		key.chunk = __DECONST(void *, ptr);
3151 		/* LINTED */
3152 		node = RB_FIND(chunk_tree_s, &huge, &key);
3153 		assert(node != NULL);
3154 
3155 		ret = node->size;
3156 
3157 		malloc_mutex_unlock(&chunks_mtx);
3158 	}
3159 
3160 	return (ret);
3161 }
3162 
3163 static void *
3164 iralloc(void *ptr, size_t size)
3165 {
3166 	void *ret;
3167 	size_t oldsize;
3168 
3169 	assert(ptr != NULL);
3170 	assert(size != 0);
3171 
3172 	oldsize = isalloc(ptr);
3173 
3174 	if (size <= arena_maxclass)
3175 		ret = arena_ralloc(ptr, size, oldsize);
3176 	else
3177 		ret = huge_ralloc(ptr, size, oldsize);
3178 
3179 	return (ret);
3180 }
3181 
3182 static void
3183 idalloc(void *ptr)
3184 {
3185 	arena_chunk_t *chunk;
3186 
3187 	assert(ptr != NULL);
3188 
3189 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3190 	if (chunk != ptr) {
3191 		/* Region. */
3192 		arena_dalloc(chunk->arena, chunk, ptr);
3193 	} else
3194 		huge_dalloc(ptr);
3195 }
3196 
3197 static void
3198 malloc_print_stats(void)
3199 {
3200 
3201 	if (opt_print_stats) {
3202 		char s[UMAX2S_BUFSIZE];
3203 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3204 		    "");
3205 		_malloc_message("Assertions ",
3206 #ifdef NDEBUG
3207 		    "disabled",
3208 #else
3209 		    "enabled",
3210 #endif
3211 		    "\n", "");
3212 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3213 		    opt_abort ? "A" : "a",
3214 		    opt_junk ? "J" : "j",
3215 		    opt_hint ? "H" : "h");
3216 		_malloc_message(opt_utrace ? "PU" : "Pu",
3217 		    opt_sysv ? "V" : "v",
3218 		    opt_xmalloc ? "X" : "x",
3219 		    opt_zero ? "Z\n" : "z\n");
3220 
3221 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3222 		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3223 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3224 		    "\n", "");
3225 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3226 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3227 		    "");
3228 
3229 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3230 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3231 		    ")\n", "");
3232 
3233 #ifdef MALLOC_STATS
3234 		{
3235 			size_t allocated, mapped;
3236 			unsigned i;
3237 			arena_t *arena;
3238 
3239 			/* Calculate and print allocated/mapped stats. */
3240 
3241 			/* arenas. */
3242 			for (i = 0, allocated = 0; i < narenas; i++) {
3243 				if (arenas[i] != NULL) {
3244 					malloc_mutex_lock(&arenas[i]->mtx);
3245 					allocated +=
3246 					    arenas[i]->stats.allocated_small;
3247 					allocated +=
3248 					    arenas[i]->stats.allocated_large;
3249 					malloc_mutex_unlock(&arenas[i]->mtx);
3250 				}
3251 			}
3252 
3253 			/* huge/base. */
3254 			malloc_mutex_lock(&chunks_mtx);
3255 			allocated += huge_allocated;
3256 			mapped = stats_chunks.curchunks * chunksize;
3257 			malloc_mutex_unlock(&chunks_mtx);
3258 
3259 			malloc_mutex_lock(&base_mtx);
3260 			mapped += base_mapped;
3261 			malloc_mutex_unlock(&base_mtx);
3262 
3263 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3264 			    allocated, mapped);
3265 
3266 			/* Print chunk stats. */
3267 			{
3268 				chunk_stats_t chunks_stats;
3269 
3270 				malloc_mutex_lock(&chunks_mtx);
3271 				chunks_stats = stats_chunks;
3272 				malloc_mutex_unlock(&chunks_mtx);
3273 
3274 				malloc_printf("chunks: nchunks   "
3275 				    "highchunks    curchunks\n");
3276 				malloc_printf("  %13llu%13lu%13lu\n",
3277 				    chunks_stats.nchunks,
3278 				    chunks_stats.highchunks,
3279 				    chunks_stats.curchunks);
3280 			}
3281 
3282 			/* Print chunk stats. */
3283 			malloc_printf(
3284 			    "huge: nmalloc      ndalloc      "
3285 			    "nralloc    allocated\n");
3286 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3287 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3288 			    huge_allocated);
3289 
3290 			/* Print stats for each arena. */
3291 			for (i = 0; i < narenas; i++) {
3292 				arena = arenas[i];
3293 				if (arena != NULL) {
3294 					malloc_printf(
3295 					    "\narenas[%u] @ %p\n", i, arena);
3296 					malloc_mutex_lock(&arena->mtx);
3297 					stats_print(arena);
3298 					malloc_mutex_unlock(&arena->mtx);
3299 				}
3300 			}
3301 		}
3302 #endif /* #ifdef MALLOC_STATS */
3303 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3304 	}
3305 }
3306 
3307 /*
3308  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3309  * implementation has to take pains to avoid infinite recursion during
3310  * initialization.
3311  */
3312 static inline bool
3313 malloc_init(void)
3314 {
3315 
3316 	if (malloc_initialized == false)
3317 		return (malloc_init_hard());
3318 
3319 	return (false);
3320 }
3321 
3322 static bool
3323 malloc_init_hard(void)
3324 {
3325 	unsigned i, j;
3326 	ssize_t linklen;
3327 	char buf[PATH_MAX + 1];
3328 	const char *opts = "";
3329 	int serrno;
3330 
3331 	malloc_mutex_lock(&init_lock);
3332 	if (malloc_initialized) {
3333 		/*
3334 		 * Another thread initialized the allocator before this one
3335 		 * acquired init_lock.
3336 		 */
3337 		malloc_mutex_unlock(&init_lock);
3338 		return (false);
3339 	}
3340 
3341 	serrno = errno;
3342 	/* Get number of CPUs. */
3343 	{
3344 		int mib[2];
3345 		size_t len;
3346 
3347 		mib[0] = CTL_HW;
3348 		mib[1] = HW_NCPU;
3349 		len = sizeof(ncpus);
3350 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3351 			/* Error. */
3352 			ncpus = 1;
3353 		}
3354 	}
3355 
3356 	/* Get page size. */
3357 	{
3358 		long result;
3359 
3360 		result = sysconf(_SC_PAGESIZE);
3361 		assert(result != -1);
3362 		pagesize = (unsigned) result;
3363 
3364 		/*
3365 		 * We assume that pagesize is a power of 2 when calculating
3366 		 * pagesize_mask and pagesize_2pow.
3367 		 */
3368 		assert(((result - 1) & result) == 0);
3369 		pagesize_mask = result - 1;
3370 		pagesize_2pow = ffs((int)result) - 1;
3371 	}
3372 
3373 	for (i = 0; i < 3; i++) {
3374 		/* Get runtime configuration. */
3375 		switch (i) {
3376 		case 0:
3377 			if ((linklen = readlink("/etc/malloc.conf", buf,
3378 						sizeof(buf) - 1)) != -1) {
3379 				/*
3380 				 * Use the contents of the "/etc/malloc.conf"
3381 				 * symbolic link's name.
3382 				 */
3383 				buf[linklen] = '\0';
3384 				opts = buf;
3385 			} else {
3386 				/* No configuration specified. */
3387 				buf[0] = '\0';
3388 				opts = buf;
3389 			}
3390 			break;
3391 		case 1:
3392 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3393 			    issetugid() == 0) {
3394 				/*
3395 				 * Do nothing; opts is already initialized to
3396 				 * the value of the MALLOC_OPTIONS environment
3397 				 * variable.
3398 				 */
3399 			} else {
3400 				/* No configuration specified. */
3401 				buf[0] = '\0';
3402 				opts = buf;
3403 			}
3404 			break;
3405 		case 2:
3406 			if (_malloc_options != NULL) {
3407 			    /*
3408 			     * Use options that were compiled into the program.
3409 			     */
3410 			    opts = _malloc_options;
3411 			} else {
3412 				/* No configuration specified. */
3413 				buf[0] = '\0';
3414 				opts = buf;
3415 			}
3416 			break;
3417 		default:
3418 			/* NOTREACHED */
3419 			/* LINTED */
3420 			assert(false);
3421 		}
3422 
3423 		for (j = 0; opts[j] != '\0'; j++) {
3424 			switch (opts[j]) {
3425 			case 'a':
3426 				opt_abort = false;
3427 				break;
3428 			case 'A':
3429 				opt_abort = true;
3430 				break;
3431 			case 'h':
3432 				opt_hint = false;
3433 				break;
3434 			case 'H':
3435 				opt_hint = true;
3436 				break;
3437 			case 'j':
3438 				opt_junk = false;
3439 				break;
3440 			case 'J':
3441 				opt_junk = true;
3442 				break;
3443 			case 'k':
3444 				/*
3445 				 * Chunks always require at least one header
3446 				 * page, so chunks can never be smaller than
3447 				 * two pages.
3448 				 */
3449 				if (opt_chunk_2pow > pagesize_2pow + 1)
3450 					opt_chunk_2pow--;
3451 				break;
3452 			case 'K':
3453 				if (opt_chunk_2pow + 1 <
3454 				    (int)(sizeof(size_t) << 3))
3455 					opt_chunk_2pow++;
3456 				break;
3457 			case 'n':
3458 				opt_narenas_lshift--;
3459 				break;
3460 			case 'N':
3461 				opt_narenas_lshift++;
3462 				break;
3463 			case 'p':
3464 				opt_print_stats = false;
3465 				break;
3466 			case 'P':
3467 				opt_print_stats = true;
3468 				break;
3469 			case 'q':
3470 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3471 					opt_quantum_2pow--;
3472 				break;
3473 			case 'Q':
3474 				if (opt_quantum_2pow < pagesize_2pow - 1)
3475 					opt_quantum_2pow++;
3476 				break;
3477 			case 's':
3478 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3479 					opt_small_max_2pow--;
3480 				break;
3481 			case 'S':
3482 				if (opt_small_max_2pow < pagesize_2pow - 1)
3483 					opt_small_max_2pow++;
3484 				break;
3485 			case 'u':
3486 				opt_utrace = false;
3487 				break;
3488 			case 'U':
3489 				opt_utrace = true;
3490 				break;
3491 			case 'v':
3492 				opt_sysv = false;
3493 				break;
3494 			case 'V':
3495 				opt_sysv = true;
3496 				break;
3497 			case 'x':
3498 				opt_xmalloc = false;
3499 				break;
3500 			case 'X':
3501 				opt_xmalloc = true;
3502 				break;
3503 			case 'z':
3504 				opt_zero = false;
3505 				break;
3506 			case 'Z':
3507 				opt_zero = true;
3508 				break;
3509 			default: {
3510 				char cbuf[2];
3511 
3512 				cbuf[0] = opts[j];
3513 				cbuf[1] = '\0';
3514 				_malloc_message(getprogname(),
3515 				    ": (malloc) Unsupported character in "
3516 				    "malloc options: '", cbuf, "'\n");
3517 			}
3518 			}
3519 		}
3520 	}
3521 	errno = serrno;
3522 
3523 	/* Take care to call atexit() only once. */
3524 	if (opt_print_stats) {
3525 		/* Print statistics at exit. */
3526 		atexit(malloc_print_stats);
3527 	}
3528 
3529 	/* Set variables according to the value of opt_small_max_2pow. */
3530 	if (opt_small_max_2pow < opt_quantum_2pow)
3531 		opt_small_max_2pow = opt_quantum_2pow;
3532 	small_max = (1 << opt_small_max_2pow);
3533 
3534 	/* Set bin-related variables. */
3535 	bin_maxclass = (pagesize >> 1);
3536 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3537 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3538 	assert(ntbins <= opt_quantum_2pow);
3539 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3540 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3541 
3542 	/* Set variables according to the value of opt_quantum_2pow. */
3543 	quantum = (1 << opt_quantum_2pow);
3544 	quantum_mask = quantum - 1;
3545 	if (ntbins > 0)
3546 		small_min = (quantum >> 1) + 1;
3547 	else
3548 		small_min = 1;
3549 	assert(small_min <= quantum);
3550 
3551 	/* Set variables according to the value of opt_chunk_2pow. */
3552 	chunksize = (1LU << opt_chunk_2pow);
3553 	chunksize_mask = chunksize - 1;
3554 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3555 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3556 	{
3557 		unsigned header_size;
3558 
3559 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3560 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3561 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3562 		if ((header_size & pagesize_mask) != 0)
3563 			arena_chunk_header_npages++;
3564 	}
3565 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3566 	    pagesize_2pow);
3567 
3568 	UTRACE(0, 0, 0);
3569 
3570 #ifdef MALLOC_STATS
3571 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3572 #endif
3573 
3574 	/* Various sanity checks that regard configuration. */
3575 	assert(quantum >= sizeof(void *));
3576 	assert(quantum <= pagesize);
3577 	assert(chunksize >= pagesize);
3578 	assert(quantum * 4 <= chunksize);
3579 
3580 	/* Initialize chunks data. */
3581 	malloc_mutex_init(&chunks_mtx);
3582 	RB_INIT(&huge);
3583 #ifdef USE_BRK
3584 	malloc_mutex_init(&brk_mtx);
3585 	brk_base = sbrk(0);
3586 	brk_prev = brk_base;
3587 	brk_max = brk_base;
3588 #endif
3589 #ifdef MALLOC_STATS
3590 	huge_nmalloc = 0;
3591 	huge_ndalloc = 0;
3592 	huge_nralloc = 0;
3593 	huge_allocated = 0;
3594 #endif
3595 	RB_INIT(&old_chunks);
3596 
3597 	/* Initialize base allocation data structures. */
3598 #ifdef MALLOC_STATS
3599 	base_mapped = 0;
3600 #endif
3601 #ifdef USE_BRK
3602 	/*
3603 	 * Allocate a base chunk here, since it doesn't actually have to be
3604 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3605 	 * the use of space that would otherwise be wasted.
3606 	 */
3607 	base_pages_alloc(0);
3608 #endif
3609 	base_chunk_nodes = NULL;
3610 	malloc_mutex_init(&base_mtx);
3611 
3612 	if (ncpus > 1) {
3613 		/*
3614 		 * For SMP systems, create four times as many arenas as there
3615 		 * are CPUs by default.
3616 		 */
3617 		opt_narenas_lshift += 2;
3618 	}
3619 
3620 #ifdef NO_TLS
3621 	/* Initialize arena key. */
3622 	(void)thr_keycreate(&arenas_map_key, NULL);
3623 #endif
3624 
3625 	/* Determine how many arenas to use. */
3626 	narenas = ncpus;
3627 	if (opt_narenas_lshift > 0) {
3628 		if ((narenas << opt_narenas_lshift) > narenas)
3629 			narenas <<= opt_narenas_lshift;
3630 		/*
3631 		 * Make sure not to exceed the limits of what base_malloc()
3632 		 * can handle.
3633 		 */
3634 		if (narenas * sizeof(arena_t *) > chunksize)
3635 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3636 	} else if (opt_narenas_lshift < 0) {
3637 		if ((narenas << opt_narenas_lshift) < narenas)
3638 			narenas <<= opt_narenas_lshift;
3639 		/* Make sure there is at least one arena. */
3640 		if (narenas == 0)
3641 			narenas = 1;
3642 	}
3643 
3644 	next_arena = 0;
3645 
3646 	/* Allocate and initialize arenas. */
3647 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3648 	if (arenas == NULL) {
3649 		malloc_mutex_unlock(&init_lock);
3650 		return (true);
3651 	}
3652 	/*
3653 	 * Zero the array.  In practice, this should always be pre-zeroed,
3654 	 * since it was just mmap()ed, but let's be sure.
3655 	 */
3656 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3657 
3658 	/*
3659 	 * Initialize one arena here.  The rest are lazily created in
3660 	 * arena_choose_hard().
3661 	 */
3662 	arenas_extend(0);
3663 	if (arenas[0] == NULL) {
3664 		malloc_mutex_unlock(&init_lock);
3665 		return (true);
3666 	}
3667 
3668 	malloc_mutex_init(&arenas_mtx);
3669 
3670 	malloc_initialized = true;
3671 	malloc_mutex_unlock(&init_lock);
3672 	return (false);
3673 }
3674 
3675 /*
3676  * End general internal functions.
3677  */
3678 /******************************************************************************/
3679 /*
3680  * Begin malloc(3)-compatible functions.
3681  */
3682 
3683 void *
3684 malloc(size_t size)
3685 {
3686 	void *ret;
3687 
3688 	if (malloc_init()) {
3689 		ret = NULL;
3690 		goto RETURN;
3691 	}
3692 
3693 	if (size == 0) {
3694 		if (opt_sysv == false)
3695 			size = 1;
3696 		else {
3697 			ret = NULL;
3698 			goto RETURN;
3699 		}
3700 	}
3701 
3702 	ret = imalloc(size);
3703 
3704 RETURN:
3705 	if (ret == NULL) {
3706 		if (opt_xmalloc) {
3707 			_malloc_message(getprogname(),
3708 			    ": (malloc) Error in malloc(): out of memory\n", "",
3709 			    "");
3710 			abort();
3711 		}
3712 		errno = ENOMEM;
3713 	}
3714 
3715 	UTRACE(0, size, ret);
3716 	return (ret);
3717 }
3718 
3719 int
3720 posix_memalign(void **memptr, size_t alignment, size_t size)
3721 {
3722 	int ret;
3723 	void *result;
3724 
3725 	if (malloc_init())
3726 		result = NULL;
3727 	else {
3728 		/* Make sure that alignment is a large enough power of 2. */
3729 		if (((alignment - 1) & alignment) != 0
3730 		    || alignment < sizeof(void *)) {
3731 			if (opt_xmalloc) {
3732 				_malloc_message(getprogname(),
3733 				    ": (malloc) Error in posix_memalign(): "
3734 				    "invalid alignment\n", "", "");
3735 				abort();
3736 			}
3737 			result = NULL;
3738 			ret = EINVAL;
3739 			goto RETURN;
3740 		}
3741 
3742 		result = ipalloc(alignment, size);
3743 	}
3744 
3745 	if (result == NULL) {
3746 		if (opt_xmalloc) {
3747 			_malloc_message(getprogname(),
3748 			": (malloc) Error in posix_memalign(): out of memory\n",
3749 			"", "");
3750 			abort();
3751 		}
3752 		ret = ENOMEM;
3753 		goto RETURN;
3754 	}
3755 
3756 	*memptr = result;
3757 	ret = 0;
3758 
3759 RETURN:
3760 	UTRACE(0, size, result);
3761 	return (ret);
3762 }
3763 
3764 void *
3765 calloc(size_t num, size_t size)
3766 {
3767 	void *ret;
3768 	size_t num_size;
3769 
3770 	if (malloc_init()) {
3771 		num_size = 0;
3772 		ret = NULL;
3773 		goto RETURN;
3774 	}
3775 
3776 	num_size = num * size;
3777 	if (num_size == 0) {
3778 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3779 			num_size = 1;
3780 		else {
3781 			ret = NULL;
3782 			goto RETURN;
3783 		}
3784 	/*
3785 	 * Try to avoid division here.  We know that it isn't possible to
3786 	 * overflow during multiplication if neither operand uses any of the
3787 	 * most significant half of the bits in a size_t.
3788 	 */
3789 	} else if ((unsigned long long)((num | size) &
3790 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3791 	   (num_size / size != num)) {
3792 		/* size_t overflow. */
3793 		ret = NULL;
3794 		goto RETURN;
3795 	}
3796 
3797 	ret = icalloc(num_size);
3798 
3799 RETURN:
3800 	if (ret == NULL) {
3801 		if (opt_xmalloc) {
3802 			_malloc_message(getprogname(),
3803 			    ": (malloc) Error in calloc(): out of memory\n", "",
3804 			    "");
3805 			abort();
3806 		}
3807 		errno = ENOMEM;
3808 	}
3809 
3810 	UTRACE(0, num_size, ret);
3811 	return (ret);
3812 }
3813 
3814 void *
3815 realloc(void *ptr, size_t size)
3816 {
3817 	void *ret;
3818 
3819 	if (size == 0) {
3820 		if (opt_sysv == false)
3821 			size = 1;
3822 		else {
3823 			if (ptr != NULL)
3824 				idalloc(ptr);
3825 			ret = NULL;
3826 			goto RETURN;
3827 		}
3828 	}
3829 
3830 	if (ptr != NULL) {
3831 		assert(malloc_initialized);
3832 
3833 		ret = iralloc(ptr, size);
3834 
3835 		if (ret == NULL) {
3836 			if (opt_xmalloc) {
3837 				_malloc_message(getprogname(),
3838 				    ": (malloc) Error in realloc(): out of "
3839 				    "memory\n", "", "");
3840 				abort();
3841 			}
3842 			errno = ENOMEM;
3843 		}
3844 	} else {
3845 		if (malloc_init())
3846 			ret = NULL;
3847 		else
3848 			ret = imalloc(size);
3849 
3850 		if (ret == NULL) {
3851 			if (opt_xmalloc) {
3852 				_malloc_message(getprogname(),
3853 				    ": (malloc) Error in realloc(): out of "
3854 				    "memory\n", "", "");
3855 				abort();
3856 			}
3857 			errno = ENOMEM;
3858 		}
3859 	}
3860 
3861 RETURN:
3862 	UTRACE(ptr, size, ret);
3863 	return (ret);
3864 }
3865 
3866 void
3867 free(void *ptr)
3868 {
3869 
3870 	UTRACE(ptr, 0, 0);
3871 	if (ptr != NULL) {
3872 		assert(malloc_initialized);
3873 
3874 		idalloc(ptr);
3875 	}
3876 }
3877 
3878 /*
3879  * End malloc(3)-compatible functions.
3880  */
3881 /******************************************************************************/
3882 /*
3883  * Begin non-standard functions.
3884  */
3885 #ifndef __NetBSD__
3886 size_t
3887 malloc_usable_size(const void *ptr)
3888 {
3889 
3890 	assert(ptr != NULL);
3891 
3892 	return (isalloc(ptr));
3893 }
3894 #endif
3895 
3896 /*
3897  * End non-standard functions.
3898  */
3899 /******************************************************************************/
3900 /*
3901  * Begin library-private functions, used by threading libraries for protection
3902  * of malloc during fork().  These functions are only called if the program is
3903  * running in threaded mode, so there is no need to check whether the program
3904  * is threaded here.
3905  */
3906 
3907 void
3908 _malloc_prefork(void)
3909 {
3910 	unsigned i;
3911 
3912 	/* Acquire all mutexes in a safe order. */
3913 
3914 	malloc_mutex_lock(&arenas_mtx);
3915 	for (i = 0; i < narenas; i++) {
3916 		if (arenas[i] != NULL)
3917 			malloc_mutex_lock(&arenas[i]->mtx);
3918 	}
3919 	malloc_mutex_unlock(&arenas_mtx);
3920 
3921 	malloc_mutex_lock(&base_mtx);
3922 
3923 	malloc_mutex_lock(&chunks_mtx);
3924 }
3925 
3926 void
3927 _malloc_postfork(void)
3928 {
3929 	unsigned i;
3930 
3931 	/* Release all mutexes, now that fork() has completed. */
3932 
3933 	malloc_mutex_unlock(&chunks_mtx);
3934 
3935 	malloc_mutex_unlock(&base_mtx);
3936 
3937 	malloc_mutex_lock(&arenas_mtx);
3938 	for (i = 0; i < narenas; i++) {
3939 		if (arenas[i] != NULL)
3940 			malloc_mutex_unlock(&arenas[i]->mtx);
3941 	}
3942 	malloc_mutex_unlock(&arenas_mtx);
3943 }
3944 
3945 /*
3946  * End library-private functions.
3947  */
3948 /******************************************************************************/
3949