1 /*	$NetBSD: jemalloc.c,v 1.17 2008/03/08 13:17:13 ad Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16-byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
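
/*
 * A few worked examples of the table above (an informal sketch, assuming the
 * default 4 kB pages and 16-byte quantum; the exact classes depend on the
 * runtime configuration):
 *
 *   malloc(7)       -> tiny,           rounded up to 8 bytes
 *   malloc(100)     -> quantum-spaced, rounded up to 112 bytes
 *   malloc(1500)    -> sub-page,       rounded up to 2 kB
 *   malloc(3000)    -> large,          rounded up to 4 kB (one page)
 *   malloc(1 << 21) -> huge,           rounded up to 2 MB (two chunks)
 */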
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.17 2008/03/08 13:17:13 ad Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non-localized version of strerror() that avoids bringing in
169  * stdio and the locale code.  All the malloc messages are in English,
170  * so why bother?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %d", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 #ifdef __i386__
220 #  define QUANTUM_2POW_MIN	4
221 #  define SIZEOF_PTR_2POW	2
222 #  define USE_BRK
223 #endif
224 #ifdef __ia64__
225 #  define QUANTUM_2POW_MIN	4
226 #  define SIZEOF_PTR_2POW	3
227 #endif
228 #ifdef __alpha__
229 #  define QUANTUM_2POW_MIN	4
230 #  define SIZEOF_PTR_2POW	3
231 #  define NO_TLS
232 #endif
233 #ifdef __sparc64__
234 #  define QUANTUM_2POW_MIN	4
235 #  define SIZEOF_PTR_2POW	3
236 #  define NO_TLS
237 #endif
238 #ifdef __amd64__
239 #  define QUANTUM_2POW_MIN	4
240 #  define SIZEOF_PTR_2POW	3
241 #endif
242 #ifdef __arm__
243 #  define QUANTUM_2POW_MIN	3
244 #  define SIZEOF_PTR_2POW	2
245 #  define USE_BRK
246 #  define NO_TLS
247 #endif
248 #ifdef __powerpc__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	2
251 #  define USE_BRK
252 #endif
253 #if defined(__sparc__) && !defined(__sparc64__)
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #endif
258 #ifdef __vax__
259 #  define QUANTUM_2POW_MIN	4
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #endif
263 #ifdef __sh__
264 #  define QUANTUM_2POW_MIN	4
265 #  define SIZEOF_PTR_2POW	2
266 #  define USE_BRK
267 #endif
268 #ifdef __m68k__
269 #  define QUANTUM_2POW_MIN	4
270 #  define SIZEOF_PTR_2POW	2
271 #  define USE_BRK
272 #endif
273 #ifdef __mips__
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __hppa__
279 #  define QUANTUM_2POW_MIN	4
280 #  define SIZEOF_PTR_2POW	2
281 #  define USE_BRK
282 #endif
283 
284 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
285 
286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
287 #ifndef SIZEOF_INT_2POW
288 #  define SIZEOF_INT_2POW	2
289 #endif
290 
291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
292 #if (!defined(PIC) && !defined(NO_TLS))
293 #  define NO_TLS
294 #endif
295 
296 /*
297  * Size and alignment of memory chunks that are allocated by the OS's virtual
298  * memory system.
299  */
300 #define	CHUNK_2POW_DEFAULT	20
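/*
 * With the default of 20, chunks are (1 << 20) == 1 MB; this is where the
 * "Huge" size classes in the table at the top of this file begin.
 */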
301 
302 /*
303  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
304  * so over-estimates are okay (up to a point), but under-estimates will
305  * negatively affect performance.
306  */
307 #define	CACHELINE_2POW		6
308 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
309 
310 /* Smallest size class to support. */
311 #define	TINY_MIN_2POW		1
312 
313 /*
314  * Maximum size class that is a multiple of the quantum, but not (necessarily)
315  * a power of 2.  Above this size, allocations are rounded up to the nearest
316  * power of 2.
317  */
318 #define	SMALL_MAX_2POW_DEFAULT	9
319 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
320 
321 /*
322  * Maximum desired run header overhead.  Runs are sized as small as possible
323  * such that this setting is still honored, without violating other constraints.
324  * The goal is to make runs as small as possible without exceeding a per-run
325  * external fragmentation threshold.
326  *
327  * Note that it is possible to set this low enough that it cannot be honored
328  * for some/all object sizes, since there is one bit of header overhead per
329  * object (plus a constant).  In such cases, this constraint is relaxed.
330  *
331  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
332  * overhead for which RUN_MAX_OVRHD is relaxed.
333  */
334 #define RUN_MAX_OVRHD		0.015
335 #define RUN_MAX_OVRHD_RELAX	1.5
336 
337 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
338 #define RUN_MAX_SMALL_2POW	15
339 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
340 
341 /******************************************************************************/
342 
343 #ifdef __FreeBSD__
344 /*
345  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
346  * they require malloc()ed memory.
347  */
348 typedef struct {
349 	spinlock_t	lock;
350 } malloc_mutex_t;
351 
352 /* Set to true once the allocator has been initialized. */
353 static bool malloc_initialized = false;
354 
355 /* Used to avoid initialization races. */
356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
357 #else
358 #define	malloc_mutex_t	mutex_t
359 
360 /* Set to true once the allocator has been initialized. */
361 static bool malloc_initialized = false;
362 
363 /* Used to avoid initialization races. */
364 static mutex_t init_lock = MUTEX_INITIALIZER;
365 #endif
366 
367 /******************************************************************************/
368 /*
369  * Statistics data structures.
370  */
371 
372 #ifdef MALLOC_STATS
373 
374 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
375 struct malloc_bin_stats_s {
376 	/*
377 	 * Number of allocation requests that corresponded to the size of this
378 	 * bin.
379 	 */
380 	uint64_t	nrequests;
381 
382 	/* Total number of runs created for this bin's size class. */
383 	uint64_t	nruns;
384 
385 	/*
386 	 * Total number of runs reused by extracting them from the runs tree for
387 	 * this bin's size class.
388 	 */
389 	uint64_t	reruns;
390 
391 	/* High-water mark for this bin. */
392 	unsigned long	highruns;
393 
394 	/* Current number of runs in this bin. */
395 	unsigned long	curruns;
396 };
397 
398 typedef struct arena_stats_s arena_stats_t;
399 struct arena_stats_s {
400 	/* Number of bytes currently mapped. */
401 	size_t		mapped;
402 
403 	/* Per-size-category statistics. */
404 	size_t		allocated_small;
405 	uint64_t	nmalloc_small;
406 	uint64_t	ndalloc_small;
407 
408 	size_t		allocated_large;
409 	uint64_t	nmalloc_large;
410 	uint64_t	ndalloc_large;
411 };
412 
413 typedef struct chunk_stats_s chunk_stats_t;
414 struct chunk_stats_s {
415 	/* Number of chunks that were allocated. */
416 	uint64_t	nchunks;
417 
418 	/* High-water mark for number of chunks allocated. */
419 	unsigned long	highchunks;
420 
421 	/*
422 	 * Current number of chunks allocated.  This value is not used for
423 	 * anything else; it is maintained solely so that highchunks can be
424 	 * kept up to date.
425 	 */
426 	unsigned long	curchunks;
427 };
428 
429 #endif /* #ifdef MALLOC_STATS */
430 
431 /******************************************************************************/
432 /*
433  * Chunk data structures.
434  */
435 
436 /* Tree of chunks. */
437 typedef struct chunk_node_s chunk_node_t;
438 struct chunk_node_s {
439 	/* Linkage for the chunk tree. */
440 	RB_ENTRY(chunk_node_s) link;
441 
442 	/*
443 	 * Pointer to the chunk that this tree node is responsible for.  In some
444 	 * (but certainly not all) cases, this data structure is placed at the
445 	 * beginning of the corresponding chunk, so this field may point to this
446 	 * node.
447 	 */
448 	void	*chunk;
449 
450 	/* Total chunk size. */
451 	size_t	size;
452 };
453 typedef struct chunk_tree_s chunk_tree_t;
454 RB_HEAD(chunk_tree_s, chunk_node_s);
455 
456 /******************************************************************************/
457 /*
458  * Arena data structures.
459  */
460 
461 typedef struct arena_s arena_t;
462 typedef struct arena_bin_s arena_bin_t;
463 
464 typedef struct arena_chunk_map_s arena_chunk_map_t;
465 struct arena_chunk_map_s {
466 	/* Number of pages in run. */
467 	uint32_t	npages;
468 	/*
469 	 * Position within run.  For a free run, this is POS_FREE for the first
470 	 * and last pages.  The POS_FREE special value makes it possible to
471 	 * quickly coalesce free runs.
472 	 *
473 	 * This is the limiting factor for chunksize; there can be at most 2^31
474 	 * pages in a run.
475 	 */
476 #define POS_FREE ((uint32_t)0xffffffffU)
477 	uint32_t	pos;
478 };
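
/*
 * Illustration of how the map entries are used (see arena_run_split() and
 * arena_run_alloc() below): a free run of 5 pages starting at page index 10
 * is recorded as map[10] = map[14] = { npages: 5, pos: POS_FREE }, and the
 * entries in between are not kept current.  If 3 of those pages are then
 * allocated from the front, map[10..12] become npages == 3 with pos 0, 1, 2,
 * and the trailing free pages are re-marked at map[13] and map[14].
 */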
479 
480 /* Arena chunk header. */
481 typedef struct arena_chunk_s arena_chunk_t;
482 struct arena_chunk_s {
483 	/* Arena that owns the chunk. */
484 	arena_t *arena;
485 
486 	/* Linkage for the arena's chunk tree. */
487 	RB_ENTRY(arena_chunk_s) link;
488 
489 	/*
490 	 * Number of pages in use.  This is maintained in order to make
491 	 * detection of empty chunks fast.
492 	 */
493 	uint32_t pages_used;
494 
495 	/*
496 	 * Every time a free run larger than this value is created/coalesced,
497 	 * this value is increased.  The only way that the value decreases is if
498 	 * arena_run_alloc() fails to find a free run as large as advertised by
499 	 * this value.
500 	 */
501 	uint32_t max_frun_npages;
502 
503 	/*
504 	 * Every time a free run that starts at an earlier page than this value
505 	 * is created/coalesced, this value is decreased.  It is reset in a
506 	 * similar fashion to max_frun_npages.
507 	 */
508 	uint32_t min_frun_ind;
509 
510 	/*
511 	 * Map of pages within chunk that keeps track of free/large/small.  For
512 	 * free runs, only the map entries for the first and last pages are
513 	 * kept up to date, so that free runs can be quickly coalesced.
514 	 */
515 	arena_chunk_map_t map[1]; /* Dynamically sized. */
516 };
517 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
518 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
519 
520 typedef struct arena_run_s arena_run_t;
521 struct arena_run_s {
522 	/* Linkage for run trees. */
523 	RB_ENTRY(arena_run_s) link;
524 
525 #ifdef MALLOC_DEBUG
526 	uint32_t	magic;
527 #  define ARENA_RUN_MAGIC 0x384adf93
528 #endif
529 
530 	/* Bin this run is associated with. */
531 	arena_bin_t	*bin;
532 
533 	/* Index of first element that might have a free region. */
534 	unsigned	regs_minelm;
535 
536 	/* Number of free regions in run. */
537 	unsigned	nfree;
538 
539 	/* Bitmask of in-use regions (0: in use, 1: free). */
540 	unsigned	regs_mask[1]; /* Dynamically sized. */
541 };
542 typedef struct arena_run_tree_s arena_run_tree_t;
543 RB_HEAD(arena_run_tree_s, arena_run_s);
544 
545 struct arena_bin_s {
546 	/*
547 	 * Current run being used to service allocations of this bin's size
548 	 * class.
549 	 */
550 	arena_run_t	*runcur;
551 
552 	/*
553 	 * Tree of non-full runs.  This tree is used when looking for an
554 	 * existing run when runcur is no longer usable.  We choose the
555 	 * non-full run that is lowest in memory; this policy tends to keep
556 	 * objects packed well, and it can also help reduce the number of
557 	 * almost-empty chunks.
558 	 */
559 	arena_run_tree_t runs;
560 
561 	/* Size of regions in a run for this bin's size class. */
562 	size_t		reg_size;
563 
564 	/* Total size of a run for this bin's size class. */
565 	size_t		run_size;
566 
567 	/* Total number of regions in a run for this bin's size class. */
568 	uint32_t	nregs;
569 
570 	/* Number of elements in a run's regs_mask for this bin's size class. */
571 	uint32_t	regs_mask_nelms;
572 
573 	/* Offset of first region in a run for this bin's size class. */
574 	uint32_t	reg0_offset;
575 
576 #ifdef MALLOC_STATS
577 	/* Bin statistics. */
578 	malloc_bin_stats_t stats;
579 #endif
580 };
581 
582 struct arena_s {
583 #ifdef MALLOC_DEBUG
584 	uint32_t		magic;
585 #  define ARENA_MAGIC 0x947d3d24
586 #endif
587 
588 	/* All operations on this arena require that mtx be locked. */
589 	malloc_mutex_t		mtx;
590 
591 #ifdef MALLOC_STATS
592 	arena_stats_t		stats;
593 #endif
594 
595 	/*
596 	 * Tree of chunks this arena manages.
597 	 */
598 	arena_chunk_tree_t	chunks;
599 
600 	/*
601 	 * In order to avoid rapid chunk allocation/deallocation when an arena
602 	 * oscillates right on the cusp of needing a new chunk, cache the most
603 	 * recently freed chunk.  This caching is disabled by opt_hint.
604 	 *
605 	 * There is one spare chunk per arena, rather than one spare total, in
606 	 * order to avoid interactions between multiple threads that could make
607 	 * a single spare inadequate.
608 	 */
609 	arena_chunk_t *spare;
610 
611 	/*
612 	 * bins is used to store runs of free regions of the following sizes,
613 	 * assuming a 16-byte quantum, 4 kB page size, and default MALLOC_OPTIONS.
614 	 *
615 	 *   bins[i] | size |
616 	 *   --------+------+
617 	 *        0  |    2 |
618 	 *        1  |    4 |
619 	 *        2  |    8 |
620 	 *   --------+------+
621 	 *        3  |   16 |
622 	 *        4  |   32 |
623 	 *        5  |   48 |
624 	 *        6  |   64 |
625 	 *           :      :
626 	 *           :      :
627 	 *       33  |  496 |
628 	 *       34  |  512 |
629 	 *   --------+------+
630 	 *       35  | 1024 |
631 	 *       36  | 2048 |
632 	 *   --------+------+
633 	 */
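	/*
	 * Under those same defaults this works out to ntbins == 3,
	 * nqbins == 32 and nsbins == 2, i.e. 37 bins in total (an informal
	 * tally; other page sizes or MALLOC_OPTIONS settings change it).
	 */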
634 	arena_bin_t		bins[1]; /* Dynamically sized. */
635 };
636 
637 /******************************************************************************/
638 /*
639  * Data.
640  */
641 
642 /* Number of CPUs. */
643 static unsigned		ncpus;
644 
645 /* VM page size. */
646 static size_t		pagesize;
647 static size_t		pagesize_mask;
648 static int		pagesize_2pow;
649 
650 /* Various bin-related settings. */
651 static size_t		bin_maxclass; /* Max size class for bins. */
652 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
653 static unsigned		nqbins; /* Number of quantum-spaced bins. */
654 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
655 static size_t		small_min;
656 static size_t		small_max;
657 
658 /* Various quantum-related settings. */
659 static size_t		quantum;
660 static size_t		quantum_mask; /* (quantum - 1). */
661 
662 /* Various chunk-related settings. */
663 static size_t		chunksize;
664 static size_t		chunksize_mask; /* (chunksize - 1). */
665 static int		chunksize_2pow;
666 static unsigned		chunk_npages;
667 static unsigned		arena_chunk_header_npages;
668 static size_t		arena_maxclass; /* Max size class for arenas. */
669 
670 /********/
671 /*
672  * Chunks.
673  */
674 
675 /* Protects chunk-related data structures. */
676 static malloc_mutex_t	chunks_mtx;
677 
678 /* Tree of chunks that are stand-alone huge allocations. */
679 static chunk_tree_t	huge;
680 
681 #ifdef USE_BRK
682 /*
683  * Try to use brk for chunk-size allocations, due to address space constraints.
684  */
685 /*
686  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
687  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
688  * could cause recursive lock acquisition).
689  */
690 static malloc_mutex_t	brk_mtx;
691 /* Result of first sbrk(0) call. */
692 static void		*brk_base;
693 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
694 static void		*brk_prev;
695 /* Current upper limit on brk addresses. */
696 static void		*brk_max;
697 #endif
698 
699 #ifdef MALLOC_STATS
700 /* Huge allocation statistics. */
701 static uint64_t		huge_nmalloc;
702 static uint64_t		huge_ndalloc;
703 static uint64_t		huge_nralloc;
704 static size_t		huge_allocated;
705 #endif
706 
707 /*
708  * Tree of chunks that were previously allocated.  This is used when allocating
709  * chunks, in an attempt to re-use address space.
710  */
711 static chunk_tree_t	old_chunks;
712 
713 /****************************/
714 /*
715  * base (internal allocation).
716  */
717 
718 /*
719  * Current pages that are being used for internal memory allocations.  These
720  * pages are carved up in cacheline-size quanta, so that there is no chance of
721  * false cache line sharing.
722  */
723 static void		*base_pages;
724 static void		*base_next_addr;
725 static void		*base_past_addr; /* Addr immediately past base_pages. */
726 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
727 static malloc_mutex_t	base_mtx;
728 #ifdef MALLOC_STATS
729 static size_t		base_mapped;
730 #endif
731 
732 /********/
733 /*
734  * Arenas.
735  */
736 
737 /*
738  * Arenas that are used to service external requests.  Not all elements of the
739  * arenas array are necessarily used; arenas are created lazily as needed.
740  */
741 static arena_t		**arenas;
742 static unsigned		narenas;
743 static unsigned		next_arena;
744 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
745 
746 #ifndef NO_TLS
747 /*
748  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
749  * for allocations.
750  */
751 static __thread arena_t	*arenas_map;
752 #define	get_arenas_map()	(arenas_map)
753 #define	set_arenas_map(x)	(arenas_map = x)
754 #else
755 static thread_key_t arenas_map_key;
756 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
757 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
758 #endif
759 
760 #ifdef MALLOC_STATS
761 /* Chunk statistics. */
762 static chunk_stats_t	stats_chunks;
763 #endif
764 
765 /*******************************/
766 /*
767  * Runtime configuration options.
768  */
769 const char	*_malloc_options;
770 
771 #ifndef MALLOC_PRODUCTION
772 static bool	opt_abort = true;
773 static bool	opt_junk = true;
774 #else
775 static bool	opt_abort = false;
776 static bool	opt_junk = false;
777 #endif
778 static bool	opt_hint = false;
779 static bool	opt_print_stats = false;
780 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
781 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
782 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
783 static bool	opt_utrace = false;
784 static bool	opt_sysv = false;
785 static bool	opt_xmalloc = false;
786 static bool	opt_zero = false;
787 static int32_t	opt_narenas_lshift = 0;
788 
789 typedef struct {
790 	void	*p;
791 	size_t	s;
792 	void	*r;
793 } malloc_utrace_t;
794 
795 #define	UTRACE(a, b, c)							\
796 	if (opt_utrace) {						\
797 		malloc_utrace_t ut;					\
798 		ut.p = a;						\
799 		ut.s = b;						\
800 		ut.r = c;						\
801 		xutrace(&ut, sizeof(ut));				\
802 	}
803 
804 /******************************************************************************/
805 /*
806  * Begin function prototypes for non-inline static functions.
807  */
808 
809 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
810 		const char *p4);
811 #ifdef MALLOC_STATS
812 static void	malloc_printf(const char *format, ...);
813 #endif
814 static char	*umax2s(uintmax_t x, char *s);
815 static bool	base_pages_alloc(size_t minsize);
816 static void	*base_alloc(size_t size);
817 static chunk_node_t *base_chunk_node_alloc(void);
818 static void	base_chunk_node_dealloc(chunk_node_t *node);
819 #ifdef MALLOC_STATS
820 static void	stats_print(arena_t *arena);
821 #endif
822 static void	*pages_map(void *addr, size_t size);
823 static void	*pages_map_align(void *addr, size_t size, int align);
824 static void	pages_unmap(void *addr, size_t size);
825 static void	*chunk_alloc(size_t size);
826 static void	chunk_dealloc(void *chunk, size_t size);
827 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
828 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
829 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
830 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
831 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
832 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
833 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
834 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
835 static void	*arena_malloc(arena_t *arena, size_t size);
836 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
837     size_t alloc_size);
838 static size_t	arena_salloc(const void *ptr);
839 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
840 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
841 static bool	arena_new(arena_t *arena);
842 static arena_t	*arenas_extend(unsigned ind);
843 static void	*huge_malloc(size_t size);
844 static void	*huge_palloc(size_t alignment, size_t size);
845 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
846 static void	huge_dalloc(void *ptr);
847 static void	*imalloc(size_t size);
848 static void	*ipalloc(size_t alignment, size_t size);
849 static void	*icalloc(size_t size);
850 static size_t	isalloc(const void *ptr);
851 static void	*iralloc(void *ptr, size_t size);
852 static void	idalloc(void *ptr);
853 static void	malloc_print_stats(void);
854 static bool	malloc_init_hard(void);
855 
856 /*
857  * End function prototypes.
858  */
859 /******************************************************************************/
860 /*
861  * Begin mutex.
862  */
863 
864 #ifdef __NetBSD__
865 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
866 #define	malloc_mutex_lock(m)	mutex_lock(m)
867 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
868 #else	/* __NetBSD__ */
869 static inline void
870 malloc_mutex_init(malloc_mutex_t *a_mutex)
871 {
872 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
873 
874 	a_mutex->lock = lock;
875 }
876 
877 static inline void
878 malloc_mutex_lock(malloc_mutex_t *a_mutex)
879 {
880 
881 	if (__isthreaded)
882 		_SPINLOCK(&a_mutex->lock);
883 }
884 
885 static inline void
886 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
887 {
888 
889 	if (__isthreaded)
890 		_SPINUNLOCK(&a_mutex->lock);
891 }
892 #endif	/* __NetBSD__ */
893 
894 /*
895  * End mutex.
896  */
897 /******************************************************************************/
898 /*
899  * Begin Utility functions/macros.
900  */
901 
902 /* Return the chunk address for allocation address a. */
903 #define	CHUNK_ADDR2BASE(a)						\
904 	((void *)((uintptr_t)(a) & ~chunksize_mask))
905 
906 /* Return the chunk offset of address a. */
907 #define	CHUNK_ADDR2OFFSET(a)						\
908 	((size_t)((uintptr_t)(a) & chunksize_mask))
909 
910 /* Return the smallest chunk multiple that is >= s. */
911 #define	CHUNK_CEILING(s)						\
912 	(((s) + chunksize_mask) & ~chunksize_mask)
913 
914 /* Return the smallest cacheline multiple that is >= s. */
915 #define	CACHELINE_CEILING(s)						\
916 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
917 
918 /* Return the smallest quantum multiple that is >= a. */
919 #define	QUANTUM_CEILING(a)						\
920 	(((a) + quantum_mask) & ~quantum_mask)
921 
922 /* Return the smallest pagesize multiple that is >= s. */
923 #define	PAGE_CEILING(s)							\
924 	(((s) + pagesize_mask) & ~pagesize_mask)
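
/*
 * For example (illustrative only, assuming the default 1 MB chunk size, so
 * chunksize_mask == 0xfffff):
 *
 *   CHUNK_ADDR2BASE(0x2345678)   == 0x2300000
 *   CHUNK_ADDR2OFFSET(0x2345678) == 0x45678
 *   CHUNK_CEILING(0x180000)      == 0x200000
 */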
925 
926 /* Compute the smallest power of 2 that is >= x. */
927 static inline size_t
928 pow2_ceil(size_t x)
929 {
930 
931 	x--;
932 	x |= x >> 1;
933 	x |= x >> 2;
934 	x |= x >> 4;
935 	x |= x >> 8;
936 	x |= x >> 16;
937 #if (SIZEOF_PTR == 8)
938 	x |= x >> 32;
939 #endif
940 	x++;
941 	return (x);
942 }
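
/*
 * Worked example: for x == 37 (100101 binary) the decrement gives 36
 * (100100), the shift/or cascade smears the highest set bit into every lower
 * position (yielding 63, 111111), and the final increment produces 64, the
 * smallest power of two >= 37.  Note that an input of 0 wraps around and
 * returns 0.
 */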
943 
944 static void
945 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
946 {
947 
948 	write(STDERR_FILENO, p1, strlen(p1));
949 	write(STDERR_FILENO, p2, strlen(p2));
950 	write(STDERR_FILENO, p3, strlen(p3));
951 	write(STDERR_FILENO, p4, strlen(p4));
952 }
953 
954 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
955 	    const char *p4) = wrtmessage;
956 
957 #ifdef MALLOC_STATS
958 /*
959  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
960  */
961 static void
962 malloc_printf(const char *format, ...)
963 {
964 	char buf[4096];
965 	va_list ap;
966 
967 	va_start(ap, format);
968 	vsnprintf(buf, sizeof(buf), format, ap);
969 	va_end(ap);
970 	_malloc_message(buf, "", "", "");
971 }
972 #endif
973 
974 /*
975  * We don't want to depend on vsnprintf() for production builds, since that can
976  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
977  * integer printing functionality, so that malloc_printf() use can be limited to
978  * MALLOC_STATS code.
979  */
980 #define UMAX2S_BUFSIZE	21
981 static char *
982 umax2s(uintmax_t x, char *s)
983 {
984 	unsigned i;
985 
986 	/* Make sure UMAX2S_BUFSIZE is large enough. */
987 	/* LINTED */
988 	assert(sizeof(uintmax_t) <= 8);
989 
990 	i = UMAX2S_BUFSIZE - 1;
991 	s[i] = '\0';
992 	do {
993 		i--;
994 		s[i] = "0123456789"[(int)x % 10];
995 		x /= (uintmax_t)10LL;
996 	} while (x > 0);
997 
998 	return (&s[i]);
999 }
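
/*
 * Note that umax2s() fills the buffer from the end and returns a pointer
 * into it rather than the buffer itself.  Illustrative usage:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	const char *s = umax2s(1234, buf);
 *
 * after which s points at the string "1234" stored near the end of buf.
 */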
1000 
1001 /******************************************************************************/
1002 
1003 static bool
1004 base_pages_alloc(size_t minsize)
1005 {
1006 	size_t csize = 0;
1007 
1008 #ifdef USE_BRK
1009 	/*
1010 	 * Do special brk allocation here, since base allocations don't need to
1011 	 * be chunk-aligned.
1012 	 */
1013 	if (brk_prev != (void *)-1) {
1014 		void *brk_cur;
1015 		intptr_t incr;
1016 
1017 		if (minsize != 0)
1018 			csize = CHUNK_CEILING(minsize);
1019 
1020 		malloc_mutex_lock(&brk_mtx);
1021 		do {
1022 			/* Get the current end of brk. */
1023 			brk_cur = sbrk(0);
1024 
1025 			/*
1026 			 * Calculate how much padding is necessary to
1027 			 * chunk-align the end of brk.  Don't worry about
1028 			 * brk_cur not being chunk-aligned though.
1029 			 */
1030 			incr = (intptr_t)chunksize
1031 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1032 			if (incr < minsize)
1033 				incr += csize;
1034 
1035 			brk_prev = sbrk(incr);
1036 			if (brk_prev == brk_cur) {
1037 				/* Success. */
1038 				malloc_mutex_unlock(&brk_mtx);
1039 				base_pages = brk_cur;
1040 				base_next_addr = base_pages;
1041 				base_past_addr = (void *)((uintptr_t)base_pages
1042 				    + incr);
1043 #ifdef MALLOC_STATS
1044 				base_mapped += incr;
1045 #endif
1046 				return (false);
1047 			}
1048 		} while (brk_prev != (void *)-1);
1049 		malloc_mutex_unlock(&brk_mtx);
1050 	}
1051 	if (minsize == 0) {
1052 		/*
1053 		 * Failure during initialization doesn't matter, so avoid
1054 		 * falling through to the mmap-based page mapping code.
1055 		 */
1056 		return (true);
1057 	}
1058 #endif
1059 	assert(minsize != 0);
1060 	csize = PAGE_CEILING(minsize);
1061 	base_pages = pages_map(NULL, csize);
1062 	if (base_pages == NULL)
1063 		return (true);
1064 	base_next_addr = base_pages;
1065 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1066 #ifdef MALLOC_STATS
1067 	base_mapped += csize;
1068 #endif
1069 	return (false);
1070 }
1071 
1072 static void *
1073 base_alloc(size_t size)
1074 {
1075 	void *ret;
1076 	size_t csize;
1077 
1078 	/* Round size up to nearest multiple of the cacheline size. */
1079 	csize = CACHELINE_CEILING(size);
1080 
1081 	malloc_mutex_lock(&base_mtx);
1082 
1083 	/* Make sure there's enough space for the allocation. */
1084 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1085 		if (base_pages_alloc(csize)) {
1086 			ret = NULL;
1087 			goto RETURN;
1088 		}
1089 	}
1090 
1091 	/* Allocate. */
1092 	ret = base_next_addr;
1093 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1094 
1095 RETURN:
1096 	malloc_mutex_unlock(&base_mtx);
1097 	return (ret);
1098 }
1099 
1100 static chunk_node_t *
1101 base_chunk_node_alloc(void)
1102 {
1103 	chunk_node_t *ret;
1104 
1105 	malloc_mutex_lock(&base_mtx);
1106 	if (base_chunk_nodes != NULL) {
1107 		ret = base_chunk_nodes;
1108 		/* LINTED */
1109 		base_chunk_nodes = *(chunk_node_t **)ret;
1110 		malloc_mutex_unlock(&base_mtx);
1111 	} else {
1112 		malloc_mutex_unlock(&base_mtx);
1113 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1114 	}
1115 
1116 	return (ret);
1117 }
1118 
1119 static void
1120 base_chunk_node_dealloc(chunk_node_t *node)
1121 {
1122 
1123 	malloc_mutex_lock(&base_mtx);
1124 	/* LINTED */
1125 	*(chunk_node_t **)node = base_chunk_nodes;
1126 	base_chunk_nodes = node;
1127 	malloc_mutex_unlock(&base_mtx);
1128 }
1129 
1130 /******************************************************************************/
1131 
1132 #ifdef MALLOC_STATS
1133 static void
1134 stats_print(arena_t *arena)
1135 {
1136 	unsigned i;
1137 	int gap_start;
1138 
1139 	malloc_printf(
1140 	    "          allocated/mapped            nmalloc      ndalloc\n");
1141 
1142 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1143 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1144 	    arena->stats.ndalloc_small);
1145 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1146 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1147 	    arena->stats.ndalloc_large);
1148 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1149 	    arena->stats.allocated_small + arena->stats.allocated_large,
1150 	    arena->stats.mapped,
1151 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1152 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1153 
1154 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1155 	    "    reruns maxruns curruns\n");
1156 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1157 		if (arena->bins[i].stats.nrequests == 0) {
1158 			if (gap_start == -1)
1159 				gap_start = i;
1160 		} else {
1161 			if (gap_start != -1) {
1162 				if (i > gap_start + 1) {
1163 					/* Gap of more than one size class. */
1164 					malloc_printf("[%u..%u]\n",
1165 					    gap_start, i - 1);
1166 				} else {
1167 					/* Gap of one size class. */
1168 					malloc_printf("[%u]\n", gap_start);
1169 				}
1170 				gap_start = -1;
1171 			}
1172 			malloc_printf(
1173 			    "%13u %1s %4zu %4u %3zu %9llu %9llu"
1174 			    " %9llu %7lu %7lu\n",
1175 			    i,
1176 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1177 			    arena->bins[i].reg_size,
1178 			    arena->bins[i].nregs,
1179 			    arena->bins[i].run_size >> pagesize_2pow,
1180 			    arena->bins[i].stats.nrequests,
1181 			    arena->bins[i].stats.nruns,
1182 			    arena->bins[i].stats.reruns,
1183 			    arena->bins[i].stats.highruns,
1184 			    arena->bins[i].stats.curruns);
1185 		}
1186 	}
1187 	if (gap_start != -1) {
1188 		if (i > gap_start + 1) {
1189 			/* Gap of more than one size class. */
1190 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1191 		} else {
1192 			/* Gap of one size class. */
1193 			malloc_printf("[%u]\n", gap_start);
1194 		}
1195 	}
1196 }
1197 #endif
1198 
1199 /*
1200  * End Utility functions/macros.
1201  */
1202 /******************************************************************************/
1203 /*
1204  * Begin chunk management functions.
1205  */
1206 
1207 #ifndef lint
1208 static inline int
1209 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1210 {
1211 
1212 	assert(a != NULL);
1213 	assert(b != NULL);
1214 
1215 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1216 		return (-1);
1217 	else if (a->chunk == b->chunk)
1218 		return (0);
1219 	else
1220 		return (1);
1221 }
1222 
1223 /* Generate red-black tree code for chunks. */
1224 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1225 #endif
1226 
1227 static void *
1228 pages_map_align(void *addr, size_t size, int align)
1229 {
1230 	void *ret;
1231 
1232 	/*
1233 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1234 	 * of existing mappings, and we only want to create new mappings.
1235 	 */
1236 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1237 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1238 	assert(ret != NULL);
1239 
1240 	if (ret == MAP_FAILED)
1241 		ret = NULL;
1242 	else if (addr != NULL && ret != addr) {
1243 		/*
1244 		 * We succeeded in mapping memory, but not in the right place.
1245 		 */
1246 		if (munmap(ret, size) == -1) {
1247 			char buf[STRERROR_BUF];
1248 
1249 			STRERROR_R(errno, buf, sizeof(buf));
1250 			_malloc_message(getprogname(),
1251 			    ": (malloc) Error in munmap(): ", buf, "\n");
1252 			if (opt_abort)
1253 				abort();
1254 		}
1255 		ret = NULL;
1256 	}
1257 
1258 	assert(ret == NULL || (addr == NULL && ret != addr)
1259 	    || (addr != NULL && ret == addr));
1260 	return (ret);
1261 }
1262 
1263 static void *
1264 pages_map(void *addr, size_t size)
1265 {
1266 
1267 	return pages_map_align(addr, size, 0);
1268 }
1269 
1270 static void
1271 pages_unmap(void *addr, size_t size)
1272 {
1273 
1274 	if (munmap(addr, size) == -1) {
1275 		char buf[STRERROR_BUF];
1276 
1277 		STRERROR_R(errno, buf, sizeof(buf));
1278 		_malloc_message(getprogname(),
1279 		    ": (malloc) Error in munmap(): ", buf, "\n");
1280 		if (opt_abort)
1281 			abort();
1282 	}
1283 }
1284 
1285 static void *
1286 chunk_alloc(size_t size)
1287 {
1288 	void *ret, *chunk;
1289 	chunk_node_t *tchunk, *delchunk;
1290 
1291 	assert(size != 0);
1292 	assert((size & chunksize_mask) == 0);
1293 
1294 	malloc_mutex_lock(&chunks_mtx);
1295 
1296 	if (size == chunksize) {
1297 		/*
1298 		 * Check for address ranges that were previously chunks and try
1299 		 * to use them.
1300 		 */
1301 
1302 		/* LINTED */
1303 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1304 		while (tchunk != NULL) {
1305 			/* Found an address range.  Try to recycle it. */
1306 
1307 			chunk = tchunk->chunk;
1308 			delchunk = tchunk;
1309 			/* LINTED */
1310 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1311 
1312 			/* Remove delchunk from the tree. */
1313 			/* LINTED */
1314 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1315 			base_chunk_node_dealloc(delchunk);
1316 
1317 #ifdef USE_BRK
1318 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1319 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1320 				/* Re-use a previously freed brk chunk. */
1321 				ret = chunk;
1322 				goto RETURN;
1323 			}
1324 #endif
1325 			if ((ret = pages_map(chunk, size)) != NULL) {
1326 				/* Success. */
1327 				goto RETURN;
1328 			}
1329 		}
1330 	}
1331 
1332 	/*
1333 	 * Try to over-allocate, but allow the OS to place the allocation
1334 	 * anywhere.  Beware of size_t wrap-around.
1335 	 */
1336 	if (size + chunksize > size) {
1337 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1338 		    != NULL) {
1339 			goto RETURN;
1340 		}
1341 	}
1342 
1343 #ifdef USE_BRK
1344 	/*
1345 	 * Try to create allocations in brk, in order to make full use of
1346 	 * limited address space.
1347 	 */
1348 	if (brk_prev != (void *)-1) {
1349 		void *brk_cur;
1350 		intptr_t incr;
1351 
1352 		/*
1353 		 * The loop is necessary to recover from races with other
1354 		 * threads that are using brk for something other than malloc.
1355 		 */
1356 		malloc_mutex_lock(&brk_mtx);
1357 		do {
1358 			/* Get the current end of brk. */
1359 			brk_cur = sbrk(0);
1360 
1361 			/*
1362 			 * Calculate how much padding is necessary to
1363 			 * chunk-align the end of brk.
1364 			 */
1365 			incr = (intptr_t)size
1366 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1367 			if (incr == size) {
1368 				ret = brk_cur;
1369 			} else {
1370 				ret = (void *)((intptr_t)brk_cur + incr);
1371 				incr += size;
1372 			}
1373 
1374 			brk_prev = sbrk(incr);
1375 			if (brk_prev == brk_cur) {
1376 				/* Success. */
1377 				malloc_mutex_unlock(&brk_mtx);
1378 				brk_max = (void *)((intptr_t)ret + size);
1379 				goto RETURN;
1380 			}
1381 		} while (brk_prev != (void *)-1);
1382 		malloc_mutex_unlock(&brk_mtx);
1383 	}
1384 #endif
1385 
1386 	/* All strategies for allocation failed. */
1387 	ret = NULL;
1388 RETURN:
1389 	if (ret != NULL) {
1390 		chunk_node_t key;
1391 		/*
1392 		 * Clean out any entries in old_chunks that overlap with the
1393 		 * memory we just allocated.
1394 		 */
1395 		key.chunk = ret;
1396 		/* LINTED */
1397 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1398 		while (tchunk != NULL
1399 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1400 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1401 			delchunk = tchunk;
1402 			/* LINTED */
1403 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1404 			/* LINTED */
1405 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1406 			base_chunk_node_dealloc(delchunk);
1407 		}
1408 
1409 	}
1410 #ifdef MALLOC_STATS
1411 	if (ret != NULL) {
1412 		stats_chunks.nchunks += (size / chunksize);
1413 		stats_chunks.curchunks += (size / chunksize);
1414 	}
1415 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1416 		stats_chunks.highchunks = stats_chunks.curchunks;
1417 #endif
1418 	malloc_mutex_unlock(&chunks_mtx);
1419 
1420 	assert(CHUNK_ADDR2BASE(ret) == ret);
1421 	return (ret);
1422 }
1423 
1424 static void
1425 chunk_dealloc(void *chunk, size_t size)
1426 {
1427 	chunk_node_t *node;
1428 
1429 	assert(chunk != NULL);
1430 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1431 	assert(size != 0);
1432 	assert((size & chunksize_mask) == 0);
1433 
1434 	malloc_mutex_lock(&chunks_mtx);
1435 
1436 #ifdef USE_BRK
1437 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1438 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1439 		void *brk_cur;
1440 
1441 		malloc_mutex_lock(&brk_mtx);
1442 		/* Get the current end of brk. */
1443 		brk_cur = sbrk(0);
1444 
1445 		/*
1446 		 * Try to shrink the data segment if this chunk is at the end
1447 		 * of the data segment.  The sbrk() call here is subject to a
1448 		 * race condition with threads that use brk(2) or sbrk(2)
1449 		 * directly, but the alternative would be to leak memory for
1450 		 * the sake of poorly designed multi-threaded programs.
1451 		 */
1452 		if (brk_cur == brk_max
1453 		    && (void *)((uintptr_t)chunk + size) == brk_max
1454 		    && sbrk(-(intptr_t)size) == brk_max) {
1455 			malloc_mutex_unlock(&brk_mtx);
1456 			if (brk_prev == brk_max) {
1457 				/* Success. */
1458 				brk_prev = (void *)((intptr_t)brk_max
1459 				    - (intptr_t)size);
1460 				brk_max = brk_prev;
1461 			}
1462 		} else {
1463 			size_t offset;
1464 
1465 			malloc_mutex_unlock(&brk_mtx);
1466 			madvise(chunk, size, MADV_FREE);
1467 
1468 			/*
1469 			 * Iteratively create records of each chunk-sized
1470 			 * memory region that makes up 'chunk', so that
1471 			 * the address range can be recycled if memory usage
1472 			 * increases later on.
1473 			 */
1474 			for (offset = 0; offset < size; offset += chunksize) {
1475 				node = base_chunk_node_alloc();
1476 				if (node == NULL)
1477 					break;
1478 
1479 				node->chunk = (void *)((uintptr_t)chunk
1480 				    + (uintptr_t)offset);
1481 				node->size = chunksize;
1482 				/* LINTED */
1483 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1484 			}
1485 		}
1486 	} else {
1487 #endif
1488 		pages_unmap(chunk, size);
1489 
1490 		/*
1491 		 * Make a record of the chunk's address, so that the address
1492 		 * range can be recycled if memory usage increases later on.
1493 		 * Don't bother to create entries if (size > chunksize), since
1494 		 * doing so could cause scalability issues for truly gargantuan
1495 		 * objects (many gigabytes or larger).
1496 		 */
1497 		if (size == chunksize) {
1498 			node = base_chunk_node_alloc();
1499 			if (node != NULL) {
1500 				node->chunk = (void *)(uintptr_t)chunk;
1501 				node->size = chunksize;
1502 				/* LINTED */
1503 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1504 			}
1505 		}
1506 #ifdef USE_BRK
1507 	}
1508 #endif
1509 
1510 #ifdef MALLOC_STATS
1511 	stats_chunks.curchunks -= (size / chunksize);
1512 #endif
1513 	malloc_mutex_unlock(&chunks_mtx);
1514 }
1515 
1516 /*
1517  * End chunk management functions.
1518  */
1519 /******************************************************************************/
1520 /*
1521  * Begin arena.
1522  */
1523 
1524 /*
1525  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1526  *
1527  * We maintain at least one block of arenas.  Usually there are more.
1528  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1529  * amongst threads.  To accomplish this, next_arena advances only in
1530  * ncpu steps.
1531  */
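/*
 * For example (an illustration only): with ncpus == 4 and narenas == 8, the
 * first thread that needs a map gets map == &arenas[0], the next gets
 * &arenas[4], the next &arenas[0] again, and so on.  Each thread then indexes
 * its block with thr_curcpu(), so threads that share a block and run on the
 * same CPU also share an arena.
 */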
1532 static inline arena_t *
1533 choose_arena(void)
1534 {
1535 	unsigned i, curcpu;
1536 	arena_t **map;
1537 
1538 	map = get_arenas_map();
1539 	curcpu = thr_curcpu();
1540 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1541 		return map[curcpu];
1542 
1543 	/* Initialize the current block of arenas and advance to next. */
1544 	malloc_mutex_lock(&arenas_mtx);
1545 	assert(next_arena % ncpus == 0);
1546 	assert(narenas % ncpus == 0);
1547 	map = &arenas[next_arena];
1548 	set_arenas_map(map);
1549 	for (i = 0; i < ncpus; i++) {
1550 		if (arenas[next_arena] == NULL)
1551 			arenas_extend(next_arena);
1552 		next_arena = (next_arena + 1) % narenas;
1553 	}
1554 	malloc_mutex_unlock(&arenas_mtx);
1555 
1556 	/*
1557 	 * If we were unable to allocate an arena above, then default to
1558 	 * the first arena, which is always present.
1559 	 */
1560 	curcpu = thr_curcpu();
1561 	if (map[curcpu] != NULL)
1562 		return map[curcpu];
1563 	return arenas[0];
1564 }
1565 
1566 #ifndef lint
1567 static inline int
1568 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1569 {
1570 
1571 	assert(a != NULL);
1572 	assert(b != NULL);
1573 
1574 	if ((uintptr_t)a < (uintptr_t)b)
1575 		return (-1);
1576 	else if (a == b)
1577 		return (0);
1578 	else
1579 		return (1);
1580 }
1581 
1582 /* Generate red-black tree code for arena chunks. */
1583 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1584 #endif
1585 
1586 #ifndef lint
1587 static inline int
1588 arena_run_comp(arena_run_t *a, arena_run_t *b)
1589 {
1590 
1591 	assert(a != NULL);
1592 	assert(b != NULL);
1593 
1594 	if ((uintptr_t)a < (uintptr_t)b)
1595 		return (-1);
1596 	else if (a == b)
1597 		return (0);
1598 	else
1599 		return (1);
1600 }
1601 
1602 /* Generate red-black tree code for arena runs. */
1603 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1604 #endif
1605 
1606 static inline void *
1607 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1608 {
1609 	void *ret;
1610 	unsigned i, mask, bit, regind;
1611 
1612 	assert(run->magic == ARENA_RUN_MAGIC);
1613 	assert(run->regs_minelm < bin->regs_mask_nelms);
1614 
1615 	/*
1616 	 * Move the first check outside the loop, so that run->regs_minelm can
1617 	 * be updated unconditionally, without the possibility of updating it
1618 	 * multiple times.
1619 	 */
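	/*
	 * For example (assuming 32-bit ints, so each regs_mask element covers
	 * 32 regions): if i == 2 and the lowest set bit of mask is bit 5,
	 * then regind == 2*32 + 5 == 69 and the region returned starts at
	 * run + reg0_offset + 69 * reg_size.
	 */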
1620 	i = run->regs_minelm;
1621 	mask = run->regs_mask[i];
1622 	if (mask != 0) {
1623 		/* Usable allocation found. */
1624 		bit = ffs((int)mask) - 1;
1625 
1626 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1627 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1628 		    + (bin->reg_size * regind));
1629 
1630 		/* Clear bit. */
1631 		mask ^= (1 << bit);
1632 		run->regs_mask[i] = mask;
1633 
1634 		return (ret);
1635 	}
1636 
1637 	for (i++; i < bin->regs_mask_nelms; i++) {
1638 		mask = run->regs_mask[i];
1639 		if (mask != 0) {
1640 			/* Usable allocation found. */
1641 			bit = ffs((int)mask) - 1;
1642 
1643 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1644 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1645 			    + (bin->reg_size * regind));
1646 
1647 			/* Clear bit. */
1648 			mask ^= (1 << bit);
1649 			run->regs_mask[i] = mask;
1650 
1651 			/*
1652 			 * Make a note that nothing before this element
1653 			 * contains a free region.
1654 			 */
1655 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1656 
1657 			return (ret);
1658 		}
1659 	}
1660 	/* Not reached. */
1661 	/* LINTED */
1662 	assert(0);
1663 	return (NULL);
1664 }
1665 
1666 static inline void
1667 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1668 {
1669 	/*
1670 	 * To divide by a number D that is not a power of two we multiply
1671 	 * by (2^21 / D) and then right shift by 21 positions.
1672 	 *
1673 	 *   X / D
1674 	 *
1675 	 * becomes
1676 	 *
1677 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1678 	 */
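	/*
	 * Worked example, assuming the default 16-byte quantum
	 * (QUANTUM_2POW_MIN == 4): for D == 48, size_invs[(48 >> 4) - 3] ==
	 * SIZE_INV(3) == (2097152 / 48) + 1 == 43691; then for X == 96,
	 * (96 * 43691) >> 21 == 4194336 >> 21 == 2, which is indeed 96 / 48.
	 */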
1679 #define SIZE_INV_SHIFT 21
1680 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1681 	static const unsigned size_invs[] = {
1682 	    SIZE_INV(3),
1683 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1684 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1685 	    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1686 	    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1687 	    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1688 	    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1689 	    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1690 #if (QUANTUM_2POW_MIN < 4)
1691 	    ,
1692 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1693 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1694 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1695 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1696 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1697 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1698 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1699 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1700 #endif
1701 	};
1702 	unsigned diff, regind, elm, bit;
1703 
1704 	/* LINTED */
1705 	assert(run->magic == ARENA_RUN_MAGIC);
1706 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1707 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1708 
1709 	/*
1710 	 * Avoid doing division with a variable divisor if possible.  Using
1711 	 * actual division here can reduce allocator throughput by over 20%!
1712 	 */
1713 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1714 	if ((size & (size - 1)) == 0) {
1715 		/*
1716 		 * log2_table allows fast division of a power of two in the
1717 		 * [1..128] range.
1718 		 *
1719 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1720 		 */
1721 		static const unsigned char log2_table[] = {
1722 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1723 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1724 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1725 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1726 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1727 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1728 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1729 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1730 		};
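		/*
		 * For example: size == 64 uses log2_table[63] == 6, so
		 * regind == diff >> 6.  A size of 2048 takes the second
		 * branch, where (2048 >> 8) - 1 == 7, log2_table[7] == 3,
		 * and regind == diff >> (8 + 3) == diff / 2048.
		 */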
1731 
1732 		if (size <= 128)
1733 			regind = (diff >> log2_table[size - 1]);
1734 		else if (size <= 32768)
1735 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1736 		else {
1737 			/*
1738 			 * The page size is too large for us to use the lookup
1739 			 * table.  Use real division.
1740 			 */
1741 			regind = (unsigned)(diff / size);
1742 		}
1743 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1744 	    << QUANTUM_2POW_MIN) + 2) {
1745 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1746 		regind >>= SIZE_INV_SHIFT;
1747 	} else {
1748 		/*
1749 		 * size_invs isn't large enough to handle this size class, so
1750 		 * calculate regind using actual division.  This only happens
1751 		 * if the user increases small_max via the 'S' runtime
1752 		 * configuration option.
1753 		 */
1754 		regind = (unsigned)(diff / size);
1755 	};
1756 	assert(diff == regind * size);
1757 	assert(regind < bin->nregs);
1758 
1759 	elm = regind >> (SIZEOF_INT_2POW + 3);
1760 	if (elm < run->regs_minelm)
1761 		run->regs_minelm = elm;
1762 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1763 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1764 	run->regs_mask[elm] |= (1 << bit);
1765 #undef SIZE_INV
1766 #undef SIZE_INV_SHIFT
1767 }
1768 
1769 static void
1770 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1771 {
1772 	arena_chunk_t *chunk;
1773 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1774 	unsigned i;
1775 
1776 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1777 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1778 	    >> pagesize_2pow);
1779 	total_pages = chunk->map[run_ind].npages;
1780 	need_pages = (unsigned)(size >> pagesize_2pow);
1781 	assert(need_pages <= total_pages);
1782 	rem_pages = total_pages - need_pages;
1783 
1784 	/* Split enough pages from the front of run to fit allocation size. */
1785 	map_offset = run_ind;
1786 	for (i = 0; i < need_pages; i++) {
1787 		chunk->map[map_offset + i].npages = need_pages;
1788 		chunk->map[map_offset + i].pos = i;
1789 	}
1790 
1791 	/* Keep track of trailing unused pages for later use. */
1792 	if (rem_pages > 0) {
1793 		/* Update map for trailing pages. */
1794 		map_offset += need_pages;
1795 		chunk->map[map_offset].npages = rem_pages;
1796 		chunk->map[map_offset].pos = POS_FREE;
1797 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1798 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1799 	}
1800 
1801 	chunk->pages_used += need_pages;
1802 }
1803 
1804 static arena_chunk_t *
1805 arena_chunk_alloc(arena_t *arena)
1806 {
1807 	arena_chunk_t *chunk;
1808 
1809 	if (arena->spare != NULL) {
1810 		chunk = arena->spare;
1811 		arena->spare = NULL;
1812 
1813 		/* LINTED */
1814 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1815 	} else {
1816 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1817 		if (chunk == NULL)
1818 			return (NULL);
1819 #ifdef MALLOC_STATS
1820 		arena->stats.mapped += chunksize;
1821 #endif
1822 
1823 		chunk->arena = arena;
1824 
1825 		/* LINTED */
1826 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1827 
1828 		/*
1829 		 * Claim that no pages are in use, since the header is merely
1830 		 * overhead.
1831 		 */
1832 		chunk->pages_used = 0;
1833 
1834 		chunk->max_frun_npages = chunk_npages -
1835 		    arena_chunk_header_npages;
1836 		chunk->min_frun_ind = arena_chunk_header_npages;
1837 
1838 		/*
1839 		 * Initialize enough of the map to support one maximal free run.
1840 		 */
1841 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1842 		    arena_chunk_header_npages;
1843 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1844 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1845 		    arena_chunk_header_npages;
1846 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1847 	}
1848 
1849 	return (chunk);
1850 }
1851 
1852 static void
1853 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1854 {
1855 
1856 	/*
1857 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1858 	 * will be cached, so that the arena does not use it.
1859 	 */
1860 	/* LINTED */
1861 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1862 
1863 	if (opt_hint == false) {
1864 		if (arena->spare != NULL) {
1865 			chunk_dealloc((void *)arena->spare, chunksize);
1866 #ifdef MALLOC_STATS
1867 			arena->stats.mapped -= chunksize;
1868 #endif
1869 		}
1870 		arena->spare = chunk;
1871 	} else {
1872 		assert(arena->spare == NULL);
1873 		chunk_dealloc((void *)chunk, chunksize);
1874 #ifdef MALLOC_STATS
1875 		arena->stats.mapped -= chunksize;
1876 #endif
1877 	}
1878 }
1879 
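/*
 * Allocate a run of size bytes (a multiple of the page size) via first-fit
 * search over the arena's chunks in address order, falling back to
 * allocating a new chunk.  Each chunk caches max_frun_npages (an upper
 * bound on its largest free run) and min_frun_ind (a lower bound on the
 * index of its first free page), so hopeless chunks can be skipped and
 * scans can start past a chunk's leading allocated pages.
 */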
1880 static arena_run_t *
1881 arena_run_alloc(arena_t *arena, size_t size)
1882 {
1883 	arena_chunk_t *chunk;
1884 	arena_run_t *run;
1885 	unsigned need_npages, limit_pages, compl_need_npages;
1886 
1887 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1888 	    pagesize_2pow)));
1889 	assert((size & pagesize_mask) == 0);
1890 
1891 	/*
1892 	 * Search through the arena's chunks in address order for a free run
1893 	 * that is large enough; use the first fit found.
1894 	 */
1895 	need_npages = (unsigned)(size >> pagesize_2pow);
1896 	limit_pages = chunk_npages - arena_chunk_header_npages;
1897 	compl_need_npages = limit_pages - need_npages;
1898 	/* LINTED */
1899 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1900 		/*
1901 		 * Skip this chunk if it cannot possibly contain a large
1902 		 * enough free run: either too few of its pages are free in
1903 		 * total, or its cached maximum free run length is too small.
1904 		 */
1905 		if (chunk->pages_used <= compl_need_npages &&
1906 		    need_npages <= chunk->max_frun_npages) {
1907 			arena_chunk_map_t *mapelm;
1908 			unsigned i;
1909 			unsigned max_frun_npages = 0;
1910 			unsigned min_frun_ind = chunk_npages;
1911 
1912 			assert(chunk->min_frun_ind >=
1913 			    arena_chunk_header_npages);
1914 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1915 				mapelm = &chunk->map[i];
1916 				if (mapelm->pos == POS_FREE) {
1917 					if (mapelm->npages >= need_npages) {
1918 						run = (arena_run_t *)
1919 						    ((uintptr_t)chunk + (i <<
1920 						    pagesize_2pow));
1921 						/* Update page map. */
1922 						arena_run_split(arena, run,
1923 						    size);
1924 						return (run);
1925 					}
1926 					if (mapelm->npages >
1927 					    max_frun_npages) {
1928 						max_frun_npages =
1929 						    mapelm->npages;
1930 					}
1931 					if (i < min_frun_ind) {
1932 						min_frun_ind = i;
1933 						if (i < chunk->min_frun_ind)
1934 							chunk->min_frun_ind = i;
1935 					}
1936 				}
1937 				i += mapelm->npages;
1938 			}
1939 			/*
1940 			 * Search failure.  Reset cached chunk->max_frun_npages.
1941 			 * chunk->min_frun_ind was already reset above (if
1942 			 * necessary).
1943 			 */
1944 			chunk->max_frun_npages = max_frun_npages;
1945 		}
1946 	}
1947 
1948 	/*
1949 	 * No usable runs.  Create a new chunk from which to allocate the run.
1950 	 */
1951 	chunk = arena_chunk_alloc(arena);
1952 	if (chunk == NULL)
1953 		return (NULL);
1954 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1955 	    pagesize_2pow));
1956 	/* Update page map. */
1957 	arena_run_split(arena, run, size);
1958 	return (run);
1959 }
1960 
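/*
 * Return a run of size bytes to its chunk, coalescing it with free
 * neighbors and releasing the chunk once no pages remain in use.  For
 * example, freeing a 2-page run that sits directly below a 4-page free run
 * yields a single 6-page free run; the merged run's first and last map
 * entries record the new length.
 */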
1961 static void
1962 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1963 {
1964 	arena_chunk_t *chunk;
1965 	unsigned run_ind, run_pages;
1966 
1967 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1968 
1969 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1970 	    >> pagesize_2pow);
1971 	assert(run_ind >= arena_chunk_header_npages);
1972 	assert(run_ind < (chunksize >> pagesize_2pow));
1973 	run_pages = (unsigned)(size >> pagesize_2pow);
1974 	assert(run_pages == chunk->map[run_ind].npages);
1975 
1976 	/* Subtract pages from count of pages used in chunk. */
1977 	chunk->pages_used -= run_pages;
1978 
1979 	/* Mark run as deallocated. */
1980 	assert(chunk->map[run_ind].npages == run_pages);
1981 	chunk->map[run_ind].pos = POS_FREE;
1982 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1983 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1984 
1985 	/*
1986 	 * Tell the kernel that we don't need the data in this run, but only if
1987 	 * requested via runtime configuration.
1988 	 */
1989 	if (opt_hint)
1990 		madvise(run, size, MADV_FREE);
1991 
1992 	/* Try to coalesce with neighboring runs. */
1993 	if (run_ind > arena_chunk_header_npages &&
1994 	    chunk->map[run_ind - 1].pos == POS_FREE) {
1995 		unsigned prev_npages;
1996 
1997 		/* Coalesce with previous run. */
1998 		prev_npages = chunk->map[run_ind - 1].npages;
1999 		run_ind -= prev_npages;
2000 		assert(chunk->map[run_ind].npages == prev_npages);
2001 		assert(chunk->map[run_ind].pos == POS_FREE);
2002 		run_pages += prev_npages;
2003 
2004 		chunk->map[run_ind].npages = run_pages;
2005 		assert(chunk->map[run_ind].pos == POS_FREE);
2006 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2007 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2008 	}
2009 
2010 	if (run_ind + run_pages < chunk_npages &&
2011 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2012 		unsigned next_npages;
2013 
2014 		/* Coalesce with next run. */
2015 		next_npages = chunk->map[run_ind + run_pages].npages;
2016 		run_pages += next_npages;
2017 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2018 		    next_npages);
2019 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2020 
2021 		chunk->map[run_ind].npages = run_pages;
2022 		chunk->map[run_ind].pos = POS_FREE;
2023 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2024 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2025 	}
2026 
2027 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2028 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2029 	if (run_ind < chunk->min_frun_ind)
2030 		chunk->min_frun_ind = run_ind;
2031 
2032 	/* Deallocate chunk if it is now completely unused. */
2033 	if (chunk->pages_used == 0)
2034 		arena_chunk_dealloc(arena, chunk);
2035 }
2036 
2037 static arena_run_t *
2038 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2039 {
2040 	arena_run_t *run;
2041 	unsigned i, remainder;
2042 
2043 	/* Look for a usable run. */
2044 	/* LINTED */
2045 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2046 		/* run is guaranteed to have available space. */
2047 		/* LINTED */
2048 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2049 #ifdef MALLOC_STATS
2050 		bin->stats.reruns++;
2051 #endif
2052 		return (run);
2053 	}
2054 	/* No existing runs have any space available. */
2055 
2056 	/* Allocate a new run. */
2057 	run = arena_run_alloc(arena, bin->run_size);
2058 	if (run == NULL)
2059 		return (NULL);
2060 
2061 	/* Initialize run internals. */
2062 	run->bin = bin;
2063 
2064 	for (i = 0; i < bin->regs_mask_nelms; i++)
2065 		run->regs_mask[i] = UINT_MAX;
2066 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2067 	if (remainder != 0) {
2068 		/* The last element has spare bits that need to be unset. */
2069 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2070 		    - remainder));
2071 	}
2072 
2073 	run->regs_minelm = 0;
2074 
2075 	run->nfree = bin->nregs;
2076 #ifdef MALLOC_DEBUG
2077 	run->magic = ARENA_RUN_MAGIC;
2078 #endif
2079 
2080 #ifdef MALLOC_STATS
2081 	bin->stats.nruns++;
2082 	bin->stats.curruns++;
2083 	if (bin->stats.curruns > bin->stats.highruns)
2084 		bin->stats.highruns = bin->stats.curruns;
2085 #endif
2086 	return (run);
2087 }
2088 
2089 /* bin->runcur must have space available before this function is called. */
2090 static inline void *
2091 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2092 {
2093 	void *ret;
2094 
2095 	assert(run->magic == ARENA_RUN_MAGIC);
2096 	assert(run->nfree > 0);
2097 
2098 	ret = arena_run_reg_alloc(run, bin);
2099 	assert(ret != NULL);
2100 	run->nfree--;
2101 
2102 	return (ret);
2103 }
2104 
2105 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2106 static void *
2107 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2108 {
2109 
2110 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2111 	if (bin->runcur == NULL)
2112 		return (NULL);
2113 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2114 	assert(bin->runcur->nfree > 0);
2115 
2116 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2117 }
2118 
2119 /*
2120  * Calculate bin->run_size such that it meets the following constraints:
2121  *
2122  *   *) bin->run_size >= min_run_size
2123  *   *) bin->run_size <= arena_maxclass
2124  *   *) bin->run_size <= RUN_MAX_SMALL
2125  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2126  *
2127  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2128  * also calculated here, since these settings are all interdependent.
2129  */
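/*
 * A run is laid out as
 *
 *   | arena_run_t + regs_mask[] | pad | reg 0 | reg 1 | ... | reg nregs-1 |
 *   ^ 0                               ^ reg0_offset
 *
 * so reg0_offset == run_size - nregs * reg_size, and a candidate setting
 * is valid as long as the header fits entirely below reg 0, i.e.
 * sizeof(arena_run_t) + sizeof(unsigned) * (regs_mask_nelms - 1)
 * <= reg0_offset (the -1 accounts for the mask element already counted in
 * sizeof(arena_run_t)).  The expansion loop below grows run_size one page
 * at a time while the relative header overhead, reg0_offset / run_size,
 * is still deemed too large.
 */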
2130 static size_t
2131 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2132 {
2133 	size_t try_run_size, good_run_size;
2134 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2135 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2136 	float max_ovrhd = RUN_MAX_OVRHD;
2137 
2138 	assert(min_run_size >= pagesize);
2139 	assert(min_run_size <= arena_maxclass);
2140 	assert(min_run_size <= RUN_MAX_SMALL);
2141 
2142 	/*
2143 	 * Calculate known-valid settings before entering the run_size
2144 	 * expansion loop, so that the first part of the loop always copies
2145 	 * valid settings.
2146 	 *
2147 	 * The do..while loop iteratively reduces the number of regions until
2148 	 * the run header and the regions no longer overlap.  A closed formula
2149 	 * would be quite messy, since there is an interdependency between the
2150 	 * header's mask length and the number of regions.
2151 	 */
2152 	try_run_size = min_run_size;
2153 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2154 	    bin->reg_size) + 1); /* Counteract the try_nregs-- in the loop. */
2155 	do {
2156 		try_nregs--;
2157 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2158 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2159 		try_reg0_offset = (unsigned)(try_run_size -
2160 		    (try_nregs * bin->reg_size));
2161 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2162 	    > try_reg0_offset);
2163 
2164 	/* run_size expansion loop. */
2165 	do {
2166 		/*
2167 		 * Copy valid settings before trying more aggressive settings.
2168 		 */
2169 		good_run_size = try_run_size;
2170 		good_nregs = try_nregs;
2171 		good_mask_nelms = try_mask_nelms;
2172 		good_reg0_offset = try_reg0_offset;
2173 
2174 		/* Try more aggressive settings. */
2175 		try_run_size += pagesize;
2176 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2177 		    bin->reg_size) + 1); /* Counteract try_nregs-- in loop. */
2178 		do {
2179 			try_nregs--;
2180 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2181 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2182 			    1 : 0);
2183 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2184 			    bin->reg_size));
2185 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2186 		    (try_mask_nelms - 1)) > try_reg0_offset);
2187 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2188 	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
2189 	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
2190 	    max_ovrhd);
2191 
2192 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2193 	    <= good_reg0_offset);
2194 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2195 
2196 	/* Copy final settings. */
2197 	bin->run_size = good_run_size;
2198 	bin->nregs = good_nregs;
2199 	bin->regs_mask_nelms = good_mask_nelms;
2200 	bin->reg0_offset = good_reg0_offset;
2201 
2202 	return (good_run_size);
2203 }
2204 
2205 static void *
2206 arena_malloc(arena_t *arena, size_t size)
2207 {
2208 	void *ret;
2209 
2210 	assert(arena != NULL);
2211 	assert(arena->magic == ARENA_MAGIC);
2212 	assert(size != 0);
2213 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2214 
2215 	if (size <= bin_maxclass) {
2216 		arena_bin_t *bin;
2217 		arena_run_t *run;
2218 
2219 		/* Small allocation. */
2220 
2221 		if (size < small_min) {
2222 			/* Tiny. */
2223 			size = pow2_ceil(size);
2224 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2225 			    1)))];
2226 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2227 			/*
2228 			 * Bin calculation is always correct, but we may need
2229 			 * to fix size for the purposes of assertions and/or
2230 			 * stats accuracy.
2231 			 */
2232 			if (size < (1 << TINY_MIN_2POW))
2233 				size = (1 << TINY_MIN_2POW);
2234 #endif
2235 		} else if (size <= small_max) {
2236 			/* Quantum-spaced. */
2237 			size = QUANTUM_CEILING(size);
2238 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2239 			    - 1];
2240 		} else {
2241 			/* Sub-page. */
2242 			size = pow2_ceil(size);
2243 			bin = &arena->bins[ntbins + nqbins
2244 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2245 		}
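		/*
		 * For example, assuming the default 16 byte quantum, a
		 * 100 byte request is quantum-spaced and rounds up to 112
		 * bytes: bin ntbins + (112 >> 4) - 1 == ntbins + 6, whose
		 * reg_size is 16 * 7 == 112.
		 */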
2246 		assert(size == bin->reg_size);
2247 
2248 		malloc_mutex_lock(&arena->mtx);
2249 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2250 			ret = arena_bin_malloc_easy(arena, bin, run);
2251 		else
2252 			ret = arena_bin_malloc_hard(arena, bin);
2253 
2254 		if (ret == NULL) {
2255 			malloc_mutex_unlock(&arena->mtx);
2256 			return (NULL);
2257 		}
2258 
2259 #ifdef MALLOC_STATS
2260 		bin->stats.nrequests++;
2261 		arena->stats.nmalloc_small++;
2262 		arena->stats.allocated_small += size;
2263 #endif
2264 	} else {
2265 		/* Large allocation. */
2266 		size = PAGE_CEILING(size);
2267 		malloc_mutex_lock(&arena->mtx);
2268 		ret = (void *)arena_run_alloc(arena, size);
2269 		if (ret == NULL) {
2270 			malloc_mutex_unlock(&arena->mtx);
2271 			return (NULL);
2272 		}
2273 #ifdef MALLOC_STATS
2274 		arena->stats.nmalloc_large++;
2275 		arena->stats.allocated_large += size;
2276 #endif
2277 	}
2278 
2279 	malloc_mutex_unlock(&arena->mtx);
2280 
2281 	if (opt_junk)
2282 		memset(ret, 0xa5, size);
2283 	else if (opt_zero)
2284 		memset(ret, 0, size);
2285 	return (ret);
2286 }
2287 
2288 static inline void
2289 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2290     unsigned npages)
2291 {
2292 	unsigned i;
2293 
2294 	assert(npages > 0);
2295 
2296 	/*
2297 	 * Modify the map so that arena_run_dalloc() sees the run as
2298 	 * separately allocated.
2299 	 */
2300 	for (i = 0; i < npages; i++) {
2301 		chunk->map[pageind + i].npages = npages;
2302 		chunk->map[pageind + i].pos = i;
2303 	}
2304 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2305 	    pagesize_2pow)), npages << pagesize_2pow);
2306 }
2307 
2308 /* Only handles large allocations that require more than page alignment. */
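/*
 * For example (assuming 4 kB pages), a 32 kB request with 16 kB alignment
 * arrives with alloc_size == 32 kB + 16 kB - 4 kB == 44 kB, as computed in
 * ipalloc(); the run is allocated, the first 16 kB-aligned address within
 * it is kept, and the leading and trailing pages are returned via
 * arena_palloc_trim().
 */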
2309 static void *
2310 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2311 {
2312 	void *ret;
2313 	size_t offset;
2314 	arena_chunk_t *chunk;
2315 	unsigned pageind, i, npages;
2316 
2317 	assert((size & pagesize_mask) == 0);
2318 	assert((alignment & pagesize_mask) == 0);
2319 
2320 	npages = (unsigned)(size >> pagesize_2pow);
2321 
2322 	malloc_mutex_lock(&arena->mtx);
2323 	ret = (void *)arena_run_alloc(arena, alloc_size);
2324 	if (ret == NULL) {
2325 		malloc_mutex_unlock(&arena->mtx);
2326 		return (NULL);
2327 	}
2328 
2329 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2330 
2331 	offset = (uintptr_t)ret & (alignment - 1);
2332 	assert((offset & pagesize_mask) == 0);
2333 	assert(offset < alloc_size);
2334 	if (offset == 0) {
2335 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2336 		    pagesize_2pow);
2337 
2338 		/* Update the map for the run to be kept. */
2339 		for (i = 0; i < npages; i++) {
2340 			chunk->map[pageind + i].npages = npages;
2341 			assert(chunk->map[pageind + i].pos == i);
2342 		}
2343 
2344 		/* Trim trailing space. */
2345 		arena_palloc_trim(arena, chunk, pageind + npages,
2346 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2347 	} else {
2348 		size_t leadsize, trailsize;
2349 
2350 		leadsize = alignment - offset;
2351 		ret = (void *)((uintptr_t)ret + leadsize);
2352 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2353 		    pagesize_2pow);
2354 
2355 		/* Update the map for the run to be kept. */
2356 		for (i = 0; i < npages; i++) {
2357 			chunk->map[pageind + i].npages = npages;
2358 			chunk->map[pageind + i].pos = i;
2359 		}
2360 
2361 		/* Trim leading space. */
2362 		arena_palloc_trim(arena, chunk,
2363 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2364 		    (unsigned)(leadsize >> pagesize_2pow));
2365 
2366 		trailsize = alloc_size - leadsize - size;
2367 		if (trailsize != 0) {
2368 			/* Trim trailing space. */
2369 			assert(trailsize < alloc_size);
2370 			arena_palloc_trim(arena, chunk, pageind + npages,
2371 			    (unsigned)(trailsize >> pagesize_2pow));
2372 		}
2373 	}
2374 
2375 #ifdef MALLOC_STATS
2376 	arena->stats.nmalloc_large++;
2377 	arena->stats.allocated_large += size;
2378 #endif
2379 	malloc_mutex_unlock(&arena->mtx);
2380 
2381 	if (opt_junk)
2382 		memset(ret, 0xa5, size);
2383 	else if (opt_zero)
2384 		memset(ret, 0, size);
2385 	return (ret);
2386 }
2387 
2388 /* Return the size of the allocation pointed to by ptr. */
2389 static size_t
2390 arena_salloc(const void *ptr)
2391 {
2392 	size_t ret;
2393 	arena_chunk_t *chunk;
2394 	arena_chunk_map_t *mapelm;
2395 	unsigned pageind;
2396 
2397 	assert(ptr != NULL);
2398 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2399 
2400 	/*
2401 	 * No arena data structures that we query here can change in a way that
2402 	 * affects this function, so we don't need to lock.
2403 	 */
2404 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2405 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2406 	    pagesize_2pow);
2407 	mapelm = &chunk->map[pageind];
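	/*
	 * Distinguish small from large: the first page of a large run is
	 * page-aligned and has pos == 0.  A pointer into a small run either
	 * lands on a page with pos != 0, or is not page-aligned (region 0
	 * always follows the run header, so it never coincides with the
	 * run's first page boundary).
	 */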
2408 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2409 	    pagesize_2pow)) {
2410 		arena_run_t *run;
2411 
2412 		pageind -= mapelm->pos;
2413 
2414 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2415 		    pagesize_2pow));
2416 		assert(run->magic == ARENA_RUN_MAGIC);
2417 		ret = run->bin->reg_size;
2418 	} else
2419 		ret = mapelm->npages << pagesize_2pow;
2420 
2421 	return (ret);
2422 }
2423 
2424 static void *
2425 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2426 {
2427 	void *ret;
2428 
2429 	/* Avoid moving the allocation if the size class would not change. */
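	/*
	 * For example, assuming the default 16 byte quantum, growing a 20
	 * byte allocation to 30 bytes stays in the 32 byte class and is
	 * handled in place; growing it to 40 bytes moves it to the 48 byte
	 * class and falls through to the allocate-and-copy path below.
	 */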
2430 	if (size < small_min) {
2431 		if (oldsize < small_min &&
2432 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2433 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2434 			goto IN_PLACE;
2435 	} else if (size <= small_max) {
2436 		if (oldsize >= small_min && oldsize <= small_max &&
2437 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2438 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2439 			goto IN_PLACE;
2440 	} else {
2441 		/*
2442 		 * We make no attempt to resize runs here, though it would be
2443 		 * possible to do so.
2444 		 */
2445 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2446 			goto IN_PLACE;
2447 	}
2448 
2449 	/*
2450 	 * If we get here, then size and oldsize are different enough that we
2451 	 * need to use a different size class.  In that case, fall back to
2452 	 * allocating new space and copying.
2453 	 */
2454 	ret = arena_malloc(choose_arena(), size);
2455 	if (ret == NULL)
2456 		return (NULL);
2457 
2458 	/* Junk/zero-filling were already done by arena_malloc(). */
2459 	if (size < oldsize)
2460 		memcpy(ret, ptr, size);
2461 	else
2462 		memcpy(ret, ptr, oldsize);
2463 	idalloc(ptr);
2464 	return (ret);
2465 IN_PLACE:
2466 	if (opt_junk && size < oldsize)
2467 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2468 	else if (opt_zero && size > oldsize)
2469 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2470 	return (ptr);
2471 }
2472 
2473 static void
2474 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2475 {
2476 	unsigned pageind;
2477 	arena_chunk_map_t *mapelm;
2478 	size_t size;
2479 
2480 	assert(arena != NULL);
2481 	assert(arena->magic == ARENA_MAGIC);
2482 	assert(chunk->arena == arena);
2483 	assert(ptr != NULL);
2484 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2485 
2486 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2487 	    pagesize_2pow);
2488 	mapelm = &chunk->map[pageind];
2489 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2490 	    pagesize_2pow)) {
2491 		arena_run_t *run;
2492 		arena_bin_t *bin;
2493 
2494 		/* Small allocation. */
2495 
2496 		pageind -= mapelm->pos;
2497 
2498 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2499 		    pagesize_2pow));
2500 		assert(run->magic == ARENA_RUN_MAGIC);
2501 		bin = run->bin;
2502 		size = bin->reg_size;
2503 
2504 		if (opt_junk)
2505 			memset(ptr, 0x5a, size);
2506 
2507 		malloc_mutex_lock(&arena->mtx);
2508 		arena_run_reg_dalloc(run, bin, ptr, size);
2509 		run->nfree++;
2510 
2511 		if (run->nfree == bin->nregs) {
2512 			/* Deallocate run. */
2513 			if (run == bin->runcur)
2514 				bin->runcur = NULL;
2515 			else if (bin->nregs != 1) {
2516 				/*
2517 				 * This block's conditional is necessary because
2518 				 * if the run only contains one region, then it
2519 				 * never gets inserted into the non-full runs
2520 				 * tree.
2521 				 */
2522 				/* LINTED */
2523 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2524 			}
2525 #ifdef MALLOC_DEBUG
2526 			run->magic = 0;
2527 #endif
2528 			arena_run_dalloc(arena, run, bin->run_size);
2529 #ifdef MALLOC_STATS
2530 			bin->stats.curruns--;
2531 #endif
2532 		} else if (run->nfree == 1 && run != bin->runcur) {
2533 			/*
2534 			 * Make sure that bin->runcur always refers to the
2535 			 * lowest non-full run, if one exists.
2536 			 */
2537 			if (bin->runcur == NULL)
2538 				bin->runcur = run;
2539 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2540 				/* Switch runcur. */
2541 				if (bin->runcur->nfree > 0) {
2542 					/* Insert runcur. */
2543 					/* LINTED */
2544 					RB_INSERT(arena_run_tree_s, &bin->runs,
2545 					    bin->runcur);
2546 				}
2547 				bin->runcur = run;
2548 			} else {
2549 				/* LINTED */
2550 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2551 			}
2552 		}
2553 #ifdef MALLOC_STATS
2554 		arena->stats.allocated_small -= size;
2555 		arena->stats.ndalloc_small++;
2556 #endif
2557 	} else {
2558 		/* Large allocation. */
2559 
2560 		size = mapelm->npages << pagesize_2pow;
2561 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2562 
2563 		if (opt_junk)
2564 			memset(ptr, 0x5a, size);
2565 
2566 		malloc_mutex_lock(&arena->mtx);
2567 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2568 #ifdef MALLOC_STATS
2569 		arena->stats.allocated_large -= size;
2570 		arena->stats.ndalloc_large++;
2571 #endif
2572 	}
2573 
2574 	malloc_mutex_unlock(&arena->mtx);
2575 }
2576 
2577 static bool
2578 arena_new(arena_t *arena)
2579 {
2580 	unsigned i;
2581 	arena_bin_t *bin;
2582 	size_t prev_run_size;
2583 
2584 	malloc_mutex_init(&arena->mtx);
2585 
2586 #ifdef MALLOC_STATS
2587 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2588 #endif
2589 
2590 	/* Initialize chunks. */
2591 	RB_INIT(&arena->chunks);
2592 	arena->spare = NULL;
2593 
2594 	/* Initialize bins. */
2595 	prev_run_size = pagesize;
2596 
2597 	/* (2^n)-spaced tiny bins. */
2598 	for (i = 0; i < ntbins; i++) {
2599 		bin = &arena->bins[i];
2600 		bin->runcur = NULL;
2601 		RB_INIT(&bin->runs);
2602 
2603 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2604 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2605 
2606 #ifdef MALLOC_STATS
2607 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2608 #endif
2609 	}
2610 
2611 	/* Quantum-spaced bins. */
2612 	for (; i < ntbins + nqbins; i++) {
2613 		bin = &arena->bins[i];
2614 		bin->runcur = NULL;
2615 		RB_INIT(&bin->runs);
2616 
2617 		bin->reg_size = quantum * (i - ntbins + 1);
2618 /*
2619 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2620 */
2621 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2622 
2623 #ifdef MALLOC_STATS
2624 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2625 #endif
2626 	}
2627 
2628 	/* (2^n)-spaced sub-page bins. */
2629 	for (; i < ntbins + nqbins + nsbins; i++) {
2630 		bin = &arena->bins[i];
2631 		bin->runcur = NULL;
2632 		RB_INIT(&bin->runs);
2633 
2634 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2635 
2636 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2637 
2638 #ifdef MALLOC_STATS
2639 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2640 #endif
2641 	}
2642 
2643 #ifdef MALLOC_DEBUG
2644 	arena->magic = ARENA_MAGIC;
2645 #endif
2646 
2647 	return (false);
2648 }
2649 
2650 /* Create a new arena and insert it into the arenas array at index ind. */
2651 static arena_t *
2652 arenas_extend(unsigned ind)
2653 {
2654 	arena_t *ret;
2655 
2656 	/* Allocate enough space for trailing bins. */
2657 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2658 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2659 	if (ret != NULL && arena_new(ret) == false) {
2660 		arenas[ind] = ret;
2661 		return (ret);
2662 	}
2663 	/* Only reached if there is an OOM error. */
2664 
2665 	/*
2666 	 * OOM here is quite inconvenient to propagate, since dealing with it
2667 	 * would require a check for failure in the fast path.  Instead, punt
2668 	 * by using arenas[0].  In practice, this is an extremely unlikely
2669 	 * failure.
2670 	 */
2671 	_malloc_message(getprogname(),
2672 	    ": (malloc) Error initializing arena\n", "", "");
2673 	if (opt_abort)
2674 		abort();
2675 
2676 	return (arenas[0]);
2677 }
2678 
2679 /*
2680  * End arena.
2681  */
2682 /******************************************************************************/
2683 /*
2684  * Begin general internal functions.
2685  */
2686 
2687 static void *
2688 huge_malloc(size_t size)
2689 {
2690 	void *ret;
2691 	size_t csize;
2692 	chunk_node_t *node;
2693 
2694 	/* Allocate one or more contiguous chunks for this request. */
2695 
2696 	csize = CHUNK_CEILING(size);
2697 	if (csize == 0) {
2698 		/* size is large enough to cause size_t wrap-around. */
2699 		return (NULL);
2700 	}
2701 
2702 	/* Allocate a chunk node with which to track the chunk. */
2703 	node = base_chunk_node_alloc();
2704 	if (node == NULL)
2705 		return (NULL);
2706 
2707 	ret = chunk_alloc(csize);
2708 	if (ret == NULL) {
2709 		base_chunk_node_dealloc(node);
2710 		return (NULL);
2711 	}
2712 
2713 	/* Insert node into huge. */
2714 	node->chunk = ret;
2715 	node->size = csize;
2716 
2717 	malloc_mutex_lock(&chunks_mtx);
2718 	RB_INSERT(chunk_tree_s, &huge, node);
2719 #ifdef MALLOC_STATS
2720 	huge_nmalloc++;
2721 	huge_allocated += csize;
2722 #endif
2723 	malloc_mutex_unlock(&chunks_mtx);
2724 
2725 	if (opt_junk)
2726 		memset(ret, 0xa5, csize);
2727 	else if (opt_zero)
2728 		memset(ret, 0, csize);
2729 
2730 	return (ret);
2731 }
2732 
2733 /* Only handles large allocations that require more than chunk alignment. */
2734 static void *
2735 huge_palloc(size_t alignment, size_t size)
2736 {
2737 	void *ret;
2738 	size_t alloc_size, chunk_size, offset;
2739 	chunk_node_t *node;
2740 
2741 	/*
2742 	 * This allocation requires alignment that is even larger than chunk
2743 	 * alignment.  This means that huge_malloc() isn't good enough.
2744 	 *
2745 	 * Allocate almost twice as many chunks as are demanded by the size or
2746 	 * alignment, in order to ensure that the alignment can be
2747 	 * achieved, then unmap leading and trailing chunks.
2748 	 */
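	/*
	 * For example, requesting one chunk with an alignment of four chunks
	 * maps 2 * 4 - 1 == 7 chunks worth of address space; any such span
	 * necessarily contains a suitably aligned chunk, which is kept while
	 * the space on either side of it is unmapped.
	 */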
2749 	assert(alignment >= chunksize);
2750 
2751 	chunk_size = CHUNK_CEILING(size);
2752 
2753 	if (size >= alignment)
2754 		alloc_size = chunk_size + alignment - chunksize;
2755 	else
2756 		alloc_size = (alignment << 1) - chunksize;
2757 
2758 	/* Allocate a chunk node with which to track the chunk. */
2759 	node = base_chunk_node_alloc();
2760 	if (node == NULL)
2761 		return (NULL);
2762 
2763 	ret = chunk_alloc(alloc_size);
2764 	if (ret == NULL) {
2765 		base_chunk_node_dealloc(node);
2766 		return (NULL);
2767 	}
2768 
2769 	offset = (uintptr_t)ret & (alignment - 1);
2770 	assert((offset & chunksize_mask) == 0);
2771 	assert(offset < alloc_size);
2772 	if (offset == 0) {
2773 		/* Trim trailing space. */
2774 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2775 		    - chunk_size);
2776 	} else {
2777 		size_t trailsize;
2778 
2779 		/* Trim leading space. */
2780 		chunk_dealloc(ret, alignment - offset);
2781 
2782 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2783 
2784 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2785 		if (trailsize != 0) {
2786 		    /* Trim trailing space. */
2787 		    assert(trailsize < alloc_size);
2788 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2789 			trailsize);
2790 		}
2791 	}
2792 
2793 	/* Insert node into huge. */
2794 	node->chunk = ret;
2795 	node->size = chunk_size;
2796 
2797 	malloc_mutex_lock(&chunks_mtx);
2798 	RB_INSERT(chunk_tree_s, &huge, node);
2799 #ifdef MALLOC_STATS
2800 	huge_nmalloc++;
2801 	huge_allocated += chunk_size;
2802 #endif
2803 	malloc_mutex_unlock(&chunks_mtx);
2804 
2805 	if (opt_junk)
2806 		memset(ret, 0xa5, chunk_size);
2807 	else if (opt_zero)
2808 		memset(ret, 0, chunk_size);
2809 
2810 	return (ret);
2811 }
2812 
2813 static void *
2814 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2815 {
2816 	void *ret;
2817 
2818 	/* Avoid moving the allocation if the size class would not change. */
2819 	if (oldsize > arena_maxclass &&
2820 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2821 		if (opt_junk && size < oldsize) {
2822 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2823 			    - size);
2824 		} else if (opt_zero && size > oldsize) {
2825 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2826 			    - oldsize);
2827 		}
2828 		return (ptr);
2829 	}
2830 
2831 	if (CHUNK_ADDR2BASE(ptr) == ptr
2832 #ifdef USE_BRK
2833 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2834 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2835 #endif
2836 	    ) {
2837 		chunk_node_t *node, key;
2838 		void *newptr;
2839 		size_t oldcsize;
2840 		size_t newcsize;
2841 
2842 		newcsize = CHUNK_CEILING(size);
2843 		oldcsize = CHUNK_CEILING(oldsize);
2844 		assert(oldcsize != newcsize);
2845 		if (newcsize == 0) {
2846 			/* size_t wrap-around */
2847 			return (NULL);
2848 		}
2849 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2850 		    MAP_ALIGNED(chunksize_2pow));
2851 		if (newptr != MAP_FAILED) {
2852 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2853 
2854 			/* update tree */
2855 			malloc_mutex_lock(&chunks_mtx);
2856 			key.chunk = __DECONST(void *, ptr);
2857 			/* LINTED */
2858 			node = RB_FIND(chunk_tree_s, &huge, &key);
2859 			assert(node != NULL);
2860 			assert(node->chunk == ptr);
2861 			assert(node->size == oldcsize);
2862 			node->size = newcsize;
2863 			if (ptr != newptr) {
2864 				RB_REMOVE(chunk_tree_s, &huge, node);
2865 				node->chunk = newptr;
2866 				RB_INSERT(chunk_tree_s, &huge, node);
2867 			}
2868 #ifdef MALLOC_STATS
2869 			huge_nralloc++;
2870 			huge_allocated += newcsize - oldcsize;
2871 			if (newcsize > oldcsize) {
2872 				stats_chunks.curchunks +=
2873 				    (newcsize - oldcsize) / chunksize;
2874 				if (stats_chunks.curchunks >
2875 				    stats_chunks.highchunks)
2876 					stats_chunks.highchunks =
2877 					    stats_chunks.curchunks;
2878 			} else {
2879 				stats_chunks.curchunks -=
2880 				    (oldcsize - newcsize) / chunksize;
2881 			}
2882 #endif
2883 			malloc_mutex_unlock(&chunks_mtx);
2884 
2885 			if (opt_junk && size < oldsize) {
2886 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2887 				    newcsize - size);
2888 			} else if (opt_zero && size > oldsize) {
2889 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2890 				    size - oldsize);
2891 			}
2892 			return (newptr);
2893 		}
2894 	}
2895 
2896 	/*
2897 	 * If we get here, then size and oldsize are different enough that we
2898 	 * need to use a different size class.  In that case, fall back to
2899 	 * allocating new space and copying.
2900 	 */
2901 	ret = huge_malloc(size);
2902 	if (ret == NULL)
2903 		return (NULL);
2904 
2905 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2906 		/* The old allocation is a chunk. */
2907 		if (size < oldsize)
2908 			memcpy(ret, ptr, size);
2909 		else
2910 			memcpy(ret, ptr, oldsize);
2911 	} else {
2912 		/* The old allocation is a region. */
2913 		assert(oldsize < size);
2914 		memcpy(ret, ptr, oldsize);
2915 	}
2916 	idalloc(ptr);
2917 	return (ret);
2918 }
2919 
2920 static void
2921 huge_dalloc(void *ptr)
2922 {
2923 	chunk_node_t key;
2924 	chunk_node_t *node;
2925 
2926 	malloc_mutex_lock(&chunks_mtx);
2927 
2928 	/* Extract from tree of huge allocations. */
2929 	key.chunk = ptr;
2930 	/* LINTED */
2931 	node = RB_FIND(chunk_tree_s, &huge, &key);
2932 	assert(node != NULL);
2933 	assert(node->chunk == ptr);
2934 	/* LINTED */
2935 	RB_REMOVE(chunk_tree_s, &huge, node);
2936 
2937 #ifdef MALLOC_STATS
2938 	huge_ndalloc++;
2939 	huge_allocated -= node->size;
2940 #endif
2941 
2942 	malloc_mutex_unlock(&chunks_mtx);
2943 
2944 	/* Unmap chunk. */
2945 #ifdef USE_BRK
2946 	if (opt_junk)
2947 		memset(node->chunk, 0x5a, node->size);
2948 #endif
2949 	chunk_dealloc(node->chunk, node->size);
2950 
2951 	base_chunk_node_dealloc(node);
2952 }
2953 
2954 static void *
2955 imalloc(size_t size)
2956 {
2957 	void *ret;
2958 
2959 	assert(size != 0);
2960 
2961 	if (size <= arena_maxclass)
2962 		ret = arena_malloc(choose_arena(), size);
2963 	else
2964 		ret = huge_malloc(size);
2965 
2966 	return (ret);
2967 }
2968 
2969 static void *
2970 ipalloc(size_t alignment, size_t size)
2971 {
2972 	void *ret;
2973 	size_t ceil_size;
2974 
2975 	/*
2976 	 * Round size up to the nearest multiple of alignment.
2977 	 *
2978 	 * This done, we can take advantage of the fact that for each small
2979 	 * size class, every object is aligned at the smallest power of two
2980 	 * that is non-zero in the base two representation of the size.  For
2981 	 * example:
2982 	 *
2983 	 *   Size |   Base 2 | Minimum alignment
2984 	 *   -----+----------+------------------
2985 	 *     96 |  1100000 |  32
2986 	 *    144 | 10100000 |  32
2987 	 *    192 | 11000000 |  64
2988 	 *
2989 	 * Depending on runtime settings, it is possible that arena_malloc()
2990 	 * will further round up to a power of two, but that never causes
2991 	 * correctness issues.
2992 	 */
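	/*
	 * For example, posix_memalign(&p, 32, 100) yields ceil_size == 128;
	 * every object in the 128 byte size class is at least 128-byte
	 * aligned, so the plain arena_malloc() call below suffices.
	 */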
2993 	ceil_size = (size + (alignment - 1)) & (-alignment);
2994 	/*
2995 	 * (ceil_size < size) protects against the combination of maximal
2996 	 * alignment and size greater than maximal alignment.
2997 	 */
2998 	if (ceil_size < size) {
2999 		/* size_t overflow. */
3000 		return (NULL);
3001 	}
3002 
3003 	if (ceil_size <= pagesize || (alignment <= pagesize
3004 	    && ceil_size <= arena_maxclass))
3005 		ret = arena_malloc(choose_arena(), ceil_size);
3006 	else {
3007 		size_t run_size;
3008 
3009 		/*
3010 		 * We can't achieve sub-page alignment, so round up alignment
3011 		 * permanently; it makes later calculations simpler.
3012 		 */
3013 		alignment = PAGE_CEILING(alignment);
3014 		ceil_size = PAGE_CEILING(size);
3015 		/*
3016 		 * (ceil_size < size) protects against very large sizes within
3017 		 * pagesize of SIZE_T_MAX.
3018 		 *
3019 		 * (ceil_size + alignment < ceil_size) protects against the
3020 		 * combination of maximal alignment and ceil_size large enough
3021 		 * to cause overflow.  This is similar to the first overflow
3022 		 * check above, but it needs to be repeated due to the new
3023 		 * ceil_size value, which may now be *equal* to maximal
3024 		 * alignment, whereas before we only detected overflow if the
3025 		 * original size was *greater* than maximal alignment.
3026 		 */
3027 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3028 			/* size_t overflow. */
3029 			return (NULL);
3030 		}
3031 
3032 		/*
3033 		 * Calculate the size of the over-size run that arena_palloc()
3034 		 * would need to allocate in order to guarantee the alignment.
3035 		 */
3036 		if (ceil_size >= alignment)
3037 			run_size = ceil_size + alignment - pagesize;
3038 		else {
3039 			/*
3040 			 * It is possible that (alignment << 1) will cause
3041 			 * overflow, but it doesn't matter because we also
3042 			 * subtract pagesize, which in the case of overflow
3043 			 * leaves us with a very large run_size.  That causes
3044 			 * the first conditional below to fail, which means
3045 			 * that the bogus run_size value never gets used for
3046 			 * anything important.
3047 			 */
3048 			run_size = (alignment << 1) - pagesize;
3049 		}
3050 
3051 		if (run_size <= arena_maxclass) {
3052 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3053 			    run_size);
3054 		} else if (alignment <= chunksize)
3055 			ret = huge_malloc(ceil_size);
3056 		else
3057 			ret = huge_palloc(alignment, ceil_size);
3058 	}
3059 
3060 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3061 	return (ret);
3062 }
3063 
3064 static void *
3065 icalloc(size_t size)
3066 {
3067 	void *ret;
3068 
3069 	if (size <= arena_maxclass) {
3070 		ret = arena_malloc(choose_arena(), size);
3071 		if (ret == NULL)
3072 			return (NULL);
3073 		memset(ret, 0, size);
3074 	} else {
3075 		/*
3076 		 * The virtual memory system provides zero-filled pages, so
3077 		 * there is no need to do so manually, unless opt_junk is
3078 		 * enabled, in which case huge_malloc() fills huge allocations
3079 		 * with junk.
3080 		 */
3081 		ret = huge_malloc(size);
3082 		if (ret == NULL)
3083 			return (NULL);
3084 
3085 		if (opt_junk)
3086 			memset(ret, 0, size);
3087 #ifdef USE_BRK
3088 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3089 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3090 			/*
3091 			 * This may be a re-used brk chunk.  Therefore, zero
3092 			 * the memory.
3093 			 */
3094 			memset(ret, 0, size);
3095 		}
3096 #endif
3097 	}
3098 
3099 	return (ret);
3100 }
3101 
3102 static size_t
3103 isalloc(const void *ptr)
3104 {
3105 	size_t ret;
3106 	arena_chunk_t *chunk;
3107 
3108 	assert(ptr != NULL);
3109 
3110 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3111 	if (chunk != ptr) {
3112 		/* Region. */
3113 		assert(chunk->arena->magic == ARENA_MAGIC);
3114 
3115 		ret = arena_salloc(ptr);
3116 	} else {
3117 		chunk_node_t *node, key;
3118 
3119 		/* Chunk (huge allocation). */
3120 
3121 		malloc_mutex_lock(&chunks_mtx);
3122 
3123 		/* Extract from tree of huge allocations. */
3124 		key.chunk = __DECONST(void *, ptr);
3125 		/* LINTED */
3126 		node = RB_FIND(chunk_tree_s, &huge, &key);
3127 		assert(node != NULL);
3128 
3129 		ret = node->size;
3130 
3131 		malloc_mutex_unlock(&chunks_mtx);
3132 	}
3133 
3134 	return (ret);
3135 }
3136 
3137 static void *
3138 iralloc(void *ptr, size_t size)
3139 {
3140 	void *ret;
3141 	size_t oldsize;
3142 
3143 	assert(ptr != NULL);
3144 	assert(size != 0);
3145 
3146 	oldsize = isalloc(ptr);
3147 
3148 	if (size <= arena_maxclass)
3149 		ret = arena_ralloc(ptr, size, oldsize);
3150 	else
3151 		ret = huge_ralloc(ptr, size, oldsize);
3152 
3153 	return (ret);
3154 }
3155 
3156 static void
3157 idalloc(void *ptr)
3158 {
3159 	arena_chunk_t *chunk;
3160 
3161 	assert(ptr != NULL);
3162 
3163 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3164 	if (chunk != ptr) {
3165 		/* Region. */
3166 		arena_dalloc(chunk->arena, chunk, ptr);
3167 	} else
3168 		huge_dalloc(ptr);
3169 }
3170 
3171 static void
3172 malloc_print_stats(void)
3173 {
3174 
3175 	if (opt_print_stats) {
3176 		char s[UMAX2S_BUFSIZE];
3177 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3178 		    "");
3179 		_malloc_message("Assertions ",
3180 #ifdef NDEBUG
3181 		    "disabled",
3182 #else
3183 		    "enabled",
3184 #endif
3185 		    "\n", "");
3186 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3187 		    opt_abort ? "A" : "a",
3188 		    opt_junk ? "J" : "j",
3189 		    opt_hint ? "H" : "h");
3190 		_malloc_message(opt_utrace ? "PU" : "Pu",
3191 		    opt_sysv ? "V" : "v",
3192 		    opt_xmalloc ? "X" : "x",
3193 		    opt_zero ? "Z\n" : "z\n");
3194 
3195 		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3196 		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3197 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3198 		    "\n", "");
3199 		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3200 		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3201 		    "");
3202 
3203 		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3204 		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3205 
3206 #ifdef MALLOC_STATS
3207 		{
3208 			size_t allocated, mapped;
3209 			unsigned i;
3210 			arena_t *arena;
3211 
3212 			/* Calculate and print allocated/mapped stats. */
3213 
3214 			/* arenas. */
3215 			for (i = 0, allocated = 0; i < narenas; i++) {
3216 				if (arenas[i] != NULL) {
3217 					malloc_mutex_lock(&arenas[i]->mtx);
3218 					allocated +=
3219 					    arenas[i]->stats.allocated_small;
3220 					allocated +=
3221 					    arenas[i]->stats.allocated_large;
3222 					malloc_mutex_unlock(&arenas[i]->mtx);
3223 				}
3224 			}
3225 
3226 			/* huge/base. */
3227 			malloc_mutex_lock(&chunks_mtx);
3228 			allocated += huge_allocated;
3229 			mapped = stats_chunks.curchunks * chunksize;
3230 			malloc_mutex_unlock(&chunks_mtx);
3231 
3232 			malloc_mutex_lock(&base_mtx);
3233 			mapped += base_mapped;
3234 			malloc_mutex_unlock(&base_mtx);
3235 
3236 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3237 			    allocated, mapped);
3238 
3239 			/* Print chunk stats. */
3240 			{
3241 				chunk_stats_t chunks_stats;
3242 
3243 				malloc_mutex_lock(&chunks_mtx);
3244 				chunks_stats = stats_chunks;
3245 				malloc_mutex_unlock(&chunks_mtx);
3246 
3247 				malloc_printf("chunks: nchunks   "
3248 				    "highchunks    curchunks\n");
3249 				malloc_printf("  %13llu%13lu%13lu\n",
3250 				    chunks_stats.nchunks,
3251 				    chunks_stats.highchunks,
3252 				    chunks_stats.curchunks);
3253 			}
3254 
3255 			/* Print huge allocation stats. */
3256 			malloc_printf(
3257 			    "huge: nmalloc      ndalloc      "
3258 			    "nralloc    allocated\n");
3259 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3260 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3261 			    huge_allocated);
3262 
3263 			/* Print stats for each arena. */
3264 			for (i = 0; i < narenas; i++) {
3265 				arena = arenas[i];
3266 				if (arena != NULL) {
3267 					malloc_printf(
3268 					    "\narenas[%u] @ %p\n", i, arena);
3269 					malloc_mutex_lock(&arena->mtx);
3270 					stats_print(arena);
3271 					malloc_mutex_unlock(&arena->mtx);
3272 				}
3273 			}
3274 		}
3275 #endif /* #ifdef MALLOC_STATS */
3276 		_malloc_message("___ End malloc statistics ___\n", "", "", "");
3277 	}
3278 }
3279 
3280 /*
3281  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3282  * implementation has to take pains to avoid infinite recursion during
3283  * initialization.
3284  */
3285 static inline bool
3286 malloc_init(void)
3287 {
3288 
3289 	if (malloc_initialized == false)
3290 		return (malloc_init_hard());
3291 
3292 	return (false);
3293 }
3294 
3295 static bool
3296 malloc_init_hard(void)
3297 {
3298 	unsigned i, j;
3299 	ssize_t linklen;
3300 	char buf[PATH_MAX + 1];
3301 	const char *opts = "";
3302 
3303 	malloc_mutex_lock(&init_lock);
3304 	if (malloc_initialized) {
3305 		/*
3306 		 * Another thread initialized the allocator before this one
3307 		 * acquired init_lock.
3308 		 */
3309 		malloc_mutex_unlock(&init_lock);
3310 		return (false);
3311 	}
3312 
3313 	/* Get number of CPUs. */
3314 	{
3315 		int mib[2];
3316 		size_t len;
3317 
3318 		mib[0] = CTL_HW;
3319 		mib[1] = HW_NCPU;
3320 		len = sizeof(ncpus);
3321 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3322 			/* Error. */
3323 			ncpus = 1;
3324 		}
3325 	}
3326 
3327 	/* Get page size. */
3328 	{
3329 		long result;
3330 
3331 		result = sysconf(_SC_PAGESIZE);
3332 		assert(result != -1);
3333 		pagesize = (unsigned) result;
3334 
3335 		/*
3336 		 * We assume that pagesize is a power of 2 when calculating
3337 		 * pagesize_mask and pagesize_2pow.
3338 		 */
3339 		assert(((result - 1) & result) == 0);
3340 		pagesize_mask = result - 1;
3341 		pagesize_2pow = ffs((int)result) - 1;
3342 	}
3343 
3344 	for (i = 0; i < 3; i++) {
3345 		/* Get runtime configuration. */
3346 		switch (i) {
3347 		case 0:
3348 			if ((linklen = readlink("/etc/malloc.conf", buf,
3349 						sizeof(buf) - 1)) != -1) {
3350 				/*
3351 				 * Use the contents of the "/etc/malloc.conf"
3352 				 * symbolic link's name.
3353 				 */
3354 				buf[linklen] = '\0';
3355 				opts = buf;
3356 			} else {
3357 				/* No configuration specified. */
3358 				buf[0] = '\0';
3359 				opts = buf;
3360 			}
3361 			break;
3362 		case 1:
3363 			if (issetugid() == 0 && (opts =
3364 			    getenv("MALLOC_OPTIONS")) != NULL) {
3365 				/*
3366 				 * Do nothing; opts is already initialized to
3367 				 * the value of the MALLOC_OPTIONS environment
3368 				 * variable.
3369 				 */
3370 			} else {
3371 				/* No configuration specified. */
3372 				buf[0] = '\0';
3373 				opts = buf;
3374 			}
3375 			break;
3376 		case 2:
3377 			if (_malloc_options != NULL) {
3378 			    /*
3379 			     * Use options that were compiled into the program.
3380 			     */
3381 			    opts = _malloc_options;
3382 			} else {
3383 				/* No configuration specified. */
3384 				buf[0] = '\0';
3385 				opts = buf;
3386 			}
3387 			break;
3388 		default:
3389 			/* NOTREACHED */
3390 			/* LINTED */
3391 			assert(false);
3392 		}
3393 
3394 		for (j = 0; opts[j] != '\0'; j++) {
3395 			switch (opts[j]) {
3396 			case 'a':
3397 				opt_abort = false;
3398 				break;
3399 			case 'A':
3400 				opt_abort = true;
3401 				break;
3402 			case 'h':
3403 				opt_hint = false;
3404 				break;
3405 			case 'H':
3406 				opt_hint = true;
3407 				break;
3408 			case 'j':
3409 				opt_junk = false;
3410 				break;
3411 			case 'J':
3412 				opt_junk = true;
3413 				break;
3414 			case 'k':
3415 				/*
3416 				 * Chunks always require at least one header
3417 				 * page, so chunks can never be smaller than
3418 				 * two pages.
3419 				 */
3420 				if (opt_chunk_2pow > pagesize_2pow + 1)
3421 					opt_chunk_2pow--;
3422 				break;
3423 			case 'K':
3424 				/*
3425 				 * There must be fewer pages in a chunk than
3426 				 * can be recorded by the pos field of
3427 				 * arena_chunk_map_t, in order to make POS_FREE
3428 				 * special.
3429 				 */
3430 				if (opt_chunk_2pow - pagesize_2pow
3431 				    < (sizeof(uint32_t) << 3) - 1)
3432 					opt_chunk_2pow++;
3433 				break;
3434 			case 'n':
3435 				opt_narenas_lshift--;
3436 				break;
3437 			case 'N':
3438 				opt_narenas_lshift++;
3439 				break;
3440 			case 'p':
3441 				opt_print_stats = false;
3442 				break;
3443 			case 'P':
3444 				opt_print_stats = true;
3445 				break;
3446 			case 'q':
3447 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3448 					opt_quantum_2pow--;
3449 				break;
3450 			case 'Q':
3451 				if (opt_quantum_2pow < pagesize_2pow - 1)
3452 					opt_quantum_2pow++;
3453 				break;
3454 			case 's':
3455 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3456 					opt_small_max_2pow--;
3457 				break;
3458 			case 'S':
3459 				if (opt_small_max_2pow < pagesize_2pow - 1)
3460 					opt_small_max_2pow++;
3461 				break;
3462 			case 'u':
3463 				opt_utrace = false;
3464 				break;
3465 			case 'U':
3466 				opt_utrace = true;
3467 				break;
3468 			case 'v':
3469 				opt_sysv = false;
3470 				break;
3471 			case 'V':
3472 				opt_sysv = true;
3473 				break;
3474 			case 'x':
3475 				opt_xmalloc = false;
3476 				break;
3477 			case 'X':
3478 				opt_xmalloc = true;
3479 				break;
3480 			case 'z':
3481 				opt_zero = false;
3482 				break;
3483 			case 'Z':
3484 				opt_zero = true;
3485 				break;
3486 			default: {
3487 				char cbuf[2];
3488 
3489 				cbuf[0] = opts[j];
3490 				cbuf[1] = '\0';
3491 				_malloc_message(getprogname(),
3492 				    ": (malloc) Unsupported character in "
3493 				    "malloc options: '", cbuf, "'\n");
3494 			}
3495 			}
3496 		}
3497 	}
3498 
3499 	/* Take care to call atexit() only once. */
3500 	if (opt_print_stats) {
3501 		/* Print statistics at exit. */
3502 		atexit(malloc_print_stats);
3503 	}
3504 
3505 	/* Set variables according to the value of opt_small_max_2pow. */
3506 	if (opt_small_max_2pow < opt_quantum_2pow)
3507 		opt_small_max_2pow = opt_quantum_2pow;
3508 	small_max = (1 << opt_small_max_2pow);
3509 
3510 	/* Set bin-related variables. */
3511 	bin_maxclass = (pagesize >> 1);
3512 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3513 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3514 	assert(ntbins <= opt_quantum_2pow);
3515 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3516 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3517 
3518 	/* Set variables according to the value of opt_quantum_2pow. */
3519 	quantum = (1 << opt_quantum_2pow);
3520 	quantum_mask = quantum - 1;
3521 	if (ntbins > 0)
3522 		small_min = (quantum >> 1) + 1;
3523 	else
3524 		small_min = 1;
3525 	assert(small_min <= quantum);
3526 
3527 	/* Set variables according to the value of opt_chunk_2pow. */
3528 	chunksize = (1LU << opt_chunk_2pow);
3529 	chunksize_mask = chunksize - 1;
3530 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3531 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3532 	{
3533 		unsigned header_size;
3534 
3535 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3536 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3537 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3538 		if ((header_size & pagesize_mask) != 0)
3539 			arena_chunk_header_npages++;
3540 	}
3541 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3542 	    pagesize_2pow);
3543 
3544 	UTRACE(0, 0, 0);
3545 
3546 #ifdef MALLOC_STATS
3547 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3548 #endif
3549 
3550 	/* Various sanity checks that regard configuration. */
3551 	assert(quantum >= sizeof(void *));
3552 	assert(quantum <= pagesize);
3553 	assert(chunksize >= pagesize);
3554 	assert(quantum * 4 <= chunksize);
3555 
3556 	/* Initialize chunks data. */
3557 	malloc_mutex_init(&chunks_mtx);
3558 	RB_INIT(&huge);
3559 #ifdef USE_BRK
3560 	malloc_mutex_init(&brk_mtx);
3561 	brk_base = sbrk(0);
3562 	brk_prev = brk_base;
3563 	brk_max = brk_base;
3564 #endif
3565 #ifdef MALLOC_STATS
3566 	huge_nmalloc = 0;
3567 	huge_ndalloc = 0;
3568 	huge_nralloc = 0;
3569 	huge_allocated = 0;
3570 #endif
3571 	RB_INIT(&old_chunks);
3572 
3573 	/* Initialize base allocation data structures. */
3574 #ifdef MALLOC_STATS
3575 	base_mapped = 0;
3576 #endif
3577 #ifdef USE_BRK
3578 	/*
3579 	 * Allocate a base chunk here, since it doesn't actually have to be
3580 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3581 	 * the use of space that would otherwise be wasted.
3582 	 */
3583 	base_pages_alloc(0);
3584 #endif
3585 	base_chunk_nodes = NULL;
3586 	malloc_mutex_init(&base_mtx);
3587 
3588 	if (ncpus > 1) {
3589 		/*
3590 		 * For SMP systems, create four times as many arenas as there
3591 		 * are CPUs by default.
3592 		 */
3593 		opt_narenas_lshift += 2;
3594 	}
3595 
3596 #ifdef NO_TLS
3597 	/* Initialize arena key. */
3598 	(void)thr_keycreate(&arenas_map_key, NULL);
3599 #endif
3600 
3601 	/* Determine how many arenas to use. */
3602 	narenas = ncpus;
3603 	if (opt_narenas_lshift > 0) {
3604 		if ((narenas << opt_narenas_lshift) > narenas)
3605 			narenas <<= opt_narenas_lshift;
3606 		/*
3607 		 * Make sure not to exceed the limits of what base_alloc()
3608 		 * can handle.
3609 		 */
3610 		if (narenas * sizeof(arena_t *) > chunksize)
3611 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3612 	} else if (opt_narenas_lshift < 0) {
3613 		if ((narenas << opt_narenas_lshift) < narenas)
3614 			narenas <<= opt_narenas_lshift;
3615 		/* Make sure there is at least one arena. */
3616 		if (narenas == 0)
3617 			narenas = 1;
3618 	}
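	/*
	 * For example, on a 4-CPU system with no 'n'/'N' options and
	 * opt_narenas_lshift starting at 0, the SMP adjustment above makes
	 * the shift 2, so narenas == 4 << 2 == 16.
	 */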
3619 
3620 	next_arena = 0;
3621 
3622 	/* Allocate and initialize arenas. */
3623 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3624 	if (arenas == NULL) {
3625 		malloc_mutex_unlock(&init_lock);
3626 		return (true);
3627 	}
3628 	/*
3629 	 * Zero the array.  In practice, this should always be pre-zeroed,
3630 	 * since it was just mmap()ed, but let's be sure.
3631 	 */
3632 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3633 
3634 	/*
3635 	 * Initialize one arena here.  The rest are lazily created in
3636 	 * arena_choose_hard().
3637 	 */
3638 	arenas_extend(0);
3639 	if (arenas[0] == NULL) {
3640 		malloc_mutex_unlock(&init_lock);
3641 		return (true);
3642 	}
3643 
3644 	malloc_mutex_init(&arenas_mtx);
3645 
3646 	malloc_initialized = true;
3647 	malloc_mutex_unlock(&init_lock);
3648 	return (false);
3649 }
3650 
3651 /*
3652  * End general internal functions.
3653  */
3654 /******************************************************************************/
3655 /*
3656  * Begin malloc(3)-compatible functions.
3657  */
3658 
3659 void *
3660 malloc(size_t size)
3661 {
3662 	void *ret;
3663 
3664 	if (malloc_init()) {
3665 		ret = NULL;
3666 		goto RETURN;
3667 	}
3668 
3669 	if (size == 0) {
3670 		if (opt_sysv == false)
3671 			size = 1;
3672 		else {
3673 			ret = NULL;
3674 			goto RETURN;
3675 		}
3676 	}
3677 
3678 	ret = imalloc(size);
3679 
3680 RETURN:
3681 	if (ret == NULL) {
3682 		if (opt_xmalloc) {
3683 			_malloc_message(getprogname(),
3684 			    ": (malloc) Error in malloc(): out of memory\n", "",
3685 			    "");
3686 			abort();
3687 		}
3688 		errno = ENOMEM;
3689 	}
3690 
3691 	UTRACE(0, size, ret);
3692 	return (ret);
3693 }
3694 
3695 int
3696 posix_memalign(void **memptr, size_t alignment, size_t size)
3697 {
3698 	int ret;
3699 	void *result;
3700 
3701 	if (malloc_init())
3702 		result = NULL;
3703 	else {
3704 		/* Make sure that alignment is a large enough power of 2. */
3705 		if (((alignment - 1) & alignment) != 0
3706 		    || alignment < sizeof(void *)) {
3707 			if (opt_xmalloc) {
3708 				_malloc_message(getprogname(),
3709 				    ": (malloc) Error in posix_memalign(): "
3710 				    "invalid alignment\n", "", "");
3711 				abort();
3712 			}
3713 			result = NULL;
3714 			ret = EINVAL;
3715 			goto RETURN;
3716 		}
3717 
3718 		result = ipalloc(alignment, size);
3719 	}
3720 
3721 	if (result == NULL) {
3722 		if (opt_xmalloc) {
3723 			_malloc_message(getprogname(),
3724 			": (malloc) Error in posix_memalign(): out of memory\n",
3725 			"", "");
3726 			abort();
3727 		}
3728 		ret = ENOMEM;
3729 		goto RETURN;
3730 	}
3731 
3732 	*memptr = result;
3733 	ret = 0;
3734 
3735 RETURN:
3736 	UTRACE(0, size, result);
3737 	return (ret);
3738 }
3739 
3740 void *
3741 calloc(size_t num, size_t size)
3742 {
3743 	void *ret;
3744 	size_t num_size;
3745 
3746 	if (malloc_init()) {
3747 		num_size = 0;
3748 		ret = NULL;
3749 		goto RETURN;
3750 	}
3751 
3752 	num_size = num * size;
3753 	if (num_size == 0) {
3754 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3755 			num_size = 1;
3756 		else {
3757 			ret = NULL;
3758 			goto RETURN;
3759 		}
3760 	/*
3761 	 * Try to avoid division here.  We know that it isn't possible to
3762 	 * overflow during multiplication if neither operand uses any of the
3763 	 * most significant half of the bits in a size_t.
3764 	 */
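	/*
	 * For example, on an LP64 system the mask below is
	 * 0xffffffff00000000; if neither num nor size has any of those bits
	 * set, both are less than 2^32, so num * size fits in 64 bits and
	 * the division test is skipped.
	 */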
3765 	} else if ((unsigned long long)((num | size) &
3766 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3767 	   (num_size / size != num)) {
3768 		/* size_t overflow. */
3769 		ret = NULL;
3770 		goto RETURN;
3771 	}
3772 
3773 	ret = icalloc(num_size);
3774 
3775 RETURN:
3776 	if (ret == NULL) {
3777 		if (opt_xmalloc) {
3778 			_malloc_message(getprogname(),
3779 			    ": (malloc) Error in calloc(): out of memory\n", "",
3780 			    "");
3781 			abort();
3782 		}
3783 		errno = ENOMEM;
3784 	}
3785 
3786 	UTRACE(0, num_size, ret);
3787 	return (ret);
3788 }
3789 
3790 void *
3791 realloc(void *ptr, size_t size)
3792 {
3793 	void *ret;
3794 
3795 	if (size == 0) {
3796 		if (opt_sysv == false)
3797 			size = 1;
3798 		else {
3799 			if (ptr != NULL)
3800 				idalloc(ptr);
3801 			ret = NULL;
3802 			goto RETURN;
3803 		}
3804 	}
3805 
3806 	if (ptr != NULL) {
3807 		assert(malloc_initialized);
3808 
3809 		ret = iralloc(ptr, size);
3810 
3811 		if (ret == NULL) {
3812 			if (opt_xmalloc) {
3813 				_malloc_message(getprogname(),
3814 				    ": (malloc) Error in realloc(): out of "
3815 				    "memory\n", "", "");
3816 				abort();
3817 			}
3818 			errno = ENOMEM;
3819 		}
3820 	} else {
3821 		if (malloc_init())
3822 			ret = NULL;
3823 		else
3824 			ret = imalloc(size);
3825 
3826 		if (ret == NULL) {
3827 			if (opt_xmalloc) {
3828 				_malloc_message(getprogname(),
3829 				    ": (malloc) Error in realloc(): out of "
3830 				    "memory\n", "", "");
3831 				abort();
3832 			}
3833 			errno = ENOMEM;
3834 		}
3835 	}
3836 
3837 RETURN:
3838 	UTRACE(ptr, size, ret);
3839 	return (ret);
3840 }
3841 
3842 void
3843 free(void *ptr)
3844 {
3845 
3846 	UTRACE(ptr, 0, 0);
3847 	if (ptr != NULL) {
3848 		assert(malloc_initialized);
3849 
3850 		idalloc(ptr);
3851 	}
3852 }
3853 
3854 /*
3855  * End malloc(3)-compatible functions.
3856  */
3857 /******************************************************************************/
3858 /*
3859  * Begin non-standard functions.
3860  */
3861 #ifndef __NetBSD__
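/*
 * Return the number of usable bytes backing ptr.  Because allocation
 * requests are rounded up to a size class, this can be larger than the
 * size originally requested.
 */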
3862 size_t
3863 malloc_usable_size(const void *ptr)
3864 {
3865 
3866 	assert(ptr != NULL);
3867 
3868 	return (isalloc(ptr));
3869 }
3870 #endif
3871 
3872 /*
3873  * End non-standard functions.
3874  */
3875 /******************************************************************************/
3876 /*
3877  * Begin library-private functions, used by threading libraries for protection
3878  * of malloc during fork().  These functions are only called if the program
3879  * is running in threaded mode, so there is no need to check for threading
3880  * here.
3881  */
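/*
 * The expected usage is _malloc_prefork() immediately before fork(2) and
 * _malloc_postfork() once fork(2) returns, in both the parent and the child
 * (the child inherits locked copies of the mutexes).  A threading library
 * could, for instance, register the handlers with
 * pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork).
 */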
3882 
3883 void
3884 _malloc_prefork(void)
3885 {
3886 	unsigned i;
3887 
3888 	/* Acquire all mutexes in a safe order. */
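	/*
	 * fork(2) duplicates only the calling thread.  Holding the arena,
	 * base and chunk mutexes across the fork ensures that no other
	 * thread is in the middle of an allocator operation, so the child
	 * inherits the heap data structures in a consistent state.
	 */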
3889 
3890 	malloc_mutex_lock(&arenas_mtx);
3891 	for (i = 0; i < narenas; i++) {
3892 		if (arenas[i] != NULL)
3893 			malloc_mutex_lock(&arenas[i]->mtx);
3894 	}
3895 	malloc_mutex_unlock(&arenas_mtx);
3896 
3897 	malloc_mutex_lock(&base_mtx);
3898 
3899 	malloc_mutex_lock(&chunks_mtx);
3900 }
3901 
3902 void
3903 _malloc_postfork(void)
3904 {
3905 	unsigned i;
3906 
3907 	/* Release all mutexes, now that fork() has completed. */
3908 
3909 	malloc_mutex_unlock(&chunks_mtx);
3910 
3911 	malloc_mutex_unlock(&base_mtx);
3912 
3913 	malloc_mutex_lock(&arenas_mtx);
3914 	for (i = 0; i < narenas; i++) {
3915 		if (arenas[i] != NULL)
3916 			malloc_mutex_unlock(&arenas[i]->mtx);
3917 	}
3918 	malloc_mutex_unlock(&arenas_mtx);
3919 }
3920 
3921 /*
3922  * End library-private functions.
3923  */
3924 /******************************************************************************/
3925