1 /*	$NetBSD: jemalloc.c,v 1.40 2016/04/12 18:07:08 joerg Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
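
/*
 * For example, with the defaults described above (16 byte quantum, 4 kB
 * pages, 1 MB chunks), request sizes map to size classes roughly as
 * follows:
 *
 *   malloc(7)       -> 8 bytes   (small, tiny)
 *   malloc(100)     -> 112 bytes (small, quantum-spaced)
 *   malloc(700)     -> 1 kB      (small, sub-page)
 *   malloc(5000)    -> 8 kB      (large)
 *   malloc(1500000) -> 2 MB      (huge)
 */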
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.40 2016/04/12 18:07:08 joerg Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #include <machine/vmparam.h>
147 #endif
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non-localized version of strerror() that avoids bringing in
169  * stdio and the locale code.  All the malloc messages are in English,
170  * so why bother?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %u", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 
220 /*
221  * If you touch the TINY_MIN_2POW definition for any architecture, please
222  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
223  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
224  * gcc is still buildable!
225  */
226 
227 #ifdef __i386__
228 #  define QUANTUM_2POW_MIN	4
229 #  define SIZEOF_PTR_2POW	2
230 #  define USE_BRK
231 #endif
232 #ifdef __ia64__
233 #  define QUANTUM_2POW_MIN	4
234 #  define SIZEOF_PTR_2POW	3
235 #endif
236 #ifdef __aarch64__
237 #  define QUANTUM_2POW_MIN	4
238 #  define SIZEOF_PTR_2POW	3
239 #  define NO_TLS
240 #endif
241 #ifdef __alpha__
242 #  define QUANTUM_2POW_MIN	4
243 #  define SIZEOF_PTR_2POW	3
244 #  define TINY_MIN_2POW		3
245 #  define NO_TLS
246 #endif
247 #ifdef __sparc64__
248 #  define QUANTUM_2POW_MIN	4
249 #  define SIZEOF_PTR_2POW	3
250 #  define TINY_MIN_2POW		3
251 #  define NO_TLS
252 #endif
253 #ifdef __amd64__
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	3
256 #  define TINY_MIN_2POW		3
257 #endif
258 #ifdef __arm__
259 #  define QUANTUM_2POW_MIN	3
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #  ifdef __ARM_EABI__
263 #    define TINY_MIN_2POW	3
264 #  endif
265 #  define NO_TLS
266 #endif
267 #ifdef __powerpc__
268 #  define QUANTUM_2POW_MIN	4
269 #  define SIZEOF_PTR_2POW	2
270 #  define USE_BRK
271 #  define TINY_MIN_2POW		3
272 #endif
273 #if defined(__sparc__) && !defined(__sparc64__)
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __or1k__
279 #  define QUANTUM_2POW_MIN	4
280 #  define SIZEOF_PTR_2POW	2
281 #  define USE_BRK
282 #endif
283 #ifdef __vax__
284 #  define QUANTUM_2POW_MIN	4
285 #  define SIZEOF_PTR_2POW	2
286 #  define USE_BRK
287 #endif
288 #ifdef __sh__
289 #  define QUANTUM_2POW_MIN	4
290 #  define SIZEOF_PTR_2POW	2
291 #  define USE_BRK
292 #endif
293 #ifdef __m68k__
294 #  define QUANTUM_2POW_MIN	4
295 #  define SIZEOF_PTR_2POW	2
296 #  define USE_BRK
297 #endif
298 #if defined(__mips__) || defined(__riscv__)
299 # ifdef _LP64
300 #  define SIZEOF_PTR_2POW	3
301 #  define TINY_MIN_2POW		3
302 # else
303 #  define SIZEOF_PTR_2POW	2
304 # endif
305 # define QUANTUM_2POW_MIN	4
306 # define USE_BRK
307 #endif
308 #ifdef __hppa__
309 #  define QUANTUM_2POW_MIN	4
310 #  define SIZEOF_PTR_2POW	2
311 #  define USE_BRK
312 #endif
313 
314 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
315 
316 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
317 #ifndef SIZEOF_INT_2POW
318 #  define SIZEOF_INT_2POW	2
319 #endif
320 
321 /*
322  * Size and alignment of memory chunks that are allocated by the OS's virtual
323  * memory system.
324  */
325 #define	CHUNK_2POW_DEFAULT	20
326 
327 /*
328  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
329  * so over-estimates are okay (up to a point), but under-estimates will
330  * negatively affect performance.
331  */
332 #define	CACHELINE_2POW		6
333 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
334 
335 /* Smallest size class to support. */
336 #ifndef TINY_MIN_2POW
337 #define	TINY_MIN_2POW		2
338 #endif
339 
340 /*
341  * Maximum size class that is a multiple of the quantum, but not (necessarily)
342  * a power of 2.  Above this size, allocations are rounded up to the nearest
343  * power of 2.
344  */
345 #define	SMALL_MAX_2POW_DEFAULT	9
346 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
347 
348 /*
349  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
350  * as small as possible such that this setting is still honored, without
351  * violating other constraints.  The goal is to make runs as small as possible
352  * without exceeding a per run external fragmentation threshold.
353  *
354  * We use binary fixed point math for overhead computations, where the binary
355  * point is implicitly RUN_BFP bits to the left.
356  *
357  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
358  * honored for some/all object sizes, since there is one bit of header overhead
359  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
360  * that are so small that the per-region overhead is greater than:
361  *
362  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
363  */
364 #define RUN_BFP			12
365 /*                              \/   Implicit binary fixed point. */
366 #define RUN_MAX_OVRHD		0x0000003dU
367 #define RUN_MAX_OVRHD_RELAX	0x00001800U
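
/*
 * To read these fixed point values: with RUN_BFP == 12, a value x stands
 * for the ratio x / 2^12.  RUN_MAX_OVRHD == 0x3d == 61 thus corresponds
 * to a target header overhead of at most 61 / 4096, i.e. roughly 1.5% of
 * a run's size.
 */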
368 
369 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
370 #define RUN_MAX_SMALL_2POW	15
371 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
372 
373 /******************************************************************************/
374 
375 #ifdef __FreeBSD__
376 /*
377  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
378  * they require malloc()ed memory.
379  */
380 typedef struct {
381 	spinlock_t	lock;
382 } malloc_mutex_t;
383 
384 /* Set to true once the allocator has been initialized. */
385 static bool malloc_initialized = false;
386 
387 /* Used to avoid initialization races. */
388 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
389 #else
390 #define	malloc_mutex_t	mutex_t
391 
392 /* Set to true once the allocator has been initialized. */
393 static bool malloc_initialized = false;
394 
395 #ifdef _REENTRANT
396 /* Used to avoid initialization races. */
397 static mutex_t init_lock = MUTEX_INITIALIZER;
398 #endif
399 #endif
400 
401 /******************************************************************************/
402 /*
403  * Statistics data structures.
404  */
405 
406 #ifdef MALLOC_STATS
407 
408 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
409 struct malloc_bin_stats_s {
410 	/*
411 	 * Number of allocation requests that corresponded to the size of this
412 	 * bin.
413 	 */
414 	uint64_t	nrequests;
415 
416 	/* Total number of runs created for this bin's size class. */
417 	uint64_t	nruns;
418 
419 	/*
420 	 * Total number of runs reused by extracting them from the runs tree for
421 	 * this bin's size class.
422 	 */
423 	uint64_t	reruns;
424 
425 	/* High-water mark for this bin. */
426 	unsigned long	highruns;
427 
428 	/* Current number of runs in this bin. */
429 	unsigned long	curruns;
430 };
431 
432 typedef struct arena_stats_s arena_stats_t;
433 struct arena_stats_s {
434 	/* Number of bytes currently mapped. */
435 	size_t		mapped;
436 
437 	/* Per-size-category statistics. */
438 	size_t		allocated_small;
439 	uint64_t	nmalloc_small;
440 	uint64_t	ndalloc_small;
441 
442 	size_t		allocated_large;
443 	uint64_t	nmalloc_large;
444 	uint64_t	ndalloc_large;
445 };
446 
447 typedef struct chunk_stats_s chunk_stats_t;
448 struct chunk_stats_s {
449 	/* Number of chunks that were allocated. */
450 	uint64_t	nchunks;
451 
452 	/* High-water mark for number of chunks allocated. */
453 	unsigned long	highchunks;
454 
455 	/*
456 	 * Current number of chunks allocated.  This value serves no other
457 	 * purpose; it is tracked only so that highchunks can be kept up
458 	 * to date.
459 	 */
460 	unsigned long	curchunks;
461 };
462 
463 #endif /* #ifdef MALLOC_STATS */
464 
465 /******************************************************************************/
466 /*
467  * Chunk data structures.
468  */
469 
470 /* Tree of chunks. */
471 typedef struct chunk_node_s chunk_node_t;
472 struct chunk_node_s {
473 	/* Linkage for the chunk tree. */
474 	RB_ENTRY(chunk_node_s) link;
475 
476 	/*
477 	 * Pointer to the chunk that this tree node is responsible for.  In some
478 	 * (but certainly not all) cases, this data structure is placed at the
479 	 * beginning of the corresponding chunk, so this field may point to this
480 	 * node.
481 	 */
482 	void	*chunk;
483 
484 	/* Total chunk size. */
485 	size_t	size;
486 };
487 typedef struct chunk_tree_s chunk_tree_t;
488 RB_HEAD(chunk_tree_s, chunk_node_s);
489 
490 /******************************************************************************/
491 /*
492  * Arena data structures.
493  */
494 
495 typedef struct arena_s arena_t;
496 typedef struct arena_bin_s arena_bin_t;
497 
498 typedef struct arena_chunk_map_s arena_chunk_map_t;
499 struct arena_chunk_map_s {
500 	/* Number of pages in run. */
501 	uint32_t	npages;
502 	/*
503 	 * Position within run.  For a free run, this is POS_FREE for the first
504 	 * and last pages.  The POS_FREE special value makes it possible to
505 	 * quickly coalesce free runs.
506 	 *
507 	 * This is the limiting factor for chunksize; there can be at most 2^31
508 	 * pages in a run.
509 	 */
510 #define POS_FREE ((uint32_t)0xffffffffU)
511 	uint32_t	pos;
512 };
513 
514 /* Arena chunk header. */
515 typedef struct arena_chunk_s arena_chunk_t;
516 struct arena_chunk_s {
517 	/* Arena that owns the chunk. */
518 	arena_t *arena;
519 
520 	/* Linkage for the arena's chunk tree. */
521 	RB_ENTRY(arena_chunk_s) link;
522 
523 	/*
524 	 * Number of pages in use.  This is maintained in order to make
525 	 * detection of empty chunks fast.
526 	 */
527 	uint32_t pages_used;
528 
529 	/*
530 	 * Every time a free run larger than this value is created/coalesced,
531 	 * this value is increased.  The only way that the value decreases is if
532 	 * arena_run_alloc() fails to find a free run as large as advertised by
533 	 * this value.
534 	 */
535 	uint32_t max_frun_npages;
536 
537 	/*
538 	 * Every time a free run that starts at an earlier page than this value
539 	 * is created/coalesced, this value is decreased.  It is reset in a
540 	 * similar fashion to max_frun_npages.
541 	 */
542 	uint32_t min_frun_ind;
543 
544 	/*
545 	 * Map of pages within chunk that keeps track of free/large/small.  For
546 	 * free runs, only the map entries for the first and last pages are
547 	 * kept up to date, so that free runs can be quickly coalesced.
548 	 */
549 	arena_chunk_map_t map[1]; /* Dynamically sized. */
550 };
551 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
552 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
553 
554 typedef struct arena_run_s arena_run_t;
555 struct arena_run_s {
556 	/* Linkage for run trees. */
557 	RB_ENTRY(arena_run_s) link;
558 
559 #ifdef MALLOC_DEBUG
560 	uint32_t	magic;
561 #  define ARENA_RUN_MAGIC 0x384adf93
562 #endif
563 
564 	/* Bin this run is associated with. */
565 	arena_bin_t	*bin;
566 
567 	/* Index of first element that might have a free region. */
568 	unsigned	regs_minelm;
569 
570 	/* Number of free regions in run. */
571 	unsigned	nfree;
572 
573 	/* Bitmask of in-use regions (0: in use, 1: free). */
574 	unsigned	regs_mask[1]; /* Dynamically sized. */
575 };
576 typedef struct arena_run_tree_s arena_run_tree_t;
577 RB_HEAD(arena_run_tree_s, arena_run_s);
578 
579 struct arena_bin_s {
580 	/*
581 	 * Current run being used to service allocations of this bin's size
582 	 * class.
583 	 */
584 	arena_run_t	*runcur;
585 
586 	/*
587 	 * Tree of non-full runs.  This tree is used when looking for an
588 	 * existing run when runcur is no longer usable.  We choose the
589 	 * non-full run that is lowest in memory; this policy tends to keep
590 	 * objects packed well, and it can also help reduce the number of
591 	 * almost-empty chunks.
592 	 */
593 	arena_run_tree_t runs;
594 
595 	/* Size of regions in a run for this bin's size class. */
596 	size_t		reg_size;
597 
598 	/* Total size of a run for this bin's size class. */
599 	size_t		run_size;
600 
601 	/* Total number of regions in a run for this bin's size class. */
602 	uint32_t	nregs;
603 
604 	/* Number of elements in a run's regs_mask for this bin's size class. */
605 	uint32_t	regs_mask_nelms;
606 
607 	/* Offset of first region in a run for this bin's size class. */
608 	uint32_t	reg0_offset;
609 
610 #ifdef MALLOC_STATS
611 	/* Bin statistics. */
612 	malloc_bin_stats_t stats;
613 #endif
614 };
615 
616 struct arena_s {
617 #ifdef MALLOC_DEBUG
618 	uint32_t		magic;
619 #  define ARENA_MAGIC 0x947d3d24
620 #endif
621 
622 	/* All operations on this arena require that mtx be locked. */
623 	malloc_mutex_t		mtx;
624 
625 #ifdef MALLOC_STATS
626 	arena_stats_t		stats;
627 #endif
628 
629 	/*
630 	 * Tree of chunks this arena manages.
631 	 */
632 	arena_chunk_tree_t	chunks;
633 
634 	/*
635 	 * In order to avoid rapid chunk allocation/deallocation when an arena
636 	 * oscillates right on the cusp of needing a new chunk, cache the most
637 	 * recently freed chunk.  This caching is disabled by opt_hint.
638 	 *
639 	 * There is one spare chunk per arena, rather than one spare total, in
640 	 * order to avoid interactions between multiple threads that could make
641 	 * a single spare inadequate.
642 	 */
643 	arena_chunk_t *spare;
644 
645 	/*
646 	 * bins is used to store rings of free regions of the following sizes,
647 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
648 	 *
649 	 *   bins[i] | size |
650 	 *   --------+------+
651 	 *        0  |    2 |
652 	 *        1  |    4 |
653 	 *        2  |    8 |
654 	 *   --------+------+
655 	 *        3  |   16 |
656 	 *        4  |   32 |
657 	 *        5  |   48 |
658 	 *        6  |   64 |
659 	 *           :      :
660 	 *           :      :
661 	 *       33  |  496 |
662 	 *       34  |  512 |
663 	 *   --------+------+
664 	 *       35  | 1024 |
665 	 *       36  | 2048 |
666 	 *   --------+------+
667 	 */
668 	arena_bin_t		bins[1]; /* Dynamically sized. */
669 };
670 
671 /******************************************************************************/
672 /*
673  * Data.
674  */
675 
676 /* Number of CPUs. */
677 static unsigned		ncpus;
678 
679 /* VM page size. */
680 static size_t		pagesize;
681 static size_t		pagesize_mask;
682 static int		pagesize_2pow;
683 
684 /* Various bin-related settings. */
685 static size_t		bin_maxclass; /* Max size class for bins. */
686 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
687 static unsigned		nqbins; /* Number of quantum-spaced bins. */
688 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
689 static size_t		small_min;
690 static size_t		small_max;
691 
692 /* Various quantum-related settings. */
693 static size_t		quantum;
694 static size_t		quantum_mask; /* (quantum - 1). */
695 
696 /* Various chunk-related settings. */
697 static size_t		chunksize;
698 static size_t		chunksize_mask; /* (chunksize - 1). */
699 static int		chunksize_2pow;
700 static unsigned		chunk_npages;
701 static unsigned		arena_chunk_header_npages;
702 static size_t		arena_maxclass; /* Max size class for arenas. */
703 
704 /********/
705 /*
706  * Chunks.
707  */
708 
709 #ifdef _REENTRANT
710 /* Protects chunk-related data structures. */
711 static malloc_mutex_t	chunks_mtx;
712 #endif
713 
714 /* Tree of chunks that are stand-alone huge allocations. */
715 static chunk_tree_t	huge;
716 
717 #ifdef USE_BRK
718 /*
719  * Try to use brk for chunk-size allocations, due to address space constraints.
720  */
721 /*
722  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
723  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
724  * could cause recursive lock acquisition).
725  */
726 static malloc_mutex_t	brk_mtx;
727 /* Result of first sbrk(0) call. */
728 static void		*brk_base;
729 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
730 static void		*brk_prev;
731 /* Current upper limit on brk addresses. */
732 static void		*brk_max;
733 #endif
734 
735 #ifdef MALLOC_STATS
736 /* Huge allocation statistics. */
737 static uint64_t		huge_nmalloc;
738 static uint64_t		huge_ndalloc;
739 static uint64_t		huge_nralloc;
740 static size_t		huge_allocated;
741 #endif
742 
743 /*
744  * Tree of chunks that were previously allocated.  This is used when allocating
745  * chunks, in an attempt to re-use address space.
746  */
747 static chunk_tree_t	old_chunks;
748 
749 /****************************/
750 /*
751  * base (internal allocation).
752  */
753 
754 /*
755  * Current pages that are being used for internal memory allocations.  These
756  * pages are carved up in cacheline-size quanta, so that there is no chance of
757  * false cache line sharing.
758  */
759 static void		*base_pages;
760 static void		*base_next_addr;
761 static void		*base_past_addr; /* Addr immediately past base_pages. */
762 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
763 #ifdef _REENTRANT
764 static malloc_mutex_t	base_mtx;
765 #endif
766 #ifdef MALLOC_STATS
767 static size_t		base_mapped;
768 #endif
769 
770 /********/
771 /*
772  * Arenas.
773  */
774 
775 /*
776  * Arenas that are used to service external requests.  Not all elements of the
777  * arenas array are necessarily used; arenas are created lazily as needed.
778  */
779 static arena_t		**arenas;
780 static unsigned		narenas;
781 static unsigned		next_arena;
782 #ifdef _REENTRANT
783 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
784 #endif
785 
786 /*
787  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
788  * for allocations.
789  */
790 #ifndef NO_TLS
791 static __thread arena_t	**arenas_map;
792 #else
793 static arena_t	**arenas_map;
794 #endif
795 
796 #if !defined(NO_TLS) || !defined(_REENTRANT)
797 # define	get_arenas_map()	(arenas_map)
798 # define	set_arenas_map(x)	(arenas_map = x)
799 #else
800 
801 static thread_key_t arenas_map_key = -1;
802 
803 static inline arena_t **
804 get_arenas_map(void)
805 {
806 	if (!__isthreaded)
807 		return arenas_map;
808 
809 	if (arenas_map_key == -1) {
810 		(void)thr_keycreate(&arenas_map_key, NULL);
811 		if (arenas_map != NULL) {
812 			thr_setspecific(arenas_map_key, arenas_map);
813 			arenas_map = NULL;
814 		}
815 	}
816 
817 	return thr_getspecific(arenas_map_key);
818 }
819 
820 static __inline void
821 set_arenas_map(arena_t **a)
822 {
823 	if (!__isthreaded) {
824 		arenas_map = a;
825 		return;
826 	}
827 
828 	if (arenas_map_key == -1) {
829 		(void)thr_keycreate(&arenas_map_key, NULL);
830 		if (arenas_map != NULL) {
831 			_DIAGASSERT(arenas_map == a);
832 			arenas_map = NULL;
833 		}
834 	}
835 
836 	thr_setspecific(arenas_map_key, a);
837 }
838 #endif
839 
840 #ifdef MALLOC_STATS
841 /* Chunk statistics. */
842 static chunk_stats_t	stats_chunks;
843 #endif
844 
845 /*******************************/
846 /*
847  * Runtime configuration options.
848  */
849 const char	*_malloc_options;
850 
851 #ifndef MALLOC_PRODUCTION
852 static bool	opt_abort = true;
853 static bool	opt_junk = true;
854 #else
855 static bool	opt_abort = false;
856 static bool	opt_junk = false;
857 #endif
858 static bool	opt_hint = false;
859 static bool	opt_print_stats = false;
860 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
861 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
862 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
863 static bool	opt_utrace = false;
864 static bool	opt_sysv = false;
865 static bool	opt_xmalloc = false;
866 static bool	opt_zero = false;
867 static int32_t	opt_narenas_lshift = 0;
868 
869 typedef struct {
870 	void	*p;
871 	size_t	s;
872 	void	*r;
873 } malloc_utrace_t;
874 
875 #define	UTRACE(a, b, c)							\
876 	if (opt_utrace) {						\
877 		malloc_utrace_t ut;					\
878 		ut.p = a;						\
879 		ut.s = b;						\
880 		ut.r = c;						\
881 		xutrace(&ut, sizeof(ut));				\
882 	}
883 
884 /******************************************************************************/
885 /*
886  * Begin function prototypes for non-inline static functions.
887  */
888 
889 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
890 		const char *p4);
891 #ifdef MALLOC_STATS
892 static void	malloc_printf(const char *format, ...);
893 #endif
894 static char	*size_t2s(size_t x, char *s);
895 static bool	base_pages_alloc(size_t minsize);
896 static void	*base_alloc(size_t size);
897 static chunk_node_t *base_chunk_node_alloc(void);
898 static void	base_chunk_node_dealloc(chunk_node_t *node);
899 #ifdef MALLOC_STATS
900 static void	stats_print(arena_t *arena);
901 #endif
902 static void	*pages_map(void *addr, size_t size);
903 static void	*pages_map_align(void *addr, size_t size, int align);
904 static void	pages_unmap(void *addr, size_t size);
905 static void	*chunk_alloc(size_t size);
906 static void	chunk_dealloc(void *chunk, size_t size);
907 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
908 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
909 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
910 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
911 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
912 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
913 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
914 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
915 static void	*arena_malloc(arena_t *arena, size_t size);
916 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
917     size_t alloc_size);
918 static size_t	arena_salloc(const void *ptr);
919 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
920 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
921 static bool	arena_new(arena_t *arena);
922 static arena_t	*arenas_extend(unsigned ind);
923 static void	*huge_malloc(size_t size);
924 static void	*huge_palloc(size_t alignment, size_t size);
925 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
926 static void	huge_dalloc(void *ptr);
927 static void	*imalloc(size_t size);
928 static void	*ipalloc(size_t alignment, size_t size);
929 static void	*icalloc(size_t size);
930 static size_t	isalloc(const void *ptr);
931 static void	*iralloc(void *ptr, size_t size);
932 static void	idalloc(void *ptr);
933 static void	malloc_print_stats(void);
934 static bool	malloc_init_hard(void);
935 
936 /*
937  * End function prototypes.
938  */
939 /******************************************************************************/
940 /*
941  * Begin mutex.
942  */
943 
944 #ifdef __NetBSD__
945 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
946 #define	malloc_mutex_lock(m)	mutex_lock(m)
947 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
948 #else	/* __NetBSD__ */
949 static inline void
950 malloc_mutex_init(malloc_mutex_t *a_mutex)
951 {
952 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
953 
954 	a_mutex->lock = lock;
955 }
956 
957 static inline void
958 malloc_mutex_lock(malloc_mutex_t *a_mutex)
959 {
960 
961 	if (__isthreaded)
962 		_SPINLOCK(&a_mutex->lock);
963 }
964 
965 static inline void
966 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
967 {
968 
969 	if (__isthreaded)
970 		_SPINUNLOCK(&a_mutex->lock);
971 }
972 #endif	/* __NetBSD__ */
973 
974 /*
975  * End mutex.
976  */
977 /******************************************************************************/
978 /*
979  * Begin Utility functions/macros.
980  */
981 
982 /* Return the chunk address for allocation address a. */
983 #define	CHUNK_ADDR2BASE(a)						\
984 	((void *)((uintptr_t)(a) & ~chunksize_mask))
985 
986 /* Return the chunk offset of address a. */
987 #define	CHUNK_ADDR2OFFSET(a)						\
988 	((size_t)((uintptr_t)(a) & chunksize_mask))
989 
990 /* Return the smallest chunk multiple that is >= s. */
991 #define	CHUNK_CEILING(s)						\
992 	(((s) + chunksize_mask) & ~chunksize_mask)
993 
994 /* Return the smallest cacheline multiple that is >= s. */
995 #define	CACHELINE_CEILING(s)						\
996 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
997 
998 /* Return the smallest quantum multiple that is >= a. */
999 #define	QUANTUM_CEILING(a)						\
1000 	(((a) + quantum_mask) & ~quantum_mask)
1001 
1002 /* Return the smallest pagesize multiple that is >= s. */
1003 #define	PAGE_CEILING(s)							\
1004 	(((s) + pagesize_mask) & ~pagesize_mask)
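
/*
 * Example of the mask-based rounding above, assuming a 16 byte quantum
 * and 4 kB pages (each *_mask is the corresponding size minus one):
 *
 *   QUANTUM_CEILING(100) == (100 + 15) & ~15 == 112
 *   PAGE_CEILING(5000)   == (5000 + 4095) & ~4095 == 8192
 */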
1005 
1006 /* Compute the smallest power of 2 that is >= x. */
1007 static inline size_t
1008 pow2_ceil(size_t x)
1009 {
1010 
1011 	x--;
1012 	x |= x >> 1;
1013 	x |= x >> 2;
1014 	x |= x >> 4;
1015 	x |= x >> 8;
1016 	x |= x >> 16;
1017 #if (SIZEOF_PTR == 8)
1018 	x |= x >> 32;
1019 #endif
1020 	x++;
1021 	return (x);
1022 }
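
/*
 * pow2_ceil() works by smearing the highest set bit of (x - 1) into all
 * lower bit positions and then adding one.  For example, pow2_ceil(100):
 * 99 == 0b1100011 smears to 127 == 0b1111111, and 127 + 1 == 128.  Exact
 * powers of two are returned unchanged, e.g. pow2_ceil(1024) == 1024.
 */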
1023 
1024 static void
1025 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1026 {
1027 
1028 	write(STDERR_FILENO, p1, strlen(p1));
1029 	write(STDERR_FILENO, p2, strlen(p2));
1030 	write(STDERR_FILENO, p3, strlen(p3));
1031 	write(STDERR_FILENO, p4, strlen(p4));
1032 }
1033 
1034 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
1035 	    const char *p4) = wrtmessage;
1036 
1037 #ifdef MALLOC_STATS
1038 /*
1039  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1040  */
1041 static void
1042 malloc_printf(const char *format, ...)
1043 {
1044 	char buf[4096];
1045 	va_list ap;
1046 
1047 	va_start(ap, format);
1048 	vsnprintf(buf, sizeof(buf), format, ap);
1049 	va_end(ap);
1050 	_malloc_message(buf, "", "", "");
1051 }
1052 #endif
1053 
1054 /*
1055  * We don't want to depend on vsnprintf() for production builds, since that can
1056  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
1057  * integer printing functionality, so that malloc_printf() use can be limited to
1058  * MALLOC_STATS code.
1059  */
1060 #define UMAX2S_BUFSIZE	21
1061 static char *
1062 size_t2s(size_t x, char *s)
1063 {
1064 	unsigned i;
1065 
1066 	/* Make sure UMAX2S_BUFSIZE is large enough. */
1067 	/* LINTED */
1068 	assert(sizeof(size_t) <= 8);
1069 
1070 	i = UMAX2S_BUFSIZE - 1;
1071 	s[i] = '\0';
1072 	do {
1073 		i--;
1074 		s[i] = "0123456789"[(int)x % 10];
1075 		x /= (uintmax_t)10LL;
1076 	} while (x > 0);
1077 
1078 	return (&s[i]);
1079 }
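
/*
 * size_t2s() fills the buffer from the end and returns a pointer to the
 * first digit, so callers must use the return value rather than the start
 * of the buffer.  Illustrative use (nbytes is a hypothetical variable):
 *
 *   char buf[UMAX2S_BUFSIZE];
 *   _malloc_message("allocated ", size_t2s(nbytes, buf), " bytes\n", "");
 */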
1080 
1081 /******************************************************************************/
1082 
1083 static bool
1084 base_pages_alloc(size_t minsize)
1085 {
1086 	size_t csize = 0;
1087 
1088 #ifdef USE_BRK
1089 	/*
1090 	 * Do special brk allocation here, since base allocations don't need to
1091 	 * be chunk-aligned.
1092 	 */
1093 	if (brk_prev != (void *)-1) {
1094 		void *brk_cur;
1095 		intptr_t incr;
1096 
1097 		if (minsize != 0)
1098 			csize = CHUNK_CEILING(minsize);
1099 
1100 		malloc_mutex_lock(&brk_mtx);
1101 		do {
1102 			/* Get the current end of brk. */
1103 			brk_cur = sbrk(0);
1104 
1105 			/*
1106 			 * Calculate how much padding is necessary to
1107 			 * chunk-align the end of brk.  Don't worry about
1108 			 * brk_cur not being chunk-aligned though.
1109 			 */
1110 			incr = (intptr_t)chunksize
1111 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1112 			assert(incr >= 0);
1113 			if ((size_t)incr < minsize)
1114 				incr += csize;
1115 
1116 			brk_prev = sbrk(incr);
1117 			if (brk_prev == brk_cur) {
1118 				/* Success. */
1119 				malloc_mutex_unlock(&brk_mtx);
1120 				base_pages = brk_cur;
1121 				base_next_addr = base_pages;
1122 				base_past_addr = (void *)((uintptr_t)base_pages
1123 				    + incr);
1124 #ifdef MALLOC_STATS
1125 				base_mapped += incr;
1126 #endif
1127 				return (false);
1128 			}
1129 		} while (brk_prev != (void *)-1);
1130 		malloc_mutex_unlock(&brk_mtx);
1131 	}
1132 	if (minsize == 0) {
1133 		/*
1134 		 * Failure during initialization doesn't matter, so avoid
1135 		 * falling through to the mmap-based page mapping code.
1136 		 */
1137 		return (true);
1138 	}
1139 #endif
1140 	assert(minsize != 0);
1141 	csize = PAGE_CEILING(minsize);
1142 	base_pages = pages_map(NULL, csize);
1143 	if (base_pages == NULL)
1144 		return (true);
1145 	base_next_addr = base_pages;
1146 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1147 #ifdef MALLOC_STATS
1148 	base_mapped += csize;
1149 #endif
1150 	return (false);
1151 }
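
/*
 * Example of the brk padding computation above (1 MB chunks): if sbrk(0)
 * returns an address 0x300 bytes past a chunk boundary, incr starts as
 * 0x100000 - 0x300 == 0xffd00 so that the new end of brk is chunk
 * aligned; if that is still smaller than minsize, csize more bytes are
 * requested.
 */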
1152 
1153 static void *
1154 base_alloc(size_t size)
1155 {
1156 	void *ret;
1157 	size_t csize;
1158 
1159 	/* Round size up to nearest multiple of the cacheline size. */
1160 	csize = CACHELINE_CEILING(size);
1161 
1162 	malloc_mutex_lock(&base_mtx);
1163 
1164 	/* Make sure there's enough space for the allocation. */
1165 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1166 		if (base_pages_alloc(csize)) {
1167 			ret = NULL;
1168 			goto RETURN;
1169 		}
1170 	}
1171 
1172 	/* Allocate. */
1173 	ret = base_next_addr;
1174 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1175 
1176 RETURN:
1177 	malloc_mutex_unlock(&base_mtx);
1178 	return (ret);
1179 }
1180 
1181 static chunk_node_t *
1182 base_chunk_node_alloc(void)
1183 {
1184 	chunk_node_t *ret;
1185 
1186 	malloc_mutex_lock(&base_mtx);
1187 	if (base_chunk_nodes != NULL) {
1188 		ret = base_chunk_nodes;
1189 		/* LINTED */
1190 		base_chunk_nodes = *(chunk_node_t **)ret;
1191 		malloc_mutex_unlock(&base_mtx);
1192 	} else {
1193 		malloc_mutex_unlock(&base_mtx);
1194 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1195 	}
1196 
1197 	return (ret);
1198 }
1199 
1200 static void
1201 base_chunk_node_dealloc(chunk_node_t *node)
1202 {
1203 
1204 	malloc_mutex_lock(&base_mtx);
1205 	/* LINTED */
1206 	*(chunk_node_t **)node = base_chunk_nodes;
1207 	base_chunk_nodes = node;
1208 	malloc_mutex_unlock(&base_mtx);
1209 }
1210 
1211 /******************************************************************************/
1212 
1213 #ifdef MALLOC_STATS
1214 static void
1215 stats_print(arena_t *arena)
1216 {
1217 	unsigned i;
1218 	int gap_start;
1219 
1220 	malloc_printf(
1221 	    "          allocated/mapped            nmalloc      ndalloc\n");
1222 
1223 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1224 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1225 	    arena->stats.ndalloc_small);
1226 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1227 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1228 	    arena->stats.ndalloc_large);
1229 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1230 	    arena->stats.allocated_small + arena->stats.allocated_large,
1231 	    arena->stats.mapped,
1232 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1233 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1234 
1235 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1236 	    "    reruns maxruns curruns\n");
1237 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1238 		if (arena->bins[i].stats.nrequests == 0) {
1239 			if (gap_start == -1)
1240 				gap_start = i;
1241 		} else {
1242 			if (gap_start != -1) {
1243 				if (i > gap_start + 1) {
1244 					/* Gap of more than one size class. */
1245 					malloc_printf("[%u..%u]\n",
1246 					    gap_start, i - 1);
1247 				} else {
1248 					/* Gap of one size class. */
1249 					malloc_printf("[%u]\n", gap_start);
1250 				}
1251 				gap_start = -1;
1252 			}
1253 			malloc_printf(
1254 			    "%13u %1s %4zu %4u %3zu %9llu %9llu"
1255 			    " %9llu %7lu %7lu\n",
1256 			    i,
1257 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1258 			    arena->bins[i].reg_size,
1259 			    arena->bins[i].nregs,
1260 			    arena->bins[i].run_size >> pagesize_2pow,
1261 			    arena->bins[i].stats.nrequests,
1262 			    arena->bins[i].stats.nruns,
1263 			    arena->bins[i].stats.reruns,
1264 			    arena->bins[i].stats.highruns,
1265 			    arena->bins[i].stats.curruns);
1266 		}
1267 	}
1268 	if (gap_start != -1) {
1269 		if (i > gap_start + 1) {
1270 			/* Gap of more than one size class. */
1271 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1272 		} else {
1273 			/* Gap of one size class. */
1274 			malloc_printf("[%u]\n", gap_start);
1275 		}
1276 	}
1277 }
1278 #endif
1279 
1280 /*
1281  * End Utility functions/macros.
1282  */
1283 /******************************************************************************/
1284 /*
1285  * Begin chunk management functions.
1286  */
1287 
1288 #ifndef lint
1289 static inline int
1290 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1291 {
1292 
1293 	assert(a != NULL);
1294 	assert(b != NULL);
1295 
1296 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1297 		return (-1);
1298 	else if (a->chunk == b->chunk)
1299 		return (0);
1300 	else
1301 		return (1);
1302 }
1303 
1304 /* Generate red-black tree code for chunks. */
1305 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1306 #endif
1307 
1308 static void *
1309 pages_map_align(void *addr, size_t size, int align)
1310 {
1311 	void *ret;
1312 
1313 	/*
1314 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1315 	 * of existing mappings, and we only want to create new mappings.
1316 	 */
1317 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1318 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1319 	assert(ret != NULL);
1320 
1321 	if (ret == MAP_FAILED)
1322 		ret = NULL;
1323 	else if (addr != NULL && ret != addr) {
1324 		/*
1325 		 * We succeeded in mapping memory, but not in the right place.
1326 		 */
1327 		if (munmap(ret, size) == -1) {
1328 			char buf[STRERROR_BUF];
1329 
1330 			STRERROR_R(errno, buf, sizeof(buf));
1331 			_malloc_message(getprogname(),
1332 			    ": (malloc) Error in munmap(): ", buf, "\n");
1333 			if (opt_abort)
1334 				abort();
1335 		}
1336 		ret = NULL;
1337 	}
1338 
1339 	assert(ret == NULL || (addr == NULL && ret != addr)
1340 	    || (addr != NULL && ret == addr));
1341 	return (ret);
1342 }
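
/*
 * Note that the alignment request is expressed as a power of two through
 * NetBSD's MAP_ALIGNED() mmap(2) flag; chunk_alloc() below passes
 * chunksize_2pow so that chunk-aligned mappings come directly from the
 * kernel instead of being obtained by over-allocating and trimming.
 */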
1343 
1344 static void *
1345 pages_map(void *addr, size_t size)
1346 {
1347 
1348 	return pages_map_align(addr, size, 0);
1349 }
1350 
1351 static void
1352 pages_unmap(void *addr, size_t size)
1353 {
1354 
1355 	if (munmap(addr, size) == -1) {
1356 		char buf[STRERROR_BUF];
1357 
1358 		STRERROR_R(errno, buf, sizeof(buf));
1359 		_malloc_message(getprogname(),
1360 		    ": (malloc) Error in munmap(): ", buf, "\n");
1361 		if (opt_abort)
1362 			abort();
1363 	}
1364 }
1365 
1366 static void *
1367 chunk_alloc(size_t size)
1368 {
1369 	void *ret, *chunk;
1370 	chunk_node_t *tchunk, *delchunk;
1371 
1372 	assert(size != 0);
1373 	assert((size & chunksize_mask) == 0);
1374 
1375 	malloc_mutex_lock(&chunks_mtx);
1376 
1377 	if (size == chunksize) {
1378 		/*
1379 		 * Check for address ranges that were previously chunks and try
1380 		 * to use them.
1381 		 */
1382 
1383 		/* LINTED */
1384 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1385 		while (tchunk != NULL) {
1386 			/* Found an address range.  Try to recycle it. */
1387 
1388 			chunk = tchunk->chunk;
1389 			delchunk = tchunk;
1390 			/* LINTED */
1391 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1392 
1393 			/* Remove delchunk from the tree. */
1394 			/* LINTED */
1395 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1396 			base_chunk_node_dealloc(delchunk);
1397 
1398 #ifdef USE_BRK
1399 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1400 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1401 				/* Re-use a previously freed brk chunk. */
1402 				ret = chunk;
1403 				goto RETURN;
1404 			}
1405 #endif
1406 			if ((ret = pages_map(chunk, size)) != NULL) {
1407 				/* Success. */
1408 				goto RETURN;
1409 			}
1410 		}
1411 	}
1412 
1413 	/*
1414 	 * Try to over-allocate, but allow the OS to place the allocation
1415 	 * anywhere.  Beware of size_t wrap-around.
1416 	 */
1417 	if (size + chunksize > size) {
1418 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1419 		    != NULL) {
1420 			goto RETURN;
1421 		}
1422 	}
1423 
1424 #ifdef USE_BRK
1425 	/*
1426 	 * Try to create allocations in brk, in order to make full use of
1427 	 * limited address space.
1428 	 */
1429 	if (brk_prev != (void *)-1) {
1430 		void *brk_cur;
1431 		intptr_t incr;
1432 
1433 		/*
1434 		 * The loop is necessary to recover from races with other
1435 		 * threads that are using brk for something other than malloc.
1436 		 */
1437 		malloc_mutex_lock(&brk_mtx);
1438 		do {
1439 			/* Get the current end of brk. */
1440 			brk_cur = sbrk(0);
1441 
1442 			/*
1443 			 * Calculate how much padding is necessary to
1444 			 * chunk-align the end of brk.
1445 			 */
1446 			incr = (intptr_t)size
1447 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1448 			if (incr == (intptr_t)size) {
1449 				ret = brk_cur;
1450 			} else {
1451 				ret = (void *)((intptr_t)brk_cur + incr);
1452 				incr += size;
1453 			}
1454 
1455 			brk_prev = sbrk(incr);
1456 			if (brk_prev == brk_cur) {
1457 				/* Success. */
1458 				malloc_mutex_unlock(&brk_mtx);
1459 				brk_max = (void *)((intptr_t)ret + size);
1460 				goto RETURN;
1461 			}
1462 		} while (brk_prev != (void *)-1);
1463 		malloc_mutex_unlock(&brk_mtx);
1464 	}
1465 #endif
1466 
1467 	/* All strategies for allocation failed. */
1468 	ret = NULL;
1469 RETURN:
1470 	if (ret != NULL) {
1471 		chunk_node_t key;
1472 		/*
1473 		 * Clean out any entries in old_chunks that overlap with the
1474 		 * memory we just allocated.
1475 		 */
1476 		key.chunk = ret;
1477 		/* LINTED */
1478 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1479 		while (tchunk != NULL
1480 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1481 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1482 			delchunk = tchunk;
1483 			/* LINTED */
1484 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1485 			/* LINTED */
1486 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1487 			base_chunk_node_dealloc(delchunk);
1488 		}
1489 
1490 	}
1491 #ifdef MALLOC_STATS
1492 	if (ret != NULL) {
1493 		stats_chunks.nchunks += (size / chunksize);
1494 		stats_chunks.curchunks += (size / chunksize);
1495 	}
1496 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1497 		stats_chunks.highchunks = stats_chunks.curchunks;
1498 #endif
1499 	malloc_mutex_unlock(&chunks_mtx);
1500 
1501 	assert(CHUNK_ADDR2BASE(ret) == ret);
1502 	return (ret);
1503 }
1504 
1505 static void
1506 chunk_dealloc(void *chunk, size_t size)
1507 {
1508 	chunk_node_t *node;
1509 
1510 	assert(chunk != NULL);
1511 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1512 	assert(size != 0);
1513 	assert((size & chunksize_mask) == 0);
1514 
1515 	malloc_mutex_lock(&chunks_mtx);
1516 
1517 #ifdef USE_BRK
1518 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1519 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1520 		void *brk_cur;
1521 
1522 		malloc_mutex_lock(&brk_mtx);
1523 		/* Get the current end of brk. */
1524 		brk_cur = sbrk(0);
1525 
1526 		/*
1527 		 * Try to shrink the data segment if this chunk is at the end
1528 		 * of the data segment.  The sbrk() call here is subject to a
1529 		 * race condition with threads that use brk(2) or sbrk(2)
1530 		 * directly, but the alternative would be to leak memory for
1531 		 * the sake of poorly designed multi-threaded programs.
1532 		 */
1533 		if (brk_cur == brk_max
1534 		    && (void *)((uintptr_t)chunk + size) == brk_max
1535 		    && sbrk(-(intptr_t)size) == brk_max) {
1536 			malloc_mutex_unlock(&brk_mtx);
1537 			if (brk_prev == brk_max) {
1538 				/* Success. */
1539 				brk_prev = (void *)((intptr_t)brk_max
1540 				    - (intptr_t)size);
1541 				brk_max = brk_prev;
1542 			}
1543 		} else {
1544 			size_t offset;
1545 
1546 			malloc_mutex_unlock(&brk_mtx);
1547 			madvise(chunk, size, MADV_FREE);
1548 
1549 			/*
1550 			 * Iteratively create records of each chunk-sized
1551 			 * memory region that 'chunk' is composed of, so that
1552 			 * the address range can be recycled if memory usage
1553 			 * increases later on.
1554 			 */
1555 			for (offset = 0; offset < size; offset += chunksize) {
1556 				node = base_chunk_node_alloc();
1557 				if (node == NULL)
1558 					break;
1559 
1560 				node->chunk = (void *)((uintptr_t)chunk
1561 				    + (uintptr_t)offset);
1562 				node->size = chunksize;
1563 				/* LINTED */
1564 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1565 			}
1566 		}
1567 	} else {
1568 #endif
1569 		pages_unmap(chunk, size);
1570 
1571 		/*
1572 		 * Make a record of the chunk's address, so that the address
1573 		 * range can be recycled if memory usage increases later on.
1574 		 * Don't bother to create entries if (size > chunksize), since
1575 		 * doing so could cause scalability issues for truly gargantuan
1576 		 * objects (many gigabytes or larger).
1577 		 */
1578 		if (size == chunksize) {
1579 			node = base_chunk_node_alloc();
1580 			if (node != NULL) {
1581 				node->chunk = (void *)(uintptr_t)chunk;
1582 				node->size = chunksize;
1583 				/* LINTED */
1584 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1585 			}
1586 		}
1587 #ifdef USE_BRK
1588 	}
1589 #endif
1590 
1591 #ifdef MALLOC_STATS
1592 	stats_chunks.curchunks -= (size / chunksize);
1593 #endif
1594 	malloc_mutex_unlock(&chunks_mtx);
1595 }
1596 
1597 /*
1598  * End chunk management functions.
1599  */
1600 /******************************************************************************/
1601 /*
1602  * Begin arena.
1603  */
1604 
1605 /*
1606  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1607  *
1608  * We maintain at least one block of arenas.  Usually there are more.
1609  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1610  * amongst threads.  To accomplish this, next_arena advances only in
1611  * ncpu steps.
1612  */
1613 static __noinline arena_t *
1614 choose_arena_hard(void)
1615 {
1616 	unsigned i, curcpu;
1617 	arena_t **map;
1618 
1619 	/* Initialize the current block of arenas and advance to next. */
1620 	malloc_mutex_lock(&arenas_mtx);
1621 	assert(next_arena % ncpus == 0);
1622 	assert(narenas % ncpus == 0);
1623 	map = &arenas[next_arena];
1624 	set_arenas_map(map);
1625 	for (i = 0; i < ncpus; i++) {
1626 		if (arenas[next_arena] == NULL)
1627 			arenas_extend(next_arena);
1628 		next_arena = (next_arena + 1) % narenas;
1629 	}
1630 	malloc_mutex_unlock(&arenas_mtx);
1631 
1632 	/*
1633 	 * If we were unable to allocate an arena above, then default to
1634 	 * the first arena, which is always present.
1635 	 */
1636 	curcpu = thr_curcpu();
1637 	if (map[curcpu] != NULL)
1638 		return map[curcpu];
1639 	return arenas[0];
1640 }
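
/*
 * Example of the block hashing described above: with ncpus == 4 and
 * narenas == 8, successive threads entering choose_arena_hard() are
 * handed the blocks &arenas[0] and &arenas[4] in turn, and each thread
 * then indexes its block by the CPU it is currently running on.
 */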
1641 
1642 static inline arena_t *
1643 choose_arena(void)
1644 {
1645 	unsigned curcpu;
1646 	arena_t **map;
1647 
1648 	map = get_arenas_map();
1649 	curcpu = thr_curcpu();
1650 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1651 		return map[curcpu];
1652 
1653 	return choose_arena_hard();
1654 }
1655 
1656 #ifndef lint
1657 static inline int
1658 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1659 {
1660 
1661 	assert(a != NULL);
1662 	assert(b != NULL);
1663 
1664 	if (a->max_frun_npages < b->max_frun_npages)
1665 		return -1;
1666 	if (a->max_frun_npages > b->max_frun_npages)
1667 		return 1;
1668 
1669 	if ((uintptr_t)a < (uintptr_t)b)
1670 		return (-1);
1671 	else if (a == b)
1672 		return (0);
1673 	else
1674 		return (1);
1675 }
1676 
1677 /* Generate red-black tree code for arena chunks. */
1678 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1679 #endif
1680 
1681 #ifndef lint
1682 static inline int
1683 arena_run_comp(arena_run_t *a, arena_run_t *b)
1684 {
1685 
1686 	assert(a != NULL);
1687 	assert(b != NULL);
1688 
1689 	if ((uintptr_t)a < (uintptr_t)b)
1690 		return (-1);
1691 	else if (a == b)
1692 		return (0);
1693 	else
1694 		return (1);
1695 }
1696 
1697 /* Generate red-black tree code for arena runs. */
1698 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1699 #endif
1700 
1701 static inline void *
1702 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1703 {
1704 	void *ret;
1705 	unsigned i, mask, bit, regind;
1706 
1707 	assert(run->magic == ARENA_RUN_MAGIC);
1708 	assert(run->regs_minelm < bin->regs_mask_nelms);
1709 
1710 	/*
1711 	 * Move the first check outside the loop, so that run->regs_minelm can
1712 	 * be updated unconditionally, without the possibility of updating it
1713 	 * multiple times.
1714 	 */
1715 	i = run->regs_minelm;
1716 	mask = run->regs_mask[i];
1717 	if (mask != 0) {
1718 		/* Usable allocation found. */
1719 		bit = ffs((int)mask) - 1;
1720 
1721 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1722 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1723 		    + (bin->reg_size * regind));
1724 
1725 		/* Clear bit. */
1726 		mask ^= (1 << bit);
1727 		run->regs_mask[i] = mask;
1728 
1729 		return (ret);
1730 	}
1731 
1732 	for (i++; i < bin->regs_mask_nelms; i++) {
1733 		mask = run->regs_mask[i];
1734 		if (mask != 0) {
1735 			/* Usable allocation found. */
1736 			bit = ffs((int)mask) - 1;
1737 
1738 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1739 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1740 			    + (bin->reg_size * regind));
1741 
1742 			/* Clear bit. */
1743 			mask ^= (1 << bit);
1744 			run->regs_mask[i] = mask;
1745 
1746 			/*
1747 			 * Make a note that nothing before this element
1748 			 * contains a free region.
1749 			 */
1750 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1751 
1752 			return (ret);
1753 		}
1754 	}
1755 	/* Not reached. */
1756 	/* LINTED */
1757 	assert(0);
1758 	return (NULL);
1759 }
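
/*
 * Example of the index arithmetic above: with 32-bit ints
 * (SIZEOF_INT_2POW == 2), each regs_mask[] element tracks 32 regions.
 * If the first set bit is found at element i == 1, bit == 5, then
 * regind == (1 << 5) + 5 == 37 and the returned pointer is
 * run + reg0_offset + 37 * reg_size.
 */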
1760 
1761 static inline void
1762 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1763 {
1764 	/*
1765 	 * To divide by a number D that is not a power of two we multiply
1766 	 * by (2^21 / D) and then right shift by 21 positions.
1767 	 *
1768 	 *   X / D
1769 	 *
1770 	 * becomes
1771 	 *
1772 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1773 	 */
1774 #define SIZE_INV_SHIFT 21
1775 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1776 	static const unsigned size_invs[] = {
1777 	    SIZE_INV(3),
1778 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1779 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1780 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1781 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1782 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1783 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1784 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1785 #if (QUANTUM_2POW_MIN < 4)
1786 	    ,
1787 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1788 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1789 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1790 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1791 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1792 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1793 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1794 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1795 #endif
1796 	};
1797 	unsigned diff, regind, elm, bit;
1798 
1799 	/* LINTED */
1800 	assert(run->magic == ARENA_RUN_MAGIC);
1801 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1802 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1803 
1804 	/*
1805 	 * Avoid doing division with a variable divisor if possible.  Using
1806 	 * actual division here can reduce allocator throughput by over 20%!
1807 	 */
1808 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1809 	if ((size & (size - 1)) == 0) {
1810 		/*
1811 		 * log2_table allows fast division of a power of two in the
1812 		 * [1..128] range.
1813 		 *
1814 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1815 		 */
1816 		static const unsigned char log2_table[] = {
1817 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1818 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1819 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1820 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1821 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1822 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1823 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1824 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1825 		};
1826 
1827 		if (size <= 128)
1828 			regind = (diff >> log2_table[size - 1]);
1829 		else if (size <= 32768)
1830 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1831 		else {
1832 			/*
1833 			 * The page size is too large for us to use the lookup
1834 			 * table.  Use real division.
1835 			 */
1836 			regind = (unsigned)(diff / size);
1837 		}
1838 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1839 	    << QUANTUM_2POW_MIN) + 2) {
1840 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1841 		regind >>= SIZE_INV_SHIFT;
1842 	} else {
1843 		/*
1844 		 * size_invs isn't large enough to handle this size class, so
1845 		 * calculate regind using actual division.  This only happens
1846 		 * if the user increases small_max via the 'S' runtime
1847 		 * configuration option.
1848 		 */
1849 		regind = (unsigned)(diff / size);
1850 	}
1851 	assert(diff == regind * size);
1852 	assert(regind < bin->nregs);
1853 
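	/*
	 * Mark the region free in the run's bitmap: elm selects the mask
	 * element (regind / 32 with 32-bit unsigned ints) and bit the position
	 * within it.  regs_minelm tracks the lowest element that may contain a
	 * free region, so it is lowered here if necessary.
	 */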
1854 	elm = regind >> (SIZEOF_INT_2POW + 3);
1855 	if (elm < run->regs_minelm)
1856 		run->regs_minelm = elm;
1857 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1858 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1859 	run->regs_mask[elm] |= (1 << bit);
1860 #undef SIZE_INV
1861 #undef SIZE_INV_SHIFT
1862 }
1863 
1864 static void
1865 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1866 {
1867 	arena_chunk_t *chunk;
1868 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1869 	unsigned i;
1870 
1871 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1872 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1873 	    >> pagesize_2pow);
1874 	total_pages = chunk->map[run_ind].npages;
1875 	need_pages = (unsigned)(size >> pagesize_2pow);
1876 	assert(need_pages <= total_pages);
1877 	rem_pages = total_pages - need_pages;
1878 
1879 	/* Split enough pages from the front of run to fit allocation size. */
1880 	map_offset = run_ind;
1881 	for (i = 0; i < need_pages; i++) {
1882 		chunk->map[map_offset + i].npages = need_pages;
1883 		chunk->map[map_offset + i].pos = i;
1884 	}
1885 
1886 	/* Keep track of trailing unused pages for later use. */
1887 	if (rem_pages > 0) {
1888 		/* Update map for trailing pages. */
1889 		map_offset += need_pages;
1890 		chunk->map[map_offset].npages = rem_pages;
1891 		chunk->map[map_offset].pos = POS_FREE;
1892 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1893 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1894 	}
1895 
1896 	chunk->pages_used += need_pages;
1897 }
1898 
1899 static arena_chunk_t *
1900 arena_chunk_alloc(arena_t *arena)
1901 {
1902 	arena_chunk_t *chunk;
1903 
1904 	if (arena->spare != NULL) {
1905 		chunk = arena->spare;
1906 		arena->spare = NULL;
1907 
1908 		/* LINTED */
1909 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1910 	} else {
1911 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1912 		if (chunk == NULL)
1913 			return (NULL);
1914 #ifdef MALLOC_STATS
1915 		arena->stats.mapped += chunksize;
1916 #endif
1917 
1918 		chunk->arena = arena;
1919 
1920 		/*
1921 		 * Claim that no pages are in use, since the header is merely
1922 		 * overhead.
1923 		 */
1924 		chunk->pages_used = 0;
1925 
1926 		chunk->max_frun_npages = chunk_npages -
1927 		    arena_chunk_header_npages;
1928 		chunk->min_frun_ind = arena_chunk_header_npages;
1929 
1930 		/*
1931 		 * Initialize enough of the map to support one maximal free run.
1932 		 */
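		/*
		 * Only the first and last map elements of a free run are kept
		 * consistent; interior elements are not examined until the run
		 * is split or coalesced.
		 */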
1933 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1934 		    arena_chunk_header_npages;
1935 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1936 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1937 		    arena_chunk_header_npages;
1938 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1939 
1940 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1941 	}
1942 
1943 	return (chunk);
1944 }
1945 
1946 static void
1947 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1948 {
1949 
1950 	/*
1951 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1952 	 * will be cached, so that the arena does not use it.
1953 	 */
1954 	/* LINTED */
1955 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1956 
1957 	if (opt_hint == false) {
1958 		if (arena->spare != NULL) {
1959 			chunk_dealloc((void *)arena->spare, chunksize);
1960 #ifdef MALLOC_STATS
1961 			arena->stats.mapped -= chunksize;
1962 #endif
1963 		}
1964 		arena->spare = chunk;
1965 	} else {
1966 		assert(arena->spare == NULL);
1967 		chunk_dealloc((void *)chunk, chunksize);
1968 #ifdef MALLOC_STATS
1969 		arena->stats.mapped -= chunksize;
1970 #endif
1971 	}
1972 }
1973 
1974 static arena_run_t *
1975 arena_run_alloc(arena_t *arena, size_t size)
1976 {
1977 	arena_chunk_t *chunk, *chunk_tmp;
1978 	arena_run_t *run;
1979 	unsigned need_npages;
1980 
1981 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1982 	    pagesize_2pow)));
1983 	assert((size & pagesize_mask) == 0);
1984 
1985 	/*
1986 	 * Search through the arena chunk tree for a large enough free run.
1987 	 * Tree order ensures that an exact fit is picked immediately, and
1988 	 * otherwise the lowest-addressed chunk with the smallest sufficient run.
1989 	 */
1990 	need_npages = (unsigned)(size >> pagesize_2pow);
1991 	/* LINTED */
1992 	for (;;) {
1993 		chunk_tmp = RB_ROOT(&arena->chunks);
1994 		chunk = NULL;
1995 		while (chunk_tmp) {
1996 			if (chunk_tmp->max_frun_npages == need_npages) {
1997 				chunk = chunk_tmp;
1998 				break;
1999 			}
2000 			if (chunk_tmp->max_frun_npages < need_npages) {
2001 				chunk_tmp = RB_RIGHT(chunk_tmp, link);
2002 				continue;
2003 			}
2004 			chunk = chunk_tmp;
2005 			chunk_tmp = RB_LEFT(chunk, link);
2006 		}
2007 		if (chunk == NULL)
2008 			break;
2009 		/*
2010 		 * At this point, the chunk must have a cached run size large
2011 		 * enough to fit the allocation.
2012 		 */
2013 		assert(need_npages <= chunk->max_frun_npages);
2014 		{
2015 			arena_chunk_map_t *mapelm;
2016 			unsigned i;
2017 			unsigned max_frun_npages = 0;
2018 			unsigned min_frun_ind = chunk_npages;
2019 
2020 			assert(chunk->min_frun_ind >=
2021 			    arena_chunk_header_npages);
2022 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
2023 				mapelm = &chunk->map[i];
2024 				if (mapelm->pos == POS_FREE) {
2025 					if (mapelm->npages >= need_npages) {
2026 						run = (arena_run_t *)
2027 						    ((uintptr_t)chunk + (i <<
2028 						    pagesize_2pow));
2029 						/* Update page map. */
2030 						arena_run_split(arena, run,
2031 						    size);
2032 						return (run);
2033 					}
2034 					if (mapelm->npages >
2035 					    max_frun_npages) {
2036 						max_frun_npages =
2037 						    mapelm->npages;
2038 					}
2039 					if (i < min_frun_ind) {
2040 						min_frun_ind = i;
2041 						if (i < chunk->min_frun_ind)
2042 							chunk->min_frun_ind = i;
2043 					}
2044 				}
2045 				i += mapelm->npages;
2046 			}
2047 			/*
2048 			 * Search failure.  Reset cached chunk->max_frun_npages.
2049 			 * chunk->min_frun_ind was already reset above (if
2050 			 * necessary).
2051 			 */
2052 			RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
2053 			chunk->max_frun_npages = max_frun_npages;
2054 			RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
2055 		}
2056 	}
2057 
2058 	/*
2059 	 * No usable runs.  Create a new chunk from which to allocate the run.
2060 	 */
2061 	chunk = arena_chunk_alloc(arena);
2062 	if (chunk == NULL)
2063 		return (NULL);
2064 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2065 	    pagesize_2pow));
2066 	/* Update page map. */
2067 	arena_run_split(arena, run, size);
2068 	return (run);
2069 }
2070 
2071 static void
2072 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
2073 {
2074 	arena_chunk_t *chunk;
2075 	unsigned run_ind, run_pages;
2076 
2077 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2078 
2079 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2080 	    >> pagesize_2pow);
2081 	assert(run_ind >= arena_chunk_header_npages);
2082 	assert(run_ind < (chunksize >> pagesize_2pow));
2083 	run_pages = (unsigned)(size >> pagesize_2pow);
2084 	assert(run_pages == chunk->map[run_ind].npages);
2085 
2086 	/* Subtract pages from count of pages used in chunk. */
2087 	chunk->pages_used -= run_pages;
2088 
2089 	/* Mark run as deallocated. */
2090 	assert(chunk->map[run_ind].npages == run_pages);
2091 	chunk->map[run_ind].pos = POS_FREE;
2092 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
2093 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2094 
2095 	/*
2096 	 * Tell the kernel that we don't need the data in this run, but only if
2097 	 * requested via runtime configuration.
2098 	 */
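	/*
	 * MADV_FREE lets the kernel reclaim these pages lazily; their contents
	 * become undefined afterwards, which is harmless because the run is
	 * free.
	 */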
2099 	if (opt_hint)
2100 		madvise(run, size, MADV_FREE);
2101 
2102 	/* Try to coalesce with neighboring runs. */
2103 	if (run_ind > arena_chunk_header_npages &&
2104 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2105 		unsigned prev_npages;
2106 
2107 		/* Coalesce with previous run. */
2108 		prev_npages = chunk->map[run_ind - 1].npages;
2109 		run_ind -= prev_npages;
2110 		assert(chunk->map[run_ind].npages == prev_npages);
2111 		assert(chunk->map[run_ind].pos == POS_FREE);
2112 		run_pages += prev_npages;
2113 
2114 		chunk->map[run_ind].npages = run_pages;
2115 		assert(chunk->map[run_ind].pos == POS_FREE);
2116 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2117 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2118 	}
2119 
2120 	if (run_ind + run_pages < chunk_npages &&
2121 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2122 		unsigned next_npages;
2123 
2124 		/* Coalesce with next run. */
2125 		next_npages = chunk->map[run_ind + run_pages].npages;
2126 		run_pages += next_npages;
2127 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2128 		    next_npages);
2129 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2130 
2131 		chunk->map[run_ind].npages = run_pages;
2132 		chunk->map[run_ind].pos = POS_FREE;
2133 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2134 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2135 	}
2136 
2137 	if (chunk->map[run_ind].npages > chunk->max_frun_npages) {
2138 		RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
2139 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2140 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
2141 	}
2142 	if (run_ind < chunk->min_frun_ind)
2143 		chunk->min_frun_ind = run_ind;
2144 
2145 	/* Deallocate chunk if it is now completely unused. */
2146 	if (chunk->pages_used == 0)
2147 		arena_chunk_dealloc(arena, chunk);
2148 }
2149 
2150 static arena_run_t *
2151 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2152 {
2153 	arena_run_t *run;
2154 	unsigned i, remainder;
2155 
2156 	/* Look for a usable run. */
2157 	/* LINTED */
2158 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2159 		/* run is guaranteed to have available space. */
2160 		/* LINTED */
2161 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2162 #ifdef MALLOC_STATS
2163 		bin->stats.reruns++;
2164 #endif
2165 		return (run);
2166 	}
2167 	/* No existing runs have any space available. */
2168 
2169 	/* Allocate a new run. */
2170 	run = arena_run_alloc(arena, bin->run_size);
2171 	if (run == NULL)
2172 		return (NULL);
2173 
2174 	/* Initialize run internals. */
2175 	run->bin = bin;
2176 
2177 	for (i = 0; i < bin->regs_mask_nelms; i++)
2178 		run->regs_mask[i] = UINT_MAX;
2179 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2180 	if (remainder != 0) {
2181 		/* The last element has spare bits that need to be unset. */
2182 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2183 		    - remainder));
2184 	}
2185 
2186 	run->regs_minelm = 0;
2187 
2188 	run->nfree = bin->nregs;
2189 #ifdef MALLOC_DEBUG
2190 	run->magic = ARENA_RUN_MAGIC;
2191 #endif
2192 
2193 #ifdef MALLOC_STATS
2194 	bin->stats.nruns++;
2195 	bin->stats.curruns++;
2196 	if (bin->stats.curruns > bin->stats.highruns)
2197 		bin->stats.highruns = bin->stats.curruns;
2198 #endif
2199 	return (run);
2200 }
2201 
2202 /* bin->runcur must have space available before this function is called. */
2203 static inline void *
2204 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2205 {
2206 	void *ret;
2207 
2208 	assert(run->magic == ARENA_RUN_MAGIC);
2209 	assert(run->nfree > 0);
2210 
2211 	ret = arena_run_reg_alloc(run, bin);
2212 	assert(ret != NULL);
2213 	run->nfree--;
2214 
2215 	return (ret);
2216 }
2217 
2218 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2219 static void *
2220 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2221 {
2222 
2223 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2224 	if (bin->runcur == NULL)
2225 		return (NULL);
2226 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2227 	assert(bin->runcur->nfree > 0);
2228 
2229 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2230 }
2231 
2232 /*
2233  * Calculate bin->run_size such that it meets the following constraints:
2234  *
2235  *   *) bin->run_size >= min_run_size
2236  *   *) bin->run_size <= arena_maxclass
2237  *   *) bin->run_size <= RUN_MAX_SMALL
2238  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2239  *
2240  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2241  * also calculated here, since these settings are all interdependent.
2242  */
2243 static size_t
2244 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2245 {
2246 	size_t try_run_size, good_run_size;
2247 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2248 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2249 
2250 	assert(min_run_size >= pagesize);
2251 	assert(min_run_size <= arena_maxclass);
2252 	assert(min_run_size <= RUN_MAX_SMALL);
2253 
2254 	/*
2255 	 * Calculate known-valid settings before entering the run_size
2256 	 * expansion loop, so that the first part of the loop always copies
2257 	 * valid settings.
2258 	 *
2259 	 * The do..while loop iteratively reduces the number of regions until
2260 	 * the run header and the regions no longer overlap.  A closed formula
2261 	 * would be quite messy, since there is an interdependency between the
2262 	 * header's mask length and the number of regions.
2263 	 */
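	/*
	 * In the expansion loop below, the fixed-point test
	 * (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size means
	 * that the header overhead exceeds RUN_MAX_OVRHD / 2^RUN_BFP of the
	 * run size; the run is grown one page at a time until the overhead is
	 * acceptable, a size limit is hit, or the RUN_MAX_OVRHD_RELAX test
	 * waives the overhead requirement for this bin entirely.
	 */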
2264 	try_run_size = min_run_size;
2265 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2266 	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2267 	do {
2268 		try_nregs--;
2269 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2270 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2271 		try_reg0_offset = (unsigned)(try_run_size -
2272 		    (try_nregs * bin->reg_size));
2273 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2274 	    > try_reg0_offset);
2275 
2276 	/* run_size expansion loop. */
2277 	do {
2278 		/*
2279 		 * Copy valid settings before trying more aggressive settings.
2280 		 */
2281 		good_run_size = try_run_size;
2282 		good_nregs = try_nregs;
2283 		good_mask_nelms = try_mask_nelms;
2284 		good_reg0_offset = try_reg0_offset;
2285 
2286 		/* Try more aggressive settings. */
2287 		try_run_size += pagesize;
2288 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2289 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2290 		do {
2291 			try_nregs--;
2292 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2293 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2294 			    1 : 0);
2295 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2296 			    bin->reg_size));
2297 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2298 		    (try_mask_nelms - 1)) > try_reg0_offset);
2299 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2300 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2301 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2302 
2303 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2304 	    <= good_reg0_offset);
2305 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2306 
2307 	/* Copy final settings. */
2308 	bin->run_size = good_run_size;
2309 	bin->nregs = good_nregs;
2310 	bin->regs_mask_nelms = good_mask_nelms;
2311 	bin->reg0_offset = good_reg0_offset;
2312 
2313 	return (good_run_size);
2314 }
2315 
2316 static void *
2317 arena_malloc(arena_t *arena, size_t size)
2318 {
2319 	void *ret;
2320 
2321 	assert(arena != NULL);
2322 	assert(arena->magic == ARENA_MAGIC);
2323 	assert(size != 0);
2324 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2325 
2326 	if (size <= bin_maxclass) {
2327 		arena_bin_t *bin;
2328 		arena_run_t *run;
2329 
2330 		/* Small allocation. */
2331 
2332 		if (size < small_min) {
2333 			/* Tiny. */
2334 			size = pow2_ceil(size);
2335 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2336 			    1)))];
2337 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2338 			/*
2339 			 * Bin calculation is always correct, but we may need
2340 			 * to fix size for the purposes of assertions and/or
2341 			 * stats accuracy.
2342 			 */
2343 			if (size < (1 << TINY_MIN_2POW))
2344 				size = (1 << TINY_MIN_2POW);
2345 #endif
2346 		} else if (size <= small_max) {
2347 			/* Quantum-spaced. */
2348 			size = QUANTUM_CEILING(size);
2349 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2350 			    - 1];
2351 		} else {
2352 			/* Sub-page. */
2353 			size = pow2_ceil(size);
2354 			bin = &arena->bins[ntbins + nqbins
2355 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2356 		}
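		/*
		 * Example (illustrative only, assuming a 16 byte quantum and
		 * small_max == 512): a 100 byte request is quantum-spaced, is
		 * rounded up to 112, and maps to bins[ntbins + 112/16 - 1],
		 * i.e. bins[ntbins + 6].
		 */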
2357 		assert(size == bin->reg_size);
2358 
2359 		malloc_mutex_lock(&arena->mtx);
2360 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2361 			ret = arena_bin_malloc_easy(arena, bin, run);
2362 		else
2363 			ret = arena_bin_malloc_hard(arena, bin);
2364 
2365 		if (ret == NULL) {
2366 			malloc_mutex_unlock(&arena->mtx);
2367 			return (NULL);
2368 		}
2369 
2370 #ifdef MALLOC_STATS
2371 		bin->stats.nrequests++;
2372 		arena->stats.nmalloc_small++;
2373 		arena->stats.allocated_small += size;
2374 #endif
2375 	} else {
2376 		/* Large allocation. */
2377 		size = PAGE_CEILING(size);
2378 		malloc_mutex_lock(&arena->mtx);
2379 		ret = (void *)arena_run_alloc(arena, size);
2380 		if (ret == NULL) {
2381 			malloc_mutex_unlock(&arena->mtx);
2382 			return (NULL);
2383 		}
2384 #ifdef MALLOC_STATS
2385 		arena->stats.nmalloc_large++;
2386 		arena->stats.allocated_large += size;
2387 #endif
2388 	}
2389 
2390 	malloc_mutex_unlock(&arena->mtx);
2391 
2392 	if (opt_junk)
2393 		memset(ret, 0xa5, size);
2394 	else if (opt_zero)
2395 		memset(ret, 0, size);
2396 	return (ret);
2397 }
2398 
2399 static inline void
2400 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2401     unsigned npages)
2402 {
2403 	unsigned i;
2404 
2405 	assert(npages > 0);
2406 
2407 	/*
2408 	 * Modify the map so that arena_run_dalloc() sees the run as
2409 	 * separately allocated.
2410 	 */
2411 	for (i = 0; i < npages; i++) {
2412 		chunk->map[pageind + i].npages = npages;
2413 		chunk->map[pageind + i].pos = i;
2414 	}
2415 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2416 	    pagesize_2pow)), npages << pagesize_2pow);
2417 }
2418 
2419 /* Only handles large allocations that require more than page alignment. */
2420 static void *
2421 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2422 {
2423 	void *ret;
2424 	size_t offset;
2425 	arena_chunk_t *chunk;
2426 	unsigned pageind, i, npages;
2427 
2428 	assert((size & pagesize_mask) == 0);
2429 	assert((alignment & pagesize_mask) == 0);
2430 
2431 	npages = (unsigned)(size >> pagesize_2pow);
2432 
2433 	malloc_mutex_lock(&arena->mtx);
2434 	ret = (void *)arena_run_alloc(arena, alloc_size);
2435 	if (ret == NULL) {
2436 		malloc_mutex_unlock(&arena->mtx);
2437 		return (NULL);
2438 	}
2439 
2440 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2441 
2442 	offset = (uintptr_t)ret & (alignment - 1);
2443 	assert((offset & pagesize_mask) == 0);
2444 	assert(offset < alloc_size);
2445 	if (offset == 0) {
2446 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2447 		    pagesize_2pow);
2448 
2449 		/* Update the map for the run to be kept. */
2450 		for (i = 0; i < npages; i++) {
2451 			chunk->map[pageind + i].npages = npages;
2452 			assert(chunk->map[pageind + i].pos == i);
2453 		}
2454 
2455 		/* Trim trailing space. */
2456 		arena_palloc_trim(arena, chunk, pageind + npages,
2457 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2458 	} else {
2459 		size_t leadsize, trailsize;
2460 
2461 		leadsize = alignment - offset;
2462 		ret = (void *)((uintptr_t)ret + leadsize);
2463 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2464 		    pagesize_2pow);
2465 
2466 		/* Update the map for the run to be kept. */
2467 		for (i = 0; i < npages; i++) {
2468 			chunk->map[pageind + i].npages = npages;
2469 			chunk->map[pageind + i].pos = i;
2470 		}
2471 
2472 		/* Trim leading space. */
2473 		arena_palloc_trim(arena, chunk,
2474 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2475 		    (unsigned)(leadsize >> pagesize_2pow));
2476 
2477 		trailsize = alloc_size - leadsize - size;
2478 		if (trailsize != 0) {
2479 			/* Trim trailing space. */
2480 			assert(trailsize < alloc_size);
2481 			arena_palloc_trim(arena, chunk, pageind + npages,
2482 			    (unsigned)(trailsize >> pagesize_2pow));
2483 		}
2484 	}
2485 
2486 #ifdef MALLOC_STATS
2487 	arena->stats.nmalloc_large++;
2488 	arena->stats.allocated_large += size;
2489 #endif
2490 	malloc_mutex_unlock(&arena->mtx);
2491 
2492 	if (opt_junk)
2493 		memset(ret, 0xa5, size);
2494 	else if (opt_zero)
2495 		memset(ret, 0, size);
2496 	return (ret);
2497 }
2498 
2499 /* Return the size of the allocation pointed to by ptr. */
2500 static size_t
2501 arena_salloc(const void *ptr)
2502 {
2503 	size_t ret;
2504 	arena_chunk_t *chunk;
2505 	arena_chunk_map_t *mapelm;
2506 	unsigned pageind;
2507 
2508 	assert(ptr != NULL);
2509 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2510 
2511 	/*
2512 	 * No arena data structures that we query here can change in a way that
2513 	 * affects this function, so we don't need to lock.
2514 	 */
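	/*
	 * For a page that lies inside a small run, map[pageind].pos is the
	 * page's offset within that run, so subtracting it locates the run
	 * header and hence the bin's region size.  For the first page of a
	 * large run, pos == 0 and npages gives the allocation size directly.
	 */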
2515 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2516 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2517 	    pagesize_2pow);
2518 	mapelm = &chunk->map[pageind];
2519 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2520 	    pagesize_2pow)) {
2521 		arena_run_t *run;
2522 
2523 		pageind -= mapelm->pos;
2524 
2525 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2526 		    pagesize_2pow));
2527 		assert(run->magic == ARENA_RUN_MAGIC);
2528 		ret = run->bin->reg_size;
2529 	} else
2530 		ret = mapelm->npages << pagesize_2pow;
2531 
2532 	return (ret);
2533 }
2534 
2535 static void *
2536 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2537 {
2538 	void *ret;
2539 
2540 	/* Avoid moving the allocation if the size class would not change. */
2541 	if (size < small_min) {
2542 		if (oldsize < small_min &&
2543 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2544 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2545 			goto IN_PLACE;
2546 	} else if (size <= small_max) {
2547 		if (oldsize >= small_min && oldsize <= small_max &&
2548 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2549 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2550 			goto IN_PLACE;
2551 	} else {
2552 		/*
2553 		 * We make no attempt to resize runs here, though it would be
2554 		 * possible to do so.
2555 		 */
2556 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2557 			goto IN_PLACE;
2558 	}
2559 
2560 	/*
2561 	 * If we get here, then size and oldsize are different enough that we
2562 	 * need to use a different size class.  In that case, fall back to
2563 	 * allocating new space and copying.
2564 	 */
2565 	ret = arena_malloc(choose_arena(), size);
2566 	if (ret == NULL)
2567 		return (NULL);
2568 
2569 	/* Junk/zero-filling were already done by arena_malloc(). */
2570 	if (size < oldsize)
2571 		memcpy(ret, ptr, size);
2572 	else
2573 		memcpy(ret, ptr, oldsize);
2574 	idalloc(ptr);
2575 	return (ret);
2576 IN_PLACE:
2577 	if (opt_junk && size < oldsize)
2578 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2579 	else if (opt_zero && size > oldsize)
2580 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2581 	return (ptr);
2582 }
2583 
2584 static void
2585 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2586 {
2587 	unsigned pageind;
2588 	arena_chunk_map_t *mapelm;
2589 	size_t size;
2590 
2591 	assert(arena != NULL);
2592 	assert(arena->magic == ARENA_MAGIC);
2593 	assert(chunk->arena == arena);
2594 	assert(ptr != NULL);
2595 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2596 
2597 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2598 	    pagesize_2pow);
2599 	mapelm = &chunk->map[pageind];
2600 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2601 	    pagesize_2pow)) {
2602 		arena_run_t *run;
2603 		arena_bin_t *bin;
2604 
2605 		/* Small allocation. */
2606 
2607 		pageind -= mapelm->pos;
2608 
2609 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2610 		    pagesize_2pow));
2611 		assert(run->magic == ARENA_RUN_MAGIC);
2612 		bin = run->bin;
2613 		size = bin->reg_size;
2614 
2615 		if (opt_junk)
2616 			memset(ptr, 0x5a, size);
2617 
2618 		malloc_mutex_lock(&arena->mtx);
2619 		arena_run_reg_dalloc(run, bin, ptr, size);
2620 		run->nfree++;
2621 
2622 		if (run->nfree == bin->nregs) {
2623 			/* Deallocate run. */
2624 			if (run == bin->runcur)
2625 				bin->runcur = NULL;
2626 			else if (bin->nregs != 1) {
2627 				/*
2628 				 * This block's conditional is necessary because
2629 				 * if the run only contains one region, then it
2630 				 * never gets inserted into the non-full runs
2631 				 * tree.
2632 				 */
2633 				/* LINTED */
2634 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2635 			}
2636 #ifdef MALLOC_DEBUG
2637 			run->magic = 0;
2638 #endif
2639 			arena_run_dalloc(arena, run, bin->run_size);
2640 #ifdef MALLOC_STATS
2641 			bin->stats.curruns--;
2642 #endif
2643 		} else if (run->nfree == 1 && run != bin->runcur) {
2644 			/*
2645 			 * Make sure that bin->runcur always refers to the
2646 			 * lowest non-full run, if one exists.
2647 			 */
2648 			if (bin->runcur == NULL)
2649 				bin->runcur = run;
2650 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2651 				/* Switch runcur. */
2652 				if (bin->runcur->nfree > 0) {
2653 					/* Insert runcur. */
2654 					/* LINTED */
2655 					RB_INSERT(arena_run_tree_s, &bin->runs,
2656 					    bin->runcur);
2657 				}
2658 				bin->runcur = run;
2659 			} else {
2660 				/* LINTED */
2661 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2662 			}
2663 		}
2664 #ifdef MALLOC_STATS
2665 		arena->stats.allocated_small -= size;
2666 		arena->stats.ndalloc_small++;
2667 #endif
2668 	} else {
2669 		/* Large allocation. */
2670 
2671 		size = mapelm->npages << pagesize_2pow;
2672 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2673 
2674 		if (opt_junk)
2675 			memset(ptr, 0x5a, size);
2676 
2677 		malloc_mutex_lock(&arena->mtx);
2678 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2679 #ifdef MALLOC_STATS
2680 		arena->stats.allocated_large -= size;
2681 		arena->stats.ndalloc_large++;
2682 #endif
2683 	}
2684 
2685 	malloc_mutex_unlock(&arena->mtx);
2686 }
2687 
2688 static bool
2689 arena_new(arena_t *arena)
2690 {
2691 	unsigned i;
2692 	arena_bin_t *bin;
2693 	size_t prev_run_size;
2694 
2695 	malloc_mutex_init(&arena->mtx);
2696 
2697 #ifdef MALLOC_STATS
2698 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2699 #endif
2700 
2701 	/* Initialize chunks. */
2702 	RB_INIT(&arena->chunks);
2703 	arena->spare = NULL;
2704 
2705 	/* Initialize bins. */
2706 	prev_run_size = pagesize;
2707 
2708 	/* (2^n)-spaced tiny bins. */
2709 	for (i = 0; i < ntbins; i++) {
2710 		bin = &arena->bins[i];
2711 		bin->runcur = NULL;
2712 		RB_INIT(&bin->runs);
2713 
2714 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2715 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2716 
2717 #ifdef MALLOC_STATS
2718 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2719 #endif
2720 	}
2721 
2722 	/* Quantum-spaced bins. */
2723 	for (; i < ntbins + nqbins; i++) {
2724 		bin = &arena->bins[i];
2725 		bin->runcur = NULL;
2726 		RB_INIT(&bin->runs);
2727 
2728 		bin->reg_size = quantum * (i - ntbins + 1);
2729 /*
2730 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2731 */
2732 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2733 
2734 #ifdef MALLOC_STATS
2735 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2736 #endif
2737 	}
2738 
2739 	/* (2^n)-spaced sub-page bins. */
2740 	for (; i < ntbins + nqbins + nsbins; i++) {
2741 		bin = &arena->bins[i];
2742 		bin->runcur = NULL;
2743 		RB_INIT(&bin->runs);
2744 
2745 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2746 
2747 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2748 
2749 #ifdef MALLOC_STATS
2750 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2751 #endif
2752 	}
2753 
2754 #ifdef MALLOC_DEBUG
2755 	arena->magic = ARENA_MAGIC;
2756 #endif
2757 
2758 	return (false);
2759 }
2760 
2761 /* Create a new arena and insert it into the arenas array at index ind. */
2762 static arena_t *
2763 arenas_extend(unsigned ind)
2764 {
2765 	arena_t *ret;
2766 
2767 	/* Allocate enough space for trailing bins. */
2768 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2769 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2770 	if (ret != NULL && arena_new(ret) == false) {
2771 		arenas[ind] = ret;
2772 		return (ret);
2773 	}
2774 	/* Only reached if there is an OOM error. */
2775 
2776 	/*
2777 	 * OOM here is quite inconvenient to propagate, since dealing with it
2778 	 * would require a check for failure in the fast path.  Instead, punt
2779 	 * by using arenas[0].  In practice, this is an extremely unlikely
2780 	 * failure.
2781 	 */
2782 	_malloc_message(getprogname(),
2783 	    ": (malloc) Error initializing arena\n", "", "");
2784 	if (opt_abort)
2785 		abort();
2786 
2787 	return (arenas[0]);
2788 }
2789 
2790 /*
2791  * End arena.
2792  */
2793 /******************************************************************************/
2794 /*
2795  * Begin general internal functions.
2796  */
2797 
2798 static void *
2799 huge_malloc(size_t size)
2800 {
2801 	void *ret;
2802 	size_t csize;
2803 	chunk_node_t *node;
2804 
2805 	/* Allocate one or more contiguous chunks for this request. */
2806 
2807 	csize = CHUNK_CEILING(size);
2808 	if (csize == 0) {
2809 		/* size is large enough to cause size_t wrap-around. */
2810 		return (NULL);
2811 	}
2812 
2813 	/* Allocate a chunk node with which to track the chunk. */
2814 	node = base_chunk_node_alloc();
2815 	if (node == NULL)
2816 		return (NULL);
2817 
2818 	ret = chunk_alloc(csize);
2819 	if (ret == NULL) {
2820 		base_chunk_node_dealloc(node);
2821 		return (NULL);
2822 	}
2823 
2824 	/* Insert node into huge. */
2825 	node->chunk = ret;
2826 	node->size = csize;
2827 
2828 	malloc_mutex_lock(&chunks_mtx);
2829 	RB_INSERT(chunk_tree_s, &huge, node);
2830 #ifdef MALLOC_STATS
2831 	huge_nmalloc++;
2832 	huge_allocated += csize;
2833 #endif
2834 	malloc_mutex_unlock(&chunks_mtx);
2835 
2836 	if (opt_junk)
2837 		memset(ret, 0xa5, csize);
2838 	else if (opt_zero)
2839 		memset(ret, 0, csize);
2840 
2841 	return (ret);
2842 }
2843 
2844 /* Only handles large allocations that require more than chunk alignment. */
2845 static void *
2846 huge_palloc(size_t alignment, size_t size)
2847 {
2848 	void *ret;
2849 	size_t alloc_size, chunk_size, offset;
2850 	chunk_node_t *node;
2851 
2852 	/*
2853 	 * This allocation requires alignment that is even larger than chunk
2854 	 * alignment.  This means that huge_malloc() isn't good enough.
2855 	 *
2856 	 * Allocate almost twice as many chunks as are demanded by the size or
2857 	 * alignment, in order to ensure the alignment can be achieved, then
2858 	 * unmap leading and trailing chunks.
2859 	 */
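	/*
	 * For example (illustrative only), size == chunksize and alignment ==
	 * 2 * chunksize give alloc_size == 3 * chunksize; any chunk-aligned
	 * region of three chunks contains a boundary aligned to 2 * chunksize
	 * with at least one chunk after it, and the surplus leading/trailing
	 * chunks are unmapped below.
	 */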
2860 	assert(alignment >= chunksize);
2861 
2862 	chunk_size = CHUNK_CEILING(size);
2863 
2864 	if (size >= alignment)
2865 		alloc_size = chunk_size + alignment - chunksize;
2866 	else
2867 		alloc_size = (alignment << 1) - chunksize;
2868 
2869 	/* Allocate a chunk node with which to track the chunk. */
2870 	node = base_chunk_node_alloc();
2871 	if (node == NULL)
2872 		return (NULL);
2873 
2874 	ret = chunk_alloc(alloc_size);
2875 	if (ret == NULL) {
2876 		base_chunk_node_dealloc(node);
2877 		return (NULL);
2878 	}
2879 
2880 	offset = (uintptr_t)ret & (alignment - 1);
2881 	assert((offset & chunksize_mask) == 0);
2882 	assert(offset < alloc_size);
2883 	if (offset == 0) {
2884 		/* Trim trailing space. */
2885 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2886 		    - chunk_size);
2887 	} else {
2888 		size_t trailsize;
2889 
2890 		/* Trim leading space. */
2891 		chunk_dealloc(ret, alignment - offset);
2892 
2893 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2894 
2895 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2896 		if (trailsize != 0) {
2897 		    /* Trim trailing space. */
2898 		    assert(trailsize < alloc_size);
2899 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2900 			trailsize);
2901 		}
2902 	}
2903 
2904 	/* Insert node into huge. */
2905 	node->chunk = ret;
2906 	node->size = chunk_size;
2907 
2908 	malloc_mutex_lock(&chunks_mtx);
2909 	RB_INSERT(chunk_tree_s, &huge, node);
2910 #ifdef MALLOC_STATS
2911 	huge_nmalloc++;
2912 	huge_allocated += chunk_size;
2913 #endif
2914 	malloc_mutex_unlock(&chunks_mtx);
2915 
2916 	if (opt_junk)
2917 		memset(ret, 0xa5, chunk_size);
2918 	else if (opt_zero)
2919 		memset(ret, 0, chunk_size);
2920 
2921 	return (ret);
2922 }
2923 
2924 static void *
2925 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2926 {
2927 	void *ret;
2928 
2929 	/* Avoid moving the allocation if the size class would not change. */
2930 	if (oldsize > arena_maxclass &&
2931 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2932 		if (opt_junk && size < oldsize) {
2933 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2934 			    - size);
2935 		} else if (opt_zero && size > oldsize) {
2936 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2937 			    - oldsize);
2938 		}
2939 		return (ptr);
2940 	}
2941 
2942 	if (CHUNK_ADDR2BASE(ptr) == ptr
2943 #ifdef USE_BRK
2944 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2945 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2946 #endif
2947 	    ) {
2948 		chunk_node_t *node, key;
2949 		void *newptr;
2950 		size_t oldcsize;
2951 		size_t newcsize;
2952 
2953 		newcsize = CHUNK_CEILING(size);
2954 		oldcsize = CHUNK_CEILING(oldsize);
2955 		assert(oldcsize != newcsize);
2956 		if (newcsize == 0) {
2957 			/* size_t wrap-around */
2958 			return (NULL);
2959 		}
2960 
2961 		/*
2962 		 * Remove the old region from the tree now.  If mremap()
2963 		 * returns the region to the system, another thread may
2964 		 * map it for a new huge allocation and insert it into the
2965 		 * tree before we acquire the mutex lock again.
2966 		 */
2967 		malloc_mutex_lock(&chunks_mtx);
2968 		key.chunk = __DECONST(void *, ptr);
2969 		/* LINTED */
2970 		node = RB_FIND(chunk_tree_s, &huge, &key);
2971 		assert(node != NULL);
2972 		assert(node->chunk == ptr);
2973 		assert(node->size == oldcsize);
2974 		RB_REMOVE(chunk_tree_s, &huge, node);
2975 		malloc_mutex_unlock(&chunks_mtx);
2976 
2977 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2978 		    MAP_ALIGNED(chunksize_2pow));
2979 		if (newptr == MAP_FAILED) {
2980 			/* We still own the old region. */
2981 			malloc_mutex_lock(&chunks_mtx);
2982 			RB_INSERT(chunk_tree_s, &huge, node);
2983 			malloc_mutex_unlock(&chunks_mtx);
2984 		} else {
2985 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2986 
2987 			/* Insert new or resized old region. */
2988 			malloc_mutex_lock(&chunks_mtx);
2989 			node->size = newcsize;
2990 			node->chunk = newptr;
2991 			RB_INSERT(chunk_tree_s, &huge, node);
2992 #ifdef MALLOC_STATS
2993 			huge_nralloc++;
2994 			huge_allocated += newcsize - oldcsize;
2995 			if (newcsize > oldcsize) {
2996 				stats_chunks.curchunks +=
2997 				    (newcsize - oldcsize) / chunksize;
2998 				if (stats_chunks.curchunks >
2999 				    stats_chunks.highchunks)
3000 					stats_chunks.highchunks =
3001 					    stats_chunks.curchunks;
3002 			} else {
3003 				stats_chunks.curchunks -=
3004 				    (oldcsize - newcsize) / chunksize;
3005 			}
3006 #endif
3007 			malloc_mutex_unlock(&chunks_mtx);
3008 
3009 			if (opt_junk && size < oldsize) {
3010 				memset((void *)((uintptr_t)newptr + size), 0x5a,
3011 				    newcsize - size);
3012 			} else if (opt_zero && size > oldsize) {
3013 				memset((void *)((uintptr_t)newptr + oldsize), 0,
3014 				    size - oldsize);
3015 			}
3016 			return (newptr);
3017 		}
3018 	}
3019 
3020 	/*
3021 	 * If we get here, then size and oldsize are different enough that we
3022 	 * need to use a different size class.  In that case, fall back to
3023 	 * allocating new space and copying.
3024 	 */
3025 	ret = huge_malloc(size);
3026 	if (ret == NULL)
3027 		return (NULL);
3028 
3029 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
3030 		/* The old allocation is a chunk. */
3031 		if (size < oldsize)
3032 			memcpy(ret, ptr, size);
3033 		else
3034 			memcpy(ret, ptr, oldsize);
3035 	} else {
3036 		/* The old allocation is a region. */
3037 		assert(oldsize < size);
3038 		memcpy(ret, ptr, oldsize);
3039 	}
3040 	idalloc(ptr);
3041 	return (ret);
3042 }
3043 
3044 static void
3045 huge_dalloc(void *ptr)
3046 {
3047 	chunk_node_t key;
3048 	chunk_node_t *node;
3049 
3050 	malloc_mutex_lock(&chunks_mtx);
3051 
3052 	/* Extract from tree of huge allocations. */
3053 	key.chunk = ptr;
3054 	/* LINTED */
3055 	node = RB_FIND(chunk_tree_s, &huge, &key);
3056 	assert(node != NULL);
3057 	assert(node->chunk == ptr);
3058 	/* LINTED */
3059 	RB_REMOVE(chunk_tree_s, &huge, node);
3060 
3061 #ifdef MALLOC_STATS
3062 	huge_ndalloc++;
3063 	huge_allocated -= node->size;
3064 #endif
3065 
3066 	malloc_mutex_unlock(&chunks_mtx);
3067 
3068 	/* Unmap chunk. */
3069 #ifdef USE_BRK
3070 	if (opt_junk)
3071 		memset(node->chunk, 0x5a, node->size);
3072 #endif
3073 	chunk_dealloc(node->chunk, node->size);
3074 
3075 	base_chunk_node_dealloc(node);
3076 }
3077 
3078 static void *
3079 imalloc(size_t size)
3080 {
3081 	void *ret;
3082 
3083 	assert(size != 0);
3084 
3085 	if (size <= arena_maxclass)
3086 		ret = arena_malloc(choose_arena(), size);
3087 	else
3088 		ret = huge_malloc(size);
3089 
3090 	return (ret);
3091 }
3092 
3093 static void *
3094 ipalloc(size_t alignment, size_t size)
3095 {
3096 	void *ret;
3097 	size_t ceil_size;
3098 
3099 	/*
3100 	 * Round size up to the nearest multiple of alignment.
3101 	 *
3102 	 * This done, we can take advantage of the fact that for each small
3103 	 * size class, every object is aligned at the smallest power of two
3104 	 * that is non-zero in the base two representation of the size.  For
3105 	 * example:
3106 	 *
3107 	 *   Size |   Base 2 | Minimum alignment
3108 	 *   -----+----------+------------------
3109 	 *     96 |  1100000 |  32
3110 	 *    144 | 10100000 |  32
3111 	 *    192 | 11000000 |  64
3112 	 *
3113 	 * Depending on runtime settings, it is possible that arena_malloc()
3114 	 * will further round up to a power of two, but that never causes
3115 	 * correctness issues.
3116 	 */
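	/*
	 * Example (illustrative only): alignment == 32 and size == 100 give
	 * ceil_size == 128; every object in the 128 byte size class is
	 * 128-byte aligned, so the ordinary allocation path below already
	 * satisfies the request.
	 */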
3117 	ceil_size = (size + (alignment - 1)) & (-alignment);
3118 	/*
3119 	 * (ceil_size < size) protects against the combination of maximal
3120 	 * alignment and size greater than maximal alignment.
3121 	 */
3122 	if (ceil_size < size) {
3123 		/* size_t overflow. */
3124 		return (NULL);
3125 	}
3126 
3127 	if (ceil_size <= pagesize || (alignment <= pagesize
3128 	    && ceil_size <= arena_maxclass))
3129 		ret = arena_malloc(choose_arena(), ceil_size);
3130 	else {
3131 		size_t run_size;
3132 
3133 		/*
3134 		 * We can't achieve sub-page alignment, so round up alignment
3135 		 * permanently; it makes later calculations simpler.
3136 		 */
3137 		alignment = PAGE_CEILING(alignment);
3138 		ceil_size = PAGE_CEILING(size);
3139 		/*
3140 		 * (ceil_size < size) protects against very large sizes within
3141 		 * pagesize of SIZE_T_MAX.
3142 		 *
3143 		 * (ceil_size + alignment < ceil_size) protects against the
3144 		 * combination of maximal alignment and ceil_size large enough
3145 		 * to cause overflow.  This is similar to the first overflow
3146 		 * check above, but it needs to be repeated due to the new
3147 		 * ceil_size value, which may now be *equal* to maximal
3148 		 * alignment, whereas before we only detected overflow if the
3149 		 * original size was *greater* than maximal alignment.
3150 		 */
3151 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3152 			/* size_t overflow. */
3153 			return (NULL);
3154 		}
3155 
3156 		/*
3157 		 * Calculate the size of the over-size run that arena_palloc()
3158 		 * would need to allocate in order to guarantee the alignment.
3159 		 */
3160 		if (ceil_size >= alignment)
3161 			run_size = ceil_size + alignment - pagesize;
3162 		else {
3163 			/*
3164 			 * It is possible that (alignment << 1) will cause
3165 			 * overflow, but it doesn't matter because we also
3166 			 * subtract pagesize, which in the case of overflow
3167 			 * leaves us with a very large run_size.  That causes
3168 			 * the first conditional below to fail, which means
3169 			 * that the bogus run_size value never gets used for
3170 			 * anything important.
3171 			 */
3172 			run_size = (alignment << 1) - pagesize;
3173 		}
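		/*
		 * Example (illustrative only, 4 kB pages): alignment == 8192
		 * and ceil_size == 4096 give run_size == 12288; any page-
		 * aligned run of three pages contains an 8192-aligned page
		 * with room for the 4096 byte allocation after it.
		 */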
3174 
3175 		if (run_size <= arena_maxclass) {
3176 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3177 			    run_size);
3178 		} else if (alignment <= chunksize)
3179 			ret = huge_malloc(ceil_size);
3180 		else
3181 			ret = huge_palloc(alignment, ceil_size);
3182 	}
3183 
3184 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3185 	return (ret);
3186 }
3187 
3188 static void *
3189 icalloc(size_t size)
3190 {
3191 	void *ret;
3192 
3193 	if (size <= arena_maxclass) {
3194 		ret = arena_malloc(choose_arena(), size);
3195 		if (ret == NULL)
3196 			return (NULL);
3197 		memset(ret, 0, size);
3198 	} else {
3199 		/*
3200 		 * The virtual memory system provides zero-filled pages, so
3201 		 * there is no need to do so manually, unless opt_junk is
3202 		 * enabled, in which case huge_malloc() fills huge allocations
3203 		 * with junk.
3204 		 */
3205 		ret = huge_malloc(size);
3206 		if (ret == NULL)
3207 			return (NULL);
3208 
3209 		if (opt_junk)
3210 			memset(ret, 0, size);
3211 #ifdef USE_BRK
3212 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3213 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3214 			/*
3215 			 * This may be a re-used brk chunk.  Therefore, zero
3216 			 * the memory.
3217 			 */
3218 			memset(ret, 0, size);
3219 		}
3220 #endif
3221 	}
3222 
3223 	return (ret);
3224 }
3225 
3226 static size_t
3227 isalloc(const void *ptr)
3228 {
3229 	size_t ret;
3230 	arena_chunk_t *chunk;
3231 
3232 	assert(ptr != NULL);
3233 
3234 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3235 	if (chunk != ptr) {
3236 		/* Region. */
3237 		assert(chunk->arena->magic == ARENA_MAGIC);
3238 
3239 		ret = arena_salloc(ptr);
3240 	} else {
3241 		chunk_node_t *node, key;
3242 
3243 		/* Chunk (huge allocation). */
3244 
3245 		malloc_mutex_lock(&chunks_mtx);
3246 
3247 		/* Extract from tree of huge allocations. */
3248 		key.chunk = __DECONST(void *, ptr);
3249 		/* LINTED */
3250 		node = RB_FIND(chunk_tree_s, &huge, &key);
3251 		assert(node != NULL);
3252 
3253 		ret = node->size;
3254 
3255 		malloc_mutex_unlock(&chunks_mtx);
3256 	}
3257 
3258 	return (ret);
3259 }
3260 
3261 static void *
3262 iralloc(void *ptr, size_t size)
3263 {
3264 	void *ret;
3265 	size_t oldsize;
3266 
3267 	assert(ptr != NULL);
3268 	assert(size != 0);
3269 
3270 	oldsize = isalloc(ptr);
3271 
3272 	if (size <= arena_maxclass)
3273 		ret = arena_ralloc(ptr, size, oldsize);
3274 	else
3275 		ret = huge_ralloc(ptr, size, oldsize);
3276 
3277 	return (ret);
3278 }
3279 
3280 static void
3281 idalloc(void *ptr)
3282 {
3283 	arena_chunk_t *chunk;
3284 
3285 	assert(ptr != NULL);
3286 
3287 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3288 	if (chunk != ptr) {
3289 		/* Region. */
3290 		arena_dalloc(chunk->arena, chunk, ptr);
3291 	} else
3292 		huge_dalloc(ptr);
3293 }
3294 
3295 static void
3296 malloc_print_stats(void)
3297 {
3298 
3299 	if (opt_print_stats) {
3300 		char s[UMAX2S_BUFSIZE];
3301 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3302 		    "");
3303 		_malloc_message("Assertions ",
3304 #ifdef NDEBUG
3305 		    "disabled",
3306 #else
3307 		    "enabled",
3308 #endif
3309 		    "\n", "");
3310 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3311 		    opt_abort ? "A" : "a",
3312 		    opt_junk ? "J" : "j",
3313 		    opt_hint ? "H" : "h");
3314 		_malloc_message(opt_utrace ? "PU" : "Pu",
3315 		    opt_sysv ? "V" : "v",
3316 		    opt_xmalloc ? "X" : "x",
3317 		    opt_zero ? "Z\n" : "z\n");
3318 
3319 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3320 		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3321 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3322 		    "\n", "");
3323 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3324 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3325 		    "");
3326 
3327 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3328 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3329 		    ")\n", "");
3330 
3331 #ifdef MALLOC_STATS
3332 		{
3333 			size_t allocated, mapped;
3334 			unsigned i;
3335 			arena_t *arena;
3336 
3337 			/* Calculate and print allocated/mapped stats. */
3338 
3339 			/* arenas. */
3340 			for (i = 0, allocated = 0; i < narenas; i++) {
3341 				if (arenas[i] != NULL) {
3342 					malloc_mutex_lock(&arenas[i]->mtx);
3343 					allocated +=
3344 					    arenas[i]->stats.allocated_small;
3345 					allocated +=
3346 					    arenas[i]->stats.allocated_large;
3347 					malloc_mutex_unlock(&arenas[i]->mtx);
3348 				}
3349 			}
3350 
3351 			/* huge/base. */
3352 			malloc_mutex_lock(&chunks_mtx);
3353 			allocated += huge_allocated;
3354 			mapped = stats_chunks.curchunks * chunksize;
3355 			malloc_mutex_unlock(&chunks_mtx);
3356 
3357 			malloc_mutex_lock(&base_mtx);
3358 			mapped += base_mapped;
3359 			malloc_mutex_unlock(&base_mtx);
3360 
3361 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3362 			    allocated, mapped);
3363 
3364 			/* Print chunk stats. */
3365 			{
3366 				chunk_stats_t chunks_stats;
3367 
3368 				malloc_mutex_lock(&chunks_mtx);
3369 				chunks_stats = stats_chunks;
3370 				malloc_mutex_unlock(&chunks_mtx);
3371 
3372 				malloc_printf("chunks: nchunks   "
3373 				    "highchunks    curchunks\n");
3374 				malloc_printf("  %13llu%13lu%13lu\n",
3375 				    chunks_stats.nchunks,
3376 				    chunks_stats.highchunks,
3377 				    chunks_stats.curchunks);
3378 			}
3379 
3380 			/* Print huge allocation stats. */
3381 			malloc_printf(
3382 			    "huge: nmalloc      ndalloc      "
3383 			    "nralloc    allocated\n");
3384 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3385 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3386 			    huge_allocated);
3387 
3388 			/* Print stats for each arena. */
3389 			for (i = 0; i < narenas; i++) {
3390 				arena = arenas[i];
3391 				if (arena != NULL) {
3392 					malloc_printf(
3393 					    "\narenas[%u] @ %p\n", i, arena);
3394 					malloc_mutex_lock(&arena->mtx);
3395 					stats_print(arena);
3396 					malloc_mutex_unlock(&arena->mtx);
3397 				}
3398 			}
3399 		}
3400 #endif /* #ifdef MALLOC_STATS */
3401 		_malloc_message("___ End malloc statistics ___\n", "", "", "");
3402 	}
3403 }
3404 
3405 /*
3406  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3407  * implementation has to take pains to avoid infinite recursion during
3408  * initialization.
3409  */
3410 static inline bool
3411 malloc_init(void)
3412 {
3413 
3414 	if (malloc_initialized == false)
3415 		return (malloc_init_hard());
3416 
3417 	return (false);
3418 }
3419 
3420 static bool
3421 malloc_init_hard(void)
3422 {
3423 	unsigned i, j;
3424 	ssize_t linklen;
3425 	char buf[PATH_MAX + 1];
3426 	const char *opts = "";
3427 	int serrno;
3428 
3429 	malloc_mutex_lock(&init_lock);
3430 	if (malloc_initialized) {
3431 		/*
3432 		 * Another thread initialized the allocator before this one
3433 		 * acquired init_lock.
3434 		 */
3435 		malloc_mutex_unlock(&init_lock);
3436 		return (false);
3437 	}
3438 
3439 	serrno = errno;
3440 	/* Get number of CPUs. */
3441 	{
3442 		int mib[2];
3443 		size_t len;
3444 
3445 		mib[0] = CTL_HW;
3446 		mib[1] = HW_NCPU;
3447 		len = sizeof(ncpus);
3448 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3449 			/* Error. */
3450 			ncpus = 1;
3451 		}
3452 	}
3453 
3454 	/* Get page size. */
3455 	{
3456 		long result;
3457 
3458 		result = sysconf(_SC_PAGESIZE);
3459 		assert(result != -1);
3460 		pagesize = (unsigned) result;
3461 
3462 		/*
3463 		 * We assume that pagesize is a power of 2 when calculating
3464 		 * pagesize_mask and pagesize_2pow.
3465 		 */
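		/*
		 * For example, with 4 kB pages this yields pagesize_mask ==
		 * 0xfff and pagesize_2pow == 12.
		 */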
3466 		assert(((result - 1) & result) == 0);
3467 		pagesize_mask = result - 1;
3468 		pagesize_2pow = ffs((int)result) - 1;
3469 	}
3470 
3471 	for (i = 0; i < 3; i++) {
3472 		/* Get runtime configuration. */
3473 		switch (i) {
3474 		case 0:
3475 			if ((linklen = readlink("/etc/malloc.conf", buf,
3476 						sizeof(buf) - 1)) != -1) {
3477 				/*
3478 				 * Use the contents of the "/etc/malloc.conf"
3479 				 * symbolic link's name.
3480 				 */
3481 				buf[linklen] = '\0';
3482 				opts = buf;
3483 			} else {
3484 				/* No configuration specified. */
3485 				buf[0] = '\0';
3486 				opts = buf;
3487 			}
3488 			break;
3489 		case 1:
3490 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3491 			    issetugid() == 0) {
3492 				/*
3493 				 * Do nothing; opts is already initialized to
3494 				 * the value of the MALLOC_OPTIONS environment
3495 				 * variable.
3496 				 */
3497 			} else {
3498 				/* No configuration specified. */
3499 				buf[0] = '\0';
3500 				opts = buf;
3501 			}
3502 			break;
3503 		case 2:
3504 			if (_malloc_options != NULL) {
3505 			    /*
3506 			     * Use options that were compiled into the program.
3507 			     */
3508 			    opts = _malloc_options;
3509 			} else {
3510 				/* No configuration specified. */
3511 				buf[0] = '\0';
3512 				opts = buf;
3513 			}
3514 			break;
3515 		default:
3516 			/* NOTREACHED */
3517 			/* LINTED */
3518 			assert(false);
3519 		}
3520 
3521 		for (j = 0; opts[j] != '\0'; j++) {
3522 			switch (opts[j]) {
3523 			case 'a':
3524 				opt_abort = false;
3525 				break;
3526 			case 'A':
3527 				opt_abort = true;
3528 				break;
3529 			case 'h':
3530 				opt_hint = false;
3531 				break;
3532 			case 'H':
3533 				opt_hint = true;
3534 				break;
3535 			case 'j':
3536 				opt_junk = false;
3537 				break;
3538 			case 'J':
3539 				opt_junk = true;
3540 				break;
3541 			case 'k':
3542 				/*
3543 				 * Chunks always require at least one header
3544 				 * page, so chunks can never be smaller than
3545 				 * two pages.
3546 				 */
3547 				if (opt_chunk_2pow > pagesize_2pow + 1)
3548 					opt_chunk_2pow--;
3549 				break;
3550 			case 'K':
3551 				if (opt_chunk_2pow + 1 <
3552 				    (int)(sizeof(size_t) << 3))
3553 					opt_chunk_2pow++;
3554 				break;
3555 			case 'n':
3556 				opt_narenas_lshift--;
3557 				break;
3558 			case 'N':
3559 				opt_narenas_lshift++;
3560 				break;
3561 			case 'p':
3562 				opt_print_stats = false;
3563 				break;
3564 			case 'P':
3565 				opt_print_stats = true;
3566 				break;
3567 			case 'q':
3568 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3569 					opt_quantum_2pow--;
3570 				break;
3571 			case 'Q':
3572 				if (opt_quantum_2pow < pagesize_2pow - 1)
3573 					opt_quantum_2pow++;
3574 				break;
3575 			case 's':
3576 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3577 					opt_small_max_2pow--;
3578 				break;
3579 			case 'S':
3580 				if (opt_small_max_2pow < pagesize_2pow - 1)
3581 					opt_small_max_2pow++;
3582 				break;
3583 			case 'u':
3584 				opt_utrace = false;
3585 				break;
3586 			case 'U':
3587 				opt_utrace = true;
3588 				break;
3589 			case 'v':
3590 				opt_sysv = false;
3591 				break;
3592 			case 'V':
3593 				opt_sysv = true;
3594 				break;
3595 			case 'x':
3596 				opt_xmalloc = false;
3597 				break;
3598 			case 'X':
3599 				opt_xmalloc = true;
3600 				break;
3601 			case 'z':
3602 				opt_zero = false;
3603 				break;
3604 			case 'Z':
3605 				opt_zero = true;
3606 				break;
3607 			default: {
3608 				char cbuf[2];
3609 
3610 				cbuf[0] = opts[j];
3611 				cbuf[1] = '\0';
3612 				_malloc_message(getprogname(),
3613 				    ": (malloc) Unsupported character in "
3614 				    "malloc options: '", cbuf, "'\n");
3615 			}
3616 			}
3617 		}
3618 	}
3619 	errno = serrno;
3620 
3621 	/* Take care to call atexit() only once. */
3622 	if (opt_print_stats) {
3623 		/* Print statistics at exit. */
3624 		atexit(malloc_print_stats);
3625 	}
3626 
3627 	/* Set variables according to the value of opt_small_max_2pow. */
3628 	if (opt_small_max_2pow < opt_quantum_2pow)
3629 		opt_small_max_2pow = opt_quantum_2pow;
3630 	small_max = (1 << opt_small_max_2pow);
3631 
3632 	/* Set bin-related variables. */
3633 	bin_maxclass = (pagesize >> 1);
3634 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3635 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3636 	assert(ntbins <= opt_quantum_2pow);
3637 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3638 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3639 
3640 	/* Set variables according to the value of opt_quantum_2pow. */
3641 	quantum = (1 << opt_quantum_2pow);
3642 	quantum_mask = quantum - 1;
3643 	if (ntbins > 0)
3644 		small_min = (quantum >> 1) + 1;
3645 	else
3646 		small_min = 1;
3647 	assert(small_min <= quantum);
3648 
3649 	/* Set variables according to the value of opt_chunk_2pow. */
3650 	chunksize = (1LU << opt_chunk_2pow);
3651 	chunksize_mask = chunksize - 1;
3652 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3653 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3654 	{
3655 		unsigned header_size;
3656 
3657 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3658 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3659 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3660 		if ((header_size & pagesize_mask) != 0)
3661 			arena_chunk_header_npages++;
3662 	}
3663 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3664 	    pagesize_2pow);
3665 
3666 	UTRACE(0, 0, 0);
3667 
3668 #ifdef MALLOC_STATS
3669 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3670 #endif
3671 
3672 	/* Various sanity checks that regard configuration. */
3673 	assert(quantum >= sizeof(void *));
3674 	assert(quantum <= pagesize);
3675 	assert(chunksize >= pagesize);
3676 	assert(quantum * 4 <= chunksize);
3677 
3678 	/* Initialize chunks data. */
3679 	malloc_mutex_init(&chunks_mtx);
3680 	RB_INIT(&huge);
3681 #ifdef USE_BRK
3682 	malloc_mutex_init(&brk_mtx);
3683 	brk_base = sbrk(0);
3684 	brk_prev = brk_base;
3685 	brk_max = brk_base;
3686 #endif
3687 #ifdef MALLOC_STATS
3688 	huge_nmalloc = 0;
3689 	huge_ndalloc = 0;
3690 	huge_nralloc = 0;
3691 	huge_allocated = 0;
3692 #endif
3693 	RB_INIT(&old_chunks);
3694 
3695 	/* Initialize base allocation data structures. */
3696 #ifdef MALLOC_STATS
3697 	base_mapped = 0;
3698 #endif
3699 #ifdef USE_BRK
3700 	/*
3701 	 * Allocate a base chunk here, since it doesn't actually have to be
3702 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3703 	 * the use of space that would otherwise be wasted.
3704 	 */
3705 	base_pages_alloc(0);
3706 #endif
3707 	base_chunk_nodes = NULL;
3708 	malloc_mutex_init(&base_mtx);
3709 
3710 	if (ncpus > 1) {
3711 		/*
3712 		 * For SMP systems, create four times as many arenas as there
3713 		 * are CPUs by default.
3714 		 */
3715 		opt_narenas_lshift += 2;
3716 	}
3717 
3718 	/* Determine how many arenas to use. */
3719 	narenas = ncpus;
3720 	if (opt_narenas_lshift > 0) {
3721 		if ((narenas << opt_narenas_lshift) > narenas)
3722 			narenas <<= opt_narenas_lshift;
3723 		/*
3724 		 * Make sure not to exceed the limits of what base_malloc()
3725 		 * can handle.
3726 		 */
3727 		if (narenas * sizeof(arena_t *) > chunksize)
3728 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3729 	} else if (opt_narenas_lshift < 0) {
3730 		if ((narenas << opt_narenas_lshift) < narenas)
3731 			narenas <<= opt_narenas_lshift;
3732 		/* Make sure there is at least one arena. */
3733 		if (narenas == 0)
3734 			narenas = 1;
3735 	}
3736 
3737 	next_arena = 0;
3738 
3739 	/* Allocate and initialize arenas. */
3740 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3741 	if (arenas == NULL) {
3742 		malloc_mutex_unlock(&init_lock);
3743 		return (true);
3744 	}
3745 	/*
3746 	 * Zero the array.  In practice, this should always be pre-zeroed,
3747 	 * since it was just mmap()ed, but let's be sure.
3748 	 */
3749 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3750 
3751 	/*
3752 	 * Initialize one arena here.  The rest are lazily created in
3753 	 * arena_choose_hard().
3754 	 */
3755 	arenas_extend(0);
3756 	if (arenas[0] == NULL) {
3757 		malloc_mutex_unlock(&init_lock);
3758 		return (true);
3759 	}
3760 
3761 	malloc_mutex_init(&arenas_mtx);
3762 
3763 	malloc_initialized = true;
3764 	malloc_mutex_unlock(&init_lock);
3765 	return (false);
3766 }
3767 
3768 /*
3769  * End general internal functions.
3770  */
3771 /******************************************************************************/
3772 /*
3773  * Begin malloc(3)-compatible functions.
3774  */
3775 
3776 void *
3777 malloc(size_t size)
3778 {
3779 	void *ret;
3780 
3781 	if (malloc_init()) {
3782 		ret = NULL;
3783 		goto RETURN;
3784 	}
3785 
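	/*
	 * By default a zero-byte request is bumped to one byte; with SysV
	 * compatibility enabled (opt_sysv), malloc(0) returns NULL instead.
	 */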
3786 	if (size == 0) {
3787 		if (opt_sysv == false)
3788 			size = 1;
3789 		else {
3790 			ret = NULL;
3791 			goto RETURN;
3792 		}
3793 	}
3794 
3795 	ret = imalloc(size);
3796 
3797 RETURN:
3798 	if (ret == NULL) {
3799 		if (opt_xmalloc) {
3800 			_malloc_message(getprogname(),
3801 			    ": (malloc) Error in malloc(): out of memory\n", "",
3802 			    "");
3803 			abort();
3804 		}
3805 		errno = ENOMEM;
3806 	}
3807 
3808 	UTRACE(0, size, ret);
3809 	return (ret);
3810 }
3811 
3812 int
3813 posix_memalign(void **memptr, size_t alignment, size_t size)
3814 {
3815 	int ret;
3816 	void *result;
3817 
3818 	if (malloc_init())
3819 		result = NULL;
3820 	else {
3821 		/* Make sure that alignment is a large enough power of 2. */
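		/*
		 * ((alignment - 1) & alignment) is zero only when alignment is
		 * zero or a power of two; combined with the sizeof(void *)
		 * lower bound this enforces POSIX's requirement that alignment
		 * be a power-of-two multiple of sizeof(void *).
		 */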
3822 		if (((alignment - 1) & alignment) != 0
3823 		    || alignment < sizeof(void *)) {
3824 			if (opt_xmalloc) {
3825 				_malloc_message(getprogname(),
3826 				    ": (malloc) Error in posix_memalign(): "
3827 				    "invalid alignment\n", "", "");
3828 				abort();
3829 			}
3830 			result = NULL;
3831 			ret = EINVAL;
3832 			goto RETURN;
3833 		}
3834 
3835 		result = ipalloc(alignment, size);
3836 	}
3837 
3838 	if (result == NULL) {
3839 		if (opt_xmalloc) {
3840 			_malloc_message(getprogname(),
3841 			": (malloc) Error in posix_memalign(): out of memory\n",
3842 			"", "");
3843 			abort();
3844 		}
3845 		ret = ENOMEM;
3846 		goto RETURN;
3847 	}
3848 
3849 	*memptr = result;
3850 	ret = 0;
3851 
3852 RETURN:
3853 	UTRACE(0, size, result);
3854 	return (ret);
3855 }
3856 
3857 void *
3858 calloc(size_t num, size_t size)
3859 {
3860 	void *ret;
3861 	size_t num_size;
3862 
3863 	if (malloc_init()) {
3864 		num_size = 0;
3865 		ret = NULL;
3866 		goto RETURN;
3867 	}
3868 
3869 	num_size = num * size;
3870 	if (num_size == 0) {
3871 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3872 			num_size = 1;
3873 		else {
3874 			ret = NULL;
3875 			goto RETURN;
3876 		}
3877 	/*
3878 	 * Try to avoid division here.  We know that it isn't possible to
3879 	 * overflow during multiplication if neither operand uses any of the
3880 	 * most significant half of the bits in a size_t.
3881 	 */
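	/*
	 * For example, with a 32-bit size_t the test below checks whether num
	 * or size uses any bit at or above bit 16; if both are below 65536,
	 * the product is below 2^32 and cannot overflow, so the division is
	 * skipped.
	 */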
3882 	} else if ((unsigned long long)((num | size) &
3883 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3884 	   (num_size / size != num)) {
3885 		/* size_t overflow. */
3886 		ret = NULL;
3887 		goto RETURN;
3888 	}
3889 
3890 	ret = icalloc(num_size);
3891 
3892 RETURN:
3893 	if (ret == NULL) {
3894 		if (opt_xmalloc) {
3895 			_malloc_message(getprogname(),
3896 			    ": (malloc) Error in calloc(): out of memory\n", "",
3897 			    "");
3898 			abort();
3899 		}
3900 		errno = ENOMEM;
3901 	}
3902 
3903 	UTRACE(0, num_size, ret);
3904 	return (ret);
3905 }
3906 
3907 void *
3908 realloc(void *ptr, size_t size)
3909 {
3910 	void *ret;
3911 
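	/*
	 * With SysV compatibility enabled, realloc(ptr, 0) frees ptr and
	 * returns NULL; otherwise a zero-byte request is treated as a request
	 * for one byte.
	 */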
3912 	if (size == 0) {
3913 		if (opt_sysv == false)
3914 			size = 1;
3915 		else {
3916 			if (ptr != NULL)
3917 				idalloc(ptr);
3918 			ret = NULL;
3919 			goto RETURN;
3920 		}
3921 	}
3922 
3923 	if (ptr != NULL) {
3924 		assert(malloc_initialized);
3925 
3926 		ret = iralloc(ptr, size);
3927 
3928 		if (ret == NULL) {
3929 			if (opt_xmalloc) {
3930 				_malloc_message(getprogname(),
3931 				    ": (malloc) Error in realloc(): out of "
3932 				    "memory\n", "", "");
3933 				abort();
3934 			}
3935 			errno = ENOMEM;
3936 		}
3937 	} else {
3938 		if (malloc_init())
3939 			ret = NULL;
3940 		else
3941 			ret = imalloc(size);
3942 
3943 		if (ret == NULL) {
3944 			if (opt_xmalloc) {
3945 				_malloc_message(getprogname(),
3946 				    ": (malloc) Error in realloc(): out of "
3947 				    "memory\n", "", "");
3948 				abort();
3949 			}
3950 			errno = ENOMEM;
3951 		}
3952 	}
3953 
3954 RETURN:
3955 	UTRACE(ptr, size, ret);
3956 	return (ret);
3957 }
3958 
3959 void
3960 free(void *ptr)
3961 {
3962 
3963 	UTRACE(ptr, 0, 0);
3964 	if (ptr != NULL) {
3965 		assert(malloc_initialized);
3966 
3967 		idalloc(ptr);
3968 	}
3969 }
3970 
3971 /*
3972  * End malloc(3)-compatible functions.
3973  */
3974 /******************************************************************************/
3975 /*
3976  * Begin non-standard functions.
3977  */
3978 #ifndef __NetBSD__
3979 size_t
3980 malloc_usable_size(const void *ptr)
3981 {
3982 
3983 	assert(ptr != NULL);
3984 
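	/*
	 * isalloc() reports the size of the size class backing ptr, which may
	 * be larger than the size that was originally requested.
	 */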
3985 	return (isalloc(ptr));
3986 }
3987 #endif
3988 
3989 /*
3990  * End non-standard functions.
3991  */
3992 /******************************************************************************/
3993 /*
3994  * Begin library-private functions, used by threading libraries for protection
3995  * of malloc during fork().  These functions are only called if the program is
3996  * running in threaded mode, so there is no need to check whether the program
3997  * is threaded here.
3998  */
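/*
 * As an illustrative sketch (not part of this file), a threading library
 * would typically arrange for these handlers to run around fork(), e.g. via
 *
 *	pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork);
 *
 * so that no malloc mutex is held across fork() and the child inherits a
 * consistent heap.
 */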
3999 
4000 void
4001 _malloc_prefork(void)
4002 {
4003 	unsigned i;
4004 
4005 	/* Acquire all mutexes in a safe order. */
4006 
4007 	malloc_mutex_lock(&arenas_mtx);
4008 	for (i = 0; i < narenas; i++) {
4009 		if (arenas[i] != NULL)
4010 			malloc_mutex_lock(&arenas[i]->mtx);
4011 	}
4012 
4013 	malloc_mutex_lock(&base_mtx);
4014 
4015 	malloc_mutex_lock(&chunks_mtx);
4016 }
4017 
4018 void
4019 _malloc_postfork(void)
4020 {
4021 	unsigned i;
4022 
4023 	/* Release all mutexes, now that fork() has completed. */
4024 
4025 	malloc_mutex_unlock(&chunks_mtx);
4026 
4027 	malloc_mutex_unlock(&base_mtx);
4028 
4029 	for (i = 0; i < narenas; i++) {
4030 		if (arenas[i] != NULL)
4031 			malloc_mutex_unlock(&arenas[i]->mtx);
4032 	}
4033 	malloc_mutex_unlock(&arenas_mtx);
4034 }
4035 
4036 /*
4037  * End library-private functions.
4038  */
4039 /******************************************************************************/
4040