xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision 60dbe74596d99c974a4936eec2674e13fae0a482)
1 /*	$NetBSD: jemalloc.c,v 1.7 2007/10/15 10:30:56 yamt Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
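
/*
 * Illustrative examples of the rounding described above, assuming the
 * default 4 kB page size, 16 byte quantum, and 1 MB chunk size
 * (request size -> size class):
 *
 *   malloc(3)        ->    4   (tiny: next power of 2)
 *   malloc(100)      ->  112   (quantum-spaced: next multiple of 16)
 *   malloc(700)      -> 1024   (sub-page: next power of 2)
 *   malloc(5000)     -> 8 kB   (large: next multiple of the page size)
 *   malloc(2 MB + 1) -> 3 MB   (huge: next multiple of the chunk size)
 */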
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.7 2007/10/15 10:30:56 yamt Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 void	_malloc_prefork(void);
165 void	_malloc_postfork(void);
166 ssize_t	_write(int, const void *, size_t);
167 const char	*_getprogname(void);
168 #endif
169 
170 #ifdef __FreeBSD__
171 #include "un-namespace.h"
172 #endif
173 
174 /* MALLOC_STATS enables statistics calculation. */
175 #ifndef MALLOC_PRODUCTION
176 #  define MALLOC_STATS
177 #endif
178 
179 #ifdef MALLOC_DEBUG
180 #  ifdef NDEBUG
181 #    undef NDEBUG
182 #  endif
183 #else
184 #  ifndef NDEBUG
185 #    define NDEBUG
186 #  endif
187 #endif
188 #include <assert.h>
189 
190 #ifdef MALLOC_DEBUG
191    /* Disable inlining to make debugging easier. */
192 #  define inline
193 #endif
194 
195 /* Size of stack-allocated buffer passed to strerror_r(). */
196 #define	STRERROR_BUF		64
197 
198 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
199 #ifdef __i386__
200 #  define QUANTUM_2POW_MIN	4
201 #  define SIZEOF_PTR_2POW	2
202 #  define USE_BRK
203 #endif
204 #ifdef __ia64__
205 #  define QUANTUM_2POW_MIN	4
206 #  define SIZEOF_PTR_2POW	3
207 #endif
208 #ifdef __alpha__
209 #  define QUANTUM_2POW_MIN	4
210 #  define SIZEOF_PTR_2POW	3
211 #  define NO_TLS
212 #endif
213 #ifdef __sparc64__
214 #  define QUANTUM_2POW_MIN	4
215 #  define SIZEOF_PTR_2POW	3
216 #  define NO_TLS
217 #endif
218 #ifdef __amd64__
219 #  define QUANTUM_2POW_MIN	4
220 #  define SIZEOF_PTR_2POW	3
221 #endif
222 #ifdef __arm__
223 #  define QUANTUM_2POW_MIN	3
224 #  define SIZEOF_PTR_2POW	2
225 #  define USE_BRK
226 #  define NO_TLS
227 #endif
228 #ifdef __powerpc__
229 #  define QUANTUM_2POW_MIN	4
230 #  define SIZEOF_PTR_2POW	2
231 #  define USE_BRK
232 #endif
233 #if defined(__sparc__) && !defined(__sparc64__)
234 #  define QUANTUM_2POW_MIN	4
235 #  define SIZEOF_PTR_2POW	2
236 #  define USE_BRK
237 #endif
238 #ifdef __vax__
239 #  define QUANTUM_2POW_MIN	4
240 #  define SIZEOF_PTR_2POW	2
241 #  define USE_BRK
242 #endif
243 #ifdef __sh__
244 #  define QUANTUM_2POW_MIN	4
245 #  define SIZEOF_PTR_2POW	2
246 #  define USE_BRK
247 #endif
248 #ifdef __m68k__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	2
251 #  define USE_BRK
252 #endif
253 #ifdef __mips__
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #endif
258 #ifdef __hppa__
259 #  define QUANTUM_2POW_MIN     4
260 #  define SIZEOF_PTR_2POW      2
261 #  define USE_BRK
262 #endif
263 
264 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
265 
266 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
267 #ifndef SIZEOF_INT_2POW
268 #  define SIZEOF_INT_2POW	2
269 #endif
270 
271 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
272 #if (!defined(PIC) && !defined(NO_TLS))
273 #  define NO_TLS
274 #endif
275 
276 /*
277  * Size and alignment of memory chunks that are allocated by the OS's virtual
278  * memory system.
279  */
280 #define	CHUNK_2POW_DEFAULT	20
281 
282 /*
283  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
284  * so over-estimates are okay (up to a point), but under-estimates will
285  * negatively affect performance.
286  */
287 #define	CACHELINE_2POW		6
288 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
289 
290 /* Smallest size class to support. */
291 #define	TINY_MIN_2POW		1
292 
293 /*
294  * Maximum size class that is a multiple of the quantum, but not (necessarily)
295  * a power of 2.  Above this size, allocations are rounded up to the nearest
296  * power of 2.
297  */
298 #define	SMALL_MAX_2POW_DEFAULT	9
299 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
300 
301 /*
302  * Maximum desired run header overhead.  Runs are sized as small as possible
303  * such that this setting is still honored, without violating other constraints.
304  * The goal is to make runs as small as possible without exceeding a per-run
305  * external fragmentation threshold.
306  *
307  * Note that it is possible to set this low enough that it cannot be honored
308  * for some/all object sizes, since there is one bit of header overhead per
309  * object (plus a constant).  In such cases, this constraint is relaxed.
310  *
311  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
312  * overhead for which RUN_MAX_OVRHD is relaxed.
313  */
314 #define RUN_MAX_OVRHD		0.015
315 #define RUN_MAX_OVRHD_RELAX	1.5
316 
317 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
318 #define RUN_MAX_SMALL_2POW	15
319 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
320 
321 /******************************************************************************/
322 
323 #ifdef __FreeBSD__
324 /*
325  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
326  * they require malloc()ed memory.
327  */
328 typedef struct {
329 	spinlock_t	lock;
330 } malloc_mutex_t;
331 
332 /* Set to true once the allocator has been initialized. */
333 static bool malloc_initialized = false;
334 
335 /* Used to avoid initialization races. */
336 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
337 #else
338 #define	malloc_mutex_t	mutex_t
339 
340 /* Set to true once the allocator has been initialized. */
341 static bool malloc_initialized = false;
342 
343 /* Used to avoid initialization races. */
344 static mutex_t init_lock = MUTEX_INITIALIZER;
345 #endif
346 
347 /******************************************************************************/
348 /*
349  * Statistics data structures.
350  */
351 
352 #ifdef MALLOC_STATS
353 
354 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
355 struct malloc_bin_stats_s {
356 	/*
357 	 * Number of allocation requests that corresponded to the size of this
358 	 * bin.
359 	 */
360 	uint64_t	nrequests;
361 
362 	/* Total number of runs created for this bin's size class. */
363 	uint64_t	nruns;
364 
365 	/*
366 	 * Total number of runs reused by extracting them from the runs tree for
367 	 * this bin's size class.
368 	 */
369 	uint64_t	reruns;
370 
371 	/* High-water mark for this bin. */
372 	unsigned long	highruns;
373 
374 	/* Current number of runs in this bin. */
375 	unsigned long	curruns;
376 };
377 
378 typedef struct arena_stats_s arena_stats_t;
379 struct arena_stats_s {
380 	/* Number of bytes currently mapped. */
381 	size_t		mapped;
382 
383 	/* Per-size-category statistics. */
384 	size_t		allocated_small;
385 	uint64_t	nmalloc_small;
386 	uint64_t	ndalloc_small;
387 
388 	size_t		allocated_large;
389 	uint64_t	nmalloc_large;
390 	uint64_t	ndalloc_large;
391 };
392 
393 typedef struct chunk_stats_s chunk_stats_t;
394 struct chunk_stats_s {
395 	/* Number of chunks that were allocated. */
396 	uint64_t	nchunks;
397 
398 	/* High-water mark for number of chunks allocated. */
399 	unsigned long	highchunks;
400 
401 	/*
402 	 * Current number of chunks allocated.  This value is not needed for any
403 	 * other purpose, so it is tracked only in order to be able to update
404 	 * highchunks.
405 	 */
406 	unsigned long	curchunks;
407 };
408 
409 #endif /* #ifdef MALLOC_STATS */
410 
411 /******************************************************************************/
412 /*
413  * Chunk data structures.
414  */
415 
416 /* Tree of chunks. */
417 typedef struct chunk_node_s chunk_node_t;
418 struct chunk_node_s {
419 	/* Linkage for the chunk tree. */
420 	RB_ENTRY(chunk_node_s) link;
421 
422 	/*
423 	 * Pointer to the chunk that this tree node is responsible for.  In some
424 	 * (but certainly not all) cases, this data structure is placed at the
425 	 * beginning of the corresponding chunk, so this field may point to this
426 	 * node.
427 	 */
428 	void	*chunk;
429 
430 	/* Total chunk size. */
431 	size_t	size;
432 };
433 typedef struct chunk_tree_s chunk_tree_t;
434 RB_HEAD(chunk_tree_s, chunk_node_s);
435 
436 /******************************************************************************/
437 /*
438  * Arena data structures.
439  */
440 
441 typedef struct arena_s arena_t;
442 typedef struct arena_bin_s arena_bin_t;
443 
444 typedef struct arena_chunk_map_s arena_chunk_map_t;
445 struct arena_chunk_map_s {
446 	/* Number of pages in run. */
447 	uint32_t	npages;
448 	/*
449 	 * Position within run.  For a free run, this is POS_FREE for the first
450 	 * and last pages.  The POS_FREE special value makes it possible to
451 	 * quickly coalesce free runs.
452 	 *
453 	 * This is the limiting factor for chunksize; there can be at most 2^31
454 	 * pages in a run.
455 	 */
456 #define POS_FREE ((uint32_t)0xffffffffU)
457 	uint32_t	pos;
458 };
459 
460 /* Arena chunk header. */
461 typedef struct arena_chunk_s arena_chunk_t;
462 struct arena_chunk_s {
463 	/* Arena that owns the chunk. */
464 	arena_t *arena;
465 
466 	/* Linkage for the arena's chunk tree. */
467 	RB_ENTRY(arena_chunk_s) link;
468 
469 	/*
470 	 * Number of pages in use.  This is maintained in order to make
471 	 * detection of empty chunks fast.
472 	 */
473 	uint32_t pages_used;
474 
475 	/*
476 	 * Every time a free run larger than this value is created/coalesced,
477 	 * this value is increased.  The only way that the value decreases is if
478 	 * arena_run_alloc() fails to find a free run as large as advertised by
479 	 * this value.
480 	 */
481 	uint32_t max_frun_npages;
482 
483 	/*
484 	 * Every time a free run that starts at an earlier page than this value
485 	 * is created/coalesced, this value is decreased.  It is reset in a
486 	 * similar fashion to max_frun_npages.
487 	 */
488 	uint32_t min_frun_ind;
489 
490 	/*
491 	 * Map of pages within chunk that keeps track of free/large/small.  For
492 	 * free runs, only the map entries for the first and last pages are
493 	 * kept up to date, so that free runs can be quickly coalesced.
494 	 */
495 	arena_chunk_map_t map[1]; /* Dynamically sized. */
496 };
497 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
498 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
499 
500 typedef struct arena_run_s arena_run_t;
501 struct arena_run_s {
502 	/* Linkage for run trees. */
503 	RB_ENTRY(arena_run_s) link;
504 
505 #ifdef MALLOC_DEBUG
506 	uint32_t	magic;
507 #  define ARENA_RUN_MAGIC 0x384adf93
508 #endif
509 
510 	/* Bin this run is associated with. */
511 	arena_bin_t	*bin;
512 
513 	/* Index of first element that might have a free region. */
514 	unsigned	regs_minelm;
515 
516 	/* Number of free regions in run. */
517 	unsigned	nfree;
518 
519 	/* Bitmask of in-use regions (0: in use, 1: free). */
520 	unsigned	regs_mask[1]; /* Dynamically sized. */
521 };
522 typedef struct arena_run_tree_s arena_run_tree_t;
523 RB_HEAD(arena_run_tree_s, arena_run_s);
524 
525 struct arena_bin_s {
526 	/*
527 	 * Current run being used to service allocations of this bin's size
528 	 * class.
529 	 */
530 	arena_run_t	*runcur;
531 
532 	/*
533 	 * Tree of non-full runs.  This tree is used when looking for an
534 	 * existing run when runcur is no longer usable.  We choose the
535 	 * non-full run that is lowest in memory; this policy tends to keep
536 	 * objects packed well, and it can also help reduce the number of
537 	 * almost-empty chunks.
538 	 */
539 	arena_run_tree_t runs;
540 
541 	/* Size of regions in a run for this bin's size class. */
542 	size_t		reg_size;
543 
544 	/* Total size of a run for this bin's size class. */
545 	size_t		run_size;
546 
547 	/* Total number of regions in a run for this bin's size class. */
548 	uint32_t	nregs;
549 
550 	/* Number of elements in a run's regs_mask for this bin's size class. */
551 	uint32_t	regs_mask_nelms;
552 
553 	/* Offset of first region in a run for this bin's size class. */
554 	uint32_t	reg0_offset;
555 
556 #ifdef MALLOC_STATS
557 	/* Bin statistics. */
558 	malloc_bin_stats_t stats;
559 #endif
560 };
561 
562 struct arena_s {
563 #ifdef MALLOC_DEBUG
564 	uint32_t		magic;
565 #  define ARENA_MAGIC 0x947d3d24
566 #endif
567 
568 	/* All operations on this arena require that mtx be locked. */
569 	malloc_mutex_t		mtx;
570 
571 #ifdef MALLOC_STATS
572 	arena_stats_t		stats;
573 #endif
574 
575 	/*
576 	 * Tree of chunks this arena manages.
577 	 */
578 	arena_chunk_tree_t	chunks;
579 
580 	/*
581 	 * In order to avoid rapid chunk allocation/deallocation when an arena
582 	 * oscillates right on the cusp of needing a new chunk, cache the most
583 	 * recently freed chunk.  This caching is disabled by opt_hint.
584 	 *
585 	 * There is one spare chunk per arena, rather than one spare total, in
586 	 * order to avoid interactions between multiple threads that could make
587 	 * a single spare inadequate.
588 	 */
589 	arena_chunk_t *spare;
590 
591 	/*
592 	 * bins is used to store runs that serve regions of the following sizes,
593 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
594 	 *
595 	 *   bins[i] | size |
596 	 *   --------+------+
597 	 *        0  |    2 |
598 	 *        1  |    4 |
599 	 *        2  |    8 |
600 	 *   --------+------+
601 	 *        3  |   16 |
602 	 *        4  |   32 |
603 	 *        5  |   48 |
604 	 *        6  |   64 |
605 	 *           :      :
606 	 *           :      :
607 	 *       33  |  496 |
608 	 *       34  |  512 |
609 	 *   --------+------+
610 	 *       35  | 1024 |
611 	 *       36  | 2048 |
612 	 *   --------+------+
613 	 */
614 	arena_bin_t		bins[1]; /* Dynamically sized. */
615 };
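
/*
 * For instance, with the defaults shown above, a 33 byte request is rounded
 * up to the 48 byte quantum-spaced class and served from bins[5]: the three
 * tiny bins come first, and quantum-spaced classes follow at index
 * ntbins + (size / quantum) - 1 == 3 + (48 / 16) - 1 == 5, matching the
 * table.
 */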
616 
617 /******************************************************************************/
618 /*
619  * Data.
620  */
621 
622 /* Number of CPUs. */
623 static unsigned		ncpus;
624 
625 /* VM page size. */
626 static size_t		pagesize;
627 static size_t		pagesize_mask;
628 static size_t		pagesize_2pow;
629 
630 /* Various bin-related settings. */
631 static size_t		bin_maxclass; /* Max size class for bins. */
632 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
633 static unsigned		nqbins; /* Number of quantum-spaced bins. */
634 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
635 static size_t		small_min;
636 static size_t		small_max;
637 
638 /* Various quantum-related settings. */
639 static size_t		quantum;
640 static size_t		quantum_mask; /* (quantum - 1). */
641 
642 /* Various chunk-related settings. */
643 static size_t		chunksize;
644 static size_t		chunksize_mask; /* (chunksize - 1). */
645 static int		chunksize_2pow;
646 static unsigned		chunk_npages;
647 static unsigned		arena_chunk_header_npages;
648 static size_t		arena_maxclass; /* Max size class for arenas. */
649 
650 /********/
651 /*
652  * Chunks.
653  */
654 
655 /* Protects chunk-related data structures. */
656 static malloc_mutex_t	chunks_mtx;
657 
658 /* Tree of chunks that are stand-alone huge allocations. */
659 static chunk_tree_t	huge;
660 
661 #ifdef USE_BRK
662 /*
663  * Try to use brk for chunk-size allocations, due to address space constraints.
664  */
665 /*
666  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
667  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
668  * could cause recursive lock acquisition).
669  */
670 static malloc_mutex_t	brk_mtx;
671 /* Result of first sbrk(0) call. */
672 static void		*brk_base;
673 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
674 static void		*brk_prev;
675 /* Current upper limit on brk addresses. */
676 static void		*brk_max;
677 #endif
678 
679 #ifdef MALLOC_STATS
680 /* Huge allocation statistics. */
681 static uint64_t		huge_nmalloc;
682 static uint64_t		huge_ndalloc;
683 static size_t		huge_allocated;
684 #endif
685 
686 /*
687  * Tree of chunks that were previously allocated.  This is used when allocating
688  * chunks, in an attempt to re-use address space.
689  */
690 static chunk_tree_t	old_chunks;
691 
692 /****************************/
693 /*
694  * base (internal allocation).
695  */
696 
697 /*
698  * Current pages that are being used for internal memory allocations.  These
699  * pages are carved up in cacheline-size quanta, so that there is no chance of
700  * false cache line sharing.
701  */
702 static void		*base_pages;
703 static void		*base_next_addr;
704 static void		*base_past_addr; /* Addr immediately past base_pages. */
705 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
706 static malloc_mutex_t	base_mtx;
707 #ifdef MALLOC_STATS
708 static size_t		base_mapped;
709 #endif
710 
711 /********/
712 /*
713  * Arenas.
714  */
715 
716 /*
717  * Arenas that are used to service external requests.  Not all elements of the
718  * arenas array are necessarily used; arenas are created lazily as needed.
719  */
720 static arena_t		**arenas;
721 static unsigned		narenas;
722 static unsigned		next_arena;
723 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
724 
725 #ifndef NO_TLS
726 /*
727  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
728  * for allocations.
729  */
730 static __thread arena_t	*arenas_map;
731 #define	get_arenas_map()	(arenas_map)
732 #define	set_arenas_map(x)	(arenas_map = x)
733 #else
734 static thread_key_t arenas_map_key;
735 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
736 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
737 #endif
738 
739 #ifdef MALLOC_STATS
740 /* Chunk statistics. */
741 static chunk_stats_t	stats_chunks;
742 #endif
743 
744 /*******************************/
745 /*
746  * Runtime configuration options.
747  */
748 const char	*_malloc_options;
749 
750 #ifndef MALLOC_PRODUCTION
751 static bool	opt_abort = true;
752 static bool	opt_junk = true;
753 #else
754 static bool	opt_abort = false;
755 static bool	opt_junk = false;
756 #endif
757 static bool	opt_hint = false;
758 static bool	opt_print_stats = false;
759 static size_t	opt_quantum_2pow = QUANTUM_2POW_MIN;
760 static size_t	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
761 static size_t	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
762 static bool	opt_utrace = false;
763 static bool	opt_sysv = false;
764 static bool	opt_xmalloc = false;
765 static bool	opt_zero = false;
766 static int32_t	opt_narenas_lshift = 0;
767 
768 typedef struct {
769 	void	*p;
770 	size_t	s;
771 	void	*r;
772 } malloc_utrace_t;
773 
774 #define	UTRACE(a, b, c)							\
775 	if (opt_utrace) {						\
776 		malloc_utrace_t ut;					\
777 		ut.p = a;						\
778 		ut.s = b;						\
779 		ut.r = c;						\
780 		xutrace(&ut, sizeof(ut));				\
781 	}
782 
783 /******************************************************************************/
784 /*
785  * Begin function prototypes for non-inline static functions.
786  */
787 
788 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
789 		const char *p4);
790 #ifdef MALLOC_STATS
791 static void	malloc_printf(const char *format, ...);
792 #endif
793 static char	*umax2s(uintmax_t x, char *s);
794 static bool	base_pages_alloc(size_t minsize);
795 static void	*base_alloc(size_t size);
796 static chunk_node_t *base_chunk_node_alloc(void);
797 static void	base_chunk_node_dealloc(chunk_node_t *node);
798 #ifdef MALLOC_STATS
799 static void	stats_print(arena_t *arena);
800 #endif
801 static void	*pages_map(void *addr, size_t size);
802 static void	*pages_map_align(void *addr, size_t size, int align);
803 static void	pages_unmap(void *addr, size_t size);
804 static void	*chunk_alloc(size_t size);
805 static void	chunk_dealloc(void *chunk, size_t size);
806 static arena_t	*choose_arena_hard(void);
807 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
808 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
809 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
810 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
811 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
812 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
813 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
814 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
815 static void	*arena_malloc(arena_t *arena, size_t size);
816 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
817     size_t alloc_size);
818 static size_t	arena_salloc(const void *ptr);
819 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
820 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
821 static bool	arena_new(arena_t *arena);
822 static arena_t	*arenas_extend(unsigned ind);
823 static void	*huge_malloc(size_t size);
824 static void	*huge_palloc(size_t alignment, size_t size);
825 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
826 static void	huge_dalloc(void *ptr);
827 static void	*imalloc(size_t size);
828 static void	*ipalloc(size_t alignment, size_t size);
829 static void	*icalloc(size_t size);
830 static size_t	isalloc(const void *ptr);
831 static void	*iralloc(void *ptr, size_t size);
832 static void	idalloc(void *ptr);
833 static void	malloc_print_stats(void);
834 static bool	malloc_init_hard(void);
835 
836 /*
837  * End function prototypes.
838  */
839 /******************************************************************************/
840 /*
841  * Begin mutex.
842  */
843 
844 #ifdef __NetBSD__
845 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
846 #define	malloc_mutex_lock(m)	mutex_lock(m)
847 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
848 #else	/* __NetBSD__ */
849 static inline void
850 malloc_mutex_init(malloc_mutex_t *a_mutex)
851 {
852 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
853 
854 	a_mutex->lock = lock;
855 }
856 
857 static inline void
858 malloc_mutex_lock(malloc_mutex_t *a_mutex)
859 {
860 
861 	if (__isthreaded)
862 		_SPINLOCK(&a_mutex->lock);
863 }
864 
865 static inline void
866 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
867 {
868 
869 	if (__isthreaded)
870 		_SPINUNLOCK(&a_mutex->lock);
871 }
872 #endif	/* __NetBSD__ */
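
/*
 * Usage sketch: each piece of the allocator's shared state is guarded by one
 * of these mutexes, e.g.
 *
 *	malloc_mutex_lock(&chunks_mtx);
 *	... manipulate the chunk trees ...
 *	malloc_mutex_unlock(&chunks_mtx);
 *
 * On NetBSD the three operations map onto the mutex_*() primitives pulled in
 * via <reentrant.h>; on FreeBSD they are spinlock-based, since normal
 * pthread mutexes would themselves require malloc()ed memory.
 */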
873 
874 /*
875  * End mutex.
876  */
877 /******************************************************************************/
878 /*
879  * Begin Utility functions/macros.
880  */
881 
882 /* Return the chunk address for allocation address a. */
883 #define	CHUNK_ADDR2BASE(a)						\
884 	((void *)((uintptr_t)(a) & ~chunksize_mask))
885 
886 /* Return the chunk offset of address a. */
887 #define	CHUNK_ADDR2OFFSET(a)						\
888 	((size_t)((uintptr_t)(a) & chunksize_mask))
889 
890 /* Return the smallest chunk multiple that is >= s. */
891 #define	CHUNK_CEILING(s)						\
892 	(((s) + chunksize_mask) & ~chunksize_mask)
893 
894 /* Return the smallest cacheline multiple that is >= s. */
895 #define	CACHELINE_CEILING(s)						\
896 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
897 
898 /* Return the smallest quantum multiple that is >= a. */
899 #define	QUANTUM_CEILING(a)						\
900 	(((a) + quantum_mask) & ~quantum_mask)
901 
902 /* Return the smallest pagesize multiple that is >= s. */
903 #define	PAGE_CEILING(s)							\
904 	(((s) + pagesize_mask) & ~pagesize_mask)
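
/*
 * Example, assuming the default 1 MB chunk (chunksize_mask == 0xfffff):
 *
 *   CHUNK_ADDR2BASE(0x1234567)   == (void *)0x1200000
 *   CHUNK_ADDR2OFFSET(0x1234567) == 0x34567
 *   CHUNK_CEILING(0x180000)      == 0x200000 (2 MB)
 */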
905 
906 /* Compute the smallest power of 2 that is >= x. */
907 static inline size_t
908 pow2_ceil(size_t x)
909 {
910 
911 	x--;
912 	x |= x >> 1;
913 	x |= x >> 2;
914 	x |= x >> 4;
915 	x |= x >> 8;
916 	x |= x >> 16;
917 #if (SIZEOF_PTR == 8)
918 	x |= x >> 32;
919 #endif
920 	x++;
921 	return (x);
922 }
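
/*
 * For example, pow2_ceil(100): 100 - 1 == 99 == 0x63; the shifted ORs smear
 * the most significant set bit into every lower position, giving
 * 0x7f == 127; adding 1 yields 128.  Powers of 2 are returned unchanged,
 * e.g. pow2_ceil(128) == 128.
 */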
923 
924 static void
925 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
926 {
927 
928 	_write(STDERR_FILENO, p1, strlen(p1));
929 	_write(STDERR_FILENO, p2, strlen(p2));
930 	_write(STDERR_FILENO, p3, strlen(p3));
931 	_write(STDERR_FILENO, p4, strlen(p4));
932 }
933 
934 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
935 	    const char *p4) = wrtmessage;
936 
937 #ifdef MALLOC_STATS
938 /*
939  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
940  */
941 static void
942 malloc_printf(const char *format, ...)
943 {
944 	char buf[4096];
945 	va_list ap;
946 
947 	va_start(ap, format);
948 	vsnprintf(buf, sizeof(buf), format, ap);
949 	va_end(ap);
950 	_malloc_message(buf, "", "", "");
951 }
952 #endif
953 
954 /*
955  * We don't want to depend on vsnprintf() for production builds, since that can
956  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
957  * integer printing functionality, so that malloc_printf() use can be limited to
958  * MALLOC_STATS code.
959  */
960 #define UMAX2S_BUFSIZE	21
961 static char *
962 umax2s(uintmax_t x, char *s)
963 {
964 	unsigned i;
965 
966 	/* Make sure UMAX2S_BUFSIZE is large enough. */
967 	/* LINTED */
968 	assert(sizeof(uintmax_t) <= 8);
969 
970 	i = UMAX2S_BUFSIZE - 1;
971 	s[i] = '\0';
972 	do {
973 		i--;
974 		s[i] = "0123456789"[(int)x % 10];
975 		x /= (uintmax_t)10LL;
976 	} while (x > 0);
977 
978 	return (&s[i]);
979 }
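
/*
 * Illustrative use: the caller supplies a buffer of at least UMAX2S_BUFSIZE
 * bytes, and the returned pointer aims at the first digit inside that
 * buffer, e.g.
 *
 *	char s[UMAX2S_BUFSIZE];
 *
 *	_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
 */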
980 
981 /******************************************************************************/
982 
983 static bool
984 base_pages_alloc(size_t minsize)
985 {
986 	size_t csize = 0;
987 
988 #ifdef USE_BRK
989 	/*
990 	 * Do special brk allocation here, since base allocations don't need to
991 	 * be chunk-aligned.
992 	 */
993 	if (brk_prev != (void *)-1) {
994 		void *brk_cur;
995 		intptr_t incr;
996 
997 		if (minsize != 0)
998 			csize = CHUNK_CEILING(minsize);
999 
1000 		malloc_mutex_lock(&brk_mtx);
1001 		do {
1002 			/* Get the current end of brk. */
1003 			brk_cur = sbrk(0);
1004 
1005 			/*
1006 			 * Calculate how much padding is necessary to
1007 			 * chunk-align the end of brk.  Don't worry about
1008 			 * brk_cur not being chunk-aligned though.
1009 			 */
1010 			incr = (intptr_t)chunksize
1011 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1012 			if (incr < minsize)
1013 				incr += csize;
1014 
1015 			brk_prev = sbrk(incr);
1016 			if (brk_prev == brk_cur) {
1017 				/* Success. */
1018 				malloc_mutex_unlock(&brk_mtx);
1019 				base_pages = brk_cur;
1020 				base_next_addr = base_pages;
1021 				base_past_addr = (void *)((uintptr_t)base_pages
1022 				    + incr);
1023 #ifdef MALLOC_STATS
1024 				base_mapped += incr;
1025 #endif
1026 				return (false);
1027 			}
1028 		} while (brk_prev != (void *)-1);
1029 		malloc_mutex_unlock(&brk_mtx);
1030 	}
1031 	if (minsize == 0) {
1032 		/*
1033 		 * Failure during initialization doesn't matter, so avoid
1034 		 * falling through to the mmap-based page mapping code.
1035 		 */
1036 		return (true);
1037 	}
1038 #endif
1039 	assert(minsize != 0);
1040 	csize = PAGE_CEILING(minsize);
1041 	base_pages = pages_map(NULL, csize);
1042 	if (base_pages == NULL)
1043 		return (true);
1044 	base_next_addr = base_pages;
1045 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1046 #ifdef MALLOC_STATS
1047 	base_mapped += csize;
1048 #endif
1049 	return (false);
1050 }
1051 
1052 static void *
1053 base_alloc(size_t size)
1054 {
1055 	void *ret;
1056 	size_t csize;
1057 
1058 	/* Round size up to nearest multiple of the cacheline size. */
1059 	csize = CACHELINE_CEILING(size);
1060 
1061 	malloc_mutex_lock(&base_mtx);
1062 
1063 	/* Make sure there's enough space for the allocation. */
1064 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1065 		if (base_pages_alloc(csize)) {
1066 			ret = NULL;
1067 			goto RETURN;
1068 		}
1069 	}
1070 
1071 	/* Allocate. */
1072 	ret = base_next_addr;
1073 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1074 
1075 RETURN:
1076 	malloc_mutex_unlock(&base_mtx);
1077 	return (ret);
1078 }
1079 
1080 static chunk_node_t *
1081 base_chunk_node_alloc(void)
1082 {
1083 	chunk_node_t *ret;
1084 
1085 	malloc_mutex_lock(&base_mtx);
1086 	if (base_chunk_nodes != NULL) {
1087 		ret = base_chunk_nodes;
1088 		/* LINTED */
1089 		base_chunk_nodes = *(chunk_node_t **)ret;
1090 		malloc_mutex_unlock(&base_mtx);
1091 	} else {
1092 		malloc_mutex_unlock(&base_mtx);
1093 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1094 	}
1095 
1096 	return (ret);
1097 }
1098 
1099 static void
1100 base_chunk_node_dealloc(chunk_node_t *node)
1101 {
1102 
1103 	malloc_mutex_lock(&base_mtx);
1104 	/* LINTED */
1105 	*(chunk_node_t **)node = base_chunk_nodes;
1106 	base_chunk_nodes = node;
1107 	malloc_mutex_unlock(&base_mtx);
1108 }
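
/*
 * Note that the recycled nodes above form a simple LIFO free list threaded
 * through the nodes' own storage: the first sizeof(void *) bytes of each
 * cached node hold the pointer to the next cached node (or NULL), so no
 * auxiliary bookkeeping structure is needed.
 */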
1109 
1110 /******************************************************************************/
1111 
1112 #ifdef MALLOC_STATS
1113 static void
1114 stats_print(arena_t *arena)
1115 {
1116 	unsigned i;
1117 	int gap_start;
1118 
1119 	malloc_printf(
1120 	    "          allocated/mapped            nmalloc      ndalloc\n");
1121 
1122 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1123 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1124 	    arena->stats.ndalloc_small);
1125 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1126 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1127 	    arena->stats.ndalloc_large);
1128 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1129 	    arena->stats.allocated_small + arena->stats.allocated_large,
1130 	    arena->stats.mapped,
1131 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1132 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1133 
1134 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1135 	    "    reruns maxruns curruns\n");
1136 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1137 		if (arena->bins[i].stats.nrequests == 0) {
1138 			if (gap_start == -1)
1139 				gap_start = i;
1140 		} else {
1141 			if (gap_start != -1) {
1142 				if (i > gap_start + 1) {
1143 					/* Gap of more than one size class. */
1144 					malloc_printf("[%u..%u]\n",
1145 					    gap_start, i - 1);
1146 				} else {
1147 					/* Gap of one size class. */
1148 					malloc_printf("[%u]\n", gap_start);
1149 				}
1150 				gap_start = -1;
1151 			}
1152 			malloc_printf(
1153 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1154 			    " %9llu %7lu %7lu\n",
1155 			    i,
1156 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1157 			    arena->bins[i].reg_size,
1158 			    arena->bins[i].nregs,
1159 			    arena->bins[i].run_size >> pagesize_2pow,
1160 			    arena->bins[i].stats.nrequests,
1161 			    arena->bins[i].stats.nruns,
1162 			    arena->bins[i].stats.reruns,
1163 			    arena->bins[i].stats.highruns,
1164 			    arena->bins[i].stats.curruns);
1165 		}
1166 	}
1167 	if (gap_start != -1) {
1168 		if (i > gap_start + 1) {
1169 			/* Gap of more than one size class. */
1170 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1171 		} else {
1172 			/* Gap of one size class. */
1173 			malloc_printf("[%u]\n", gap_start);
1174 		}
1175 	}
1176 }
1177 #endif
1178 
1179 /*
1180  * End Utility functions/macros.
1181  */
1182 /******************************************************************************/
1183 /*
1184  * Begin chunk management functions.
1185  */
1186 
1187 #ifndef lint
1188 static inline int
1189 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1190 {
1191 
1192 	assert(a != NULL);
1193 	assert(b != NULL);
1194 
1195 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1196 		return (-1);
1197 	else if (a->chunk == b->chunk)
1198 		return (0);
1199 	else
1200 		return (1);
1201 }
1202 
1203 /* Generate red-black tree code for chunks. */
1204 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1205 #endif
1206 
1207 static void *
1208 pages_map_align(void *addr, size_t size, int align)
1209 {
1210 	void *ret;
1211 
1212 	/*
1213 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1214 	 * of existing mappings, and we only want to create new mappings.
1215 	 */
1216 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1217 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1218 	assert(ret != NULL);
1219 
1220 	if (ret == MAP_FAILED)
1221 		ret = NULL;
1222 	else if (addr != NULL && ret != addr) {
1223 		/*
1224 		 * We succeeded in mapping memory, but not in the right place.
1225 		 */
1226 		if (munmap(ret, size) == -1) {
1227 			char buf[STRERROR_BUF];
1228 
1229 			strerror_r(errno, buf, sizeof(buf));
1230 			_malloc_message(_getprogname(),
1231 			    ": (malloc) Error in munmap(): ", buf, "\n");
1232 			if (opt_abort)
1233 				abort();
1234 		}
1235 		ret = NULL;
1236 	}
1237 
1238 	assert(ret == NULL || (addr == NULL && ret != addr)
1239 	    || (addr != NULL && ret == addr));
1240 	return (ret);
1241 }
1242 
1243 static void *
1244 pages_map(void *addr, size_t size)
1245 {
1246 
1247 	return pages_map_align(addr, size, 0);
1248 }
1249 
1250 static void
1251 pages_unmap(void *addr, size_t size)
1252 {
1253 
1254 	if (munmap(addr, size) == -1) {
1255 		char buf[STRERROR_BUF];
1256 
1257 		strerror_r(errno, buf, sizeof(buf));
1258 		_malloc_message(_getprogname(),
1259 		    ": (malloc) Error in munmap(): ", buf, "\n");
1260 		if (opt_abort)
1261 			abort();
1262 	}
1263 }
1264 
1265 static void *
1266 chunk_alloc(size_t size)
1267 {
1268 	void *ret, *chunk;
1269 	chunk_node_t *tchunk, *delchunk;
1270 
1271 	assert(size != 0);
1272 	assert((size & chunksize_mask) == 0);
1273 
1274 	malloc_mutex_lock(&chunks_mtx);
1275 
1276 	if (size == chunksize) {
1277 		/*
1278 		 * Check for address ranges that were previously chunks and try
1279 		 * to use them.
1280 		 */
1281 
1282 		/* LINTED */
1283 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1284 		while (tchunk != NULL) {
1285 			/* Found an address range.  Try to recycle it. */
1286 
1287 			chunk = tchunk->chunk;
1288 			delchunk = tchunk;
1289 			/* LINTED */
1290 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1291 
1292 			/* Remove delchunk from the tree. */
1293 			/* LINTED */
1294 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1295 			base_chunk_node_dealloc(delchunk);
1296 
1297 #ifdef USE_BRK
1298 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1299 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1300 				/* Re-use a previously freed brk chunk. */
1301 				ret = chunk;
1302 				goto RETURN;
1303 			}
1304 #endif
1305 			if ((ret = pages_map(chunk, size)) != NULL) {
1306 				/* Success. */
1307 				goto RETURN;
1308 			}
1309 		}
1310 	}
1311 
1312 	/*
1313 	 * Try to over-allocate, but allow the OS to place the allocation
1314 	 * anywhere.  Beware of size_t wrap-around.
1315 	 */
1316 	if (size + chunksize > size) {
1317 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1318 		    != NULL) {
1319 			goto RETURN;
1320 		}
1321 	}
1322 
1323 #ifdef USE_BRK
1324 	/*
1325 	 * Try to create allocations in brk, in order to make full use of
1326 	 * limited address space.
1327 	 */
1328 	if (brk_prev != (void *)-1) {
1329 		void *brk_cur;
1330 		intptr_t incr;
1331 
1332 		/*
1333 		 * The loop is necessary to recover from races with other
1334 		 * threads that are using brk for something other than malloc.
1335 		 */
1336 		malloc_mutex_lock(&brk_mtx);
1337 		do {
1338 			/* Get the current end of brk. */
1339 			brk_cur = sbrk(0);
1340 
1341 			/*
1342 			 * Calculate how much padding is necessary to
1343 			 * chunk-align the end of brk.
1344 			 */
1345 			incr = (intptr_t)size
1346 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1347 			if (incr == size) {
1348 				ret = brk_cur;
1349 			} else {
1350 				ret = (void *)((intptr_t)brk_cur + incr);
1351 				incr += size;
1352 			}
1353 
1354 			brk_prev = sbrk(incr);
1355 			if (brk_prev == brk_cur) {
1356 				/* Success. */
1357 				malloc_mutex_unlock(&brk_mtx);
1358 				brk_max = (void *)((intptr_t)ret + size);
1359 				goto RETURN;
1360 			}
1361 		} while (brk_prev != (void *)-1);
1362 		malloc_mutex_unlock(&brk_mtx);
1363 	}
1364 #endif
1365 
1366 	/* All strategies for allocation failed. */
1367 	ret = NULL;
1368 RETURN:
1369 	if (ret != NULL) {
1370 		chunk_node_t key;
1371 		/*
1372 		 * Clean out any entries in old_chunks that overlap with the
1373 		 * memory we just allocated.
1374 		 */
1375 		key.chunk = ret;
1376 		/* LINTED */
1377 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1378 		while (tchunk != NULL
1379 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1380 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1381 			delchunk = tchunk;
1382 			/* LINTED */
1383 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1384 			/* LINTED */
1385 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1386 			base_chunk_node_dealloc(delchunk);
1387 		}
1388 
1389 	}
1390 #ifdef MALLOC_STATS
1391 	if (ret != NULL) {
1392 		stats_chunks.nchunks += (size / chunksize);
1393 		stats_chunks.curchunks += (size / chunksize);
1394 	}
1395 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1396 		stats_chunks.highchunks = stats_chunks.curchunks;
1397 #endif
1398 	malloc_mutex_unlock(&chunks_mtx);
1399 
1400 	assert(CHUNK_ADDR2BASE(ret) == ret);
1401 	return (ret);
1402 }
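
/*
 * To summarize chunk_alloc(): a chunk-sized request first tries to recycle
 * an address range recorded in old_chunks, then falls back to an aligned
 * anonymous mmap(), and finally (where USE_BRK is defined) to extending the
 * data segment with sbrk().
 */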
1403 
1404 static void
1405 chunk_dealloc(void *chunk, size_t size)
1406 {
1407 	chunk_node_t *node;
1408 
1409 	assert(chunk != NULL);
1410 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1411 	assert(size != 0);
1412 	assert((size & chunksize_mask) == 0);
1413 
1414 	malloc_mutex_lock(&chunks_mtx);
1415 
1416 #ifdef USE_BRK
1417 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1418 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1419 		void *brk_cur;
1420 
1421 		malloc_mutex_lock(&brk_mtx);
1422 		/* Get the current end of brk. */
1423 		brk_cur = sbrk(0);
1424 
1425 		/*
1426 		 * Try to shrink the data segment if this chunk is at the end
1427 		 * of the data segment.  The sbrk() call here is subject to a
1428 		 * race condition with threads that use brk(2) or sbrk(2)
1429 		 * directly, but the alternative would be to leak memory for
1430 		 * the sake of poorly designed multi-threaded programs.
1431 		 */
1432 		if (brk_cur == brk_max
1433 		    && (void *)((uintptr_t)chunk + size) == brk_max
1434 		    && sbrk(-(intptr_t)size) == brk_max) {
1435 			malloc_mutex_unlock(&brk_mtx);
1436 			if (brk_prev == brk_max) {
1437 				/* Success. */
1438 				brk_prev = (void *)((intptr_t)brk_max
1439 				    - (intptr_t)size);
1440 				brk_max = brk_prev;
1441 			}
1442 		} else {
1443 			size_t offset;
1444 
1445 			malloc_mutex_unlock(&brk_mtx);
1446 			madvise(chunk, size, MADV_FREE);
1447 
1448 			/*
1449 			 * Iteratively create records of each chunk-sized
1450 			 * memory region that 'chunk' comprises, so that
1451 			 * the address range can be recycled if memory usage
1452 			 * increases later on.
1453 			 */
1454 			for (offset = 0; offset < size; offset += chunksize) {
1455 				node = base_chunk_node_alloc();
1456 				if (node == NULL)
1457 					break;
1458 
1459 				node->chunk = (void *)((uintptr_t)chunk
1460 				    + (uintptr_t)offset);
1461 				node->size = chunksize;
1462 				/* LINTED */
1463 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1464 			}
1465 		}
1466 	} else {
1467 #endif
1468 		pages_unmap(chunk, size);
1469 
1470 		/*
1471 		 * Make a record of the chunk's address, so that the address
1472 		 * range can be recycled if memory usage increases later on.
1473 		 * Don't bother to create entries if (size > chunksize), since
1474 		 * doing so could cause scalability issues for truly gargantuan
1475 		 * objects (many gigabytes or larger).
1476 		 */
1477 		if (size == chunksize) {
1478 			node = base_chunk_node_alloc();
1479 			if (node != NULL) {
1480 				node->chunk = (void *)(uintptr_t)chunk;
1481 				node->size = chunksize;
1482 				/* LINTED */
1483 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1484 			}
1485 		}
1486 #ifdef USE_BRK
1487 	}
1488 #endif
1489 
1490 #ifdef MALLOC_STATS
1491 	stats_chunks.curchunks -= (size / chunksize);
1492 #endif
1493 	malloc_mutex_unlock(&chunks_mtx);
1494 }
1495 
1496 /*
1497  * End chunk management functions.
1498  */
1499 /******************************************************************************/
1500 /*
1501  * Begin arena.
1502  */
1503 
1504 /*
1505  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
1506  * code if necessary).
1507  */
1508 static inline arena_t *
1509 choose_arena(void)
1510 {
1511 	arena_t *ret;
1512 
1513 	/*
1514 	 * We can only use TLS if this is a PIC library, since for the static
1515 	 * library version, libc's malloc is used by TLS allocation, which
1516 	 * introduces a bootstrapping issue.
1517 	 */
1518 	if (__isthreaded == false) {
1519 	    /*
1520 	     * Avoid the overhead of TLS for single-threaded operation.  If the
1521 	     * app switches to threaded mode, the initial thread may end up
1522 	     * being assigned to some other arena, but this one-time switch
1523 	     * shouldn't cause significant issues.
1524 	     */
1525 	    return (arenas[0]);
1526 	}
1527 
1528 	ret = get_arenas_map();
1529 	if (ret == NULL)
1530 		ret = choose_arena_hard();
1531 
1532 	assert(ret != NULL);
1533 	return (ret);
1534 }
1535 
1536 /*
1537  * Choose an arena based on a per-thread value (slow-path code only, called
1538  * only by choose_arena()).
1539  */
1540 static arena_t *
1541 choose_arena_hard(void)
1542 {
1543 	arena_t *ret;
1544 
1545 	assert(__isthreaded);
1546 
1547 	/* Assign one of the arenas to this thread, in a round-robin fashion. */
1548 	malloc_mutex_lock(&arenas_mtx);
1549 	ret = arenas[next_arena];
1550 	if (ret == NULL)
1551 		ret = arenas_extend(next_arena);
1552 	if (ret == NULL) {
1553 		/*
1554 		 * Make sure that this function never returns NULL, so that
1555 		 * choose_arena() doesn't have to check for a NULL return
1556 		 * value.
1557 		 */
1558 		ret = arenas[0];
1559 	}
1560 	next_arena = (next_arena + 1) % narenas;
1561 	malloc_mutex_unlock(&arenas_mtx);
1562 	set_arenas_map(ret);
1563 
1564 	return (ret);
1565 }
1566 
1567 #ifndef lint
1568 static inline int
1569 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1570 {
1571 
1572 	assert(a != NULL);
1573 	assert(b != NULL);
1574 
1575 	if ((uintptr_t)a < (uintptr_t)b)
1576 		return (-1);
1577 	else if (a == b)
1578 		return (0);
1579 	else
1580 		return (1);
1581 }
1582 
1583 /* Generate red-black tree code for arena chunks. */
1584 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1585 #endif
1586 
1587 #ifndef lint
1588 static inline int
1589 arena_run_comp(arena_run_t *a, arena_run_t *b)
1590 {
1591 
1592 	assert(a != NULL);
1593 	assert(b != NULL);
1594 
1595 	if ((uintptr_t)a < (uintptr_t)b)
1596 		return (-1);
1597 	else if (a == b)
1598 		return (0);
1599 	else
1600 		return (1);
1601 }
1602 
1603 /* Generate red-black tree code for arena runs. */
1604 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1605 #endif
1606 
1607 static inline void *
1608 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1609 {
1610 	void *ret;
1611 	unsigned i, mask, bit, regind;
1612 
1613 	assert(run->magic == ARENA_RUN_MAGIC);
1614 	assert(run->regs_minelm < bin->regs_mask_nelms);
1615 
1616 	/*
1617 	 * Move the first check outside the loop, so that run->regs_minelm can
1618 	 * be updated unconditionally, without the possibility of updating it
1619 	 * multiple times.
1620 	 */
1621 	i = run->regs_minelm;
1622 	mask = run->regs_mask[i];
1623 	if (mask != 0) {
1624 		/* Usable allocation found. */
1625 		bit = ffs((int)mask) - 1;
1626 
1627 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1628 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1629 		    + (bin->reg_size * regind));
1630 
1631 		/* Clear bit. */
1632 		mask ^= (1 << bit);
1633 		run->regs_mask[i] = mask;
1634 
1635 		return (ret);
1636 	}
1637 
1638 	for (i++; i < bin->regs_mask_nelms; i++) {
1639 		mask = run->regs_mask[i];
1640 		if (mask != 0) {
1641 			/* Usable allocation found. */
1642 			bit = ffs((int)mask) - 1;
1643 
1644 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1645 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1646 			    + (bin->reg_size * regind));
1647 
1648 			/* Clear bit. */
1649 			mask ^= (1 << bit);
1650 			run->regs_mask[i] = mask;
1651 
1652 			/*
1653 			 * Make a note that nothing before this element
1654 			 * contains a free region.
1655 			 */
1656 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1657 
1658 			return (ret);
1659 		}
1660 	}
1661 	/* Not reached. */
1662 	/* LINTED */
1663 	assert(0);
1664 	return (NULL);
1665 }
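
/*
 * In the routine above, regind == (i * 32) + bit on a system with 32-bit
 * ints (SIZEOF_INT_2POW + 3 == 5); e.g. a set bit 5 in regs_mask[1] denotes
 * free region 37, whose address is reg0_offset + 37 * reg_size bytes into
 * the run.
 */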
1666 
1667 static inline void
1668 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1669 {
1670 	/*
1671 	 * To divide by a number D that is not a power of two we multiply
1672 	 * by (2^21 / D) and then right shift by 21 positions.
1673 	 *
1674 	 *   X / D
1675 	 *
1676 	 * becomes
1677 	 *
1678 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1679 	 */
1680 #define SIZE_INV_SHIFT 21
1681 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1682 	static const unsigned size_invs[] = {
1683 	    SIZE_INV(3),
1684 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1685 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1686 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1687 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1688 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1689 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1690 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1691 #if (QUANTUM_2POW_MIN < 4)
1692 	    ,
1693 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1694 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1695 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1696 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1697 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1698 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1699 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1700 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1701 #endif
1702 	};
1703 	unsigned diff, regind, elm, bit;
1704 
1705 	/* LINTED */
1706 	assert(run->magic == ARENA_RUN_MAGIC);
1707 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1708 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1709 
1710 	/*
1711 	 * Avoid doing division with a variable divisor if possible.  Using
1712 	 * actual division here can reduce allocator throughput by over 20%!
1713 	 */
1714 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1715 	if ((size & (size - 1)) == 0) {
1716 		/*
1717 		 * log2_table allows fast division of a power of two in the
1718 		 * [1..128] range.
1719 		 *
1720 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1721 		 */
1722 		static const unsigned char log2_table[] = {
1723 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1724 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1725 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1726 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1727 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1728 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1729 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1730 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1731 		};
1732 
1733 		if (size <= 128)
1734 			regind = (diff >> log2_table[size - 1]);
1735 		else if (size <= 32768)
1736 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1737 		else {
1738 			/*
1739 			 * The page size is too large for us to use the lookup
1740 			 * table.  Use real division.
1741 			 */
1742 			regind = diff / size;
1743 		}
1744 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1745 	    << QUANTUM_2POW_MIN) + 2) {
1746 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1747 		regind >>= SIZE_INV_SHIFT;
1748 	} else {
1749 		/*
1750 		 * size_invs isn't large enough to handle this size class, so
1751 		 * calculate regind using actual division.  This only happens
1752 		 * if the user increases small_max via the 'S' runtime
1753 		 * configuration option.
1754 		 */
1755 		regind = diff / size;
1756 	}
1757 	assert(diff == regind * size);
1758 	assert(regind < bin->nregs);
1759 
1760 	elm = regind >> (SIZEOF_INT_2POW + 3);
1761 	if (elm < run->regs_minelm)
1762 		run->regs_minelm = elm;
1763 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1764 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1765 	run->regs_mask[elm] |= (1 << bit);
1766 #undef SIZE_INV
1767 #undef SIZE_INV_SHIFT
1768 }
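
/*
 * Worked example of the reciprocal trick above, assuming a 16 byte quantum
 * (QUANTUM_2POW_MIN == 4): for the 48 byte class, size_invs[(48 >> 4) - 3]
 * == SIZE_INV(3) == ((1 << 21) / 48) + 1 == 43691.  Freeing the region at
 * diff == 96 gives regind == (96 * 43691) >> 21 == 4194336 >> 21 == 2,
 * which indeed equals 96 / 48.
 */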
1769 
1770 static void
1771 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1772 {
1773 	arena_chunk_t *chunk;
1774 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1775 	unsigned i;
1776 
1777 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1778 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1779 	    >> pagesize_2pow);
1780 	total_pages = chunk->map[run_ind].npages;
1781 	need_pages = (size >> pagesize_2pow);
1782 	assert(need_pages <= total_pages);
1783 	rem_pages = total_pages - need_pages;
1784 
1785 	/* Split enough pages from the front of run to fit allocation size. */
1786 	map_offset = run_ind;
1787 	for (i = 0; i < need_pages; i++) {
1788 		chunk->map[map_offset + i].npages = need_pages;
1789 		chunk->map[map_offset + i].pos = i;
1790 	}
1791 
1792 	/* Keep track of trailing unused pages for later use. */
1793 	if (rem_pages > 0) {
1794 		/* Update map for trailing pages. */
1795 		map_offset += need_pages;
1796 		chunk->map[map_offset].npages = rem_pages;
1797 		chunk->map[map_offset].pos = POS_FREE;
1798 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1799 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1800 	}
1801 
1802 	chunk->pages_used += need_pages;
1803 }
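
/*
 * For example, splitting a 16-page free run to satisfy a 3-page (12 kB)
 * request marks the first three map entries of the run with npages == 3 and
 * pos == 0, 1, 2, and re-marks the remaining 13-page tail as free (POS_FREE)
 * at both its first and last map entries, so it can later be coalesced or
 * reused.
 */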
1804 
1805 static arena_chunk_t *
1806 arena_chunk_alloc(arena_t *arena)
1807 {
1808 	arena_chunk_t *chunk;
1809 
1810 	if (arena->spare != NULL) {
1811 		chunk = arena->spare;
1812 		arena->spare = NULL;
1813 
1814 		/* LINTED */
1815 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1816 	} else {
1817 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1818 		if (chunk == NULL)
1819 			return (NULL);
1820 #ifdef MALLOC_STATS
1821 		arena->stats.mapped += chunksize;
1822 #endif
1823 
1824 		chunk->arena = arena;
1825 
1826 		/* LINTED */
1827 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1828 
1829 		/*
1830 		 * Claim that no pages are in use, since the header is merely
1831 		 * overhead.
1832 		 */
1833 		chunk->pages_used = 0;
1834 
1835 		chunk->max_frun_npages = chunk_npages -
1836 		    arena_chunk_header_npages;
1837 		chunk->min_frun_ind = arena_chunk_header_npages;
1838 
1839 		/*
1840 		 * Initialize enough of the map to support one maximal free run.
1841 		 */
1842 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1843 		    arena_chunk_header_npages;
1844 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1845 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1846 		    arena_chunk_header_npages;
1847 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1848 	}
1849 
1850 	return (chunk);
1851 }
1852 
1853 static void
1854 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1855 {
1856 
1857 	/*
1858 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1859 	 * will be cached, so that the arena does not use it.
1860 	 */
1861 	/* LINTED */
1862 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1863 
1864 	if (opt_hint == false) {
1865 		if (arena->spare != NULL) {
1866 			chunk_dealloc((void *)arena->spare, chunksize);
1867 #ifdef MALLOC_STATS
1868 			arena->stats.mapped -= chunksize;
1869 #endif
1870 		}
1871 		arena->spare = chunk;
1872 	} else {
1873 		assert(arena->spare == NULL);
1874 		chunk_dealloc((void *)chunk, chunksize);
1875 #ifdef MALLOC_STATS
1876 		arena->stats.mapped -= chunksize;
1877 #endif
1878 	}
1879 }
1880 
1881 static arena_run_t *
1882 arena_run_alloc(arena_t *arena, size_t size)
1883 {
1884 	arena_chunk_t *chunk;
1885 	arena_run_t *run;
1886 	unsigned need_npages, limit_pages, compl_need_npages;
1887 
1888 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1889 	    pagesize_2pow)));
1890 	assert((size & pagesize_mask) == 0);
1891 
1892 	/*
1893 	 * Search through arena's chunks in address order for a free run that is
1894 	 * large enough.  Look for the first fit.
1895 	 */
1896 	need_npages = (size >> pagesize_2pow);
1897 	limit_pages = chunk_npages - arena_chunk_header_npages;
1898 	compl_need_npages = limit_pages - need_npages;
1899 	/* LINTED */
1900 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1901 		/*
1902 		 * Avoid searching this chunk if there are not enough
1903 		 * contiguous free pages for there to possibly be a large
1904 		 * enough free run.
1905 		 */
1906 		if (chunk->pages_used <= compl_need_npages &&
1907 		    need_npages <= chunk->max_frun_npages) {
1908 			arena_chunk_map_t *mapelm;
1909 			unsigned i;
1910 			unsigned max_frun_npages = 0;
1911 			unsigned min_frun_ind = chunk_npages;
1912 
1913 			assert(chunk->min_frun_ind >=
1914 			    arena_chunk_header_npages);
1915 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1916 				mapelm = &chunk->map[i];
1917 				if (mapelm->pos == POS_FREE) {
1918 					if (mapelm->npages >= need_npages) {
1919 						run = (arena_run_t *)
1920 						    ((uintptr_t)chunk + (i <<
1921 						    pagesize_2pow));
1922 						/* Update page map. */
1923 						arena_run_split(arena, run,
1924 						    size);
1925 						return (run);
1926 					}
1927 					if (mapelm->npages >
1928 					    max_frun_npages) {
1929 						max_frun_npages =
1930 						    mapelm->npages;
1931 					}
1932 					if (i < min_frun_ind) {
1933 						min_frun_ind = i;
1934 						if (i < chunk->min_frun_ind)
1935 							chunk->min_frun_ind = i;
1936 					}
1937 				}
1938 				i += mapelm->npages;
1939 			}
1940 			/*
1941 			 * Search failure.  Reset cached chunk->max_frun_npages.
1942 			 * chunk->min_frun_ind was already reset above (if
1943 			 * necessary).
1944 			 */
1945 			chunk->max_frun_npages = max_frun_npages;
1946 		}
1947 	}
1948 
1949 	/*
1950 	 * No usable runs.  Create a new chunk from which to allocate the run.
1951 	 */
1952 	chunk = arena_chunk_alloc(arena);
1953 	if (chunk == NULL)
1954 		return (NULL);
1955 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1956 	    pagesize_2pow));
1957 	/* Update page map. */
1958 	arena_run_split(arena, run, size);
1959 	return (run);
1960 }
1961 
1962 static void
1963 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1964 {
1965 	arena_chunk_t *chunk;
1966 	unsigned run_ind, run_pages;
1967 
1968 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1969 
1970 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1971 	    >> pagesize_2pow);
1972 	assert(run_ind >= arena_chunk_header_npages);
1973 	assert(run_ind < (chunksize >> pagesize_2pow));
1974 	run_pages = (size >> pagesize_2pow);
1975 	assert(run_pages == chunk->map[run_ind].npages);
1976 
1977 	/* Subtract pages from count of pages used in chunk. */
1978 	chunk->pages_used -= run_pages;
1979 
1980 	/* Mark run as deallocated. */
1981 	assert(chunk->map[run_ind].npages == run_pages);
1982 	chunk->map[run_ind].pos = POS_FREE;
1983 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1984 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1985 
1986 	/*
1987 	 * Tell the kernel that we don't need the data in this run, but only if
1988 	 * requested via runtime configuration.
1989 	 */
1990 	if (opt_hint)
1991 		madvise(run, size, MADV_FREE);
1992 
1993 	/* Try to coalesce with neighboring runs. */
1994 	if (run_ind > arena_chunk_header_npages &&
1995 	    chunk->map[run_ind - 1].pos == POS_FREE) {
1996 		unsigned prev_npages;
1997 
1998 		/* Coalesce with previous run. */
1999 		prev_npages = chunk->map[run_ind - 1].npages;
2000 		run_ind -= prev_npages;
2001 		assert(chunk->map[run_ind].npages == prev_npages);
2002 		assert(chunk->map[run_ind].pos == POS_FREE);
2003 		run_pages += prev_npages;
2004 
2005 		chunk->map[run_ind].npages = run_pages;
2006 		assert(chunk->map[run_ind].pos == POS_FREE);
2007 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2008 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2009 	}
2010 
2011 	if (run_ind + run_pages < chunk_npages &&
2012 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2013 		unsigned next_npages;
2014 
2015 		/* Coalesce with next run. */
2016 		next_npages = chunk->map[run_ind + run_pages].npages;
2017 		run_pages += next_npages;
2018 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2019 		    next_npages);
2020 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2021 
2022 		chunk->map[run_ind].npages = run_pages;
2023 		chunk->map[run_ind].pos = POS_FREE;
2024 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2025 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2026 	}
2027 
2028 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2029 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2030 	if (run_ind < chunk->min_frun_ind)
2031 		chunk->min_frun_ind = run_ind;
2032 
2033 	/* Deallocate chunk if it is now completely unused. */
2034 	if (chunk->pages_used == 0)
2035 		arena_chunk_dealloc(arena, chunk);
2036 }
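
/*
 * Continuing the worked example after arena_run_split(): freeing that
 * 3-page run while the pages before index 8 are still in use and pages
 * 11-12 are still free first marks map[8] and map[10] POS_FREE, then
 * coalesces forward with the 2-page neighbour, leaving map[8] and
 * map[12] describing a single 5-page free run again.  If this left the
 * chunk with no pages in use, the chunk itself would be released via
 * arena_chunk_dealloc().
 */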
2037 
2038 static arena_run_t *
2039 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2040 {
2041 	arena_run_t *run;
2042 	unsigned i, remainder;
2043 
2044 	/* Look for a usable run. */
2045 	/* LINTED */
2046 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2047 		/* run is guaranteed to have available space. */
2048 		/* LINTED */
2049 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2050 #ifdef MALLOC_STATS
2051 		bin->stats.reruns++;
2052 #endif
2053 		return (run);
2054 	}
2055 	/* No existing runs have any space available. */
2056 
2057 	/* Allocate a new run. */
2058 	run = arena_run_alloc(arena, bin->run_size);
2059 	if (run == NULL)
2060 		return (NULL);
2061 
2062 	/* Initialize run internals. */
2063 	run->bin = bin;
2064 
2065 	for (i = 0; i < bin->regs_mask_nelms; i++)
2066 		run->regs_mask[i] = UINT_MAX;
2067 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2068 	if (remainder != 0) {
2069 		/* The last element has spare bits that need to be unset. */
2070 		run->regs_mask[bin->regs_mask_nelms - 1] = (UINT_MAX >>
2071 		    ((1 << (SIZEOF_INT_2POW + 3)) - remainder));
2072 	}
2073 
2074 	run->regs_minelm = 0;
2075 
2076 	run->nfree = bin->nregs;
2077 #ifdef MALLOC_DEBUG
2078 	run->magic = ARENA_RUN_MAGIC;
2079 #endif
2080 
2081 #ifdef MALLOC_STATS
2082 	bin->stats.nruns++;
2083 	bin->stats.curruns++;
2084 	if (bin->stats.curruns > bin->stats.highruns)
2085 		bin->stats.highruns = bin->stats.curruns;
2086 #endif
2087 	return (run);
2088 }
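
/*
 * Worked example (illustrative only): with 32-bit ints
 * (SIZEOF_INT_2POW == 2) and a hypothetical bin->nregs of 70,
 * regs_mask_nelms is 3, remainder is 70 & 31 == 6, and the last mask
 * element becomes
 *
 *	UINT_MAX >> (32 - 6) == 0x3f
 *
 * so only the six bits that correspond to real regions are marked
 * free; the unused high bits stay clear and the bit scan in
 * arena_run_reg_alloc() can never hand out a region past the end of
 * the run.
 */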
2089 
2090 /* bin->runcur must have space available before this function is called. */
2091 static inline void *
2092 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2093 {
2094 	void *ret;
2095 
2096 	assert(run->magic == ARENA_RUN_MAGIC);
2097 	assert(run->nfree > 0);
2098 
2099 	ret = arena_run_reg_alloc(run, bin);
2100 	assert(ret != NULL);
2101 	run->nfree--;
2102 
2103 	return (ret);
2104 }
2105 
2106 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2107 static void *
2108 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2109 {
2110 
2111 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2112 	if (bin->runcur == NULL)
2113 		return (NULL);
2114 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2115 	assert(bin->runcur->nfree > 0);
2116 
2117 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2118 }
2119 
2120 /*
2121  * Calculate bin->run_size such that it meets the following constraints:
2122  *
2123  *   *) bin->run_size >= min_run_size
2124  *   *) bin->run_size <= arena_maxclass
2125  *   *) bin->run_size <= RUN_MAX_SMALL
2126  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2127  *
2128  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2129  * also calculated here, since these settings are all interdependent.
2130  */
2131 static size_t
2132 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2133 {
2134 	size_t try_run_size, good_run_size;
2135 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2136 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2137 	float max_ovrhd = RUN_MAX_OVRHD;
2138 
2139 	assert(min_run_size >= pagesize);
2140 	assert(min_run_size <= arena_maxclass);
2141 	assert(min_run_size <= RUN_MAX_SMALL);
2142 
2143 	/*
2144 	 * Calculate known-valid settings before entering the run_size
2145 	 * expansion loop, so that the first part of the loop always copies
2146 	 * valid settings.
2147 	 *
2148 	 * The do..while loop iteratively reduces the number of regions until
2149 	 * the run header and the regions no longer overlap.  A closed formula
2150 	 * would be quite messy, since there is an interdependency between the
2151 	 * header's mask length and the number of regions.
2152 	 */
2153 	try_run_size = min_run_size;
2154 	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
2155 	    + 1; /* Counteract the first line of the loop. */
2156 	do {
2157 		try_nregs--;
2158 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2159 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2160 		try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
2161 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2162 	    > try_reg0_offset);
2163 
2164 	/* run_size expansion loop. */
2165 	do {
2166 		/*
2167 		 * Copy valid settings before trying more aggressive settings.
2168 		 */
2169 		good_run_size = try_run_size;
2170 		good_nregs = try_nregs;
2171 		good_mask_nelms = try_mask_nelms;
2172 		good_reg0_offset = try_reg0_offset;
2173 
2174 		/* Try more aggressive settings. */
2175 		try_run_size += pagesize;
2176 		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
2177 		    bin->reg_size) + 1; /* Counteract try_nregs-- in loop. */
2178 		do {
2179 			try_nregs--;
2180 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2181 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2182 			    1 : 0);
2183 			try_reg0_offset = try_run_size - (try_nregs *
2184 			    bin->reg_size);
2185 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2186 		    (try_mask_nelms - 1)) > try_reg0_offset);
2187 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2188 	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
2189 	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
2190 	    max_ovrhd);
2191 
2192 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2193 	    <= good_reg0_offset);
2194 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2195 
2196 	/* Copy final settings. */
2197 	bin->run_size = good_run_size;
2198 	bin->nregs = good_nregs;
2199 	bin->regs_mask_nelms = good_mask_nelms;
2200 	bin->reg0_offset = good_reg0_offset;
2201 
2202 	return (good_run_size);
2203 }
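
/*
 * Worked example (illustrative only; the header size is made up): for
 * bin->reg_size == 192, try_run_size == 4096, 32-bit ints and an
 * assumed sizeof(arena_run_t) of 40 bytes, the inner loop starts at
 * try_nregs == 22 and settles after one pass on
 *
 *	try_nregs       == 21
 *	try_mask_nelms  == 1
 *	try_reg0_offset == 4096 - 21 * 192 == 64
 *
 * since the 40-byte header fits in the 64 bytes that precede region 0.
 * The outer loop then decides whether growing the run to 8192, 12288,
 * ... bytes lowers the relative header overhead enough to be worth it,
 * subject to the RUN_MAX_OVRHD limits.
 */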
2204 
2205 static void *
2206 arena_malloc(arena_t *arena, size_t size)
2207 {
2208 	void *ret;
2209 
2210 	assert(arena != NULL);
2211 	assert(arena->magic == ARENA_MAGIC);
2212 	assert(size != 0);
2213 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2214 
2215 	if (size <= bin_maxclass) {
2216 		arena_bin_t *bin;
2217 		arena_run_t *run;
2218 
2219 		/* Small allocation. */
2220 
2221 		if (size < small_min) {
2222 			/* Tiny. */
2223 			size = pow2_ceil(size);
2224 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2225 			    1)))];
2226 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2227 			/*
2228 			 * Bin calculation is always correct, but we may need
2229 			 * to fix size for the purposes of assertions and/or
2230 			 * stats accuracy.
2231 			 */
2232 			if (size < (1 << TINY_MIN_2POW))
2233 				size = (1 << TINY_MIN_2POW);
2234 #endif
2235 		} else if (size <= small_max) {
2236 			/* Quantum-spaced. */
2237 			size = QUANTUM_CEILING(size);
2238 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2239 			    - 1];
2240 		} else {
2241 			/* Sub-page. */
2242 			size = pow2_ceil(size);
2243 			bin = &arena->bins[ntbins + nqbins
2244 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2245 		}
2246 		assert(size == bin->reg_size);
2247 
2248 		malloc_mutex_lock(&arena->mtx);
2249 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2250 			ret = arena_bin_malloc_easy(arena, bin, run);
2251 		else
2252 			ret = arena_bin_malloc_hard(arena, bin);
2253 
2254 		if (ret == NULL) {
2255 			malloc_mutex_unlock(&arena->mtx);
2256 			return (NULL);
2257 		}
2258 
2259 #ifdef MALLOC_STATS
2260 		bin->stats.nrequests++;
2261 		arena->stats.nmalloc_small++;
2262 		arena->stats.allocated_small += size;
2263 #endif
2264 	} else {
2265 		/* Large allocation. */
2266 		size = PAGE_CEILING(size);
2267 		malloc_mutex_lock(&arena->mtx);
2268 		ret = (void *)arena_run_alloc(arena, size);
2269 		if (ret == NULL) {
2270 			malloc_mutex_unlock(&arena->mtx);
2271 			return (NULL);
2272 		}
2273 #ifdef MALLOC_STATS
2274 		arena->stats.nmalloc_large++;
2275 		arena->stats.allocated_large += size;
2276 #endif
2277 	}
2278 
2279 	malloc_mutex_unlock(&arena->mtx);
2280 
2281 	if (opt_junk)
2282 		memset(ret, 0xa5, size);
2283 	else if (opt_zero)
2284 		memset(ret, 0, size);
2285 	return (ret);
2286 }
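
/*
 * Worked example (illustrative only), assuming the documented defaults
 * of a 16-byte quantum, 512-byte small_max and 4 kB pages, which make
 * ntbins == 3 (tiny bins for 2, 4 and 8 bytes):
 *
 *	malloc(5):    tiny path; pow2_ceil(5) == 8, so the request is
 *		      served from arena->bins[2], the 8-byte bin.
 *	malloc(100):  quantum path; QUANTUM_CEILING(100) == 112, so it
 *		      goes to arena->bins[3 + 112/16 - 1], the 112-byte
 *		      bin.
 *	malloc(1500): sub-page path; pow2_ceil(1500) == 2048, the
 *		      2048-byte bin.
 *	malloc(3000): larger than bin_maxclass (2048), so it is served
 *		      by a dedicated one-page (4096-byte) run instead of
 *		      a bin.
 */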
2287 
2288 static inline void
2289 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2290     unsigned npages)
2291 {
2292 	unsigned i;
2293 
2294 	assert(npages > 0);
2295 
2296 	/*
2297 	 * Modify the map such that arena_run_dalloc() sees the run as
2298 	 * separately allocated.
2299 	 */
2300 	for (i = 0; i < npages; i++) {
2301 		chunk->map[pageind + i].npages = npages;
2302 		chunk->map[pageind + i].pos = i;
2303 	}
2304 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2305 	    pagesize_2pow)), npages << pagesize_2pow);
2306 }
2307 
2308 /* Only handles large allocations that require more than page alignment. */
2309 static void *
2310 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2311 {
2312 	void *ret;
2313 	size_t offset;
2314 	arena_chunk_t *chunk;
2315 	unsigned pageind, i, npages;
2316 
2317 	assert((size & pagesize_mask) == 0);
2318 	assert((alignment & pagesize_mask) == 0);
2319 
2320 	npages = size >> pagesize_2pow;
2321 
2322 	malloc_mutex_lock(&arena->mtx);
2323 	ret = (void *)arena_run_alloc(arena, alloc_size);
2324 	if (ret == NULL) {
2325 		malloc_mutex_unlock(&arena->mtx);
2326 		return (NULL);
2327 	}
2328 
2329 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2330 
2331 	offset = (uintptr_t)ret & (alignment - 1);
2332 	assert((offset & pagesize_mask) == 0);
2333 	assert(offset < alloc_size);
2334 	if (offset == 0) {
2335 		pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
2336 		    pagesize_2pow);
2337 
2338 		/* Update the map for the run to be kept. */
2339 		for (i = 0; i < npages; i++) {
2340 			chunk->map[pageind + i].npages = npages;
2341 			assert(chunk->map[pageind + i].pos == i);
2342 		}
2343 
2344 		/* Trim trailing space. */
2345 		arena_palloc_trim(arena, chunk, pageind + npages,
2346 		    (alloc_size - size) >> pagesize_2pow);
2347 	} else {
2348 		size_t leadsize, trailsize;
2349 
2350 		leadsize = alignment - offset;
2351 		ret = (void *)((uintptr_t)ret + leadsize);
2352 		pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
2353 		    pagesize_2pow);
2354 
2355 		/* Update the map for the run to be kept. */
2356 		for (i = 0; i < npages; i++) {
2357 			chunk->map[pageind + i].npages = npages;
2358 			chunk->map[pageind + i].pos = i;
2359 		}
2360 
2361 		/* Trim leading space. */
2362 		arena_palloc_trim(arena, chunk, pageind - (leadsize >>
2363 		    pagesize_2pow), leadsize >> pagesize_2pow);
2364 
2365 		trailsize = alloc_size - leadsize - size;
2366 		if (trailsize != 0) {
2367 			/* Trim trailing space. */
2368 			assert(trailsize < alloc_size);
2369 			arena_palloc_trim(arena, chunk, pageind + npages,
2370 			    trailsize >> pagesize_2pow);
2371 		}
2372 	}
2373 
2374 #ifdef MALLOC_STATS
2375 	arena->stats.nmalloc_large++;
2376 	arena->stats.allocated_large += size;
2377 #endif
2378 	malloc_mutex_unlock(&arena->mtx);
2379 
2380 	if (opt_junk)
2381 		memset(ret, 0xa5, size);
2382 	else if (opt_zero)
2383 		memset(ret, 0, size);
2384 	return (ret);
2385 }
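
/*
 * Worked example (illustrative only): ipalloc(16384, 8192), i.e. 8 kB
 * aligned to 16 kB on a 4 kB page system, reaches this function with
 * size == 8192 and alloc_size == (16384 << 1) - 4096 == 28672 (seven
 * pages).  If the run returned by arena_run_alloc() happens to start
 * 4096 bytes past a 16 kB boundary, then leadsize == 12288: three
 * leading pages and two trailing pages are handed back through
 * arena_palloc_trim(), and the two pages in between become the aligned
 * allocation.
 */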
2386 
2387 /* Return the size of the allocation pointed to by ptr. */
2388 static size_t
2389 arena_salloc(const void *ptr)
2390 {
2391 	size_t ret;
2392 	arena_chunk_t *chunk;
2393 	arena_chunk_map_t *mapelm;
2394 	unsigned pageind;
2395 
2396 	assert(ptr != NULL);
2397 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2398 
2399 	/*
2400 	 * No arena data structures that we query here can change in a way that
2401 	 * affects this function, so we don't need to lock.
2402 	 */
2403 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2404 	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2405 	mapelm = &chunk->map[pageind];
2406 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2407 	    pagesize_2pow)) {
2408 		arena_run_t *run;
2409 
2410 		pageind -= mapelm->pos;
2411 
2412 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2413 		    pagesize_2pow));
2414 		assert(run->magic == ARENA_RUN_MAGIC);
2415 		ret = run->bin->reg_size;
2416 	} else
2417 		ret = mapelm->npages << pagesize_2pow;
2418 
2419 	return (ret);
2420 }
2421 
2422 static void *
2423 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2424 {
2425 	void *ret;
2426 
2427 	/* Avoid moving the allocation if the size class would not change. */
2428 	if (size < small_min) {
2429 		if (oldsize < small_min &&
2430 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2431 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2432 			goto IN_PLACE;
2433 	} else if (size <= small_max) {
2434 		if (oldsize >= small_min && oldsize <= small_max &&
2435 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2436 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2437 			goto IN_PLACE;
2438 	} else {
2439 		/*
2440 		 * We make no attempt to resize runs here, though it would be
2441 		 * possible to do so.
2442 		 */
2443 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2444 			goto IN_PLACE;
2445 	}
2446 
2447 	/*
2448 	 * If we get here, then size and oldsize are different enough that we
2449 	 * need to use a different size class.  In that case, fall back to
2450 	 * allocating new space and copying.
2451 	 */
2452 	ret = arena_malloc(choose_arena(), size);
2453 	if (ret == NULL)
2454 		return (NULL);
2455 
2456 	/* Junk/zero-filling were already done by arena_malloc(). */
2457 	if (size < oldsize)
2458 		memcpy(ret, ptr, size);
2459 	else
2460 		memcpy(ret, ptr, oldsize);
2461 	idalloc(ptr);
2462 	return (ret);
2463 IN_PLACE:
2464 	if (opt_junk && size < oldsize)
2465 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2466 	else if (opt_zero && size > oldsize)
2467 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2468 	return (ptr);
2469 }
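
/*
 * Worked example (illustrative only, 16-byte quantum): growing a
 * 100-byte allocation to 110 bytes stays in place, because both sizes
 * round to the 112-byte quantum class; growing it to 120 bytes rounds
 * to 128 and therefore allocates a new region and copies.  A
 * page-rounded (large) allocation stays in place only when the new
 * size is still above small_max and its page-rounded size equals the
 * old size exactly.
 */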
2470 
2471 static void
2472 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2473 {
2474 	unsigned pageind;
2475 	arena_chunk_map_t *mapelm;
2476 	size_t size;
2477 
2478 	assert(arena != NULL);
2479 	assert(arena->magic == ARENA_MAGIC);
2480 	assert(chunk->arena == arena);
2481 	assert(ptr != NULL);
2482 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2483 
2484 	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2485 	mapelm = &chunk->map[pageind];
2486 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2487 	    pagesize_2pow)) {
2488 		arena_run_t *run;
2489 		arena_bin_t *bin;
2490 
2491 		/* Small allocation. */
2492 
2493 		pageind -= mapelm->pos;
2494 
2495 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2496 		    pagesize_2pow));
2497 		assert(run->magic == ARENA_RUN_MAGIC);
2498 		bin = run->bin;
2499 		size = bin->reg_size;
2500 
2501 		if (opt_junk)
2502 			memset(ptr, 0x5a, size);
2503 
2504 		malloc_mutex_lock(&arena->mtx);
2505 		arena_run_reg_dalloc(run, bin, ptr, size);
2506 		run->nfree++;
2507 
2508 		if (run->nfree == bin->nregs) {
2509 			/* Deallocate run. */
2510 			if (run == bin->runcur)
2511 				bin->runcur = NULL;
2512 			else if (bin->nregs != 1) {
2513 				/*
2514 				 * This block's conditional is necessary because
2515 				 * if the run only contains one region, then it
2516 				 * never gets inserted into the non-full runs
2517 				 * tree.
2518 				 */
2519 				/* LINTED */
2520 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2521 			}
2522 #ifdef MALLOC_DEBUG
2523 			run->magic = 0;
2524 #endif
2525 			arena_run_dalloc(arena, run, bin->run_size);
2526 #ifdef MALLOC_STATS
2527 			bin->stats.curruns--;
2528 #endif
2529 		} else if (run->nfree == 1 && run != bin->runcur) {
2530 			/*
2531 			 * Make sure that bin->runcur always refers to the
2532 			 * lowest non-full run, if one exists.
2533 			 */
2534 			if (bin->runcur == NULL)
2535 				bin->runcur = run;
2536 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2537 				/* Switch runcur. */
2538 				if (bin->runcur->nfree > 0) {
2539 					/* Insert runcur. */
2540 					/* LINTED */
2541 					RB_INSERT(arena_run_tree_s, &bin->runs,
2542 					    bin->runcur);
2543 				}
2544 				bin->runcur = run;
2545 			} else {
2546 				/* LINTED */
2547 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2548 			}
2549 		}
2550 #ifdef MALLOC_STATS
2551 		arena->stats.allocated_small -= size;
2552 		arena->stats.ndalloc_small++;
2553 #endif
2554 	} else {
2555 		/* Large allocation. */
2556 
2557 		size = mapelm->npages << pagesize_2pow;
2558 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2559 
2560 		if (opt_junk)
2561 			memset(ptr, 0x5a, size);
2562 
2563 		malloc_mutex_lock(&arena->mtx);
2564 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2565 #ifdef MALLOC_STATS
2566 		arena->stats.allocated_large -= size;
2567 		arena->stats.ndalloc_large++;
2568 #endif
2569 	}
2570 
2571 	malloc_mutex_unlock(&arena->mtx);
2572 }
2573 
2574 static bool
2575 arena_new(arena_t *arena)
2576 {
2577 	unsigned i;
2578 	arena_bin_t *bin;
2579 	size_t prev_run_size;
2580 
2581 	malloc_mutex_init(&arena->mtx);
2582 
2583 #ifdef MALLOC_STATS
2584 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2585 #endif
2586 
2587 	/* Initialize chunks. */
2588 	RB_INIT(&arena->chunks);
2589 	arena->spare = NULL;
2590 
2591 	/* Initialize bins. */
2592 	prev_run_size = pagesize;
2593 
2594 	/* (2^n)-spaced tiny bins. */
2595 	for (i = 0; i < ntbins; i++) {
2596 		bin = &arena->bins[i];
2597 		bin->runcur = NULL;
2598 		RB_INIT(&bin->runs);
2599 
2600 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2601 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2602 
2603 #ifdef MALLOC_STATS
2604 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2605 #endif
2606 	}
2607 
2608 	/* Quantum-spaced bins. */
2609 	for (; i < ntbins + nqbins; i++) {
2610 		bin = &arena->bins[i];
2611 		bin->runcur = NULL;
2612 		RB_INIT(&bin->runs);
2613 
2614 		bin->reg_size = quantum * (i - ntbins + 1);
2615 /*
2616 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2617 */
2618 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2619 
2620 #ifdef MALLOC_STATS
2621 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2622 #endif
2623 	}
2624 
2625 	/* (2^n)-spaced sub-page bins. */
2626 	for (; i < ntbins + nqbins + nsbins; i++) {
2627 		bin = &arena->bins[i];
2628 		bin->runcur = NULL;
2629 		RB_INIT(&bin->runs);
2630 
2631 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2632 
2633 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2634 
2635 #ifdef MALLOC_STATS
2636 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2637 #endif
2638 	}
2639 
2640 #ifdef MALLOC_DEBUG
2641 	arena->magic = ARENA_MAGIC;
2642 #endif
2643 
2644 	return (false);
2645 }
2646 
2647 /* Create a new arena and insert it into the arenas array at index ind. */
2648 static arena_t *
2649 arenas_extend(unsigned ind)
2650 {
2651 	arena_t *ret;
2652 
2653 	/* Allocate enough space for trailing bins. */
2654 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2655 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2656 	if (ret != NULL && arena_new(ret) == false) {
2657 		arenas[ind] = ret;
2658 		return (ret);
2659 	}
2660 	/* Only reached if there is an OOM error. */
2661 
2662 	/*
2663 	 * OOM here is quite inconvenient to propagate, since dealing with it
2664 	 * would require a check for failure in the fast path.  Instead, punt
2665 	 * by using arenas[0].  In practice, this is an extremely unlikely
2666 	 * failure.
2667 	 */
2668 	_malloc_message(_getprogname(),
2669 	    ": (malloc) Error initializing arena\n", "", "");
2670 	if (opt_abort)
2671 		abort();
2672 
2673 	return (arenas[0]);
2674 }
2675 
2676 /*
2677  * End arena.
2678  */
2679 /******************************************************************************/
2680 /*
2681  * Begin general internal functions.
2682  */
2683 
2684 static void *
2685 huge_malloc(size_t size)
2686 {
2687 	void *ret;
2688 	size_t csize;
2689 	chunk_node_t *node;
2690 
2691 	/* Allocate one or more contiguous chunks for this request. */
2692 
2693 	csize = CHUNK_CEILING(size);
2694 	if (csize == 0) {
2695 		/* size is large enough to cause size_t wrap-around. */
2696 		return (NULL);
2697 	}
2698 
2699 	/* Allocate a chunk node with which to track the chunk. */
2700 	node = base_chunk_node_alloc();
2701 	if (node == NULL)
2702 		return (NULL);
2703 
2704 	ret = chunk_alloc(csize);
2705 	if (ret == NULL) {
2706 		base_chunk_node_dealloc(node);
2707 		return (NULL);
2708 	}
2709 
2710 	/* Insert node into huge. */
2711 	node->chunk = ret;
2712 	node->size = csize;
2713 
2714 	malloc_mutex_lock(&chunks_mtx);
2715 	RB_INSERT(chunk_tree_s, &huge, node);
2716 #ifdef MALLOC_STATS
2717 	huge_nmalloc++;
2718 	huge_allocated += csize;
2719 #endif
2720 	malloc_mutex_unlock(&chunks_mtx);
2721 
2722 	if (opt_junk)
2723 		memset(ret, 0xa5, csize);
2724 	else if (opt_zero)
2725 		memset(ret, 0, csize);
2726 
2727 	return (ret);
2728 }
2729 
2730 /* Only handles large allocations that require more than chunk alignment. */
2731 static void *
2732 huge_palloc(size_t alignment, size_t size)
2733 {
2734 	void *ret;
2735 	size_t alloc_size, chunk_size, offset;
2736 	chunk_node_t *node;
2737 
2738 	/*
2739 	 * This allocation requires alignment that is even larger than chunk
2740 	 * alignment.  This means that huge_malloc() isn't good enough.
2741 	 *
2742 	 * Allocate almost twice as many chunks as are demanded by the size or
2743 	 * alignment, in order to ensure the alignment can be achieved, then
2744 	 * unmap leading and trailing chunks.
2745 	 */
2746 	assert(alignment >= chunksize);
2747 
2748 	chunk_size = CHUNK_CEILING(size);
2749 
2750 	if (size >= alignment)
2751 		alloc_size = chunk_size + alignment - chunksize;
2752 	else
2753 		alloc_size = (alignment << 1) - chunksize;
2754 
2755 	/* Allocate a chunk node with which to track the chunk. */
2756 	node = base_chunk_node_alloc();
2757 	if (node == NULL)
2758 		return (NULL);
2759 
2760 	ret = chunk_alloc(alloc_size);
2761 	if (ret == NULL) {
2762 		base_chunk_node_dealloc(node);
2763 		return (NULL);
2764 	}
2765 
2766 	offset = (uintptr_t)ret & (alignment - 1);
2767 	assert((offset & chunksize_mask) == 0);
2768 	assert(offset < alloc_size);
2769 	if (offset == 0) {
2770 		/* Trim trailing space. */
2771 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2772 		    - chunk_size);
2773 	} else {
2774 		size_t trailsize;
2775 
2776 		/* Trim leading space. */
2777 		chunk_dealloc(ret, alignment - offset);
2778 
2779 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2780 
2781 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2782 		if (trailsize != 0) {
2783 		    /* Trim trailing space. */
2784 		    assert(trailsize < alloc_size);
2785 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2786 			trailsize);
2787 		}
2788 	}
2789 
2790 	/* Insert node into huge. */
2791 	node->chunk = ret;
2792 	node->size = chunk_size;
2793 
2794 	malloc_mutex_lock(&chunks_mtx);
2795 	RB_INSERT(chunk_tree_s, &huge, node);
2796 #ifdef MALLOC_STATS
2797 	huge_nmalloc++;
2798 	huge_allocated += chunk_size;
2799 #endif
2800 	malloc_mutex_unlock(&chunks_mtx);
2801 
2802 	if (opt_junk)
2803 		memset(ret, 0xa5, chunk_size);
2804 	else if (opt_zero)
2805 		memset(ret, 0, chunk_size);
2806 
2807 	return (ret);
2808 }
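
/*
 * Worked example (illustrative only, 1 MB chunks): a request for 1 MB
 * aligned to 4 MB allocates alloc_size == (4 MB << 1) - 1 MB == 7 MB
 * worth of chunks.  Wherever that region lands, it must contain a 4 MB
 * boundary with at least 1 MB after it; the chunks before that
 * boundary and after the first megabyte are unmapped again, and only
 * the surviving 1 MB chunk is recorded in the huge tree.
 */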
2809 
2810 static void *
2811 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2812 {
2813 	void *ret;
2814 
2815 	/* Avoid moving the allocation if the size class would not change. */
2816 	if (oldsize > arena_maxclass &&
2817 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2818 		if (opt_junk && size < oldsize) {
2819 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2820 			    - size);
2821 		} else if (opt_zero && size > oldsize) {
2822 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2823 			    - oldsize);
2824 		}
2825 		return (ptr);
2826 	}
2827 
2828 	/*
2829 	 * If we get here, then size and oldsize are different enough that we
2830 	 * need to use a different size class.  In that case, fall back to
2831 	 * allocating new space and copying.
2832 	 */
2833 	ret = huge_malloc(size);
2834 	if (ret == NULL)
2835 		return (NULL);
2836 
2837 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2838 		/* The old allocation is a chunk. */
2839 		if (size < oldsize)
2840 			memcpy(ret, ptr, size);
2841 		else
2842 			memcpy(ret, ptr, oldsize);
2843 	} else {
2844 		/* The old allocation is a region. */
2845 		assert(oldsize < size);
2846 		memcpy(ret, ptr, oldsize);
2847 	}
2848 	idalloc(ptr);
2849 	return (ret);
2850 }
2851 
2852 static void
2853 huge_dalloc(void *ptr)
2854 {
2855 	chunk_node_t key;
2856 	chunk_node_t *node;
2857 
2858 	malloc_mutex_lock(&chunks_mtx);
2859 
2860 	/* Extract from tree of huge allocations. */
2861 	key.chunk = ptr;
2862 	/* LINTED */
2863 	node = RB_FIND(chunk_tree_s, &huge, &key);
2864 	assert(node != NULL);
2865 	assert(node->chunk == ptr);
2866 	/* LINTED */
2867 	RB_REMOVE(chunk_tree_s, &huge, node);
2868 
2869 #ifdef MALLOC_STATS
2870 	huge_ndalloc++;
2871 	huge_allocated -= node->size;
2872 #endif
2873 
2874 	malloc_mutex_unlock(&chunks_mtx);
2875 
2876 	/* Unmap chunk. */
2877 #ifdef USE_BRK
2878 	if (opt_junk)
2879 		memset(node->chunk, 0x5a, node->size);
2880 #endif
2881 	chunk_dealloc(node->chunk, node->size);
2882 
2883 	base_chunk_node_dealloc(node);
2884 }
2885 
2886 static void *
2887 imalloc(size_t size)
2888 {
2889 	void *ret;
2890 
2891 	assert(size != 0);
2892 
2893 	if (size <= arena_maxclass)
2894 		ret = arena_malloc(choose_arena(), size);
2895 	else
2896 		ret = huge_malloc(size);
2897 
2898 	return (ret);
2899 }
2900 
2901 static void *
2902 ipalloc(size_t alignment, size_t size)
2903 {
2904 	void *ret;
2905 	size_t ceil_size;
2906 
2907 	/*
2908 	 * Round size up to the nearest multiple of alignment.
2909 	 *
2910 	 * This done, we can take advantage of the fact that for each small
2911 	 * size class, every object is aligned at the smallest power of two
2912 	 * that is non-zero in the base two representation of the size.  For
2913 	 * example:
2914 	 *
2915 	 *   Size |   Base 2 | Minimum alignment
2916 	 *   -----+----------+------------------
2917 	 *     96 |  1100000 |  32
2918 	 *    144 | 10100000 |  32
2919 	 *    192 | 11000000 |  64
2920 	 *
2921 	 * Depending on runtime settings, it is possible that arena_malloc()
2922 	 * will further round up to a power of two, but that never causes
2923 	 * correctness issues.
2924 	 */
2925 	ceil_size = (size + (alignment - 1)) & (-alignment);
2926 	/*
2927 	 * (ceil_size < size) protects against the combination of maximal
2928 	 * alignment and size greater than maximal alignment.
2929 	 */
2930 	if (ceil_size < size) {
2931 		/* size_t overflow. */
2932 		return (NULL);
2933 	}
2934 
2935 	if (ceil_size <= pagesize || (alignment <= pagesize
2936 	    && ceil_size <= arena_maxclass))
2937 		ret = arena_malloc(choose_arena(), ceil_size);
2938 	else {
2939 		size_t run_size;
2940 
2941 		/*
2942 		 * We can't achieve sub-page alignment, so round up alignment
2943 		 * permanently; it makes later calculations simpler.
2944 		 */
2945 		alignment = PAGE_CEILING(alignment);
2946 		ceil_size = PAGE_CEILING(size);
2947 		/*
2948 		 * (ceil_size < size) protects against very large sizes within
2949 		 * pagesize of SIZE_T_MAX.
2950 		 *
2951 		 * (ceil_size + alignment < ceil_size) protects against the
2952 		 * combination of maximal alignment and ceil_size large enough
2953 		 * to cause overflow.  This is similar to the first overflow
2954 		 * check above, but it needs to be repeated due to the new
2955 		 * ceil_size value, which may now be *equal* to maximal
2956 		 * alignment, whereas before we only detected overflow if the
2957 		 * original size was *greater* than maximal alignment.
2958 		 */
2959 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
2960 			/* size_t overflow. */
2961 			return (NULL);
2962 		}
2963 
2964 		/*
2965 		 * Calculate the size of the over-size run that arena_palloc()
2966 		 * would need to allocate in order to guarantee the alignment.
2967 		 */
2968 		if (ceil_size >= alignment)
2969 			run_size = ceil_size + alignment - pagesize;
2970 		else {
2971 			/*
2972 			 * It is possible that (alignment << 1) will cause
2973 			 * overflow, but it doesn't matter because we also
2974 			 * subtract pagesize, which in the case of overflow
2975 			 * leaves us with a very large run_size.  That causes
2976 			 * the first conditional below to fail, which means
2977 			 * that the bogus run_size value never gets used for
2978 			 * anything important.
2979 			 */
2980 			run_size = (alignment << 1) - pagesize;
2981 		}
2982 
2983 		if (run_size <= arena_maxclass) {
2984 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
2985 			    run_size);
2986 		} else if (alignment <= chunksize)
2987 			ret = huge_malloc(ceil_size);
2988 		else
2989 			ret = huge_palloc(alignment, ceil_size);
2990 	}
2991 
2992 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
2993 	return (ret);
2994 }
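
/*
 * Worked example (illustrative only, 4 kB pages): ipalloc(64, 100)
 * rounds the size up to ceil_size == (100 + 63) & -64 == 128; since
 * that fits in a page and, per the comment above, every object in the
 * 128-byte class is at least 128-byte (hence 64-byte) aligned, a plain
 * arena_malloc() suffices.  ipalloc(8192, 8192) instead takes the
 * run-based path with run_size == 8192 + 8192 - 4096 == 12288 and ends
 * up in arena_palloc().
 */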
2995 
2996 static void *
2997 icalloc(size_t size)
2998 {
2999 	void *ret;
3000 
3001 	if (size <= arena_maxclass) {
3002 		ret = arena_malloc(choose_arena(), size);
3003 		if (ret == NULL)
3004 			return (NULL);
3005 		memset(ret, 0, size);
3006 	} else {
3007 		/*
3008 		 * The virtual memory system provides zero-filled pages, so
3009 		 * there is no need to do so manually, unless opt_junk is
3010 		 * enabled, in which case huge_malloc() fills huge allocations
3011 		 * with junk.
3012 		 */
3013 		ret = huge_malloc(size);
3014 		if (ret == NULL)
3015 			return (NULL);
3016 
3017 		if (opt_junk)
3018 			memset(ret, 0, size);
3019 #ifdef USE_BRK
3020 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3021 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3022 			/*
3023 			 * This may be a re-used brk chunk.  Therefore, zero
3024 			 * the memory.
3025 			 */
3026 			memset(ret, 0, size);
3027 		}
3028 #endif
3029 	}
3030 
3031 	return (ret);
3032 }
3033 
3034 static size_t
3035 isalloc(const void *ptr)
3036 {
3037 	size_t ret;
3038 	arena_chunk_t *chunk;
3039 
3040 	assert(ptr != NULL);
3041 
3042 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3043 	if (chunk != ptr) {
3044 		/* Region. */
3045 		assert(chunk->arena->magic == ARENA_MAGIC);
3046 
3047 		ret = arena_salloc(ptr);
3048 	} else {
3049 		chunk_node_t *node, key;
3050 
3051 		/* Chunk (huge allocation). */
3052 
3053 		malloc_mutex_lock(&chunks_mtx);
3054 
3055 		/* Extract from tree of huge allocations. */
3056 		key.chunk = __DECONST(void *, ptr);
3057 		/* LINTED */
3058 		node = RB_FIND(chunk_tree_s, &huge, &key);
3059 		assert(node != NULL);
3060 
3061 		ret = node->size;
3062 
3063 		malloc_mutex_unlock(&chunks_mtx);
3064 	}
3065 
3066 	return (ret);
3067 }
3068 
3069 static void *
3070 iralloc(void *ptr, size_t size)
3071 {
3072 	void *ret;
3073 	size_t oldsize;
3074 
3075 	assert(ptr != NULL);
3076 	assert(size != 0);
3077 
3078 	oldsize = isalloc(ptr);
3079 
3080 	if (size <= arena_maxclass)
3081 		ret = arena_ralloc(ptr, size, oldsize);
3082 	else
3083 		ret = huge_ralloc(ptr, size, oldsize);
3084 
3085 	return (ret);
3086 }
3087 
3088 static void
3089 idalloc(void *ptr)
3090 {
3091 	arena_chunk_t *chunk;
3092 
3093 	assert(ptr != NULL);
3094 
3095 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3096 	if (chunk != ptr) {
3097 		/* Region. */
3098 		arena_dalloc(chunk->arena, chunk, ptr);
3099 	} else
3100 		huge_dalloc(ptr);
3101 }
3102 
3103 static void
3104 malloc_print_stats(void)
3105 {
3106 
3107 	if (opt_print_stats) {
3108 		char s[UMAX2S_BUFSIZE];
3109 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3110 		    "");
3111 		_malloc_message("Assertions ",
3112 #ifdef NDEBUG
3113 		    "disabled",
3114 #else
3115 		    "enabled",
3116 #endif
3117 		    "\n", "");
3118 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3119 		    opt_abort ? "A" : "a",
3120 		    opt_junk ? "J" : "j",
3121 		    opt_hint ? "H" : "h");
3122 		_malloc_message(opt_utrace ? "PU" : "Pu",
3123 		    opt_sysv ? "V" : "v",
3124 		    opt_xmalloc ? "X" : "x",
3125 		    opt_zero ? "Z\n" : "z\n");
3126 
3127 		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3128 		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3129 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3130 		    "\n", "");
3131 		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3132 		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3133 		    "");
3134 
3135 		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3136 		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3137 
3138 #ifdef MALLOC_STATS
3139 		{
3140 			size_t allocated, mapped;
3141 			unsigned i;
3142 			arena_t *arena;
3143 
3144 			/* Calculate and print allocated/mapped stats. */
3145 
3146 			/* arenas. */
3147 			for (i = 0, allocated = 0; i < narenas; i++) {
3148 				if (arenas[i] != NULL) {
3149 					malloc_mutex_lock(&arenas[i]->mtx);
3150 					allocated +=
3151 					    arenas[i]->stats.allocated_small;
3152 					allocated +=
3153 					    arenas[i]->stats.allocated_large;
3154 					malloc_mutex_unlock(&arenas[i]->mtx);
3155 				}
3156 			}
3157 
3158 			/* huge/base. */
3159 			malloc_mutex_lock(&chunks_mtx);
3160 			allocated += huge_allocated;
3161 			mapped = stats_chunks.curchunks * chunksize;
3162 			malloc_mutex_unlock(&chunks_mtx);
3163 
3164 			malloc_mutex_lock(&base_mtx);
3165 			mapped += base_mapped;
3166 			malloc_mutex_unlock(&base_mtx);
3167 
3168 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3169 			    allocated, mapped);
3170 
3171 			/* Print chunk stats. */
3172 			{
3173 				chunk_stats_t chunks_stats;
3174 
3175 				malloc_mutex_lock(&chunks_mtx);
3176 				chunks_stats = stats_chunks;
3177 				malloc_mutex_unlock(&chunks_mtx);
3178 
3179 				malloc_printf("chunks: nchunks   "
3180 				    "highchunks    curchunks\n");
3181 				malloc_printf("  %13llu%13lu%13lu\n",
3182 				    chunks_stats.nchunks,
3183 				    chunks_stats.highchunks,
3184 				    chunks_stats.curchunks);
3185 			}
3186 
3187 			/* Print huge allocation stats. */
3188 			malloc_printf(
3189 			    "huge: nmalloc      ndalloc    allocated\n");
3190 			malloc_printf(" %12llu %12llu %12zu\n",
3191 			    huge_nmalloc, huge_ndalloc, huge_allocated);
3192 
3193 			/* Print stats for each arena. */
3194 			for (i = 0; i < narenas; i++) {
3195 				arena = arenas[i];
3196 				if (arena != NULL) {
3197 					malloc_printf(
3198 					    "\narenas[%u] @ %p\n", i, arena);
3199 					malloc_mutex_lock(&arena->mtx);
3200 					stats_print(arena);
3201 					malloc_mutex_unlock(&arena->mtx);
3202 				}
3203 			}
3204 		}
3205 #endif /* #ifdef MALLOC_STATS */
3206 		_malloc_message("___ End malloc statistics ___\n", "", "", "");
3207 	}
3208 }
3209 
3210 /*
3211  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3212  * implementation has to take pains to avoid infinite recursion during
3213  * initialization.
3214  */
3215 static inline bool
3216 malloc_init(void)
3217 {
3218 
3219 	if (malloc_initialized == false)
3220 		return (malloc_init_hard());
3221 
3222 	return (false);
3223 }
3224 
3225 static bool
3226 malloc_init_hard(void)
3227 {
3228 	unsigned i, j;
3229 	int linklen;
3230 	char buf[PATH_MAX + 1];
3231 	const char *opts = "";
3232 
3233 	malloc_mutex_lock(&init_lock);
3234 	if (malloc_initialized) {
3235 		/*
3236 		 * Another thread initialized the allocator before this one
3237 		 * acquired init_lock.
3238 		 */
3239 		malloc_mutex_unlock(&init_lock);
3240 		return (false);
3241 	}
3242 
3243 	/* Get number of CPUs. */
3244 	{
3245 		int mib[2];
3246 		size_t len;
3247 
3248 		mib[0] = CTL_HW;
3249 		mib[1] = HW_NCPU;
3250 		len = sizeof(ncpus);
3251 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3252 			/* Error. */
3253 			ncpus = 1;
3254 		}
3255 	}
3256 
3257 	/* Get page size. */
3258 	{
3259 		long result;
3260 
3261 		result = sysconf(_SC_PAGESIZE);
3262 		assert(result != -1);
3263 		pagesize = (unsigned) result;
3264 
3265 		/*
3266 		 * We assume that pagesize is a power of 2 when calculating
3267 		 * pagesize_mask and pagesize_2pow.
3268 		 */
3269 		assert(((result - 1) & result) == 0);
3270 		pagesize_mask = result - 1;
3271 		pagesize_2pow = ffs((int)result) - 1;
3272 	}
3273 
3274 	for (i = 0; i < 3; i++) {
3275 		/* Get runtime configuration. */
3276 		switch (i) {
3277 		case 0:
3278 			if ((linklen = readlink("/etc/malloc.conf", buf,
3279 						sizeof(buf) - 1)) != -1) {
3280 				/*
3281 				 * Use the name of the "/etc/malloc.conf"
3282 				 * symbolic link's target as the option string.
3283 				 */
3284 				buf[linklen] = '\0';
3285 				opts = buf;
3286 			} else {
3287 				/* No configuration specified. */
3288 				buf[0] = '\0';
3289 				opts = buf;
3290 			}
3291 			break;
3292 		case 1:
3293 			if (issetugid() == 0 && (opts =
3294 			    getenv("MALLOC_OPTIONS")) != NULL) {
3295 				/*
3296 				 * Do nothing; opts is already initialized to
3297 				 * the value of the MALLOC_OPTIONS environment
3298 				 * variable.
3299 				 */
3300 			} else {
3301 				/* No configuration specified. */
3302 				buf[0] = '\0';
3303 				opts = buf;
3304 			}
3305 			break;
3306 		case 2:
3307 			if (_malloc_options != NULL) {
3308 			    /*
3309 			     * Use options that were compiled into the program.
3310 			     */
3311 			    opts = _malloc_options;
3312 			} else {
3313 				/* No configuration specified. */
3314 				buf[0] = '\0';
3315 				opts = buf;
3316 			}
3317 			break;
3318 		default:
3319 			/* NOTREACHED */
3320 			/* LINTED */
3321 			assert(false);
3322 		}
3323 
3324 		for (j = 0; opts[j] != '\0'; j++) {
3325 			switch (opts[j]) {
3326 			case 'a':
3327 				opt_abort = false;
3328 				break;
3329 			case 'A':
3330 				opt_abort = true;
3331 				break;
3332 			case 'h':
3333 				opt_hint = false;
3334 				break;
3335 			case 'H':
3336 				opt_hint = true;
3337 				break;
3338 			case 'j':
3339 				opt_junk = false;
3340 				break;
3341 			case 'J':
3342 				opt_junk = true;
3343 				break;
3344 			case 'k':
3345 				/*
3346 				 * Chunks always require at least one header
3347 				 * page, so chunks can never be smaller than
3348 				 * two pages.
3349 				 */
3350 				if (opt_chunk_2pow > pagesize_2pow + 1)
3351 					opt_chunk_2pow--;
3352 				break;
3353 			case 'K':
3354 				/*
3355 				 * There must be fewer pages in a chunk than
3356 				 * can be recorded by the pos field of
3357 				 * arena_chunk_map_t, in order to make POS_FREE
3358 				 * special.
3359 				 */
3360 				if (opt_chunk_2pow - pagesize_2pow
3361 				    < (sizeof(uint32_t) << 3) - 1)
3362 					opt_chunk_2pow++;
3363 				break;
3364 			case 'n':
3365 				opt_narenas_lshift--;
3366 				break;
3367 			case 'N':
3368 				opt_narenas_lshift++;
3369 				break;
3370 			case 'p':
3371 				opt_print_stats = false;
3372 				break;
3373 			case 'P':
3374 				opt_print_stats = true;
3375 				break;
3376 			case 'q':
3377 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3378 					opt_quantum_2pow--;
3379 				break;
3380 			case 'Q':
3381 				if (opt_quantum_2pow < pagesize_2pow - 1)
3382 					opt_quantum_2pow++;
3383 				break;
3384 			case 's':
3385 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3386 					opt_small_max_2pow--;
3387 				break;
3388 			case 'S':
3389 				if (opt_small_max_2pow < pagesize_2pow - 1)
3390 					opt_small_max_2pow++;
3391 				break;
3392 			case 'u':
3393 				opt_utrace = false;
3394 				break;
3395 			case 'U':
3396 				opt_utrace = true;
3397 				break;
3398 			case 'v':
3399 				opt_sysv = false;
3400 				break;
3401 			case 'V':
3402 				opt_sysv = true;
3403 				break;
3404 			case 'x':
3405 				opt_xmalloc = false;
3406 				break;
3407 			case 'X':
3408 				opt_xmalloc = true;
3409 				break;
3410 			case 'z':
3411 				opt_zero = false;
3412 				break;
3413 			case 'Z':
3414 				opt_zero = true;
3415 				break;
3416 			default: {
3417 				char cbuf[2];
3418 
3419 				cbuf[0] = opts[j];
3420 				cbuf[1] = '\0';
3421 				_malloc_message(_getprogname(),
3422 				    ": (malloc) Unsupported character in "
3423 				    "malloc options: '", cbuf, "'\n");
3424 			}
3425 			}
3426 		}
3427 	}
3428 
3429 	/* Take care to call atexit() only once. */
3430 	if (opt_print_stats) {
3431 		/* Print statistics at exit. */
3432 		atexit(malloc_print_stats);
3433 	}
3434 
3435 	/* Set variables according to the value of opt_small_max_2pow. */
3436 	if (opt_small_max_2pow < opt_quantum_2pow)
3437 		opt_small_max_2pow = opt_quantum_2pow;
3438 	small_max = (1 << opt_small_max_2pow);
3439 
3440 	/* Set bin-related variables. */
3441 	bin_maxclass = (pagesize >> 1);
3442 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3443 	ntbins = opt_quantum_2pow - TINY_MIN_2POW;
3444 	assert(ntbins <= opt_quantum_2pow);
3445 	nqbins = (small_max >> opt_quantum_2pow);
3446 	nsbins = pagesize_2pow - opt_small_max_2pow - 1;
3447 
3448 	/* Set variables according to the value of opt_quantum_2pow. */
3449 	quantum = (1 << opt_quantum_2pow);
3450 	quantum_mask = quantum - 1;
3451 	if (ntbins > 0)
3452 		small_min = (quantum >> 1) + 1;
3453 	else
3454 		small_min = 1;
3455 	assert(small_min <= quantum);
3456 
3457 	/* Set variables according to the value of opt_chunk_2pow. */
3458 	chunksize = (1LU << opt_chunk_2pow);
3459 	chunksize_mask = chunksize - 1;
3460 	chunksize_2pow = opt_chunk_2pow;
3461 	chunk_npages = (chunksize >> pagesize_2pow);
3462 	{
3463 		unsigned header_size;
3464 
3465 		header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t)
3466 		    * (chunk_npages - 1));
3467 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3468 		if ((header_size & pagesize_mask) != 0)
3469 			arena_chunk_header_npages++;
3470 	}
3471 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3472 	    pagesize_2pow);
3473 
3474 	UTRACE(0, 0, 0);
3475 
3476 #ifdef MALLOC_STATS
3477 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3478 #endif
3479 
3480 	/* Various sanity checks that regard configuration. */
3481 	assert(quantum >= sizeof(void *));
3482 	assert(quantum <= pagesize);
3483 	assert(chunksize >= pagesize);
3484 	assert(quantum * 4 <= chunksize);
3485 
3486 	/* Initialize chunks data. */
3487 	malloc_mutex_init(&chunks_mtx);
3488 	RB_INIT(&huge);
3489 #ifdef USE_BRK
3490 	malloc_mutex_init(&brk_mtx);
3491 	brk_base = sbrk(0);
3492 	brk_prev = brk_base;
3493 	brk_max = brk_base;
3494 #endif
3495 #ifdef MALLOC_STATS
3496 	huge_nmalloc = 0;
3497 	huge_ndalloc = 0;
3498 	huge_allocated = 0;
3499 #endif
3500 	RB_INIT(&old_chunks);
3501 
3502 	/* Initialize base allocation data structures. */
3503 #ifdef MALLOC_STATS
3504 	base_mapped = 0;
3505 #endif
3506 #ifdef USE_BRK
3507 	/*
3508 	 * Allocate a base chunk here, since it doesn't actually have to be
3509 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3510 	 * the use of space that would otherwise be wasted.
3511 	 */
3512 	base_pages_alloc(0);
3513 #endif
3514 	base_chunk_nodes = NULL;
3515 	malloc_mutex_init(&base_mtx);
3516 
3517 	if (ncpus > 1) {
3518 		/*
3519 		 * For SMP systems, create four times as many arenas as there
3520 		 * are CPUs by default.
3521 		 */
3522 		opt_narenas_lshift += 2;
3523 	}
3524 
3525 #ifdef NO_TLS
3526 	/* Initialize arena key. */
3527 	(void)thr_keycreate(&arenas_map_key, NULL);
3528 #endif
3529 
3530 	/* Determine how many arenas to use. */
3531 	narenas = ncpus;
3532 	if (opt_narenas_lshift > 0) {
3533 		if ((narenas << opt_narenas_lshift) > narenas)
3534 			narenas <<= opt_narenas_lshift;
3535 		/*
3536 		 * Make sure not to exceed the limits of what base_alloc()
3537 		 * can handle.
3538 		 */
3539 		if (narenas * sizeof(arena_t *) > chunksize)
3540 			narenas = chunksize / sizeof(arena_t *);
3541 	} else if (opt_narenas_lshift < 0) {
3542 		if ((narenas << opt_narenas_lshift) < narenas)
3543 			narenas <<= opt_narenas_lshift;
3544 		/* Make sure there is at least one arena. */
3545 		if (narenas == 0)
3546 			narenas = 1;
3547 	}
3548 
3549 	next_arena = 0;
3550 
3551 	/* Allocate and initialize arenas. */
3552 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3553 	if (arenas == NULL) {
3554 		malloc_mutex_unlock(&init_lock);
3555 		return (true);
3556 	}
3557 	/*
3558 	 * Zero the array.  In practice, this should always be pre-zeroed,
3559 	 * since it was just mmap()ed, but let's be sure.
3560 	 */
3561 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3562 
3563 	/*
3564 	 * Initialize one arena here.  The rest are lazily created in
3565 	 * choose_arena_hard().
3566 	 */
3567 	arenas_extend(0);
3568 	if (arenas[0] == NULL) {
3569 		malloc_mutex_unlock(&init_lock);
3570 		return (true);
3571 	}
3572 
3573 	malloc_mutex_init(&arenas_mtx);
3574 
3575 	malloc_initialized = true;
3576 	malloc_mutex_unlock(&init_lock);
3577 	return (false);
3578 }
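
/*
 * Worked example (illustrative only): with the documented defaults of
 * a 16-byte quantum, 512-byte small_max and 4 kB pages, the settings
 * computed above come out as
 *
 *	ntbins == 3	(tiny bins:     2, 4, 8)
 *	nqbins == 32	(quantum bins:  16, 32, ..., 512)
 *	nsbins == 2	(sub-page bins: 1024, 2048)
 *	small_min == 9, bin_maxclass == 2048
 *
 * and, taking a 1 MB chunk size purely as an example, chunk_npages ==
 * 256; the chunk header plus its per-page map then typically fits in a
 * single leading page, leaving arena_maxclass just under 1 MB.
 */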
3579 
3580 /*
3581  * End general internal functions.
3582  */
3583 /******************************************************************************/
3584 /*
3585  * Begin malloc(3)-compatible functions.
3586  */
3587 
3588 void *
3589 malloc(size_t size)
3590 {
3591 	void *ret;
3592 
3593 	if (malloc_init()) {
3594 		ret = NULL;
3595 		goto RETURN;
3596 	}
3597 
3598 	if (size == 0) {
3599 		if (opt_sysv == false)
3600 			size = 1;
3601 		else {
3602 			ret = NULL;
3603 			goto RETURN;
3604 		}
3605 	}
3606 
3607 	ret = imalloc(size);
3608 
3609 RETURN:
3610 	if (ret == NULL) {
3611 		if (opt_xmalloc) {
3612 			_malloc_message(_getprogname(),
3613 			    ": (malloc) Error in malloc(): out of memory\n", "",
3614 			    "");
3615 			abort();
3616 		}
3617 		errno = ENOMEM;
3618 	}
3619 
3620 	UTRACE(0, size, ret);
3621 	return (ret);
3622 }
3623 
3624 /* XXXAD */
3625 int	posix_memalign(void **memptr, size_t alignment, size_t size);
3626 
3627 int
3628 posix_memalign(void **memptr, size_t alignment, size_t size)
3629 {
3630 	int ret;
3631 	void *result;
3632 
3633 	if (malloc_init())
3634 		result = NULL;
3635 	else {
3636 		/* Make sure that alignment is a large enough power of 2. */
3637 		if (((alignment - 1) & alignment) != 0
3638 		    || alignment < sizeof(void *)) {
3639 			if (opt_xmalloc) {
3640 				_malloc_message(_getprogname(),
3641 				    ": (malloc) Error in posix_memalign(): "
3642 				    "invalid alignment\n", "", "");
3643 				abort();
3644 			}
3645 			result = NULL;
3646 			ret = EINVAL;
3647 			goto RETURN;
3648 		}
3649 
3650 		result = ipalloc(alignment, size);
3651 	}
3652 
3653 	if (result == NULL) {
3654 		if (opt_xmalloc) {
3655 			_malloc_message(_getprogname(),
3656 			": (malloc) Error in posix_memalign(): out of memory\n",
3657 			"", "");
3658 			abort();
3659 		}
3660 		ret = ENOMEM;
3661 		goto RETURN;
3662 	}
3663 
3664 	*memptr = result;
3665 	ret = 0;
3666 
3667 RETURN:
3668 	UTRACE(0, size, result);
3669 	return (ret);
3670 }
3671 
3672 void *
3673 calloc(size_t num, size_t size)
3674 {
3675 	void *ret;
3676 	size_t num_size;
3677 
3678 	if (malloc_init()) {
3679 		num_size = 0;
3680 		ret = NULL;
3681 		goto RETURN;
3682 	}
3683 
3684 	num_size = num * size;
3685 	if (num_size == 0) {
3686 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3687 			num_size = 1;
3688 		else {
3689 			ret = NULL;
3690 			goto RETURN;
3691 		}
3692 	/*
3693 	 * Try to avoid division here.  We know that it isn't possible to
3694 	 * overflow during multiplication if neither operand uses any of the
3695 	 * most significant half of the bits in a size_t.
3696 	 */
3697 	} else if ((unsigned long long)((num | size) &
3698 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3699 	   (num_size / size != num)) {
3700 		/* size_t overflow. */
3701 		ret = NULL;
3702 		goto RETURN;
3703 	}
3704 
3705 	ret = icalloc(num_size);
3706 
3707 RETURN:
3708 	if (ret == NULL) {
3709 		if (opt_xmalloc) {
3710 			_malloc_message(_getprogname(),
3711 			    ": (malloc) Error in calloc(): out of memory\n", "",
3712 			    "");
3713 			abort();
3714 		}
3715 		errno = ENOMEM;
3716 	}
3717 
3718 	UTRACE(0, num_size, ret);
3719 	return (ret);
3720 }
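
/*
 * Worked example (illustrative only): with a 32-bit size_t the mask in
 * the overflow test above is effectively 0xffff0000.  calloc(70000,
 * 70000) has both operands above 0xffff, so the division check runs
 * and catches the wrapped product (70000 * 70000 truncates to
 * 605032704); calloc(1000, 1000) keeps every bit in the low half, the
 * masked value is zero, and no division is performed at all.
 */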
3721 
3722 void *
3723 realloc(void *ptr, size_t size)
3724 {
3725 	void *ret;
3726 
3727 	if (size == 0) {
3728 		if (opt_sysv == false)
3729 			size = 1;
3730 		else {
3731 			if (ptr != NULL)
3732 				idalloc(ptr);
3733 			ret = NULL;
3734 			goto RETURN;
3735 		}
3736 	}
3737 
3738 	if (ptr != NULL) {
3739 		assert(malloc_initialized);
3740 
3741 		ret = iralloc(ptr, size);
3742 
3743 		if (ret == NULL) {
3744 			if (opt_xmalloc) {
3745 				_malloc_message(_getprogname(),
3746 				    ": (malloc) Error in realloc(): out of "
3747 				    "memory\n", "", "");
3748 				abort();
3749 			}
3750 			errno = ENOMEM;
3751 		}
3752 	} else {
3753 		if (malloc_init())
3754 			ret = NULL;
3755 		else
3756 			ret = imalloc(size);
3757 
3758 		if (ret == NULL) {
3759 			if (opt_xmalloc) {
3760 				_malloc_message(_getprogname(),
3761 				    ": (malloc) Error in realloc(): out of "
3762 				    "memory\n", "", "");
3763 				abort();
3764 			}
3765 			errno = ENOMEM;
3766 		}
3767 	}
3768 
3769 RETURN:
3770 	UTRACE(ptr, size, ret);
3771 	return (ret);
3772 }
3773 
3774 void
3775 free(void *ptr)
3776 {
3777 
3778 	UTRACE(ptr, 0, 0);
3779 	if (ptr != NULL) {
3780 		assert(malloc_initialized);
3781 
3782 		idalloc(ptr);
3783 	}
3784 }
3785 
3786 /*
3787  * End malloc(3)-compatible functions.
3788  */
3789 /******************************************************************************/
3790 /*
3791  * Begin non-standard functions.
3792  */
3793 #ifndef __NetBSD__
3794 size_t
3795 malloc_usable_size(const void *ptr)
3796 {
3797 
3798 	assert(ptr != NULL);
3799 
3800 	return (isalloc(ptr));
3801 }
3802 #endif
3803 
3804 /*
3805  * End non-standard functions.
3806  */
3807 /******************************************************************************/
3808 /*
3809  * Begin library-private functions, used by threading libraries for protection
3810  * of malloc during fork().  These functions are only called if the program is
3811  * running in threaded mode, so there is no need to check whether the program
3812  * is threaded here.
3813  */
3814 
3815 void
3816 _malloc_prefork(void)
3817 {
3818 	unsigned i;
3819 
3820 	/* Acquire all mutexes in a safe order. */
3821 
3822 	malloc_mutex_lock(&arenas_mtx);
3823 	for (i = 0; i < narenas; i++) {
3824 		if (arenas[i] != NULL)
3825 			malloc_mutex_lock(&arenas[i]->mtx);
3826 	}
3827 	malloc_mutex_unlock(&arenas_mtx);
3828 
3829 	malloc_mutex_lock(&base_mtx);
3830 
3831 	malloc_mutex_lock(&chunks_mtx);
3832 }
3833 
3834 void
3835 _malloc_postfork(void)
3836 {
3837 	unsigned i;
3838 
3839 	/* Release all mutexes, now that fork() has completed. */
3840 
3841 	malloc_mutex_unlock(&chunks_mtx);
3842 
3843 	malloc_mutex_unlock(&base_mtx);
3844 
3845 	malloc_mutex_lock(&arenas_mtx);
3846 	for (i = 0; i < narenas; i++) {
3847 		if (arenas[i] != NULL)
3848 			malloc_mutex_unlock(&arenas[i]->mtx);
3849 	}
3850 	malloc_mutex_unlock(&arenas_mtx);
3851 }
3852 
3853 /*
3854  * End library-private functions.
3855  */
3856 /******************************************************************************/
3857