xref: /dflybsd-src/lib/libc/stdlib/nmalloc.c (revision 369c9b6ca801f3b4ba739e0527a5de34bb51e9d8)
1 /*
2  * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
3  *
4  * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
5  *
6  * This code is derived from software contributed to The DragonFly Project
7  * by Matthew Dillon <dillon@backplane.com> and by
8  * Venkatesh Srinivas <me@endeavour.zapto.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in
18  *    the documentation and/or other materials provided with the
19  *    distribution.
20  * 3. Neither the name of The DragonFly Project nor the names of its
21  *    contributors may be used to endorse or promote products derived
22  *    from this software without specific, prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
28  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
38  */
39 /*
40  * This module implements a slab allocator drop-in replacement for the
41  * libc malloc().
42  *
43  * A slab allocator reserves a ZONE for each chunk size, then lays the
44  * chunks out in an array within the zone.  Allocation and deallocation
45  * is nearly instantaneous, and overhead losses are limited to a fixed
46  * worst-case amount.
47  *
48  * The slab allocator does not have to pre-initialize the list of
49  * free chunks for each zone, and the underlying VM will not be
50  * touched at all beyond the zone header until an actual allocation
51  * needs it.
52  *
53  * Slab management and locking is done on a per-zone basis.
54  *
55  *	Alloc Size	Chunking        Number of zones
56  *	0-127		8		16
57  *	128-255		16		8
58  *	256-511		32		8
59  *	512-1023	64		8
60  *	1024-2047	128		8
61  *	2048-4095	256		8
62  *	4096-8191	512		8
63  *	8192-16383	1024		8
64  *	16384-32767	2048		8
65  *
66  *	Allocations >= ZoneLimit go directly to mmap and a hash table
67  *	is used to locate them on free().  One and two-page allocations use the
68  *	zone mechanic to avoid excessive mmap()/munmap() calls.
69  *
70  *			   API FEATURES AND SIDE EFFECTS
71  *
72  *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
73  *	Above that, power-of-2 sized allocations are page-aligned.  Non-
74  *	power-of-2 sized allocations are aligned the same as the chunk
75  *	size for their zone (illustrated in the sketch after this comment).
76  *    + malloc(0) returns a special non-NULL value
77  *    + ability to allocate arbitrarily large chunks of memory
78  *    + realloc will reuse the passed pointer if possible, within the
79  *	limitations of the zone chunking.
80  *
81  * Multithreaded enhancements for small allocations introduced August 2010.
82  * These are in the spirit of 'libumem'. See:
83  *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
84  *	slab allocator to many CPUs and arbitrary resources". In Proc. 2001
85  *	USENIX Technical Conference. USENIX Association.
86  *
87  * Oversized allocations employ the BIGCACHE mechanic whereby large
88  * allocations may be handed significantly larger buffers, allowing them
89  * to avoid mmap/munmap operations even through significant realloc()s.
90  * The excess space is only trimmed if too many large allocations have been
91  * given this treatment.
92  *
93  * TUNING
94  *
95  * The value of the environment variable MALLOC_OPTIONS is a character string
96  * containing various flags to tune nmalloc.
97  *
98  * 'U'   / ['u']	Generate / do not generate utrace entries for ktrace(1)
99  *			This will generate utrace events for all malloc,
100  *			realloc, and free calls. There are tools (mtrplay) to
101  *			replay an allocation pattern or to graph heap structure
102  *			(mtrgraph) which can interpret these logs.
103  * 'Z'   / ['z']	Zero out / do not zero all allocations.
104  *			Each new byte of memory allocated by malloc, realloc, or
105  *			reallocf will be initialized to 0. This is intended for
106  *			debugging and will affect performance negatively.
107  * 'H'	/  ['h']	Pass a hint to the kernel about pages unused by the
108  *			allocation functions.
109  */
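
/*
 * Editor's illustration (not part of the original source): a minimal
 * sketch of the documented guarantees above, assuming this nmalloc is the
 * installed libc malloc and PAGE_SIZE is 4096.
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		void *z = malloc(0);			// special non-NULL value
 *		assert(z != NULL);
 *		void *p = malloc(4096);			// power-of-2 size up to a page
 *		assert(((uintptr_t)p & 4095) == 0);	// ...is power-of-2 aligned
 *		p = realloc(p, 4000);			// same zone chunking, so the
 *							// pointer may be reused
 *		free(p);
 *		free(z);
 *		return (0);
 *	}
 *
 * Run with MALLOC_OPTIONS=Z in the environment to have every new byte
 * zeroed, or MALLOC_OPTIONS=U under ktrace(1) to log the calls.
 */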
110 
111 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */
112 
113 #include "namespace.h"
114 #include <sys/param.h>
115 #include <sys/types.h>
116 #include <sys/mman.h>
117 #include <sys/queue.h>
118 #include <sys/ktrace.h>
119 #include <stdio.h>
120 #include <stdint.h>
121 #include <stdlib.h>
122 #include <stdarg.h>
123 #include <stddef.h>
124 #include <unistd.h>
125 #include <string.h>
126 #include <fcntl.h>
127 #include <errno.h>
128 #include <pthread.h>
129 #include <machine/atomic.h>
130 #include "un-namespace.h"
131 
132 #include "libc_private.h"
133 #include "spinlock.h"
134 
135 void __free(void *);
136 void *__malloc(size_t);
137 void *__calloc(size_t, size_t);
138 void *__realloc(void *, size_t);
139 void *__aligned_alloc(size_t, size_t);
140 int __posix_memalign(void **, size_t, size_t);
141 
142 /*
143  * Linked list of large allocations
144  */
145 typedef struct bigalloc {
146 	struct bigalloc *next;	/* hash link */
147 	void	*base;		/* base pointer */
148 	u_long	active;		/* bytes active */
149 	u_long	bytes;		/* bytes allocated */
150 } *bigalloc_t;
151 
152 /*
153  * Note that page-multiple allocations above MAX_SLAB_PAGEALIGN, and any
154  * allocations >= ZALLOC_ZONE_LIMIT, fall through to the kmem subsystem.
155  */
156 #define MAX_SLAB_PAGEALIGN	(2 * PAGE_SIZE)	/* max slab for PAGE_SIZE*n */
157 #define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
158 #define ZALLOC_ZONE_SIZE	(64 * 1024)	/* zone size */
159 #define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
160 
161 #if ZALLOC_ZONE_LIMIT == 16384
162 #define NZONES			72
163 #elif ZALLOC_ZONE_LIMIT == 32768
164 #define NZONES			80
165 #else
166 #error "I couldn't figure out NZONES"
167 #endif
168 
169 /*
170  * Chunk structure for free elements
171  */
172 typedef struct slchunk {
173 	struct slchunk *c_Next;
174 } *slchunk_t;
175 
176 /*
177  * The IN-BAND zone header is placed at the beginning of each zone.
178  */
179 struct slglobaldata;
180 
181 typedef struct slzone {
182 	int32_t		z_Magic;	/* magic number for sanity check */
183 	int		z_NFree;	/* total free chunks / ualloc space */
184 	struct slzone *z_Next;		/* ZoneAry[] link if z_NFree non-zero */
185 	int		z_NMax;		/* maximum free chunks */
186 	char		*z_BasePtr;	/* pointer to start of chunk array */
187 	int		z_UIndex;	/* current initial allocation index */
188 	int		z_UEndIndex;	/* last (first) allocation index */
189 	int		z_ChunkSize;	/* chunk size for validation */
190 	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
191 	int		z_ZoneIndex;
192 	int		z_Flags;
193 	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
194 } *slzone_t;
195 
196 typedef struct slglobaldata {
197 	spinlock_t	Spinlock;
198 	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
199 } *slglobaldata_t;
200 
201 #define SLZF_UNOTZEROD		0x0001
202 
203 #define FASTSLABREALLOC		0x02
204 
205 /*
206  * Misc constants.  Note that allocations that are exact multiples of
207  * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
208  * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
209  */
210 #define MIN_CHUNK_SIZE		8		/* in bytes */
211 #define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
212 #define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)
213 
214 /*
215  * WARNING: A limited number of spinlocks are available, BIGXSIZE should
216  *	    not be larger than 64.
217  */
218 #define BIGHSHIFT	10			/* bigalloc hash table */
219 #define BIGHSIZE	(1 << BIGHSHIFT)
220 #define BIGHMASK	(BIGHSIZE - 1)
221 #define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
222 #define BIGXMASK	(BIGXSIZE - 1)
223 
224 /*
225  * BIGCACHE caches oversized allocations.  Note that a linear search is
226  * performed, so do not make the cache too large.
227  *
228  * BIGCACHE will garbage-collect excess space when the excess exceeds the
229  * specified value.  A relatively large number should be used here because
230  * garbage collection is expensive.
231  */
232 #define BIGCACHE	16
233 #define BIGCACHE_MASK	(BIGCACHE - 1)
234 #define BIGCACHE_LIMIT	(1024 * 1024)		/* size limit */
235 #define BIGCACHE_EXCESS	(16 * 1024 * 1024)	/* garbage collect */
236 
237 #define CACHE_CHUNKS	32
238 
239 #define SAFLAG_ZERO	0x0001
240 #define SAFLAG_PASSIVE	0x0002
241 #define SAFLAG_MAGS	0x0004
242 
243 /*
244  * Thread control
245  */
246 
247 #define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))
248 
249 /*
250  * The assertion macros try to pretty-print assertion failures
251  * which can be caused by corruption.  If a lock is held, we
252  * provide a macro that attempts to release it before asserting
253  * in order to prevent (e.g.) a reentrant SIGABRT calling malloc
254  * and deadlocking, resulting in the program freezing up.
255  */
256 #define MASSERT(exp)				\
257 	do { if (__predict_false(!(exp)))	\
258 	    _mpanic("assertion: %s in %s",	\
259 		    #exp, __func__);		\
260 	} while (0)
261 
262 #define MASSERT_WTHUNLK(exp, unlk)		\
263 	do { if (__predict_false(!(exp))) {	\
264 	    unlk;				\
265 	    _mpanic("assertion: %s in %s",	\
266 		    #exp, __func__);		\
267 	  }					\
268 	} while (0)
269 
270 /*
271  * Magazines, arranged so the structure is roughly 4KB.
272  */
273 #define M_MAX_ROUNDS		(512 - 3)
274 #define M_MIN_ROUNDS		16
275 #define M_ZONE_INIT_ROUNDS	64
276 #define M_ZONE_HYSTERESIS	32
277 
278 struct magazine {
279 	SLIST_ENTRY(magazine) nextmagazine;
280 
281 	int		flags;
282 	int		capacity;	/* Max rounds in this magazine */
283 	int		rounds;		/* Current number of free rounds */
284 	int		unused01;
285 	void		*objects[M_MAX_ROUNDS];
286 };
287 
288 SLIST_HEAD(magazinelist, magazine);
289 
290 static spinlock_t zone_mag_lock;
291 static spinlock_t depot_spinlock;
292 static struct magazine zone_magazine = {
293 	.flags = 0,
294 	.capacity = M_ZONE_INIT_ROUNDS,
295 	.rounds = 0,
296 };
297 
298 #define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
299 #define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
300 #define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
301 #define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)
302 
303 /*
304  * Each thread has a pair of magazines per size-class (NZONES).  The loaded
305  * magazine supports immediate allocations; the previous magazine is either
306  * full or empty and is swapped in as needed (see the sketch further below).
307  */
308 typedef struct magazine_pair {
309 	struct magazine	*loaded;
310 	struct magazine	*prev;
311 } magazine_pair;
312 
313 /* A depot is a collection of magazines for a single zone. */
314 typedef struct magazine_depot {
315 	struct magazinelist full;
316 	struct magazinelist empty;
317 	spinlock_t	lock;
318 } magazine_depot;
319 
320 typedef struct thr_mags {
321 	magazine_pair	mags[NZONES];
322 	struct magazine	*newmag;
323 	int		init;
324 } thr_mags;
325 
326 static __thread thr_mags thread_mags TLS_ATTRIBUTE;
327 static pthread_key_t thread_mags_key;
328 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
329 static magazine_depot depots[NZONES];
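
/*
 * Editor's sketch (not part of the original source) of the per-thread
 * fast path described above, for some zone index zi, using helpers
 * defined elsewhere in this file (magazine_alloc(), swap_mags(),
 * MAGAZINE_FULL()):
 *
 *	magazine_pair *pair = &thread_mags.mags[zi];
 *	void *obj = magazine_alloc(pair->loaded);	// lock-free per-thread hit
 *	if (obj == NULL && pair->prev && MAGAZINE_FULL(pair->prev)) {
 *		swap_mags(pair);			// full prev becomes loaded
 *		obj = magazine_alloc(pair->loaded);
 *	}
 *	// otherwise mtmagazine_alloc() falls back to the depot / global zones
 */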
330 
331 /*
332  * Fixed globals (not per-cpu)
333  */
334 static const int ZoneSize = ZALLOC_ZONE_SIZE;
335 static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
336 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
337 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;
338 
339 static int opt_madvise = 0;
340 static int opt_utrace = 0;
341 static int g_malloc_flags = 0;
342 static struct slglobaldata SLGlobalData;
343 static bigalloc_t bigalloc_array[BIGHSIZE];
344 static spinlock_t bigspin_array[BIGXSIZE];
345 static volatile void *bigcache_array[BIGCACHE];		/* atomic swap */
346 static volatile size_t bigcache_size_array[BIGCACHE];	/* SMP races ok */
347 static volatile int bigcache_index;			/* SMP races ok */
348 static int malloc_panic;
349 static size_t excess_alloc;				/* excess big allocs */
350 
351 static void *_slaballoc(size_t size, int flags);
352 static void *_slabrealloc(void *ptr, size_t size);
353 static void _slabfree(void *ptr, int, bigalloc_t *);
354 static int _slabmemalign(void **memptr, size_t alignment, size_t size);
355 static void *_vmem_alloc(size_t bytes, size_t align, int flags);
356 static void _vmem_free(void *ptr, size_t bytes);
357 static void *magazine_alloc(struct magazine *);
358 static int magazine_free(struct magazine *, void *);
359 static void *mtmagazine_alloc(int zi, int flags);
360 static int mtmagazine_free(int zi, void *);
361 static void mtmagazine_init(void);
362 static void mtmagazine_destructor(void *);
363 static slzone_t zone_alloc(int flags);
364 static void zone_free(void *z);
365 static void _mpanic(const char *ctl, ...) __printflike(1, 2);
366 static void malloc_init(void) __constructor(101);
367 
368 struct nmalloc_utrace {
369 	void *p;
370 	size_t s;
371 	void *r;
372 };
373 
374 #define UTRACE(a, b, c)						\
375 	if (opt_utrace) {					\
376 		struct nmalloc_utrace ut = {			\
377 			.p = (a),				\
378 			.s = (b),				\
379 			.r = (c)				\
380 		};						\
381 		utrace(&ut, sizeof(ut));			\
382 	}
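
/*
 * Editor's note: the utrace record convention used by the entry points
 * below (when 'U' is set in MALLOC_OPTIONS) is
 *
 *	malloc(size)		-> UTRACE(0, size, ptr)
 *	free(ptr)		-> UTRACE(ptr, 0, 0)
 *	realloc(old, size)	-> UTRACE(old, size, new)
 *
 * and malloc_init() below emits UTRACE(-1, 0, NULL) as a start marker.
 */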
383 
384 static void
385 malloc_init(void)
386 {
387 	const char *p = NULL;
388 
389 	if (issetugid() == 0)
390 		p = getenv("MALLOC_OPTIONS");
391 
392 	for (; p != NULL && *p != '\0'; p++) {
393 		switch(*p) {
394 		case 'u':	opt_utrace = 0; break;
395 		case 'U':	opt_utrace = 1; break;
396 		case 'h':	opt_madvise = 0; break;
397 		case 'H':	opt_madvise = 1; break;
398 		case 'z':	g_malloc_flags = 0; break;
399 		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
400 		default:
401 			break;
402 		}
403 	}
404 
405 	UTRACE((void *) -1, 0, NULL);
406 }
407 
408 /*
409  * We have to install a handler for nmalloc thread teardowns when
410  * the thread is created.  We cannot delay this because destructors in
411  * sophisticated userland programs can call malloc() for the first time
412  * during their thread exit.
413  *
414  * This routine is called directly from pthreads.
415  */
416 void
417 _nmalloc_thr_init(void)
418 {
419 	thr_mags *tp;
420 
421 	/*
422 	 * Disallow mtmagazine operations until the mtmagazine is
423 	 * initialized.
424 	 */
425 	tp = &thread_mags;
426 	tp->init = -1;
427 
428 	_pthread_once(&thread_mags_once, mtmagazine_init);
429 	_pthread_setspecific(thread_mags_key, tp);
430 	tp->init = 1;
431 }
432 
433 void
434 _nmalloc_thr_prepfork(void)
435 {
436 	if (__isthreaded) {
437 		_SPINLOCK(&zone_mag_lock);
438 		_SPINLOCK(&depot_spinlock);
439 	}
440 }
441 
442 void
443 _nmalloc_thr_parentfork(void)
444 {
445 	if (__isthreaded) {
446 		_SPINUNLOCK(&depot_spinlock);
447 		_SPINUNLOCK(&zone_mag_lock);
448 	}
449 }
450 
451 void
452 _nmalloc_thr_childfork(void)
453 {
454 	if (__isthreaded) {
455 		_SPINUNLOCK(&depot_spinlock);
456 		_SPINUNLOCK(&zone_mag_lock);
457 	}
458 }
459 
460 /*
461  * Handle signal reentrancy safely whether we are threaded or not.
462  * This improves the stability for mono and will probably improve
463  * stability for other high-level languages which are becoming increasingly
464  * sophisticated.
465  *
466  * The sigblockall()/sigunblockall() implementation uses a counter on
467  * a per-thread shared user/kernel page, avoids system calls, and is thus
468  * very fast.
469  */
470 static __inline void
471 nmalloc_sigblockall(void)
472 {
473 	sigblockall();
474 }
475 
476 static __inline void
477 nmalloc_sigunblockall(void)
478 {
479 	sigunblockall();
480 }
481 
482 /*
483  * Thread locks.
484  */
485 static __inline void
486 slgd_lock(slglobaldata_t slgd)
487 {
488 	if (__isthreaded)
489 		_SPINLOCK(&slgd->Spinlock);
490 	else
491 		sigblockall();
492 }
493 
494 static __inline void
495 slgd_unlock(slglobaldata_t slgd)
496 {
497 	if (__isthreaded)
498 		_SPINUNLOCK(&slgd->Spinlock);
499 	else
500 		sigunblockall();
501 }
502 
503 static __inline void
504 depot_lock(magazine_depot *dp __unused)
505 {
506 	if (__isthreaded)
507 		_SPINLOCK(&depot_spinlock);
508 	else
509 		sigblockall();
510 #if 0
511 	if (__isthreaded)
512 		_SPINLOCK(&dp->lock);
513 #endif
514 }
515 
516 static __inline void
517 depot_unlock(magazine_depot *dp __unused)
518 {
519 	if (__isthreaded)
520 		_SPINUNLOCK(&depot_spinlock);
521 	else
522 		sigunblockall();
523 #if 0
524 	if (__isthreaded)
525 		_SPINUNLOCK(&dp->lock);
526 #endif
527 }
528 
529 static __inline void
530 zone_magazine_lock(void)
531 {
532 	if (__isthreaded)
533 		_SPINLOCK(&zone_mag_lock);
534 	else
535 		sigblockall();
536 }
537 
538 static __inline void
539 zone_magazine_unlock(void)
540 {
541 	if (__isthreaded)
542 		_SPINUNLOCK(&zone_mag_lock);
543 	else
544 		sigunblockall();
545 }
546 
547 static __inline void
548 swap_mags(magazine_pair *mp)
549 {
550 	struct magazine *tmp;
551 	tmp = mp->loaded;
552 	mp->loaded = mp->prev;
553 	mp->prev = tmp;
554 }
555 
556 /*
557  * bigalloc hashing and locking support.
558  *
559  * Return an unmasked hash code for the passed pointer.
560  */
561 static __inline int
562 _bigalloc_hash(void *ptr)
563 {
564 	int hv;
565 
566 	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
567 	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));
568 
569 	return(hv);
570 }
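
/*
 * Editor's worked example (assuming 4KB pages, PAGE_SHIFT == 12, and the
 * BIGHSHIFT of 10 defined above): for ptr == (void *)0x12345000,
 *
 *	hv = (0x12345000 >> 12) ^ (0x12345000 >> 22)
 *	   = 0x12345 ^ 0x48
 *	   = 0x1230d
 *
 * so the chain is bigalloc_array[hv & BIGHMASK] (index 0x30d) and the
 * lock is bigspin_array[hv & BIGXMASK] (index 0xd); 16 chains share each
 * spinlock since BIGXSIZE == BIGHSIZE / 16.
 */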
571 
572 /*
573  * Lock the hash chain and return a pointer to its base for the specified
574  * address.
575  */
576 static __inline bigalloc_t *
577 bigalloc_lock(void *ptr)
578 {
579 	int hv = _bigalloc_hash(ptr);
580 	bigalloc_t *bigp;
581 
582 	bigp = &bigalloc_array[hv & BIGHMASK];
583 	if (__isthreaded)
584 		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
585 	return(bigp);
586 }
587 
588 /*
589  * Lock the hash chain and return a pointer to its base for the specified
590  * address.
591  *
592  * BUT, if the hash chain is empty, just return NULL and do not bother
593  * to lock anything.
594  */
595 static __inline bigalloc_t *
596 bigalloc_check_and_lock(void *ptr)
597 {
598 	int hv = _bigalloc_hash(ptr);
599 	bigalloc_t *bigp;
600 
601 	bigp = &bigalloc_array[hv & BIGHMASK];
602 	if (*bigp == NULL)
603 		return(NULL);
604 	if (__isthreaded) {
605 		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
606 	}
607 	return(bigp);
608 }
609 
610 static __inline void
611 bigalloc_unlock(void *ptr)
612 {
613 	int hv;
614 
615 	if (__isthreaded) {
616 		hv = _bigalloc_hash(ptr);
617 		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
618 	}
619 }
620 
621 /*
622  * Find a bigcache entry that might work for the allocation.  SMP races are
623  * ok here except for the swap (that is, it is ok if bigcache_size_array[i]
624  * is wrong or if a NULL or too-small big is returned).
625  *
626  * Generally speaking it is ok to find a large entry even if the bytes
627  * requested are relatively small (but still oversized), because we really
628  * don't know *what* the application is going to do with the buffer.
629  */
630 static __inline
631 bigalloc_t
632 bigcache_find_alloc(size_t bytes)
633 {
634 	bigalloc_t big = NULL;
635 	size_t test;
636 	int i;
637 
638 	for (i = 0; i < BIGCACHE; ++i) {
639 		test = bigcache_size_array[i];
640 		if (bytes <= test) {
641 			bigcache_size_array[i] = 0;
642 			big = atomic_swap_ptr(&bigcache_array[i], NULL);
643 			break;
644 		}
645 	}
646 	return big;
647 }
648 
649 /*
650  * Free a bigcache entry, possibly returning one that the caller really must
651  * free.  This is used to cache recent oversized memory blocks.  Only
652  * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try
653  * to collect the biggest ones we can that are under the limit.
654  */
655 static __inline
656 bigalloc_t
657 bigcache_find_free(bigalloc_t big)
658 {
659 	int i;
660 	int j;
661 	int b;
662 
663 	b = ++bigcache_index;
664 	for (i = 0; i < BIGCACHE; ++i) {
665 		j = (b + i) & BIGCACHE_MASK;
666 		if (bigcache_size_array[j] < big->bytes) {
667 			bigcache_size_array[j] = big->bytes;
668 			big = atomic_swap_ptr(&bigcache_array[j], big);
669 			break;
670 		}
671 	}
672 	return big;
673 }
674 
675 static __inline
676 void
677 handle_excess_big(void)
678 {
679 	int i;
680 	bigalloc_t big;
681 	bigalloc_t *bigp;
682 
683 	if (excess_alloc <= BIGCACHE_EXCESS)
684 		return;
685 
686 	for (i = 0; i < BIGHSIZE; ++i) {
687 		bigp = &bigalloc_array[i];
688 		if (*bigp == NULL)
689 			continue;
690 		if (__isthreaded)
691 			_SPINLOCK(&bigspin_array[i & BIGXMASK]);
692 		for (big = *bigp; big; big = big->next) {
693 			if (big->active < big->bytes) {
694 				MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0,
695 				    _SPINUNLOCK(&bigspin_array[i & BIGXMASK]));
696 				MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0,
697 				    _SPINUNLOCK(&bigspin_array[i & BIGXMASK]));
698 				munmap((char *)big->base + big->active,
699 				       big->bytes - big->active);
700 				atomic_add_long(&excess_alloc,
701 						big->active - big->bytes);
702 				big->bytes = big->active;
703 			}
704 		}
705 		if (__isthreaded)
706 			_SPINUNLOCK(&bigspin_array[i & BIGXMASK]);
707 	}
708 }
709 
710 /*
711  * Calculate the zone index for the allocation request size and set the
712  * allocation request size to that particular zone's chunk size.
713  */
714 static __inline int
715 zoneindex(size_t *bytes, size_t *chunking)
716 {
717 	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */
718 
719 	/*
720 	 * This used to be 8-byte chunks and 16 zones for n < 128.
721 	 * However some instructions may require 16-byte alignment
722 	 * (aka SIMD) and programs might not request an aligned size
723 	 * (aka GCC-7), so change this as follows:
724 	 *
725 	 * 0-15 bytes	8-byte alignment in two zones	(0-1)
726 	 * 16-127 bytes	16-byte alignment in eight zones	(3-10)
727 	 * zone indices 2 and 11-14 are currently unused.
728 	 */
729 	if (n < 16) {
730 		*bytes = n = (n + 7) & ~7;
731 		*chunking = 8;
732 		return(n / 8 - 1);		/* 8 byte chunks, 2 zones */
733 		/* zones 0,1, zone 2 is unused */
734 	}
735 	if (n < 128) {
736 		*bytes = n = (n + 15) & ~15;
737 		*chunking = 16;
738 		return(n / 16 + 2);		/* 16 byte chunks, 8 zones */
739 		/* zones 3-10, zones 11-14 unused */
740 	}
741 	if (n < 256) {
742 		*bytes = n = (n + 15) & ~15;
743 		*chunking = 16;
744 		return(n / 16 + 7);
745 	}
746 	if (n < 8192) {
747 		if (n < 512) {
748 			*bytes = n = (n + 31) & ~31;
749 			*chunking = 32;
750 			return(n / 32 + 15);
751 		}
752 		if (n < 1024) {
753 			*bytes = n = (n + 63) & ~63;
754 			*chunking = 64;
755 			return(n / 64 + 23);
756 		}
757 		if (n < 2048) {
758 			*bytes = n = (n + 127) & ~127;
759 			*chunking = 128;
760 			return(n / 128 + 31);
761 		}
762 		if (n < 4096) {
763 			*bytes = n = (n + 255) & ~255;
764 			*chunking = 256;
765 			return(n / 256 + 39);
766 		}
767 		*bytes = n = (n + 511) & ~511;
768 		*chunking = 512;
769 		return(n / 512 + 47);
770 	}
771 #if ZALLOC_ZONE_LIMIT > 8192
772 	if (n < 16384) {
773 		*bytes = n = (n + 1023) & ~1023;
774 		*chunking = 1024;
775 		return(n / 1024 + 55);
776 	}
777 #endif
778 #if ZALLOC_ZONE_LIMIT > 16384
779 	if (n < 32768) {
780 		*bytes = n = (n + 2047) & ~2047;
781 		*chunking = 2048;
782 		return(n / 2048 + 63);
783 	}
784 #endif
785 	_mpanic("Unexpected byte count %zu", n);
786 	return(0);
787 }
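
/*
 * Editor's worked examples (not part of the original source):
 *
 *	size_t bytes, chunking;
 *	int zi;
 *
 *	bytes = 100;
 *	zi = zoneindex(&bytes, &chunking);
 *	// bytes == 112, chunking == 16, zi == 112/16 + 2 == 9
 *
 *	bytes = 513;
 *	zi = zoneindex(&bytes, &chunking);
 *	// bytes == 576, chunking == 64, zi == 576/64 + 23 == 32
 *
 * The largest slab-managed request (16383 bytes with the 16KB
 * ZALLOC_ZONE_LIMIT) rounds up to 16384 and lands in zone 71, which is
 * why NZONES is 72.
 */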
788 
789 /*
790  * We want large magazines for small allocations
791  */
792 static __inline int
793 zonecapacity(int zi)
794 {
795 	int cap;
796 
797 	cap = (NZONES - zi) * (M_MAX_ROUNDS - M_MIN_ROUNDS) / NZONES +
798 	      M_MIN_ROUNDS;
799 
800 	return cap;
801 }
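
/*
 * Editor's note: with NZONES == 72, M_MIN_ROUNDS == 16 and
 * M_MAX_ROUNDS == 509 this scales linearly from
 *
 *	zonecapacity(0)  == 72 * 493 / 72 + 16 == 509	(smallest chunks)
 *	zonecapacity(71) ==  1 * 493 / 72 + 16 ==  22	(largest chunks)
 *
 * rounds per magazine.
 */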
802 
803 /*
804  * malloc() - call internal slab allocator
805  */
806 void *
807 __malloc(size_t size)
808 {
809 	void *ptr;
810 
811 	nmalloc_sigblockall();
812 	ptr = _slaballoc(size, 0);
813 	if (ptr == NULL)
814 		errno = ENOMEM;
815 	else
816 		UTRACE(0, size, ptr);
817 	nmalloc_sigunblockall();
818 
819 	return(ptr);
820 }
821 
822 #define MUL_NO_OVERFLOW	(1UL << (sizeof(size_t) * 4))
823 
824 /*
825  * calloc() - call internal slab allocator
826  */
827 void *
828 __calloc(size_t number, size_t size)
829 {
830 	void *ptr;
831 
832 	if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
833 	     number > 0 && SIZE_MAX / number < size) {
834 		errno = ENOMEM;
835 		return(NULL);
836 	}
837 
838 	nmalloc_sigblockall();
839 	ptr = _slaballoc(number * size, SAFLAG_ZERO);
840 	if (ptr == NULL)
841 		errno = ENOMEM;
842 	else
843 		UTRACE(0, number * size, ptr);
844 	nmalloc_sigunblockall();
845 
846 	return(ptr);
847 }
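
/*
 * Editor's note on the MUL_NO_OVERFLOW test above: if both operands are
 * below 2^(sizeof(size_t) * 4) their product cannot overflow size_t, so
 * the relatively expensive division is only performed when at least one
 * operand is large.  A sketch, assuming 64-bit size_t:
 *
 *	size_t number = (size_t)1 << 33;
 *	size_t size   = (size_t)1 << 32;
 *	// number >= MUL_NO_OVERFLOW (2^32), and SIZE_MAX / number
 *	// == 2^31 - 1 < size, so __calloc() fails with ENOMEM.
 */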
848 
849 /*
850  * realloc() (SLAB ALLOCATOR)
851  *
852  * We do not attempt to optimize this routine beyond reusing the same
853  * pointer if the new size fits within the chunking of the old pointer's
854  * zone.
855  */
856 void *
857 __realloc(void *ptr, size_t size)
858 {
859 	void *ret;
860 
861 	nmalloc_sigblockall();
862 	ret = _slabrealloc(ptr, size);
863 	if (ret == NULL)
864 		errno = ENOMEM;
865 	else
866 		UTRACE(ptr, size, ret);
867 	nmalloc_sigunblockall();
868 
869 	return(ret);
870 }
871 
872 /*
873  * aligned_alloc()
874  *
875  * Allocate (size) bytes with an alignment of (alignment).
876  */
877 void *
878 __aligned_alloc(size_t alignment, size_t size)
879 {
880 	void *ptr;
881 	int rc;
882 
883 	nmalloc_sigblockall();
884 	ptr = NULL;
885 	rc = _slabmemalign(&ptr, alignment, size);
886 	if (rc)
887 		errno = rc;
888 	nmalloc_sigunblockall();
889 
890 	return (ptr);
891 }
892 
893 /*
894  * posix_memalign()
895  *
896  * Allocate (size) bytes with an alignment of (alignment), where (alignment)
897  * is a power of 2 >= sizeof(void *).
898  */
899 int
900 __posix_memalign(void **memptr, size_t alignment, size_t size)
901 {
902 	int rc;
903 
904 	/*
905 	 * OpenGroup spec issue 6 check
906 	 */
907 	if (alignment < sizeof(void *)) {
908 		*memptr = NULL;
909 		return(EINVAL);
910 	}
911 
912 	nmalloc_sigblockall();
913 	rc = _slabmemalign(memptr, alignment, size);
914 	nmalloc_sigunblockall();
915 
916 	return (rc);
917 }
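
/*
 * Editor's usage sketch (standard POSIX semantics, not specific to this
 * allocator):
 *
 *	void *p;
 *	int rc = posix_memalign(&p, 64, 1000);	// 64: power of 2 >= sizeof(void *)
 *	if (rc == 0) {
 *		// p is 64-byte aligned; release it with free()
 *		free(p);
 *	} else {
 *		// rc is EINVAL or ENOMEM (the error is returned, not put in errno)
 *	}
 */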
918 
919 /*
920  * The slab allocator will allocate on power-of-2 boundaries up to
921  * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
922  * zone matching the requirements, and _vmem_alloc() otherwise.
923  */
924 static int
925 _slabmemalign(void **memptr, size_t alignment, size_t size)
926 {
927 	bigalloc_t *bigp;
928 	bigalloc_t big;
929 	size_t chunking;
930 	int zi __unused;
931 
932 	if (alignment < 1) {
933 		*memptr = NULL;
934 		return(EINVAL);
935 	}
936 
937 	/*
938 	 * OpenGroup spec issue 6 checks
939 	 */
940 	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
941 		*memptr = NULL;
942 		return(EINVAL);
943 	}
944 
945 	/*
946 	 * Our zone mechanism guarantees same-sized alignment for any
947 	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
948 	 * we can just call _slaballoc() and be done.  We round size up
949 	 * to the nearest alignment boundary to improve our odds of
950 	 * it becoming a power-of-2 if it wasn't before.
951 	 */
952 	if (size <= alignment)
953 		size = alignment;
954 	else
955 		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
956 
957 	/*
958 	 * If we have overflowed above when rounding to the nearest alignment
959 	 * boundary, just return ENOMEM, size should be == N * sizeof(void *).
960 	 *
961 	 * Power-of-2 allocations up to 8KB will be aligned to the allocation
962 	 * size and _slaballoc() can simply be used.  Please see the 'Align
963 	 * the storage in the zone based on the chunking' logic in _slaballoc(),
964 	 * which has a special case for powers of 2.
965 	 */
966 	if (size == 0)
967 		return(ENOMEM);
968 
969 	if (size <= MAX_SLAB_PAGEALIGN &&
970 	    (size | (size - 1)) + 1 == (size << 1)) {
971 		*memptr = _slaballoc(size, 0);
972 		return(*memptr ? 0 : ENOMEM);
973 	}
974 
975 	/*
976 	 * Otherwise locate a zone with a chunking that matches
977 	 * the requested alignment, within reason.   Consider two cases:
978 	 *
979 	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
980 	 *     we find will be the best fit because the chunking will be
981 	 *     greater or equal to the alignment.
982 	 *
983 	 * (2) A 513 allocation on a 256-byte alignment.  In this case
984 	 *     the first zoneindex we find will be for 576 byte allocations
985 	 *     with a chunking of 64, which is not sufficient.  To fix this
986 	 *     we simply find the nearest power-of-2 >= size and use the
987 	 *     same side-effect of _slaballoc() which guarantees
988 	 *     same-alignment on a power-of-2 allocation.
989 	 */
990 	if (size < PAGE_SIZE) {
991 		zi = zoneindex(&size, &chunking);
992 		if (chunking >= alignment) {
993 			*memptr = _slaballoc(size, 0);
994 			return(*memptr ? 0 : ENOMEM);
995 		}
996 		if (size >= 1024)
997 			alignment = 1024;
998 		if (size >= 16384)
999 			alignment = 16384;
1000 		while (alignment < size)
1001 			alignment <<= 1;
1002 		*memptr = _slaballoc(alignment, 0);
1003 		return(*memptr ? 0 : ENOMEM);
1004 	}
1005 
1006 	/*
1007 	 * If the slab allocator cannot handle it use vmem_alloc().
1008 	 *
1009 	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
1010 	 */
1011 	if (alignment < PAGE_SIZE)
1012 		alignment = PAGE_SIZE;
1013 	if (size < alignment)
1014 		size = alignment;
1015 	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
1016 	if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) {
1017 		big = bigcache_find_alloc(size);
1018 		if (big && big->bytes < size) {
1019 			_slabfree(big->base, FASTSLABREALLOC, &big);
1020 			big = NULL;
1021 		}
1022 		if (big) {
1023 			*memptr = big->base;
1024 			big->active = size;
1025 			if (big->active < big->bytes) {
1026 				atomic_add_long(&excess_alloc,
1027 						big->bytes - big->active);
1028 			}
1029 			bigp = bigalloc_lock(*memptr);
1030 			big->next = *bigp;
1031 			*bigp = big;
1032 			bigalloc_unlock(*memptr);
1033 			handle_excess_big();
1034 			return(0);
1035 		}
1036 	}
1037 	*memptr = _vmem_alloc(size, alignment, 0);
1038 	if (*memptr == NULL)
1039 		return(ENOMEM);
1040 
1041 	big = _slaballoc(sizeof(struct bigalloc), 0);
1042 	if (big == NULL) {
1043 		_vmem_free(*memptr, size);
1044 		*memptr = NULL;
1045 		return(ENOMEM);
1046 	}
1047 	bigp = bigalloc_lock(*memptr);
1048 	big->base = *memptr;
1049 	big->active = size;
1050 	big->bytes = size;		/* no excess */
1051 	big->next = *bigp;
1052 	*bigp = big;
1053 	bigalloc_unlock(*memptr);
1054 
1055 	return(0);
1056 }
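
/*
 * Editor's worked trace of case (2) above (hypothetical call, 4KB pages):
 *
 *	void *p;
 *	_slabmemalign(&p, 256, 513);
 *
 * size is rounded up to 768 (a multiple of the alignment); zoneindex(768)
 * selects a 64-byte chunking, which is smaller than the requested 256-byte
 * alignment, so the alignment is bumped to the next power of 2 >= 768
 * (1024) and _slaballoc(1024, 0) is used, whose power-of-2 rule yields a
 * 1024-byte-aligned chunk.
 */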
1057 
1058 /*
1059  * free() (SLAB ALLOCATOR) - do the obvious
1060  */
1061 void
1062 __free(void *ptr)
1063 {
1064 	UTRACE(ptr, 0, 0);
1065 	nmalloc_sigblockall();
1066 	_slabfree(ptr, 0, NULL);
1067 	nmalloc_sigunblockall();
1068 }
1069 
1070 /*
1071  * _slaballoc()	(SLAB ALLOCATOR)
1072  *
1073  *	Allocate memory via the slab allocator.  If the request is too large,
1074  *	or if it is page-aligned beyond a certain size, we fall back to the
1075  *	KMEM subsystem.
1076  */
1077 static void *
1078 _slaballoc(size_t size, int flags)
1079 {
1080 	slzone_t z;
1081 	slchunk_t chunk;
1082 	slglobaldata_t slgd;
1083 	size_t chunking;
1084 	thr_mags *tp;
1085 	struct magazine *mp;
1086 	int count;
1087 	int zi;
1088 	int off;
1089 	void *obj;
1090 
1091 	/*
1092 	 * Handle the degenerate size == 0 case.  Yes, this does happen.
1093 	 * Return a special pointer.  This is to maintain compatibility with
1094 	 * the original malloc implementation.  Certain devices, such as the
1095 	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
1096 	 * also realloc() later on.  Joy.
1097 	 */
1098 	if (size == 0)
1099 		size = 1;
1100 
1101 	/* Capture global flags */
1102 	flags |= g_malloc_flags;
1103 
1104 	/*
1105 	 * Handle large allocations directly, with a separate bigmem cache.
1106 	 *
1107 	 * The backend allocator is pretty nasty on a SMP system.   Use the
1108 	 * slab allocator for one and two page-sized chunks even though we
1109 	 * lose some efficiency.
1110 	 *
1111 	 * NOTE: Please see _slabmemalign(), which assumes that power-of-2
1112 	 *	 allocations up to and including MAX_SLAB_PAGEALIGN
1113 	 *	 can use _slaballoc() and be aligned to the same.  The
1114 	 *	 zone cache can be used for this case, bigalloc does not
1115 	 *	 have to be used.
1116 	 */
1117 	if (size >= ZoneLimit ||
1118 	    ((size & PAGE_MASK) == 0 && size > MAX_SLAB_PAGEALIGN)) {
1119 		bigalloc_t big;
1120 		bigalloc_t *bigp;
1121 
1122 		/*
1123 		 * Page-align and cache-color in case of virtually indexed
1124 		 * physically tagged L1 caches (aka SandyBridge).  No sweat
1125 		 * otherwise, so just do it.
1126 		 *
1127 		 * (don't count as excess).
1128 		 */
1129 		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
1130 
1131 		/*
1132 		 * If we have overflowed above when rounding to the page
1133 		 * boundary, something has passed us (size_t)[-PAGE_MASK..-1]
1134 		 * so just return NULL, size at this point should be >= 0.
1135 		 */
1136 		if (size == 0)
1137 			return (NULL);
1138 
1139 		/*
1140 		 * Force an additional page offset for 8KB-aligned requests
1141 		 * (i.e. 8KB, 16KB, etc) that helps spread data across the
1142 		 * CPU caches at the cost of some dead space in the memory
1143 		 * map.
1144 		 */
1145 		if ((size & (PAGE_SIZE * 2 - 1)) == 0)
1146 			size += PAGE_SIZE;
1147 
1148 		/*
1149 		 * Try to reuse a cached big block to avoid mmap'ing.  If it
1150 		 * turns out not to fit our requirements we throw it away
1151 		 * and allocate normally.
1152 		 */
1153 		big = NULL;
1154 		if (size <= BIGCACHE_LIMIT) {
1155 			big = bigcache_find_alloc(size);
1156 			if (big && big->bytes < size) {
1157 				_slabfree(big->base, FASTSLABREALLOC, &big);
1158 				big = NULL;
1159 			}
1160 		}
1161 		if (big) {
1162 			chunk = big->base;
1163 			if (flags & SAFLAG_ZERO)
1164 				bzero(chunk, size);
1165 		} else {
1166 			chunk = _vmem_alloc(size, PAGE_SIZE, flags);
1167 			if (chunk == NULL)
1168 				return(NULL);
1169 
1170 			big = _slaballoc(sizeof(struct bigalloc), 0);
1171 			if (big == NULL) {
1172 				_vmem_free(chunk, size);
1173 				return(NULL);
1174 			}
1175 			big->base = chunk;
1176 			big->bytes = size;
1177 		}
1178 		big->active = size;
1179 
1180 		bigp = bigalloc_lock(chunk);
1181 		if (big->active < big->bytes) {
1182 			atomic_add_long(&excess_alloc,
1183 					big->bytes - big->active);
1184 		}
1185 		big->next = *bigp;
1186 		*bigp = big;
1187 		bigalloc_unlock(chunk);
1188 		handle_excess_big();
1189 
1190 		return(chunk);
1191 	}
1192 
1193 	/* Compute allocation zone; zoneindex will panic on excessive sizes */
1194 	zi = zoneindex(&size, &chunking);
1195 	MASSERT(zi < NZONES);
1196 
1197 	obj = mtmagazine_alloc(zi, flags);
1198 	if (obj != NULL) {
1199 		if (flags & SAFLAG_ZERO)
1200 			bzero(obj, size);
1201 		return (obj);
1202 	}
1203 
1204 	/*
1205 	 * Attempt to allocate out of an existing global zone.  If all zones
1206 	 * are exhausted pull one off the free list or allocate a new one.
1207 	 */
1208 	slgd = &SLGlobalData;
1209 
1210 again:
1211 	if (slgd->ZoneAry[zi] == NULL) {
1212 		z = zone_alloc(flags);
1213 		if (z == NULL)
1214 			goto fail;
1215 
1216 		/*
1217 		 * How big is the base structure?
1218 		 */
1219 		off = sizeof(struct slzone);
1220 
1221 		/*
1222 		 * Align the storage in the zone based on the chunking.
1223 		 *
1224 		 * Guarantee power-of-2 alignment for power-of-2-sized
1225 		 * chunks.  Otherwise align based on the chunking size
1226 		 * (typically 8 or 16 bytes for small allocations).
1227 		 *
1228 		 * NOTE: Allocations >= ZoneLimit are governed by the
1229 		 * bigalloc code and typically only guarantee page-alignment.
1230 		 *
1231 		 * Set initial conditions for UIndex near the zone header
1232 		 * to reduce unnecessary page faults, vs semi-randomization
1233 		 * to improve L1 cache saturation.
1234 		 *
1235 		 * NOTE: Please see _slabmemalign(), which assumes that
1236 		 *	 power-of-2 allocations up to and including
1237 		 *	 MAX_SLAB_PAGEALIGN can use _slaballoc()
1238 		 *	 and be aligned to the same.  The zone cache can be
1239 		 *	 used for this case, bigalloc does not have to be
1240 		 *	 used.
1241 		 *
1242 		 *	 ALL power-of-2 requests that fall through to this
1243 		 *	 code use this rule (conditionals above limit this
1244 		 *	 to <= MAX_SLAB_PAGEALIGN).
1245 		 */
1246 		if ((size | (size - 1)) + 1 == (size << 1))
1247 			off = roundup2(off, size);
1248 		else
1249 			off = roundup2(off, chunking);
1250 		z->z_Magic = ZALLOC_SLAB_MAGIC;
1251 		z->z_ZoneIndex = zi;
1252 		z->z_NMax = (ZoneSize - off) / size;
1253 		z->z_NFree = z->z_NMax;
1254 		z->z_BasePtr = (char *)z + off;
1255 		z->z_UIndex = z->z_UEndIndex = 0;
1256 		z->z_ChunkSize = size;
1257 		z->z_FirstFreePg = ZonePageCount;
1258 		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
1259 			flags &= ~SAFLAG_ZERO;	/* already zero'd */
1260 			flags |= SAFLAG_PASSIVE;
1261 		}
1262 
1263 		/*
1264 		 * Slide the base index for initial allocations out of the
1265 		 * next zone we create so we do not over-weight the lower
1266 		 * part of the cpu memory caches.
1267 		 */
1268 		slgd_lock(slgd);
1269 		z->z_Next = slgd->ZoneAry[zi];
1270 		slgd->ZoneAry[zi] = z;
1271 	} else {
1272 		slgd_lock(slgd);
1273 		z = slgd->ZoneAry[zi];
1274 		if (z == NULL) {
1275 			slgd_unlock(slgd);
1276 			goto again;
1277 		}
1278 	}
1279 
1280 	/*
1281 	 * Ok, we have a zone from which at least one chunk is available.
1282 	 */
1283 	MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd));
1284 
1285 	/*
1286 	 * Try to cache <count> chunks, up to CACHE_CHUNKS (32 typ)
1287 	 * to avoid unnecessary global lock contention.
1288 	 */
1289 	tp = &thread_mags;
1290 	mp = tp->mags[zi].loaded;
1291 	count = 0;
1292 	if (mp && tp->init >= 0) {
1293 		count = mp->capacity - mp->rounds;
1294 		if (count >= z->z_NFree)
1295 			count = z->z_NFree - 1;
1296 		if (count > CACHE_CHUNKS)
1297 			count = CACHE_CHUNKS;
1298 	}
1299 
1300 	/*
1301 	 * Locate a chunk in a free page.  This attempts to localize
1302 	 * reallocations into earlier pages without us having to sort
1303 	 * the chunk list.  A chunk may still overlap a page boundary.
1304 	 */
1305 	while (z->z_FirstFreePg < ZonePageCount) {
1306 		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
1307 			if (((uintptr_t)chunk & ZoneMask) == 0) {
1308 				slgd_unlock(slgd);
1309 				_mpanic("assertion: corrupt malloc zone");
1310 			}
1311 			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
1312 			--z->z_NFree;
1313 
1314 			if (count == 0)
1315 				goto done;
1316 			mp->objects[mp->rounds++] = chunk;
1317 			--count;
1318 			continue;
1319 		}
1320 		++z->z_FirstFreePg;
1321 	}
1322 
1323 	/*
1324 	 * No chunks are available but NFree said we had some memory,
1325 	 * so it must be available in the never-before-used-memory
1326 	 * area governed by UIndex.  The consequences are very
1327 	 * serious if our zone got corrupted so we use an explicit
1328 	 * panic rather than a KASSERT.
1329 	 */
1330 	for (;;) {
1331 		chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);
1332 		--z->z_NFree;
1333 		if (++z->z_UIndex == z->z_NMax)
1334 			z->z_UIndex = 0;
1335 		if (z->z_UIndex == z->z_UEndIndex) {
1336 			if (z->z_NFree != 0) {
1337 				slgd_unlock(slgd);
1338 				_mpanic("slaballoc: corrupted zone");
1339 			}
1340 		}
1341 		if (count == 0)
1342 			break;
1343 		mp->objects[mp->rounds++] = chunk;
1344 		--count;
1345 	}
1346 
1347 	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
1348 		flags &= ~SAFLAG_ZERO;
1349 		flags |= SAFLAG_PASSIVE;
1350 	}
1351 
1352 done:
1353 	/*
1354 	 * Remove us from the ZoneAry[] when we become empty
1355 	 */
1356 	if (z->z_NFree == 0) {
1357 		slgd->ZoneAry[zi] = z->z_Next;
1358 		z->z_Next = NULL;
1359 	}
1360 	slgd_unlock(slgd);
1361 	if (flags & SAFLAG_ZERO)
1362 		bzero(chunk, size);
1363 
1364 	return(chunk);
1365 fail:
1366 	return(NULL);
1367 }
1368 
1369 /*
1370  * Reallocate memory within the chunk
1371  */
1372 static void *
1373 _slabrealloc(void *ptr, size_t size)
1374 {
1375 	bigalloc_t *bigp;
1376 	void *nptr;
1377 	slzone_t z;
1378 	size_t chunking;
1379 
1380 	if (ptr == NULL) {
1381 		return(_slaballoc(size, 0));
1382 	}
1383 
1384 	if (size == 0)
1385 		size = 1;
1386 
1387 	/*
1388 	 * Handle oversized allocations.
1389 	 */
1390 	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
1391 		bigalloc_t big;
1392 		size_t bigbytes;
1393 
1394 		while ((big = *bigp) != NULL) {
1395 			if (big->base == ptr) {
1396 				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
1397 				bigbytes = big->bytes;
1398 
1399 				/*
1400 				 * If it already fits determine if it makes
1401 				 * sense to shrink/reallocate.  Try to optimize
1402 				 * programs which stupidly make incremental
1403 				 * reallocations larger or smaller by scaling
1404 				 * the allocation.  Also deal with potential
1405 				 * coloring.
1406 				 */
1407 				if (size >= (bigbytes >> 1) &&
1408 				    size <= bigbytes) {
1409 					if (big->active != size) {
1410 						atomic_add_long(&excess_alloc,
1411 								big->active -
1412 								size);
1413 					}
1414 					big->active = size;
1415 					bigalloc_unlock(ptr);
1416 					return(ptr);
1417 				}
1418 
1419 				/*
1420 				 * For large reallocations, allocate more space
1421 				 * than we need to try to avoid excessive
1422 				 * reallocations later on.
1423 				 */
1424 				chunking = size + (size >> 3);
1425 				chunking = (chunking + PAGE_MASK) &
1426 					   ~(size_t)PAGE_MASK;
1427 
1428 				/*
1429 				 * Try to allocate adjacently in case the
1430 				 * program is idiotically realloc()ing a
1431 				 * huge memory block just slightly bigger.
1432 				 * (llvm's llc tends to do this a lot).
1433 				 *
1434 				 * (MAP_TRYFIXED forces mmap to fail if there
1435 				 *  is already something at the address).
1436 				 */
1437 				if (chunking > bigbytes) {
1438 					char *addr;
1439 					int errno_save = errno;
1440 
1441 					addr = mmap((char *)ptr + bigbytes,
1442 						    chunking - bigbytes,
1443 						    PROT_READ|PROT_WRITE,
1444 						    MAP_PRIVATE|MAP_ANON|
1445 						    MAP_TRYFIXED,
1446 						    -1, 0);
1447 					errno = errno_save;
1448 					if (addr == (char *)ptr + bigbytes) {
1449 						atomic_add_long(&excess_alloc,
1450 								big->active -
1451 								big->bytes +
1452 								chunking -
1453 								size);
1454 						big->bytes = chunking;
1455 						big->active = size;
1456 						bigalloc_unlock(ptr);
1457 
1458 						return(ptr);
1459 					}
1460 					MASSERT_WTHUNLK(
1461 						(void *)addr == MAP_FAILED,
1462 						bigalloc_unlock(ptr));
1463 				}
1464 
1465 				/*
1466 				 * Failed, unlink big and allocate fresh.
1467 				 * (note that we have to leave (big) intact
1468 				 * in case the slaballoc fails).
1469 				 */
1470 				*bigp = big->next;
1471 				bigalloc_unlock(ptr);
1472 				if ((nptr = _slaballoc(size, 0)) == NULL) {
1473 					/* Relink block */
1474 					bigp = bigalloc_lock(ptr);
1475 					big->next = *bigp;
1476 					*bigp = big;
1477 					bigalloc_unlock(ptr);
1478 					return(NULL);
1479 				}
1480 				if (size > bigbytes)
1481 					size = bigbytes;
1482 				bcopy(ptr, nptr, size);
1483 				atomic_add_long(&excess_alloc, big->active -
1484 							       big->bytes);
1485 				_slabfree(ptr, FASTSLABREALLOC, &big);
1486 
1487 				return(nptr);
1488 			}
1489 			bigp = &big->next;
1490 		}
1491 		bigalloc_unlock(ptr);
1492 		handle_excess_big();
1493 	}
1494 
1495 	/*
1496 	 * Get the original allocation's zone.  If the new request winds
1497 	 * up using the same chunk size we do not have to do anything.
1498 	 *
1499 	 * NOTE: We don't have to lock the globaldata here, the fields we
1500 	 * access here will not change at least as long as we have control
1501 	 * over the allocation.
1502 	 */
1503 	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
1504 	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);
1505 
1506 	/*
1507 	 * Use zoneindex() to chunk-align the new size, as long as the
1508 	 * new size is not too large.
1509 	 */
1510 	if (size < ZoneLimit) {
1511 		zoneindex(&size, &chunking);
1512 		if (z->z_ChunkSize == size) {
1513 			return(ptr);
1514 		}
1515 	}
1516 
1517 	/*
1518 	 * Allocate memory for the new request size and copy as appropriate.
1519 	 */
1520 	if ((nptr = _slaballoc(size, 0)) != NULL) {
1521 		if (size > z->z_ChunkSize)
1522 			size = z->z_ChunkSize;
1523 		bcopy(ptr, nptr, size);
1524 		_slabfree(ptr, 0, NULL);
1525 	}
1526 
1527 	return(nptr);
1528 }
1529 
1530 /*
1531  * free (SLAB ALLOCATOR)
1532  *
1533  * Free a memory block previously allocated by malloc.  Note that we do not
1534  * attempt to update ks_loosememuse as MP races could prevent us from
1535  * checking memory limits in malloc.
1536  *
1537  * flags:
1538  *	FASTSLABREALLOC		Fast call from realloc, *rbigp already
1539  *				unlinked.
1540  *
1541  * MPSAFE
1542  */
1543 static void
1544 _slabfree(void *ptr, int flags, bigalloc_t *rbigp)
1545 {
1546 	slzone_t z;
1547 	slchunk_t chunk;
1548 	bigalloc_t big;
1549 	bigalloc_t *bigp;
1550 	slglobaldata_t slgd;
1551 	size_t size;
1552 	int zi;
1553 	int pgno;
1554 
1555 	/* Fast realloc path for big allocations */
1556 	if (flags & FASTSLABREALLOC) {
1557 		big = *rbigp;
1558 		goto fastslabrealloc;
1559 	}
1560 
1561 	/*
1562 	 * Handle NULL frees and special 0-byte allocations
1563 	 */
1564 	if (ptr == NULL)
1565 		return;
1566 
1567 	/*
1568 	 * Handle oversized allocations.
1569 	 */
1570 	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
1571 		while ((big = *bigp) != NULL) {
1572 			if (big->base == ptr) {
1573 				*bigp = big->next;
1574 				atomic_add_long(&excess_alloc, big->active -
1575 							       big->bytes);
1576 				bigalloc_unlock(ptr);
1577 
1578 				/*
1579 				 * Try to stash the block we are freeing,
1580 				 * potentially receiving another block in
1581 				 * return which must be freed.
1582 				 */
1583 fastslabrealloc:
1584 				if (big->bytes <= BIGCACHE_LIMIT) {
1585 					big = bigcache_find_free(big);
1586 					if (big == NULL)
1587 						return;
1588 				}
1589 				ptr = big->base;	/* reload */
1590 				size = big->bytes;
1591 				_slabfree(big, 0, NULL);
1592 				_vmem_free(ptr, size);
1593 				return;
1594 			}
1595 			bigp = &big->next;
1596 		}
1597 		bigalloc_unlock(ptr);
1598 		handle_excess_big();
1599 	}
1600 
1601 	/*
1602 	 * Zone case.  Figure out the zone based on the fact that it is
1603 	 * ZoneSize aligned.
1604 	 */
1605 	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
1606 	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);
1607 
1608 	size = z->z_ChunkSize;
1609 	zi = z->z_ZoneIndex;
1610 
1611 	if (g_malloc_flags & SAFLAG_ZERO)
1612 		bzero(ptr, size);
1613 
1614 	if (mtmagazine_free(zi, ptr) == 0)
1615 		return;
1616 
1617 	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
1618 	chunk = ptr;
1619 
1620 	/*
1621 	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
1622 	 * z_FirstFreePg.
1623 	 */
1624 	slgd = &SLGlobalData;
1625 	slgd_lock(slgd);
1626 
1627 	chunk->c_Next = z->z_PageAry[pgno];
1628 	z->z_PageAry[pgno] = chunk;
1629 	if (z->z_FirstFreePg > pgno)
1630 		z->z_FirstFreePg = pgno;
1631 
1632 	/*
1633 	 * Bump the number of free chunks.  If it becomes non-zero the zone
1634 	 * must be added back onto the appropriate list.
1635 	 */
1636 	if (z->z_NFree++ == 0) {
1637 		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
1638 		slgd->ZoneAry[z->z_ZoneIndex] = z;
1639 	}
1640 
1641 	/*
1642 	 * If the zone becomes totally free we get rid of it.
1643 	 */
1644 	if (z->z_NFree == z->z_NMax) {
1645 		slzone_t *pz;
1646 
1647 		pz = &slgd->ZoneAry[z->z_ZoneIndex];
1648 		while (z != *pz)
1649 			pz = &(*pz)->z_Next;
1650 		*pz = z->z_Next;
1651 		z->z_Magic = -1;
1652 		z->z_Next = NULL;
1653 		slgd_unlock(slgd);
1654 		zone_free(z);
1655 	} else {
1656 		slgd_unlock(slgd);
1657 	}
1658 }
1659 
1660 /*
1661  * Allocate and return a magazine.  Return NULL if no magazines are
1662  * available.
1663  */
1664 static __inline void *
1665 magazine_alloc(struct magazine *mp)
1666 {
1667 	void *obj;
1668 
1669 	if (mp && MAGAZINE_NOTEMPTY(mp)) {
1670 		obj = mp->objects[--mp->rounds];
1671 	} else {
1672 		obj = NULL;
1673 	}
1674 	return (obj);
1675 }
1676 
1677 static __inline int
1678 magazine_free(struct magazine *mp, void *p)
1679 {
1680 	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
1681 		mp->objects[mp->rounds++] = p;
1682 		return 0;
1683 	}
1684 
1685 	return -1;
1686 }
1687 
1688 static void *
1689 mtmagazine_alloc(int zi, int flags)
1690 {
1691 	thr_mags *tp;
1692 	struct magazine *mp, *emptymag;
1693 	magazine_depot *d;
1694 	void *obj;
1695 
1696 	/*
1697 	 * Do not try to access per-thread magazines while the mtmagazine
1698 	 * is being initialized or destroyed.
1699 	 */
1700 	tp = &thread_mags;
1701 	if (tp->init < 0)
1702 		return(NULL);
1703 
1704 	/*
1705 	 * Primary per-thread allocation loop
1706 	 */
1707 	nmalloc_sigblockall();
1708 	for (;;) {
1709 		/*
1710 		 * Make sure we have a magazine available for use.
1711 		 */
1712 		if (tp->newmag == NULL && (flags & SAFLAG_MAGS) == 0) {
1713 			mp = _slaballoc(sizeof(struct magazine),
1714 					SAFLAG_ZERO | SAFLAG_MAGS);
1715 			if (mp == NULL) {
1716 				obj = NULL;
1717 				break;
1718 			}
1719 			if (tp->newmag) {
1720 				_slabfree(mp, 0, NULL);
1721 			} else {
1722 				tp->newmag = mp;
1723 			}
1724 		}
1725 
1726 		/*
1727 		 * If the loaded magazine has rounds, allocate and return
1728 		 */
1729 		mp = tp->mags[zi].loaded;
1730 		obj = magazine_alloc(mp);
1731 		if (obj)
1732 			break;
1733 
1734 		/*
1735 		 * The prev magazine can only be completely empty or completely
1736 		 * full.  If it is full, swap it with the loaded magazine
1737 		 * and retry.
1738 		 */
1739 		mp = tp->mags[zi].prev;
1740 		if (mp && MAGAZINE_FULL(mp)) {
1741 			MASSERT(mp->rounds != 0);
1742 			swap_mags(&tp->mags[zi]);	/* prev now empty */
1743 			continue;
1744 		}
1745 
1746 		/*
1747 		 * If the depot has no full magazines, ensure that tp->mags[zi].loaded
1748 		 * is not NULL and return NULL.  This allows _slaballoc() to batch
1749 		 * chunks pulled from SLGlobalData into the loaded magazine.
1750 		 */
1751 		d = &depots[zi];
1752 		if (SLIST_EMPTY(&d->full)) {	/* UNLOCKED TEST IS SAFE */
1753 			mp = tp->mags[zi].loaded;
1754 			if (mp == NULL && tp->newmag) {
1755 				mp = tp->newmag;
1756 				tp->newmag = NULL;
1757 				mp->capacity = zonecapacity(zi);
1758 				mp->rounds = 0;
1759 				mp->flags = 0;
1760 				tp->mags[zi].loaded = mp;
1761 			}
1762 			break;
1763 		}
1764 
1765 		/*
1766 		 * Cycle: depot(loaded) -> loaded -> prev -> depot(empty)
1767 		 *
1768 		 * If we race and the depot has no full magazines, retry.
1769 		 */
1770 		depot_lock(d);
1771 		mp = SLIST_FIRST(&d->full);
1772 		if (mp) {
1773 			SLIST_REMOVE_HEAD(&d->full, nextmagazine);
1774 			emptymag = tp->mags[zi].prev;
1775 			if (emptymag) {
1776 				SLIST_INSERT_HEAD(&d->empty, emptymag,
1777 						  nextmagazine);
1778 			}
1779 			tp->mags[zi].prev = tp->mags[zi].loaded;
1780 			tp->mags[zi].loaded = mp;
1781 			MASSERT(MAGAZINE_NOTEMPTY(mp));
1782 		}
1783 		depot_unlock(d);
1784 		continue;
1785 	}
1786 	nmalloc_sigunblockall();
1787 
1788 	return (obj);
1789 }
1790 
1791 static int
1792 mtmagazine_free(int zi, void *ptr)
1793 {
1794 	thr_mags *tp;
1795 	struct magazine *mp, *loadedmag;
1796 	magazine_depot *d;
1797 	int rc = -1;
1798 
1799 	/*
1800 	 * Do not try to access per-thread magazines while the mtmagazine
1801 	 * is being initialized or destroyed.
1802 	 */
1803 	tp = &thread_mags;
1804 	if (tp->init < 0)
1805 		return(-1);
1806 
1807 	/*
1808 	 * Primary per-thread freeing loop
1809 	 */
1810 	nmalloc_sigblockall();
1811 	for (;;) {
1812 		/*
1813 		 * Make sure a new magazine is available in case we have
1814 		 * to use it.  Staging the newmag allows us to avoid
1815 		 * some locking/reentrancy complexity.
1816 		 *
1817 		 * Temporarily disable the per-thread caches for this
1818 		 * allocation to avoid reentrancy and/or to avoid a
1819 		 * stack overflow if the [zi] happens to be the same that
1820 		 * would be used to allocate the new magazine.
1821 		 */
1822 		if (tp->newmag == NULL) {
1823 			tp->newmag = _slaballoc(sizeof(struct magazine),
1824 						SAFLAG_ZERO);
1825 			if (tp->newmag == NULL) {
1826 				rc = -1;
1827 				break;
1828 			}
1829 		}
1830 
1831 		/*
1832 		 * If the loaded magazine has space, free directly to it
1833 		 */
1834 		rc = magazine_free(tp->mags[zi].loaded, ptr);
1835 		if (rc == 0)
1836 			break;
1837 
1838 		/*
1839 		 * The prev magazine can only be completely empty or completely
1840 		 * full.  If it is empty, swap it with the loaded magazine
1841 		 * and retry.
1842 		 */
1843 		mp = tp->mags[zi].prev;
1844 		if (mp && MAGAZINE_EMPTY(mp)) {
1845 			MASSERT(mp->rounds == 0);
1846 			swap_mags(&tp->mags[zi]);	/* prev now full */
1847 			continue;
1848 		}
1849 
1850 		/*
1851 		 * Try to get an empty magazine from the depot.  Cycle
1852 		 * through depot(empty)->loaded->prev->depot(full).
1853 		 * Retry if an empty magazine was available from the depot.
1854 		 */
1855 		d = &depots[zi];
1856 		depot_lock(d);
1857 
1858 		if ((loadedmag = tp->mags[zi].prev) != NULL)
1859 			SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine);
1860 		tp->mags[zi].prev = tp->mags[zi].loaded;
1861 		mp = SLIST_FIRST(&d->empty);
1862 		if (mp) {
1863 			tp->mags[zi].loaded = mp;
1864 			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);
1865 			depot_unlock(d);
1866 			MASSERT(MAGAZINE_NOTFULL(mp));
1867 		} else {
1868 			mp = tp->newmag;
1869 			tp->newmag = NULL;
1870 			mp->capacity = zonecapacity(zi);
1871 			mp->rounds = 0;
1872 			mp->flags = 0;
1873 			tp->mags[zi].loaded = mp;
1874 			depot_unlock(d);
1875 		}
1876 	}
1877 	nmalloc_sigunblockall();
1878 
1879 	return rc;
1880 }
1881 
1882 static void
1883 mtmagazine_init(void)
1884 {
1885 	int error;
1886 
1887 	error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor);
1888 	if (error)
1889 		abort();
1890 }
1891 
1892 /*
1893  * This function is only used by the thread exit destructor
1894  */
1895 static void
1896 mtmagazine_drain(struct magazine *mp)
1897 {
1898 	void *obj;
1899 
1900 	while (MAGAZINE_NOTEMPTY(mp)) {
1901 		obj = magazine_alloc(mp);
1902 		_slabfree(obj, 0, NULL);
1903 	}
1904 }
1905 
1906 /*
1907  * mtmagazine_destructor()
1908  *
1909  * When a thread exits, we reclaim all its resources; all its magazines are
1910  * drained and the structures are freed.
1911  *
1912  * WARNING!  The destructor can be called multiple times if the larger user
1913  *	     program has its own destructors which run after ours which
1914  *	     allocate or free memory.
1915  */
1916 static void
1917 mtmagazine_destructor(void *thrp)
1918 {
1919 	thr_mags *tp = thrp;
1920 	struct magazine *mp;
1921 	int i;
1922 
1923 	if (__isexiting)
1924 		return;
1925 
1926 	/*
1927 	 * Prevent further use of mtmagazines while we are destructing
1928 	 * them, as well as for any destructors which are run after us
1929 	 * prior to the thread actually being destroyed.
1930 	 */
1931 	tp->init = -1;
1932 
1933 	for (i = 0; i < NZONES; i++) {
1934 		mp = tp->mags[i].loaded;
1935 		tp->mags[i].loaded = NULL;
1936 		if (mp) {
1937 			if (MAGAZINE_NOTEMPTY(mp))
1938 				mtmagazine_drain(mp);
1939 			_slabfree(mp, 0, NULL);
1940 		}
1941 
1942 		mp = tp->mags[i].prev;
1943 		tp->mags[i].prev = NULL;
1944 		if (mp) {
1945 			if (MAGAZINE_NOTEMPTY(mp))
1946 				mtmagazine_drain(mp);
1947 			_slabfree(mp, 0, NULL);
1948 		}
1949 	}
1950 
1951 	if (tp->newmag) {
1952 		mp = tp->newmag;
1953 		tp->newmag = NULL;
1954 		_slabfree(mp, 0, NULL);
1955 	}
1956 }
1957 
1958 /*
1959  * zone_alloc()
1960  *
1961  * Attempt to allocate a zone from the zone magazine.
1962  */
1963 static slzone_t
1964 zone_alloc(int flags)
1965 {
1966 	slzone_t z;
1967 
1968 	zone_magazine_lock();
1969 
1970 	z = magazine_alloc(&zone_magazine);
1971 	if (z == NULL) {
1972 		zone_magazine_unlock();
1973 		z = _vmem_alloc(ZoneSize, ZoneSize, flags);
1974 	} else {
1975 		z->z_Flags |= SLZF_UNOTZEROD;
1976 		zone_magazine_unlock();
1977 	}
1978 	return z;
1979 }
1980 
1981 /*
1982  * Free a zone.
1983  */
1984 static void
1985 zone_free(void *z)
1986 {
1987 	void *excess[M_ZONE_HYSTERESIS];
1988 	int i;
1989 
1990 	zone_magazine_lock();
1991 
1992 	bzero(z, sizeof(struct slzone));
1993 
1994 	if (opt_madvise)
1995 		madvise(z, ZoneSize, MADV_FREE);
1996 
1997 	i = magazine_free(&zone_magazine, z);
1998 
1999 	/*
2000 	 * If we failed to free (the zone magazine was full), collect excess
2001 	 * magazines, release the zone magazine lock, and then free them to the
2002 	 * system via _vmem_free().
2003 	 */
2004 	if (i == -1) {
2005 		for (i = 0; i < M_ZONE_HYSTERESIS; ++i) {
2006 			excess[i] = magazine_alloc(&zone_magazine);
2007 			MASSERT_WTHUNLK(excess[i] != NULL,
2008 					zone_magazine_unlock());
2009 		}
2010 		zone_magazine_unlock();
2011 
2012 		for (i = 0; i < M_ZONE_HYSTERESIS; ++i)
2013 			_vmem_free(excess[i], ZoneSize);
2014 		_vmem_free(z, ZoneSize);
2015 	} else {
2016 		zone_magazine_unlock();
2017 	}
2018 }
2019 
2020 /*
2021  * _vmem_alloc()
2022  *
2023  *	Directly map memory in PAGE_SIZE'd chunks with the specified
2024  *	alignment.
2025  *
2026  *	Alignment must be a multiple of PAGE_SIZE.
2027  *
2028  *	Size must be >= alignment.
2029  */
2030 static void *
2031 _vmem_alloc(size_t size, size_t align, int flags)
2032 {
2033 	static char *addr_hint;
2034 	static int reset_hint = 16;
2035 	char *addr;
2036 	char *save;
2037 
2038 	if (--reset_hint <= 0) {
2039 		addr_hint = NULL;
2040 		reset_hint = 16;
2041 	}
2042 
2043 	/*
2044 	 * Map anonymous private memory.
2045 	 */
2046 	save = mmap(addr_hint, size, PROT_READ|PROT_WRITE,
2047 		    MAP_PRIVATE|MAP_ANON, -1, 0);
2048 	if (save == MAP_FAILED)
2049 		goto worst_case;
2050 	if (((uintptr_t)save & (align - 1)) == 0)
2051 		return((void *)save);
2052 
2053 	addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1));
2054 	munmap(save, size);
2055 
2056 	save = mmap(addr_hint, size, PROT_READ|PROT_WRITE,
2057 		    MAP_PRIVATE|MAP_ANON, -1, 0);
2058 	if (save == MAP_FAILED)
2059 		goto worst_case;
2060 	if (((size_t)save & (align - 1)) == 0)
2061 		return((void *)save);
2062 	munmap(save, size);
2063 
2064 worst_case:
2065 	save = mmap(NULL, size + align, PROT_READ|PROT_WRITE,
2066 		    MAP_PRIVATE|MAP_ANON, -1, 0);
2067 	if (save == MAP_FAILED)
2068 		return NULL;
2069 
2070 	addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1));
2071 	if (save != addr)
2072 		munmap(save, addr - save);
2073 	if (addr + size != save + size + align)
2074 		munmap(addr + size, save + align - addr);
2075 
2076 	addr_hint = addr + size;
2077 
2078 	return ((void *)addr);
2079 }
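
/*
 * Editor's worked example of the worst-case path above (hypothetical
 * addresses, align == 64KB zone alignment):
 *
 *	save = mmap(NULL, size + align, ...);		// e.g. 0x7f0000003000
 *	addr = round save up to align			// -> 0x7f0000010000
 *	munmap(save, addr - save);			// trim 0xd000 in front
 *	munmap(addr + size, save + align - addr);	// trim 0x3000 at the tail
 *
 * leaving exactly [addr, addr + size) mapped and align-aligned, and the
 * end of that range is remembered in addr_hint for the next call.
 */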
2080 
2081 /*
2082  * _vmem_free()
2083  *
2084  *	Free a chunk of memory allocated with _vmem_alloc()
2085  */
2086 static void
2087 _vmem_free(void *ptr, size_t size)
2088 {
2089 	munmap(ptr, size);
2090 }
2091 
2092 /*
2093  * Panic on fatal conditions
2094  */
2095 static void
2096 _mpanic(const char *ctl, ...)
2097 {
2098 	va_list va;
2099 
2100 	if (malloc_panic == 0) {
2101 		malloc_panic = 1;
2102 		va_start(va, ctl);
2103 		vfprintf(stderr, ctl, va);
2104 		fprintf(stderr, "\n");
2105 		fflush(stderr);
2106 		va_end(va);
2107 	}
2108 	abort();
2109 }
2110 
2111 __weak_reference(__aligned_alloc, aligned_alloc);
2112 __weak_reference(__malloc, malloc);
2113 __weak_reference(__calloc, calloc);
2114 __weak_reference(__posix_memalign, posix_memalign);
2115 __weak_reference(__realloc, realloc);
2116 __weak_reference(__free, free);
2117