10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53866Sraf  * Common Development and Distribution License (the "License").
63866Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
213866Sraf 
220Sstevel@tonic-gate /*
23*6812Sraf  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <mtmalloc.h>
300Sstevel@tonic-gate #include "mtmalloc_impl.h"
310Sstevel@tonic-gate #include <unistd.h>
320Sstevel@tonic-gate #include <synch.h>
330Sstevel@tonic-gate #include <thread.h>
343866Sraf #include <pthread.h>
350Sstevel@tonic-gate #include <stdio.h>
360Sstevel@tonic-gate #include <limits.h>
370Sstevel@tonic-gate #include <errno.h>
380Sstevel@tonic-gate #include <string.h>
390Sstevel@tonic-gate #include <strings.h>
400Sstevel@tonic-gate #include <sys/param.h>
410Sstevel@tonic-gate #include <sys/sysmacros.h>
420Sstevel@tonic-gate 
430Sstevel@tonic-gate /*
440Sstevel@tonic-gate  * To turn on the asserts, compile with -DDEBUG
450Sstevel@tonic-gate  */
460Sstevel@tonic-gate 
470Sstevel@tonic-gate #ifndef	DEBUG
480Sstevel@tonic-gate #define	NDEBUG
490Sstevel@tonic-gate #endif
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <assert.h>
520Sstevel@tonic-gate 
530Sstevel@tonic-gate /*
540Sstevel@tonic-gate  * The MT hot malloc implementation contained herein is designed to be
550Sstevel@tonic-gate  * plug-compatible with the libc version of malloc. It is not intended
560Sstevel@tonic-gate  * to replace that implementation until we decide that it is ok to break
570Sstevel@tonic-gate  * customer apps (Solaris 3.0).
580Sstevel@tonic-gate  *
590Sstevel@tonic-gate  * For requests up to 2^16, the allocator initializes itself into NCPUS
600Sstevel@tonic-gate  * worth of chains of caches. When a memory request is made, the calling thread
610Sstevel@tonic-gate  * is vectored into one of NCPUS worth of caches.  The LWP id gives us a cheap,
620Sstevel@tonic-gate  * contention-reducing index to use; eventually, this should be replaced with
630Sstevel@tonic-gate  * the actual CPU sequence number, once an interface to get it is available.
640Sstevel@tonic-gate  *
650Sstevel@tonic-gate  * Once the thread is vectored into one of the list of caches the real
660Sstevel@tonic-gate  * allocation of the memory begins. The request size determines which
670Sstevel@tonic-gate  * bucket the allocation should be satisfied from. The management of free
680Sstevel@tonic-gate  * buckets is done via a bitmask. A free bucket is represented by a 1. The
690Sstevel@tonic-gate  * first free bit represents the first free bucket. The position of the bit
700Sstevel@tonic-gate  * represents the position of the bucket in the arena.
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  * When the memory from the arena is handed out, the address of the cache
730Sstevel@tonic-gate  * control structure is written in the word preceding the returned memory.
740Sstevel@tonic-gate  * This cache control address is used during free() to mark the buffer free
750Sstevel@tonic-gate  * in the cache control structure.
760Sstevel@tonic-gate  *
770Sstevel@tonic-gate  * When all available memory in a cache has been depleted, a new chunk of memory
780Sstevel@tonic-gate  * is allocated via sbrk(). The new cache is allocated from this chunk of memory
790Sstevel@tonic-gate  * and initialized in the function create_cache(). New caches are installed at
800Sstevel@tonic-gate  * the front of a singly linked list of same-sized memory pools. This helps
810Sstevel@tonic-gate  * to ensure that available memory will tend to be at the beginning of the
820Sstevel@tonic-gate  * list.
830Sstevel@tonic-gate  *
840Sstevel@tonic-gate  * Long linked lists hurt performance. To decrease this effect, there is a
850Sstevel@tonic-gate  * tunable, requestsize, that bumps up the sbrk allocation size and thus
860Sstevel@tonic-gate  * increases the number of available blocks within an arena.  We also keep
870Sstevel@tonic-gate  * a "hint" for each cache list, which is the last cache in the list allocated
880Sstevel@tonic-gate  * from.  This lowers the cost of searching if there are a lot of fully
890Sstevel@tonic-gate  * allocated blocks at the front of the list.
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  * For requests greater than 2^16 (oversize allocations), there are two pieces
920Sstevel@tonic-gate  * of overhead. There is the OVERHEAD used to hold the cache addr
930Sstevel@tonic-gate  * (&oversize_list), plus an oversize_t structure to further describe the block.
940Sstevel@tonic-gate  *
950Sstevel@tonic-gate  * The oversize list is kept as defragmented as possible by coalescing
960Sstevel@tonic-gate  * freed oversized allocations with adjacent neighbors.
970Sstevel@tonic-gate  *
980Sstevel@tonic-gate  * Addresses handed out are stored in a hash table, and are aligned on
990Sstevel@tonic-gate  * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded up
1000Sstevel@tonic-gate  * where necessary in order to achieve this. This eases the implementation of
1010Sstevel@tonic-gate  * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
1020Sstevel@tonic-gate  *
1030Sstevel@tonic-gate  * A memalign allocation takes memalign header overhead.  There are two
1040Sstevel@tonic-gate  * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
1050Sstevel@tonic-gate  * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the distance from the malloc'ed
1060Sstevel@tonic-gate  * address to the aligned address is exactly the minimum size, OVERHEAD,
1070Sstevel@tonic-gate  * we create a header taking only one OVERHEAD space with magic number
1080Sstevel@tonic-gate  * MTMALLOC_MEMALIGN_MIN_MAGIC; subtracting OVERHEAD from the memaligned
1090Sstevel@tonic-gate  * address then yields the malloc'ed address. Otherwise, we create a
1100Sstevel@tonic-gate  * memalign header taking two OVERHEAD spaces: one stores the
1110Sstevel@tonic-gate  * MTMALLOC_MEMALIGN_MAGIC magic number, and the other points back to the
1120Sstevel@tonic-gate  * malloc'ed address.
1130Sstevel@tonic-gate  */
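
/*
 * Illustrative sketch (not part of the allocator itself): the word
 * immediately preceding every pointer handed out by malloc_internal()
 * holds the address of the owning cache_t, which is how free() locates
 * the right cache:
 *
 *	void *p = malloc(24);	(satisfied from the 32-byte bucket)
 *	cache_t *cp = (cache_t *)*(uintptr_t *)((caddr_t)p - OVERHEAD);
 *
 * Within a cache, block i of the arena corresponds to bit i of the
 * freelist bitmask (bit 0 being the 0x80 bit of byte 0), so freeing
 * block 5 of a cache whose first mask byte is 0xf3 leaves it at 0xf7.
 */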
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate #if defined(__i386) || defined(__amd64)
1160Sstevel@tonic-gate #include <arpa/inet.h>	/* for htonl() */
1170Sstevel@tonic-gate #endif
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate static void * morecore(size_t);
1200Sstevel@tonic-gate static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
1210Sstevel@tonic-gate static void * malloc_internal(size_t, percpu_t *);
1220Sstevel@tonic-gate static void * oversize(size_t);
1230Sstevel@tonic-gate static oversize_t *find_oversize(size_t);
1240Sstevel@tonic-gate static void add_oversize(oversize_t *);
1250Sstevel@tonic-gate static void copy_pattern(uint32_t, void *, size_t);
1260Sstevel@tonic-gate static void * verify_pattern(uint32_t, void *, size_t);
1270Sstevel@tonic-gate static void reinit_cpu_list(void);
1280Sstevel@tonic-gate static void reinit_cache(cache_t *);
1290Sstevel@tonic-gate static void free_oversize(oversize_t *);
1300Sstevel@tonic-gate static oversize_t *oversize_header_alloc(uintptr_t, size_t);
1310Sstevel@tonic-gate 
1320Sstevel@tonic-gate /*
1330Sstevel@tonic-gate  * oversize hash table stuff
1340Sstevel@tonic-gate  */
1350Sstevel@tonic-gate #define	NUM_BUCKETS	67	/* must be prime */
1360Sstevel@tonic-gate #define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
1370Sstevel@tonic-gate oversize_t *ovsz_hashtab[NUM_BUCKETS];
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate #define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
1400Sstevel@tonic-gate 			& ~((uintptr_t)(a) - 1)))
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate /* need this to deal with the little-endianness of x86 */
1430Sstevel@tonic-gate #if defined(__i386) || defined(__amd64)
1440Sstevel@tonic-gate #define	FLIP_EM(x)	htonl((x))
1450Sstevel@tonic-gate #else
1460Sstevel@tonic-gate #define	FLIP_EM(x)	(x)
1470Sstevel@tonic-gate #endif
1480Sstevel@tonic-gate 
1490Sstevel@tonic-gate #define	INSERT_ONLY			0
1500Sstevel@tonic-gate #define	COALESCE_LEFT			0x00000001
1510Sstevel@tonic-gate #define	COALESCE_RIGHT			0x00000002
1520Sstevel@tonic-gate #define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate #define	OVERHEAD	8	/* size needed to write cache addr */
1550Sstevel@tonic-gate #define	HUNKSIZE	8192	/* just a multiplier */
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate #define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
1580Sstevel@tonic-gate #define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
1590Sstevel@tonic-gate #define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
1600Sstevel@tonic-gate #define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */
1610Sstevel@tonic-gate 
1621412Srm88369 /* maximum size before overflow */
1631412Srm88369 #define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
1641412Srm88369 			- OVSZ_HEADER_SIZE)
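/*
 * (Illustrative reading: MAX_MTMALLOC is the largest request that can be
 * rounded up to a multiple of MTMALLOC_MIN_ALIGN and still leave room for
 * OVSZ_HEADER_SIZE without wrapping around SIZE_MAX; see oversize().)
 */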
1651412Srm88369 
1660Sstevel@tonic-gate #define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
1670Sstevel@tonic-gate #define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
1680Sstevel@tonic-gate     CACHE_COHERENCY_UNIT)
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate #define	MINSIZE		9	/* for requestsize, tunable */
1710Sstevel@tonic-gate #define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate #define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
1740Sstevel@tonic-gate #define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate #define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
1770Sstevel@tonic-gate #define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate static uint_t cpu_mask;
1820Sstevel@tonic-gate static curcpu_func curcpu;
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate static int32_t debugopt;
1850Sstevel@tonic-gate static int32_t reinit;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate static percpu_t *cpu_list;
1880Sstevel@tonic-gate static oversize_t oversize_list;
1893866Sraf static mutex_t oversize_lock = DEFAULTMUTEX;
1900Sstevel@tonic-gate 
1913866Sraf static int ncpus = 0;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate #define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
1940Sstevel@tonic-gate #define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
1950Sstevel@tonic-gate #define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate /*
1980Sstevel@tonic-gate  * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
1990Sstevel@tonic-gate  * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
2000Sstevel@tonic-gate  * this is achieved.
2010Sstevel@tonic-gate  */
2020Sstevel@tonic-gate #define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
2030Sstevel@tonic-gate #define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)
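
/*
 * Layout of an oversize allocation (illustrative sketch):
 *
 *	+---------------------+---------------+---------------------+
 *	|      oversize_t     | OVERHEAD word |  user data ...      |
 *	|   (OVSZ_SIZE bytes) |  holds magic  |  (big->addr)        |
 *	+---------------------+---------------+---------------------+
 *
 * The OVERHEAD word holds MTMALLOC_OVERSIZE_MAGIC, which is how free()
 * distinguishes an oversize buffer from a cache buffer before it looks
 * up the oversize_t in the hash table.
 */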
2040Sstevel@tonic-gate 
2050Sstevel@tonic-gate /*
2060Sstevel@tonic-gate  * A memalign header takes 2 OVERHEAD spaces.  One holds the memalign magic, and
2070Sstevel@tonic-gate  * the other points back to the start address of the originally allocated space.
2080Sstevel@tonic-gate  */
2090Sstevel@tonic-gate #define	MEMALIGN_HEADER_SIZE	(2 * OVERHEAD)
2100Sstevel@tonic-gate #define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
2110Sstevel@tonic-gate 	if (shift == OVERHEAD)\
2120Sstevel@tonic-gate 		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
2130Sstevel@tonic-gate 			MTMALLOC_MEMALIGN_MIN_MAGIC; \
2140Sstevel@tonic-gate 	else {\
2150Sstevel@tonic-gate 		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
2160Sstevel@tonic-gate 			MTMALLOC_MEMALIGN_MAGIC; \
2170Sstevel@tonic-gate 		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
2180Sstevel@tonic-gate 			(uintptr_t)malloc_addr; \
2190Sstevel@tonic-gate 	}
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate void *
2220Sstevel@tonic-gate malloc(size_t bytes)
2230Sstevel@tonic-gate {
2240Sstevel@tonic-gate 	percpu_t *list_rotor;
2250Sstevel@tonic-gate 	uint_t	list_index;
2260Sstevel@tonic-gate 
2270Sstevel@tonic-gate 	if (bytes > MAX_CACHED)
2280Sstevel@tonic-gate 		return (oversize(bytes));
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 	list_index = (curcpu() & cpu_mask);
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	list_rotor = &cpu_list[list_index];
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 	return (malloc_internal(bytes, list_rotor));
2350Sstevel@tonic-gate }
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate void *
2380Sstevel@tonic-gate realloc(void * ptr, size_t bytes)
2390Sstevel@tonic-gate {
2400Sstevel@tonic-gate 	void *new, *data_ptr;
2410Sstevel@tonic-gate 	cache_t *cacheptr;
2420Sstevel@tonic-gate 	caddr_t mem;
2430Sstevel@tonic-gate 	size_t shift = 0;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	if (ptr == NULL)
2460Sstevel@tonic-gate 		return (malloc(bytes));
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 	if (bytes == 0) {
2490Sstevel@tonic-gate 		free(ptr);
2500Sstevel@tonic-gate 		return (NULL);
2510Sstevel@tonic-gate 	}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	data_ptr = ptr;
2540Sstevel@tonic-gate 	mem = (caddr_t)ptr - OVERHEAD;
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate 	new = malloc(bytes);
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate 	if (new == NULL)
2590Sstevel@tonic-gate 		return (NULL);
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	/*
2620Sstevel@tonic-gate 	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
2630Sstevel@tonic-gate 	 * to realloc() is not allowed - unless the caller specifically states
2640Sstevel@tonic-gate 	 * otherwise, in which case we must avoid freeing ptr (i.e., new) before we
2650Sstevel@tonic-gate 	 * return new. There is (obviously) no requirement to memcpy() ptr to
2660Sstevel@tonic-gate 	 * new before we return.
2670Sstevel@tonic-gate 	 */
2680Sstevel@tonic-gate 	if (new == ptr) {
2690Sstevel@tonic-gate 		if (!(debugopt & MTDOUBLEFREE))
2700Sstevel@tonic-gate 			abort();
2710Sstevel@tonic-gate 		return (new);
2720Sstevel@tonic-gate 	}
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
2750Sstevel@tonic-gate 		mem -= OVERHEAD;
2760Sstevel@tonic-gate 		ptr = (void *)*(uintptr_t *)mem;
2770Sstevel@tonic-gate 		mem = (caddr_t)ptr - OVERHEAD;
2780Sstevel@tonic-gate 		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
2790Sstevel@tonic-gate 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
2800Sstevel@tonic-gate 		ptr = (void *) mem;
2810Sstevel@tonic-gate 		mem -= OVERHEAD;
2820Sstevel@tonic-gate 		shift = OVERHEAD;
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
2860Sstevel@tonic-gate 		oversize_t *old;
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate 		old = (oversize_t *)(mem - OVSZ_SIZE);
2890Sstevel@tonic-gate 		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
2900Sstevel@tonic-gate 		free(ptr);
2910Sstevel@tonic-gate 		return (new);
2920Sstevel@tonic-gate 	}
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	cacheptr = (cache_t *)*(uintptr_t *)mem;
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	(void) memcpy(new, data_ptr,
2970Sstevel@tonic-gate 		MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
2980Sstevel@tonic-gate 	free(ptr);
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	return (new);
3010Sstevel@tonic-gate }
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate void *
3040Sstevel@tonic-gate calloc(size_t nelem, size_t bytes)
3050Sstevel@tonic-gate {
3060Sstevel@tonic-gate 	void * ptr;
3070Sstevel@tonic-gate 	size_t size = nelem * bytes;

	/* protect against overflow of nelem * bytes */
	if (nelem != 0 && size / nelem != bytes) {
		errno = ENOMEM;
		return (NULL);
	}
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 	ptr = malloc(size);
3100Sstevel@tonic-gate 	if (ptr == NULL)
3110Sstevel@tonic-gate 		return (NULL);
3123866Sraf 	(void) memset(ptr, 0, size);
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 	return (ptr);
3150Sstevel@tonic-gate }
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate void
3180Sstevel@tonic-gate free(void * ptr)
3190Sstevel@tonic-gate {
3200Sstevel@tonic-gate 	cache_t *cacheptr;
3210Sstevel@tonic-gate 	caddr_t mem;
3220Sstevel@tonic-gate 	int32_t i;
3230Sstevel@tonic-gate 	caddr_t freeblocks;
3240Sstevel@tonic-gate 	uintptr_t offset;
3250Sstevel@tonic-gate 	uchar_t mask;
3260Sstevel@tonic-gate 	int32_t which_bit, num_bytes;
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	if (ptr == NULL)
3290Sstevel@tonic-gate 		return;
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	mem = (caddr_t)ptr - OVERHEAD;
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
3340Sstevel@tonic-gate 		mem -= OVERHEAD;
3350Sstevel@tonic-gate 		ptr = (void *)*(uintptr_t *)mem;
3360Sstevel@tonic-gate 		mem = (caddr_t)ptr - OVERHEAD;
3370Sstevel@tonic-gate 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
3380Sstevel@tonic-gate 		ptr = (void *) mem;
3390Sstevel@tonic-gate 		mem -= OVERHEAD;
3400Sstevel@tonic-gate 	}
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
3430Sstevel@tonic-gate 		oversize_t *big, **opp;
3440Sstevel@tonic-gate 		int bucket;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 		big = (oversize_t *)(mem - OVSZ_SIZE);
3470Sstevel@tonic-gate 		(void) mutex_lock(&oversize_lock);
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate 		bucket = HASH_OVERSIZE(big->addr);
3500Sstevel@tonic-gate 		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
3510Sstevel@tonic-gate 		    opp = &(*opp)->hash_next)
3520Sstevel@tonic-gate 			if (*opp == big)
3530Sstevel@tonic-gate 				break;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 		if (*opp == NULL) {
3560Sstevel@tonic-gate 			if (!(debugopt & MTDOUBLEFREE))
3570Sstevel@tonic-gate 				abort();
3580Sstevel@tonic-gate 			(void) mutex_unlock(&oversize_lock);
3590Sstevel@tonic-gate 			return;
3600Sstevel@tonic-gate 		}
3610Sstevel@tonic-gate 
3620Sstevel@tonic-gate 		*opp = big->hash_next;	/* remove big from the hash table */
3630Sstevel@tonic-gate 		big->hash_next = NULL;
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
3660Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, ptr, big->size);
3670Sstevel@tonic-gate 		add_oversize(big);
3680Sstevel@tonic-gate 		(void) mutex_unlock(&oversize_lock);
3690Sstevel@tonic-gate 		return;
3700Sstevel@tonic-gate 	}
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 	cacheptr = (cache_t *)*(uintptr_t *)mem;
3730Sstevel@tonic-gate 	freeblocks = cacheptr->mt_freelist;
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 	/*
3760Sstevel@tonic-gate 	 * offset is the distance in bytes into the arena. Since each
3770Sstevel@tonic-gate 	 * block in the arena corresponds to one bit, there is a 1-1
3780Sstevel@tonic-gate 	 * correlation between distance into the arena and distance
3790Sstevel@tonic-gate 	 * into the freelist bitmask.
3800Sstevel@tonic-gate 	 */
3810Sstevel@tonic-gate 	offset = mem - cacheptr->mt_arena;
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 	/*
3840Sstevel@tonic-gate 	 * i is total number of bits to offset into freelist bitmask.
3850Sstevel@tonic-gate 	 */
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate 	i = offset / cacheptr->mt_size;
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 	num_bytes = i >> 3;
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 	/*
3920Sstevel@tonic-gate 	 * which_bit is the bit offset into the byte in the freelist.
3930Sstevel@tonic-gate 	 * if our freelist bitmask looks like 0xf3 and we are freeing
3940Sstevel@tonic-gate 	 * block 5 (i.e., the 6th block), our mask will be 0xf7 after
3950Sstevel@tonic-gate 	 * the free. Things go left to right; that's why the mask is 0x80
3960Sstevel@tonic-gate 	 * and not 0x01.
3970Sstevel@tonic-gate 	 */
3980Sstevel@tonic-gate 	which_bit = i - (num_bytes << 3);
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate 	mask = 0x80 >> which_bit;
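	/*
	 * Worked example (illustrative): with mt_size == 64 and
	 * offset == 1344, i == 21, num_bytes == 2 and which_bit == 5,
	 * so the bit to set is 0x04 in the third byte of the bitmask.
	 */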
4010Sstevel@tonic-gate 
4020Sstevel@tonic-gate 	freeblocks += num_bytes;
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
4050Sstevel@tonic-gate 		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	(void) mutex_lock(&cacheptr->mt_cache_lock);
4080Sstevel@tonic-gate 
4090Sstevel@tonic-gate 	if (*freeblocks & mask) {
4100Sstevel@tonic-gate 		if (!(debugopt & MTDOUBLEFREE))
4110Sstevel@tonic-gate 			abort();
4120Sstevel@tonic-gate 	} else {
4130Sstevel@tonic-gate 		*freeblocks |= mask;
4140Sstevel@tonic-gate 		cacheptr->mt_nfree++;
4150Sstevel@tonic-gate 	}
4160Sstevel@tonic-gate 
4170Sstevel@tonic-gate 	(void) mutex_unlock(&cacheptr->mt_cache_lock);
4180Sstevel@tonic-gate }
4190Sstevel@tonic-gate 
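/*
 * Usage sketch (illustrative): memalign(64, 100) returns a buffer aligned
 * on a 64-byte boundary.  The word(s) immediately preceding the returned
 * address hold either MTMALLOC_MEMALIGN_MIN_MAGIC, or MTMALLOC_MEMALIGN_MAGIC
 * plus the original malloc()ed address, so that free() and realloc() can
 * locate the underlying buffer.
 */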
4200Sstevel@tonic-gate void *
4210Sstevel@tonic-gate memalign(size_t alignment, size_t size)
4220Sstevel@tonic-gate {
4230Sstevel@tonic-gate 	size_t alloc_size;
4240Sstevel@tonic-gate 	uintptr_t offset;
4250Sstevel@tonic-gate 	void *alloc_buf;
4260Sstevel@tonic-gate 	void *ret_buf;
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	if (size == 0 || alignment == 0 ||
4290Sstevel@tonic-gate 		misaligned(alignment) ||
4300Sstevel@tonic-gate 		(alignment & (alignment - 1)) != 0) {
4310Sstevel@tonic-gate 		errno = EINVAL;
4320Sstevel@tonic-gate 		return (NULL);
4330Sstevel@tonic-gate 	}
4340Sstevel@tonic-gate 
4350Sstevel@tonic-gate 	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
4360Sstevel@tonic-gate 	if (alignment <= MTMALLOC_MIN_ALIGN)
4370Sstevel@tonic-gate 		return (malloc(size));
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	if (alloc_size < size) { /* overflow */
4420Sstevel@tonic-gate 		errno = ENOMEM;
4430Sstevel@tonic-gate 		return (NULL);
4440Sstevel@tonic-gate 	}
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 	alloc_buf = malloc(alloc_size);
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 	if (alloc_buf == NULL)
4490Sstevel@tonic-gate 		/* malloc sets errno */
4500Sstevel@tonic-gate 		return (NULL);
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	/*
4530Sstevel@tonic-gate 	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
4540Sstevel@tonic-gate 	 * MTMALLOC_MIN_ALIGN, having rounded up alloc_size if necessary. Since
4550Sstevel@tonic-gate 	 * we will use alloc_size to return the excess fragments to the free
4560Sstevel@tonic-gate 	 * list, we also round up alloc_size if necessary.
4570Sstevel@tonic-gate 	 */
4580Sstevel@tonic-gate 	if ((alloc_size > MAX_CACHED) &&
4590Sstevel@tonic-gate 	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
4600Sstevel@tonic-gate 		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate 	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
4630Sstevel@tonic-gate 		/* aligned correctly */
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 		size_t frag_size = alloc_size -
4660Sstevel@tonic-gate 			(size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 		/*
4690Sstevel@tonic-gate 		 * If the leftover piece of the memory > MAX_CACHED,
4700Sstevel@tonic-gate 		 * split off the piece and return it back to the freelist.
4710Sstevel@tonic-gate 		 */
4720Sstevel@tonic-gate 		if (IS_OVERSIZE(frag_size, alloc_size)) {
4730Sstevel@tonic-gate 			oversize_t *orig, *tail;
4740Sstevel@tonic-gate 			uintptr_t taddr;
4750Sstevel@tonic-gate 			size_t data_size;
4760Sstevel@tonic-gate 			taddr = ALIGN((uintptr_t)alloc_buf + size,
4770Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
4780Sstevel@tonic-gate 			data_size = taddr - (uintptr_t)alloc_buf;
4790Sstevel@tonic-gate 			orig = (oversize_t *)((uintptr_t)alloc_buf -
4800Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
4810Sstevel@tonic-gate 			frag_size = orig->size - data_size -
4820Sstevel@tonic-gate 					OVSZ_HEADER_SIZE;
4830Sstevel@tonic-gate 			orig->size = data_size;
4840Sstevel@tonic-gate 			tail = oversize_header_alloc(taddr, frag_size);
4850Sstevel@tonic-gate 			free_oversize(tail);
4860Sstevel@tonic-gate 		}
4870Sstevel@tonic-gate 		ret_buf = alloc_buf;
4880Sstevel@tonic-gate 	} else {
4890Sstevel@tonic-gate 		uchar_t	oversize_bits = 0;
4900Sstevel@tonic-gate 		size_t	head_sz, data_sz, tail_sz;
4910Sstevel@tonic-gate 		uintptr_t ret_addr, taddr, shift, tshift;
4920Sstevel@tonic-gate 		oversize_t *orig, *tail;
4930Sstevel@tonic-gate 		size_t tsize;
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 		/* needs to be aligned */
4960Sstevel@tonic-gate 		shift = alignment - offset;
4970Sstevel@tonic-gate 
4980Sstevel@tonic-gate 		assert(shift >= MTMALLOC_MIN_ALIGN);
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate 		ret_addr = ((uintptr_t)alloc_buf + shift);
5010Sstevel@tonic-gate 		ret_buf = (void *)ret_addr;
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 		if (alloc_size <= MAX_CACHED) {
5040Sstevel@tonic-gate 			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
5050Sstevel@tonic-gate 			return (ret_buf);
5060Sstevel@tonic-gate 		}
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 		/*
5090Sstevel@tonic-gate 		 * Only check for the fragments when the memory is allocated
5100Sstevel@tonic-gate 		 * from oversize_list.  Split off a fragment and return it
5110Sstevel@tonic-gate 		 * to the oversize freelist when it's > MAX_CACHED.
5120Sstevel@tonic-gate 		 */
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate 		tail_sz = alloc_size -
5170Sstevel@tonic-gate 			(shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
5180Sstevel@tonic-gate 
5190Sstevel@tonic-gate 		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
5200Sstevel@tonic-gate 				IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
5210Sstevel@tonic-gate 				IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 		switch (oversize_bits) {
5240Sstevel@tonic-gate 			case NONE_OVERSIZE:
5250Sstevel@tonic-gate 			case DATA_OVERSIZE:
5260Sstevel@tonic-gate 				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5270Sstevel@tonic-gate 					alloc_buf);
5280Sstevel@tonic-gate 				break;
5290Sstevel@tonic-gate 			case HEAD_OVERSIZE:
5300Sstevel@tonic-gate 				/*
5310Sstevel@tonic-gate 				 * If we can extend data > MAX_CACHED and have
5320Sstevel@tonic-gate 				 * head still > MAX_CACHED, we split the head
5330Sstevel@tonic-gate 				 * end as when head and data are both oversized;
5340Sstevel@tonic-gate 				 * otherwise just create a memalign header.
5350Sstevel@tonic-gate 				 */
5360Sstevel@tonic-gate 				tsize = (shift + size) - (MAX_CACHED + 8 +
5370Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
5400Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5410Sstevel@tonic-gate 						alloc_buf);
5420Sstevel@tonic-gate 					break;
5430Sstevel@tonic-gate 				} else {
5440Sstevel@tonic-gate 					tsize += OVSZ_HEADER_SIZE;
5450Sstevel@tonic-gate 					taddr = ALIGN((uintptr_t)alloc_buf +
5460Sstevel@tonic-gate 						tsize, MTMALLOC_MIN_ALIGN);
5470Sstevel@tonic-gate 					tshift = ret_addr - taddr;
5480Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
5490Sstevel@tonic-gate 						taddr);
5500Sstevel@tonic-gate 					ret_addr = taddr;
5510Sstevel@tonic-gate 					shift = ret_addr - (uintptr_t)alloc_buf;
5520Sstevel@tonic-gate 				}
5530Sstevel@tonic-gate 				/* FALLTHROUGH */
5540Sstevel@tonic-gate 			case HEAD_AND_DATA_OVERSIZE:
5550Sstevel@tonic-gate 				/*
5560Sstevel@tonic-gate 				 * Split off the head fragment and
5570Sstevel@tonic-gate 				 * return it back to oversize freelist.
5580Sstevel@tonic-gate 				 * Create oversize header for the piece
5590Sstevel@tonic-gate 				 * of (data + tail fragment).
5600Sstevel@tonic-gate 				 */
5610Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
5620Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
5630Sstevel@tonic-gate 				(void) oversize_header_alloc(ret_addr -
5640Sstevel@tonic-gate 						OVSZ_HEADER_SIZE,
5650Sstevel@tonic-gate 						(orig->size - shift));
5660Sstevel@tonic-gate 				orig->size = shift - OVSZ_HEADER_SIZE;
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 				/* free up the head fragment */
5690Sstevel@tonic-gate 				free_oversize(orig);
5700Sstevel@tonic-gate 				break;
5710Sstevel@tonic-gate 			case TAIL_OVERSIZE:
5720Sstevel@tonic-gate 				/*
5730Sstevel@tonic-gate 				 * If we can extend data > MAX_CACHED and have
5740Sstevel@tonic-gate 				 * tail-end still > MAX_CACHED, we split the tail
5750Sstevel@tonic-gate 				 * end; otherwise just create a memalign header.
5760Sstevel@tonic-gate 				 */
5770Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
5780Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
5790Sstevel@tonic-gate 				tsize =  orig->size - (MAX_CACHED + 8 +
5800Sstevel@tonic-gate 					shift + OVSZ_HEADER_SIZE +
5810Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
5820Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
5830Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5840Sstevel@tonic-gate 						alloc_buf);
5850Sstevel@tonic-gate 					break;
5860Sstevel@tonic-gate 				} else {
5870Sstevel@tonic-gate 					size = MAX_CACHED + 8;
5880Sstevel@tonic-gate 				}
5890Sstevel@tonic-gate 				/* FALLTHROUGH */
5900Sstevel@tonic-gate 			case DATA_AND_TAIL_OVERSIZE:
5910Sstevel@tonic-gate 				/*
5920Sstevel@tonic-gate 				 * Split off the tail fragment and
5930Sstevel@tonic-gate 				 * return it back to oversize freelist.
5940Sstevel@tonic-gate 				 * Create memalign header and adjust
5950Sstevel@tonic-gate 				 * the size for the piece of
5960Sstevel@tonic-gate 				 * (head fragment + data).
5970Sstevel@tonic-gate 				 */
5980Sstevel@tonic-gate 				taddr = ALIGN(ret_addr + size,
5990Sstevel@tonic-gate 						MTMALLOC_MIN_ALIGN);
6000Sstevel@tonic-gate 				data_sz = (size_t)(taddr -
6010Sstevel@tonic-gate 						(uintptr_t)alloc_buf);
6020Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6030Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
6040Sstevel@tonic-gate 				tsize = orig->size - data_sz;
6050Sstevel@tonic-gate 				orig->size = data_sz;
6060Sstevel@tonic-gate 				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
6070Sstevel@tonic-gate 					alloc_buf);
6080Sstevel@tonic-gate 				tsize -= OVSZ_HEADER_SIZE;
6090Sstevel@tonic-gate 				tail = oversize_header_alloc(taddr,  tsize);
6100Sstevel@tonic-gate 				free_oversize(tail);
6110Sstevel@tonic-gate 				break;
6120Sstevel@tonic-gate 			case HEAD_AND_TAIL_OVERSIZE:
6130Sstevel@tonic-gate 				/*
6140Sstevel@tonic-gate 				 * Split off the head fragment.
6150Sstevel@tonic-gate 				 * We try to free up the tail end when we can
6160Sstevel@tonic-gate 				 * extend the data size to (MAX_CACHED + 8)
6170Sstevel@tonic-gate 				 * and the tail end remains oversized.
6180Sstevel@tonic-gate 				 * The bottom line is all split pieces
6190Sstevel@tonic-gate 				 * should be oversize in size.
6200Sstevel@tonic-gate 				 */
6210Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6220Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
6230Sstevel@tonic-gate 				tsize =  orig->size - (MAX_CACHED + 8 +
6240Sstevel@tonic-gate 					OVSZ_HEADER_SIZE + shift +
6250Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
6260Sstevel@tonic-gate 
6270Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
6280Sstevel@tonic-gate 					/*
6290Sstevel@tonic-gate 					 * If the chunk is not big enough
6300Sstevel@tonic-gate 					 * to make both data and tail oversize
6310Sstevel@tonic-gate 					 * we just keep them as one piece.
6320Sstevel@tonic-gate 					 */
6330Sstevel@tonic-gate 					(void) oversize_header_alloc(ret_addr -
6340Sstevel@tonic-gate 						OVSZ_HEADER_SIZE,
6350Sstevel@tonic-gate 						orig->size - shift);
6360Sstevel@tonic-gate 					orig->size = shift -
6370Sstevel@tonic-gate 						OVSZ_HEADER_SIZE;
6380Sstevel@tonic-gate 					free_oversize(orig);
6390Sstevel@tonic-gate 					break;
6400Sstevel@tonic-gate 				} else {
6410Sstevel@tonic-gate 					/*
6420Sstevel@tonic-gate 					 * extend data size > MAX_CACHED
6430Sstevel@tonic-gate 					 * and handle it as head, data, tail
6440Sstevel@tonic-gate 					 * are all oversized.
6450Sstevel@tonic-gate 					 */
6460Sstevel@tonic-gate 					size = MAX_CACHED + 8;
6470Sstevel@tonic-gate 				}
6480Sstevel@tonic-gate 				/* FALLTHROUGH */
6490Sstevel@tonic-gate 			case ALL_OVERSIZE:
6500Sstevel@tonic-gate 				/*
6510Sstevel@tonic-gate 				 * split off the head and tail fragments,
6520Sstevel@tonic-gate 				 * return them back to the oversize freelist.
6530Sstevel@tonic-gate 				 * Alloc oversize header for data seg.
6540Sstevel@tonic-gate 				 */
6550Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6560Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
6570Sstevel@tonic-gate 				tsize = orig->size;
6580Sstevel@tonic-gate 				orig->size = shift - OVSZ_HEADER_SIZE;
6590Sstevel@tonic-gate 				free_oversize(orig);
6600Sstevel@tonic-gate 
6610Sstevel@tonic-gate 				taddr = ALIGN(ret_addr + size,
6620Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
6630Sstevel@tonic-gate 				data_sz = taddr - ret_addr;
6640Sstevel@tonic-gate 				assert(tsize > (shift + data_sz +
6650Sstevel@tonic-gate 					OVSZ_HEADER_SIZE));
6660Sstevel@tonic-gate 				tail_sz = tsize -
6670Sstevel@tonic-gate 					(shift + data_sz + OVSZ_HEADER_SIZE);
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 				/* create oversize header for data seg */
6700Sstevel@tonic-gate 				(void) oversize_header_alloc(ret_addr -
6710Sstevel@tonic-gate 					OVSZ_HEADER_SIZE, data_sz);
6720Sstevel@tonic-gate 
6730Sstevel@tonic-gate 				/* create oversize header for tail fragment */
6740Sstevel@tonic-gate 				tail = oversize_header_alloc(taddr, tail_sz);
6750Sstevel@tonic-gate 				free_oversize(tail);
6760Sstevel@tonic-gate 				break;
6770Sstevel@tonic-gate 			default:
6780Sstevel@tonic-gate 				/* should not reach here */
6790Sstevel@tonic-gate 				assert(0);
6800Sstevel@tonic-gate 		}
6810Sstevel@tonic-gate 	}
6820Sstevel@tonic-gate 	return (ret_buf);
6830Sstevel@tonic-gate }
6840Sstevel@tonic-gate 
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate void *
6870Sstevel@tonic-gate valloc(size_t size)
6880Sstevel@tonic-gate {
6890Sstevel@tonic-gate 	static unsigned pagesize;
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate 	if (size == 0)
6920Sstevel@tonic-gate 		return (NULL);
6930Sstevel@tonic-gate 
6940Sstevel@tonic-gate 	if (!pagesize)
6950Sstevel@tonic-gate 		pagesize = sysconf(_SC_PAGESIZE);
6960Sstevel@tonic-gate 
6970Sstevel@tonic-gate 	return (memalign(pagesize, size));
6980Sstevel@tonic-gate }
6990Sstevel@tonic-gate 
7000Sstevel@tonic-gate void
7010Sstevel@tonic-gate mallocctl(int cmd, long value)
7020Sstevel@tonic-gate {
7030Sstevel@tonic-gate 	switch (cmd) {
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 	case MTDEBUGPATTERN:
7060Sstevel@tonic-gate 		/*
7070Sstevel@tonic-gate 		 * Reinitialize free blocks in case malloc() is called prior
7080Sstevel@tonic-gate 		 * to mallocctl().
7090Sstevel@tonic-gate 		 */
7100Sstevel@tonic-gate 		if (value && !(debugopt & cmd)) {
7110Sstevel@tonic-gate 			reinit++;
7120Sstevel@tonic-gate 			debugopt |= cmd;
7130Sstevel@tonic-gate 			reinit_cpu_list();
7140Sstevel@tonic-gate 		}
7150Sstevel@tonic-gate 		/*FALLTHRU*/
7160Sstevel@tonic-gate 	case MTDOUBLEFREE:
7170Sstevel@tonic-gate 	case MTINITBUFFER:
7180Sstevel@tonic-gate 		if (value)
7190Sstevel@tonic-gate 			debugopt |= cmd;
7200Sstevel@tonic-gate 		else
7210Sstevel@tonic-gate 			debugopt &= ~cmd;
7220Sstevel@tonic-gate 		break;
7230Sstevel@tonic-gate 	case MTCHUNKSIZE:
7240Sstevel@tonic-gate 		if (value >= MINSIZE && value <= MAXSIZE)
7250Sstevel@tonic-gate 			requestsize = value;
7260Sstevel@tonic-gate 		break;
7270Sstevel@tonic-gate 	default:
7280Sstevel@tonic-gate 		break;
7290Sstevel@tonic-gate 	}
7300Sstevel@tonic-gate }
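
/*
 * Example (illustrative): a debugging session might enable the fill
 * patterns and enlarge the per-cache sbrk request before any allocations
 * are made:
 *
 *	mallocctl(MTDEBUGPATTERN, 1);	fill freed buffers with 0xdeadbeef
 *	mallocctl(MTINITBUFFER, 1);	fill new buffers with 0xbaddcafe
 *	mallocctl(MTCHUNKSIZE, 64);	grow new caches by 64 * HUNKSIZE bytes
 */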
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate /*
7333866Sraf  * Initialization function, called from the init section of the library.
7343866Sraf  * No locking is required here because we are single-threaded during
7353866Sraf  * library initialization.
7360Sstevel@tonic-gate  */
7373866Sraf static void
7380Sstevel@tonic-gate setup_caches(void)
7390Sstevel@tonic-gate {
7400Sstevel@tonic-gate 	uintptr_t oldbrk;
7410Sstevel@tonic-gate 	uintptr_t newbrk;
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate 	size_t cache_space_needed;
7440Sstevel@tonic-gate 	size_t padding;
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	curcpu_func new_curcpu;
7470Sstevel@tonic-gate 	uint_t new_cpu_mask;
7480Sstevel@tonic-gate 	percpu_t *new_cpu_list;
7490Sstevel@tonic-gate 
7500Sstevel@tonic-gate 	uint_t i, j;
7510Sstevel@tonic-gate 	uintptr_t list_addr;
7520Sstevel@tonic-gate 
7533866Sraf 	/*
7543866Sraf 	 * Get a decent "current cpu identifier", to be used to reduce
7553866Sraf 	 * contention.  Eventually, this should be replaced by an interface
7563866Sraf 	 * to get the actual CPU sequence number in libthread/liblwp.
7573866Sraf 	 */
7583866Sraf 	new_curcpu = (curcpu_func)thr_self;
7593866Sraf 	if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
7603866Sraf 		ncpus = 4; /* decent default value */
7610Sstevel@tonic-gate 
7620Sstevel@tonic-gate 	/* round ncpus up to a power of 2 */
7630Sstevel@tonic-gate 	while (ncpus & (ncpus - 1))
7640Sstevel@tonic-gate 		ncpus++;
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	new_cpu_mask = ncpus - 1;	/* create the cpu mask */
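	/*
	 * Illustrative example: on a 6-CPU machine ncpus starts out as 12
	 * and the loop above rounds it up to 16, so new_cpu_mask is 0xf.
	 */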
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	/*
7690Sstevel@tonic-gate 	 * We now do some magic with the brk.  What we want to get in the
7700Sstevel@tonic-gate 	 * end is a bunch of well-aligned stuff in a big initial allocation.
7710Sstevel@tonic-gate 	 * Along the way, we do sanity checks to make sure no one else has
7720Sstevel@tonic-gate 	 * touched the brk (which shouldn't happen, but it's always good to
7730Sstevel@tonic-gate 	 * check)
7740Sstevel@tonic-gate 	 *
7750Sstevel@tonic-gate 	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
7760Sstevel@tonic-gate 	 */
7770Sstevel@tonic-gate 	oldbrk = (uintptr_t)sbrk(0);
7783866Sraf 	if ((void *)oldbrk == (void *)-1)
7793866Sraf 		abort();	/* sbrk is broken -- we're doomed. */
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate 	/*
7820Sstevel@tonic-gate 	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
7830Sstevel@tonic-gate 	 * the percpu structures and cache lists will be properly aligned.
7840Sstevel@tonic-gate 	 *
7850Sstevel@tonic-gate 	 * In addition, all hunks will be page-aligned, assuming
7860Sstevel@tonic-gate 	 * HUNKSIZE >= PAGESIZE, so they can be paged out individually.
7870Sstevel@tonic-gate 	 */
7880Sstevel@tonic-gate 	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
7893866Sraf 	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
7903866Sraf 		abort();	/* sbrk is broken -- we're doomed. */
7910Sstevel@tonic-gate 
7920Sstevel@tonic-gate 	/*
7930Sstevel@tonic-gate 	 * For each cpu, there is one percpu_t and a list of caches
7940Sstevel@tonic-gate 	 */
7950Sstevel@tonic-gate 	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate 	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 	if (new_cpu_list == (percpu_t *)-1 ||
8003866Sraf 	    (uintptr_t)new_cpu_list != newbrk)
8013866Sraf 		abort();	/* sbrk is broken -- we're doomed. */
8020Sstevel@tonic-gate 
8030Sstevel@tonic-gate 	/*
8040Sstevel@tonic-gate 	 * Finally, align the brk to HUNKSIZE so that all hunks are
8050Sstevel@tonic-gate 	 * page-aligned, to avoid edge-effects.
8060Sstevel@tonic-gate 	 */
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate 	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;
8110Sstevel@tonic-gate 
8123866Sraf 	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
8133866Sraf 		abort();	/* sbrk is broken -- we're doomed. */
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate 	/* initialize the percpu list */
8180Sstevel@tonic-gate 	for (i = 0; i < ncpus; i++) {
8190Sstevel@tonic-gate 		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
8200Sstevel@tonic-gate 		for (j = 0; j < NUM_CACHES; j++) {
8210Sstevel@tonic-gate 			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
8220Sstevel@tonic-gate 			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
8230Sstevel@tonic-gate 		}
8240Sstevel@tonic-gate 
8253866Sraf 		(void) mutex_init(&new_cpu_list[i].mt_parent_lock,
8263866Sraf 		    USYNC_THREAD, NULL);
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 		/* get the correct cache list alignment */
8290Sstevel@tonic-gate 		list_addr += CACHELIST_SIZE;
8300Sstevel@tonic-gate 	}
8310Sstevel@tonic-gate 
8320Sstevel@tonic-gate 	/*
8330Sstevel@tonic-gate 	 * Initialize oversize listhead
8340Sstevel@tonic-gate 	 */
8350Sstevel@tonic-gate 	oversize_list.next_bysize = &oversize_list;
8360Sstevel@tonic-gate 	oversize_list.prev_bysize = &oversize_list;
8370Sstevel@tonic-gate 	oversize_list.next_byaddr = &oversize_list;
8380Sstevel@tonic-gate 	oversize_list.prev_byaddr = &oversize_list;
8390Sstevel@tonic-gate 	oversize_list.addr = NULL;
8400Sstevel@tonic-gate 	oversize_list.size = 0;		/* sentinel */
8410Sstevel@tonic-gate 
8420Sstevel@tonic-gate 	/*
8433866Sraf 	 * Now install the global variables.
8440Sstevel@tonic-gate 	 */
8450Sstevel@tonic-gate 	curcpu = new_curcpu;
8460Sstevel@tonic-gate 	cpu_mask = new_cpu_mask;
8470Sstevel@tonic-gate 	cpu_list = new_cpu_list;
8480Sstevel@tonic-gate }
8490Sstevel@tonic-gate 
8500Sstevel@tonic-gate static void
8510Sstevel@tonic-gate create_cache(cache_t *cp, size_t size, uint_t chunksize)
8520Sstevel@tonic-gate {
8530Sstevel@tonic-gate 	long nblocks;
8540Sstevel@tonic-gate 
8553866Sraf 	(void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
8560Sstevel@tonic-gate 	cp->mt_size = size;
8570Sstevel@tonic-gate 	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
8580Sstevel@tonic-gate 	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
8590Sstevel@tonic-gate 	cp->mt_hunks = chunksize;
8600Sstevel@tonic-gate 	/*
8610Sstevel@tonic-gate 	 * rough calculation. We will need to adjust later.
8620Sstevel@tonic-gate 	 */
8630Sstevel@tonic-gate 	nblocks = cp->mt_span / cp->mt_size;
8640Sstevel@tonic-gate 	nblocks >>= 3;
8650Sstevel@tonic-gate 	if (nblocks == 0) { /* less than 8 free blocks in this pool */
8660Sstevel@tonic-gate 		int32_t numblocks = 0;
8670Sstevel@tonic-gate 		long i = cp->mt_span;
8680Sstevel@tonic-gate 		size_t sub = cp->mt_size;
8690Sstevel@tonic-gate 		uchar_t mask = 0;
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 		while (i > sub) {
8720Sstevel@tonic-gate 			numblocks++;
8730Sstevel@tonic-gate 			i -= sub;
8740Sstevel@tonic-gate 		}
8750Sstevel@tonic-gate 		nblocks = numblocks;
8760Sstevel@tonic-gate 		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
8770Sstevel@tonic-gate 		cp->mt_nfree = numblocks;
8780Sstevel@tonic-gate 		while (numblocks--) {
8790Sstevel@tonic-gate 			mask |= 0x80 >> numblocks;
8800Sstevel@tonic-gate 		}
8810Sstevel@tonic-gate 		*(cp->mt_freelist) = mask;
8820Sstevel@tonic-gate 	} else {
8830Sstevel@tonic-gate 		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
8840Sstevel@tonic-gate 			nblocks, 32);
8850Sstevel@tonic-gate 		/* recompute nblocks */
8860Sstevel@tonic-gate 		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
8870Sstevel@tonic-gate 			cp->mt_span - cp->mt_arena) / cp->mt_size;
8880Sstevel@tonic-gate 		cp->mt_nfree = ((nblocks >> 3) << 3);
8890Sstevel@tonic-gate 		/* Set everything to free */
8900Sstevel@tonic-gate 		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
8910Sstevel@tonic-gate 	}
8920Sstevel@tonic-gate 
8930Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
8940Sstevel@tonic-gate 		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate 	cp->mt_next = NULL;
8970Sstevel@tonic-gate }
8980Sstevel@tonic-gate 
8990Sstevel@tonic-gate static void
9000Sstevel@tonic-gate reinit_cpu_list(void)
9010Sstevel@tonic-gate {
9020Sstevel@tonic-gate 	oversize_t *wp = oversize_list.next_bysize;
9030Sstevel@tonic-gate 	percpu_t *cpuptr;
9040Sstevel@tonic-gate 	cache_t *thiscache;
9050Sstevel@tonic-gate 	cache_head_t *cachehead;
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate 	/* Reinitialize free oversize blocks. */
9080Sstevel@tonic-gate 	(void) mutex_lock(&oversize_lock);
9090Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
9100Sstevel@tonic-gate 		for (; wp != &oversize_list; wp = wp->next_bysize)
9110Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, wp->addr, wp->size);
9120Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate 	/* Reinitialize free blocks. */
9150Sstevel@tonic-gate 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
9160Sstevel@tonic-gate 		(void) mutex_lock(&cpuptr->mt_parent_lock);
9170Sstevel@tonic-gate 		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
9180Sstevel@tonic-gate 			&cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
9190Sstevel@tonic-gate 			for (thiscache = cachehead->mt_cache; thiscache != NULL;
9200Sstevel@tonic-gate 				thiscache = thiscache->mt_next) {
9210Sstevel@tonic-gate 				(void) mutex_lock(&thiscache->mt_cache_lock);
9220Sstevel@tonic-gate 				if (thiscache->mt_nfree == 0) {
9230Sstevel@tonic-gate 					(void) mutex_unlock(
9240Sstevel@tonic-gate 					    &thiscache->mt_cache_lock);
9250Sstevel@tonic-gate 					continue;
9260Sstevel@tonic-gate 				}
9270Sstevel@tonic-gate 				if (thiscache != NULL)
9280Sstevel@tonic-gate 					reinit_cache(thiscache);
9290Sstevel@tonic-gate 				(void) mutex_unlock(&thiscache->mt_cache_lock);
9300Sstevel@tonic-gate 			}
9310Sstevel@tonic-gate 		}
9320Sstevel@tonic-gate 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
9330Sstevel@tonic-gate 	}
9340Sstevel@tonic-gate 	reinit = 0;
9350Sstevel@tonic-gate }
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate static void
9380Sstevel@tonic-gate reinit_cache(cache_t *thiscache)
9390Sstevel@tonic-gate {
9400Sstevel@tonic-gate 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
9410Sstevel@tonic-gate 	int32_t i, n;
9420Sstevel@tonic-gate 	caddr_t ret;
9430Sstevel@tonic-gate 
9440Sstevel@tonic-gate 	freeblocks = (uint32_t *)thiscache->mt_freelist;
9450Sstevel@tonic-gate 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
9460Sstevel@tonic-gate 		if (*freeblocks & 0xffffffff) {
9470Sstevel@tonic-gate 		    for (i = 0; i < 32; i++) {
9480Sstevel@tonic-gate 			if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
9490Sstevel@tonic-gate 				n = (uintptr_t)(((freeblocks -
9500Sstevel@tonic-gate 				    (uint32_t *)thiscache->mt_freelist) << 5)
9510Sstevel@tonic-gate 				    + i) * thiscache->mt_size;
9520Sstevel@tonic-gate 				ret = thiscache->mt_arena + n;
9530Sstevel@tonic-gate 				ret += OVERHEAD;
9540Sstevel@tonic-gate 				copy_pattern(FREEPATTERN, ret,
9550Sstevel@tonic-gate 				    thiscache->mt_size);
9560Sstevel@tonic-gate 			}
9570Sstevel@tonic-gate 		    }
9580Sstevel@tonic-gate 		}
9590Sstevel@tonic-gate 		freeblocks++;
9600Sstevel@tonic-gate 	}
9610Sstevel@tonic-gate }
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate static void *
9640Sstevel@tonic-gate malloc_internal(size_t size, percpu_t *cpuptr)
9650Sstevel@tonic-gate {
9660Sstevel@tonic-gate 	cache_head_t *cachehead;
9670Sstevel@tonic-gate 	cache_t *thiscache, *hintcache;
9680Sstevel@tonic-gate 	int32_t i, n, logsz, bucket;
9690Sstevel@tonic-gate 	uint32_t index;
9700Sstevel@tonic-gate 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
9710Sstevel@tonic-gate 	caddr_t ret;
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate 	logsz = MIN_CACHED_SHIFT;
9740Sstevel@tonic-gate 
9750Sstevel@tonic-gate 	while (size > (1 << logsz))
9760Sstevel@tonic-gate 		logsz++;
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate 	bucket = logsz - MIN_CACHED_SHIFT;
9790Sstevel@tonic-gate 
9800Sstevel@tonic-gate 	(void) mutex_lock(&cpuptr->mt_parent_lock);
9810Sstevel@tonic-gate 
9820Sstevel@tonic-gate 	/*
9830Sstevel@tonic-gate 	 * Find a cache of the appropriate size with free buffers.
9840Sstevel@tonic-gate 	 *
9850Sstevel@tonic-gate 	 * We don't need to lock each cache as we check their mt_nfree count,
9860Sstevel@tonic-gate 	 * since:
9870Sstevel@tonic-gate 	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
9880Sstevel@tonic-gate 	 *	   free happens during our search, it will increment mt_nfree,
9890Sstevel@tonic-gate 	 *	   which will not affect the test.
9900Sstevel@tonic-gate 	 *	2.  Allocations can decrement mt_nfree, but they can't happen
9910Sstevel@tonic-gate 	 *	   as long as we hold mt_parent_lock.
9920Sstevel@tonic-gate 	 */
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate 	cachehead = &cpuptr->mt_caches[bucket];
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate 	/* Search through the list, starting at the mt_hint */
9970Sstevel@tonic-gate 	thiscache = cachehead->mt_hint;
9980Sstevel@tonic-gate 
9990Sstevel@tonic-gate 	while (thiscache != NULL && thiscache->mt_nfree == 0)
10000Sstevel@tonic-gate 		thiscache = thiscache->mt_next;
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	if (thiscache == NULL) {
10030Sstevel@tonic-gate 		/* wrap around -- search up to the hint */
10040Sstevel@tonic-gate 		thiscache = cachehead->mt_cache;
10050Sstevel@tonic-gate 		hintcache = cachehead->mt_hint;
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate 		while (thiscache != NULL && thiscache != hintcache &&
10080Sstevel@tonic-gate 		    thiscache->mt_nfree == 0)
10090Sstevel@tonic-gate 			thiscache = thiscache->mt_next;
10100Sstevel@tonic-gate 
10110Sstevel@tonic-gate 		if (thiscache == hintcache)
10120Sstevel@tonic-gate 			thiscache = NULL;
10130Sstevel@tonic-gate 	}
10140Sstevel@tonic-gate 
10150Sstevel@tonic-gate 
10160Sstevel@tonic-gate 	if (thiscache == NULL) { /* there are no free caches */
10170Sstevel@tonic-gate 		int32_t thisrequest = requestsize;
10180Sstevel@tonic-gate 		int32_t buffer_size = (1 << logsz) + OVERHEAD;
10190Sstevel@tonic-gate 
10200Sstevel@tonic-gate 		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 		if (thiscache == (cache_t *)-1) {
10230Sstevel@tonic-gate 		    (void) mutex_unlock(&cpuptr->mt_parent_lock);
10240Sstevel@tonic-gate 		    errno = EAGAIN;
10250Sstevel@tonic-gate 		    return (NULL);
10260Sstevel@tonic-gate 		}
10270Sstevel@tonic-gate 		create_cache(thiscache, buffer_size, thisrequest);
10280Sstevel@tonic-gate 
10290Sstevel@tonic-gate 		/* link in the new block at the beginning of the list */
10300Sstevel@tonic-gate 		thiscache->mt_next = cachehead->mt_cache;
10310Sstevel@tonic-gate 		cachehead->mt_cache = thiscache;
10320Sstevel@tonic-gate 	}
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 	/* update the hint to the cache we found or created */
10350Sstevel@tonic-gate 	cachehead->mt_hint = thiscache;
10360Sstevel@tonic-gate 
10370Sstevel@tonic-gate 	/* thiscache now points to a cache with available space */
10380Sstevel@tonic-gate 	(void) mutex_lock(&thiscache->mt_cache_lock);
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 	freeblocks = (uint32_t *)thiscache->mt_freelist;
10410Sstevel@tonic-gate 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
10420Sstevel@tonic-gate 		if (*freeblocks & 0xffffffff)
10430Sstevel@tonic-gate 			break;
10440Sstevel@tonic-gate 		freeblocks++;
10450Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10460Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10470Sstevel@tonic-gate 			break;
10480Sstevel@tonic-gate 		freeblocks++;
10490Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10500Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10510Sstevel@tonic-gate 			break;
10520Sstevel@tonic-gate 		freeblocks++;
10530Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10540Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10550Sstevel@tonic-gate 			break;
10560Sstevel@tonic-gate 		freeblocks++;
10570Sstevel@tonic-gate 	}
10580Sstevel@tonic-gate 
10590Sstevel@tonic-gate 	/*
10600Sstevel@tonic-gate 	 * the offset from mt_freelist to freeblocks is the offset into
10610Sstevel@tonic-gate 	 * the arena. Be sure to include the offset into freeblocks
10620Sstevel@tonic-gate 	 * of the bitmask. n is the offset.
10630Sstevel@tonic-gate 	 */
10640Sstevel@tonic-gate 	for (i = 0; i < 32; ) {
10650Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10660Sstevel@tonic-gate 			break;
10670Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10680Sstevel@tonic-gate 			break;
10690Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10700Sstevel@tonic-gate 			break;
10710Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10720Sstevel@tonic-gate 			break;
10730Sstevel@tonic-gate 	}
10740Sstevel@tonic-gate 	index = 0x80000000 >> --i;
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate 
10770Sstevel@tonic-gate 	*freeblocks &= FLIP_EM(~index);
10780Sstevel@tonic-gate 
10790Sstevel@tonic-gate 	thiscache->mt_nfree--;
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 	(void) mutex_unlock(&thiscache->mt_cache_lock);
10820Sstevel@tonic-gate 	(void) mutex_unlock(&cpuptr->mt_parent_lock);
10830Sstevel@tonic-gate 
10840Sstevel@tonic-gate 	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
10850Sstevel@tonic-gate 		+ i) * thiscache->mt_size;
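	/*
	 * Illustrative example: if freeblocks points at the third 32-bit
	 * word of the bitmask (index 2) and i == 5, the block index is
	 * 2 * 32 + 5 == 69 and n is 69 * mt_size bytes into the arena.
	 */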
10860Sstevel@tonic-gate 	/*
10870Sstevel@tonic-gate 	 * Now that you have the offset in n and have changed the free
10880Sstevel@tonic-gate 	 * mask in the freelist, nothing is left to do but find the block
10890Sstevel@tonic-gate 	 * in the arena, put the value of thiscache in the word
10900Sstevel@tonic-gate 	 * ahead of the handed-out address, and return the memory
10910Sstevel@tonic-gate 	 * back to the user.
10920Sstevel@tonic-gate 	 */
10930Sstevel@tonic-gate 	ret = thiscache->mt_arena + n;
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 	/* Store the cache addr for this buf. Makes free go fast. */
10960Sstevel@tonic-gate 	*(uintptr_t *)ret = (uintptr_t)thiscache;
10970Sstevel@tonic-gate 
10980Sstevel@tonic-gate 	/*
10990Sstevel@tonic-gate 	 * This assert makes sure we don't hand out memory that is not
11000Sstevel@tonic-gate 	 * owned by this cache.
11010Sstevel@tonic-gate 	 */
11020Sstevel@tonic-gate 	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
11030Sstevel@tonic-gate 		thiscache->mt_span);
11040Sstevel@tonic-gate 
11050Sstevel@tonic-gate 	ret += OVERHEAD;
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
11080Sstevel@tonic-gate 
11090Sstevel@tonic-gate 	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
11100Sstevel@tonic-gate 		if (verify_pattern(FREEPATTERN, ret, size))
11110Sstevel@tonic-gate 			abort();	/* reference after free */
11120Sstevel@tonic-gate 
11130Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
11140Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
11150Sstevel@tonic-gate 	return ((void *)ret);
11160Sstevel@tonic-gate }
11170Sstevel@tonic-gate 
11180Sstevel@tonic-gate static void *
11190Sstevel@tonic-gate morecore(size_t bytes)
11200Sstevel@tonic-gate {
11210Sstevel@tonic-gate 	void * ret;
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 	if (bytes > LONG_MAX) {
11240Sstevel@tonic-gate 		intptr_t wad;
11250Sstevel@tonic-gate 		/*
11260Sstevel@tonic-gate 		 * The request size is too big for a single call to sbrk(),
11270Sstevel@tonic-gate 		 * whose increment is a signed intptr_t, so do it in chunks.
11280Sstevel@tonic-gate 		 */
11290Sstevel@tonic-gate 		if (bytes == ULONG_MAX)
11300Sstevel@tonic-gate 			return ((void *)-1);
11310Sstevel@tonic-gate 
11320Sstevel@tonic-gate 		ret = sbrk(0);
11330Sstevel@tonic-gate 		wad = LONG_MAX;
11340Sstevel@tonic-gate 		while (wad > 0) {
11350Sstevel@tonic-gate 			if (sbrk(wad) == (void *)-1) {
11360Sstevel@tonic-gate 				if (ret != sbrk(0))
11370Sstevel@tonic-gate 					(void) sbrk(-LONG_MAX);
11380Sstevel@tonic-gate 				return ((void *)-1);
11390Sstevel@tonic-gate 			}
11400Sstevel@tonic-gate 			bytes -= LONG_MAX;
11410Sstevel@tonic-gate 			wad = bytes;
11420Sstevel@tonic-gate 		}
11430Sstevel@tonic-gate 	} else
11440Sstevel@tonic-gate 		ret = sbrk(bytes);
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate 	return (ret);
11470Sstevel@tonic-gate }
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate 
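/*
 * Satisfy a request too large for the per-CPU caches.  The allocation is
 * carved from the global oversize freelist when a large enough free chunk
 * exists; otherwise fresh memory is obtained from morecore().  All of this
 * happens under the single global oversize_lock.
 */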
11500Sstevel@tonic-gate static void *
11510Sstevel@tonic-gate oversize(size_t size)
11520Sstevel@tonic-gate {
11530Sstevel@tonic-gate 	caddr_t ret;
11540Sstevel@tonic-gate 	oversize_t *big;
11550Sstevel@tonic-gate 	int bucket;
11560Sstevel@tonic-gate 
11571412Srm88369 	/* make sure we will not overflow */
11581412Srm88369 	if (size > MAX_MTMALLOC) {
11591412Srm88369 		errno = ENOMEM;
11601412Srm88369 		return (NULL);
11611412Srm88369 	}
11620Sstevel@tonic-gate 
11630Sstevel@tonic-gate 	/*
11640Sstevel@tonic-gate 	 * Since we ensure every address we hand back is
11650Sstevel@tonic-gate 	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
11660Sstevel@tonic-gate 	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
11670Sstevel@tonic-gate 	 * This eases the implementation of MTDEBUGPATTERN and MTINITBUFFER,
11680Sstevel@tonic-gate 	 * particularly where coalescing occurs.
11690Sstevel@tonic-gate 	 */
11700Sstevel@tonic-gate 	size = ALIGN(size, MTMALLOC_MIN_ALIGN);
11710Sstevel@tonic-gate 
11721412Srm88369 	/*
11731412Srm88369 	 * The idea with the global lock is that an oversize alloc
11741412Srm88369 	 * is almost certain to call morecore() and block in the
11751412Srm88369 	 * kernel anyway, so the extra serialization costs little.
11761412Srm88369 	 */
11771412Srm88369 	(void) mutex_lock(&oversize_lock);
11781412Srm88369 
11790Sstevel@tonic-gate 	if ((big = find_oversize(size)) != NULL) {
11800Sstevel@tonic-gate 		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
11810Sstevel@tonic-gate 			if (verify_pattern(FREEPATTERN, big->addr, size))
11820Sstevel@tonic-gate 				abort();	/* reference after free */
11830Sstevel@tonic-gate 	} else {
11840Sstevel@tonic-gate 		/* Get more 8-byte aligned memory from heap */
11850Sstevel@tonic-gate 		ret = morecore(size + OVSZ_HEADER_SIZE);
11860Sstevel@tonic-gate 		if (ret == (caddr_t)-1) {
11870Sstevel@tonic-gate 			(void) mutex_unlock(&oversize_lock);
11880Sstevel@tonic-gate 			errno = ENOMEM;
11890Sstevel@tonic-gate 			return (NULL);
11900Sstevel@tonic-gate 		}
11910Sstevel@tonic-gate 		big = oversize_header_alloc((uintptr_t)ret, size);
11920Sstevel@tonic-gate 	}
11930Sstevel@tonic-gate 	ret = big->addr;
11940Sstevel@tonic-gate 
11950Sstevel@tonic-gate 	/* Add big to the hash table at the head of the relevant bucket. */
11960Sstevel@tonic-gate 	bucket = HASH_OVERSIZE(ret);
11970Sstevel@tonic-gate 	big->hash_next = ovsz_hashtab[bucket];
11980Sstevel@tonic-gate 	ovsz_hashtab[bucket] = big;
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
12010Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
12040Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
12050Sstevel@tonic-gate 	return ((void *)ret);
12060Sstevel@tonic-gate }
12070Sstevel@tonic-gate 
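/*
 * Link a free oversize chunk into both doubly-linked freelists: the
 * size-ordered list searched by find_oversize(), and the address-ordered
 * list used for coalescing.  The caller supplies nx, the address-ordered
 * insertion point, as an optimization.
 */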
12080Sstevel@tonic-gate static void
12090Sstevel@tonic-gate insert_oversize(oversize_t *op, oversize_t *nx)
12100Sstevel@tonic-gate {
12110Sstevel@tonic-gate 	oversize_t *sp;
12120Sstevel@tonic-gate 
12130Sstevel@tonic-gate 	/* locate correct insertion point in size-ordered list */
12140Sstevel@tonic-gate 	for (sp = oversize_list.next_bysize;
12150Sstevel@tonic-gate 	    sp != &oversize_list && (op->size > sp->size);
12160Sstevel@tonic-gate 	    sp = sp->next_bysize)
12170Sstevel@tonic-gate 		;
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate 	/* link into size-ordered list */
12200Sstevel@tonic-gate 	op->next_bysize = sp;
12210Sstevel@tonic-gate 	op->prev_bysize = sp->prev_bysize;
12220Sstevel@tonic-gate 	op->prev_bysize->next_bysize = op;
12230Sstevel@tonic-gate 	op->next_bysize->prev_bysize = op;
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	/*
12260Sstevel@tonic-gate 	 * link item into address-ordered list
12270Sstevel@tonic-gate 	 * (caller provides insertion point as an optimization)
12280Sstevel@tonic-gate 	 */
12290Sstevel@tonic-gate 	op->next_byaddr = nx;
12300Sstevel@tonic-gate 	op->prev_byaddr = nx->prev_byaddr;
12310Sstevel@tonic-gate 	op->prev_byaddr->next_byaddr = op;
12320Sstevel@tonic-gate 	op->next_byaddr->prev_byaddr = op;
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate }
12350Sstevel@tonic-gate 
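/*
 * Remove a chunk from both the address-ordered and the size-ordered
 * oversize freelists.
 */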
12360Sstevel@tonic-gate static void
12370Sstevel@tonic-gate unlink_oversize(oversize_t *lp)
12380Sstevel@tonic-gate {
12390Sstevel@tonic-gate 	/* unlink from address list */
12400Sstevel@tonic-gate 	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
12410Sstevel@tonic-gate 	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate 	/* unlink from size list */
12440Sstevel@tonic-gate 	lp->prev_bysize->next_bysize = lp->next_bysize;
12450Sstevel@tonic-gate 	lp->next_bysize->prev_bysize = lp->prev_bysize;
12460Sstevel@tonic-gate }
12470Sstevel@tonic-gate 
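/*
 * After a chunk has grown by coalescing, move it to its proper place in
 * the size-ordered list if it is now out of order.  Its position in the
 * address-ordered list does not change.
 */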
12480Sstevel@tonic-gate static void
12490Sstevel@tonic-gate position_oversize_by_size(oversize_t *op)
12500Sstevel@tonic-gate {
12510Sstevel@tonic-gate 	oversize_t *sp;
12520Sstevel@tonic-gate 
12530Sstevel@tonic-gate 	if (op->size > op->next_bysize->size ||
12540Sstevel@tonic-gate 	    op->size < op->prev_bysize->size) {
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 		/* unlink from size list */
12570Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op->next_bysize;
12580Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op->prev_bysize;
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate 		/* locate correct insertion point in size-ordered list */
12610Sstevel@tonic-gate 		for (sp = oversize_list.next_bysize;
12620Sstevel@tonic-gate 		    sp != &oversize_list && (op->size > sp->size);
12630Sstevel@tonic-gate 		    sp = sp->next_bysize)
12640Sstevel@tonic-gate 			;
12650Sstevel@tonic-gate 
12660Sstevel@tonic-gate 		/* link into size-ordered list */
12670Sstevel@tonic-gate 		op->next_bysize = sp;
12680Sstevel@tonic-gate 		op->prev_bysize = sp->prev_bysize;
12690Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op;
12700Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op;
12710Sstevel@tonic-gate 	}
12720Sstevel@tonic-gate }
12730Sstevel@tonic-gate 
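/*
 * Return a chunk to the oversize freelist, coalescing it with any free
 * chunk that is physically adjacent on either side.
 */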
12740Sstevel@tonic-gate static void
12750Sstevel@tonic-gate add_oversize(oversize_t *lp)
12760Sstevel@tonic-gate {
12770Sstevel@tonic-gate 	int merge_flags = INSERT_ONLY;
12780Sstevel@tonic-gate 	oversize_t *nx;  	/* ptr to item right of insertion point */
12790Sstevel@tonic-gate 	oversize_t *pv;  	/* ptr to item left of insertion point */
12800Sstevel@tonic-gate 	uint_t size_lp, size_pv, size_nx;
12810Sstevel@tonic-gate 	uintptr_t endp_lp, endp_pv, endp_nx;
12820Sstevel@tonic-gate 
12830Sstevel@tonic-gate 	/*
12840Sstevel@tonic-gate 	 * Locate insertion point in address-ordered list
12850Sstevel@tonic-gate 	 */
12860Sstevel@tonic-gate 
12870Sstevel@tonic-gate 	for (nx = oversize_list.next_byaddr;
12880Sstevel@tonic-gate 	    nx != &oversize_list && (lp->addr > nx->addr);
12890Sstevel@tonic-gate 	    nx = nx->next_byaddr)
12900Sstevel@tonic-gate 		;
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	/*
12930Sstevel@tonic-gate 	 * Determine how to add chunk to oversize freelist
12940Sstevel@tonic-gate 	 */
12950Sstevel@tonic-gate 
12960Sstevel@tonic-gate 	size_lp = OVSZ_HEADER_SIZE + lp->size;
12970Sstevel@tonic-gate 	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
12980Sstevel@tonic-gate 	size_lp = endp_lp - (uintptr_t)lp;
12990Sstevel@tonic-gate 
13000Sstevel@tonic-gate 	pv = nx->prev_byaddr;
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate 	if (pv->size) {
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 		size_pv = OVSZ_HEADER_SIZE + pv->size;
13050Sstevel@tonic-gate 		endp_pv = ALIGN((uintptr_t)pv + size_pv,
13060Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13070Sstevel@tonic-gate 		size_pv = endp_pv - (uintptr_t)pv;
13080Sstevel@tonic-gate 
13090Sstevel@tonic-gate 		/* Check for adjacency with left chunk */
13100Sstevel@tonic-gate 		if ((uintptr_t)lp == endp_pv)
13110Sstevel@tonic-gate 			merge_flags |= COALESCE_LEFT;
13120Sstevel@tonic-gate 	}
13130Sstevel@tonic-gate 
13140Sstevel@tonic-gate 	if (nx->size) {
13150Sstevel@tonic-gate 
13160Sstevel@tonic-gate 		/* Check for adjacency with right chunk */
13170Sstevel@tonic-gate 		if ((uintptr_t)nx == endp_lp) {
13180Sstevel@tonic-gate 			size_nx = OVSZ_HEADER_SIZE + nx->size;
13190Sstevel@tonic-gate 			endp_nx = ALIGN((uintptr_t)nx + size_nx,
13200Sstevel@tonic-gate 			    MTMALLOC_MIN_ALIGN);
13210Sstevel@tonic-gate 			size_nx = endp_nx - (uintptr_t)nx;
13220Sstevel@tonic-gate 			merge_flags |= COALESCE_RIGHT;
13230Sstevel@tonic-gate 		}
13240Sstevel@tonic-gate 	}
13250Sstevel@tonic-gate 
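	/*
	 * merge_flags now records which neighbors are physically adjacent:
	 * INSERT_ONLY (neither), COALESCE_LEFT, COALESCE_RIGHT, or
	 * COALESCE_WITH_BOTH_SIDES (assumed to be the OR of the two flags).
	 */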
13260Sstevel@tonic-gate 	/*
13270Sstevel@tonic-gate 	 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
13280Sstevel@tonic-gate 	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
13290Sstevel@tonic-gate 	 * header(s) that will also become part of the memory available for
13300Sstevel@tonic-gate 	 * reallocation (ie lp and/or nx) must also be overwritten with
13310Sstevel@tonic-gate 	 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
13320Sstevel@tonic-gate 	 */
13330Sstevel@tonic-gate 	switch (merge_flags) {
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate 	case INSERT_ONLY:		/* Coalescing not possible */
13360Sstevel@tonic-gate 		insert_oversize(lp, nx);
13370Sstevel@tonic-gate 		break;
13380Sstevel@tonic-gate 	case COALESCE_LEFT:
13390Sstevel@tonic-gate 		pv->size += size_lp;
13400Sstevel@tonic-gate 		position_oversize_by_size(pv);
13410Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
13420Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
13430Sstevel@tonic-gate 		break;
13440Sstevel@tonic-gate 	case COALESCE_RIGHT:
13450Sstevel@tonic-gate 		unlink_oversize(nx);
13460Sstevel@tonic-gate 		lp->size += size_nx;
13470Sstevel@tonic-gate 		insert_oversize(lp, pv->next_byaddr);
13480Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
13490Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
13500Sstevel@tonic-gate 		break;
13510Sstevel@tonic-gate 	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
13520Sstevel@tonic-gate 		pv->size += size_lp + size_nx;
13530Sstevel@tonic-gate 		unlink_oversize(nx);
13540Sstevel@tonic-gate 		position_oversize_by_size(pv);
13550Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN) {
13560Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
13570Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
13580Sstevel@tonic-gate 		}
13590Sstevel@tonic-gate 		break;
13600Sstevel@tonic-gate 	}
13610Sstevel@tonic-gate }
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate /*
13640Sstevel@tonic-gate  * Find memory on our list that is at least size big. If the leftover after
13650Sstevel@tonic-gate  * carving out size bytes would itself be big enough to be an oversize chunk,
13660Sstevel@tonic-gate  * we split the block and return the leftover piece to the freelist;
13670Sstevel@tonic-gate  * otherwise the whole block is handed back to the calling client.
13680Sstevel@tonic-gate  */
13690Sstevel@tonic-gate static oversize_t *
13700Sstevel@tonic-gate find_oversize(size_t size)
13710Sstevel@tonic-gate {
13720Sstevel@tonic-gate 	oversize_t *wp = oversize_list.next_bysize;
13730Sstevel@tonic-gate 	while (wp != &oversize_list && size > wp->size)
13740Sstevel@tonic-gate 		wp = wp->next_bysize;
13750Sstevel@tonic-gate 
13760Sstevel@tonic-gate 	if (wp == &oversize_list) /* empty list or nothing big enough */
13770Sstevel@tonic-gate 		return (NULL);
13780Sstevel@tonic-gate 	/* breaking up a chunk of memory */
13790Sstevel@tonic-gate 	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
13800Sstevel@tonic-gate 	    > MAX_CACHED) {
13810Sstevel@tonic-gate 		caddr_t off;
13820Sstevel@tonic-gate 		oversize_t *np;
13830Sstevel@tonic-gate 		size_t osize;
13840Sstevel@tonic-gate 		off = (caddr_t)ALIGN(wp->addr + size,
13850Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13860Sstevel@tonic-gate 		osize = wp->size;
13870Sstevel@tonic-gate 		wp->size = (size_t)(off - wp->addr);
13880Sstevel@tonic-gate 		np = oversize_header_alloc((uintptr_t)off,
13890Sstevel@tonic-gate 		    osize - (wp->size + OVSZ_HEADER_SIZE));
13900Sstevel@tonic-gate 		if ((long)np->size < 0)
13910Sstevel@tonic-gate 			abort();
13920Sstevel@tonic-gate 		unlink_oversize(wp);
13930Sstevel@tonic-gate 		add_oversize(np);
13940Sstevel@tonic-gate 	} else {
13950Sstevel@tonic-gate 		unlink_oversize(wp);
13960Sstevel@tonic-gate 	}
13970Sstevel@tonic-gate 	return (wp);
13980Sstevel@tonic-gate }
13990Sstevel@tonic-gate 
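/*
 * Fill size bytes at buf_arg with the 32-bit debug pattern.  The main loop
 * writes four words per iteration; a word-at-a-time loop finishes the rest.
 */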
14000Sstevel@tonic-gate static void
14010Sstevel@tonic-gate copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
14020Sstevel@tonic-gate {
14030Sstevel@tonic-gate 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
14040Sstevel@tonic-gate 	uint32_t *buf = buf_arg;
14050Sstevel@tonic-gate 
14060Sstevel@tonic-gate 	while (buf < bufend - 3) {
14070Sstevel@tonic-gate 		buf[3] = buf[2] = buf[1] = buf[0] = pattern;
14080Sstevel@tonic-gate 		buf += 4;
14090Sstevel@tonic-gate 	}
14100Sstevel@tonic-gate 	while (buf < bufend)
14110Sstevel@tonic-gate 		*buf++ = pattern;
14120Sstevel@tonic-gate }
14130Sstevel@tonic-gate 
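/*
 * Check that size bytes at buf_arg still contain the expected pattern.
 * Returns NULL if they do, or a pointer to the first word that differs.
 */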
14140Sstevel@tonic-gate static void *
14150Sstevel@tonic-gate verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
14160Sstevel@tonic-gate {
14170Sstevel@tonic-gate 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
14180Sstevel@tonic-gate 	uint32_t *buf;
14190Sstevel@tonic-gate 
14200Sstevel@tonic-gate 	for (buf = buf_arg; buf < bufend; buf++)
14210Sstevel@tonic-gate 		if (*buf != pattern)
14220Sstevel@tonic-gate 			return (buf);
14230Sstevel@tonic-gate 	return (NULL);
14240Sstevel@tonic-gate }
14250Sstevel@tonic-gate 
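/*
 * Return an oversize buffer to the global freelist under oversize_lock;
 * add_oversize() takes care of any coalescing.
 */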
14260Sstevel@tonic-gate static void
14270Sstevel@tonic-gate free_oversize(oversize_t *ovp)
14280Sstevel@tonic-gate {
14290Sstevel@tonic-gate 	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
14300Sstevel@tonic-gate 	assert(ovp->size > MAX_CACHED);
14310Sstevel@tonic-gate 
14320Sstevel@tonic-gate 	ovp->next_bysize = ovp->prev_bysize = NULL;
14330Sstevel@tonic-gate 	ovp->next_byaddr = ovp->prev_byaddr = NULL;
14340Sstevel@tonic-gate 	(void) mutex_lock(&oversize_lock);
14350Sstevel@tonic-gate 	add_oversize(ovp);
14360Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
14370Sstevel@tonic-gate }
14380Sstevel@tonic-gate 
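/*
 * Construct an oversize_t header in place at mem and return it.  The
 * resulting layout (offsets assumed from OVSZ_SIZE and OVERHEAD) is:
 *
 *	mem				the oversize_t header itself
 *	mem + OVSZ_SIZE			MTMALLOC_OVERSIZE_MAGIC tag word
 *	mem + OVSZ_SIZE + OVERHEAD	ovsz_hdr->addr, handed to the caller
 *
 * The magic word sits just below the returned address, which is how
 * free() tells an oversize allocation from a cache-backed one.
 */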
14390Sstevel@tonic-gate static oversize_t *
14400Sstevel@tonic-gate oversize_header_alloc(uintptr_t mem, size_t size)
14410Sstevel@tonic-gate {
14420Sstevel@tonic-gate 	oversize_t *ovsz_hdr;
14430Sstevel@tonic-gate 
14440Sstevel@tonic-gate 	assert(size > MAX_CACHED);
14450Sstevel@tonic-gate 
14460Sstevel@tonic-gate 	ovsz_hdr = (oversize_t *)mem;
14470Sstevel@tonic-gate 	ovsz_hdr->prev_bysize = NULL;
14480Sstevel@tonic-gate 	ovsz_hdr->next_bysize = NULL;
14490Sstevel@tonic-gate 	ovsz_hdr->prev_byaddr = NULL;
14500Sstevel@tonic-gate 	ovsz_hdr->next_byaddr = NULL;
14510Sstevel@tonic-gate 	ovsz_hdr->hash_next = NULL;
14520Sstevel@tonic-gate 	ovsz_hdr->size = size;
14530Sstevel@tonic-gate 	mem += OVSZ_SIZE;
14540Sstevel@tonic-gate 	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
14550Sstevel@tonic-gate 	mem += OVERHEAD;
14560Sstevel@tonic-gate 	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
14570Sstevel@tonic-gate 	ovsz_hdr->addr = (caddr_t)mem;
14580Sstevel@tonic-gate 	return (ovsz_hdr);
14590Sstevel@tonic-gate }
14603866Sraf 
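/*
 * fork() handlers, registered via pthread_atfork() in malloc_init().
 * malloc_prepare() acquires every allocator lock in a fixed order before
 * the fork so the child cannot inherit a lock held by a thread that no
 * longer exists; malloc_release() drops them in the reverse order in both
 * the parent and the child.
 */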
14613866Sraf static void
14623866Sraf malloc_prepare()
14633866Sraf {
14643866Sraf 	percpu_t *cpuptr;
14653866Sraf 	cache_head_t *cachehead;
14663866Sraf 	cache_t *thiscache;
14673866Sraf 
14683866Sraf 	(void) mutex_lock(&oversize_lock);
14693866Sraf 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
14703866Sraf 		(void) mutex_lock(&cpuptr->mt_parent_lock);
14713866Sraf 		for (cachehead = &cpuptr->mt_caches[0];
14723866Sraf 		    cachehead < &cpuptr->mt_caches[NUM_CACHES];
14733866Sraf 		    cachehead++) {
14743866Sraf 			for (thiscache = cachehead->mt_cache;
14753866Sraf 			    thiscache != NULL;
14763866Sraf 			    thiscache = thiscache->mt_next) {
14773866Sraf 				(void) mutex_lock(
14783866Sraf 				    &thiscache->mt_cache_lock);
14793866Sraf 			}
14803866Sraf 		}
14813866Sraf 	}
14823866Sraf }
14833866Sraf 
14843866Sraf static void
14853866Sraf malloc_release()
14863866Sraf {
14873866Sraf 	percpu_t *cpuptr;
14883866Sraf 	cache_head_t *cachehead;
14893866Sraf 	cache_t *thiscache;
14903866Sraf 
14913866Sraf 	for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
14923866Sraf 		for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
14933866Sraf 		    cachehead >= &cpuptr->mt_caches[0];
14943866Sraf 		    cachehead--) {
14953866Sraf 			for (thiscache = cachehead->mt_cache;
14963866Sraf 			    thiscache != NULL;
14973866Sraf 			    thiscache = thiscache->mt_next) {
14983866Sraf 				(void) mutex_unlock(
14993866Sraf 				    &thiscache->mt_cache_lock);
15003866Sraf 			}
15013866Sraf 		}
15023866Sraf 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
15033866Sraf 	}
15043866Sraf 	(void) mutex_unlock(&oversize_lock);
15053866Sraf }
15063866Sraf 
15073866Sraf #pragma init(malloc_init)
15083866Sraf static void
15093866Sraf malloc_init(void)
15103866Sraf {
15113866Sraf 	/*
15123866Sraf 	 * This works in the init section for this library
15133866Sraf 	 * because setup_caches() doesn't call anything in libc
15143866Sraf 	 * that calls malloc().  If it did, disaster would ensue.
15153866Sraf 	 *
15163866Sraf 	 * For this to work properly, this library must be the first
15173866Sraf 	 * one to have its init section called (after libc) by the
15183866Sraf 	 * dynamic linker.  If some other library's init section
15193866Sraf 	 * ran first and called malloc(), disaster would ensue.
15203866Sraf 	 * Because this is an interposer library for malloc(), the
15213866Sraf 	 * dynamic linker arranges for its init section to run first.
15223866Sraf 	 */
15233866Sraf 	(void) setup_caches();
15243866Sraf 
15253866Sraf 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
15263866Sraf }
1527