/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <mtmalloc.h>
#include "mtmalloc_impl.h"
#include <unistd.h>
#include <synch.h>
#include <thread.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>
#include <sys/sysmacros.h>

/*
 * To turn on the asserts, compile with -DDEBUG.
 */

#ifndef	DEBUG
#define	NDEBUG
#endif

#include <assert.h>

/*
 * The MT hot malloc implementation contained herein is designed to be
 * plug-compatible with the libc version of malloc. It is not intended
 * to replace that implementation until we decide that it is ok to break
 * customer apps (Solaris 3.0).
 *
 * For requests up to 2^16, the allocator initializes itself into NCPUS
 * worth of chains of caches. When a memory request is made, the calling
 * thread is vectored into one of NCPUS worth of caches. The LWP id gives
 * us a cheap, contention-reducing index to use; eventually this should be
 * replaced with the actual CPU sequence number, once an interface to get
 * it is available.
 *
 * Once the thread is vectored into one of the cache lists, the real
 * allocation of the memory begins. The request size determines which
 * bucket the allocation should be satisfied from. The management of free
 * buckets is done via a bitmask. A free bucket is represented by a 1. The
 * first free bit represents the first free bucket. The position of the bit
 * represents the position of the bucket in the arena.
 *
 * When the memory from the arena is handed out, the address of the cache
 * control structure is written in the word preceding the returned memory.
 * This cache control address is used during free() to mark the buffer free
 * in the cache control structure.
 *
 * When all available memory in a cache has been depleted, a new chunk of
 * memory is allocated via sbrk(). The new cache is allocated from this
 * chunk of memory and initialized in the function create_cache(). New
 * caches are installed at the front of a singly linked list of memory
 * pools of the same size. This helps to ensure that there will tend to be
 * available memory in the beginning of the list.
 *
 * Long linked lists hurt performance. To decrease this effect, there is a
 * tunable, requestsize, that bumps up the sbrk allocation size and thus
 * increases the number of available blocks within an arena.  We also keep
 * a "hint" for each cache list, which is the last cache in the list
 * allocated from.  This lowers the cost of searching if there are a lot
 * of fully allocated blocks at the front of the list.
 *
 * For requests greater than 2^16 (oversize allocations), there are two
 * pieces of overhead. There is the OVERHEAD used to hold the cache addr
 * (&oversize_list), plus an oversize_t structure to further describe the
 * block.
 *
 * The oversize list is kept as defragmented as possible by coalescing
 * freed oversized allocations with adjacent neighbors.
 *
 * Addresses handed out are stored in a hash table, and are aligned on
 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are
 * rounded up where necessary in order to achieve this. This eases the
 * implementation of MTDEBUGPATTERN and MTINITPATTERN, particularly where
 * coalescing occurs.
 *
 * A memalign allocation carries memalign header overhead.  There are two
 * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
 * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the distance from the malloc'ed
 * address to the aligned address is exactly the minimum size OVERHEAD, we
 * create a header occupying a single OVERHEAD slot that holds the magic
 * number MTMALLOC_MEMALIGN_MIN_MAGIC; subtracting OVERHEAD from the
 * memaligned address then yields the malloc'ed address. Otherwise, we
 * create a memalign header occupying two OVERHEAD slots: one stores the
 * MTMALLOC_MEMALIGN_MAGIC magic number and the other points back to the
 * malloc'ed address.
 */
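
/*
 * To make the two memalign header shapes above concrete, here is an
 * illustrative sketch of the OVERHEAD-sized slot layout implied by the
 * description above and by MEMALIGN_HEADER_ALLOC() below. It is a reading
 * aid only, not to scale:
 *
 *   shift == OVERHEAD (single-slot header):
 *
 *	malloc'ed addr ->  +------------------------------+
 *	                   | MTMALLOC_MEMALIGN_MIN_MAGIC  |
 *	returned addr  ->  +------------------------------+
 *	                   | user data ...                |
 *
 *   shift > OVERHEAD (two-slot header):
 *
 *	malloc'ed addr ->  +------------------------------+
 *	                   | ... unused gap ...           |
 *	                   | malloc'ed addr               |
 *	                   | MTMALLOC_MEMALIGN_MAGIC      |
 *	returned addr  ->  +------------------------------+
 *	                   | user data ...                |
 */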

#if defined(__i386) || defined(__amd64)
#include <arpa/inet.h>	/* for htonl() */
#endif

static void * morecore(size_t);
static int setup_caches(void);
static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
static void * malloc_internal(size_t, percpu_t *);
static void * oversize(size_t);
static oversize_t *find_oversize(size_t);
static void add_oversize(oversize_t *);
static void copy_pattern(uint32_t, void *, size_t);
static void * verify_pattern(uint32_t, void *, size_t);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
static oversize_t *oversize_header_alloc(uintptr_t, size_t);

/*
 * oversize hash table stuff
 */
#define	NUM_BUCKETS	67	/* must be prime */
#define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
oversize_t *ovsz_hashtab[NUM_BUCKETS];
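
/*
 * The table above holds the oversize allocations that are currently handed
 * out, hashed by buffer address. free() looks a block up here before
 * returning it to the oversize freelist, which is also how double frees of
 * oversize buffers are detected.
 */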
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate /*
1400Sstevel@tonic-gate  * Gets a decent "current cpu identifier", to be used to reduce contention.
1410Sstevel@tonic-gate  * Eventually, this should be replaced by an interface to get the actual
1420Sstevel@tonic-gate  * CPU sequence number in libthread/liblwp.
1430Sstevel@tonic-gate  */
1440Sstevel@tonic-gate extern uint_t _thr_self();
1450Sstevel@tonic-gate #pragma weak _thr_self
1460Sstevel@tonic-gate #define	get_curcpu_func() (curcpu_func)_thr_self
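
/*
 * Note that _thr_self is declared weak: if no threading library provides
 * it, the symbol resolves to 0 and get_curcpu_func() yields NULL, in which
 * case setup_caches() falls back to fallback_curcpu() below.
 */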

#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
			& ~((uintptr_t)(a) - 1)))

/* need this to deal with the little endianness of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))
#else
#define	FLIP_EM(x)	(x)
#endif

#define	INSERT_ONLY			0
#define	COALESCE_LEFT			0x00000001
#define	COALESCE_RIGHT			0x00000002
#define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)

#define	OVERHEAD	8	/* size needed to write cache addr */
#define	HUNKSIZE	8192	/* just a multiplier */

#define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
#define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
#define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
#define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */

/* maximum size before overflow */
#define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
			- OVSZ_HEADER_SIZE)

#define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
#define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
    CACHE_COHERENCY_UNIT)

#define	MINSIZE		9	/* for requestsize, tunable */
#define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */

#define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
#define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */

#define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
#define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
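
/*
 * Reading note: IS_OVERSIZE(x, y) is true when a would-be fragment of
 * size x is itself bigger than MAX_CACHED yet still smaller than the
 * containing allocation y, i.e. when it is worth splitting off and
 * returning to the oversize freelist on its own.
 */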

static long requestsize = MINSIZE; /* 9 hunks per cache; tunable; 9 is min */

static uint_t cpu_mask;
static curcpu_func curcpu;

static int32_t debugopt;
static int32_t reinit;

static percpu_t *cpu_list;
static oversize_t oversize_list;
static mutex_t oversize_lock;

static int ncpus;

#define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
#define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
#define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)
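
/*
 * These magic values are stored in the OVERHEAD word that precedes a
 * buffer. Ordinary cache buffers keep the address of their cache_t there;
 * since the values above are derived from the address of the static
 * oversize_list (which is never a cache_t living in sbrk'ed memory),
 * free() and realloc() can use them to tell oversize and memalign buffers
 * apart from cache buffers.
 */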

/*
 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 * this is achieved.
 */
#define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
#define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)

/*
 * A memalign header takes two OVERHEAD slots: one holds the memalign magic
 * number, and the other points back to the start address of the originally
 * allocated space.
 */
#define	MEMALIGN_HEADER_SIZE	(2 * OVERHEAD)
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
	if (shift == OVERHEAD)\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MIN_MAGIC; \
	else {\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MAGIC; \
		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
			(uintptr_t)malloc_addr; \
	}

void *
malloc(size_t bytes)
{
	percpu_t *list_rotor;
	uint_t	list_index;

	/*
	 * This test is needed because of linking with libthread: malloc
	 * calls can arrive before this library has been initialized.
	 *
	 * If setup_caches() fails, we set ENOMEM and return NULL.
	 */
	if (cpu_list == (percpu_t *)NULL) {
		if (setup_caches() == 0) {
			errno = ENOMEM;
			return (NULL);
		}
	}

	if (bytes > MAX_CACHED)
		return (oversize(bytes));

	list_index = (curcpu() & cpu_mask);

	list_rotor = &cpu_list[list_index];

	return (malloc_internal(bytes, list_rotor));
}

void *
realloc(void * ptr, size_t bytes)
{
	void *new, *data_ptr;
	cache_t *cacheptr;
	caddr_t mem;
	size_t shift = 0;

	if (ptr == NULL)
		return (malloc(bytes));

	if (bytes == 0) {
		free(ptr);
		return (NULL);
	}

	data_ptr = ptr;
	mem = (caddr_t)ptr - OVERHEAD;

	new = malloc(bytes);

	if (new == NULL)
		return (NULL);

	/*
	 * If new == ptr, ptr has previously been freed. Passing a freed
	 * pointer to realloc() is not allowed, so we abort unless the
	 * caller has enabled MTDOUBLEFREE, in which case we must avoid
	 * freeing ptr (i.e. new) before we return new. There is (obviously)
	 * no requirement to memcpy() ptr to new before we return.
	 */
	if (new == ptr) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
		return (new);
	}

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
		shift = OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *old;

		old = (oversize_t *)(mem - OVSZ_SIZE);
		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
		free(ptr);
		return (new);
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;

	(void) memcpy(new, data_ptr,
		MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
	free(ptr);

	return (new);
}

void *
calloc(size_t nelem, size_t bytes)
{
	void * ptr;
	size_t size = nelem * bytes;

	/* guard against overflow in the multiplication above */
	if (nelem != 0 && size / nelem != bytes) {
		errno = ENOMEM;
		return (NULL);
	}

	ptr = malloc(size);
	if (ptr == NULL)
		return (NULL);
	bzero(ptr, size);

	return (ptr);
}

void
free(void * ptr)
{
	cache_t *cacheptr;
	caddr_t mem;
	int32_t i;
	caddr_t freeblocks;
	uintptr_t offset;
	uchar_t mask;
	int32_t which_bit, num_bytes;

	if (ptr == NULL)
		return;

	mem = (caddr_t)ptr - OVERHEAD;

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *big, **opp;
		int bucket;

		big = (oversize_t *)(mem - OVSZ_SIZE);
		(void) mutex_lock(&oversize_lock);

		bucket = HASH_OVERSIZE(big->addr);
		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
		    opp = &(*opp)->hash_next)
			if (*opp == big)
				break;

		if (*opp == NULL) {
			if (!(debugopt & MTDOUBLEFREE))
				abort();
			(void) mutex_unlock(&oversize_lock);
			return;
		}

		*opp = big->hash_next;	/* remove big from the hash table */
		big->hash_next = NULL;

		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, ptr, big->size);
		add_oversize(big);
		(void) mutex_unlock(&oversize_lock);
		return;
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;
	freeblocks = cacheptr->mt_freelist;

	/*
	 * offset is the distance in bytes into the arena. Since buffers are
	 * mt_size bytes each and the freelist bitmask has one bit per buffer,
	 * there is a one-to-one correlation between distance into the arena
	 * and distance into the freelist bitmask.
	 */
	offset = mem - cacheptr->mt_arena;

	/*
	 * i is the block index, i.e. the total number of bits to offset into
	 * the freelist bitmask.
	 */

	i = offset / cacheptr->mt_size;

	num_bytes = i >> 3;

	/*
	 * which_bit is the bit offset into the byte in the freelist.
	 * If our freelist bitmask looks like 0xf3 and we are freeing
	 * block 5 (i.e. the 6th block), our mask will be 0xf7 after
	 * the free. Things go left to right, which is why the mask is 0x80
	 * and not 0x01.
	 */
	which_bit = i - (num_bytes << 3);

	mask = 0x80 >> which_bit;

	freeblocks += num_bytes;

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);

	(void) mutex_lock(&cacheptr->mt_cache_lock);

	if (*freeblocks & mask) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
	} else {
		*freeblocks |= mask;
		cacheptr->mt_nfree++;
	}

	(void) mutex_unlock(&cacheptr->mt_cache_lock);
}

void *
memalign(size_t alignment, size_t size)
{
	size_t alloc_size;
	uintptr_t offset;
	void *alloc_buf;
	void *ret_buf;

	if (size == 0 || alignment == 0 ||
		misaligned(alignment) ||
		(alignment & (alignment - 1)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
	if (alignment <= MTMALLOC_MIN_ALIGN)
		return (malloc(size));

	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;

	if (alloc_size < size) { /* overflow */
		errno = ENOMEM;
		return (NULL);
	}

	alloc_buf = malloc(alloc_size);

	if (alloc_buf == NULL)
		/* malloc sets errno */
		return (NULL);

	/*
	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple
	 * of MTMALLOC_MIN_ALIGN, having rounded up alloc_size if necessary.
	 * Since we will use alloc_size to return the excess fragments to the
	 * free list, we also round up alloc_size if necessary.
	 */
	if ((alloc_size > MAX_CACHED) &&
	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);

	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
		/* aligned correctly */

		size_t frag_size = alloc_size -
			(size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * If the leftover piece of memory is > MAX_CACHED,
		 * split it off and return it to the oversize freelist.
		 */
		if (IS_OVERSIZE(frag_size, alloc_size)) {
			oversize_t *orig, *tail;
			uintptr_t taddr;
			size_t data_size;
			taddr = ALIGN((uintptr_t)alloc_buf + size,
					MTMALLOC_MIN_ALIGN);
			data_size = taddr - (uintptr_t)alloc_buf;
			orig = (oversize_t *)((uintptr_t)alloc_buf -
					OVSZ_HEADER_SIZE);
			frag_size = orig->size - data_size -
					OVSZ_HEADER_SIZE;
			orig->size = data_size;
			tail = oversize_header_alloc(taddr, frag_size);
			free_oversize(tail);
		}
		ret_buf = alloc_buf;
	} else {
		uchar_t	oversize_bits = 0;
		size_t	head_sz, data_sz, tail_sz;
		uintptr_t ret_addr, taddr, shift, tshift;
		oversize_t *orig, *tail;
		size_t tsize;

		/* needs to be aligned */
		shift = alignment - offset;

		assert(shift >= MTMALLOC_MIN_ALIGN);

		ret_addr = ((uintptr_t)alloc_buf + shift);
		ret_buf = (void *)ret_addr;

		if (alloc_size <= MAX_CACHED) {
			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
			return (ret_buf);
		}

		/*
		 * Only check for fragments when the memory is allocated
		 * from oversize_list.  Split off a fragment and return it
		 * to the oversize freelist when it's > MAX_CACHED.
		 */

		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);

		tail_sz = alloc_size -
			(shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
				IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
				IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;

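		/*
		 * oversize_bits records which of the three pieces -- the head
		 * fragment before the aligned address, the data itself, and
		 * the tail fragment after it -- would individually qualify as
		 * oversize. Each case below splits off whichever fragments
		 * are big enough to stand on their own, so that every piece
		 * returned to the oversize freelist is itself oversize.
		 */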
		switch (oversize_bits) {
			case NONE_OVERSIZE:
			case DATA_OVERSIZE:
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					alloc_buf);
				break;
			case HEAD_OVERSIZE:
				/*
				 * If we can extend the data to > MAX_CACHED
				 * while the head fragment stays > MAX_CACHED,
				 * we split off the head end as in the
				 * head-and-data-oversize case; otherwise we
				 * just create a memalign header.
				 */
				tsize = (shift + size) - (MAX_CACHED + 8 +
					MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
						alloc_buf);
					break;
				} else {
					tsize += OVSZ_HEADER_SIZE;
					taddr = ALIGN((uintptr_t)alloc_buf +
						tsize, MTMALLOC_MIN_ALIGN);
					tshift = ret_addr - taddr;
					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
						taddr);
					ret_addr = taddr;
					shift = ret_addr - (uintptr_t)alloc_buf;
				}
				/* FALLTHROUGH */
			case HEAD_AND_DATA_OVERSIZE:
				/*
				 * Split off the head fragment and
				 * return it to the oversize freelist.
				 * Create an oversize header for the piece
				 * of (data + tail fragment).
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
						OVSZ_HEADER_SIZE);
				(void) oversize_header_alloc(ret_addr -
						OVSZ_HEADER_SIZE,
						(orig->size - shift));
				orig->size = shift - OVSZ_HEADER_SIZE;

				/* free up the head fragment */
				free_oversize(orig);
				break;
			case TAIL_OVERSIZE:
				/*
				 * If we can extend the data to > MAX_CACHED
				 * while the tail end stays > MAX_CACHED, we
				 * split off the tail end; otherwise we just
				 * create a memalign header.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
						OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
					shift + OVSZ_HEADER_SIZE +
					MTMALLOC_MIN_ALIGN);
				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
						alloc_buf);
					break;
				} else {
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case DATA_AND_TAIL_OVERSIZE:
				/*
				 * Split off the tail fragment and
				 * return it to the oversize freelist.
				 * Create a memalign header and adjust
				 * the size for the piece of
				 * (head fragment + data).
				 */
				taddr = ALIGN(ret_addr + size,
						MTMALLOC_MIN_ALIGN);
				data_sz = (size_t)(taddr -
						(uintptr_t)alloc_buf);
				orig = (oversize_t *)((uintptr_t)alloc_buf -
						OVSZ_HEADER_SIZE);
				tsize = orig->size - data_sz;
				orig->size = data_sz;
				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
					alloc_buf);
				tsize -= OVSZ_HEADER_SIZE;
				tail = oversize_header_alloc(taddr, tsize);
				free_oversize(tail);
				break;
			case HEAD_AND_TAIL_OVERSIZE:
				/*
				 * Split off the head fragment.
				 * We try to free up the tail end when we can
				 * extend the data size to (MAX_CACHED + 8)
				 * and the tail end remains oversized.
				 * The bottom line is that all split pieces
				 * must themselves be oversize.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
					OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
					OVSZ_HEADER_SIZE + shift +
					MTMALLOC_MIN_ALIGN);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					/*
					 * If the chunk is not big enough
					 * to make both data and tail oversize
					 * we just keep them as one piece.
					 */
					(void) oversize_header_alloc(ret_addr -
						OVSZ_HEADER_SIZE,
						orig->size - shift);
					orig->size = shift -
						OVSZ_HEADER_SIZE;
					free_oversize(orig);
					break;
				} else {
					/*
					 * extend data size > MAX_CACHED
					 * and handle it as head, data, tail
					 * are all oversized.
					 */
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case ALL_OVERSIZE:
				/*
				 * split off the head and tail fragments,
				 * return them back to the oversize freelist.
				 * Alloc oversize header for data seg.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
					OVSZ_HEADER_SIZE);
				tsize = orig->size;
				orig->size = shift - OVSZ_HEADER_SIZE;
				free_oversize(orig);

				taddr = ALIGN(ret_addr + size,
					MTMALLOC_MIN_ALIGN);
				data_sz = taddr - ret_addr;
				assert(tsize > (shift + data_sz +
					OVSZ_HEADER_SIZE));
				tail_sz = tsize -
					(shift + data_sz + OVSZ_HEADER_SIZE);

				/* create oversize header for data seg */
				(void) oversize_header_alloc(ret_addr -
					OVSZ_HEADER_SIZE, data_sz);

				/* create oversize header for tail fragment */
				tail = oversize_header_alloc(taddr, tail_sz);
				free_oversize(tail);
				break;
			default:
				/* should not reach here */
				assert(0);
		}
	}
	return (ret_buf);
}


void *
valloc(size_t size)
{
	static unsigned pagesize;

	if (size == 0)
		return (NULL);

	if (!pagesize)
		pagesize = sysconf(_SC_PAGESIZE);

	return (memalign(pagesize, size));
}

void
mallocctl(int cmd, long value)
{
	switch (cmd) {

	case MTDEBUGPATTERN:
		/*
		 * Reinitialize free blocks in case malloc() is called prior
		 * to mallocctl().
		 */
		if (value && !(debugopt & cmd)) {
			reinit++;
			debugopt |= cmd;
			reinit_cpu_list();
		}
		/*FALLTHRU*/
	case MTDOUBLEFREE:
	case MTINITBUFFER:
		if (value)
			debugopt |= cmd;
		else
			debugopt &= ~cmd;
		break;
	case MTCHUNKSIZE:
		if (value >= MINSIZE && value <= MAXSIZE)
			requestsize = value;
		break;
	default:
		break;
	}
}
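
/*
 * Illustrative example (a sketch, not part of the library): an application
 * linked with this allocator can enable the debugging aids before it
 * starts allocating, e.g.
 *
 *	mallocctl(MTDEBUGPATTERN, 1);	fill freed buffers with 0xdeadbeef
 *	mallocctl(MTINITBUFFER, 1);	fill new buffers with 0xbaddcafe
 *	mallocctl(MTCHUNKSIZE, 64);	make each new cache span 64 hunks
 *
 * The value 64 is only an example; MTCHUNKSIZE accepts anything in the
 * [MINSIZE, MAXSIZE] range, and the command constants themselves come
 * from <mtmalloc.h>.
 */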

/*
 * if this function is changed, update the fallback code in setup_caches to
 * set ncpus to the number of possible return values. (currently 1)
 */
static uint_t
fallback_curcpu(void)
{
	return (0);
}

/*
 * Returns non-zero on success, zero on failure.
 *
 * This carefully doesn't set cpu_list until initialization is finished.
 */
static int
setup_caches(void)
{
	static mutex_t init_lock = DEFAULTMUTEX;

	uintptr_t oldbrk;
	uintptr_t newbrk;

	size_t cache_space_needed;
	size_t padding;

	curcpu_func new_curcpu;
	uint_t new_cpu_mask;
	percpu_t *new_cpu_list;

	uint_t i, j;
	uintptr_t list_addr;

	(void) mutex_lock(&init_lock);
	if (cpu_list != NULL) {
		(void) mutex_unlock(&init_lock);
		return (1); 		/* success -- already initialized */
	}

	new_curcpu = get_curcpu_func();
	if (new_curcpu == NULL) {
		new_curcpu = fallback_curcpu;
		ncpus = 1;
	} else {
		if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
			ncpus = 4; /* decent default value */
	}
	assert(ncpus > 0);

	/* round ncpus up to a power of 2 */
	while (ncpus & (ncpus - 1))
		ncpus++;
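	/*
	 * Rounding ncpus up to a power of two means malloc() can select a
	 * per-CPU cache chain with a simple mask (curcpu() & cpu_mask)
	 * rather than a modulo operation.
	 */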

	new_cpu_mask = ncpus - 1;	/* create the cpu mask */

	/*
	 * We now do some magic with the brk.  What we want to get in the
	 * end is a bunch of well-aligned stuff in a big initial allocation.
	 * Along the way, we do sanity checks to make sure no one else has
	 * touched the brk (which shouldn't happen, but it's always good to
	 * check)
	 *
	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
	 */
	oldbrk = (uintptr_t)sbrk(0);
	if ((void *)oldbrk == (void *)-1) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* sbrk is broken -- we're doomed. */
	}

	/*
	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
	 * the percpu structures and cache lists will be properly aligned.
	 *
	 * Later on we also align the brk to HUNKSIZE, so that all hunks are
	 * page-aligned (assuming HUNKSIZE >= PAGESIZE) and can be paged out
	 * individually.
	 */
	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * For each cpu, there is one percpu_t and a list of caches
	 */
	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);

	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);

	if (new_cpu_list == (percpu_t *)-1 ||
	    (uintptr_t)new_cpu_list != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * Finally, align the brk to HUNKSIZE so that all hunks are
	 * page-aligned, to avoid edge-effects.
	 */

	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;

	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;

	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));

	/* initialize the percpu list */
	for (i = 0; i < ncpus; i++) {
		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
		for (j = 0; j < NUM_CACHES; j++) {
			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
		}

		bzero(&new_cpu_list[i].mt_parent_lock, sizeof (mutex_t));

		/* get the correct cache list alignment */
		list_addr += CACHELIST_SIZE;
	}

	/*
	 * Initialize oversize listhead
	 */
	oversize_list.next_bysize = &oversize_list;
	oversize_list.prev_bysize = &oversize_list;
	oversize_list.next_byaddr = &oversize_list;
	oversize_list.prev_byaddr = &oversize_list;
	oversize_list.addr = NULL;
	oversize_list.size = 0;		/* sentinel */

	/*
	 * now install the global variables, leaving cpu_list for last, so that
	 * there aren't any race conditions.
	 */
	curcpu = new_curcpu;
	cpu_mask = new_cpu_mask;
	cpu_list = new_cpu_list;

	(void) mutex_unlock(&init_lock);

	return (1);
}

static void
create_cache(cache_t *cp, size_t size, uint_t chunksize)
{
	long nblocks;

	bzero(&cp->mt_cache_lock, sizeof (mutex_t));
	cp->mt_size = size;
	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
	cp->mt_hunks = chunksize;
	/*
	 * rough calculation. We will need to adjust later.
	 */
	nblocks = cp->mt_span / cp->mt_size;
	nblocks >>= 3;
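	/*
	 * nblocks is now the number of bytes of freelist bitmask we intend
	 * to use: one bit per block, eight blocks per byte.
	 */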
	if (nblocks == 0) { /* less than 8 free blocks in this pool */
		int32_t numblocks = 0;
		long i = cp->mt_span;
		size_t sub = cp->mt_size;
		uchar_t mask = 0;

		while (i > sub) {
			numblocks++;
			i -= sub;
		}
		nblocks = numblocks;
		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
		cp->mt_nfree = numblocks;
		while (numblocks--) {
			mask |= 0x80 >> numblocks;
		}
		*(cp->mt_freelist) = mask;
	} else {
		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
			nblocks, 32);
		/* recompute nblocks */
		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
			cp->mt_span - cp->mt_arena) / cp->mt_size;
		cp->mt_nfree = ((nblocks >> 3) << 3);
		/* Set everything to free */
		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
	}

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);

	cp->mt_next = NULL;
}

static void
reinit_cpu_list(void)
{
	oversize_t *wp = oversize_list.next_bysize;
	percpu_t *cpuptr;
	cache_t *thiscache;
	cache_head_t *cachehead;

	if (wp == NULL || cpu_list == NULL) {
		reinit = 0;
		return;
	}

	/* Reinitialize free oversize blocks. */
	(void) mutex_lock(&oversize_lock);
	if (debugopt & MTDEBUGPATTERN)
		for (; wp != &oversize_list; wp = wp->next_bysize)
			copy_pattern(FREEPATTERN, wp->addr, wp->size);
	(void) mutex_unlock(&oversize_lock);

	/* Reinitialize free blocks. */
	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
		(void) mutex_lock(&cpuptr->mt_parent_lock);
		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
			&cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
			for (thiscache = cachehead->mt_cache; thiscache != NULL;
				thiscache = thiscache->mt_next) {
				(void) mutex_lock(&thiscache->mt_cache_lock);
				if (thiscache->mt_nfree == 0) {
					(void) mutex_unlock(
					    &thiscache->mt_cache_lock);
					continue;
				}
				if (thiscache != NULL)
					reinit_cache(thiscache);
				(void) mutex_unlock(&thiscache->mt_cache_lock);
			}
		}
		(void) mutex_unlock(&cpuptr->mt_parent_lock);
	}
	reinit = 0;
}

static void
reinit_cache(cache_t *thiscache)
{
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	int32_t i, n;
	caddr_t ret;

	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff) {
		    for (i = 0; i < 32; i++) {
			if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
				n = (uintptr_t)(((freeblocks -
				    (uint32_t *)thiscache->mt_freelist) << 5)
				    + i) * thiscache->mt_size;
				ret = thiscache->mt_arena + n;
				ret += OVERHEAD;
				copy_pattern(FREEPATTERN, ret,
				    thiscache->mt_size);
			}
		    }
		}
		freeblocks++;
	}
}

static void *
malloc_internal(size_t size, percpu_t *cpuptr)
{
	cache_head_t *cachehead;
	cache_t *thiscache, *hintcache;
	int32_t i, n, logsz, bucket;
	uint32_t index;
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	caddr_t ret;

	logsz = MIN_CACHED_SHIFT;

	while (size > (1 << logsz))
		logsz++;

	bucket = logsz - MIN_CACHED_SHIFT;

	(void) mutex_lock(&cpuptr->mt_parent_lock);

	/*
	 * Find a cache of the appropriate size with free buffers.
	 *
	 * We don't need to lock each cache as we check its mt_nfree count,
	 * since:
	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
	 *	   free happens during our search, it will increment mt_nfree,
	 *	   which will not affect the test.
	 *	2.  Allocations can decrement mt_nfree, but they can't happen
	 *	   as long as we hold mt_parent_lock.
	 */

	cachehead = &cpuptr->mt_caches[bucket];

	/* Search through the list, starting at the mt_hint */
	thiscache = cachehead->mt_hint;

	while (thiscache != NULL && thiscache->mt_nfree == 0)
		thiscache = thiscache->mt_next;

	if (thiscache == NULL) {
		/* wrap around -- search up to the hint */
		thiscache = cachehead->mt_cache;
		hintcache = cachehead->mt_hint;

		while (thiscache != NULL && thiscache != hintcache &&
		    thiscache->mt_nfree == 0)
			thiscache = thiscache->mt_next;

		if (thiscache == hintcache)
			thiscache = NULL;
	}


	if (thiscache == NULL) { /* there are no free caches */
		int32_t thisrequest = requestsize;
		int32_t buffer_size = (1 << logsz) + OVERHEAD;

		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);

		if (thiscache == (cache_t *)-1) {
		    (void) mutex_unlock(&cpuptr->mt_parent_lock);
		    errno = EAGAIN;
		    return (NULL);
		}
		create_cache(thiscache, buffer_size, thisrequest);

		/* link in the new block at the beginning of the list */
		thiscache->mt_next = cachehead->mt_cache;
		cachehead->mt_cache = thiscache;
	}

	/* update the hint to the cache we found or created */
	cachehead->mt_hint = thiscache;

	/* thiscache now points to a cache with available space */
	(void) mutex_lock(&thiscache->mt_cache_lock);

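	/*
	 * Scan the freelist bitmask one 32-bit word at a time (the loop is
	 * unrolled by hand) until we find a word with at least one set bit,
	 * i.e. at least one free block. The cache is known to have free
	 * space, so the scan terminates before reaching mt_arena.
	 */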
	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
	}

	/*
	 * The word offset from mt_freelist to freeblocks, together with the
	 * bit offset found within *freeblocks below, identifies the free
	 * block; n is the corresponding byte offset into the arena.
	 */
11230Sstevel@tonic-gate 	for (i = 0; i < 32; ) {
11240Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
11250Sstevel@tonic-gate 			break;
11260Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
11270Sstevel@tonic-gate 			break;
11280Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
11290Sstevel@tonic-gate 			break;
11300Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
11310Sstevel@tonic-gate 			break;
11320Sstevel@tonic-gate 	}
11330Sstevel@tonic-gate 	index = 0x80000000 >> --i;
11340Sstevel@tonic-gate 
11350Sstevel@tonic-gate 
11360Sstevel@tonic-gate 	*freeblocks &= FLIP_EM(~index);
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate 	thiscache->mt_nfree--;
11390Sstevel@tonic-gate 
11400Sstevel@tonic-gate 	(void) mutex_unlock(&thiscache->mt_cache_lock);
11410Sstevel@tonic-gate 	(void) mutex_unlock(&cpuptr->mt_parent_lock);
11420Sstevel@tonic-gate 
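	/*
	 * Worked example (values are illustrative only): if freeblocks is
	 * the third bitmap word and bit position 5 (zero-based from the
	 * most significant bit) was the first set bit, the buffer index is
	 * 2 * 32 + 5 = 69, so n becomes 69 * mt_size bytes.
	 */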
11430Sstevel@tonic-gate 	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
11440Sstevel@tonic-gate 		+ i) * thiscache->mt_size;
11450Sstevel@tonic-gate 	/*
11460Sstevel@tonic-gate 	 * n now holds the byte offset of the chosen buffer and its bit
11470Sstevel@tonic-gate 	 * has been cleared in the free mask. All that remains is to
11480Sstevel@tonic-gate 	 * locate the block in the arena, store thiscache in the word
11490Sstevel@tonic-gate 	 * just ahead of the address we hand out, and return the memory
11500Sstevel@tonic-gate 	 * to the caller.
11510Sstevel@tonic-gate 	 */
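	/*
	 * Resulting layout within the arena:
	 *
	 *   mt_arena + n              word holding thiscache (the OVERHEAD slot)
	 *   mt_arena + n + OVERHEAD   address returned to the caller
	 */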
11520Sstevel@tonic-gate 	ret = thiscache->mt_arena + n;
11530Sstevel@tonic-gate 
11540Sstevel@tonic-gate 	/* Store the cache addr for this buf. Makes free go fast. */
11550Sstevel@tonic-gate 	*(uintptr_t *)ret = (uintptr_t)thiscache;
11560Sstevel@tonic-gate 
11570Sstevel@tonic-gate 	/*
11580Sstevel@tonic-gate 	 * This assert makes sure we don't hand out memory that is not
11590Sstevel@tonic-gate 	 * owned by this cache.
11600Sstevel@tonic-gate 	 */
11610Sstevel@tonic-gate 	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
11620Sstevel@tonic-gate 		thiscache->mt_span);
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	ret += OVERHEAD;
11650Sstevel@tonic-gate 
11660Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate 	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
11690Sstevel@tonic-gate 		if (verify_pattern(FREEPATTERN, ret, size))
11700Sstevel@tonic-gate 			abort();	/* reference after free */
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
11730Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
11740Sstevel@tonic-gate 	return ((void *)ret);
11750Sstevel@tonic-gate }
11760Sstevel@tonic-gate 
11770Sstevel@tonic-gate static void *
11780Sstevel@tonic-gate morecore(size_t bytes)
11790Sstevel@tonic-gate {
11800Sstevel@tonic-gate 	void * ret;
11810Sstevel@tonic-gate 
11820Sstevel@tonic-gate 	if (bytes > LONG_MAX) {
11830Sstevel@tonic-gate 		intptr_t wad;
11840Sstevel@tonic-gate 		/*
11850Sstevel@tonic-gate 		 * The request is too large for a single sbrk() call, whose
11860Sstevel@tonic-gate 		 * argument is signed; do it in LONG_MAX-sized chunks.
11870Sstevel@tonic-gate 		 */
11880Sstevel@tonic-gate 		if (bytes == ULONG_MAX)
11890Sstevel@tonic-gate 			return ((void *)-1);
11900Sstevel@tonic-gate 
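		/*
		 * Illustration (sizes assumed): on ILP32, a request just
		 * above LONG_MAX is satisfied by an sbrk() of LONG_MAX
		 * bytes followed by a second, smaller sbrk() for the
		 * remainder; if a later chunk fails after the break has
		 * moved, the previous LONG_MAX advance is undone and -1
		 * is returned.
		 */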
11910Sstevel@tonic-gate 		ret = sbrk(0);
11920Sstevel@tonic-gate 		wad = LONG_MAX;
11930Sstevel@tonic-gate 		while (wad > 0) {
11940Sstevel@tonic-gate 			if (sbrk(wad) == (void *)-1) {
11950Sstevel@tonic-gate 				if (ret != sbrk(0))
11960Sstevel@tonic-gate 					(void) sbrk(-LONG_MAX);
11970Sstevel@tonic-gate 				return ((void *)-1);
11980Sstevel@tonic-gate 			}
11990Sstevel@tonic-gate 			bytes -= LONG_MAX;
12000Sstevel@tonic-gate 			wad = bytes;
12010Sstevel@tonic-gate 		}
12020Sstevel@tonic-gate 	} else
12030Sstevel@tonic-gate 		ret = sbrk(bytes);
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 	return (ret);
12060Sstevel@tonic-gate }
12070Sstevel@tonic-gate 
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate static void *
12100Sstevel@tonic-gate oversize(size_t size)
12110Sstevel@tonic-gate {
12120Sstevel@tonic-gate 	caddr_t ret;
12130Sstevel@tonic-gate 	oversize_t *big;
12140Sstevel@tonic-gate 	int bucket;
12150Sstevel@tonic-gate 
1216*1412Srm88369 	/* make sure we will not overflow */
1217*1412Srm88369 	if (size > MAX_MTMALLOC) {
1218*1412Srm88369 		errno = ENOMEM;
1219*1412Srm88369 		return (NULL);
1220*1412Srm88369 	}
12210Sstevel@tonic-gate 
12220Sstevel@tonic-gate 	/*
12230Sstevel@tonic-gate 	 * Since we ensure every address we hand back is
12240Sstevel@tonic-gate 	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
12250Sstevel@tonic-gate 	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
12260Sstevel@tonic-gate 	 * This eases the implementation of MTDEBUGPATTERN and MTINITBUFFER,
12270Sstevel@tonic-gate 	 * particularly where coalescing occurs.
12280Sstevel@tonic-gate 	 */
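	/*
	 * For example, assuming MTMALLOC_MIN_ALIGN is 8, a 65537-byte
	 * request is rounded up to 65544 bytes here.
	 */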
12290Sstevel@tonic-gate 	size = ALIGN(size, MTMALLOC_MIN_ALIGN);
12300Sstevel@tonic-gate 
1231*1412Srm88369 	/*
1232*1412Srm88369 	 * A single global lock is acceptable here: an oversize
1233*1412Srm88369 	 * allocation has to call morecore() in the common case, so
1234*1412Srm88369 	 * we expect to block in the kernel anyway.
1235*1412Srm88369 	 */
1236*1412Srm88369 	(void) mutex_lock(&oversize_lock);
1237*1412Srm88369 
12380Sstevel@tonic-gate 	if ((big = find_oversize(size)) != NULL) {
12390Sstevel@tonic-gate 		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
12400Sstevel@tonic-gate 			if (verify_pattern(FREEPATTERN, big->addr, size))
12410Sstevel@tonic-gate 				abort();	/* reference after free */
12420Sstevel@tonic-gate 	} else {
12430Sstevel@tonic-gate 		/* Get more 8-byte aligned memory from heap */
12440Sstevel@tonic-gate 		ret = morecore(size + OVSZ_HEADER_SIZE);
12450Sstevel@tonic-gate 		if (ret == (caddr_t)-1) {
12460Sstevel@tonic-gate 			(void) mutex_unlock(&oversize_lock);
12470Sstevel@tonic-gate 			errno = ENOMEM;
12480Sstevel@tonic-gate 			return (NULL);
12490Sstevel@tonic-gate 		}
12500Sstevel@tonic-gate 		big = oversize_header_alloc((uintptr_t)ret, size);
12510Sstevel@tonic-gate 	}
12520Sstevel@tonic-gate 	ret = big->addr;
12530Sstevel@tonic-gate 
12540Sstevel@tonic-gate 	/* Add big to the hash table at the head of the relevant bucket. */
12550Sstevel@tonic-gate 	bucket = HASH_OVERSIZE(ret);
12560Sstevel@tonic-gate 	big->hash_next = ovsz_hashtab[bucket];
12570Sstevel@tonic-gate 	ovsz_hashtab[bucket] = big;
12580Sstevel@tonic-gate 
12590Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
12600Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
12610Sstevel@tonic-gate 
12620Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
12630Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
12640Sstevel@tonic-gate 	return ((void *)ret);
12650Sstevel@tonic-gate }
12660Sstevel@tonic-gate 
12670Sstevel@tonic-gate static void
12680Sstevel@tonic-gate insert_oversize(oversize_t *op, oversize_t *nx)
12690Sstevel@tonic-gate {
12700Sstevel@tonic-gate 	oversize_t *sp;
12710Sstevel@tonic-gate 
12720Sstevel@tonic-gate 	/* locate correct insertion point in size-ordered list */
12730Sstevel@tonic-gate 	for (sp = oversize_list.next_bysize;
12740Sstevel@tonic-gate 	    sp != &oversize_list && (op->size > sp->size);
12750Sstevel@tonic-gate 	    sp = sp->next_bysize)
12760Sstevel@tonic-gate 		;
12770Sstevel@tonic-gate 
12780Sstevel@tonic-gate 	/* link into size-ordered list */
12790Sstevel@tonic-gate 	op->next_bysize = sp;
12800Sstevel@tonic-gate 	op->prev_bysize = sp->prev_bysize;
12810Sstevel@tonic-gate 	op->prev_bysize->next_bysize = op;
12820Sstevel@tonic-gate 	op->next_bysize->prev_bysize = op;
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	/*
12850Sstevel@tonic-gate 	 * link item into address-ordered list
12860Sstevel@tonic-gate 	 * (caller provides insertion point as an optimization)
12870Sstevel@tonic-gate 	 */
12880Sstevel@tonic-gate 	op->next_byaddr = nx;
12890Sstevel@tonic-gate 	op->prev_byaddr = nx->prev_byaddr;
12900Sstevel@tonic-gate 	op->prev_byaddr->next_byaddr = op;
12910Sstevel@tonic-gate 	op->next_byaddr->prev_byaddr = op;
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate 
12950Sstevel@tonic-gate static void
12960Sstevel@tonic-gate unlink_oversize(oversize_t *lp)
12970Sstevel@tonic-gate {
12980Sstevel@tonic-gate 	/* unlink from address list */
12990Sstevel@tonic-gate 	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
13000Sstevel@tonic-gate 	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate 	/* unlink from size list */
13030Sstevel@tonic-gate 	lp->prev_bysize->next_bysize = lp->next_bysize;
13040Sstevel@tonic-gate 	lp->next_bysize->prev_bysize = lp->prev_bysize;
13050Sstevel@tonic-gate }
13060Sstevel@tonic-gate 
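/*
 * After coalescing changes an entry's size, move it to where it now
 * belongs in the size-ordered list; its position in the address-ordered
 * list is unaffected.
 */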
13070Sstevel@tonic-gate static void
13080Sstevel@tonic-gate position_oversize_by_size(oversize_t *op)
13090Sstevel@tonic-gate {
13100Sstevel@tonic-gate 	oversize_t *sp;
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 	if (op->size > op->next_bysize->size ||
13130Sstevel@tonic-gate 	    op->size < op->prev_bysize->size) {
13140Sstevel@tonic-gate 
13150Sstevel@tonic-gate 		/* unlink from size list */
13160Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op->next_bysize;
13170Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op->prev_bysize;
13180Sstevel@tonic-gate 
13190Sstevel@tonic-gate 		/* locate correct insertion point in size-ordered list */
13200Sstevel@tonic-gate 		for (sp = oversize_list.next_bysize;
13210Sstevel@tonic-gate 		    sp != &oversize_list && (op->size > sp->size);
13220Sstevel@tonic-gate 		    sp = sp->next_bysize)
13230Sstevel@tonic-gate 			;
13240Sstevel@tonic-gate 
13250Sstevel@tonic-gate 		/* link into size-ordered list */
13260Sstevel@tonic-gate 		op->next_bysize = sp;
13270Sstevel@tonic-gate 		op->prev_bysize = sp->prev_bysize;
13280Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op;
13290Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op;
13300Sstevel@tonic-gate 	}
13310Sstevel@tonic-gate }
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate static void
13340Sstevel@tonic-gate add_oversize(oversize_t *lp)
13350Sstevel@tonic-gate {
13360Sstevel@tonic-gate 	int merge_flags = INSERT_ONLY;
13370Sstevel@tonic-gate 	oversize_t *nx;  	/* ptr to item right of insertion point */
13380Sstevel@tonic-gate 	oversize_t *pv;  	/* ptr to item left of insertion point */
13390Sstevel@tonic-gate 	uint_t size_lp, size_pv, size_nx;
13400Sstevel@tonic-gate 	uintptr_t endp_lp, endp_pv, endp_nx;
13410Sstevel@tonic-gate 
13420Sstevel@tonic-gate 	/*
13430Sstevel@tonic-gate 	 * Locate insertion point in address-ordered list
13440Sstevel@tonic-gate 	 */
13450Sstevel@tonic-gate 
13460Sstevel@tonic-gate 	for (nx = oversize_list.next_byaddr;
13470Sstevel@tonic-gate 	    nx != &oversize_list && (lp->addr > nx->addr);
13480Sstevel@tonic-gate 	    nx = nx->next_byaddr)
13490Sstevel@tonic-gate 		;
13500Sstevel@tonic-gate 
13510Sstevel@tonic-gate 	/*
13520Sstevel@tonic-gate 	 * Determine how to add chunk to oversize freelist
13530Sstevel@tonic-gate 	 */
13540Sstevel@tonic-gate 
13550Sstevel@tonic-gate 	size_lp = OVSZ_HEADER_SIZE + lp->size;
13560Sstevel@tonic-gate 	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
13570Sstevel@tonic-gate 	size_lp = endp_lp - (uintptr_t)lp;
13580Sstevel@tonic-gate 
13590Sstevel@tonic-gate 	pv = nx->prev_byaddr;
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate 	if (pv->size) {
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate 		size_pv = OVSZ_HEADER_SIZE + pv->size;
13640Sstevel@tonic-gate 		endp_pv = ALIGN((uintptr_t)pv + size_pv,
13650Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13660Sstevel@tonic-gate 		size_pv = endp_pv - (uintptr_t)pv;
13670Sstevel@tonic-gate 
13680Sstevel@tonic-gate 		/* Check for adjacency with left chunk */
13690Sstevel@tonic-gate 		if ((uintptr_t)lp == endp_pv)
13700Sstevel@tonic-gate 			merge_flags |= COALESCE_LEFT;
13710Sstevel@tonic-gate 	}
13720Sstevel@tonic-gate 
13730Sstevel@tonic-gate 	if (nx->size) {
13740Sstevel@tonic-gate 
13750Sstevel@tonic-gate 		/* Check for adjacency with right chunk */
13760Sstevel@tonic-gate 		if ((uintptr_t)nx == endp_lp) {
13770Sstevel@tonic-gate 			size_nx = OVSZ_HEADER_SIZE + nx->size;
13780Sstevel@tonic-gate 			endp_nx = ALIGN((uintptr_t)nx + size_nx,
13790Sstevel@tonic-gate 			    MTMALLOC_MIN_ALIGN);
13800Sstevel@tonic-gate 			size_nx = endp_nx - (uintptr_t)nx;
13810Sstevel@tonic-gate 			merge_flags |= COALESCE_RIGHT;
13820Sstevel@tonic-gate 		}
13830Sstevel@tonic-gate 	}
13840Sstevel@tonic-gate 
13850Sstevel@tonic-gate 	/*
13860Sstevel@tonic-gate 	 * If MTDEBUGPATTERN is set, lp->addr will have been overwritten with
13870Sstevel@tonic-gate 	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
13880Sstevel@tonic-gate 	 * header(s) that also become part of the memory available for
13890Sstevel@tonic-gate 	 * reallocation (i.e. lp and/or nx) must likewise be overwritten with
13900Sstevel@tonic-gate 	 * FREEPATTERN, or we will SIGABRT when this memory is next reallocated.
13910Sstevel@tonic-gate 	 */
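	/*
	 * The merge cases handled below, with pv and nx the free
	 * neighbours on either side of lp in address order:
	 *
	 *   INSERT_ONLY                lp touches neither neighbour
	 *   COALESCE_LEFT              lp abuts pv
	 *   COALESCE_RIGHT             lp abuts nx
	 *   COALESCE_WITH_BOTH_SIDES   lp abuts both
	 */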
13920Sstevel@tonic-gate 	switch (merge_flags) {
13930Sstevel@tonic-gate 
13940Sstevel@tonic-gate 	case INSERT_ONLY:		/* Coalescing not possible */
13950Sstevel@tonic-gate 		insert_oversize(lp, nx);
13960Sstevel@tonic-gate 		break;
13970Sstevel@tonic-gate 	case COALESCE_LEFT:
13980Sstevel@tonic-gate 		pv->size += size_lp;
13990Sstevel@tonic-gate 		position_oversize_by_size(pv);
14000Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
14010Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
14020Sstevel@tonic-gate 		break;
14030Sstevel@tonic-gate 	case COALESCE_RIGHT:
14040Sstevel@tonic-gate 		unlink_oversize(nx);
14050Sstevel@tonic-gate 		lp->size += size_nx;
14060Sstevel@tonic-gate 		insert_oversize(lp, pv->next_byaddr);
14070Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
14080Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
14090Sstevel@tonic-gate 		break;
14100Sstevel@tonic-gate 	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
14110Sstevel@tonic-gate 		pv->size += size_lp + size_nx;
14120Sstevel@tonic-gate 		unlink_oversize(nx);
14130Sstevel@tonic-gate 		position_oversize_by_size(pv);
14140Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN) {
14150Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
14160Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
14170Sstevel@tonic-gate 		}
14180Sstevel@tonic-gate 		break;
14190Sstevel@tonic-gate 	}
14200Sstevel@tonic-gate }
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate /*
14230Sstevel@tonic-gate  * Find a chunk on our freelist that is at least size bytes long. If the
14240Sstevel@tonic-gate  * chunk found is substantially larger than requested, it is split: the
14250Sstevel@tonic-gate  * leading piece (and its oversize_t) is returned to the caller, and the
14260Sstevel@tonic-gate  * leftover piece goes back onto the freelist.
14270Sstevel@tonic-gate  */
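/*
 * For instance (sizes illustrative only), a 64K request satisfied from a
 * 200K free chunk leaves far more than MAX_CACHED bytes over, so the chunk
 * is split; a chunk that fits the request with little to spare is handed
 * out whole.
 */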
14280Sstevel@tonic-gate static oversize_t *
14290Sstevel@tonic-gate find_oversize(size_t size)
14300Sstevel@tonic-gate {
14310Sstevel@tonic-gate 	oversize_t *wp = oversize_list.next_bysize;
14320Sstevel@tonic-gate 	while (wp != &oversize_list && size > wp->size)
14330Sstevel@tonic-gate 		wp = wp->next_bysize;
14340Sstevel@tonic-gate 
14350Sstevel@tonic-gate 	if (wp == &oversize_list) /* empty list or nothing big enough */
14360Sstevel@tonic-gate 		return (NULL);
14370Sstevel@tonic-gate 	/* breaking up a chunk of memory */
14380Sstevel@tonic-gate 	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
14390Sstevel@tonic-gate 	    > MAX_CACHED) {
14400Sstevel@tonic-gate 		caddr_t off;
14410Sstevel@tonic-gate 		oversize_t *np;
14420Sstevel@tonic-gate 		size_t osize;
14430Sstevel@tonic-gate 		off = (caddr_t)ALIGN(wp->addr + size,
14440Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
14450Sstevel@tonic-gate 		osize = wp->size;
14460Sstevel@tonic-gate 		wp->size = (size_t)(off - wp->addr);
14470Sstevel@tonic-gate 		np = oversize_header_alloc((uintptr_t)off,
14480Sstevel@tonic-gate 		    osize - (wp->size + OVSZ_HEADER_SIZE));
14490Sstevel@tonic-gate 		if ((long)np->size < 0)
14500Sstevel@tonic-gate 			abort();
14510Sstevel@tonic-gate 		unlink_oversize(wp);
14520Sstevel@tonic-gate 		add_oversize(np);
14530Sstevel@tonic-gate 	} else {
14540Sstevel@tonic-gate 		unlink_oversize(wp);
14550Sstevel@tonic-gate 	}
14560Sstevel@tonic-gate 	return (wp);
14570Sstevel@tonic-gate }
14580Sstevel@tonic-gate 
14590Sstevel@tonic-gate static void
14600Sstevel@tonic-gate copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
14610Sstevel@tonic-gate {
14620Sstevel@tonic-gate 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
14630Sstevel@tonic-gate 	uint32_t *buf = buf_arg;
14640Sstevel@tonic-gate 
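	/* Fill four words per iteration, then finish any remaining words. */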
14650Sstevel@tonic-gate 	while (buf < bufend - 3) {
14660Sstevel@tonic-gate 		buf[3] = buf[2] = buf[1] = buf[0] = pattern;
14670Sstevel@tonic-gate 		buf += 4;
14680Sstevel@tonic-gate 	}
14690Sstevel@tonic-gate 	while (buf < bufend)
14700Sstevel@tonic-gate 		*buf++ = pattern;
14710Sstevel@tonic-gate }
14720Sstevel@tonic-gate 
14730Sstevel@tonic-gate static void *
14740Sstevel@tonic-gate verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
14750Sstevel@tonic-gate {
14760Sstevel@tonic-gate 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
14770Sstevel@tonic-gate 	uint32_t *buf;
14780Sstevel@tonic-gate 
14790Sstevel@tonic-gate 	for (buf = buf_arg; buf < bufend; buf++)
14800Sstevel@tonic-gate 		if (*buf != pattern)
14810Sstevel@tonic-gate 			return (buf);
14820Sstevel@tonic-gate 	return (NULL);
14830Sstevel@tonic-gate }
14840Sstevel@tonic-gate 
14850Sstevel@tonic-gate static void
14860Sstevel@tonic-gate free_oversize(oversize_t *ovp)
14870Sstevel@tonic-gate {
14880Sstevel@tonic-gate 	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
14890Sstevel@tonic-gate 	assert(ovp->size > MAX_CACHED);
14900Sstevel@tonic-gate 
14910Sstevel@tonic-gate 	ovp->next_bysize = ovp->prev_bysize = NULL;
14920Sstevel@tonic-gate 	ovp->next_byaddr = ovp->prev_byaddr = NULL;
14930Sstevel@tonic-gate 	(void) mutex_lock(&oversize_lock);
14940Sstevel@tonic-gate 	add_oversize(ovp);
14950Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
14960Sstevel@tonic-gate }
14970Sstevel@tonic-gate 
14980Sstevel@tonic-gate static oversize_t *
14990Sstevel@tonic-gate oversize_header_alloc(uintptr_t mem, size_t size)
15000Sstevel@tonic-gate {
15010Sstevel@tonic-gate 	oversize_t *ovsz_hdr;
15020Sstevel@tonic-gate 
15030Sstevel@tonic-gate 	assert(size > MAX_CACHED);
15040Sstevel@tonic-gate 
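	/*
	 * Carve the header out of the start of 'mem':
	 *
	 *   mem                          oversize_t header
	 *   mem + OVSZ_SIZE              magic word (MTMALLOC_OVERSIZE_MAGIC)
	 *   mem + OVSZ_SIZE + OVERHEAD   addr handed back to the caller
	 */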
15050Sstevel@tonic-gate 	ovsz_hdr = (oversize_t *)mem;
15060Sstevel@tonic-gate 	ovsz_hdr->prev_bysize = NULL;
15070Sstevel@tonic-gate 	ovsz_hdr->next_bysize = NULL;
15080Sstevel@tonic-gate 	ovsz_hdr->prev_byaddr = NULL;
15090Sstevel@tonic-gate 	ovsz_hdr->next_byaddr = NULL;
15100Sstevel@tonic-gate 	ovsz_hdr->hash_next = NULL;
15110Sstevel@tonic-gate 	ovsz_hdr->size = size;
15120Sstevel@tonic-gate 	mem += OVSZ_SIZE;
15130Sstevel@tonic-gate 	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
15140Sstevel@tonic-gate 	mem += OVERHEAD;
15150Sstevel@tonic-gate 	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
15160Sstevel@tonic-gate 	ovsz_hdr->addr = (caddr_t)mem;
15170Sstevel@tonic-gate 	return (ovsz_hdr);
15180Sstevel@tonic-gate }
1519