xref: /dflybsd-src/lib/libc/stdlib/dmalloc.c (revision 171835807871f68c36f75ff84e1d6f7df4683df0)
1ff10d954SMatthew Dillon /*
2ff10d954SMatthew Dillon  * DMALLOC.C	- Dillon's malloc
3ff10d954SMatthew Dillon  *
4e2caf0e7SMatthew Dillon  * Copyright (c) 2011,2017 The DragonFly Project. All rights reserved.
5ff10d954SMatthew Dillon  *
6ff10d954SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
7ff10d954SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>.
8ff10d954SMatthew Dillon  *
9ff10d954SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
10ff10d954SMatthew Dillon  * modification, are permitted provided that the following conditions
11ff10d954SMatthew Dillon  * are met:
12ff10d954SMatthew Dillon  *
13ff10d954SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
14ff10d954SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
15ff10d954SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
16ff10d954SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
17ff10d954SMatthew Dillon  *    the documentation and/or other materials provided with the
18ff10d954SMatthew Dillon  *    distribution.
19ff10d954SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
20ff10d954SMatthew Dillon  *    contributors may be used to endorse or promote products derived
21ff10d954SMatthew Dillon  *    from this software without specific, prior written permission.
22ff10d954SMatthew Dillon  *
23ff10d954SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24ff10d954SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25ff10d954SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26ff10d954SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27ff10d954SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28ff10d954SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29ff10d954SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30ff10d954SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31ff10d954SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32ff10d954SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33ff10d954SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34ff10d954SMatthew Dillon  * SUCH DAMAGE.
35ff10d954SMatthew Dillon  */
36ff10d954SMatthew Dillon /*
37e2caf0e7SMatthew Dillon  * This module implements a modified slab allocator as a drop-in replacement
38e2caf0e7SMatthew Dillon  * for the libc malloc().  The slab algorithm has been adjusted to support
39e2caf0e7SMatthew Dillon  * dynamic sizing of slabs which effectively allows slabs to be used for
40e2caf0e7SMatthew Dillon  * allocations of any size.  Because of this we have neither a small-block
41e2caf0e7SMatthew Dillon  * allocator nor a big-block allocator, and the code paths are simplified.
42ff10d954SMatthew Dillon  *
43ff10d954SMatthew Dillon  * To support dynamic slab sizing, available user virtual memory is broken
44ff10d954SMatthew Dillon  * down into ~1024 regions.  Each region has a fixed slab size whose value is
45ff10d954SMatthew Dillon  * set when the region is opened up for use.  The free() path simply applies
46ff10d954SMatthew Dillon  * a mask based on the region to the pointer to acquire the base of the
47ff10d954SMatthew Dillon  * governing slab structure.
48ff10d954SMatthew Dillon  *
49ff10d954SMatthew Dillon  * Regions[NREGIONS]	(1024)
50ff10d954SMatthew Dillon  *
51ff10d954SMatthew Dillon  * Slab management and locking is done on a per-zone basis.
52ff10d954SMatthew Dillon  *
53ff10d954SMatthew Dillon  *	Alloc Size	Chunking        Number of zones
54ff10d954SMatthew Dillon  *	0-127		8		16
55ff10d954SMatthew Dillon  *	128-255		16		8
56ff10d954SMatthew Dillon  *	256-511		32		8
57ff10d954SMatthew Dillon  *	512-1023	64		8
58ff10d954SMatthew Dillon  *	1024-2047	128		8
59ff10d954SMatthew Dillon  *	2048-4095	256		8
60ff10d954SMatthew Dillon  *	4096-8191	512		8
61ff10d954SMatthew Dillon  *	8192-16383	1024		8
62ff10d954SMatthew Dillon  *	16384-32767	2048		8
63ff10d954SMatthew Dillon  *	32768-65535	4096		8
64e2caf0e7SMatthew Dillon  *	... continues forever ...	4 zones
65ff10d954SMatthew Dillon  *
66ff10d954SMatthew Dillon  *	For a 2^63 memory space each doubling >= 64K is broken down into
67ff10d954SMatthew Dillon  *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
68ff10d954SMatthew Dillon  *
69ff10d954SMatthew Dillon  *			   API FEATURES AND SIDE EFFECTS
70ff10d954SMatthew Dillon  *
71ff10d954SMatthew Dillon  *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
72ff10d954SMatthew Dillon  *	Above that, power-of-2 sized allocations are page-aligned.  Non
73ff10d954SMatthew Dillon  *	power-of-2 sized allocations are aligned the same as the chunk
74ff10d954SMatthew Dillon  *	size for their zone.
75ff10d954SMatthew Dillon  *    + ability to allocate arbitrarily large chunks of memory
76ff10d954SMatthew Dillon  *    + realloc will reuse the passed pointer if possible, within the
77ff10d954SMatthew Dillon  *	limitations of the zone chunking.
78ff10d954SMatthew Dillon  *
79f7b2bab1SMatthew Dillon  * On top of the slab allocator we also implement a 16-entry-per-thread
80e2caf0e7SMatthew Dillon  * magazine cache for allocations <= NOMSLABSIZE.
81e2caf0e7SMatthew Dillon  *
82ff10d954SMatthew Dillon  *				FUTURE FEATURES
83ff10d954SMatthew Dillon  *
84ff10d954SMatthew Dillon  *    + [better] garbage collection
85ff10d954SMatthew Dillon  *    + better initial sizing.
86ff10d954SMatthew Dillon  *
87ff10d954SMatthew Dillon  * TUNING
88ff10d954SMatthew Dillon  *
89ff10d954SMatthew Dillon  * The value of the environment variable MALLOC_OPTIONS is a character string
90ff10d954SMatthew Dillon  * containing various flags to tune nmalloc.  Upper case letters enable
91ff10d954SMatthew Dillon  * or increase a feature, lower case letters disable or decrease it.
92ff10d954SMatthew Dillon  *
93ff10d954SMatthew Dillon  * U		Enable UTRACE for all operations, observable with ktrace.
94ff10d954SMatthew Dillon  *		Disabled by default.
95ff10d954SMatthew Dillon  *
96ff10d954SMatthew Dillon  * Z		Zero out allocations, otherwise allocations (except for
97ff10d954SMatthew Dillon  *		calloc) will contain garbage.
98ff10d954SMatthew Dillon  *		Disabled by default.
99ff10d954SMatthew Dillon  *
100ff10d954SMatthew Dillon  * H		Pass a hint with madvise() about unused pages.
101ff10d954SMatthew Dillon  *		Disabled by default.
102ff10d954SMatthew Dillon  *		Not currently implemented.
103ff10d954SMatthew Dillon  *
104ff10d954SMatthew Dillon  * F		Disable local per-thread caching.
105ff10d954SMatthew Dillon  *		Disabled by default.
106ff10d954SMatthew Dillon  *
107ff10d954SMatthew Dillon  * C		Increase (decrease) how much excess cache to retain.
108ff10d954SMatthew Dillon  *		Set to 4 by default.
109ff10d954SMatthew Dillon  */
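
/*
 * Editor's illustrative sketch (not part of the original source): since
 * MALLOC_OPTIONS is read once from the environment at startup, a typical
 * way to exercise the flags above from a shell might look like this, with
 * "./a.out" standing in for any dynamically linked program:
 *
 *	# zero all allocations and emit utrace records
 *	MALLOC_OPTIONS=ZU ./a.out
 *
 *	# disable per-thread caching and shrink the retained cache twice
 *	MALLOC_OPTIONS=Fcc ./a.out
 */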
110ff10d954SMatthew Dillon 
111ff10d954SMatthew Dillon /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */
112ff10d954SMatthew Dillon 
113ff10d954SMatthew Dillon #ifndef STANDALONE_DEBUG
114ff10d954SMatthew Dillon #include "libc_private.h"
115ff10d954SMatthew Dillon #endif
116ff10d954SMatthew Dillon 
117ff10d954SMatthew Dillon #include <sys/param.h>
118ff10d954SMatthew Dillon #include <sys/types.h>
119ff10d954SMatthew Dillon #include <sys/mman.h>
120ff10d954SMatthew Dillon #include <sys/queue.h>
121ff10d954SMatthew Dillon #include <sys/ktrace.h>
122ff10d954SMatthew Dillon #include <stdio.h>
123ff10d954SMatthew Dillon #include <stdint.h>
124ff10d954SMatthew Dillon #include <stdlib.h>
125ff10d954SMatthew Dillon #include <stdarg.h>
126ff10d954SMatthew Dillon #include <stddef.h>
127ff10d954SMatthew Dillon #include <unistd.h>
128ff10d954SMatthew Dillon #include <string.h>
129ff10d954SMatthew Dillon #include <fcntl.h>
130ff10d954SMatthew Dillon #include <errno.h>
131ff10d954SMatthew Dillon #include <pthread.h>
132ff10d954SMatthew Dillon #include <limits.h>
133ff10d954SMatthew Dillon 
134ff10d954SMatthew Dillon #include <machine/atomic.h>
135ff10d954SMatthew Dillon #include <machine/cpufunc.h>
136ff10d954SMatthew Dillon 
137ff10d954SMatthew Dillon #ifdef STANDALONE_DEBUG
138ff10d954SMatthew Dillon void _nmalloc_thr_init(void);
139ff10d954SMatthew Dillon #else
140ff10d954SMatthew Dillon #include "spinlock.h"
141ff10d954SMatthew Dillon #include "un-namespace.h"
142ff10d954SMatthew Dillon #endif
143ff10d954SMatthew Dillon 
144ff10d954SMatthew Dillon #ifndef MAP_SIZEALIGN
145ff10d954SMatthew Dillon #define MAP_SIZEALIGN	0
146ff10d954SMatthew Dillon #endif
147ff10d954SMatthew Dillon 
148ff10d954SMatthew Dillon #if SSIZE_MAX == 0x7FFFFFFF
149ff10d954SMatthew Dillon #define ADDRBITS	32
150ff10d954SMatthew Dillon #define UVM_BITS	32	/* worst case */
151ff10d954SMatthew Dillon #else
152ff10d954SMatthew Dillon #define ADDRBITS	64
153ff10d954SMatthew Dillon #define UVM_BITS	48	/* worst case XXX */
154ff10d954SMatthew Dillon #endif
155ff10d954SMatthew Dillon 
156ff10d954SMatthew Dillon #if LONG_MAX == 0x7FFFFFFF
157ff10d954SMatthew Dillon #define LONG_BITS	32
158ff10d954SMatthew Dillon #define LONG_BITS_SHIFT	5
159ff10d954SMatthew Dillon #else
160ff10d954SMatthew Dillon #define LONG_BITS	64
161ff10d954SMatthew Dillon #define LONG_BITS_SHIFT	6
162ff10d954SMatthew Dillon #endif
163ff10d954SMatthew Dillon 
164ff10d954SMatthew Dillon #define LOCKEDPTR	((void *)(intptr_t)-1)
165ff10d954SMatthew Dillon 
166ff10d954SMatthew Dillon /*
167ff10d954SMatthew Dillon  * Regions[]
168ff10d954SMatthew Dillon  */
169ff10d954SMatthew Dillon #define NREGIONS_BITS	10
170ff10d954SMatthew Dillon #define NREGIONS	(1 << NREGIONS_BITS)
171ff10d954SMatthew Dillon #define NREGIONS_MASK	(NREGIONS - 1)
172ff10d954SMatthew Dillon #define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
173ff10d954SMatthew Dillon #define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)
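
/*
 * Worked example (derived from the definitions above, for illustration
 * only): with UVM_BITS = 48 and NREGIONS_BITS = 10, NREGIONS_SHIFT is 38,
 * so each of the 1024 regions covers 2^38 bytes (256GB) of user virtual
 * address space and a pointer's region index is simply its top bits:
 *
 *	ri = ((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK;
 *
 * On 32-bit targets (UVM_BITS = 32) each region covers 2^22 bytes (4MB).
 */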
174ff10d954SMatthew Dillon 
175ff10d954SMatthew Dillon typedef struct region *region_t;
176ff10d954SMatthew Dillon typedef struct slglobaldata *slglobaldata_t;
177ff10d954SMatthew Dillon typedef struct slab *slab_t;
178ff10d954SMatthew Dillon 
179ff10d954SMatthew Dillon struct region {
180ff10d954SMatthew Dillon 	uintptr_t	mask;
181ff10d954SMatthew Dillon 	slab_t		slab;	/* conditional out of band slab */
182ff10d954SMatthew Dillon };
183ff10d954SMatthew Dillon 
184ff10d954SMatthew Dillon static struct region Regions[NREGIONS];
185ff10d954SMatthew Dillon 
186ff10d954SMatthew Dillon /*
187ff10d954SMatthew Dillon  * Number of chunking zones available
188ff10d954SMatthew Dillon  */
189ff10d954SMatthew Dillon #define CHUNKFACTOR	8
190ff10d954SMatthew Dillon #if ADDRBITS == 32
191ff10d954SMatthew Dillon #define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
192ff10d954SMatthew Dillon #else
193ff10d954SMatthew Dillon #define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
194ff10d954SMatthew Dillon #endif
195ff10d954SMatthew Dillon 
196ff10d954SMatthew Dillon static int MaxChunks[NZONES];
197ff10d954SMatthew Dillon 
198ff10d954SMatthew Dillon #define NDEPOTS		8		/* must be power of 2 */
199ff10d954SMatthew Dillon 
200ff10d954SMatthew Dillon /*
201ff10d954SMatthew Dillon  * Maximum number of chunks per slab, governed by the allocation bitmap in
202ff10d954SMatthew Dillon  * each slab.  The maximum is reduced for large chunk sizes.
203ff10d954SMatthew Dillon  */
204ff10d954SMatthew Dillon #define MAXCHUNKS	(LONG_BITS * LONG_BITS)
205ff10d954SMatthew Dillon #define MAXCHUNKS_BITS	(LONG_BITS_SHIFT * LONG_BITS_SHIFT)
206ff10d954SMatthew Dillon #define LITSLABSIZE	(32 * 1024)
207ff10d954SMatthew Dillon #define NOMSLABSIZE	(2 * 1024 * 1024)
208ff10d954SMatthew Dillon #define BIGSLABSIZE	(128 * 1024 * 1024)
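
/*
 * For reference, LITSLABSIZE, NOMSLABSIZE and BIGSLABSIZE work out to
 * 32KB, 2MB and 128MB respectively.  The per-thread magazine shortcut
 * only applies to chunks <= NOMSLABSIZE, and slabs larger than
 * BIGSLABSIZE are never retained in the per-thread caches (see memfree()).
 */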
209ff10d954SMatthew Dillon 
210ff10d954SMatthew Dillon #define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity ("slab" in ASCII) */
211ff10d954SMatthew Dillon 
212ff10d954SMatthew Dillon TAILQ_HEAD(slab_list, slab);
213ff10d954SMatthew Dillon 
214ff10d954SMatthew Dillon /*
215ff10d954SMatthew Dillon  * A slab structure
216ff10d954SMatthew Dillon  */
217ff10d954SMatthew Dillon struct slab {
218ff10d954SMatthew Dillon 	struct slab	*next;		/* slabs with available space */
219ff10d954SMatthew Dillon 	TAILQ_ENTRY(slab) entry;
220ff10d954SMatthew Dillon 	int32_t		magic;		/* magic number for sanity check */
221ff10d954SMatthew Dillon 	u_int		navail;		/* number of free elements available */
222ff10d954SMatthew Dillon 	u_int		nmax;
223ff10d954SMatthew Dillon 	u_int		free_bit;	/* free hint bitno */
224ff10d954SMatthew Dillon 	u_int		free_index;	/* free hint index */
225ff10d954SMatthew Dillon 	u_long		bitmap[LONG_BITS]; /* free chunks */
226ff10d954SMatthew Dillon 	size_t		slab_size;	/* size of entire slab */
227ff10d954SMatthew Dillon 	size_t		chunk_size;	/* chunk size for validation */
228ff10d954SMatthew Dillon 	int		zone_index;
229ff10d954SMatthew Dillon 	enum { UNKNOWN, AVAIL, EMPTY, FULL } state;
230ff10d954SMatthew Dillon 	int		flags;
231ff10d954SMatthew Dillon 	region_t	region;		/* related region */
232ff10d954SMatthew Dillon 	char		*chunks;	/* chunk base */
233e2caf0e7SMatthew Dillon 	slglobaldata_t	slgd;		/* localized to thread else NULL */
234ff10d954SMatthew Dillon };
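
/*
 * Layout note (descriptive, added for clarity): for normal slabs this
 * header lives at the base of the slab mapping with the chunk array
 * following in the same mapping, which is why free() can recover the
 * header by masking the chunk pointer with the region's slab-size mask.
 * The fixed bitmap caps a slab at MAXCHUNKS = LONG_BITS * LONG_BITS
 * chunks (4096 on 64-bit hosts).
 */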
235ff10d954SMatthew Dillon 
236ff10d954SMatthew Dillon /*
237e2caf0e7SMatthew Dillon  * per-thread data + global depot
238e2caf0e7SMatthew Dillon  *
239e2caf0e7SMatthew Dillon  * NOTE: The magazine shortcut is only used for per-thread data.
240ff10d954SMatthew Dillon  */
241e2caf0e7SMatthew Dillon #define NMAGSHORTCUT	16
242e2caf0e7SMatthew Dillon 
243ff10d954SMatthew Dillon struct slglobaldata {
244e2caf0e7SMatthew Dillon 	spinlock_t	lock;		/* only used by slglobaldepot */
245ff10d954SMatthew Dillon 	struct zoneinfo {
246ff10d954SMatthew Dillon 		slab_t	avail_base;
247ff10d954SMatthew Dillon 		slab_t	empty_base;
248ff10d954SMatthew Dillon 		int	best_region;
249e2caf0e7SMatthew Dillon 		int	mag_index;
250e2caf0e7SMatthew Dillon 		int	avail_count;
251ff10d954SMatthew Dillon 		int	empty_count;
252e2caf0e7SMatthew Dillon 		void	*mag_shortcut[NMAGSHORTCUT];
253ff10d954SMatthew Dillon 	} zone[NZONES];
254ff10d954SMatthew Dillon 	struct slab_list full_zones;	/* via entry */
255ff10d954SMatthew Dillon 	int		masked;
256ff10d954SMatthew Dillon 	int		biggest_index;
257ff10d954SMatthew Dillon 	size_t		nslabs;
258ff10d954SMatthew Dillon };
259ff10d954SMatthew Dillon 
260ff10d954SMatthew Dillon #define SLAB_ZEROD		0x0001
261ff10d954SMatthew Dillon 
262ff10d954SMatthew Dillon /*
263ff10d954SMatthew Dillon  * Misc constants.  Note that allocations that are exact multiples of
264ff10d954SMatthew Dillon  * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
265ff10d954SMatthew Dillon  * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
266ff10d954SMatthew Dillon  */
267ff10d954SMatthew Dillon #define MIN_CHUNK_SIZE		8		/* in bytes */
268ff10d954SMatthew Dillon #define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
269ff10d954SMatthew Dillon 
270ff10d954SMatthew Dillon #define SAFLAG_ZERO	0x00000001
271ff10d954SMatthew Dillon 
272ff10d954SMatthew Dillon /*
273ff10d954SMatthew Dillon  * The WEIRD_ADDR is used as known text to copy into free objects to
274ff10d954SMatthew Dillon  * try to create deterministic failure cases if the data is accessed after
275ff10d954SMatthew Dillon  * free.
276ff10d954SMatthew Dillon  *
277ff10d954SMatthew Dillon  * WARNING: A limited number of spinlocks are available, BIGXSIZE should
278ff10d954SMatthew Dillon  *	    not be larger than 64.
279ff10d954SMatthew Dillon  */
2809706d23cSzrj #ifdef INVARIANTS
281ff10d954SMatthew Dillon #define WEIRD_ADDR      0xdeadc0de
2829706d23cSzrj #endif
283ff10d954SMatthew Dillon 
284ff10d954SMatthew Dillon /*
285ff10d954SMatthew Dillon  * Thread control
286ff10d954SMatthew Dillon  */
287ff10d954SMatthew Dillon 
288ff10d954SMatthew Dillon #define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
289ff10d954SMatthew Dillon 				_mpanic("assertion: %s in %s",	\
290ff10d954SMatthew Dillon 				#exp, __func__);		\
291ff10d954SMatthew Dillon 			    } while (0)
292ff10d954SMatthew Dillon 
293e2caf0e7SMatthew Dillon /*
294e2caf0e7SMatthew Dillon  * With this attribute set, no function call is required to access
295e2caf0e7SMatthew Dillon  * this variable when the code is compiled with -fPIC.
296e2caf0e7SMatthew Dillon  *
297e2caf0e7SMatthew Dillon  * Must be empty for libc_rtld (similar to __thread)
298e2caf0e7SMatthew Dillon  */
299e2caf0e7SMatthew Dillon #if defined(__LIBC_RTLD)
300e2caf0e7SMatthew Dillon #define TLS_ATTRIBUTE
301e2caf0e7SMatthew Dillon #else
302ff10d954SMatthew Dillon #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec")));
303e2caf0e7SMatthew Dillon #endif
304ff10d954SMatthew Dillon 
305ff10d954SMatthew Dillon static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
306ff10d954SMatthew Dillon static pthread_key_t thread_malloc_key;
307ff10d954SMatthew Dillon static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
308e2caf0e7SMatthew Dillon static struct slglobaldata slglobaldepot;
309ff10d954SMatthew Dillon 
310ff10d954SMatthew Dillon static int opt_madvise = 0;
311ff10d954SMatthew Dillon static int opt_free = 0;
312ff10d954SMatthew Dillon static int opt_cache = 4;
313ff10d954SMatthew Dillon static int opt_utrace = 0;
314ff10d954SMatthew Dillon static int g_malloc_flags = 0;
315ff10d954SMatthew Dillon static int malloc_panic;
316ff10d954SMatthew Dillon 
3179706d23cSzrj #ifdef INVARIANTS
318ff10d954SMatthew Dillon static const int32_t weirdary[16] = {
319ff10d954SMatthew Dillon 	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
320ff10d954SMatthew Dillon 	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
321ff10d954SMatthew Dillon 	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
322ff10d954SMatthew Dillon 	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
323ff10d954SMatthew Dillon };
3249706d23cSzrj #endif
325ff10d954SMatthew Dillon 
326ff10d954SMatthew Dillon static void *memalloc(size_t size, int flags);
327ff10d954SMatthew Dillon static void *memrealloc(void *ptr, size_t size);
328ff10d954SMatthew Dillon static void memfree(void *ptr, int);
329c9cb4430Szrj static int memalign(void **memptr, size_t alignment, size_t size);
330ff10d954SMatthew Dillon static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size);
331ff10d954SMatthew Dillon static void slabfree(slab_t slab);
332ff10d954SMatthew Dillon static void slabterm(slglobaldata_t slgd, slab_t slab);
333ff10d954SMatthew Dillon static void *_vmem_alloc(int ri, size_t slab_size);
334ff10d954SMatthew Dillon static void _vmem_free(void *ptr, size_t slab_size);
335ff10d954SMatthew Dillon static void _mpanic(const char *ctl, ...) __printflike(1, 2);
3364018c6edSSascha Wildner #ifndef STANDALONE_DEBUG
337450f08dbSSascha Wildner static void malloc_init(void) __constructor(101);
3384018c6edSSascha Wildner #else
3394018c6edSSascha Wildner static void malloc_init(void) __constructor(101);
3404018c6edSSascha Wildner #endif
341ff10d954SMatthew Dillon 
34220b2da2cSVenkatesh Srinivas 
343ff10d954SMatthew Dillon struct nmalloc_utrace {
344ff10d954SMatthew Dillon 	void *p;
345ff10d954SMatthew Dillon 	size_t s;
346ff10d954SMatthew Dillon 	void *r;
347ff10d954SMatthew Dillon };
348ff10d954SMatthew Dillon 
349ff10d954SMatthew Dillon #define UTRACE(a, b, c)						\
350ff10d954SMatthew Dillon 	if (opt_utrace) {					\
351ff10d954SMatthew Dillon 		struct nmalloc_utrace ut = {			\
352ff10d954SMatthew Dillon 			.p = (a),				\
353ff10d954SMatthew Dillon 			.s = (b),				\
354ff10d954SMatthew Dillon 			.r = (c)				\
355ff10d954SMatthew Dillon 		};						\
356ff10d954SMatthew Dillon 		utrace(&ut, sizeof(ut));			\
357ff10d954SMatthew Dillon 	}
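
/*
 * Observation sketch (exact ktrace/kdump flags may vary by system): with
 * the 'U' option enabled, each malloc/realloc/free emits one struct
 * nmalloc_utrace record via utrace(2), which can typically be captured
 * and decoded with something like:
 *
 *	ktrace -t u ./a.out
 *	kdump -f ktrace.out
 */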
358ff10d954SMatthew Dillon 
359ff10d954SMatthew Dillon #ifdef INVARIANTS
360ff10d954SMatthew Dillon /*
361ff10d954SMatthew Dillon  * If enabled any memory allocated without M_ZERO is initialized to -1.
362ff10d954SMatthew Dillon  */
363ff10d954SMatthew Dillon static int  use_malloc_pattern;
364ff10d954SMatthew Dillon #endif
365ff10d954SMatthew Dillon 
366ff10d954SMatthew Dillon static void
367ff10d954SMatthew Dillon malloc_init(void)
368ff10d954SMatthew Dillon {
369ff10d954SMatthew Dillon 	const char *p = NULL;
37020b2da2cSVenkatesh Srinivas 
371e2caf0e7SMatthew Dillon 	TAILQ_INIT(&slglobal.full_zones);
372ff10d954SMatthew Dillon 
373ff10d954SMatthew Dillon 	Regions[0].mask = -1; /* disallow activity in lowest region */
374ff10d954SMatthew Dillon 
375ff10d954SMatthew Dillon 	if (issetugid() == 0)
376ff10d954SMatthew Dillon 		p = getenv("MALLOC_OPTIONS");
377ff10d954SMatthew Dillon 
378ff10d954SMatthew Dillon 	for (; p != NULL && *p != '\0'; p++) {
379ff10d954SMatthew Dillon 		switch(*p) {
380ff10d954SMatthew Dillon 		case 'u':
381ff10d954SMatthew Dillon 			opt_utrace = 0;
382ff10d954SMatthew Dillon 			break;
383ff10d954SMatthew Dillon 		case 'U':
384ff10d954SMatthew Dillon 			opt_utrace = 1;
385ff10d954SMatthew Dillon 			break;
386ff10d954SMatthew Dillon 		case 'h':
387ff10d954SMatthew Dillon 			opt_madvise = 0;
388ff10d954SMatthew Dillon 			break;
389ff10d954SMatthew Dillon 		case 'H':
390ff10d954SMatthew Dillon 			opt_madvise = 1;
391ff10d954SMatthew Dillon 			break;
392ff10d954SMatthew Dillon 		case 'c':
393ff10d954SMatthew Dillon 			if (opt_cache > 0)
394ff10d954SMatthew Dillon 				--opt_cache;
395ff10d954SMatthew Dillon 			break;
396ff10d954SMatthew Dillon 		case 'C':
397ff10d954SMatthew Dillon 			++opt_cache;
398ff10d954SMatthew Dillon 			break;
399ff10d954SMatthew Dillon 		case 'f':
400ff10d954SMatthew Dillon 			opt_free = 0;
401ff10d954SMatthew Dillon 			break;
402ff10d954SMatthew Dillon 		case 'F':
403ff10d954SMatthew Dillon 			opt_free = 1;
404ff10d954SMatthew Dillon 			break;
405ff10d954SMatthew Dillon 		case 'z':
406ff10d954SMatthew Dillon 			g_malloc_flags = 0;
407ff10d954SMatthew Dillon 			break;
408ff10d954SMatthew Dillon 		case 'Z':
409ff10d954SMatthew Dillon 			g_malloc_flags = SAFLAG_ZERO;
410ff10d954SMatthew Dillon 			break;
411ff10d954SMatthew Dillon 		default:
412ff10d954SMatthew Dillon 			break;
413ff10d954SMatthew Dillon 		}
414ff10d954SMatthew Dillon 	}
415ff10d954SMatthew Dillon 
416ff10d954SMatthew Dillon 	UTRACE((void *) -1, 0, NULL);
417ff10d954SMatthew Dillon }
418ff10d954SMatthew Dillon 
419ff10d954SMatthew Dillon /*
420ff10d954SMatthew Dillon  * We have to install a handler for nmalloc thread teardowns when
421ff10d954SMatthew Dillon  * the thread is created.  We cannot delay this because destructors in
422ff10d954SMatthew Dillon  * sophisticated userland programs can call malloc() for the first time
423ff10d954SMatthew Dillon  * during their thread exit.
424ff10d954SMatthew Dillon  *
425ff10d954SMatthew Dillon  * This routine is called directly from pthreads.
426ff10d954SMatthew Dillon  */
427ff10d954SMatthew Dillon static void _nmalloc_thr_init_once(void);
428ff10d954SMatthew Dillon static void _nmalloc_thr_destructor(void *thrp);
429ff10d954SMatthew Dillon 
430ff10d954SMatthew Dillon void
431ff10d954SMatthew Dillon _nmalloc_thr_init(void)
432ff10d954SMatthew Dillon {
433ff10d954SMatthew Dillon 	static int did_init;
434ff10d954SMatthew Dillon 
435ff10d954SMatthew Dillon 	TAILQ_INIT(&slglobal.full_zones);
436ff10d954SMatthew Dillon 
43720b2da2cSVenkatesh Srinivas 	if (slglobal.masked)
43820b2da2cSVenkatesh Srinivas 		return;
43920b2da2cSVenkatesh Srinivas 
440ff10d954SMatthew Dillon 	slglobal.masked = 1;
441ff10d954SMatthew Dillon 	if (did_init == 0) {
442ff10d954SMatthew Dillon 		did_init = 1;
443ff10d954SMatthew Dillon 		pthread_once(&thread_malloc_once, _nmalloc_thr_init_once);
444ff10d954SMatthew Dillon 	}
445ff10d954SMatthew Dillon 	pthread_setspecific(thread_malloc_key, &slglobal);
446ff10d954SMatthew Dillon 	slglobal.masked = 0;
447ff10d954SMatthew Dillon }
448ff10d954SMatthew Dillon 
449e2caf0e7SMatthew Dillon void
450e2caf0e7SMatthew Dillon _nmalloc_thr_prepfork(void)
451e2caf0e7SMatthew Dillon {
452e2caf0e7SMatthew Dillon 	if (__isthreaded)
453e2caf0e7SMatthew Dillon 		_SPINLOCK(&slglobaldepot.lock);
454e2caf0e7SMatthew Dillon }
455e2caf0e7SMatthew Dillon 
456e2caf0e7SMatthew Dillon void
457e2caf0e7SMatthew Dillon _nmalloc_thr_parentfork(void)
458e2caf0e7SMatthew Dillon {
459e2caf0e7SMatthew Dillon 	if (__isthreaded)
460e2caf0e7SMatthew Dillon 		_SPINUNLOCK(&slglobaldepot.lock);
461e2caf0e7SMatthew Dillon }
462e2caf0e7SMatthew Dillon 
463e2caf0e7SMatthew Dillon void
464e2caf0e7SMatthew Dillon _nmalloc_thr_childfork(void)
465e2caf0e7SMatthew Dillon {
466e2caf0e7SMatthew Dillon 	if (__isthreaded)
467e2caf0e7SMatthew Dillon 		_SPINUNLOCK(&slglobaldepot.lock);
468e2caf0e7SMatthew Dillon }
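
/*
 * Descriptive note: the three handlers above follow the usual
 * pthread_atfork() pattern, taking the depot spinlock before fork() and
 * releasing it in both the parent and the child so the child never
 * inherits a lock held by some other thread.  They are wired into the
 * fork path elsewhere in libc.
 */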
469e2caf0e7SMatthew Dillon 
470ff10d954SMatthew Dillon /*
471ff10d954SMatthew Dillon  * Called just once
472ff10d954SMatthew Dillon  */
473ff10d954SMatthew Dillon static void
474ff10d954SMatthew Dillon _nmalloc_thr_init_once(void)
475ff10d954SMatthew Dillon {
476*17183580SMatthew Dillon 	/* ignore error from stub if not threaded */
477*17183580SMatthew Dillon 	pthread_key_create(&thread_malloc_key, _nmalloc_thr_destructor);
478ff10d954SMatthew Dillon }
479ff10d954SMatthew Dillon 
480ff10d954SMatthew Dillon /*
481ff10d954SMatthew Dillon  * Called for each thread undergoing exit
482ff10d954SMatthew Dillon  *
483ff10d954SMatthew Dillon  * Move all of the thread's slabs into a depot.
484ff10d954SMatthew Dillon  */
485ff10d954SMatthew Dillon static void
486ff10d954SMatthew Dillon _nmalloc_thr_destructor(void *thrp)
487ff10d954SMatthew Dillon {
488ff10d954SMatthew Dillon 	slglobaldata_t slgd = thrp;
489e2caf0e7SMatthew Dillon 	struct zoneinfo *zinfo;
490ff10d954SMatthew Dillon 	slab_t slab;
491e2caf0e7SMatthew Dillon 	void *ptr;
492ff10d954SMatthew Dillon 	int i;
493e2caf0e7SMatthew Dillon 	int j;
494ff10d954SMatthew Dillon 
495ff10d954SMatthew Dillon 	slgd->masked = 1;
496ff10d954SMatthew Dillon 
497ff10d954SMatthew Dillon 	for (i = 0; i <= slgd->biggest_index; i++) {
498e2caf0e7SMatthew Dillon 		zinfo = &slgd->zone[i];
499e2caf0e7SMatthew Dillon 
500e2caf0e7SMatthew Dillon 		while ((j = zinfo->mag_index) > 0) {
501e2caf0e7SMatthew Dillon 			--j;
502e2caf0e7SMatthew Dillon 			ptr = zinfo->mag_shortcut[j];
503e2caf0e7SMatthew Dillon 			zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
504e2caf0e7SMatthew Dillon 			zinfo->mag_index = j;
505e2caf0e7SMatthew Dillon 			memfree(ptr, 0);
506e2caf0e7SMatthew Dillon 		}
507e2caf0e7SMatthew Dillon 
508e2caf0e7SMatthew Dillon 		while ((slab = zinfo->empty_base) != NULL) {
509e2caf0e7SMatthew Dillon 			zinfo->empty_base = slab->next;
510e2caf0e7SMatthew Dillon 			--zinfo->empty_count;
511ff10d954SMatthew Dillon 			slabterm(slgd, slab);
512ff10d954SMatthew Dillon 		}
513ff10d954SMatthew Dillon 
514e2caf0e7SMatthew Dillon 		while ((slab = zinfo->avail_base) != NULL) {
515e2caf0e7SMatthew Dillon 			zinfo->avail_base = slab->next;
516e2caf0e7SMatthew Dillon 			--zinfo->avail_count;
517ff10d954SMatthew Dillon 			slabterm(slgd, slab);
518ff10d954SMatthew Dillon 		}
519ff10d954SMatthew Dillon 
520ff10d954SMatthew Dillon 		while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) {
521ff10d954SMatthew Dillon 			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
522ff10d954SMatthew Dillon 			slabterm(slgd, slab);
523ff10d954SMatthew Dillon 		}
524ff10d954SMatthew Dillon 	}
525ff10d954SMatthew Dillon }
526ff10d954SMatthew Dillon 
527ff10d954SMatthew Dillon /*
528ff10d954SMatthew Dillon  * Calculate the zone index for the allocation request size and set the
529ff10d954SMatthew Dillon  * allocation request size to that particular zone's chunk size.
530e2caf0e7SMatthew Dillon  *
531e2caf0e7SMatthew Dillon  * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform
532e2caf0e7SMatthew Dillon  * with malloc requirements for intel/amd.
533ff10d954SMatthew Dillon  */
534ff10d954SMatthew Dillon static __inline int
535ff10d954SMatthew Dillon zoneindex(size_t *bytes, size_t *chunking)
536ff10d954SMatthew Dillon {
537ff10d954SMatthew Dillon 	size_t n = (size_t)*bytes;
538ff10d954SMatthew Dillon 	size_t x;
539ff10d954SMatthew Dillon 	size_t c;
540ff10d954SMatthew Dillon 	int i;
541ff10d954SMatthew Dillon 
542ff10d954SMatthew Dillon 	if (n < 128) {
543e2caf0e7SMatthew Dillon 		if (n < 16) {
544ff10d954SMatthew Dillon 			*bytes = n = (n + 7) & ~7;
545ff10d954SMatthew Dillon 			*chunking = 8;
546e2caf0e7SMatthew Dillon 			return(n / 8 - 1);	/* 8 byte chunks, 2 zones */
547e2caf0e7SMatthew Dillon 		} else {
548e2caf0e7SMatthew Dillon 			*bytes = n = (n + 15) & ~15;
549e2caf0e7SMatthew Dillon 			*chunking = 16;
550e2caf0e7SMatthew Dillon 			return(n / 16 + 2);	/* 16 byte chunks, 8 zones */
551e2caf0e7SMatthew Dillon 		}
552ff10d954SMatthew Dillon 	}
553ff10d954SMatthew Dillon 	if (n < 4096) {
554ff10d954SMatthew Dillon 		x = 256;
555ff10d954SMatthew Dillon 		c = x / (CHUNKFACTOR * 2);
556ff10d954SMatthew Dillon 		i = 16;
557ff10d954SMatthew Dillon 	} else {
558ff10d954SMatthew Dillon 		x = 8192;
559ff10d954SMatthew Dillon 		c = x / (CHUNKFACTOR * 2);
560ff10d954SMatthew Dillon 		i = 16 + CHUNKFACTOR * 5;  /* 256->512,1024,2048,4096,8192 */
561ff10d954SMatthew Dillon 	}
562ff10d954SMatthew Dillon 	while (n >= x) {
563ff10d954SMatthew Dillon 		x <<= 1;
564ff10d954SMatthew Dillon 		c <<= 1;
565ff10d954SMatthew Dillon 		i += CHUNKFACTOR;
566ff10d954SMatthew Dillon 		if (x == 0)
567ff10d954SMatthew Dillon 			_mpanic("slaballoc: byte value too high");
568ff10d954SMatthew Dillon 	}
569965b839fSSascha Wildner 	*bytes = n = roundup2(n, c);
570ff10d954SMatthew Dillon 	*chunking = c;
571ff10d954SMatthew Dillon 	return (i + n / c - CHUNKFACTOR);
572ff10d954SMatthew Dillon #if 0
573ff10d954SMatthew Dillon 	*bytes = n = (n + c - 1) & ~(c - 1);
574ff10d954SMatthew Dillon 	*chunking = c;
575ff10d954SMatthew Dillon 	return (n / c + i);
576ff10d954SMatthew Dillon 
577ff10d954SMatthew Dillon 	if (n < 256) {
578ff10d954SMatthew Dillon 		*bytes = n = (n + 15) & ~15;
579ff10d954SMatthew Dillon 		*chunking = 16;
580ff10d954SMatthew Dillon 		return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1);
581ff10d954SMatthew Dillon 	}
582ff10d954SMatthew Dillon 	if (n < 8192) {
583ff10d954SMatthew Dillon 		if (n < 512) {
584ff10d954SMatthew Dillon 			*bytes = n = (n + 31) & ~31;
585ff10d954SMatthew Dillon 			*chunking = 32;
586ff10d954SMatthew Dillon 			return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1);
587ff10d954SMatthew Dillon 		}
588ff10d954SMatthew Dillon 		if (n < 1024) {
589ff10d954SMatthew Dillon 			*bytes = n = (n + 63) & ~63;
590ff10d954SMatthew Dillon 			*chunking = 64;
591ff10d954SMatthew Dillon 			return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1);
592ff10d954SMatthew Dillon 		}
593ff10d954SMatthew Dillon 		if (n < 2048) {
594ff10d954SMatthew Dillon 			*bytes = n = (n + 127) & ~127;
595ff10d954SMatthew Dillon 			*chunking = 128;
596ff10d954SMatthew Dillon 			return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1);
597ff10d954SMatthew Dillon 		}
598ff10d954SMatthew Dillon 		if (n < 4096) {
599ff10d954SMatthew Dillon 			*bytes = n = (n + 255) & ~255;
600ff10d954SMatthew Dillon 			*chunking = 256;
601ff10d954SMatthew Dillon 			return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1);
602ff10d954SMatthew Dillon 		}
603ff10d954SMatthew Dillon 		*bytes = n = (n + 511) & ~511;
604ff10d954SMatthew Dillon 		*chunking = 512;
605ff10d954SMatthew Dillon 		return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1);
606ff10d954SMatthew Dillon 	}
607ff10d954SMatthew Dillon 	if (n < 16384) {
608ff10d954SMatthew Dillon 		*bytes = n = (n + 1023) & ~1023;
609ff10d954SMatthew Dillon 		*chunking = 1024;
610ff10d954SMatthew Dillon 		return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1);
611ff10d954SMatthew Dillon 	}
612ff10d954SMatthew Dillon 	if (n < 32768) {				/* 16384-32767 */
613ff10d954SMatthew Dillon 		*bytes = n = (n + 2047) & ~2047;
614ff10d954SMatthew Dillon 		*chunking = 2048;
615ff10d954SMatthew Dillon 		return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1);
616ff10d954SMatthew Dillon 	}
617ff10d954SMatthew Dillon 	if (n < 65536) {
618ff10d954SMatthew Dillon 		*bytes = n = (n + 4095) & ~4095;	/* 32768-65535 */
619ff10d954SMatthew Dillon 		*chunking = 4096;
620ff10d954SMatthew Dillon 		return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1);
621ff10d954SMatthew Dillon 	}
622ff10d954SMatthew Dillon 
623ff10d954SMatthew Dillon 	x = 131072;
624ff10d954SMatthew Dillon 	c = 8192;
625ff10d954SMatthew Dillon 	i = CHUNKINGLO*10 - 1;
626ff10d954SMatthew Dillon 
627ff10d954SMatthew Dillon 	while (n >= x) {
628ff10d954SMatthew Dillon 		x <<= 1;
629ff10d954SMatthew Dillon 		c <<= 1;
630ff10d954SMatthew Dillon 		i += CHUNKINGHI;
631ff10d954SMatthew Dillon 		if (x == 0)
632ff10d954SMatthew Dillon 			_mpanic("slaballoc: byte value too high");
633ff10d954SMatthew Dillon 	}
634ff10d954SMatthew Dillon 	*bytes = n = (n + c - 1) & ~(c - 1);
635ff10d954SMatthew Dillon 	*chunking = c;
636ff10d954SMatthew Dillon 	return (n / c + i);
637ff10d954SMatthew Dillon #endif
638ff10d954SMatthew Dillon }
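
/*
 * Worked examples for zoneindex(), computed by hand from the code above
 * (illustrative, not normative):
 *
 *	request		rounded size	chunking	zone index
 *	100		112		16		9
 *	3000		3072		256		52
 *
 * i.e. a 100 byte request rounds up to the next 16-byte chunk and a
 * 3000 byte request to the next 256-byte chunk, matching the chunking
 * table in the file header.
 */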
639ff10d954SMatthew Dillon 
640ff10d954SMatthew Dillon /*
641ff10d954SMatthew Dillon  * malloc() - call internal slab allocator
642ff10d954SMatthew Dillon  */
643ff10d954SMatthew Dillon void *
644e2caf0e7SMatthew Dillon __malloc(size_t size)
645ff10d954SMatthew Dillon {
646ff10d954SMatthew Dillon 	void *ptr;
647ff10d954SMatthew Dillon 
648ff10d954SMatthew Dillon 	ptr = memalloc(size, 0);
649ff10d954SMatthew Dillon 	if (ptr == NULL)
650ff10d954SMatthew Dillon 		errno = ENOMEM;
651ff10d954SMatthew Dillon 	else
652ff10d954SMatthew Dillon 		UTRACE(0, size, ptr);
653ff10d954SMatthew Dillon 	return(ptr);
654ff10d954SMatthew Dillon }
655ff10d954SMatthew Dillon 
656ff10d954SMatthew Dillon /*
657ff10d954SMatthew Dillon  * calloc() - call internal slab allocator
658ff10d954SMatthew Dillon  */
659ff10d954SMatthew Dillon void *
660e2caf0e7SMatthew Dillon __calloc(size_t number, size_t size)
661ff10d954SMatthew Dillon {
662ff10d954SMatthew Dillon 	void *ptr;
663ff10d954SMatthew Dillon 
664ff10d954SMatthew Dillon 	ptr = memalloc(number * size, SAFLAG_ZERO);
665ff10d954SMatthew Dillon 	if (ptr == NULL)
666ff10d954SMatthew Dillon 		errno = ENOMEM;
667ff10d954SMatthew Dillon 	else
668ff10d954SMatthew Dillon 		UTRACE(0, number * size, ptr);
669ff10d954SMatthew Dillon 	return(ptr);
670ff10d954SMatthew Dillon }
671ff10d954SMatthew Dillon 
672ff10d954SMatthew Dillon /*
673ff10d954SMatthew Dillon  * realloc() (SLAB ALLOCATOR)
674ff10d954SMatthew Dillon  *
675ff10d954SMatthew Dillon  * We do not attempt to optimize this routine beyond reusing the same
676ff10d954SMatthew Dillon  * pointer if the new size fits within the chunking of the old pointer's
677ff10d954SMatthew Dillon  * zone.
678ff10d954SMatthew Dillon  */
679ff10d954SMatthew Dillon void *
680e2caf0e7SMatthew Dillon __realloc(void *ptr, size_t size)
681ff10d954SMatthew Dillon {
682ff10d954SMatthew Dillon 	void *ret;
683ff10d954SMatthew Dillon 
684ff10d954SMatthew Dillon 	if (ptr == NULL)
685ff10d954SMatthew Dillon 		ret = memalloc(size, 0);
686ff10d954SMatthew Dillon 	else
687ff10d954SMatthew Dillon 		ret = memrealloc(ptr, size);
688ff10d954SMatthew Dillon 	if (ret == NULL)
689ff10d954SMatthew Dillon 		errno = ENOMEM;
690ff10d954SMatthew Dillon 	else
691ff10d954SMatthew Dillon 		UTRACE(ptr, size, ret);
692ff10d954SMatthew Dillon 	return(ret);
693ff10d954SMatthew Dillon }
694ff10d954SMatthew Dillon 
695ff10d954SMatthew Dillon /*
696c9cb4430Szrj  * aligned_alloc()
697c9cb4430Szrj  *
698c9cb4430Szrj  * Allocate (size) bytes with an alignment of (alignment).
699c9cb4430Szrj  */
700c9cb4430Szrj void *
701c9cb4430Szrj __aligned_alloc(size_t alignment, size_t size)
702c9cb4430Szrj {
703c9cb4430Szrj 	void *ptr;
704c9cb4430Szrj 	int rc;
705c9cb4430Szrj 
706c9cb4430Szrj 	ptr = NULL;
707c9cb4430Szrj 	rc = memalign(&ptr, alignment, size);
708c9cb4430Szrj 	if (rc)
709c9cb4430Szrj 		errno = rc;
710c9cb4430Szrj 
711c9cb4430Szrj 	return (ptr);
712c9cb4430Szrj }
713c9cb4430Szrj 
714c9cb4430Szrj /*
715ff10d954SMatthew Dillon  * posix_memalign()
716ff10d954SMatthew Dillon  *
717ff10d954SMatthew Dillon  * Allocate (size) bytes with an alignment of (alignment), where (alignment)
718ff10d954SMatthew Dillon  * is a power of 2 >= sizeof(void *).
719ff10d954SMatthew Dillon  */
720ff10d954SMatthew Dillon int
721e2caf0e7SMatthew Dillon __posix_memalign(void **memptr, size_t alignment, size_t size)
722ff10d954SMatthew Dillon {
723c9cb4430Szrj 	int rc;
724c9cb4430Szrj 
725ff10d954SMatthew Dillon 	/*
726c9cb4430Szrj 	 * OpenGroup spec issue 6 check
727ff10d954SMatthew Dillon 	 */
728c9cb4430Szrj 	if (alignment < sizeof(void *)) {
729ff10d954SMatthew Dillon 		*memptr = NULL;
730ff10d954SMatthew Dillon 		return(EINVAL);
731ff10d954SMatthew Dillon 	}
732c9cb4430Szrj 
733c9cb4430Szrj 	rc = memalign(memptr, alignment, size);
734c9cb4430Szrj 
735c9cb4430Szrj 	return (rc);
736c9cb4430Szrj }
737c9cb4430Szrj 
738c9cb4430Szrj /*
739c9cb4430Szrj  * The slab allocator will allocate on power-of-2 boundaries up to at least
740c9cb4430Szrj  * PAGE_SIZE.  Otherwise we use the zoneindex mechanic to find a zone
741c9cb4430Szrj  * matching the requirements.
742c9cb4430Szrj  */
743c9cb4430Szrj static int
744c9cb4430Szrj memalign(void **memptr, size_t alignment, size_t size)
745c9cb4430Szrj {
746c9cb4430Szrj 
747c9cb4430Szrj 	if (alignment < 1) {
748c9cb4430Szrj 		*memptr = NULL;
749c9cb4430Szrj 		return(EINVAL);
750c9cb4430Szrj 	}
751c9cb4430Szrj 
752c9cb4430Szrj 	/*
753c9cb4430Szrj 	 * OpenGroup spec issue 6 check
754c9cb4430Szrj 	 */
755c9cb4430Szrj 	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
756ff10d954SMatthew Dillon 		*memptr = NULL;
757ff10d954SMatthew Dillon 		return(EINVAL);
758ff10d954SMatthew Dillon 	}
759ff10d954SMatthew Dillon 
760ff10d954SMatthew Dillon 	/*
761ff10d954SMatthew Dillon 	 * XXX for now just find the nearest power of 2 >= size and also
762ff10d954SMatthew Dillon 	 * >= alignment and allocate that.
763ff10d954SMatthew Dillon 	 */
764ff10d954SMatthew Dillon 	while (alignment < size) {
765ff10d954SMatthew Dillon 		alignment <<= 1;
766ff10d954SMatthew Dillon 		if (alignment == 0)
767ff10d954SMatthew Dillon 			_mpanic("posix_memalign: byte value too high");
768ff10d954SMatthew Dillon 	}
769ff10d954SMatthew Dillon 	*memptr = memalloc(alignment, 0);
770ff10d954SMatthew Dillon 	return(*memptr ? 0 : ENOMEM);
771ff10d954SMatthew Dillon }
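
/*
 * Behavioral sketch (follows from the XXX note above, not a spec): because
 * memalign() doubles 'alignment' until it covers 'size' and then allocates
 * that power of 2, a call such as
 *
 *	void *p;
 *	posix_memalign(&p, 64, 100);	// allocates 128 bytes, 128-aligned
 *
 * over-allocates somewhat but trivially satisfies the requested alignment,
 * since power-of-2 sized allocations up to a page are power-of-2 aligned.
 */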
772ff10d954SMatthew Dillon 
773ff10d954SMatthew Dillon /*
774ff10d954SMatthew Dillon  * free() (SLAB ALLOCATOR) - do the obvious
775ff10d954SMatthew Dillon  */
776ff10d954SMatthew Dillon void
777e2caf0e7SMatthew Dillon __free(void *ptr)
778ff10d954SMatthew Dillon {
779ff10d954SMatthew Dillon 	if (ptr) {
780ff10d954SMatthew Dillon 		UTRACE(ptr, 0, 0);
781ff10d954SMatthew Dillon 		memfree(ptr, 0);
782ff10d954SMatthew Dillon 	}
783ff10d954SMatthew Dillon }
784ff10d954SMatthew Dillon 
785ff10d954SMatthew Dillon /*
786ff10d954SMatthew Dillon  * memalloc()	(SLAB ALLOCATOR)
787ff10d954SMatthew Dillon  *
788ff10d954SMatthew Dillon  *	Allocate memory via the slab allocator.
789ff10d954SMatthew Dillon  */
790ff10d954SMatthew Dillon static void *
791ff10d954SMatthew Dillon memalloc(size_t size, int flags)
792ff10d954SMatthew Dillon {
793ff10d954SMatthew Dillon 	slglobaldata_t slgd;
794ff10d954SMatthew Dillon 	struct zoneinfo *zinfo;
795ff10d954SMatthew Dillon 	slab_t slab;
796ff10d954SMatthew Dillon 	size_t chunking;
797ff10d954SMatthew Dillon 	int bmi;
798ff10d954SMatthew Dillon 	int bno;
799ff10d954SMatthew Dillon 	u_long *bmp;
800ff10d954SMatthew Dillon 	int zi;
801ff10d954SMatthew Dillon #ifdef INVARIANTS
802ff10d954SMatthew Dillon 	int i;
803ff10d954SMatthew Dillon #endif
804e2caf0e7SMatthew Dillon 	int j;
805ff10d954SMatthew Dillon 	char *obj;
806ff10d954SMatthew Dillon 
807ff10d954SMatthew Dillon 	/*
808ff10d954SMatthew Dillon 	 * If 0 bytes is requested we have to return a unique pointer; allocate
809ff10d954SMatthew Dillon 	 * at least one byte.
810ff10d954SMatthew Dillon 	 */
811ff10d954SMatthew Dillon 	if (size == 0)
812ff10d954SMatthew Dillon 		size = 1;
813ff10d954SMatthew Dillon 
814ff10d954SMatthew Dillon 	/* Capture global flags */
815ff10d954SMatthew Dillon 	flags |= g_malloc_flags;
816ff10d954SMatthew Dillon 
817ff10d954SMatthew Dillon 	/* Compute allocation zone; zoneindex will panic on excessive sizes */
818ff10d954SMatthew Dillon 	zi = zoneindex(&size, &chunking);
819ff10d954SMatthew Dillon 	MASSERT(zi < NZONES);
820ff10d954SMatthew Dillon 	if (size == 0)
821ff10d954SMatthew Dillon 		return(NULL);
822ff10d954SMatthew Dillon 
823ff10d954SMatthew Dillon 	/*
824e2caf0e7SMatthew Dillon 	 * Try magazine shortcut first
825e2caf0e7SMatthew Dillon 	 */
826e2caf0e7SMatthew Dillon 	slgd = &slglobal;
827e2caf0e7SMatthew Dillon 	zinfo = &slgd->zone[zi];
828e2caf0e7SMatthew Dillon 
829e2caf0e7SMatthew Dillon 	if ((j = zinfo->mag_index) != 0) {
830e2caf0e7SMatthew Dillon 		zinfo->mag_index = --j;
831e2caf0e7SMatthew Dillon 		obj = zinfo->mag_shortcut[j];
832e2caf0e7SMatthew Dillon 		zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
833e2caf0e7SMatthew Dillon 		if (flags & SAFLAG_ZERO)
834e2caf0e7SMatthew Dillon 			bzero(obj, size);
835e2caf0e7SMatthew Dillon 		return obj;
836e2caf0e7SMatthew Dillon 	}
837e2caf0e7SMatthew Dillon 
838e2caf0e7SMatthew Dillon 	/*
839ff10d954SMatthew Dillon 	 * Locate a slab with available space.  If no slabs are available
840ff10d954SMatthew Dillon 	 * back-off to the empty list and if we still come up dry allocate
841ff10d954SMatthew Dillon 	 * a new slab (which will try the depot first).
842ff10d954SMatthew Dillon 	 */
843ff10d954SMatthew Dillon retry:
844ff10d954SMatthew Dillon 	if ((slab = zinfo->avail_base) == NULL) {
845ff10d954SMatthew Dillon 		if ((slab = zinfo->empty_base) == NULL) {
846ff10d954SMatthew Dillon 			/*
847ff10d954SMatthew Dillon 			 * Still dry
848ff10d954SMatthew Dillon 			 */
849ff10d954SMatthew Dillon 			slab = slaballoc(zi, chunking, size);
850ff10d954SMatthew Dillon 			if (slab == NULL)
851ff10d954SMatthew Dillon 				return(NULL);
852ff10d954SMatthew Dillon 			slab->next = zinfo->avail_base;
853ff10d954SMatthew Dillon 			zinfo->avail_base = slab;
854e2caf0e7SMatthew Dillon 			++zinfo->avail_count;
855ff10d954SMatthew Dillon 			slab->state = AVAIL;
856ff10d954SMatthew Dillon 			if (slgd->biggest_index < zi)
857ff10d954SMatthew Dillon 				slgd->biggest_index = zi;
858ff10d954SMatthew Dillon 			++slgd->nslabs;
859ff10d954SMatthew Dillon 		} else {
860ff10d954SMatthew Dillon 			/*
861ff10d954SMatthew Dillon 			 * Pulled from empty list
862ff10d954SMatthew Dillon 			 */
863ff10d954SMatthew Dillon 			zinfo->empty_base = slab->next;
864ff10d954SMatthew Dillon 			slab->next = zinfo->avail_base;
865ff10d954SMatthew Dillon 			zinfo->avail_base = slab;
866e2caf0e7SMatthew Dillon 			++zinfo->avail_count;
867ff10d954SMatthew Dillon 			slab->state = AVAIL;
868ff10d954SMatthew Dillon 			--zinfo->empty_count;
869ff10d954SMatthew Dillon 		}
870ff10d954SMatthew Dillon 	}
871ff10d954SMatthew Dillon 
872ff10d954SMatthew Dillon 	/*
873ff10d954SMatthew Dillon 	 * Allocate a chunk out of the slab.  HOT PATH
874ff10d954SMatthew Dillon 	 *
875ff10d954SMatthew Dillon 	 * Only the thread owning the slab can allocate out of it.
876ff10d954SMatthew Dillon 	 *
877ff10d954SMatthew Dillon 	 * NOTE: The last bit in the bitmap is always marked allocated so
878ff10d954SMatthew Dillon 	 *	 we cannot overflow here.
879ff10d954SMatthew Dillon 	 */
880ff10d954SMatthew Dillon 	bno = slab->free_bit;
881ff10d954SMatthew Dillon 	bmi = slab->free_index;
882ff10d954SMatthew Dillon 	bmp = &slab->bitmap[bmi];
883ff10d954SMatthew Dillon 	if (*bmp & (1LU << bno)) {
884ff10d954SMatthew Dillon 		atomic_clear_long(bmp, 1LU << bno);
885ff10d954SMatthew Dillon 		obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size;
886ff10d954SMatthew Dillon 		slab->free_bit = (bno + 1) & (LONG_BITS - 1);
887ff10d954SMatthew Dillon 		atomic_add_int(&slab->navail, -1);
888ff10d954SMatthew Dillon 		if (flags & SAFLAG_ZERO)
889ff10d954SMatthew Dillon 			bzero(obj, size);
890ff10d954SMatthew Dillon 		return (obj);
891ff10d954SMatthew Dillon 	}
892ff10d954SMatthew Dillon 
893ff10d954SMatthew Dillon 	/*
894ff10d954SMatthew Dillon 	 * Allocate a chunk out of a slab.  COLD PATH
895ff10d954SMatthew Dillon 	 */
896ff10d954SMatthew Dillon 	if (slab->navail == 0) {
897ff10d954SMatthew Dillon 		zinfo->avail_base = slab->next;
898e2caf0e7SMatthew Dillon 		--zinfo->avail_count;
899ff10d954SMatthew Dillon 		slab->state = FULL;
900ff10d954SMatthew Dillon 		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
901ff10d954SMatthew Dillon 		goto retry;
902ff10d954SMatthew Dillon 	}
903ff10d954SMatthew Dillon 
904ff10d954SMatthew Dillon 	while (bmi < LONG_BITS) {
905ff10d954SMatthew Dillon 		bmp = &slab->bitmap[bmi];
906ff10d954SMatthew Dillon 		if (*bmp) {
907ff10d954SMatthew Dillon 			bno = bsflong(*bmp);
908ff10d954SMatthew Dillon 			atomic_clear_long(bmp, 1LU << bno);
909ff10d954SMatthew Dillon 			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
910ff10d954SMatthew Dillon 					     size;
911ff10d954SMatthew Dillon 			slab->free_index = bmi;
912ff10d954SMatthew Dillon 			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
913ff10d954SMatthew Dillon 			atomic_add_int(&slab->navail, -1);
914ff10d954SMatthew Dillon 			if (flags & SAFLAG_ZERO)
915ff10d954SMatthew Dillon 				bzero(obj, size);
916ff10d954SMatthew Dillon 			return (obj);
917ff10d954SMatthew Dillon 		}
918ff10d954SMatthew Dillon 		++bmi;
919ff10d954SMatthew Dillon 	}
920ff10d954SMatthew Dillon 	bmi = 0;
921ff10d954SMatthew Dillon 	while (bmi < LONG_BITS) {
922ff10d954SMatthew Dillon 		bmp = &slab->bitmap[bmi];
923ff10d954SMatthew Dillon 		if (*bmp) {
924ff10d954SMatthew Dillon 			bno = bsflong(*bmp);
925ff10d954SMatthew Dillon 			atomic_clear_long(bmp, 1LU << bno);
926ff10d954SMatthew Dillon 			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
927ff10d954SMatthew Dillon 					     size;
928ff10d954SMatthew Dillon 			slab->free_index = bmi;
929ff10d954SMatthew Dillon 			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
930ff10d954SMatthew Dillon 			atomic_add_int(&slab->navail, -1);
931ff10d954SMatthew Dillon 			if (flags & SAFLAG_ZERO)
932ff10d954SMatthew Dillon 				bzero(obj, size);
933ff10d954SMatthew Dillon 			return (obj);
934ff10d954SMatthew Dillon 		}
935ff10d954SMatthew Dillon 		++bmi;
936ff10d954SMatthew Dillon 	}
937ff10d954SMatthew Dillon 	_mpanic("slaballoc: corrupted zone: navail %d", slab->navail);
938ff10d954SMatthew Dillon 	/* not reached */
939ff10d954SMatthew Dillon 	return NULL;
940ff10d954SMatthew Dillon }
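
/*
 * Chunk addressing recap (descriptive): a chunk's index within its slab is
 * (bmi * LONG_BITS + bno), so its address is
 *
 *	obj = slab->chunks + (bmi * LONG_BITS + bno) * chunk_size;
 *
 * which is exactly the (bmi << LONG_BITS_SHIFT) + bno computation used in
 * both the hot and cold paths above.
 */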
941ff10d954SMatthew Dillon 
942ff10d954SMatthew Dillon /*
943ff10d954SMatthew Dillon  * Reallocate memory within the chunk
944ff10d954SMatthew Dillon  */
945ff10d954SMatthew Dillon static void *
946ff10d954SMatthew Dillon memrealloc(void *ptr, size_t nsize)
947ff10d954SMatthew Dillon {
948ff10d954SMatthew Dillon 	region_t region;
949ff10d954SMatthew Dillon 	slab_t slab;
950ff10d954SMatthew Dillon 	size_t osize;
951ff10d954SMatthew Dillon 	char *obj;
9522d4b0500SImre Vadasz 	int flags = 0;
953ff10d954SMatthew Dillon 
954ff10d954SMatthew Dillon 	/*
955ff10d954SMatthew Dillon 	 * If 0 bytes is requested we have to return a unique pointer; allocate
956ff10d954SMatthew Dillon 	 * at least one byte.
957ff10d954SMatthew Dillon 	 */
958ff10d954SMatthew Dillon 	if (nsize == 0)
959ff10d954SMatthew Dillon 		nsize = 1;
960ff10d954SMatthew Dillon 
961ff10d954SMatthew Dillon 	/* Capture global flags */
962ff10d954SMatthew Dillon 	flags |= g_malloc_flags;
963ff10d954SMatthew Dillon 
964ff10d954SMatthew Dillon 	/*
965ff10d954SMatthew Dillon 	 * Locate the zone by looking up the dynamic slab size mask based
966ff10d954SMatthew Dillon 	 * on the memory region the allocation resides in.
967ff10d954SMatthew Dillon 	 */
968ff10d954SMatthew Dillon 	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
969ff10d954SMatthew Dillon 	if ((slab = region->slab) == NULL)
970ff10d954SMatthew Dillon 		slab = (void *)((uintptr_t)ptr & region->mask);
971ff10d954SMatthew Dillon 	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
972ff10d954SMatthew Dillon 	osize = slab->chunk_size;
973ff10d954SMatthew Dillon 	if (nsize <= osize) {
974ff10d954SMatthew Dillon 		if (osize < 32 || nsize >= osize / 2) {
975ff10d954SMatthew Dillon 			obj = ptr;
976ff10d954SMatthew Dillon 			if ((flags & SAFLAG_ZERO) && nsize < osize)
977ff10d954SMatthew Dillon 				bzero(obj + nsize, osize - nsize);
978ff10d954SMatthew Dillon 			return(obj);
979ff10d954SMatthew Dillon 		}
980ff10d954SMatthew Dillon 	}
981ff10d954SMatthew Dillon 
982ff10d954SMatthew Dillon 	/*
983ff10d954SMatthew Dillon 	 * Otherwise resize the object
984ff10d954SMatthew Dillon 	 */
985ff10d954SMatthew Dillon 	obj = memalloc(nsize, 0);
986ff10d954SMatthew Dillon 	if (obj) {
987ff10d954SMatthew Dillon 		if (nsize > osize)
988ff10d954SMatthew Dillon 			nsize = osize;
989ff10d954SMatthew Dillon 		bcopy(ptr, obj, nsize);
990ff10d954SMatthew Dillon 		memfree(ptr, 0);
991ff10d954SMatthew Dillon 	}
992ff10d954SMatthew Dillon 	return (obj);
993ff10d954SMatthew Dillon }
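
/*
 * Reuse rule illustration (derived from memrealloc() above): shrinking an
 * allocation keeps the original pointer as long as the new size is at
 * least half the old chunk size (or the old chunk is under 32 bytes):
 *
 *	p = malloc(4000);	// lands in a 4096-byte chunk
 *	p = realloc(p, 2100);	// 2100 >= 4096/2, pointer reused
 *	p = realloc(p, 100);	// 100 < 4096/2, new allocation + copy
 */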
994ff10d954SMatthew Dillon 
995ff10d954SMatthew Dillon /*
996ff10d954SMatthew Dillon  * free (SLAB ALLOCATOR)
997ff10d954SMatthew Dillon  *
998ff10d954SMatthew Dillon  * Free a memory block previously allocated by malloc.
999ff10d954SMatthew Dillon  *
1000ff10d954SMatthew Dillon  * MPSAFE
1001ff10d954SMatthew Dillon  */
1002ff10d954SMatthew Dillon static void
1003ff10d954SMatthew Dillon memfree(void *ptr, int flags)
1004ff10d954SMatthew Dillon {
1005ff10d954SMatthew Dillon 	region_t region;
1006ff10d954SMatthew Dillon 	slglobaldata_t slgd;
1007ff10d954SMatthew Dillon 	slab_t slab;
1008ff10d954SMatthew Dillon 	slab_t stmp;
1009ff10d954SMatthew Dillon 	slab_t *slabp;
1010ff10d954SMatthew Dillon 	int bmi;
1011ff10d954SMatthew Dillon 	int bno;
1012e2caf0e7SMatthew Dillon 	int j;
1013ff10d954SMatthew Dillon 	u_long *bmp;
1014ff10d954SMatthew Dillon 
1015ff10d954SMatthew Dillon 	/*
1016ff10d954SMatthew Dillon 	 * Locate the zone by looking up the dynamic slab size mask based
1017ff10d954SMatthew Dillon 	 * on the memory region the allocation resides in.
1018ff10d954SMatthew Dillon 	 *
1019ff10d954SMatthew Dillon 	 * WARNING!  The slab may be owned by another thread!
1020ff10d954SMatthew Dillon 	 */
1021ff10d954SMatthew Dillon 	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
1022ff10d954SMatthew Dillon 	if ((slab = region->slab) == NULL)
1023ff10d954SMatthew Dillon 		slab = (void *)((uintptr_t)ptr & region->mask);
1024ff10d954SMatthew Dillon 	MASSERT(slab != NULL);
1025ff10d954SMatthew Dillon 	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
1026ff10d954SMatthew Dillon 
1027ff10d954SMatthew Dillon #ifdef INVARIANTS
1028ff10d954SMatthew Dillon 	/*
1029ff10d954SMatthew Dillon 	 * Put weird data into the memory to detect modifications after
1030ff10d954SMatthew Dillon 	 * freeing, illegal pointer use after freeing (we should fault on
1031ff10d954SMatthew Dillon 	 * the odd address), and so forth.
1032ff10d954SMatthew Dillon 	 */
1033ff10d954SMatthew Dillon 	if (slab->chunk_size < sizeof(weirdary))
1034ff10d954SMatthew Dillon 		bcopy(weirdary, ptr, slab->chunk_size);
1035ff10d954SMatthew Dillon 	else
1036ff10d954SMatthew Dillon 		bcopy(weirdary, ptr, sizeof(weirdary));
1037ff10d954SMatthew Dillon #endif
1038e2caf0e7SMatthew Dillon 	slgd = &slglobal;
1039ff10d954SMatthew Dillon 
1040e2caf0e7SMatthew Dillon 	/*
1041e2caf0e7SMatthew Dillon 	 * Use mag_shortcut[] when possible
1042e2caf0e7SMatthew Dillon 	 */
1043e2caf0e7SMatthew Dillon 	if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) {
1044e2caf0e7SMatthew Dillon 		struct zoneinfo *zinfo;
1045e2caf0e7SMatthew Dillon 
1046e2caf0e7SMatthew Dillon 		zinfo = &slgd->zone[slab->zone_index];
1047e2caf0e7SMatthew Dillon 		j = zinfo->mag_index;
1048e2caf0e7SMatthew Dillon 		if (j < NMAGSHORTCUT) {
1049e2caf0e7SMatthew Dillon 			zinfo->mag_shortcut[j] = ptr;
1050e2caf0e7SMatthew Dillon 			zinfo->mag_index = j + 1;
1051e2caf0e7SMatthew Dillon 			return;
1052e2caf0e7SMatthew Dillon 		}
1053e2caf0e7SMatthew Dillon 	}
1054e2caf0e7SMatthew Dillon 
1055e2caf0e7SMatthew Dillon 	/*
1056e2caf0e7SMatthew Dillon 	 * Free to slab and increment navail.  We can delay incrementing
1057e2caf0e7SMatthew Dillon 	 * navail to prevent the slab from being destroyed out from under
1058e2caf0e7SMatthew Dillon 	 * us while we do other optimizations.
1059e2caf0e7SMatthew Dillon 	 */
1060ff10d954SMatthew Dillon 	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
1061ff10d954SMatthew Dillon 	bmi = bno >> LONG_BITS_SHIFT;
1062ff10d954SMatthew Dillon 	bno &= (LONG_BITS - 1);
1063ff10d954SMatthew Dillon 	bmp = &slab->bitmap[bmi];
1064ff10d954SMatthew Dillon 
1065ff10d954SMatthew Dillon 	MASSERT(bmi >= 0 && bmi < slab->nmax);
1066ff10d954SMatthew Dillon 	MASSERT((*bmp & (1LU << bno)) == 0);
1067ff10d954SMatthew Dillon 	atomic_set_long(bmp, 1LU << bno);
1068ff10d954SMatthew Dillon 
1069ff10d954SMatthew Dillon 	if (slab->slgd == slgd) {
1070e2caf0e7SMatthew Dillon 		/*
1071e2caf0e7SMatthew Dillon 		 * We can only do the following if we own the slab.  Note
1072e2caf0e7SMatthew Dillon 		 * that navail can be incremented by any thread even if
1073e2caf0e7SMatthew Dillon 		 * we own the slab.
1074e2caf0e7SMatthew Dillon 		 */
1075ff10d954SMatthew Dillon 		struct zoneinfo *zinfo;
1076ff10d954SMatthew Dillon 
1077e2caf0e7SMatthew Dillon 		atomic_add_int(&slab->navail, 1);
1078ff10d954SMatthew Dillon 		if (slab->free_index > bmi) {
1079ff10d954SMatthew Dillon 			slab->free_index = bmi;
1080ff10d954SMatthew Dillon 			slab->free_bit = bno;
1081ff10d954SMatthew Dillon 		} else if (slab->free_index == bmi &&
1082ff10d954SMatthew Dillon 			   slab->free_bit > bno) {
1083ff10d954SMatthew Dillon 			slab->free_bit = bno;
1084ff10d954SMatthew Dillon 		}
1085ff10d954SMatthew Dillon 		zinfo = &slgd->zone[slab->zone_index];
1086ff10d954SMatthew Dillon 
1087ff10d954SMatthew Dillon 		/*
1088e2caf0e7SMatthew Dillon 		 * Freeing an object from a full slab makes it less than
1089e2caf0e7SMatthew Dillon 		 * full.  The slab must be moved to the available list.
1090e2caf0e7SMatthew Dillon 		 *
1091e2caf0e7SMatthew Dillon 		 * If the available list has too many slabs, release some
1092e2caf0e7SMatthew Dillon 		 * to the depot.
1093ff10d954SMatthew Dillon 		 */
1094ff10d954SMatthew Dillon 		if (slab->state == FULL) {
1095ff10d954SMatthew Dillon 			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
1096ff10d954SMatthew Dillon 			slab->state = AVAIL;
1097ff10d954SMatthew Dillon 			stmp = zinfo->avail_base;
1098ff10d954SMatthew Dillon 			slab->next = stmp;
1099ff10d954SMatthew Dillon 			zinfo->avail_base = slab;
1100e2caf0e7SMatthew Dillon 			++zinfo->avail_count;
1101e2caf0e7SMatthew Dillon 			while (zinfo->avail_count > opt_cache) {
1102e2caf0e7SMatthew Dillon 				slab = zinfo->avail_base;
1103e2caf0e7SMatthew Dillon 				zinfo->avail_base = slab->next;
1104e2caf0e7SMatthew Dillon 				--zinfo->avail_count;
1105ff10d954SMatthew Dillon 				slabterm(slgd, slab);
1106ff10d954SMatthew Dillon 			}
1107e2caf0e7SMatthew Dillon 			goto done;
1108ff10d954SMatthew Dillon 		}
1109ff10d954SMatthew Dillon 
1110ff10d954SMatthew Dillon 		/*
1111ff10d954SMatthew Dillon 		 * If the slab becomes completely empty dispose of it in
1112ff10d954SMatthew Dillon 		 * some manner.  By default each thread caches up to 4
1113ff10d954SMatthew Dillon 		 * empty slabs.  Only small slabs are cached.
1114ff10d954SMatthew Dillon 		 */
1115ff10d954SMatthew Dillon 		if (slab->navail == slab->nmax && slab->state == AVAIL) {
1116ff10d954SMatthew Dillon 			/*
1117ff10d954SMatthew Dillon 			 * Remove slab from available queue
1118ff10d954SMatthew Dillon 			 */
1119ff10d954SMatthew Dillon 			slabp = &zinfo->avail_base;
1120ff10d954SMatthew Dillon 			while ((stmp = *slabp) != slab)
1121ff10d954SMatthew Dillon 				slabp = &stmp->next;
1122ff10d954SMatthew Dillon 			*slabp = slab->next;
1123e2caf0e7SMatthew Dillon 			--zinfo->avail_count;
1124ff10d954SMatthew Dillon 
1125ff10d954SMatthew Dillon 			if (opt_free || opt_cache == 0) {
1126ff10d954SMatthew Dillon 				/*
1127ff10d954SMatthew Dillon 				 * If local caching is disabled, cache the
1128ff10d954SMatthew Dillon 				 * slab in the depot (or free it).
1129ff10d954SMatthew Dillon 				 */
1130ff10d954SMatthew Dillon 				slabterm(slgd, slab);
1131ff10d954SMatthew Dillon 			} else if (slab->slab_size > BIGSLABSIZE) {
1132ff10d954SMatthew Dillon 				/*
1133ff10d954SMatthew Dillon 				 * We do not try to retain large slabs
1134ff10d954SMatthew Dillon 				 * in per-thread caches.
1135ff10d954SMatthew Dillon 				 */
1136ff10d954SMatthew Dillon 				slabterm(slgd, slab);
1137ff10d954SMatthew Dillon 			} else if (zinfo->empty_count > opt_cache) {
1138ff10d954SMatthew Dillon 				/*
1139ff10d954SMatthew Dillon 				 * We have too many slabs cached, but
1140ff10d954SMatthew Dillon 				 * instead of freeing this one, free
1141ff10d954SMatthew Dillon 				 * an empty slab that's been idle longer.
1142ff10d954SMatthew Dillon 				 *
1143ff10d954SMatthew Dillon 				 * (empty_count does not change)
1144ff10d954SMatthew Dillon 				 */
1145ff10d954SMatthew Dillon 				stmp = zinfo->empty_base;
1146ff10d954SMatthew Dillon 				slab->state = EMPTY;
1147ff10d954SMatthew Dillon 				slab->next = stmp->next;
1148ff10d954SMatthew Dillon 				zinfo->empty_base = slab;
1149ff10d954SMatthew Dillon 				slabterm(slgd, stmp);
1150ff10d954SMatthew Dillon 			} else {
1151ff10d954SMatthew Dillon 				/*
1152ff10d954SMatthew Dillon 				 * Cache the empty slab in our thread local
1153ff10d954SMatthew Dillon 				 * empty list.
1154ff10d954SMatthew Dillon 				 */
1155ff10d954SMatthew Dillon 				++zinfo->empty_count;
1156ff10d954SMatthew Dillon 				slab->state = EMPTY;
1157ff10d954SMatthew Dillon 				slab->next = zinfo->empty_base;
1158ff10d954SMatthew Dillon 				zinfo->empty_base = slab;
1159ff10d954SMatthew Dillon 			}
1160ff10d954SMatthew Dillon 		}
1161e2caf0e7SMatthew Dillon 	} else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
1162e2caf0e7SMatthew Dillon 		slglobaldata_t sldepot;
1163e2caf0e7SMatthew Dillon 
1164e2caf0e7SMatthew Dillon 		/*
1165e2caf0e7SMatthew Dillon 		 * If freeing to a slab owned by the global depot, and
1166e2caf0e7SMatthew Dillon 		 * the slab becomes completely EMPTY, try to move it to
1167e2caf0e7SMatthew Dillon 		 * the correct list.
1168e2caf0e7SMatthew Dillon 		 */
1169e2caf0e7SMatthew Dillon 		sldepot = &slglobaldepot;
1170e2caf0e7SMatthew Dillon 		if (__isthreaded)
1171e2caf0e7SMatthew Dillon 			_SPINLOCK(&sldepot->lock);
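		/*
		 * Re-test the condition now that we hold the depot lock;
		 * another thread may have changed the slab's ownership or
		 * fill level since the unlocked test above.
		 */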
1172e2caf0e7SMatthew Dillon 		if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
1173e2caf0e7SMatthew Dillon 			struct zoneinfo *zinfo;
1174e2caf0e7SMatthew Dillon 
1175e2caf0e7SMatthew Dillon 			/*
1176e2caf0e7SMatthew Dillon 			 * Remove the slab from the depot's available list.
1177e2caf0e7SMatthew Dillon 			 */
1178e2caf0e7SMatthew Dillon 			MASSERT(slab->state == AVAIL);
1179e2caf0e7SMatthew Dillon 			atomic_add_int(&slab->navail, 1);
1180e2caf0e7SMatthew Dillon 			zinfo = &sldepot->zone[slab->zone_index];
1181e2caf0e7SMatthew Dillon 			slabp = &zinfo->avail_base;
1182e2caf0e7SMatthew Dillon 			while (slab != *slabp)
1183e2caf0e7SMatthew Dillon 				slabp = &(*slabp)->next;
1184e2caf0e7SMatthew Dillon 			*slabp = slab->next;
1185e2caf0e7SMatthew Dillon 			--zinfo->avail_count;
1186e2caf0e7SMatthew Dillon 
1187e2caf0e7SMatthew Dillon 			/*
1188e2caf0e7SMatthew Dillon 			 * Move the slab to the empty list, then clean
1189e2caf0e7SMatthew Dillon 			 * excessive empty entries out of the depot.
1190e2caf0e7SMatthew Dillon 			 */
1191e2caf0e7SMatthew Dillon 			slab->state = EMPTY;
1192e2caf0e7SMatthew Dillon 			slab->next = zinfo->empty_base;
1193e2caf0e7SMatthew Dillon 			zinfo->empty_base = slab;
1194e2caf0e7SMatthew Dillon 			++zinfo->empty_count;
1195e2caf0e7SMatthew Dillon 			while (zinfo->empty_count > opt_cache) {
1196e2caf0e7SMatthew Dillon 				slab = zinfo->empty_base;
1197e2caf0e7SMatthew Dillon 				zinfo->empty_base = slab->next;
1198e2caf0e7SMatthew Dillon 				--zinfo->empty_count;
1199e2caf0e7SMatthew Dillon 				slab->state = UNKNOWN;
1200e2caf0e7SMatthew Dillon 				if (__isthreaded)
1201e2caf0e7SMatthew Dillon 					_SPINUNLOCK(&sldepot->lock);
1202e2caf0e7SMatthew Dillon 				slabfree(slab);
1203e2caf0e7SMatthew Dillon 				if (__isthreaded)
1204e2caf0e7SMatthew Dillon 					_SPINLOCK(&sldepot->lock);
1205e2caf0e7SMatthew Dillon 			}
1206e2caf0e7SMatthew Dillon 		} else {
1207e2caf0e7SMatthew Dillon 			atomic_add_int(&slab->navail, 1);
1208e2caf0e7SMatthew Dillon 		}
1209e2caf0e7SMatthew Dillon 		if (__isthreaded)
1210e2caf0e7SMatthew Dillon 			_SPINUNLOCK(&sldepot->lock);
1211e2caf0e7SMatthew Dillon 	} else {
1212e2caf0e7SMatthew Dillon 		/*
1213e2caf0e7SMatthew Dillon 		 * We can't act on the slab other than by adjusting navail
1214e2caf0e7SMatthew Dillon 		 * (and the bitmap, which we did in the common code at the
1215e2caf0e7SMatthew Dillon 		 * top).
1216e2caf0e7SMatthew Dillon 		 */
1217e2caf0e7SMatthew Dillon 		atomic_add_int(&slab->navail, 1);
1218ff10d954SMatthew Dillon 	}
1219ff10d954SMatthew Dillon done:
1220ff10d954SMatthew Dillon 	;
1221ff10d954SMatthew Dillon }
1222ff10d954SMatthew Dillon 
1223ff10d954SMatthew Dillon /*
1224ff10d954SMatthew Dillon  * Allocate a new slab holding objects of size chunk_size.
1225ff10d954SMatthew Dillon  */
1226ff10d954SMatthew Dillon static slab_t
1227ff10d954SMatthew Dillon slaballoc(int zi, size_t chunking, size_t chunk_size)
1228ff10d954SMatthew Dillon {
1229ff10d954SMatthew Dillon 	slglobaldata_t slgd;
1230ff10d954SMatthew Dillon 	slglobaldata_t sldepot;
1231ff10d954SMatthew Dillon 	struct zoneinfo *zinfo;
1232ff10d954SMatthew Dillon 	region_t region;
1233ff10d954SMatthew Dillon 	void *save;
1234ff10d954SMatthew Dillon 	slab_t slab;
1235ff10d954SMatthew Dillon 	size_t slab_desire;
1236ff10d954SMatthew Dillon 	size_t slab_size;
1237ff10d954SMatthew Dillon 	size_t region_mask;
1238ff10d954SMatthew Dillon 	uintptr_t chunk_offset;
1239ff10d954SMatthew Dillon 	ssize_t maxchunks;
1240ff10d954SMatthew Dillon 	ssize_t tmpchunks;
1241ff10d954SMatthew Dillon 	int ispower2;
1242ff10d954SMatthew Dillon 	int power;
1243ff10d954SMatthew Dillon 	int ri;
1244ff10d954SMatthew Dillon 	int rx;
1245ff10d954SMatthew Dillon 	int nswath;
1246ff10d954SMatthew Dillon 	int j;
1247ff10d954SMatthew Dillon 
1248ff10d954SMatthew Dillon 	/*
1249ff10d954SMatthew Dillon 	 * First look in the depot.  Any given zone in the depot may be
1250ff10d954SMatthew Dillon 	 * locked by being set to -1.  We have to do this instead of simply
1251ff10d954SMatthew Dillon 	 * removing the entire chain because removing the entire chain can
1252ff10d954SMatthew Dillon 	 * cause racing threads to allocate local slabs for large objects,
1253ff10d954SMatthew Dillon 	 * resulting in a large VSZ.
1254ff10d954SMatthew Dillon 	 */
1255ff10d954SMatthew Dillon 	slgd = &slglobal;
1256e2caf0e7SMatthew Dillon 	sldepot = &slglobaldepot;
1257ff10d954SMatthew Dillon 	zinfo = &sldepot->zone[zi];
1258ff10d954SMatthew Dillon 
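	/*
	 * Peek at avail_base without the depot lock to keep the common
	 * empty-depot case cheap; the list head is re-read under the lock
	 * before a slab is actually dequeued.
	 */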
1259e2caf0e7SMatthew Dillon 	if (zinfo->avail_base) {
1260e2caf0e7SMatthew Dillon 		if (__isthreaded)
1261e2caf0e7SMatthew Dillon 			_SPINLOCK(&sldepot->lock);
1262e2caf0e7SMatthew Dillon 		slab = zinfo->avail_base;
1263e2caf0e7SMatthew Dillon 		if (slab) {
1264ff10d954SMatthew Dillon 			MASSERT(slab->slgd == NULL);
1265ff10d954SMatthew Dillon 			slab->slgd = slgd;
1266ff10d954SMatthew Dillon 			zinfo->avail_base = slab->next;
1267e2caf0e7SMatthew Dillon 			--zinfo->avail_count;
1268e2caf0e7SMatthew Dillon 			if (__isthreaded)
1269e2caf0e7SMatthew Dillon 				_SPINUNLOCK(&sldepot->lock);
1270e2caf0e7SMatthew Dillon 			return slab;
1271ff10d954SMatthew Dillon 		}
1272e2caf0e7SMatthew Dillon 		if (__isthreaded)
1273e2caf0e7SMatthew Dillon 			_SPINUNLOCK(&sldepot->lock);
1274ff10d954SMatthew Dillon 	}
1275ff10d954SMatthew Dillon 
1276ff10d954SMatthew Dillon 	/*
1277ff10d954SMatthew Dillon 	 * Nothing in the depot, allocate a new slab by locating or assigning
1278ff10d954SMatthew Dillon 	 * a region and then using the system virtual memory allocator.
1279ff10d954SMatthew Dillon 	 */
1280ff10d954SMatthew Dillon 	slab = NULL;
1281ff10d954SMatthew Dillon 
1282ff10d954SMatthew Dillon 	/*
1283ff10d954SMatthew Dillon 	 * Calculate the start of the data chunks relative to the start
1284e2caf0e7SMatthew Dillon 	 * of the slab.  If chunk_size is a power of 2 we guarantee
1285e2caf0e7SMatthew Dillon 	 * power of 2 alignment.  If it is not we guarantee alignment
1286e2caf0e7SMatthew Dillon 	 * to the chunk size.
1287ff10d954SMatthew Dillon 	 */
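	/*
	 * chunk_size is a power of 2 exactly when XORing it with
	 * (chunk_size - 1) yields a mask of all bits below the next
	 * power of 2, i.e. (chunk_size << 1) - 1.  For example,
	 * 64 ^ 63 == 127 == (64 << 1) - 1, while 48 ^ 47 == 31 != 95.
	 */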
1288ff10d954SMatthew Dillon 	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
1289ff10d954SMatthew Dillon 		ispower2 = 1;
1290965b839fSSascha Wildner 		chunk_offset = roundup2(sizeof(*slab), chunk_size);
1291ff10d954SMatthew Dillon 	} else {
1292ff10d954SMatthew Dillon 		ispower2 = 0;
1293ff10d954SMatthew Dillon 		chunk_offset = sizeof(*slab) + chunking - 1;
1294ff10d954SMatthew Dillon 		chunk_offset -= chunk_offset % chunking;
1295ff10d954SMatthew Dillon 	}
1296ff10d954SMatthew Dillon 
1297ff10d954SMatthew Dillon 	/*
1298ff10d954SMatthew Dillon 	 * Calculate a reasonable number of chunks for the slab.
1299ff10d954SMatthew Dillon 	 *
1300ff10d954SMatthew Dillon 	 * Once initialized the MaxChunks[] array can only ever be
1301ff10d954SMatthew Dillon 	 * reinitialized to the same value.
1302ff10d954SMatthew Dillon 	 */
1303ff10d954SMatthew Dillon 	maxchunks = MaxChunks[zi];
1304ff10d954SMatthew Dillon 	if (maxchunks == 0) {
1305ff10d954SMatthew Dillon 		/*
1306ff10d954SMatthew Dillon 		 * First calculate how many chunks would fit in 1/1024
1307ff10d954SMatthew Dillon 		 * available memory.  This is around 2MB on a 32 bit
1308ff10d954SMatthew Dillon 		 * system and 128G on a 64-bit (48-bits available) system.
1309ff10d954SMatthew Dillon 		 */
1310ff10d954SMatthew Dillon 		maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) /
1311ff10d954SMatthew Dillon 			    (ssize_t)chunk_size;
1312ff10d954SMatthew Dillon 		if (maxchunks <= 0)
1313ff10d954SMatthew Dillon 			maxchunks = 1;
1314ff10d954SMatthew Dillon 
1315ff10d954SMatthew Dillon 		/*
1316ff10d954SMatthew Dillon 		 * A slab cannot handle more than MAXCHUNKS chunks, but
1317ff10d954SMatthew Dillon 		 * limit us to approximately MAXCHUNKS / 2 here because
1318ff10d954SMatthew Dillon 		 * we may have to expand maxchunks when we calculate the
1319ff10d954SMatthew Dillon 		 * actual power-of-2 slab.
1320ff10d954SMatthew Dillon 		 */
1321ff10d954SMatthew Dillon 		if (maxchunks > MAXCHUNKS / 2)
1322ff10d954SMatthew Dillon 			maxchunks = MAXCHUNKS / 2;
1323ff10d954SMatthew Dillon 
1324ff10d954SMatthew Dillon 		/*
1325ff10d954SMatthew Dillon 		 * Try to limit the slabs to BIGSLABSIZE (~128MB).  Larger
1326ff10d954SMatthew Dillon 		 * slabs will be created if the allocation does not fit.
1327ff10d954SMatthew Dillon 		 */
1328ff10d954SMatthew Dillon 		if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) {
1329ff10d954SMatthew Dillon 			tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) /
1330ff10d954SMatthew Dillon 				    (ssize_t)chunk_size;
1331ff10d954SMatthew Dillon 			if (tmpchunks <= 0)
1332ff10d954SMatthew Dillon 				tmpchunks = 1;
1333ff10d954SMatthew Dillon 			if (maxchunks > tmpchunks)
1334ff10d954SMatthew Dillon 				maxchunks = tmpchunks;
1335ff10d954SMatthew Dillon 		}
1336ff10d954SMatthew Dillon 
1337ff10d954SMatthew Dillon 		/*
1338ff10d954SMatthew Dillon 		 * If the slab calculates to greater than 2MB see if we
1339ff10d954SMatthew Dillon 		 * can cut it down to ~2MB.  This controls VSZ but has
1340ff10d954SMatthew Dillon 		 * no effect on run-time size or performance.
1341ff10d954SMatthew Dillon 		 *
1342ff10d954SMatthew Dillon 		 * This is very important in case you core dump and also
1343ff10d954SMatthew Dillon 		 * important to reduce unnecessary region allocations.
1344ff10d954SMatthew Dillon 		 */
1345ff10d954SMatthew Dillon 		if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) {
1346ff10d954SMatthew Dillon 			tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) /
1347ff10d954SMatthew Dillon 				    (ssize_t)chunk_size;
1348ff10d954SMatthew Dillon 			if (tmpchunks < 1)
1349ff10d954SMatthew Dillon 				tmpchunks = 1;
1350ff10d954SMatthew Dillon 			if (maxchunks > tmpchunks)
1351ff10d954SMatthew Dillon 				maxchunks = tmpchunks;
1352ff10d954SMatthew Dillon 		}
1353ff10d954SMatthew Dillon 
1354ff10d954SMatthew Dillon 		/*
1355ff10d954SMatthew Dillon 		 * If the slab calculates to greater than 128K see if we
1356ff10d954SMatthew Dillon 		 * can cut it down to ~128K while still maintaining a
1357ff10d954SMatthew Dillon 		 * reasonably large number of chunks in each slab.  This
1358ff10d954SMatthew Dillon 		 * controls VSZ but has no effect on run-time size or
1359ff10d954SMatthew Dillon 		 * performance.
1360ff10d954SMatthew Dillon 		 *
1361ff10d954SMatthew Dillon 		 * This is very important in case you core dump and also
1362ff10d954SMatthew Dillon 		 * important to reduce unnecessary region allocations.
1363ff10d954SMatthew Dillon 		 */
1364ff10d954SMatthew Dillon 		if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) {
1365ff10d954SMatthew Dillon 			tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) /
1366ff10d954SMatthew Dillon 				    (ssize_t)chunk_size;
1367ff10d954SMatthew Dillon 			if (tmpchunks < 32)
1368ff10d954SMatthew Dillon 				tmpchunks = 32;
1369ff10d954SMatthew Dillon 			if (maxchunks > tmpchunks)
1370ff10d954SMatthew Dillon 				maxchunks = tmpchunks;
1371ff10d954SMatthew Dillon 		}
1372ff10d954SMatthew Dillon 
1373ff10d954SMatthew Dillon 		MaxChunks[zi] = maxchunks;
1374ff10d954SMatthew Dillon 	}
1375ff10d954SMatthew Dillon 	MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS);
1376ff10d954SMatthew Dillon 
1377ff10d954SMatthew Dillon 	/*
1378ff10d954SMatthew Dillon 	 * Calculate the actual slab size.  maxchunks will be recalculated
1379ff10d954SMatthew Dillon 	 * a little later.
1380ff10d954SMatthew Dillon 	 */
1381ff10d954SMatthew Dillon 	slab_desire = chunk_offset + chunk_size * maxchunks;
1382ff10d954SMatthew Dillon 	slab_size = 8 * MAXCHUNKS;
1383ff10d954SMatthew Dillon 	power = 3 + MAXCHUNKS_BITS;
1384ff10d954SMatthew Dillon 	while (slab_size < slab_desire) {
1385ff10d954SMatthew Dillon 		slab_size <<= 1;
1386ff10d954SMatthew Dillon 		++power;
1387ff10d954SMatthew Dillon 	}
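	/*
	 * slab_size is now slab_desire rounded up to a power of 2, with a
	 * floor of 8 * MAXCHUNKS, and power should track log2(slab_size)
	 * (assuming MAXCHUNKS == 1 << MAXCHUNKS_BITS, so the initial
	 * 8 * MAXCHUNKS equals 1 << (3 + MAXCHUNKS_BITS)).
	 */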
1388ff10d954SMatthew Dillon 
1389ff10d954SMatthew Dillon 	/*
1390ff10d954SMatthew Dillon 	 * Do a quick recalculation based on the actual slab size but not
1391ff10d954SMatthew Dillon 	 * yet dealing with whether the slab header is in-band or out-of-band.
1392ff10d954SMatthew Dillon 	 * The purpose here is to see if we can reasonably reduce slab_size
1393ff10d954SMatthew Dillon 	 * to a power of 4 to allow more chunk sizes to use the same slab
1394ff10d954SMatthew Dillon 	 * size.
1395ff10d954SMatthew Dillon 	 */
1396ff10d954SMatthew Dillon 	if ((power & 1) && slab_size > 32768) {
1397ff10d954SMatthew Dillon 		maxchunks = (slab_size - chunk_offset) / chunk_size;
1398ff10d954SMatthew Dillon 		if (maxchunks >= MAXCHUNKS / 8) {
1399ff10d954SMatthew Dillon 			slab_size >>= 1;
1400ff10d954SMatthew Dillon 			--power;
1401ff10d954SMatthew Dillon 		}
1402ff10d954SMatthew Dillon 	}
1403ff10d954SMatthew Dillon 	if ((power & 2) && slab_size > 32768 * 4) {
1404ff10d954SMatthew Dillon 		maxchunks = (slab_size - chunk_offset) / chunk_size;
1405ff10d954SMatthew Dillon 		if (maxchunks >= MAXCHUNKS / 4) {
1406ff10d954SMatthew Dillon 			slab_size >>= 2;
1407ff10d954SMatthew Dillon 			power -= 2;
1408ff10d954SMatthew Dillon 		}
1409ff10d954SMatthew Dillon 	}
1410ff10d954SMatthew Dillon 	/*
1411ff10d954SMatthew Dillon 	 * This case occurs when the slab_size is larger than 1/1024 available
1412ff10d954SMatthew Dillon 	 * UVM.
1413ff10d954SMatthew Dillon 	 */
1414ff10d954SMatthew Dillon 	nswath = slab_size / NREGIONS_SIZE;
1415ff10d954SMatthew Dillon 	if (nswath > NREGIONS)
1416ff10d954SMatthew Dillon 		return (NULL);
1417ff10d954SMatthew Dillon 
1418ff10d954SMatthew Dillon 
1419ff10d954SMatthew Dillon 	/*
1420ff10d954SMatthew Dillon 	 * Try to allocate from our current best region for this zi
1421ff10d954SMatthew Dillon 	 */
1422ff10d954SMatthew Dillon 	region_mask = ~(slab_size - 1);
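	/*
	 * The region mask is derived from the slab size, so it also acts
	 * as the key when matching Regions[] entries below: only regions
	 * already opened for this slab size/alignment (or unused slots we
	 * claim ourselves) can satisfy the allocation.
	 */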
1423ff10d954SMatthew Dillon 	ri = slgd->zone[zi].best_region;
1424ff10d954SMatthew Dillon 	if (Regions[ri].mask == region_mask) {
1425ff10d954SMatthew Dillon 		if ((slab = _vmem_alloc(ri, slab_size)) != NULL)
1426ff10d954SMatthew Dillon 			goto found;
1427ff10d954SMatthew Dillon 	}
1428ff10d954SMatthew Dillon 
1429ff10d954SMatthew Dillon 	/*
1430ff10d954SMatthew Dillon 	 * Try to find an existing region to allocate from.  The normal
1431ff10d954SMatthew Dillon 	 * case will be for allocations that are less than 1/1024 available
1432ff10d954SMatthew Dillon 	 * UVM, which fit into a single Regions[] entry.
1433ff10d954SMatthew Dillon 	 */
1434ff10d954SMatthew Dillon 	while (slab_size <= NREGIONS_SIZE) {
1435ff10d954SMatthew Dillon 		rx = -1;
1436ff10d954SMatthew Dillon 		for (ri = 0; ri < NREGIONS; ++ri) {
1437ff10d954SMatthew Dillon 			if (rx < 0 && Regions[ri].mask == 0)
1438ff10d954SMatthew Dillon 				rx = ri;
1439ff10d954SMatthew Dillon 			if (Regions[ri].mask == region_mask) {
1440ff10d954SMatthew Dillon 				slab = _vmem_alloc(ri, slab_size);
1441ff10d954SMatthew Dillon 				if (slab) {
1442ff10d954SMatthew Dillon 					slgd->zone[zi].best_region = ri;
1443ff10d954SMatthew Dillon 					goto found;
1444ff10d954SMatthew Dillon 				}
1445ff10d954SMatthew Dillon 			}
1446ff10d954SMatthew Dillon 		}
1447ff10d954SMatthew Dillon 
1448ff10d954SMatthew Dillon 		if (rx < 0)
1449ff10d954SMatthew Dillon 			return(NULL);
1450ff10d954SMatthew Dillon 
1451ff10d954SMatthew Dillon 		/*
1452ff10d954SMatthew Dillon 		 * This can fail if another thread races us; retry either way.
1453ff10d954SMatthew Dillon 		 */
1454ff10d954SMatthew Dillon 		atomic_cmpset_ptr((void **)&Regions[rx].mask,
1455ff10d954SMatthew Dillon 				  NULL,
1456ff10d954SMatthew Dillon 				  (void *)region_mask);
1457ff10d954SMatthew Dillon 	}
1458ff10d954SMatthew Dillon 
1459ff10d954SMatthew Dillon 	for (;;) {
1460ff10d954SMatthew Dillon 		rx = -1;
1461ff10d954SMatthew Dillon 		for (ri = 0; ri < NREGIONS; ri += nswath) {
1462ff10d954SMatthew Dillon 			if (Regions[ri].mask == region_mask) {
1463ff10d954SMatthew Dillon 				slab = _vmem_alloc(ri, slab_size);
1464ff10d954SMatthew Dillon 				if (slab) {
1465ff10d954SMatthew Dillon 					slgd->zone[zi].best_region = ri;
1466ff10d954SMatthew Dillon 					goto found;
1467ff10d954SMatthew Dillon 				}
1468ff10d954SMatthew Dillon 			}
1469ff10d954SMatthew Dillon 			if (rx < 0) {
1470ff10d954SMatthew Dillon 				for (j = nswath - 1; j >= 0; --j) {
1471ff10d954SMatthew Dillon 					if (Regions[ri+j].mask != 0)
1472ff10d954SMatthew Dillon 						break;
1473ff10d954SMatthew Dillon 				}
1474ff10d954SMatthew Dillon 				if (j < 0)
1475ff10d954SMatthew Dillon 					rx = ri;
1476ff10d954SMatthew Dillon 			}
1477ff10d954SMatthew Dillon 		}
1478ff10d954SMatthew Dillon 
1479ff10d954SMatthew Dillon 		/*
1480ff10d954SMatthew Dillon 		 * We found a candidate, try to allocate it backwards so
1481ff10d954SMatthew Dillon 		 * another thread racing a slaballoc() does not see the
1482ff10d954SMatthew Dillon 		 * mask in the base index position until we are done.
1483ff10d954SMatthew Dillon 		 *
1484ff10d954SMatthew Dillon 		 * We can safely zero-out any partial allocations because
1485ff10d954SMatthew Dillon 		 * the mask is only accessed from the base index.  Any other
1486ff10d954SMatthew Dillon 		 * threads racing us will fail prior to us clearing the mask.
1487ff10d954SMatthew Dillon 		 */
1488ff10d954SMatthew Dillon 		if (rx < 0)
1489ff10d954SMatthew Dillon 			return(NULL);
1490ff10d954SMatthew Dillon 		for (j = nswath - 1; j >= 0; --j) {
1491ff10d954SMatthew Dillon 			if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask,
1492ff10d954SMatthew Dillon 					       NULL, (void *)region_mask)) {
1493ff10d954SMatthew Dillon 				while (++j < nswath)
1494ff10d954SMatthew Dillon 					Regions[rx+j].mask = 0;
1495ff10d954SMatthew Dillon 				break;
1496ff10d954SMatthew Dillon 			}
1497ff10d954SMatthew Dillon 		}
1498ff10d954SMatthew Dillon 		/* retry */
1499ff10d954SMatthew Dillon 	}
1500ff10d954SMatthew Dillon 
1501ff10d954SMatthew Dillon 	/*
1502ff10d954SMatthew Dillon 	 * Fill in the new slab in region ri.  If the slab_size completely
1503ff10d954SMatthew Dillon 	 * fills one or more region slots we move the slab structure out of
1504ff10d954SMatthew Dillon 	 * band which should optimize the chunking (particularly for a power
1505ff10d954SMatthew Dillon 	 * of 2).
1506ff10d954SMatthew Dillon 	 */
1507ff10d954SMatthew Dillon found:
1508ff10d954SMatthew Dillon 	region = &Regions[ri];
1509ff10d954SMatthew Dillon 	MASSERT(region->slab == NULL);
1510ff10d954SMatthew Dillon 	if (slab_size >= NREGIONS_SIZE) {
1511ff10d954SMatthew Dillon 		save = slab;
1512ff10d954SMatthew Dillon 		slab = memalloc(sizeof(*slab), 0);
1513ff10d954SMatthew Dillon 		bzero(slab, sizeof(*slab));
1514ff10d954SMatthew Dillon 		slab->chunks = save;
1515ff10d954SMatthew Dillon 		for (j = 0; j < nswath; ++j)
1516ff10d954SMatthew Dillon 			region[j].slab = slab;
1517ff10d954SMatthew Dillon 		chunk_offset = 0;
1518ff10d954SMatthew Dillon 	} else {
1519ff10d954SMatthew Dillon 		bzero(slab, sizeof(*slab));
1520ff10d954SMatthew Dillon 		slab->chunks = (char *)slab + chunk_offset;
1521ff10d954SMatthew Dillon 	}
1522ff10d954SMatthew Dillon 
1523ff10d954SMatthew Dillon 	/*
1524ff10d954SMatthew Dillon 	 * Calculate the start of the chunks memory and recalculate the
1525ff10d954SMatthew Dillon 	 * actual number of chunks the slab can hold.
1526ff10d954SMatthew Dillon 	 */
1527ff10d954SMatthew Dillon 	maxchunks = (slab_size - chunk_offset) / chunk_size;
1528ff10d954SMatthew Dillon 	if (maxchunks > MAXCHUNKS)
1529ff10d954SMatthew Dillon 		maxchunks = MAXCHUNKS;
1530ff10d954SMatthew Dillon 
1531ff10d954SMatthew Dillon 	/*
1532ff10d954SMatthew Dillon 	 * And fill in the rest
1533ff10d954SMatthew Dillon 	 */
1534ff10d954SMatthew Dillon 	slab->magic = ZALLOC_SLAB_MAGIC;
1535ff10d954SMatthew Dillon 	slab->navail = maxchunks;
1536ff10d954SMatthew Dillon 	slab->nmax = maxchunks;
1537ff10d954SMatthew Dillon 	slab->slab_size = slab_size;
1538ff10d954SMatthew Dillon 	slab->chunk_size = chunk_size;
1539ff10d954SMatthew Dillon 	slab->zone_index = zi;
1540ff10d954SMatthew Dillon 	slab->slgd = slgd;
1541ff10d954SMatthew Dillon 	slab->state = UNKNOWN;
1542ff10d954SMatthew Dillon 	slab->region = region;
1543ff10d954SMatthew Dillon 
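	/*
	 * Initialize the bitmap with every chunk marked free: full words
	 * of LONG_BITS chunks get ULONG_MAX, and the final partial word
	 * gets only its low (maxchunks - ri) bits set.
	 */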
1544ff10d954SMatthew Dillon 	for (ri = 0; ri < maxchunks; ri += LONG_BITS) {
1545ff10d954SMatthew Dillon 		if (ri + LONG_BITS <= maxchunks)
1546ff10d954SMatthew Dillon 			slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX;
1547ff10d954SMatthew Dillon 		else
1548ff10d954SMatthew Dillon 			slab->bitmap[ri >> LONG_BITS_SHIFT] =
1549ff10d954SMatthew Dillon 						(1LU << (maxchunks - ri)) - 1;
1550ff10d954SMatthew Dillon 	}
1551ff10d954SMatthew Dillon 	return (slab);
1552ff10d954SMatthew Dillon }
1553ff10d954SMatthew Dillon 
1554ff10d954SMatthew Dillon /*
1555ff10d954SMatthew Dillon  * Free a slab.
1556ff10d954SMatthew Dillon  */
1557ff10d954SMatthew Dillon static void
1558ff10d954SMatthew Dillon slabfree(slab_t slab)
1559ff10d954SMatthew Dillon {
1560ff10d954SMatthew Dillon 	int nswath;
1561ff10d954SMatthew Dillon 	int j;
1562ff10d954SMatthew Dillon 
1563ff10d954SMatthew Dillon 	if (slab->region->slab == slab) {
1564ff10d954SMatthew Dillon 		/*
1565ff10d954SMatthew Dillon 		 * Out-of-band slab.
1566ff10d954SMatthew Dillon 		 */
1567ff10d954SMatthew Dillon 		nswath = slab->slab_size / NREGIONS_SIZE;
1568ff10d954SMatthew Dillon 		for (j = 0; j < nswath; ++j)
1569ff10d954SMatthew Dillon 			slab->region[j].slab = NULL;
1570ff10d954SMatthew Dillon 		slab->magic = 0;
1571ff10d954SMatthew Dillon 		_vmem_free(slab->chunks, slab->slab_size);
1572ff10d954SMatthew Dillon 		memfree(slab, 0);
1573ff10d954SMatthew Dillon 	} else {
1574ff10d954SMatthew Dillon 		/*
1575ff10d954SMatthew Dillon 		 * In-band slab.
1576ff10d954SMatthew Dillon 		 */
1577ff10d954SMatthew Dillon 		slab->magic = 0;
1578ff10d954SMatthew Dillon 		_vmem_free(slab, slab->slab_size);
1579ff10d954SMatthew Dillon 	}
1580ff10d954SMatthew Dillon }
1581ff10d954SMatthew Dillon 
1582ff10d954SMatthew Dillon /*
1583ff10d954SMatthew Dillon  * Terminate a slab's use in the current thread.  The slab may still have
1584ff10d954SMatthew Dillon  * outstanding allocations and thus not be deallocatable.
1585ff10d954SMatthew Dillon  */
1586ff10d954SMatthew Dillon static void
1587ff10d954SMatthew Dillon slabterm(slglobaldata_t slgd, slab_t slab)
1588ff10d954SMatthew Dillon {
1589e2caf0e7SMatthew Dillon 	slglobaldata_t sldepot;
1590ff10d954SMatthew Dillon 	struct zoneinfo *zinfo;
1591ff10d954SMatthew Dillon 	int zi = slab->zone_index;
1592ff10d954SMatthew Dillon 
1593ff10d954SMatthew Dillon 	slab->slgd = NULL;
1594ff10d954SMatthew Dillon 	--slgd->nslabs;
1595e2caf0e7SMatthew Dillon 	sldepot = &slglobaldepot;
1596ff10d954SMatthew Dillon 	zinfo = &sldepot->zone[zi];
1597ff10d954SMatthew Dillon 
1598ff10d954SMatthew Dillon 	/*
1599e2caf0e7SMatthew Dillon 	 * Move the slab to the avail list or the empty list.
1600ff10d954SMatthew Dillon 	 */
1601e2caf0e7SMatthew Dillon 	if (__isthreaded)
1602e2caf0e7SMatthew Dillon 		_SPINLOCK(&sldepot->lock);
1603e2caf0e7SMatthew Dillon 	if (slab->navail == slab->nmax) {
1604e2caf0e7SMatthew Dillon 		slab->state = EMPTY;
1605e2caf0e7SMatthew Dillon 		slab->next = zinfo->empty_base;
1606e2caf0e7SMatthew Dillon 		zinfo->empty_base = slab;
1607e2caf0e7SMatthew Dillon 		++zinfo->empty_count;
1608e2caf0e7SMatthew Dillon 	} else {
1609ff10d954SMatthew Dillon 		slab->state = AVAIL;
1610e2caf0e7SMatthew Dillon 		slab->next = zinfo->avail_base;
1611e2caf0e7SMatthew Dillon 		zinfo->avail_base = slab;
1612e2caf0e7SMatthew Dillon 		++zinfo->avail_count;
1613e2caf0e7SMatthew Dillon 	}
1614ff10d954SMatthew Dillon 
1615ff10d954SMatthew Dillon 	/*
1616e2caf0e7SMatthew Dillon 	 * Clean extra slabs out of the empty list
1617ff10d954SMatthew Dillon 	 */
1618e2caf0e7SMatthew Dillon 	while (zinfo->empty_count > opt_cache) {
1619e2caf0e7SMatthew Dillon 		slab = zinfo->empty_base;
1620e2caf0e7SMatthew Dillon 		zinfo->empty_base = slab->next;
1621e2caf0e7SMatthew Dillon 		--zinfo->empty_count;
1622e2caf0e7SMatthew Dillon 		slab->state = UNKNOWN;
1623e2caf0e7SMatthew Dillon 		if (__isthreaded)
1624e2caf0e7SMatthew Dillon 			_SPINUNLOCK(&sldepot->lock);
1625e2caf0e7SMatthew Dillon 		slabfree(slab);
1626e2caf0e7SMatthew Dillon 		if (__isthreaded)
1627e2caf0e7SMatthew Dillon 			_SPINLOCK(&sldepot->lock);
1628ff10d954SMatthew Dillon 	}
1629e2caf0e7SMatthew Dillon 	if (__isthreaded)
1630e2caf0e7SMatthew Dillon 		_SPINUNLOCK(&sldepot->lock);
1631ff10d954SMatthew Dillon }
1632ff10d954SMatthew Dillon 
1633ff10d954SMatthew Dillon /*
1634ff10d954SMatthew Dillon  * _vmem_alloc()
1635ff10d954SMatthew Dillon  *
1636ff10d954SMatthew Dillon  *	Directly map memory in PAGE_SIZE'd chunks with the specified
1637ff10d954SMatthew Dillon  *	alignment.
1638ff10d954SMatthew Dillon  *
1639ff10d954SMatthew Dillon  *	Alignment must be a multiple of PAGE_SIZE.
1640ff10d954SMatthew Dillon  *
1641ff10d954SMatthew Dillon  *	Size must be >= alignment.
1642ff10d954SMatthew Dillon  */
1643ff10d954SMatthew Dillon static void *
1644ff10d954SMatthew Dillon _vmem_alloc(int ri, size_t slab_size)
1645ff10d954SMatthew Dillon {
1646ff10d954SMatthew Dillon 	char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT);
1647ff10d954SMatthew Dillon 	char *eaddr;
1648ff10d954SMatthew Dillon 	char *addr;
1649ff10d954SMatthew Dillon 	char *save;
1650ff10d954SMatthew Dillon 	uintptr_t excess;
1651ff10d954SMatthew Dillon 
1652ff10d954SMatthew Dillon 	if (slab_size < NREGIONS_SIZE)
1653ff10d954SMatthew Dillon 		eaddr = baddr + NREGIONS_SIZE;
1654ff10d954SMatthew Dillon 	else
1655ff10d954SMatthew Dillon 		eaddr = baddr + slab_size;
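	/*
	 * [baddr, eaddr) is the address window this allocation must land
	 * in: one region slot, or several consecutive slots for slabs
	 * larger than NREGIONS_SIZE.  Mappings outside it are discarded
	 * below.
	 */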
1656ff10d954SMatthew Dillon 
1657ff10d954SMatthew Dillon 	/*
1658ff10d954SMatthew Dillon 	 * This usually just works but might not.
1659ff10d954SMatthew Dillon 	 */
1660ff10d954SMatthew Dillon 	addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE,
1661ff10d954SMatthew Dillon 		    MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0);
1662ff10d954SMatthew Dillon 	if (addr == MAP_FAILED) {
1663ff10d954SMatthew Dillon 		return (NULL);
1664ff10d954SMatthew Dillon 	}
1665ff10d954SMatthew Dillon 	if (addr < baddr || addr + slab_size > eaddr) {
1666ff10d954SMatthew Dillon 		munmap(addr, slab_size);
1667ff10d954SMatthew Dillon 		return (NULL);
1668ff10d954SMatthew Dillon 	}
1669ff10d954SMatthew Dillon 
1670ff10d954SMatthew Dillon 	/*
1671ff10d954SMatthew Dillon 	 * Check alignment.  The misaligned offset is also the excess
1672ff10d954SMatthew Dillon 	 * amount.  If misaligned unmap the excess so we have a chance of
1673ff10d954SMatthew Dillon 	 * mapping at the next alignment point and recursively try again.
1674ff10d954SMatthew Dillon 	 *
1675ff10d954SMatthew Dillon 	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
1676ff10d954SMatthew Dillon 	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
1677ff10d954SMatthew Dillon 	 *   xxxxxxxxx				final excess calculation
1678ff10d954SMatthew Dillon 	 *   ^ returned address
1679ff10d954SMatthew Dillon 	 */
1680ff10d954SMatthew Dillon 	excess = (uintptr_t)addr & (slab_size - 1);
1681ff10d954SMatthew Dillon 	while (excess) {
1682ff10d954SMatthew Dillon 		excess = slab_size - excess;
1683ff10d954SMatthew Dillon 		save = addr;
1684ff10d954SMatthew Dillon 
1685ff10d954SMatthew Dillon 		munmap(save + excess, slab_size - excess);
1686ff10d954SMatthew Dillon 		addr = _vmem_alloc(ri, slab_size);
1687ff10d954SMatthew Dillon 		munmap(save, excess);
1688ff10d954SMatthew Dillon 		if (addr == NULL)
1689ff10d954SMatthew Dillon 			return (NULL);
1690ff10d954SMatthew Dillon 		if (addr < baddr || addr + slab_size > eaddr) {
1691ff10d954SMatthew Dillon 			munmap(addr, slab_size);
1692ff10d954SMatthew Dillon 			return (NULL);
1693ff10d954SMatthew Dillon 		}
1694ff10d954SMatthew Dillon 		excess = (uintptr_t)addr & (slab_size - 1);
1695ff10d954SMatthew Dillon 	}
1696ff10d954SMatthew Dillon 	return (addr);
1697ff10d954SMatthew Dillon }
1698ff10d954SMatthew Dillon 
1699ff10d954SMatthew Dillon /*
1700ff10d954SMatthew Dillon  * _vmem_free()
1701ff10d954SMatthew Dillon  *
1702ff10d954SMatthew Dillon  *	Free a chunk of memory allocated with _vmem_alloc()
1703ff10d954SMatthew Dillon  */
1704ff10d954SMatthew Dillon static void
1705ff10d954SMatthew Dillon _vmem_free(void *ptr, size_t size)
1706ff10d954SMatthew Dillon {
1707ff10d954SMatthew Dillon 	munmap(ptr, size);
1708ff10d954SMatthew Dillon }
1709ff10d954SMatthew Dillon 
1710ff10d954SMatthew Dillon /*
1711ff10d954SMatthew Dillon  * Panic on fatal conditions
1712ff10d954SMatthew Dillon  */
1713ff10d954SMatthew Dillon static void
1714ff10d954SMatthew Dillon _mpanic(const char *ctl, ...)
1715ff10d954SMatthew Dillon {
1716ff10d954SMatthew Dillon 	va_list va;
1717ff10d954SMatthew Dillon 
1718ff10d954SMatthew Dillon 	if (malloc_panic == 0) {
1719ff10d954SMatthew Dillon 		malloc_panic = 1;
1720ff10d954SMatthew Dillon 		va_start(va, ctl);
1721ff10d954SMatthew Dillon 		vfprintf(stderr, ctl, va);
1722ff10d954SMatthew Dillon 		fprintf(stderr, "\n");
1723ff10d954SMatthew Dillon 		fflush(stderr);
1724ff10d954SMatthew Dillon 		va_end(va);
1725ff10d954SMatthew Dillon 	}
1726ff10d954SMatthew Dillon 	abort();
1727ff10d954SMatthew Dillon }
1728e2caf0e7SMatthew Dillon 
1729c9cb4430Szrj __weak_reference(__aligned_alloc, aligned_alloc);
1730e2caf0e7SMatthew Dillon __weak_reference(__malloc, malloc);
1731e2caf0e7SMatthew Dillon __weak_reference(__calloc, calloc);
1732e2caf0e7SMatthew Dillon __weak_reference(__posix_memalign, posix_memalign);
1733e2caf0e7SMatthew Dillon __weak_reference(__realloc, realloc);
1734e2caf0e7SMatthew Dillon __weak_reference(__free, free);
1735