xref: /netbsd-src/sys/kern/subr_vmem.c (revision f3b496ec9be495acbb17756f05d342b6b7b495e9)
1 /*	$NetBSD: subr_vmem.c,v 1.8 2006/08/21 09:05:22 martin Exp $	*/
2 
3 /*-
4  * Copyright (c)2006 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * reference:
31  * -	Magazines and Vmem: Extending the Slab Allocator
32  *	to Many CPUs and Arbitrary Resources
33  *	http://www.usenix.org/event/usenix01/bonwick.html
34  *
35  * TODO:
36  * -	implement vmem_xalloc/vmem_xfree
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.8 2006/08/21 09:05:22 martin Exp $");
41 
42 #define	VMEM_DEBUG
43 #if defined(_KERNEL)
44 #define	QCACHE
45 #endif /* defined(_KERNEL) */
46 
47 #include <sys/param.h>
48 #include <sys/hash.h>
49 #include <sys/queue.h>
50 
51 #if defined(_KERNEL)
52 #include <sys/systm.h>
53 #include <sys/lock.h>
54 #include <sys/malloc.h>
55 #include <sys/once.h>
56 #include <sys/pool.h>
57 #include <sys/proc.h>
58 #include <sys/vmem.h>
59 #else /* defined(_KERNEL) */
60 #include "../sys/vmem.h"
61 #endif /* defined(_KERNEL) */
62 
63 #if defined(_KERNEL)
64 #define	SIMPLELOCK_DECL(name)	struct simplelock name
65 #else /* defined(_KERNEL) */
66 #include <errno.h>
67 #include <assert.h>
68 #include <stdlib.h>
69 
70 #define	KASSERT(a)		assert(a)
71 #define	SIMPLELOCK_DECL(name)	/* nothing */
72 #define	LOCK_ASSERT(a)		/* nothing */
73 #define	simple_lock_init(a)	/* nothing */
74 #define	simple_lock(a)		/* nothing */
75 #define	simple_unlock(a)	/* nothing */
76 #define	ASSERT_SLEEPABLE(lk, msg) /* nothing */
77 #endif /* defined(_KERNEL) */
78 
79 struct vmem;
80 struct vmem_btag;
81 
82 #if defined(VMEM_DEBUG)
83 void vmem_dump(const vmem_t *);
84 #endif /* defined(VMEM_DEBUG) */
85 
86 #define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)
87 #define	VMEM_HASHSIZE_INIT	4096	/* XXX */
88 
89 #define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)
90 
91 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
92 LIST_HEAD(vmem_freelist, vmem_btag);
93 LIST_HEAD(vmem_hashlist, vmem_btag);
94 
95 #if defined(QCACHE)
96 #define	VMEM_QCACHE_IDX_MAX	32
97 
98 #define	QC_NAME_MAX	16
99 
100 struct qcache {
101 	struct pool qc_pool;
102 	struct pool_cache qc_cache;
103 	vmem_t *qc_vmem;
104 	char qc_name[QC_NAME_MAX];
105 };
106 typedef struct qcache qcache_t;
107 #define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool))
108 #endif /* defined(QCACHE) */
109 
110 /* vmem arena */
111 struct vmem {
112 	SIMPLELOCK_DECL(vm_lock);
113 	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
114 	    vm_flag_t);
115 	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
116 	vmem_t *vm_source;
117 	struct vmem_seglist vm_seglist;
118 	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
119 	size_t vm_hashsize;
120 	size_t vm_nbusytag;
121 	struct vmem_hashlist *vm_hashlist;
122 	size_t vm_quantum_mask;
123 	int vm_quantum_shift;
124 	const char *vm_name;
125 
126 #if defined(QCACHE)
127 	/* quantum cache */
128 	size_t vm_qcache_max;
129 	struct pool_allocator vm_qcache_allocator;
130 	qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
131 #endif /* defined(QCACHE) */
132 };
133 
134 #define	VMEM_LOCK(vm)	simple_lock(&vm->vm_lock)
135 #define	VMEM_UNLOCK(vm)	simple_unlock(&vm->vm_lock)
136 #define	VMEM_LOCK_INIT(vm)	simple_lock_init(&vm->vm_lock);
137 #define	VMEM_ASSERT_LOCKED(vm) \
138 	LOCK_ASSERT(simple_lock_held(&vm->vm_lock))
139 #define	VMEM_ASSERT_UNLOCKED(vm) \
140 	LOCK_ASSERT(!simple_lock_held(&vm->vm_lock))
141 
142 /* boundary tag */
143 struct vmem_btag {
144 	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
145 	union {
146 		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
147 		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
148 	} bt_u;
149 #define	bt_hashlist	bt_u.u_hashlist
150 #define	bt_freelist	bt_u.u_freelist
151 	vmem_addr_t bt_start;
152 	vmem_size_t bt_size;
153 	int bt_type;
154 };
155 
156 #define	BT_TYPE_SPAN		1
157 #define	BT_TYPE_SPAN_STATIC	2
158 #define	BT_TYPE_FREE		3
159 #define	BT_TYPE_BUSY		4
160 #define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
161 
162 #define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size)
163 
164 typedef struct vmem_btag bt_t;
165 
166 /* ---- misc */
167 
168 #define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
169 
170 static int
171 calc_order(vmem_size_t size)
172 {
173 	vmem_size_t target;
174 	int i;
175 
176 	KASSERT(size != 0);
177 
178 	i = 0;
179 	target = size >> 1;
180 	while (ORDER2SIZE(i) <= target) {
181 		i++;
182 	}
183 
184 	KASSERT(ORDER2SIZE(i) <= size);
185 	KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
186 
187 	return i;
188 }
189 
190 #if defined(_KERNEL)
191 static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
192 #endif /* defined(_KERNEL) */
193 
194 static void *
195 xmalloc(size_t sz, vm_flag_t flags)
196 {
197 
198 #if defined(_KERNEL)
199 	return malloc(sz, M_VMEM,
200 	    M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
201 #else /* defined(_KERNEL) */
202 	return malloc(sz);
203 #endif /* defined(_KERNEL) */
204 }
205 
206 static void
207 xfree(void *p)
208 {
209 
210 #if defined(_KERNEL)
211 	return free(p, M_VMEM);
212 #else /* defined(_KERNEL) */
213 	return free(p);
214 #endif /* defined(_KERNEL) */
215 }
216 
217 /* ---- boundary tag */
218 
219 #if defined(_KERNEL)
220 static struct pool_cache bt_poolcache;
221 static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
222 #endif /* defined(_KERNEL) */
223 
224 static bt_t *
225 bt_alloc(vmem_t *vm, vm_flag_t flags)
226 {
227 	bt_t *bt;
228 
229 #if defined(_KERNEL)
230 	/* XXX bootstrap */
231 	bt = pool_cache_get(&bt_poolcache,
232 	    (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
233 #else /* defined(_KERNEL) */
234 	bt = malloc(sizeof *bt);
235 #endif /* defined(_KERNEL) */
236 
237 	return bt;
238 }
239 
240 static void
241 bt_free(vmem_t *vm, bt_t *bt)
242 {
243 
244 #if defined(_KERNEL)
245 	/* XXX bootstrap */
246 	pool_cache_put(&bt_poolcache, bt);
247 #else /* defined(_KERNEL) */
248 	free(bt);
249 #endif /* defined(_KERNEL) */
250 }
251 
252 /*
253  * freelist[0] ... [1, 1]
254  * freelist[1] ... [2, 3]
255  * freelist[2] ... [4, 7]
256  * freelist[3] ... [8, 15]
257  *  :
258  * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
259  *  :
260  */
261 
262 static struct vmem_freelist *
263 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
264 {
265 	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
266 	int idx;
267 
268 	KASSERT((size & vm->vm_quantum_mask) == 0);
269 	KASSERT(size != 0);
270 
271 	idx = calc_order(qsize);
272 	KASSERT(idx >= 0);
273 	KASSERT(idx < VMEM_MAXORDER);
274 
275 	return &vm->vm_freelist[idx];
276 }
277 
278 static struct vmem_freelist *
279 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
280 {
281 	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
282 	int idx;
283 
284 	KASSERT((size & vm->vm_quantum_mask) == 0);
285 	KASSERT(size != 0);
286 
287 	idx = calc_order(qsize);
288 	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
289 		idx++;
290 		/* check too large request? */
291 	}
292 	KASSERT(idx >= 0);
293 	KASSERT(idx < VMEM_MAXORDER);
294 
295 	return &vm->vm_freelist[idx];
296 }
297 
298 /* ---- boundary tag hash */
299 
300 static struct vmem_hashlist *
301 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
302 {
303 	struct vmem_hashlist *list;
304 	unsigned int hash;
305 
306 	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
307 	list = &vm->vm_hashlist[hash % vm->vm_hashsize];
308 
309 	return list;
310 }
311 
312 static bt_t *
313 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
314 {
315 	struct vmem_hashlist *list;
316 	bt_t *bt;
317 
318 	list = bt_hashhead(vm, addr);
319 	LIST_FOREACH(bt, list, bt_hashlist) {
320 		if (bt->bt_start == addr) {
321 			break;
322 		}
323 	}
324 
325 	return bt;
326 }
327 
328 static void
329 bt_rembusy(vmem_t *vm, bt_t *bt)
330 {
331 
332 	KASSERT(vm->vm_nbusytag > 0);
333 	vm->vm_nbusytag--;
334 	LIST_REMOVE(bt, bt_hashlist);
335 }
336 
337 static void
338 bt_insbusy(vmem_t *vm, bt_t *bt)
339 {
340 	struct vmem_hashlist *list;
341 
342 	KASSERT(bt->bt_type == BT_TYPE_BUSY);
343 
344 	list = bt_hashhead(vm, bt->bt_start);
345 	LIST_INSERT_HEAD(list, bt, bt_hashlist);
346 	vm->vm_nbusytag++;
347 }
348 
349 /* ---- boundary tag list */
350 
351 static void
352 bt_remseg(vmem_t *vm, bt_t *bt)
353 {
354 
355 	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
356 }
357 
358 static void
359 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
360 {
361 
362 	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
363 }
364 
365 static void
366 bt_insseg_tail(vmem_t *vm, bt_t *bt)
367 {
368 
369 	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
370 }
371 
372 static void
373 bt_remfree(vmem_t *vm, bt_t *bt)
374 {
375 
376 	KASSERT(bt->bt_type == BT_TYPE_FREE);
377 
378 	LIST_REMOVE(bt, bt_freelist);
379 }
380 
381 static void
382 bt_insfree(vmem_t *vm, bt_t *bt)
383 {
384 	struct vmem_freelist *list;
385 
386 	list = bt_freehead_tofree(vm, bt->bt_size);
387 	LIST_INSERT_HEAD(list, bt, bt_freelist);
388 }
389 
390 /* ---- vmem internal functions */
391 
392 #if defined(QCACHE)
393 static inline vm_flag_t
394 prf_to_vmf(int prflags)
395 {
396 	vm_flag_t vmflags;
397 
398 	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
399 	if ((prflags & PR_WAITOK) != 0) {
400 		vmflags = VM_SLEEP;
401 	} else {
402 		vmflags = VM_NOSLEEP;
403 	}
404 	return vmflags;
405 }
406 
407 static inline int
408 vmf_to_prf(vm_flag_t vmflags)
409 {
410 	int prflags;
411 
412 	if ((vmflags & VM_SLEEP) != 0) {
413 		prflags = PR_WAITOK;
414 	} else {
415 		prflags = PR_NOWAIT;
416 	}
417 	return prflags;
418 }
419 
420 static size_t
421 qc_poolpage_size(size_t qcache_max)
422 {
423 	int i;
424 
425 	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
426 		/* nothing */
427 	}
428 	return ORDER2SIZE(i);
429 }
430 
431 static void *
432 qc_poolpage_alloc(struct pool *pool, int prflags)
433 {
434 	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
435 	vmem_t *vm = qc->qc_vmem;
436 
437 	return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
438 	    prf_to_vmf(prflags) | VM_INSTANTFIT);
439 }
440 
441 static void
442 qc_poolpage_free(struct pool *pool, void *addr)
443 {
444 	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
445 	vmem_t *vm = qc->qc_vmem;
446 
447 	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
448 }
449 
450 static void
451 qc_init(vmem_t *vm, size_t qcache_max)
452 {
453 	struct pool_allocator *pa;
454 	int qcache_idx_max;
455 	int i;
456 
457 	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
458 	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
459 		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
460 	}
461 	vm->vm_qcache_max = qcache_max;
462 	pa = &vm->vm_qcache_allocator;
463 	memset(pa, 0, sizeof(*pa));
464 	pa->pa_alloc = qc_poolpage_alloc;
465 	pa->pa_free = qc_poolpage_free;
466 	pa->pa_pagesz = qc_poolpage_size(qcache_max);
467 
468 	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
469 	for (i = 1; i <= qcache_idx_max; i++) {
470 		qcache_t *qc = &vm->vm_qcache[i - 1];
471 		size_t size = i << vm->vm_quantum_shift;
472 
473 		qc->qc_vmem = vm;
474 		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
475 		    vm->vm_name, size);
476 		pool_init(&qc->qc_pool, size, 0, 0,
477 		    PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
478 		pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
479 	}
480 }
481 
482 static boolean_t
483 qc_reap(vmem_t *vm)
484 {
485 	int i;
486 	int qcache_idx_max;
487 	boolean_t didsomething = FALSE;
488 
489 	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
490 	for (i = 1; i <= qcache_idx_max; i++) {
491 		qcache_t *qc = &vm->vm_qcache[i - 1];
492 
493 		if (pool_reclaim(&qc->qc_pool) != 0) {
494 			didsomething = TRUE;
495 		}
496 	}
497 
498 	return didsomething;
499 }
500 #endif /* defined(QCACHE) */
501 
502 #if defined(_KERNEL)
503 static int
504 vmem_init(void)
505 {
506 
507 	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
508 	return 0;
509 }
510 #endif /* defined(_KERNEL) */
511 
512 static vmem_addr_t
513 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
514     int spanbttype)
515 {
516 	bt_t *btspan;
517 	bt_t *btfree;
518 
519 	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
520 	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
521 	VMEM_ASSERT_UNLOCKED(vm);
522 
523 	btspan = bt_alloc(vm, flags);
524 	if (btspan == NULL) {
525 		return VMEM_ADDR_NULL;
526 	}
527 	btfree = bt_alloc(vm, flags);
528 	if (btfree == NULL) {
529 		bt_free(vm, btspan);
530 		return VMEM_ADDR_NULL;
531 	}
532 
533 	btspan->bt_type = spanbttype;
534 	btspan->bt_start = addr;
535 	btspan->bt_size = size;
536 
537 	btfree->bt_type = BT_TYPE_FREE;
538 	btfree->bt_start = addr;
539 	btfree->bt_size = size;
540 
541 	VMEM_LOCK(vm);
542 	bt_insseg_tail(vm, btspan);
543 	bt_insseg(vm, btfree, btspan);
544 	bt_insfree(vm, btfree);
545 	VMEM_UNLOCK(vm);
546 
547 	return addr;
548 }
549 
550 static int
551 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
552 {
553 	vmem_addr_t addr;
554 
555 	VMEM_ASSERT_UNLOCKED(vm);
556 
557 	if (vm->vm_allocfn == NULL) {
558 		return EINVAL;
559 	}
560 
561 	addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
562 	if (addr == VMEM_ADDR_NULL) {
563 		return ENOMEM;
564 	}
565 
566 	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
567 		(*vm->vm_freefn)(vm->vm_source, addr, size);
568 		return ENOMEM;
569 	}
570 
571 	return 0;
572 }
573 
574 static int
575 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
576 {
577 	bt_t *bt;
578 	int i;
579 	struct vmem_hashlist *newhashlist;
580 	struct vmem_hashlist *oldhashlist;
581 	size_t oldhashsize;
582 
583 	KASSERT(newhashsize > 0);
584 	VMEM_ASSERT_UNLOCKED(vm);
585 
586 	newhashlist =
587 	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
588 	if (newhashlist == NULL) {
589 		return ENOMEM;
590 	}
591 	for (i = 0; i < newhashsize; i++) {
592 		LIST_INIT(&newhashlist[i]);
593 	}
594 
595 	VMEM_LOCK(vm);
596 	oldhashlist = vm->vm_hashlist;
597 	oldhashsize = vm->vm_hashsize;
598 	vm->vm_hashlist = newhashlist;
599 	vm->vm_hashsize = newhashsize;
600 	if (oldhashlist == NULL) {
601 		VMEM_UNLOCK(vm);
602 		return 0;
603 	}
604 	for (i = 0; i < oldhashsize; i++) {
605 		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
606 			bt_rembusy(vm, bt); /* XXX */
607 			bt_insbusy(vm, bt);
608 		}
609 	}
610 	VMEM_UNLOCK(vm);
611 
612 	xfree(oldhashlist);
613 
614 	return 0;
615 }
616 
617 /* ---- vmem API */
618 
619 /*
620  * vmem_create: create an arena.
621  *
622  * => must not be called from interrupt context.
623  */
624 
625 vmem_t *
626 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
627     vmem_size_t quantum,
628     vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
629     void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
630     vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
631 {
632 	vmem_t *vm;
633 	int i;
634 #if defined(_KERNEL)
635 	static ONCE_DECL(control);
636 #endif /* defined(_KERNEL) */
637 
638 	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
639 	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
640 
641 #if defined(_KERNEL)
642 	if (RUN_ONCE(&control, vmem_init)) {
643 		return NULL;
644 	}
645 #endif /* defined(_KERNEL) */
646 	vm = xmalloc(sizeof(*vm), flags);
647 	if (vm == NULL) {
648 		return NULL;
649 	}
650 
651 	VMEM_LOCK_INIT(vm);
652 	vm->vm_name = name;
653 	vm->vm_quantum_mask = quantum - 1;
654 	vm->vm_quantum_shift = calc_order(quantum);
655 	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
656 	vm->vm_allocfn = allocfn;
657 	vm->vm_freefn = freefn;
658 	vm->vm_source = source;
659 	vm->vm_nbusytag = 0;
660 #if defined(QCACHE)
661 	qc_init(vm, qcache_max);
662 #endif /* defined(QCACHE) */
663 
664 	CIRCLEQ_INIT(&vm->vm_seglist);
665 	for (i = 0; i < VMEM_MAXORDER; i++) {
666 		LIST_INIT(&vm->vm_freelist[i]);
667 	}
668 	vm->vm_hashlist = NULL;
669 	if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
670 		vmem_destroy(vm);
671 		return NULL;
672 	}
673 
674 	if (size != 0) {
675 		if (vmem_add(vm, base, size, flags) == 0) {
676 			vmem_destroy(vm);
677 			return NULL;
678 		}
679 	}
680 
681 	return vm;
682 }
683 
684 void
685 vmem_destroy(vmem_t *vm)
686 {
687 
688 	VMEM_ASSERT_UNLOCKED(vm);
689 
690 	if (vm->vm_hashlist != NULL) {
691 		int i;
692 
693 		for (i = 0; i < vm->vm_hashsize; i++) {
694 			bt_t *bt;
695 
696 			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
697 				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
698 				bt_free(vm, bt);
699 			}
700 		}
701 		xfree(vm->vm_hashlist);
702 	}
703 	xfree(vm);
704 }
705 
706 vmem_size_t
707 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
708 {
709 
710 	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
711 }
712 
713 /*
714  * vmem_alloc:
715  *
716  * => caller must ensure appropriate spl,
717  *    if the arena can be accessed from interrupt context.
718  */
719 
720 vmem_addr_t
721 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
722 {
723 	struct vmem_freelist *list;
724 	struct vmem_freelist *first;
725 	struct vmem_freelist *end;
726 	bt_t *bt;
727 	bt_t *btnew;
728 	const vmem_size_t size = vmem_roundup_size(vm, size0);
729 	vm_flag_t strat = flags & VM_FITMASK;
730 
731 	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
732 	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
733 	VMEM_ASSERT_UNLOCKED(vm);
734 
735 	KASSERT(size0 > 0);
736 	KASSERT(size > 0);
737 	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
738 	if ((flags & VM_SLEEP) != 0) {
739 		ASSERT_SLEEPABLE(NULL, "vmem_alloc");
740 	}
741 
742 #if defined(QCACHE)
743 	if (size <= vm->vm_qcache_max) {
744 		int qidx = size >> vm->vm_quantum_shift;
745 		qcache_t *qc = &vm->vm_qcache[qidx - 1];
746 
747 		return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
748 		    vmf_to_prf(flags));
749 	}
750 #endif /* defined(QCACHE) */
751 
752 	btnew = bt_alloc(vm, flags);
753 	if (btnew == NULL) {
754 		return VMEM_ADDR_NULL;
755 	}
756 
757 retry_strat:
758 	first = bt_freehead_toalloc(vm, size, strat);
759 	end = &vm->vm_freelist[VMEM_MAXORDER];
760 retry:
761 	bt = NULL;
762 	VMEM_LOCK(vm);
763 	if (strat == VM_INSTANTFIT) {
764 		for (list = first; list < end; list++) {
765 			bt = LIST_FIRST(list);
766 			if (bt != NULL) {
767 				goto gotit;
768 			}
769 		}
770 	} else { /* VM_BESTFIT */
771 		for (list = first; list < end; list++) {
772 			LIST_FOREACH(bt, list, bt_freelist) {
773 				if (bt->bt_size >= size) {
774 					goto gotit;
775 				}
776 			}
777 		}
778 	}
779 	VMEM_UNLOCK(vm);
780 #if 1
781 	if (strat == VM_INSTANTFIT) {
782 		strat = VM_BESTFIT;
783 		goto retry_strat;
784 	}
785 #endif
786 	if (vmem_import(vm, size, flags) == 0) {
787 		goto retry;
788 	}
789 	/* XXX */
790 	return VMEM_ADDR_NULL;
791 
792 gotit:
793 	KASSERT(bt->bt_type == BT_TYPE_FREE);
794 	KASSERT(bt->bt_size >= size);
795 	bt_remfree(vm, bt);
796 	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
797 		/* split */
798 		btnew->bt_type = BT_TYPE_BUSY;
799 		btnew->bt_start = bt->bt_start;
800 		btnew->bt_size = size;
801 		bt->bt_start = bt->bt_start + size;
802 		bt->bt_size -= size;
803 		bt_insfree(vm, bt);
804 		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
805 		bt_insbusy(vm, btnew);
806 		VMEM_UNLOCK(vm);
807 	} else {
808 		bt->bt_type = BT_TYPE_BUSY;
809 		bt_insbusy(vm, bt);
810 		VMEM_UNLOCK(vm);
811 		bt_free(vm, btnew);
812 		btnew = bt;
813 	}
814 	KASSERT(btnew->bt_size >= size);
815 	btnew->bt_type = BT_TYPE_BUSY;
816 
817 	return btnew->bt_start;
818 }
819 
820 /*
821  * vmem_free:
822  *
823  * => caller must ensure appropriate spl,
824  *    if the arena can be accessed from interrupt context.
825  */
826 
827 void
828 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
829 {
830 	bt_t *bt;
831 	bt_t *t;
832 
833 	VMEM_ASSERT_UNLOCKED(vm);
834 
835 	KASSERT(addr != VMEM_ADDR_NULL);
836 	KASSERT(size > 0);
837 
838 #if defined(QCACHE)
839 	if (size <= vm->vm_qcache_max) {
840 		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
841 		qcache_t *qc = &vm->vm_qcache[qidx - 1];
842 
843 		return pool_cache_put(&qc->qc_cache, (void *)addr);
844 	}
845 #endif /* defined(QCACHE) */
846 
847 	VMEM_LOCK(vm);
848 
849 	bt = bt_lookupbusy(vm, addr);
850 	KASSERT(bt != NULL);
851 	KASSERT(bt->bt_start == addr);
852 	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
853 	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
854 	KASSERT(bt->bt_type == BT_TYPE_BUSY);
855 	bt_rembusy(vm, bt);
856 	bt->bt_type = BT_TYPE_FREE;
857 
858 	/* coalesce */
859 	t = CIRCLEQ_NEXT(bt, bt_seglist);
860 	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
861 		KASSERT(BT_END(bt) == t->bt_start);
862 		bt_remfree(vm, t);
863 		bt_remseg(vm, t);
864 		bt->bt_size += t->bt_size;
865 		bt_free(vm, t);
866 	}
867 	t = CIRCLEQ_PREV(bt, bt_seglist);
868 	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
869 		KASSERT(BT_END(t) == bt->bt_start);
870 		bt_remfree(vm, t);
871 		bt_remseg(vm, t);
872 		bt->bt_size += t->bt_size;
873 		bt->bt_start = t->bt_start;
874 		bt_free(vm, t);
875 	}
876 
877 	t = CIRCLEQ_PREV(bt, bt_seglist);
878 	KASSERT(t != NULL);
879 	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
880 	if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
881 	    t->bt_size == bt->bt_size) {
882 		vmem_addr_t spanaddr;
883 		vmem_size_t spansize;
884 
885 		KASSERT(t->bt_start == bt->bt_start);
886 		spanaddr = bt->bt_start;
887 		spansize = bt->bt_size;
888 		bt_remseg(vm, bt);
889 		bt_free(vm, bt);
890 		bt_remseg(vm, t);
891 		bt_free(vm, t);
892 		VMEM_UNLOCK(vm);
893 		(*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
894 	} else {
895 		bt_insfree(vm, bt);
896 		VMEM_UNLOCK(vm);
897 	}
898 }
899 
900 /*
901  * vmem_add:
902  *
903  * => caller must ensure appropriate spl,
904  *    if the arena can be accessed from interrupt context.
905  */
906 
907 vmem_addr_t
908 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
909 {
910 
911 	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
912 }
913 
914 /*
915  * vmem_reap: reap unused resources.
916  *
917  * => return TRUE if we successfully reaped something.
918  */
919 
920 boolean_t
921 vmem_reap(vmem_t *vm)
922 {
923 	boolean_t didsomething = FALSE;
924 
925 	VMEM_ASSERT_UNLOCKED(vm);
926 
927 #if defined(QCACHE)
928 	didsomething = qc_reap(vm);
929 #endif /* defined(QCACHE) */
930 	return didsomething;
931 }
932 
933 /* ---- debug */
934 
935 #if defined(VMEM_DEBUG)
936 
937 #if !defined(_KERNEL)
938 #include <stdio.h>
939 #endif /* !defined(_KERNEL) */
940 
941 void bt_dump(const bt_t *);
942 
943 void
944 bt_dump(const bt_t *bt)
945 {
946 
947 	printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
948 	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
949 	    bt->bt_type);
950 }
951 
952 void
953 vmem_dump(const vmem_t *vm)
954 {
955 	const bt_t *bt;
956 	int i;
957 
958 	printf("vmem %p '%s'\n", vm, vm->vm_name);
959 	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
960 		bt_dump(bt);
961 	}
962 
963 	for (i = 0; i < VMEM_MAXORDER; i++) {
964 		const struct vmem_freelist *fl = &vm->vm_freelist[i];
965 
966 		if (LIST_EMPTY(fl)) {
967 			continue;
968 		}
969 
970 		printf("freelist[%d]\n", i);
971 		LIST_FOREACH(bt, fl, bt_freelist) {
972 			bt_dump(bt);
973 			if (bt->bt_size) {
974 			}
975 		}
976 	}
977 }
978 
979 #if !defined(_KERNEL)
980 
981 #include <stdlib.h>
982 
983 int
984 main()
985 {
986 	vmem_t *vm;
987 	vmem_addr_t p;
988 	struct reg {
989 		vmem_addr_t p;
990 		vmem_size_t sz;
991 	} *reg = NULL;
992 	int nreg = 0;
993 	int nalloc = 0;
994 	int nfree = 0;
995 	vmem_size_t total = 0;
996 #if 1
997 	vm_flag_t strat = VM_INSTANTFIT;
998 #else
999 	vm_flag_t strat = VM_BESTFIT;
1000 #endif
1001 
1002 	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1003 	    NULL, NULL, NULL, 0, VM_NOSLEEP);
1004 	if (vm == NULL) {
1005 		printf("vmem_create\n");
1006 		exit(EXIT_FAILURE);
1007 	}
1008 	vmem_dump(vm);
1009 
1010 	p = vmem_add(vm, 100, 200, VM_SLEEP);
1011 	p = vmem_add(vm, 2000, 1, VM_SLEEP);
1012 	p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1013 	p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1014 	p = vmem_add(vm, 500, 1000, VM_SLEEP);
1015 	vmem_dump(vm);
1016 	for (;;) {
1017 		struct reg *r;
1018 
1019 		if (rand() % 100 > 40) {
1020 			vmem_size_t sz = rand() % 500 + 1;
1021 
1022 			printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1023 			p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1024 			printf("-> %" PRIu64 "\n", (uint64_t)p);
1025 			vmem_dump(vm);
1026 			if (p == VMEM_ADDR_NULL) {
1027 				break;
1028 			}
1029 			nreg++;
1030 			reg = realloc(reg, sizeof(*reg) * nreg);
1031 			r = &reg[nreg - 1];
1032 			r->p = p;
1033 			r->sz = sz;
1034 			total += sz;
1035 			nalloc++;
1036 		} else if (nreg != 0) {
1037 			r = &reg[rand() % nreg];
1038 			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1039 			    (uint64_t)r->p, (uint64_t)r->sz);
1040 			vmem_free(vm, r->p, r->sz);
1041 			total -= r->sz;
1042 			vmem_dump(vm);
1043 			*r = reg[nreg - 1];
1044 			nreg--;
1045 			nfree++;
1046 		}
1047 		printf("total=%" PRIu64 "\n", (uint64_t)total);
1048 	}
1049 	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1050 	    (uint64_t)total, nalloc, nfree);
1051 	exit(EXIT_SUCCESS);
1052 }
1053 #endif /* !defined(_KERNEL) */
1054 #endif /* defined(VMEM_DEBUG) */
1055