1 /*	$OpenBSD: subr_pool.c,v 1.58 2007/12/11 15:04:58 tedu Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the NetBSD
23  *	Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/errno.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/syslog.h>
49 #include <sys/sysctl.h>
50 
51 #include <uvm/uvm.h>
52 
53 
54 /*
55  * Pool resource management utility.
56  *
57  * Memory is allocated in pages which are split into pieces according to
58  * the pool item size. Each page is kept on one of three lists in the
59  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
60  * for empty, full and partially-full pages respectively. The individual
61  * pool items are on a linked list headed by `ph_itemlist' in each page
62  * header. The memory for the page headers is either taken from
63  * the allocated pages themselves (for small pool items) or taken from
64  * an internal pool of page headers (`phpool').
65  */
66 
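/*
 * A minimal usage sketch (the `foo' names are hypothetical, not a pool
 * that exists in the tree): a subsystem initializes its pool once, sets
 * the IPL it will be used at, and then allocates and frees fixed-size
 * items:
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_BIO);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 */
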
67 /* List of all pools */
68 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
69 
70 /* Private pool for page header structures */
71 struct pool phpool;
72 
73 struct pool_item_header {
74 	/* Page headers */
75 	LIST_ENTRY(pool_item_header)
76 				ph_pagelist;	/* pool page list */
77 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
78 	SPLAY_ENTRY(pool_item_header)
79 				ph_node;	/* Off-page page headers */
80 	int			ph_nmissing;	/* # of chunks in use */
81 	caddr_t			ph_page;	/* this page's address */
82 };
83 
84 struct pool_item {
85 #ifdef DIAGNOSTIC
86 	int pi_magic;
87 #endif
88 #ifdef DEADBEEF1
89 #define	PI_MAGIC DEADBEEF1
90 #else
91 #define	PI_MAGIC 0xdeadbeef
92 #endif
93 	/* Other entries use only this list entry */
94 	TAILQ_ENTRY(pool_item)	pi_list;
95 };
96 
97 #define	POOL_NEEDS_CATCHUP(pp)						\
98 	((pp)->pr_nitems < (pp)->pr_minitems)
99 
100 /*
101  * Every pool gets a unique serial number assigned to it. If this counter
102  * wraps, we're screwed, but we shouldn't create so many pools anyway.
103  */
104 unsigned int pool_serial;
105 
106 int	 pool_catchup(struct pool *);
107 void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
108 void	 pool_update_curpage(struct pool *);
109 void	*pool_do_get(struct pool *, int);
110 void	 pool_do_put(struct pool *, void *);
111 void	 pr_rmpage(struct pool *, struct pool_item_header *,
112 	    struct pool_pagelist *);
113 int	pool_chk_page(struct pool *, const char *, struct pool_item_header *);
114 struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);
115 
116 void	*pool_allocator_alloc(struct pool *, int);
117 void	 pool_allocator_free(struct pool *, void *);
118 
119 #ifdef DDB
120 void	 pool_print_pagelist(struct pool_pagelist *,
121 	    int (*)(const char *, ...));
122 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
123 #endif
124 
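/*
 * Sleep on the pool, dropping pr_mtx while asleep; the matching
 * wakeup(pp) is done in pool_do_put() when PR_WANTED is set.
 */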
125 #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
126 
127 static __inline int
128 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
129 {
130 	if (a->ph_page < b->ph_page)
131 		return (-1);
132 	else if (a->ph_page > b->ph_page)
133 		return (1);
134 	else
135 		return (0);
136 }
137 
138 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
139 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
140 
141 /*
142  * Return the pool page header based on page address.
143  */
144 static __inline struct pool_item_header *
145 pr_find_pagehead(struct pool *pp, caddr_t page)
146 {
147 	struct pool_item_header *ph, tmp;
148 
149 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
150 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
151 
152 	tmp.ph_page = page;
153 	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
154 	return ph;
155 }
156 
157 /*
158  * Remove a page from the pool.
159  */
160 void
161 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
162      struct pool_pagelist *pq)
163 {
164 
165 	/*
166 	 * If the page was idle, decrement the idle page count.
167 	 */
168 	if (ph->ph_nmissing == 0) {
169 #ifdef DIAGNOSTIC
170 		if (pp->pr_nidle == 0)
171 			panic("pr_rmpage: nidle inconsistent");
172 		if (pp->pr_nitems < pp->pr_itemsperpage)
173 			panic("pr_rmpage: nitems inconsistent");
174 #endif
175 		pp->pr_nidle--;
176 	}
177 
178 	pp->pr_nitems -= pp->pr_itemsperpage;
179 
180 	/*
181 	 * Unlink a page from the pool and release it (or queue it for release).
182 	 */
183 	LIST_REMOVE(ph, ph_pagelist);
184 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
185 		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
186 	if (pq) {
187 		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
188 	} else {
189 		pool_allocator_free(pp, ph->ph_page);
190 		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
191 			pool_put(&phpool, ph);
192 	}
193 	pp->pr_npages--;
194 	pp->pr_npagefree++;
195 
196 	pool_update_curpage(pp);
197 }
198 
199 /*
200  * Initialize the given pool resource structure.
201  *
202  * We export this routine to allow other kernel parts to declare
203  * static pools that must be initialized before malloc() is available.
204  */
205 void
206 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
207     const char *wchan, struct pool_allocator *palloc)
208 {
209 	int off, slack;
210 
211 #ifdef MALLOC_DEBUG
212 	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
213 		flags &= ~PR_DEBUG;
214 #endif
215 	/*
216 	 * Check arguments and construct default values.
217 	 */
218 	if (palloc == NULL)
219 		palloc = &pool_allocator_nointr;
220 	if (palloc->pa_pagesz == 0) {
221 		palloc->pa_pagesz = PAGE_SIZE;
222 		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
223 		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
224 	}
225 
226 	if (align == 0)
227 		align = ALIGN(1);
228 
229 	if (size < sizeof(struct pool_item))
230 		size = sizeof(struct pool_item);
231 
232 	size = roundup(size, align);
233 #ifdef DIAGNOSTIC
234 	if (size > palloc->pa_pagesz)
235 		panic("pool_init: pool item size (%lu) too large",
236 		      (u_long)size);
237 #endif
238 
239 	/*
240 	 * Initialize the pool structure.
241 	 */
242 	LIST_INIT(&pp->pr_emptypages);
243 	LIST_INIT(&pp->pr_fullpages);
244 	LIST_INIT(&pp->pr_partpages);
245 	pp->pr_curpage = NULL;
246 	pp->pr_npages = 0;
247 	pp->pr_minitems = 0;
248 	pp->pr_minpages = 0;
249 	pp->pr_maxpages = 8;
250 	pp->pr_roflags = flags;
251 	pp->pr_flags = 0;
252 	pp->pr_size = size;
253 	pp->pr_align = align;
254 	pp->pr_wchan = wchan;
255 	pp->pr_alloc = palloc;
256 	pp->pr_nitems = 0;
257 	pp->pr_nout = 0;
258 	pp->pr_hardlimit = UINT_MAX;
259 	pp->pr_hardlimit_warning = NULL;
260 	pp->pr_hardlimit_ratecap.tv_sec = 0;
261 	pp->pr_hardlimit_ratecap.tv_usec = 0;
262 	pp->pr_hardlimit_warning_last.tv_sec = 0;
263 	pp->pr_hardlimit_warning_last.tv_usec = 0;
264 	pp->pr_serial = ++pool_serial;
265 	if (pool_serial == 0)
266 		panic("pool_init: too much uptime");
267 
268 	/*
269 	 * Decide whether to put the page header off page to avoid
270 	 * wasting too large a part of the page. Off-page page headers
271  * go in a splay tree (`pr_phtree'), so we can match a returned item
272 	 * with its header based on the page address.
273 	 * We use 1/16 of the page size as the threshold (XXX: tune)
274 	 */
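	/*
	 * For example, with a 4096-byte page any item smaller than 256
	 * bytes keeps its pool_item_header at the end of the page itself
	 * (PR_PHINPAGE); larger items get their header from `phpool' and
	 * find it again through the `pr_phtree' splay tree.
	 */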
275 	if (pp->pr_size < palloc->pa_pagesz/16) {
276 		/* Use the end of the page for the page header */
277 		pp->pr_roflags |= PR_PHINPAGE;
278 		pp->pr_phoffset = off = palloc->pa_pagesz -
279 		    ALIGN(sizeof(struct pool_item_header));
280 	} else {
281 		/* The page header will be taken from our page header pool */
282 		pp->pr_phoffset = 0;
283 		off = palloc->pa_pagesz;
284 		SPLAY_INIT(&pp->pr_phtree);
285 	}
286 
287 	/*
288 	 * Alignment is to take place at `ioff' within the item. This means
289 	 * we must reserve up to `align - 1' bytes on the page to allow
290 	 * appropriate positioning of each item.
291 	 *
292 	 * Silently enforce `0 <= ioff < align'.
293 	 */
294 	pp->pr_itemoffset = ioff = ioff % align;
295 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
296 	KASSERT(pp->pr_itemsperpage != 0);
297 
298 	/*
299 	 * Use the slack between the chunks and the page header
300 	 * for "cache coloring".
301 	 */
302 	slack = off - pp->pr_itemsperpage * pp->pr_size;
303 	pp->pr_maxcolor = (slack / align) * align;
304 	pp->pr_curcolor = 0;
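	/*
	 * Worked example, assuming a 4096-byte page, 104-byte items,
	 * 8-byte alignment and a 32-byte in-page header: off = 4064, so
	 * pr_itemsperpage = 39 and the slack is 4064 - 39 * 104 = 8 bytes.
	 * Successive pages then start their items at offsets 0 and 8, so
	 * items on different pages do not all share the same cache lines.
	 */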
305 
306 	pp->pr_nget = 0;
307 	pp->pr_nfail = 0;
308 	pp->pr_nput = 0;
309 	pp->pr_npagealloc = 0;
310 	pp->pr_npagefree = 0;
311 	pp->pr_hiwat = 0;
312 	pp->pr_nidle = 0;
313 
314 	pp->pr_ipl = -1;
315 	mtx_init(&pp->pr_mtx, IPL_NONE);
316 
317 	if (phpool.pr_size == 0) {
318 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
319 		    0, "phpool", NULL);
320 		pool_setipl(&phpool, IPL_HIGH);
321 	}
322 
323 	/* Insert this into the list of all pools. */
324 	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
325 }
326 
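/*
 * Set the interrupt protection level of a pool after pool_init().  This
 * reinitializes the pool mutex at the given IPL and arms the splassert()
 * checks done in pool_do_get() and pool_do_put() under DIAGNOSTIC.
 */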
327 void
328 pool_setipl(struct pool *pp, int ipl)
329 {
330 	pp->pr_ipl = ipl;
331 	mtx_init(&pp->pr_mtx, ipl);
332 }
333 
334 /*
335  * Decommission a pool resource.
336  */
337 void
338 pool_destroy(struct pool *pp)
339 {
340 	struct pool_item_header *ph;
341 
342 #ifdef DIAGNOSTIC
343 	if (pp->pr_nout != 0)
344 		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
345 #endif
346 
347 	/* Remove all pages */
348 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
349 		pr_rmpage(pp, ph, NULL);
350 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
351 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
352 
353 	/* Remove from global pool list */
354 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
355 }
356 
357 struct pool_item_header *
358 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
359 {
360 	struct pool_item_header *ph;
361 
362 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
363 		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
364 	else {
365 		ph = pool_get(&phpool, flags);
366 	}
367 
368 	return (ph);
369 }
370 
371 /*
372  * Grab an item from the pool; must be called at appropriate spl level
373  */
374 void *
375 pool_get(struct pool *pp, int flags)
376 {
377 	void *v;
378 
379 	mtx_enter(&pp->pr_mtx);
380 	v = pool_do_get(pp, flags);
381 	mtx_leave(&pp->pr_mtx);
382 	if (v && pp->pr_ctor && pp->pr_ctor(pp->pr_arg, v, flags)) {
383 		mtx_enter(&pp->pr_mtx);
384 		pool_do_put(pp, v);
385 		mtx_leave(&pp->pr_mtx);
386 		v = NULL;
387 	}
388 	if (v)
389 		pp->pr_nget++;
390 	return (v);
391 }
392 
393 void *
394 pool_do_get(struct pool *pp, int flags)
395 {
396 	struct pool_item *pi;
397 	struct pool_item_header *ph;
398 	void *v;
399 
400 #ifdef DIAGNOSTIC
401 	if ((flags & PR_WAITOK) != 0)
402 		splassert(IPL_NONE);
403 	if (pp->pr_ipl != -1)
404 		splassert(pp->pr_ipl);
405 #endif /* DIAGNOSTIC */
406 
407 #ifdef MALLOC_DEBUG
408 	if (pp->pr_roflags & PR_DEBUG) {
409 		void *addr;
410 
411 		addr = NULL;
412 		debug_malloc(pp->pr_size, M_DEBUG,
413 		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
414 		return (addr);
415 	}
416 #endif
417 
418 startover:
419 	/*
420 	 * Check to see if we've reached the hard limit.  If we have,
421 	 * and we can wait, then wait until an item has been returned to
422 	 * the pool.
423 	 */
424 #ifdef DIAGNOSTIC
425 	if (__predict_false(pp->pr_nout > pp->pr_hardlimit))
426 		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
427 #endif
428 	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
429 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
430 			/*
431 			 * XXX: A warning isn't logged in this case.  Should
432 			 * it be?
433 			 */
434 			pp->pr_flags |= PR_WANTED;
435 			pool_sleep(pp);
436 			goto startover;
437 		}
438 
439 		/*
440 		 * Log a message that the hard limit has been hit.
441 		 */
442 		if (pp->pr_hardlimit_warning != NULL &&
443 		    ratecheck(&pp->pr_hardlimit_warning_last,
444 			      &pp->pr_hardlimit_ratecap))
445 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
446 
447 		pp->pr_nfail++;
448 		return (NULL);
449 	}
450 
451 	/*
452 	 * The convention we use is that if `curpage' is not NULL, then
453 	 * it points at a non-empty bucket. In particular, `curpage'
454 	 * never points at a page header which has PR_PHINPAGE set and
455 	 * has no items in its bucket.
456 	 */
457 	if ((ph = pp->pr_curpage) == NULL) {
458 #ifdef DIAGNOSTIC
459 		if (pp->pr_nitems != 0) {
460 			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
461 			    pp->pr_wchan, pp->pr_nitems);
462 			panic("pool_do_get: nitems inconsistent");
463 		}
464 #endif
465 
466 		/*
467 		 * Call the back-end page allocator for more memory.
468 		 */
469 		v = pool_allocator_alloc(pp, flags);
470 		if (__predict_true(v != NULL))
471 			ph = pool_alloc_item_header(pp, v, flags);
472 
473 		if (__predict_false(v == NULL || ph == NULL)) {
474 			if (v != NULL)
475 				pool_allocator_free(pp, v);
476 
477 			if ((flags & PR_WAITOK) == 0) {
478 				pp->pr_nfail++;
479 				return (NULL);
480 			}
481 
482 			/*
483 			 * Wait for items to be returned to this pool.
484 			 *
485 			 * XXX: maybe we should wake up once a second and
486 			 * try again?
487 			 */
488 			pp->pr_flags |= PR_WANTED;
489 			pool_sleep(pp);
490 			goto startover;
491 		}
492 
493 		/* We have more memory; add it to the pool */
494 		pool_prime_page(pp, v, ph);
495 		pp->pr_npagealloc++;
496 
497 		/* Start the allocation process over. */
498 		goto startover;
499 	}
500 	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
501 		panic("pool_do_get: %s: page empty", pp->pr_wchan);
502 	}
503 #ifdef DIAGNOSTIC
504 	if (__predict_false(pp->pr_nitems == 0)) {
505 		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
506 		    pp->pr_wchan, pp->pr_nitems);
507 		panic("pool_do_get: nitems inconsistent");
508 	}
509 #endif
510 
511 #ifdef DIAGNOSTIC
512 	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
513 		panic("pool_do_get(%s): free list modified: magic=%x; page %p;"
514 		       " item addr %p",
515 			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
516 	}
517 #endif
518 
519 	/*
520 	 * Remove from item list.
521 	 */
522 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
523 	pp->pr_nitems--;
524 	pp->pr_nout++;
525 	if (ph->ph_nmissing == 0) {
526 #ifdef DIAGNOSTIC
527 		if (__predict_false(pp->pr_nidle == 0))
528 			panic("pool_do_get: nidle inconsistent");
529 #endif
530 		pp->pr_nidle--;
531 
532 		/*
533 		 * This page was previously empty.  Move it to the list of
534 		 * partially-full pages.  This page is already curpage.
535 		 */
536 		LIST_REMOVE(ph, ph_pagelist);
537 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
538 	}
539 	ph->ph_nmissing++;
540 	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
541 #ifdef DIAGNOSTIC
542 		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
543 			panic("pool_do_get: %s: nmissing inconsistent",
544 			    pp->pr_wchan);
545 		}
546 #endif
547 		/*
548 		 * This page is now full.  Move it to the full list
549 		 * and select a new current page.
550 		 */
551 		LIST_REMOVE(ph, ph_pagelist);
552 		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
553 		pool_update_curpage(pp);
554 	}
555 
556 	/*
557 	 * If we have a low water mark and we are now below that low
558 	 * water mark, add more items to the pool.
559 	 */
560 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
561 		/*
562 		 * XXX: Should we log a warning?  Should we set up a timeout
563 		 * to try again in a second or so?  The latter could break
564 		 * a caller's assumptions about interrupt protection, etc.
565 		 */
566 	}
567 	return (v);
568 }
569 
570 /*
571  * Return resource to the pool; must be called at appropriate spl level
572  */
573 void
574 pool_put(struct pool *pp, void *v)
575 {
576 	if (pp->pr_dtor)
577 		pp->pr_dtor(pp->pr_arg, v);
578 	mtx_enter(&pp->pr_mtx);
579 	pool_do_put(pp, v);
580 	mtx_leave(&pp->pr_mtx);
581 	pp->pr_nput++;
582 }
583 
584 /*
585  * Internal version of pool_put().
586  */
587 void
588 pool_do_put(struct pool *pp, void *v)
589 {
590 	struct pool_item *pi = v;
591 	struct pool_item_header *ph;
592 	caddr_t page;
593 
594 #ifdef MALLOC_DEBUG
595 	if (pp->pr_roflags & PR_DEBUG) {
596 		debug_free(v, M_DEBUG);
597 		return;
598 	}
599 #endif
600 
601 	page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
602 
603 #ifdef DIAGNOSTIC
604 	if (pp->pr_ipl != -1)
605 		splassert(pp->pr_ipl);
606 
607 	if (__predict_false(pp->pr_nout == 0)) {
608 		printf("pool %s: putting with none out\n",
609 		    pp->pr_wchan);
610 		panic("pool_do_put");
611 	}
612 #endif
613 
614 	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
615 		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
616 	}
617 
618 	/*
619 	 * Return to item list.
620 	 */
621 #ifdef DIAGNOSTIC
622 	pi->pi_magic = PI_MAGIC;
623 #endif
624 #ifdef DEBUG
625 	{
626 		int i, *ip = v;
627 
628 		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
629 			*ip++ = PI_MAGIC;
630 		}
631 	}
632 #endif
633 
634 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
635 	ph->ph_nmissing--;
636 	pp->pr_nitems++;
637 	pp->pr_nout--;
638 
639 	/* Cancel "pool empty" condition if it exists */
640 	if (pp->pr_curpage == NULL)
641 		pp->pr_curpage = ph;
642 
643 	if (pp->pr_flags & PR_WANTED) {
644 		pp->pr_flags &= ~PR_WANTED;
645 		if (ph->ph_nmissing == 0)
646 			pp->pr_nidle++;
647 		wakeup(pp);
648 		return;
649 	}
650 
651 	/*
652 	 * If this page is now empty, do one of two things:
653 	 *
654 	 *	(1) If we have more pages than the page high water mark,
655 	 *	    free the page back to the system.
656 	 *
657 	 *	(2) Otherwise, move the page to the empty page list.
658 	 *
659 	 * Either way, select a new current page (so we use a partially-full
660 	 * page if one is available).
661 	 */
662 	if (ph->ph_nmissing == 0) {
663 		pp->pr_nidle++;
664 		if (pp->pr_nidle > pp->pr_maxpages) {
665 			pr_rmpage(pp, ph, NULL);
666 		} else {
667 			LIST_REMOVE(ph, ph_pagelist);
668 			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
669 		}
670 		pool_update_curpage(pp);
671 	}
672 
673 	/*
674 	 * If the page was previously completely full, move it to the
675 	 * partially-full list and make it the current page.  The next
676 	 * allocation will get the item from this page, instead of
677 	 * further fragmenting the pool.
678 	 */
679 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
680 		LIST_REMOVE(ph, ph_pagelist);
681 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
682 		pp->pr_curpage = ph;
683 	}
684 }
685 
686 /*
687  * Add N items to the pool.
688  */
689 int
690 pool_prime(struct pool *pp, int n)
691 {
692 	struct pool_item_header *ph;
693 	caddr_t cp;
694 	int newpages;
695 
696 	mtx_enter(&pp->pr_mtx);
697 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
698 
699 	while (newpages-- > 0) {
700 		cp = pool_allocator_alloc(pp, PR_NOWAIT);
701 		if (__predict_true(cp != NULL))
702 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
703 		if (__predict_false(cp == NULL || ph == NULL)) {
704 			if (cp != NULL)
705 				pool_allocator_free(pp, cp);
706 			break;
707 		}
708 
709 		pool_prime_page(pp, cp, ph);
710 		pp->pr_npagealloc++;
711 		pp->pr_minpages++;
712 	}
713 
714 	if (pp->pr_minpages >= pp->pr_maxpages)
715 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
716 
717 	mtx_leave(&pp->pr_mtx);
718 	return (0);
719 }
720 
721 /*
722  * Add a page worth of items to the pool.
723  *
724  * Note, we must be called with the pool descriptor LOCKED.
725  */
726 void
727 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
728 {
729 	struct pool_item *pi;
730 	caddr_t cp = storage;
731 	unsigned int align = pp->pr_align;
732 	unsigned int ioff = pp->pr_itemoffset;
733 	int n;
734 
735 #ifdef DIAGNOSTIC
736 	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
737 		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
738 #endif
739 
740 	/*
741 	 * Insert page header.
742 	 */
743 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
744 	TAILQ_INIT(&ph->ph_itemlist);
745 	ph->ph_page = storage;
746 	ph->ph_nmissing = 0;
747 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
748 		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
749 
750 	pp->pr_nidle++;
751 
752 	/*
753 	 * Color this page.
754 	 */
755 	cp = (caddr_t)(cp + pp->pr_curcolor);
756 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
757 		pp->pr_curcolor = 0;
758 
759 	/*
760 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
761 	 */
762 	if (ioff != 0)
763 		cp = (caddr_t)(cp + (align - ioff));
764 
765 	/*
766 	 * Insert remaining chunks on the bucket list.
767 	 */
768 	n = pp->pr_itemsperpage;
769 	pp->pr_nitems += n;
770 
771 	while (n--) {
772 		pi = (struct pool_item *)cp;
773 
774 		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
775 
776 		/* Insert on page list */
777 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
778 #ifdef DIAGNOSTIC
779 		pi->pi_magic = PI_MAGIC;
780 #endif
781 		cp = (caddr_t)(cp + pp->pr_size);
782 	}
783 
784 	/*
785 	 * If the pool was depleted, point at the new page.
786 	 */
787 	if (pp->pr_curpage == NULL)
788 		pp->pr_curpage = ph;
789 
790 	if (++pp->pr_npages > pp->pr_hiwat)
791 		pp->pr_hiwat = pp->pr_npages;
792 }
793 
794 /*
795  * Used by pool_get() when nitems drops below the low water mark.  This
796  * is used to catch up pr_nitems with the low water mark.
797  *
798  * Note we never wait for memory here, we let the caller decide what to do.
799  */
800 int
801 pool_catchup(struct pool *pp)
802 {
803 	struct pool_item_header *ph;
804 	caddr_t cp;
805 	int error = 0;
806 
807 	while (POOL_NEEDS_CATCHUP(pp)) {
808 		/*
809 		 * Call the page back-end allocator for more memory.
810 		 */
811 		cp = pool_allocator_alloc(pp, PR_NOWAIT);
812 		if (__predict_true(cp != NULL))
813 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
814 		if (__predict_false(cp == NULL || ph == NULL)) {
815 			if (cp != NULL)
816 				pool_allocator_free(pp, cp);
817 			error = ENOMEM;
818 			break;
819 		}
820 		pool_prime_page(pp, cp, ph);
821 		pp->pr_npagealloc++;
822 	}
823 
824 	return (error);
825 }
826 
827 void
828 pool_update_curpage(struct pool *pp)
829 {
830 
831 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
832 	if (pp->pr_curpage == NULL) {
833 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
834 	}
835 }
836 
837 void
838 pool_setlowat(struct pool *pp, int n)
839 {
840 
841 	pp->pr_minitems = n;
842 	pp->pr_minpages = (n == 0)
843 		? 0
844 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
845 
846 	mtx_enter(&pp->pr_mtx);
847 	/* Make sure we're caught up with the newly-set low water mark. */
848 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
849 		/*
850 		 * XXX: Should we log a warning?  Should we set up a timeout
851 		 * to try again in a second or so?  The latter could break
852 		 * a caller's assumptions about interrupt protection, etc.
853 		 */
854 	}
855 	mtx_leave(&pp->pr_mtx);
856 }
857 
858 void
859 pool_sethiwat(struct pool *pp, int n)
860 {
861 
862 	pp->pr_maxpages = (n == 0)
863 		? 0
864 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
865 }
866 
867 int
868 pool_sethardlimit(struct pool *pp, unsigned n, const char *warnmess, int ratecap)
869 {
870 	int error = 0;
871 
872 	if (n < pp->pr_nout) {
873 		error = EINVAL;
874 		goto done;
875 	}
876 
877 	pp->pr_hardlimit = n;
878 	pp->pr_hardlimit_warning = warnmess;
879 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
880 	pp->pr_hardlimit_warning_last.tv_sec = 0;
881 	pp->pr_hardlimit_warning_last.tv_usec = 0;
882 
883 	/*
884 	 * In-line version of pool_sethiwat().
885 	 */
886 	pp->pr_maxpages = (n == 0 || n == UINT_MAX)
887 		? n
888 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
889 
890 done:
891 	return (error);
892 }
893 
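/*
 * Attach an optional constructor and destructor to a pool.  The
 * constructor runs in pool_get() after an item has been taken, outside
 * pr_mtx; if it fails the item is put back and pool_get() returns NULL.
 * The destructor runs in pool_put() before the item is returned.
 */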
894 void
895 pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
896     void (*dtor)(void *, void *), void *arg)
897 {
898 	pp->pr_ctor = ctor;
899 	pp->pr_dtor = dtor;
900 	pp->pr_arg = arg;
901 }
902 /*
903  * Release completely idle pages, keeping the pool above its low water marks.
904  *
905  * Returns non-zero if any pages have been reclaimed.
906  */
907 int
908 pool_reclaim(struct pool *pp)
909 {
910 	struct pool_item_header *ph, *phnext;
911 	struct pool_pagelist pq;
912 
913 	LIST_INIT(&pq);
914 
915 	mtx_enter(&pp->pr_mtx);
916 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
917 		phnext = LIST_NEXT(ph, ph_pagelist);
918 
919 		/* Check our minimum page claim */
920 		if (pp->pr_npages <= pp->pr_minpages)
921 			break;
922 
923 		KASSERT(ph->ph_nmissing == 0);
924 
925 		/*
926 		 * If freeing this page would put us below
927 		 * the low water mark, stop now.
928 		 */
929 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
930 		    pp->pr_minitems)
931 			break;
932 
933 		pr_rmpage(pp, ph, &pq);
934 	}
935 	mtx_leave(&pp->pr_mtx);
936 
937 	if (LIST_EMPTY(&pq))
938 		return (0);
939 	while ((ph = LIST_FIRST(&pq)) != NULL) {
940 		LIST_REMOVE(ph, ph_pagelist);
941 		pool_allocator_free(pp, ph->ph_page);
942 		if (pp->pr_roflags & PR_PHINPAGE)
943 			continue;
944 		pool_put(&phpool, ph);
945 	}
946 
947 	return (1);
948 }
949 
950 #ifdef DDB
951 #include <machine/db_machdep.h>
952 #include <ddb/db_interface.h>
953 #include <ddb/db_output.h>
954 
955 /*
956  * Diagnostic helpers.
957  */
958 void
959 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
960 {
961 	pool_print1(pp, modif, pr);
962 }
963 
964 void
965 pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
966 {
967 	struct pool_item_header *ph;
968 #ifdef DIAGNOSTIC
969 	struct pool_item *pi;
970 #endif
971 
972 	LIST_FOREACH(ph, pl, ph_pagelist) {
973 		(*pr)("\t\tpage %p, nmissing %d\n",
974 		    ph->ph_page, ph->ph_nmissing);
975 #ifdef DIAGNOSTIC
976 		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
977 			if (pi->pi_magic != PI_MAGIC) {
978 				(*pr)("\t\t\titem %p, magic 0x%x\n",
979 				    pi, pi->pi_magic);
980 			}
981 		}
982 #endif
983 	}
984 }
985 
986 void
987 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
988 {
989 	struct pool_item_header *ph;
990 	int print_pagelist = 0;
991 	char c;
992 
993 	while ((c = *modif++) != '\0') {
994 		if (c == 'p')
995 			print_pagelist = 1;
997 	}
998 
999 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1000 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1001 	    pp->pr_roflags);
1002 	(*pr)("\talloc %p\n", pp->pr_alloc);
1003 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1004 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1005 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1006 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1007 
1008 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1009 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1010 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1011 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1012 
1013 	if (print_pagelist == 0)
1014 		return;
1015 
1016 	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1017 		(*pr)("\n\tempty page list:\n");
1018 	pool_print_pagelist(&pp->pr_emptypages, pr);
1019 	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1020 		(*pr)("\n\tfull page list:\n");
1021 	pool_print_pagelist(&pp->pr_fullpages, pr);
1022 	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1023 		(*pr)("\n\tpartial-page list:\n");
1024 	pool_print_pagelist(&pp->pr_partpages, pr);
1025 
1026 	if (pp->pr_curpage == NULL)
1027 		(*pr)("\tno current page\n");
1028 	else
1029 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1030 }
1031 
1032 void
1033 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1034 {
1035 	struct pool *pp;
1036 	char maxp[16];
1037 	int ovflw;
1038 	char mode;
1039 
1040 	mode = modif[0];
1041 	if (mode != '\0' && mode != 'a') {
1042 		db_printf("usage: show all pools [/a]\n");
1043 		return;
1044 	}
1045 
1046 	if (mode == '\0')
1047 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1048 		    "Name",
1049 		    "Size",
1050 		    "Requests",
1051 		    "Fail",
1052 		    "Releases",
1053 		    "Pgreq",
1054 		    "Pgrel",
1055 		    "Npage",
1056 		    "Hiwat",
1057 		    "Minpg",
1058 		    "Maxpg",
1059 		    "Idle");
1060 	else
1061 		db_printf("%-10s %18s %18s\n",
1062 		    "Name", "Address", "Allocator");
1063 
1064 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1065 		if (mode == 'a') {
1066 			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
1067 			    pp->pr_alloc);
1068 			continue;
1069 		}
1070 
1071 		if (!pp->pr_nget)
1072 			continue;
1073 
1074 		if (pp->pr_maxpages == UINT_MAX)
1075 			snprintf(maxp, sizeof maxp, "inf");
1076 		else
1077 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1078 
1079 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1080 	(ovflw) += db_printf((fmt),			\
1081 	    (width) - (fixed) - (ovflw) > 0 ?		\
1082 	    (width) - (fixed) - (ovflw) : 0,		\
1083 	    (val)) - (width);				\
1084 	if ((ovflw) < 0)				\
1085 		(ovflw) = 0;				\
1086 } while (/* CONSTCOND */0)
1087 
1088 		ovflw = 0;
1089 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1090 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1091 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1092 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1093 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1094 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1095 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1096 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1097 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1098 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1099 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1100 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1101 	}
1102 }
1103 
1104 int
1105 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1106 {
1107 	struct pool_item *pi;
1108 	caddr_t page;
1109 	int n;
1110 
1111 	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1112 	if (page != ph->ph_page &&
1113 	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1114 		if (label != NULL)
1115 			printf("%s: ", label);
1116 		printf("pool(%p:%s): page inconsistency: page %p;"
1117 		       " at page head addr %p (p %p)\n", pp,
1118 			pp->pr_wchan, ph->ph_page,
1119 			ph, page);
1120 		return 1;
1121 	}
1122 
1123 	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1124 	     pi != NULL;
1125 	     pi = TAILQ_NEXT(pi,pi_list), n++) {
1126 
1127 #ifdef DIAGNOSTIC
1128 		if (pi->pi_magic != PI_MAGIC) {
1129 			if (label != NULL)
1130 				printf("%s: ", label);
1131 			printf("pool(%s): free list modified: magic=%x;"
1132 			       " page %p; item ordinal %d;"
1133 			       " addr %p (p %p)\n",
1134 				pp->pr_wchan, pi->pi_magic, ph->ph_page,
1135 				n, pi, page);
1136 			panic("pool");
1137 		}
1138 #endif
1139 		page =
1140 		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1141 		if (page == ph->ph_page)
1142 			continue;
1143 
1144 		if (label != NULL)
1145 			printf("%s: ", label);
1146 		printf("pool(%p:%s): page inconsistency: page %p;"
1147 		       " item ordinal %d; addr %p (p %p)\n", pp,
1148 			pp->pr_wchan, ph->ph_page,
1149 			n, pi, page);
1150 		return 1;
1151 	}
1152 	return 0;
1153 }
1154 
1155 int
1156 pool_chk(struct pool *pp, const char *label)
1157 {
1158 	struct pool_item_header *ph;
1159 	int r = 0;
1160 
1161 	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1162 		r = pool_chk_page(pp, label, ph);
1163 		if (r) {
1164 			goto out;
1165 		}
1166 	}
1167 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1168 		r = pool_chk_page(pp, label, ph);
1169 		if (r) {
1170 			goto out;
1171 		}
1172 	}
1173 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1174 		r = pool_chk_page(pp, label, ph);
1175 		if (r) {
1176 			goto out;
1177 		}
1178 	}
1179 
1180 out:
1181 	return (r);
1182 }
1183 #endif
1184 
1185 /*
1186  * We have three different sysctls.
1187  * kern.pool.npools - the number of pools.
1188  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1189  * kern.pool.name.<pool#> - the name for pool#.
1190  */
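/*
 * A sketch of how a userland tool such as vmstat(8) walks these,
 * assuming the usual <sys/sysctl.h> mib constants; serial numbers start
 * at 1 and are never reused, so a lookup may return ENOENT:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
 *	int npools;
 *	size_t size = sizeof(npools);
 *
 *	sysctl(mib, 3, &npools, &size, NULL, 0);
 *
 *	struct pool pool;
 *	mib[2] = KERN_POOL_POOL;
 *	mib[3] = serial;
 *	size = sizeof(pool);
 *	sysctl(mib, 4, &pool, &size, NULL, 0);
 */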
1191 int
1192 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1193 {
1194 	struct pool *pp, *foundpool = NULL;
1195 	size_t buflen = where != NULL ? *sizep : 0;
1196 	int npools = 0, s;
1197 	unsigned int lookfor;
1198 	size_t len;
1199 
1200 	switch (*name) {
1201 	case KERN_POOL_NPOOLS:
1202 		if (namelen != 1 || buflen != sizeof(int))
1203 			return (EINVAL);
1204 		lookfor = 0;
1205 		break;
1206 	case KERN_POOL_NAME:
1207 		if (namelen != 2 || buflen < 1)
1208 			return (EINVAL);
1209 		lookfor = name[1];
1210 		break;
1211 	case KERN_POOL_POOL:
1212 		if (namelen != 2 || buflen != sizeof(struct pool))
1213 			return (EINVAL);
1214 		lookfor = name[1];
1215 		break;
1216 	default:
1217 		return (EINVAL);
1218 	}
1219 
1220 	s = splvm();
1221 
1222 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1223 		npools++;
1224 		if (lookfor == pp->pr_serial) {
1225 			foundpool = pp;
1226 			break;
1227 		}
1228 	}
1229 
1230 	splx(s);
1231 
1232 	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
1233 		return (ENOENT);
1234 
1235 	switch (*name) {
1236 	case KERN_POOL_NPOOLS:
1237 		return copyout(&npools, where, buflen);
1238 	case KERN_POOL_NAME:
1239 		len = strlen(foundpool->pr_wchan) + 1;
1240 		if (*sizep < len)
1241 			return (ENOMEM);
1242 		*sizep = len;
1243 		return copyout(foundpool->pr_wchan, where, len);
1244 	case KERN_POOL_POOL:
1245 		return copyout(foundpool, where, buflen);
1246 	}
1247 	/* NOTREACHED */
1248 	return (0); /* XXX - Stupid gcc */
1249 }
1250 
1251 /*
1252  * Pool backend allocators.
1253  *
1254  * Each pool has a backend allocator that handles page allocation and deallocation.
1255  */
1256 void	*pool_page_alloc_oldnointr(struct pool *, int);
1257 void	pool_page_free_oldnointr(struct pool *, void *);
1258 void	*pool_page_alloc(struct pool *, int);
1259 void	pool_page_free(struct pool *, void *);
1260 
1261 /* previous nointr.  handles large allocations safely */
1262 struct pool_allocator pool_allocator_oldnointr = {
1263 	pool_page_alloc_oldnointr, pool_page_free_oldnointr, 0,
1264 };
1265 /* safe for interrupts, name preserved for compat
1266  * this is the default allocator */
1267 struct pool_allocator pool_allocator_nointr = {
1268 	pool_page_alloc, pool_page_free, 0,
1269 };
1270 
1271 /*
1272  * XXX - we have at least three different resources for the same allocation
1273  *  and each resource can be depleted. First we have the ready elements in
1274  *  the pool. Then we have the resource (typically a vm_map) for this
1275  *  allocator, then we have physical memory. Waiting for any of these can
1276  *  be unnecessary when any other is freed, but the kernel doesn't support
1277  *  sleeping on multiple addresses, so we have to fake. The caller sleeps on
1278  *  the pool (so that we can be awakened when an item is returned to the pool),
1279  *  but we set PA_WANT on the allocator. When a page is returned to
1280  *  the allocator and PA_WANT is set pool_allocator_free will wakeup all
1281  *  sleeping pools belonging to this allocator. (XXX - thundering herd).
1282  *  We also wake up the allocator in case someone without a pool (malloc)
1283  *  is sleeping waiting for this allocator.
1284  */
1285 
1286 void *
1287 pool_allocator_alloc(struct pool *pp, int flags)
1288 {
1289 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1290 	void *v;
1291 
1292 	if (waitok)
1293 		mtx_leave(&pp->pr_mtx);
1294 	v = pp->pr_alloc->pa_alloc(pp, flags);
1295 	if (waitok)
1296 		mtx_enter(&pp->pr_mtx);
1297 
1298 	return (v);
1299 }
1300 
1301 void
1302 pool_allocator_free(struct pool *pp, void *v)
1303 {
1304 	struct pool_allocator *pa = pp->pr_alloc;
1305 
1306 	(*pa->pa_free)(pp, v);
1307 }
1308 
1309 void *
1310 pool_page_alloc(struct pool *pp, int flags)
1311 {
1312 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1313 
1314 	return (uvm_km_getpage(waitok));
1315 }
1316 
1317 void
1318 pool_page_free(struct pool *pp, void *v)
1319 {
1320 
1321 	uvm_km_putpage(v);
1322 }
1323 
1324 void *
1325 pool_page_alloc_oldnointr(struct pool *pp, int flags)
1326 {
1327 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1328 
1329 	splassert(IPL_NONE);
1330 
1331 	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1332 	    waitok));
1333 }
1334 
1335 void
1336 pool_page_free_oldnointr(struct pool *pp, void *v)
1337 {
1338 	splassert(IPL_NONE);
1339 
1340 	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1341 }
1342