xref: /openbsd-src/sys/kern/subr_pool.c (revision 5b859c19fe53bbea08f5c342e0a4470e99f883e1)
1 /*	$OpenBSD: subr_pool.c,v 1.168 2014/11/18 02:37:31 tedu Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 
44 #include <uvm/uvm_extern.h>
45 
46 /*
47  * Pool resource management utility.
48  *
49  * Memory is allocated in pages which are split into pieces according to
50  * the pool item size. Each page is kept on one of three lists in the
51  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
52  * for empty, full and partially-full pages respectively. The individual
53  * pool items are on a linked list headed by `ph_itemlist' in each page
54  * header. The memory for building the page list is either taken from
55  * the allocated pages themselves (for small pool items) or taken from
56  * an internal pool of page headers (`phpool').
57  */
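
/*
 * Example (sketch): a minimal consumer of this interface.  The names
 * "struct foo", foo_pool, foo_init(), foo_alloc(), foo_free() and the
 * "foopl" wait channel are illustrative only; they do not exist
 * elsewhere in the tree.
 */
struct foo {
	int	f_state;
};

struct pool foo_pool;

void
foo_init(void)
{
	/* one-time setup, e.g. from the subsystem attach routine */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
	pool_setipl(&foo_pool, IPL_NONE);
}

struct foo *
foo_alloc(void)
{
	/* PR_WAITOK may sleep until an item is available; PR_ZERO zeroes it */
	return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
}

void
foo_free(struct foo *f)
{
	pool_put(&foo_pool, f);
}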
58 
59 /* List of all pools */
60 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
61 
62 /*
63  * Every pool gets a unique serial number assigned to it. If this counter
64  * wraps, we're screwed, but we shouldn't create so many pools anyway.
65  */
66 unsigned int pool_serial;
67 unsigned int pool_count;
68 
69 /* Lock protecting the above variables that make up the global pool state */
70 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
71 
72 /* Private pool for page header structures */
73 struct pool phpool;
74 
75 struct pool_item_header {
76 	/* Page headers */
77 	LIST_ENTRY(pool_item_header)
78 				ph_pagelist;	/* pool page list */
79 	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
80 	RB_ENTRY(pool_item_header)
81 				ph_node;	/* Off-page page headers */
82 	int			ph_nmissing;	/* # of chunks in use */
83 	caddr_t			ph_page;	/* this page's address */
84 	u_long			ph_magic;
85 };
86 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
87 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
88 
89 struct pool_item {
90 	u_long				pi_magic;
91 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
92 };
93 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
94 
95 #ifdef POOL_DEBUG
96 int	pool_debug = 1;
97 #else
98 int	pool_debug = 0;
99 #endif
100 
101 #define	POOL_NEEDS_CATCHUP(pp)						\
102 	((pp)->pr_nitems < (pp)->pr_minitems)
103 
104 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
105 
106 struct pool_item_header *
107 	 pool_p_alloc(struct pool *, int, int *);
108 void	 pool_p_insert(struct pool *, struct pool_item_header *);
109 void	 pool_p_remove(struct pool *, struct pool_item_header *);
110 void	 pool_p_free(struct pool *, struct pool_item_header *);
111 
112 void	 pool_update_curpage(struct pool *);
113 void	*pool_do_get(struct pool *, int, int *);
114 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
115 int	 pool_chk(struct pool *);
116 void	 pool_get_done(void *, void *);
117 void	 pool_runqueue(struct pool *, int);
118 
119 void	*pool_allocator_alloc(struct pool *, int, int *);
120 void	 pool_allocator_free(struct pool *, void *);
121 
122 /*
123  * The default pool allocator.
124  */
125 void	*pool_page_alloc(struct pool *, int, int *);
126 void	pool_page_free(struct pool *, void *);
127 
128 /*
129  * Safe for interrupts; the name is preserved for compatibility.  This is
130  * the default allocator.
131  */
132 struct pool_allocator pool_allocator_nointr = {
133 	pool_page_alloc,
134 	pool_page_free
135 };
136 
137 void	*pool_large_alloc(struct pool *, int, int *);
138 void	pool_large_free(struct pool *, void *);
139 
140 struct pool_allocator pool_allocator_large = {
141 	pool_large_alloc,
142 	pool_large_free
143 };
144 
145 void	*pool_large_alloc_ni(struct pool *, int, int *);
146 void	pool_large_free_ni(struct pool *, void *);
147 
148 struct pool_allocator pool_allocator_large_ni = {
149 	pool_large_alloc_ni,
150 	pool_large_free_ni
151 };
152 
153 #ifdef DDB
154 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
155 	     __attribute__((__format__(__kprintf__,1,2))));
156 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
157 	     __attribute__((__format__(__kprintf__,1,2))));
158 #endif
159 
160 #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
161 
162 static inline int
163 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
164 {
165 	vaddr_t va = (vaddr_t)a->ph_page;
166 	vaddr_t vb = (vaddr_t)b->ph_page;
167 
168 	/* the compares in this order are important for the NFIND to work */
169 	if (vb < va)
170 		return (-1);
171 	if (vb > va)
172 		return (1);
173 
174 	return (0);
175 }
176 
177 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
178 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
179 
180 /*
181  * Return the pool page header based on page address.
182  */
183 static inline struct pool_item_header *
184 pr_find_pagehead(struct pool *pp, void *v)
185 {
186 	struct pool_item_header *ph, key;
187 
188 	if (POOL_INPGHDR(pp)) {
189 		caddr_t page;
190 
191 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
192 
193 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
194 	}
195 
196 	key.ph_page = v;
197 	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
198 	if (ph == NULL)
199 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
200 
201 	KASSERT(ph->ph_page <= (caddr_t)v);
202 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
203 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
204 
205 	return (ph);
206 }
207 
208 /*
209  * Initialize the given pool resource structure.
210  *
211  * We export this routine to allow other kernel parts to declare
212  * static pools that must be initialized before malloc() is available.
213  */
214 void
215 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
216     const char *wchan, struct pool_allocator *palloc)
217 {
218 	int off = 0;
219 	unsigned int pgsize = PAGE_SIZE, items;
220 #ifdef DIAGNOSTIC
221 	struct pool *iter;
222 	KASSERT(ioff == 0);
223 #endif
224 
225 	if (align == 0)
226 		align = ALIGN(1);
227 
228 	if (size < sizeof(struct pool_item))
229 		size = sizeof(struct pool_item);
230 
231 	size = roundup(size, align);
232 
233 	if (palloc == NULL) {
234 		while (size > pgsize)
235 			pgsize <<= 1;
236 
237 		if (pgsize > PAGE_SIZE) {
238 			palloc = ISSET(flags, PR_WAITOK) ?
239 			    &pool_allocator_large_ni : &pool_allocator_large;
240 		} else
241 			palloc = &pool_allocator_nointr;
242 	} else
243 		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
244 
245 	items = pgsize / size;
246 
247 	/*
248 	 * Decide whether to put the page header off page to avoid
249 	 * wasting too large a part of the page. Off-page page headers
250 	 * go into an RB tree, so we can match a returned item with
251 	 * its header based on the page address.
252 	 */
253 	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
254 		off = pgsize - sizeof(struct pool_item_header);
255 	} else if (sizeof(struct pool_item_header) * 2 >= size) {
256 		off = pgsize - sizeof(struct pool_item_header);
257 		items = off / size;
258 	}
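
	/*
	 * Worked example (illustrative numbers only, assuming 4096 byte
	 * pages and a 96 byte pool_item_header): 256 byte items give
	 * items = 16 with no slack, and 2 * 96 < 256, so off stays 0 and
	 * the header is kept off-page in phpool; 40 byte items give
	 * items = 102 with only 16 bytes of slack, but 2 * 96 >= 40, so
	 * the header goes in-page at off = 4096 - 96 = 4000 and items is
	 * recomputed as 100.
	 */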
259 
260 	KASSERT(items > 0);
261 
262 	/*
263 	 * Initialize the pool structure.
264 	 */
265 	memset(pp, 0, sizeof(*pp));
266 	LIST_INIT(&pp->pr_emptypages);
267 	LIST_INIT(&pp->pr_fullpages);
268 	LIST_INIT(&pp->pr_partpages);
269 	pp->pr_curpage = NULL;
270 	pp->pr_npages = 0;
271 	pp->pr_minitems = 0;
272 	pp->pr_minpages = 0;
273 	pp->pr_maxpages = 8;
274 	pp->pr_roflags = flags;
275 	pp->pr_flags = 0;
276 	pp->pr_size = size;
277 	pp->pr_pgsize = pgsize;
278 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
279 	pp->pr_phoffset = off;
280 	pp->pr_itemsperpage = items;
281 	pp->pr_align = align;
282 	pp->pr_wchan = wchan;
283 	pp->pr_alloc = palloc;
284 	pp->pr_nitems = 0;
285 	pp->pr_nout = 0;
286 	pp->pr_hardlimit = UINT_MAX;
287 	pp->pr_hardlimit_warning = NULL;
288 	pp->pr_hardlimit_ratecap.tv_sec = 0;
289 	pp->pr_hardlimit_ratecap.tv_usec = 0;
290 	pp->pr_hardlimit_warning_last.tv_sec = 0;
291 	pp->pr_hardlimit_warning_last.tv_usec = 0;
292 	RB_INIT(&pp->pr_phtree);
293 
294 	pp->pr_nget = 0;
295 	pp->pr_nfail = 0;
296 	pp->pr_nput = 0;
297 	pp->pr_npagealloc = 0;
298 	pp->pr_npagefree = 0;
299 	pp->pr_hiwat = 0;
300 	pp->pr_nidle = 0;
301 
302 	pp->pr_ipl = -1;
303 	mtx_init(&pp->pr_mtx, IPL_NONE);
304 	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
305 	TAILQ_INIT(&pp->pr_requests);
306 
307 	if (phpool.pr_size == 0) {
308 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
309 		    0, "phpool", NULL);
310 		pool_setipl(&phpool, IPL_HIGH);
311 
312 		/* make sure phpool won't "recurse" */
313 		KASSERT(POOL_INPGHDR(&phpool));
314 	}
315 
316 	/* pglistalloc/constraint parameters */
317 	pp->pr_crange = &kp_dirty;
318 
319 	/* Insert this into the list of all pools. */
320 	rw_enter_write(&pool_lock);
321 #ifdef DIAGNOSTIC
322 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
323 		if (iter == pp)
324 			panic("%s: pool %s already on list", __func__, wchan);
325 	}
326 #endif
327 
328 	pp->pr_serial = ++pool_serial;
329 	if (pool_serial == 0)
330 		panic("%s: too much uptime", __func__);
331 
332 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
333 	pool_count++;
334 	rw_exit_write(&pool_lock);
335 }
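
/*
 * Example (sketch): because pool_init() above does not call malloc(), a
 * subsystem can set up a statically declared pool lazily during early
 * boot, much like phpool bootstraps itself above.  bar_pool, bar_init()
 * and "barpl" are illustrative names only.
 */
struct pool bar_pool;

void
bar_init(void)
{
	if (bar_pool.pr_size == 0) {
		pool_init(&bar_pool, 64, 0, 0, 0, "barpl", NULL);
		pool_setipl(&bar_pool, IPL_HIGH);
	}
}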
336 
337 void
338 pool_setipl(struct pool *pp, int ipl)
339 {
340 	pp->pr_ipl = ipl;
341 	mtx_init(&pp->pr_mtx, ipl);
342 	mtx_init(&pp->pr_requests_mtx, ipl);
343 }
344 
345 /*
346  * Decommission a pool resource.
347  */
348 void
349 pool_destroy(struct pool *pp)
350 {
351 	struct pool_item_header *ph;
352 	struct pool *prev, *iter;
353 
354 #ifdef DIAGNOSTIC
355 	if (pp->pr_nout != 0)
356 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
357 #endif
358 
359 	/* Remove from global pool list */
360 	rw_enter_write(&pool_lock);
361 	pool_count--;
362 	if (pp == SIMPLEQ_FIRST(&pool_head))
363 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
364 	else {
365 		prev = SIMPLEQ_FIRST(&pool_head);
366 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
367 			if (iter == pp) {
368 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
369 				    pr_poollist);
370 				break;
371 			}
372 			prev = iter;
373 		}
374 	}
375 	rw_exit_write(&pool_lock);
376 
377 	/* Remove all pages */
378 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) {
379 		mtx_enter(&pp->pr_mtx);
380 		pool_p_remove(pp, ph);
381 		mtx_leave(&pp->pr_mtx);
382 		pool_p_free(pp, ph);
383 	}
384 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
385 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
386 }
387 
388 void
389 pool_request_init(struct pool_request *pr,
390     void (*handler)(void *, void *), void *cookie)
391 {
392 	pr->pr_handler = handler;
393 	pr->pr_cookie = cookie;
394 	pr->pr_item = NULL;
395 }
396 
397 void
398 pool_request(struct pool *pp, struct pool_request *pr)
399 {
400 	mtx_enter(&pp->pr_requests_mtx);
401 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
402 	pool_runqueue(pp, PR_NOWAIT);
403 	mtx_leave(&pp->pr_requests_mtx);
404 }
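
/*
 * Example (sketch): a caller that must not sleep can queue a request and
 * be handed the item later from a callback; pool_get() below uses this
 * same mechanism for its PR_WAITOK fallback.  struct baz_softc,
 * baz_handler() and baz_request() are illustrative names only.
 */
struct baz_softc {
	struct pool_request	 sc_pr;
	void			*sc_item;
};

void
baz_handler(void *cookie, void *item)
{
	struct baz_softc *sc = cookie;

	/* a real consumer would serialize this like pool_get_done() below */
	sc->sc_item = item;
	wakeup_one(sc);
}

void
baz_request(struct pool *pp, struct baz_softc *sc)
{
	pool_request_init(&sc->sc_pr, baz_handler, sc);
	pool_request(pp, &sc->sc_pr);
}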
405 
406 struct pool_get_memory {
407 	struct mutex mtx;
408 	void * volatile v;
409 };
410 
411 /*
412  * Grab an item from the pool.
413  */
414 void *
415 pool_get(struct pool *pp, int flags)
416 {
417 	void *v = NULL;
418 	int slowdown = 0;
419 
420 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
421 
423 	mtx_enter(&pp->pr_mtx);
424 	if (pp->pr_nout >= pp->pr_hardlimit) {
425 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
426 			goto fail;
427 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
428 		if (ISSET(flags, PR_NOWAIT))
429 			goto fail;
430 	}
431 	mtx_leave(&pp->pr_mtx);
432 
433 	if (slowdown && ISSET(flags, PR_WAITOK))
434 		yield();
435 
436 	if (v == NULL) {
437 		struct pool_get_memory mem =
438 		    { MUTEX_INITIALIZER(pp->pr_ipl), NULL };
439 		struct pool_request pr;
440 
441 		pool_request_init(&pr, pool_get_done, &mem);
442 		pool_request(pp, &pr);
443 
444 		mtx_enter(&mem.mtx);
445 		while (mem.v == NULL)
446 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
447 		mtx_leave(&mem.mtx);
448 
449 		v = mem.v;
450 	}
451 
452 	if (ISSET(flags, PR_ZERO))
453 		memset(v, 0, pp->pr_size);
454 
455 	return (v);
456 
457 fail:
458 	pp->pr_nfail++;
459 	mtx_leave(&pp->pr_mtx);
460 	return (NULL);
461 }
462 
463 void
464 pool_get_done(void *xmem, void *v)
465 {
466 	struct pool_get_memory *mem = xmem;
467 
468 	mtx_enter(&mem->mtx);
469 	mem->v = v;
470 	mtx_leave(&mem->mtx);
471 
472 	wakeup_one(mem);
473 }
474 
475 void
476 pool_runqueue(struct pool *pp, int flags)
477 {
478 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
479 	struct pool_request *pr;
480 
481 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
482 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
483 
484 	if (pp->pr_requesting++)
485 		return;
486 
487 	do {
488 		pp->pr_requesting = 1;
489 
490 		/* no TAILQ_JOIN? :( */
491 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
492 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
493 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
494 		}
495 		if (TAILQ_EMPTY(&prl))
496 			continue;
497 
498 		mtx_leave(&pp->pr_requests_mtx);
499 
500 		mtx_enter(&pp->pr_mtx);
501 		pr = TAILQ_FIRST(&prl);
502 		while (pr != NULL) {
503 			int slowdown = 0;
504 
505 			if (pp->pr_nout >= pp->pr_hardlimit)
506 				break;
507 
508 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
509 			if (pr->pr_item == NULL) /* || slowdown ? */
510 				break;
511 
512 			pr = TAILQ_NEXT(pr, pr_entry);
513 		}
514 		mtx_leave(&pp->pr_mtx);
515 
516 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
517 		    pr->pr_item != NULL) {
518 			TAILQ_REMOVE(&prl, pr, pr_entry);
519 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
520 		}
521 
522 		mtx_enter(&pp->pr_requests_mtx);
523 	} while (--pp->pr_requesting);
524 
525 	/* no TAILQ_JOIN :( */
526 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
527 		TAILQ_REMOVE(&prl, pr, pr_entry);
528 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
529 	}
530 }
531 
532 void *
533 pool_do_get(struct pool *pp, int flags, int *slowdown)
534 {
535 	struct pool_item *pi;
536 	struct pool_item_header *ph;
537 
538 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
539 
540 	/*
541 	 * Account for this item now to avoid races if we need to give up
542 	 * pr_mtx to allocate a page.
543 	 */
544 	pp->pr_nout++;
545 
546 	if (pp->pr_curpage == NULL) {
547 		mtx_leave(&pp->pr_mtx);
548 		ph = pool_p_alloc(pp, flags, slowdown);
549 		mtx_enter(&pp->pr_mtx);
550 
551 		if (ph == NULL) {
552 			pp->pr_nout--;
553 			return (NULL);
554 		}
555 
556 		pool_p_insert(pp, ph);
557 	}
558 
559 	ph = pp->pr_curpage;
560 	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
561 	if (__predict_false(pi == NULL))
562 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
563 
564 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
565 		panic("%s: %s free list modified: "
566 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
567 		    __func__, pp->pr_wchan, ph->ph_page, pi,
568 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
569 	}
570 
571 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
572 
573 #ifdef DIAGNOSTIC
574 	if (pool_debug && POOL_PHPOISON(ph)) {
575 		size_t pidx;
576 		uint32_t pval;
577 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
578 		    &pidx, &pval)) {
579 			int *ip = (int *)(pi + 1);
580 			panic("%s: %s free list modified: "
581 			    "page %p; item addr %p; offset 0x%zx=0x%x",
582 			    __func__, pp->pr_wchan, ph->ph_page, pi,
583 			    pidx * sizeof(int), ip[pidx]);
584 		}
585 	}
586 #endif /* DIAGNOSTIC */
587 
588 	if (ph->ph_nmissing++ == 0) {
589 		/*
590 		 * This page was previously empty.  Move it to the list of
591 		 * partially-full pages.  This page is already curpage.
592 		 */
593 		LIST_REMOVE(ph, ph_pagelist);
594 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
595 
596 		pp->pr_nidle--;
597 	}
598 
599 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
600 		/*
601 		 * This page is now full.  Move it to the full list
602 		 * and select a new current page.
603 		 */
604 		LIST_REMOVE(ph, ph_pagelist);
605 		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
606 		pool_update_curpage(pp);
607 	}
608 
609 	pp->pr_nget++;
610 
611 	return (pi);
612 }
613 
614 /*
615  * Return resource to the pool.
616  */
617 void
618 pool_put(struct pool *pp, void *v)
619 {
620 	struct pool_item *pi = v;
621 	struct pool_item_header *ph, *freeph = NULL;
622 
623 #ifdef DIAGNOSTIC
624 	if (v == NULL)
625 		panic("%s: NULL item", __func__);
626 #endif
627 
628 	mtx_enter(&pp->pr_mtx);
629 
630 	ph = pr_find_pagehead(pp, v);
631 
632 #ifdef DIAGNOSTIC
633 	if (pool_debug) {
634 		struct pool_item *qi;
635 		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
636 			if (pi == qi) {
637 				panic("%s: %s: double pool_put: %p", __func__,
638 				    pp->pr_wchan, pi);
639 			}
640 		}
641 	}
642 #endif /* DIAGNOSTIC */
643 
644 	pi->pi_magic = POOL_IMAGIC(ph, pi);
645 	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
646 #ifdef DIAGNOSTIC
647 	if (POOL_PHPOISON(ph))
648 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
649 #endif /* DIAGNOSTIC */
650 
651 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
652 		/*
653 		 * The page was previously completely full, move it to the
654 		 * partially-full list.
655 		 */
656 		LIST_REMOVE(ph, ph_pagelist);
657 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
658 	}
659 
660 	if (ph->ph_nmissing == 0) {
661 		/*
662 		 * The page is now empty, so move it to the empty page list.
663 		 */
664 		pp->pr_nidle++;
665 
666 		LIST_REMOVE(ph, ph_pagelist);
667 		LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
668 		pool_update_curpage(pp);
669 	}
670 
671 	pp->pr_nout--;
672 	pp->pr_nput++;
673 
674 	/* is it time to free a page? */
675 	if (pp->pr_nidle > pp->pr_maxpages &&
676 	    (freeph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
677 		pool_p_remove(pp, freeph);
678 	mtx_leave(&pp->pr_mtx);
679 
680 	if (freeph != NULL)
681 		pool_p_free(pp, freeph);
682 
683 	mtx_enter(&pp->pr_requests_mtx);
684 	pool_runqueue(pp, PR_NOWAIT);
685 	mtx_leave(&pp->pr_requests_mtx);
686 }
687 
688 /*
689  * Add N items to the pool.
690  */
691 int
692 pool_prime(struct pool *pp, int n)
693 {
694 	struct pool_pagelist pl = LIST_HEAD_INITIALIZER(pl);
695 	struct pool_item_header *ph;
696 	int newpages;
697 
698 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
699 
700 	while (newpages-- > 0) {
701 		int slowdown = 0;
702 
703 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
704 		if (ph == NULL) /* or slowdown? */
705 			break;
706 
707 		LIST_INSERT_HEAD(&pl, ph, ph_pagelist);
708 	}
709 
710 	mtx_enter(&pp->pr_mtx);
711 	while ((ph = LIST_FIRST(&pl)) != NULL) {
712 		LIST_REMOVE(ph, ph_pagelist);
713 		pool_p_insert(pp, ph);
714 	}
715 	mtx_leave(&pp->pr_mtx);
716 
717 	return (0);
718 }
719 
720 struct pool_item_header *
721 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
722 {
723 	struct pool_item_header *ph;
724 	struct pool_item *pi;
725 	caddr_t addr;
726 	int n;
727 
728 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
729 	KASSERT(pp->pr_size >= sizeof(*pi));
730 
731 	addr = pool_allocator_alloc(pp, flags, slowdown);
732 	if (addr == NULL)
733 		return (NULL);
734 
735 	if (POOL_INPGHDR(pp))
736 		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
737 	else {
738 		ph = pool_get(&phpool, flags);
739 		if (ph == NULL) {
740 			pool_allocator_free(pp, addr);
741 			return (NULL);
742 		}
743 	}
744 
745 	XSIMPLEQ_INIT(&ph->ph_itemlist);
746 	ph->ph_page = addr;
747 	ph->ph_nmissing = 0;
748 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
749 #ifdef DIAGNOSTIC
750 	/* use a bit in ph_magic to record if we poison page items */
751 	if (pool_debug)
752 		SET(ph->ph_magic, POOL_MAGICBIT);
753 	else
754 		CLR(ph->ph_magic, POOL_MAGICBIT);
755 #endif /* DIAGNOSTIC */
756 
757 	n = pp->pr_itemsperpage;
758 	while (n--) {
759 		pi = (struct pool_item *)addr;
760 		pi->pi_magic = POOL_IMAGIC(ph, pi);
761 		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
762 
763 #ifdef DIAGNOSTIC
764 		if (POOL_PHPOISON(ph))
765 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
766 #endif /* DIAGNOSTIC */
767 
768 		addr += pp->pr_size;
769 	}
770 
771 	return (ph);
772 }
773 
774 void
775 pool_p_free(struct pool *pp, struct pool_item_header *ph)
776 {
777 	struct pool_item *pi;
778 
779 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
780 	KASSERT(ph->ph_nmissing == 0);
781 
782 	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
783 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
784 			panic("%s: %s free list modified: "
785 			    "page %p; item addr %p; offset 0x%x=0x%lx",
786 			    __func__, pp->pr_wchan, ph->ph_page, pi,
787 			    0, pi->pi_magic);
788 		}
789 
790 #ifdef DIAGNOSTIC
791 		if (POOL_PHPOISON(ph)) {
792 			size_t pidx;
793 			uint32_t pval;
794 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
795 			    &pidx, &pval)) {
796 				int *ip = (int *)(pi + 1);
797 				panic("%s: %s free list modified: "
798 				    "page %p; item addr %p; offset 0x%zx=0x%x",
799 				    __func__, pp->pr_wchan, ph->ph_page, pi,
800 				    pidx * sizeof(int), ip[pidx]);
801 			}
802 		}
803 #endif
804 	}
805 
806 	pool_allocator_free(pp, ph->ph_page);
807 
808 	if (!POOL_INPGHDR(pp))
809 		pool_put(&phpool, ph);
810 }
811 
812 void
813 pool_p_insert(struct pool *pp, struct pool_item_header *ph)
814 {
815 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
816 
817 	/* If the pool was depleted, point at the new page */
818 	if (pp->pr_curpage == NULL)
819 		pp->pr_curpage = ph;
820 
821 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
822 	if (!POOL_INPGHDR(pp))
823 		RB_INSERT(phtree, &pp->pr_phtree, ph);
824 
825 	pp->pr_nitems += pp->pr_itemsperpage;
826 	pp->pr_nidle++;
827 
828 	pp->pr_npagealloc++;
829 	if (++pp->pr_npages > pp->pr_hiwat)
830 		pp->pr_hiwat = pp->pr_npages;
831 }
832 
833 void
834 pool_p_remove(struct pool *pp, struct pool_item_header *ph)
835 {
836 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
837 
838 	pp->pr_npagefree++;
839 	pp->pr_npages--;
840 	pp->pr_nidle--;
841 	pp->pr_nitems -= pp->pr_itemsperpage;
842 
843 	if (!POOL_INPGHDR(pp))
844 		RB_REMOVE(phtree, &pp->pr_phtree, ph);
845 	LIST_REMOVE(ph, ph_pagelist);
846 
847 	pool_update_curpage(pp);
848 }
849 
850 void
851 pool_update_curpage(struct pool *pp)
852 {
853 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
854 	if (pp->pr_curpage == NULL) {
855 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
856 	}
857 }
858 
859 void
860 pool_setlowat(struct pool *pp, int n)
861 {
862 	int prime = 0;
863 
864 	mtx_enter(&pp->pr_mtx);
865 	pp->pr_minitems = n;
866 	pp->pr_minpages = (n == 0)
867 		? 0
868 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
869 
870 	if (pp->pr_nitems < n)
871 		prime = n - pp->pr_nitems;
872 	mtx_leave(&pp->pr_mtx);
873 
874 	if (prime > 0)
875 		pool_prime(pp, prime);
876 }
877 
878 void
879 pool_sethiwat(struct pool *pp, int n)
880 {
881 	pp->pr_maxpages = (n == 0)
882 		? 0
883 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
884 }
885 
886 int
887 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
888 {
889 	int error = 0;
890 
891 	if (n < pp->pr_nout) {
892 		error = EINVAL;
893 		goto done;
894 	}
895 
896 	pp->pr_hardlimit = n;
897 	pp->pr_hardlimit_warning = warnmsg;
898 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
899 	pp->pr_hardlimit_warning_last.tv_sec = 0;
900 	pp->pr_hardlimit_warning_last.tv_usec = 0;
901 
902 done:
903 	return (error);
904 }
905 
906 void
907 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
908 {
909 	pp->pr_crange = mode;
910 }
911 
912 /*
913  * Release all complete pages that have not been used recently.
914  *
915  * Returns non-zero if any pages have been reclaimed.
916  */
917 int
918 pool_reclaim(struct pool *pp)
919 {
920 	struct pool_item_header *ph, *phnext;
921 	struct pool_pagelist pl = LIST_HEAD_INITIALIZER(pl);
922 
923 	mtx_enter(&pp->pr_mtx);
924 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
925 		phnext = LIST_NEXT(ph, ph_pagelist);
926 
927 		/* Check our minimum page claim */
928 		if (pp->pr_npages <= pp->pr_minpages)
929 			break;
930 
931 		/*
932 		 * If freeing this page would put us below
933 		 * the low water mark, stop now.
934 		 */
935 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
936 		    pp->pr_minitems)
937 			break;
938 
939 		pool_p_remove(pp, ph);
940 		LIST_INSERT_HEAD(&pl, ph, ph_pagelist);
941 	}
942 	mtx_leave(&pp->pr_mtx);
943 
944 	if (LIST_EMPTY(&pl))
945 		return (0);
946 
947 	while ((ph = LIST_FIRST(&pl)) != NULL) {
948 		LIST_REMOVE(ph, ph_pagelist);
949 		pool_p_free(pp, ph);
950 	}
951 
952 	return (1);
953 }
954 
955 /*
956  * Release all complete pages that have not been used recently
957  * from all pools.
958  */
959 void
960 pool_reclaim_all(void)
961 {
962 	struct pool	*pp;
963 
964 	rw_enter_read(&pool_lock);
965 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
966 		pool_reclaim(pp);
967 	rw_exit_read(&pool_lock);
968 }
969 
970 #ifdef DDB
971 #include <machine/db_machdep.h>
972 #include <ddb/db_interface.h>
973 #include <ddb/db_output.h>
974 
975 /*
976  * Diagnostic helpers.
977  */
978 void
979 pool_printit(struct pool *pp, const char *modif,
980     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
981 {
982 	pool_print1(pp, modif, pr);
983 }
984 
985 void
986 pool_print_pagelist(struct pool_pagelist *pl,
987     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
988 {
989 	struct pool_item_header *ph;
990 	struct pool_item *pi;
991 
992 	LIST_FOREACH(ph, pl, ph_pagelist) {
993 		(*pr)("\t\tpage %p, nmissing %d\n",
994 		    ph->ph_page, ph->ph_nmissing);
995 		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
996 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
997 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
998 				    pi, pi->pi_magic);
999 			}
1000 		}
1001 	}
1002 }
1003 
1004 void
1005 pool_print1(struct pool *pp, const char *modif,
1006     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1007 {
1008 	struct pool_item_header *ph;
1009 	int print_pagelist = 0;
1010 	char c;
1011 
1012 	while ((c = *modif++) != '\0') {
1013 		if (c == 'p')
1014 			print_pagelist = 1;
1016 	}
1017 
1018 	(*pr)("POOL %s: size %u, align %u, roflags 0x%08x\n",
1019 	    pp->pr_wchan, pp->pr_size, pp->pr_align,
1020 	    pp->pr_roflags);
1021 	(*pr)("\talloc %p\n", pp->pr_alloc);
1022 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1023 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1024 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1025 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1026 
1027 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1028 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1029 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1030 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1031 
1032 	if (print_pagelist == 0)
1033 		return;
1034 
1035 	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1036 		(*pr)("\n\tempty page list:\n");
1037 	pool_print_pagelist(&pp->pr_emptypages, pr);
1038 	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1039 		(*pr)("\n\tfull page list:\n");
1040 	pool_print_pagelist(&pp->pr_fullpages, pr);
1041 	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1042 		(*pr)("\n\tpartial-page list:\n");
1043 	pool_print_pagelist(&pp->pr_partpages, pr);
1044 
1045 	if (pp->pr_curpage == NULL)
1046 		(*pr)("\tno current page\n");
1047 	else
1048 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1049 }
1050 
1051 void
1052 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1053 {
1054 	struct pool *pp;
1055 	char maxp[16];
1056 	int ovflw;
1057 	char mode;
1058 
1059 	mode = modif[0];
1060 	if (mode != '\0' && mode != 'a') {
1061 		db_printf("usage: show all pools [/a]\n");
1062 		return;
1063 	}
1064 
1065 	if (mode == '\0')
1066 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1067 		    "Name",
1068 		    "Size",
1069 		    "Requests",
1070 		    "Fail",
1071 		    "Releases",
1072 		    "Pgreq",
1073 		    "Pgrel",
1074 		    "Npage",
1075 		    "Hiwat",
1076 		    "Minpg",
1077 		    "Maxpg",
1078 		    "Idle");
1079 	else
1080 		db_printf("%-12s %18s %18s\n",
1081 		    "Name", "Address", "Allocator");
1082 
1083 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1084 		if (mode == 'a') {
1085 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1086 			    pp->pr_alloc);
1087 			continue;
1088 		}
1089 
1090 		if (!pp->pr_nget)
1091 			continue;
1092 
1093 		if (pp->pr_maxpages == UINT_MAX)
1094 			snprintf(maxp, sizeof maxp, "inf");
1095 		else
1096 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1097 
1098 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1099 	(ovflw) += db_printf((fmt),			\
1100 	    (width) - (fixed) - (ovflw) > 0 ?		\
1101 	    (width) - (fixed) - (ovflw) : 0,		\
1102 	    (val)) - (width);				\
1103 	if ((ovflw) < 0)				\
1104 		(ovflw) = 0;				\
1105 } while (/* CONSTCOND */0)
1106 
1107 		ovflw = 0;
1108 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1109 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1110 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1111 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1112 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1113 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1114 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1115 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1116 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1117 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1118 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1119 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1120 
1121 		pool_chk(pp);
1122 	}
1123 }
1124 #endif /* DDB */
1125 
1126 #if defined(POOL_DEBUG) || defined(DDB)
1127 int
1128 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1129 {
1130 	struct pool_item *pi;
1131 	caddr_t page;
1132 	int n;
1133 	const char *label = pp->pr_wchan;
1134 
1135 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1136 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1137 		printf("%s: ", label);
1138 		printf("pool(%p:%s): page inconsistency: page %p; "
1139 		    "at page head addr %p (p %p)\n",
1140 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1141 		return 1;
1142 	}
1143 
1144 	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
1145 	     pi != NULL;
1146 	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
1147 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1148 			printf("%s: ", label);
1149 			printf("pool(%p:%s): free list modified: "
1150 			    "page %p; item ordinal %d; addr %p "
1151 			    "(p %p); offset 0x%x=0x%lx\n",
1152 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1153 			    0, pi->pi_magic);
1154 		}
1155 
1156 #ifdef DIAGNOSTIC
1157 		if (POOL_PHPOISON(ph)) {
1158 			size_t pidx;
1159 			uint32_t pval;
1160 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1161 			    &pidx, &pval)) {
1162 				int *ip = (int *)(pi + 1);
1163 				printf("pool(%s): free list modified: "
1164 				    "page %p; item ordinal %d; addr %p "
1165 				    "(p %p); offset 0x%zx=0x%x\n",
1166 				    pp->pr_wchan, ph->ph_page, n, pi,
1167 				    page, pidx * sizeof(int), ip[pidx]);
1168 			}
1169 		}
1170 #endif /* DIAGNOSTIC */
1171 
1172 		page = (caddr_t)((u_long)pi & pp->pr_pgmask);
1173 		if (page == ph->ph_page)
1174 			continue;
1175 
1176 		printf("%s: ", label);
1177 		printf("pool(%p:%s): page inconsistency: page %p;"
1178 		    " item ordinal %d; addr %p (p %p)\n", pp,
1179 		    pp->pr_wchan, ph->ph_page, n, pi, page);
1180 		return 1;
1181 	}
1182 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1183 		printf("pool(%p:%s): page inconsistency: page %p;"
1184 		    " %d on list, %d missing, %d items per page\n", pp,
1185 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1186 		    pp->pr_itemsperpage);
1187 		return 1;
1188 	}
1189 	if (expected >= 0 && n != expected) {
1190 		printf("pool(%p:%s): page inconsistency: page %p;"
1191 		    " %d on list, %d missing, %d expected\n", pp,
1192 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1193 		    expected);
1194 		return 1;
1195 	}
1196 	return 0;
1197 }
1198 
1199 int
1200 pool_chk(struct pool *pp)
1201 {
1202 	struct pool_item_header *ph;
1203 	int r = 0;
1204 
1205 	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1206 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1207 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1208 		r += pool_chk_page(pp, ph, 0);
1209 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1210 		r += pool_chk_page(pp, ph, -1);
1211 
1212 	return (r);
1213 }
1214 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1215 
1216 #ifdef DDB
1217 void
1218 pool_walk(struct pool *pp, int full,
1219     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1220     void (*func)(void *, int, int (*)(const char *, ...)
1221 	    __attribute__((__format__(__kprintf__,1,2)))))
1222 {
1223 	struct pool_item_header *ph;
1224 	struct pool_item *pi;
1225 	caddr_t cp;
1226 	int n;
1227 
1228 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1229 		cp = ph->ph_page;
1230 		n = ph->ph_nmissing;
1231 
1232 		while (n--) {
1233 			func(cp, full, pr);
1234 			cp += pp->pr_size;
1235 		}
1236 	}
1237 
1238 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1239 		cp = ph->ph_page;
1240 		n = ph->ph_nmissing;
1241 
1242 		do {
1243 			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1244 				if (cp == (caddr_t)pi)
1245 					break;
1246 			}
1247 			if (cp != (caddr_t)pi) {
1248 				func(cp, full, pr);
1249 				n--;
1250 			}
1251 
1252 			cp += pp->pr_size;
1253 		} while (n > 0);
1254 	}
1255 }
1256 #endif
1257 
1258 /*
1259  * We have three different sysctls.
1260  * kern.pool.npools - the number of pools.
1261  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1262  * kern.pool.name.<pool#> - the name for pool#.
1263  */
1264 int
1265 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1266 {
1267 	struct kinfo_pool pi;
1268 	struct pool *pp;
1269 	int rv = ENOENT;
1270 
1271 	switch (name[0]) {
1272 	case KERN_POOL_NPOOLS:
1273 		if (namelen != 1)
1274 			return (ENOTDIR);
1275 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1276 
1277 	case KERN_POOL_NAME:
1278 	case KERN_POOL_POOL:
1279 		break;
1280 	default:
1281 		return (EOPNOTSUPP);
1282 	}
1283 
1284 	if (namelen != 2)
1285 		return (ENOTDIR);
1286 
1287 	rw_enter_read(&pool_lock);
1288 
1289 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1290 		if (name[1] == pp->pr_serial)
1291 			break;
1292 	}
1293 
1294 	if (pp == NULL)
1295 		goto done;
1296 
1297 	switch (name[0]) {
1298 	case KERN_POOL_NAME:
1299 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1300 		break;
1301 	case KERN_POOL_POOL:
1302 		memset(&pi, 0, sizeof(pi));
1303 
1304 		if (pp->pr_ipl != -1)
1305 			mtx_enter(&pp->pr_mtx);
1306 		pi.pr_size = pp->pr_size;
1307 		pi.pr_pgsize = pp->pr_pgsize;
1308 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1309 		pi.pr_npages = pp->pr_npages;
1310 		pi.pr_minpages = pp->pr_minpages;
1311 		pi.pr_maxpages = pp->pr_maxpages;
1312 		pi.pr_hardlimit = pp->pr_hardlimit;
1313 		pi.pr_nout = pp->pr_nout;
1314 		pi.pr_nitems = pp->pr_nitems;
1315 		pi.pr_nget = pp->pr_nget;
1316 		pi.pr_nput = pp->pr_nput;
1317 		pi.pr_nfail = pp->pr_nfail;
1318 		pi.pr_npagealloc = pp->pr_npagealloc;
1319 		pi.pr_npagefree = pp->pr_npagefree;
1320 		pi.pr_hiwat = pp->pr_hiwat;
1321 		pi.pr_nidle = pp->pr_nidle;
1322 		if (pp->pr_ipl != -1)
1323 			mtx_leave(&pp->pr_mtx);
1324 
1325 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1326 		break;
1327 	}
1328 
1329 done:
1330 	rw_exit_read(&pool_lock);
1331 
1332 	return (rv);
1333 }
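
/*
 * Example (sketch): reading these sysctls from userland, roughly what
 * vmstat(8) -m does.  Error checking is trimmed, and the pool serial
 * number "serial" is assumed to have been found by walking 1..npools.
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/pool.h>
 *	#include <stdio.h>
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == 0)
 *		printf("%u byte items, %u out of %u allocated\n",
 *		    pi.pr_size, pi.pr_nout, pi.pr_nitems);
 */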
1334 
1335 /*
1336  * Pool backend allocators.
1337  */
1338 
1339 void *
1340 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1341 {
1342 	void *v;
1343 
1344 	KERNEL_LOCK();
1345 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1346 	KERNEL_UNLOCK();
1347 
1348 #ifdef DIAGNOSTIC
1349 	if (v != NULL && POOL_INPGHDR(pp)) {
1350 		vaddr_t addr = (vaddr_t)v;
1351 		if ((addr & pp->pr_pgmask) != addr) {
1352 			panic("%s: %s page address %p isn't aligned to %u",
1353 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1354 		}
1355 	}
1356 #endif
1357 
1358 	return (v);
1359 }
1360 
1361 void
1362 pool_allocator_free(struct pool *pp, void *v)
1363 {
1364 	struct pool_allocator *pa = pp->pr_alloc;
1365 
1366 	KERNEL_LOCK();
1367 	(*pa->pa_free)(pp, v);
1368 	KERNEL_UNLOCK();
1369 }
1370 
1371 void *
1372 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1373 {
1374 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1375 
1376 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1377 	kd.kd_slowdown = slowdown;
1378 
1379 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1380 }
1381 
1382 void
1383 pool_page_free(struct pool *pp, void *v)
1384 {
1385 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1386 }
1387 
1388 void *
1389 pool_large_alloc(struct pool *pp, int flags, int *slowdown)
1390 {
1391 	struct kmem_va_mode kv = kv_intrsafe;
1392 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1393 	void *v;
1394 	int s;
1395 
1396 	if (POOL_INPGHDR(pp))
1397 		kv.kv_align = pp->pr_pgsize;
1398 
1399 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1400 	kd.kd_slowdown = slowdown;
1401 
1402 	s = splvm();
1403 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1404 	splx(s);
1405 
1406 	return (v);
1407 }
1408 
1409 void
1410 pool_large_free(struct pool *pp, void *v)
1411 {
1412 	struct kmem_va_mode kv = kv_intrsafe;
1413 	int s;
1414 
1415 	if (POOL_INPGHDR(pp))
1416 		kv.kv_align = pp->pr_pgsize;
1417 
1418 	s = splvm();
1419 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1420 	splx(s);
1421 }
1422 
1423 void *
1424 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
1425 {
1426 	struct kmem_va_mode kv = kv_any;
1427 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1428 
1429 	if (POOL_INPGHDR(pp))
1430 		kv.kv_align = pp->pr_pgsize;
1431 
1432 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1433 	kd.kd_slowdown = slowdown;
1434 
1435 	return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
1436 }
1437 
1438 void
1439 pool_large_free_ni(struct pool *pp, void *v)
1440 {
1441 	struct kmem_va_mode kv = kv_any;
1442 
1443 	if (POOL_INPGHDR(pp))
1444 		kv.kv_align = pp->pr_pgsize;
1445 
1446 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1447 }
1448