1 /*	$OpenBSD: subr_pool.c,v 1.197 2016/09/15 01:24:08 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 
46 #include <uvm/uvm_extern.h>
47 
48 /*
49  * Pool resource management utility.
50  *
51  * Memory is allocated in pages which are split into pieces according to
52  * the pool item size. Each page is kept on one of three lists in the
53  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
54  * for empty, full and partially-full pages respectively. The individual
55  * pool items are on a linked list headed by `ph_itemlist' in each page
56  * header. The memory for building the page list is either taken from
57  * the allocated pages themselves (for small pool items) or taken from
58  * an internal pool of page headers (`phpool').
59  */
60 
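/*
 * Illustrative sketch of typical pool usage; the names below are
 * hypothetical and not part of this file.  A subsystem declares a
 * pool, initializes it once, then gets and puts fixed-size items.
 */

struct example_item {
	int	ei_id;
};

struct pool example_pool;

void
example_init(void)
{
	pool_init(&example_pool, sizeof(struct example_item), 0, 0, 0,
	    "examplpl", NULL);
	pool_setipl(&example_pool, IPL_NONE);
}

struct example_item *
example_alloc(void)
{
	/* PR_WAITOK may sleep until an item is available; PR_ZERO zeroes it */
	return (pool_get(&example_pool, PR_WAITOK | PR_ZERO));
}

void
example_free(struct example_item *ei)
{
	pool_put(&example_pool, ei);
}
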
61 /* List of all pools */
62 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
63 
64 /*
65  * Every pool gets a unique serial number assigned to it. If this counter
66  * wraps, we're screwed, but we shouldn't create so many pools anyway.
67  */
68 unsigned int pool_serial;
69 unsigned int pool_count;
70 
71 /* Lock protecting the preceding variables that make up the global pool state */
72 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
73 
74 /* Private pool for page header structures */
75 struct pool phpool;
76 
77 struct pool_item_header {
78 	/* Page headers */
79 	TAILQ_ENTRY(pool_item_header)
80 				ph_pagelist;	/* pool page list */
81 	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
82 	RBT_ENTRY(pool_item_header)
83 				ph_node;	/* Off-page page headers */
84 	int			ph_nmissing;	/* # of chunks in use */
85 	caddr_t			ph_page;	/* this page's address */
86 	caddr_t			ph_colored;	/* page's colored address */
87 	u_long			ph_magic;
88 	int			ph_tick;
89 };
90 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
91 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
92 
93 struct pool_item {
94 	u_long				pi_magic;
95 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
96 };
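/*
 * Expected magic value for a free item: the item's own address XORed
 * with the page's random ph_magic.  If an item is written to while it
 * sits on the free list, the stored pi_magic no longer matches and the
 * corruption is detected when the item is handed out or audited.
 */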
97 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
98 
99 #ifdef POOL_DEBUG
100 int	pool_debug = 1;
101 #else
102 int	pool_debug = 0;
103 #endif
104 
105 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
106 
107 struct pool_item_header *
108 	 pool_p_alloc(struct pool *, int, int *);
109 void	 pool_p_insert(struct pool *, struct pool_item_header *);
110 void	 pool_p_remove(struct pool *, struct pool_item_header *);
111 void	 pool_p_free(struct pool *, struct pool_item_header *);
112 
113 void	 pool_update_curpage(struct pool *);
114 void	*pool_do_get(struct pool *, int, int *);
115 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
116 int	 pool_chk(struct pool *);
117 void	 pool_get_done(void *, void *);
118 void	 pool_runqueue(struct pool *, int);
119 
120 void	*pool_allocator_alloc(struct pool *, int, int *);
121 void	 pool_allocator_free(struct pool *, void *);
122 
123 /*
124  * The default pool allocator.
125  */
126 void	*pool_page_alloc(struct pool *, int, int *);
127 void	pool_page_free(struct pool *, void *);
128 
129 /*
130  * safe for interrupts; this is the default allocator
131  */
132 struct pool_allocator pool_allocator_single = {
133 	pool_page_alloc,
134 	pool_page_free
135 };
136 
137 void	*pool_multi_alloc(struct pool *, int, int *);
138 void	pool_multi_free(struct pool *, void *);
139 
140 struct pool_allocator pool_allocator_multi = {
141 	pool_multi_alloc,
142 	pool_multi_free
143 };
144 
145 void	*pool_multi_alloc_ni(struct pool *, int, int *);
146 void	pool_multi_free_ni(struct pool *, void *);
147 
148 struct pool_allocator pool_allocator_multi_ni = {
149 	pool_multi_alloc_ni,
150 	pool_multi_free_ni
151 };
152 
153 #ifdef DDB
154 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
155 	     __attribute__((__format__(__kprintf__,1,2))));
156 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
157 	     __attribute__((__format__(__kprintf__,1,2))));
158 #endif
159 
160 /* stale page garbage collectors */
161 void	pool_gc_sched(void *);
162 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
163 void	pool_gc_pages(void *);
164 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
165 int pool_wait_free = 1;
166 int pool_wait_gc = 8;
167 
168 RBT_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
169 
170 static inline int
171 phtree_compare(const struct pool_item_header *a,
172     const struct pool_item_header *b)
173 {
174 	vaddr_t va = (vaddr_t)a->ph_page;
175 	vaddr_t vb = (vaddr_t)b->ph_page;
176 
177 	/* the comparisons must be done in this order for RBT_NFIND to work */
178 	if (vb < va)
179 		return (-1);
180 	if (vb > va)
181 		return (1);
182 
183 	return (0);
184 }
185 
186 RBT_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
187 
188 /*
189  * Return the pool page header based on page address.
190  */
191 static inline struct pool_item_header *
192 pr_find_pagehead(struct pool *pp, void *v)
193 {
194 	struct pool_item_header *ph, key;
195 
196 	if (POOL_INPGHDR(pp)) {
197 		caddr_t page;
198 
199 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
200 
201 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
202 	}
203 
204 	key.ph_page = v;
205 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
206 	if (ph == NULL)
207 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
208 
209 	KASSERT(ph->ph_page <= (caddr_t)v);
210 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
211 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
212 
213 	return (ph);
214 }
215 
216 /*
217  * Initialize the given pool resource structure.
218  *
219  * We export this routine to allow other kernel parts to declare
220  * static pools that must be initialized before malloc() is available.
221  */
222 void
223 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
224     const char *wchan, struct pool_allocator *palloc)
225 {
226 	int off = 0, space;
227 	unsigned int pgsize = PAGE_SIZE, items;
228 #ifdef DIAGNOSTIC
229 	struct pool *iter;
230 	KASSERT(ioff == 0);
231 #endif
232 
233 	if (align == 0)
234 		align = ALIGN(1);
235 
236 	if (size < sizeof(struct pool_item))
237 		size = sizeof(struct pool_item);
238 
239 	size = roundup(size, align);
240 
241 	if (palloc == NULL) {
242 		while (size * 8 > pgsize)
243 			pgsize <<= 1;
244 
245 		if (pgsize > PAGE_SIZE) {
246 			palloc = ISSET(flags, PR_WAITOK) ?
247 			    &pool_allocator_multi_ni : &pool_allocator_multi;
248 		} else
249 			palloc = &pool_allocator_single;
250 	} else
251 		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
252 
253 	items = pgsize / size;
254 
255 	/*
256 	 * Decide whether to put the page header off page to avoid
257 	 * wasting too large a part of the page. Off-page page headers
258 	 * go into an RB tree, so we can match a returned item with
259 	 * its header based on the page address.
260 	 */
261 	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
262 		off = pgsize - sizeof(struct pool_item_header);
263 	} else if (sizeof(struct pool_item_header) * 2 >= size) {
264 		off = pgsize - sizeof(struct pool_item_header);
265 		items = off / size;
266 	}
267 
268 	KASSERT(items > 0);
269 
270 	/*
271 	 * Initialize the pool structure.
272 	 */
273 	memset(pp, 0, sizeof(*pp));
274 	TAILQ_INIT(&pp->pr_emptypages);
275 	TAILQ_INIT(&pp->pr_fullpages);
276 	TAILQ_INIT(&pp->pr_partpages);
277 	pp->pr_curpage = NULL;
278 	pp->pr_npages = 0;
279 	pp->pr_minitems = 0;
280 	pp->pr_minpages = 0;
281 	pp->pr_maxpages = 8;
282 	pp->pr_size = size;
283 	pp->pr_pgsize = pgsize;
284 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
285 	pp->pr_phoffset = off;
286 	pp->pr_itemsperpage = items;
287 	pp->pr_wchan = wchan;
288 	pp->pr_alloc = palloc;
289 	pp->pr_nitems = 0;
290 	pp->pr_nout = 0;
291 	pp->pr_hardlimit = UINT_MAX;
292 	pp->pr_hardlimit_warning = NULL;
293 	pp->pr_hardlimit_ratecap.tv_sec = 0;
294 	pp->pr_hardlimit_ratecap.tv_usec = 0;
295 	pp->pr_hardlimit_warning_last.tv_sec = 0;
296 	pp->pr_hardlimit_warning_last.tv_usec = 0;
297 	RBT_INIT(phtree, &pp->pr_phtree);
298 
299 	/*
300 	 * Use the space between the chunks and the page header
301 	 * for cache coloring.
302 	 */
303 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
304 	space -= pp->pr_itemsperpage * pp->pr_size;
305 	pp->pr_align = align;
306 	pp->pr_maxcolors = (space / align) + 1;
307 
308 	pp->pr_nget = 0;
309 	pp->pr_nfail = 0;
310 	pp->pr_nput = 0;
311 	pp->pr_npagealloc = 0;
312 	pp->pr_npagefree = 0;
313 	pp->pr_hiwat = 0;
314 	pp->pr_nidle = 0;
315 
316 	pp->pr_ipl = -1;
317 	mtx_init(&pp->pr_mtx, IPL_NONE);
318 	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
319 	TAILQ_INIT(&pp->pr_requests);
320 
321 	if (phpool.pr_size == 0) {
322 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
323 		    0, "phpool", NULL);
324 		pool_setipl(&phpool, IPL_HIGH);
325 
326 		/* make sure phpool won't "recurse" */
327 		KASSERT(POOL_INPGHDR(&phpool));
328 	}
329 
330 	/* pglistalloc/constraint parameters */
331 	pp->pr_crange = &kp_dirty;
332 
333 	/* Insert this into the list of all pools. */
334 	rw_enter_write(&pool_lock);
335 #ifdef DIAGNOSTIC
336 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
337 		if (iter == pp)
338 			panic("%s: pool %s already on list", __func__, wchan);
339 	}
340 #endif
341 
342 	pp->pr_serial = ++pool_serial;
343 	if (pool_serial == 0)
344 		panic("%s: too much uptime", __func__);
345 
346 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
347 	pool_count++;
348 	rw_exit_write(&pool_lock);
349 }
350 
351 void
352 pool_setipl(struct pool *pp, int ipl)
353 {
354 	pp->pr_ipl = ipl;
355 	mtx_init(&pp->pr_mtx, ipl);
356 	mtx_init(&pp->pr_requests_mtx, ipl);
357 }
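
/*
 * Illustrative sketch (hypothetical names): a pool that is also used
 * from interrupt context is given an interrupt protection level with
 * pool_setipl(), and interrupt-side callers allocate with PR_NOWAIT
 * and handle failure instead of sleeping.
 */

struct pool example_intr_pool;

void
example_intr_pool_init(void)
{
	pool_init(&example_intr_pool, 128, 0, 0, 0, "exintrpl", NULL);
	pool_setipl(&example_intr_pool, IPL_HIGH);
}

void *
example_intr_alloc(void)
{
	void *v;

	v = pool_get(&example_intr_pool, PR_NOWAIT);
	/* v may be NULL here; the caller must cope with failure */
	return (v);
}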
358 
359 /*
360  * Decommission a pool resource.
361  */
362 void
363 pool_destroy(struct pool *pp)
364 {
365 	struct pool_item_header *ph;
366 	struct pool *prev, *iter;
367 
368 #ifdef DIAGNOSTIC
369 	if (pp->pr_nout != 0)
370 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
371 #endif
372 
373 	/* Remove from global pool list */
374 	rw_enter_write(&pool_lock);
375 	pool_count--;
376 	if (pp == SIMPLEQ_FIRST(&pool_head))
377 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
378 	else {
379 		prev = SIMPLEQ_FIRST(&pool_head);
380 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
381 			if (iter == pp) {
382 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
383 				    pr_poollist);
384 				break;
385 			}
386 			prev = iter;
387 		}
388 	}
389 	rw_exit_write(&pool_lock);
390 
391 	/* Remove all pages */
392 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
393 		mtx_enter(&pp->pr_mtx);
394 		pool_p_remove(pp, ph);
395 		mtx_leave(&pp->pr_mtx);
396 		pool_p_free(pp, ph);
397 	}
398 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
399 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
400 }
401 
402 void
403 pool_request_init(struct pool_request *pr,
404     void (*handler)(void *, void *), void *cookie)
405 {
406 	pr->pr_handler = handler;
407 	pr->pr_cookie = cookie;
408 	pr->pr_item = NULL;
409 }
410 
411 void
412 pool_request(struct pool *pp, struct pool_request *pr)
413 {
414 	mtx_enter(&pp->pr_requests_mtx);
415 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
416 	pool_runqueue(pp, PR_NOWAIT);
417 	mtx_leave(&pp->pr_requests_mtx);
418 }
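
/*
 * Illustrative sketch (hypothetical names): pool_request() lets code
 * that must not sleep queue a request for an item.  The handler is
 * called with the item once one can be allocated; that may happen
 * immediately from within pool_request(), or later from pool_put()
 * when another item is returned to the pool.
 */

struct example_pending {
	struct pool_request	ep_req;
	void			*ep_item;
};

void
example_item_ready(void *cookie, void *item)
{
	struct example_pending *ep = cookie;

	/* runs once the pool can satisfy the request */
	ep->ep_item = item;
}

void
example_request_item(struct pool *pp, struct example_pending *ep)
{
	ep->ep_item = NULL;
	pool_request_init(&ep->ep_req, example_item_ready, ep);
	pool_request(pp, &ep->ep_req);
}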
419 
420 struct pool_get_memory {
421 	struct mutex mtx;
422 	void * volatile v;
423 };
424 
425 /*
426  * Grab an item from the pool.
427  */
428 void *
429 pool_get(struct pool *pp, int flags)
430 {
431 	void *v = NULL;
432 	int slowdown = 0;
433 
434 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
435 
436 
437 	mtx_enter(&pp->pr_mtx);
438 	if (pp->pr_nout >= pp->pr_hardlimit) {
439 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
440 			goto fail;
441 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
442 		if (ISSET(flags, PR_NOWAIT))
443 			goto fail;
444 	}
445 	mtx_leave(&pp->pr_mtx);
446 
447 	if (slowdown && ISSET(flags, PR_WAITOK))
448 		yield();
449 
450 	if (v == NULL) {
451 		struct pool_get_memory mem = {
452 		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
453 		    IPL_NONE : pp->pr_ipl), NULL };
454 		struct pool_request pr;
455 
456 		pool_request_init(&pr, pool_get_done, &mem);
457 		pool_request(pp, &pr);
458 
459 		mtx_enter(&mem.mtx);
460 		while (mem.v == NULL)
461 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
462 		mtx_leave(&mem.mtx);
463 
464 		v = mem.v;
465 	}
466 
467 	if (ISSET(flags, PR_ZERO))
468 		memset(v, 0, pp->pr_size);
469 
470 	return (v);
471 
472 fail:
473 	pp->pr_nfail++;
474 	mtx_leave(&pp->pr_mtx);
475 	return (NULL);
476 }
477 
478 void
479 pool_get_done(void *xmem, void *v)
480 {
481 	struct pool_get_memory *mem = xmem;
482 
483 	mtx_enter(&mem->mtx);
484 	mem->v = v;
485 	mtx_leave(&mem->mtx);
486 
487 	wakeup_one(mem);
488 }
489 
490 void
491 pool_runqueue(struct pool *pp, int flags)
492 {
493 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
494 	struct pool_request *pr;
495 
496 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
497 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
498 
499 	if (pp->pr_requesting++)
500 		return;
501 
502 	do {
503 		pp->pr_requesting = 1;
504 
505 		/* no TAILQ_JOIN? :( */
506 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
507 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
508 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
509 		}
510 		if (TAILQ_EMPTY(&prl))
511 			continue;
512 
513 		mtx_leave(&pp->pr_requests_mtx);
514 
515 		mtx_enter(&pp->pr_mtx);
516 		pr = TAILQ_FIRST(&prl);
517 		while (pr != NULL) {
518 			int slowdown = 0;
519 
520 			if (pp->pr_nout >= pp->pr_hardlimit)
521 				break;
522 
523 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
524 			if (pr->pr_item == NULL) /* || slowdown ? */
525 				break;
526 
527 			pr = TAILQ_NEXT(pr, pr_entry);
528 		}
529 		mtx_leave(&pp->pr_mtx);
530 
531 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
532 		    pr->pr_item != NULL) {
533 			TAILQ_REMOVE(&prl, pr, pr_entry);
534 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
535 		}
536 
537 		mtx_enter(&pp->pr_requests_mtx);
538 	} while (--pp->pr_requesting);
539 
540 	/* no TAILQ_JOIN :( */
541 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
542 		TAILQ_REMOVE(&prl, pr, pr_entry);
543 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
544 	}
545 }
546 
547 void *
548 pool_do_get(struct pool *pp, int flags, int *slowdown)
549 {
550 	struct pool_item *pi;
551 	struct pool_item_header *ph;
552 
553 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
554 
555 	if (pp->pr_ipl != -1)
556 		splassert(pp->pr_ipl);
557 
558 	/*
559 	 * Account for this item now to avoid races if we need to give up
560 	 * pr_mtx to allocate a page.
561 	 */
562 	pp->pr_nout++;
563 
564 	if (pp->pr_curpage == NULL) {
565 		mtx_leave(&pp->pr_mtx);
566 		ph = pool_p_alloc(pp, flags, slowdown);
567 		mtx_enter(&pp->pr_mtx);
568 
569 		if (ph == NULL) {
570 			pp->pr_nout--;
571 			return (NULL);
572 		}
573 
574 		pool_p_insert(pp, ph);
575 	}
576 
577 	ph = pp->pr_curpage;
578 	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
579 	if (__predict_false(pi == NULL))
580 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
581 
582 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
583 		panic("%s: %s free list modified: "
584 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
585 		    __func__, pp->pr_wchan, ph->ph_page, pi,
586 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
587 	}
588 
589 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
590 
591 #ifdef DIAGNOSTIC
592 	if (pool_debug && POOL_PHPOISON(ph)) {
593 		size_t pidx;
594 		uint32_t pval;
595 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
596 		    &pidx, &pval)) {
597 			int *ip = (int *)(pi + 1);
598 			panic("%s: %s free list modified: "
599 			    "page %p; item addr %p; offset 0x%zx=0x%x",
600 			    __func__, pp->pr_wchan, ph->ph_page, pi,
601 			    pidx * sizeof(int), ip[pidx]);
602 		}
603 	}
604 #endif /* DIAGNOSTIC */
605 
606 	if (ph->ph_nmissing++ == 0) {
607 		/*
608 		 * This page was previously empty.  Move it to the list of
609 		 * partially-full pages.  This page is already curpage.
610 		 */
611 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
612 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
613 
614 		pp->pr_nidle--;
615 	}
616 
617 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
618 		/*
619 		 * This page is now full.  Move it to the full list
620 		 * and select a new current page.
621 		 */
622 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
623 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
624 		pool_update_curpage(pp);
625 	}
626 
627 	pp->pr_nget++;
628 
629 	return (pi);
630 }
631 
632 /*
633  * Return resource to the pool.
634  */
635 void
636 pool_put(struct pool *pp, void *v)
637 {
638 	struct pool_item *pi = v;
639 	struct pool_item_header *ph, *freeph = NULL;
640 
641 #ifdef DIAGNOSTIC
642 	if (v == NULL)
643 		panic("%s: NULL item", __func__);
644 #endif
645 
646 	mtx_enter(&pp->pr_mtx);
647 
648 	if (pp->pr_ipl != -1)
649 		splassert(pp->pr_ipl);
650 
651 	ph = pr_find_pagehead(pp, v);
652 
653 #ifdef DIAGNOSTIC
654 	if (pool_debug) {
655 		struct pool_item *qi;
656 		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
657 			if (pi == qi) {
658 				panic("%s: %s: double pool_put: %p", __func__,
659 				    pp->pr_wchan, pi);
660 			}
661 		}
662 	}
663 #endif /* DIAGNOSTIC */
664 
665 	pi->pi_magic = POOL_IMAGIC(ph, pi);
666 	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
667 #ifdef DIAGNOSTIC
668 	if (POOL_PHPOISON(ph))
669 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
670 #endif /* DIAGNOSTIC */
671 
672 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
673 		/*
674 		 * The page was previously completely full, move it to the
675 		 * partially-full list.
676 		 */
677 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
678 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
679 	}
680 
681 	if (ph->ph_nmissing == 0) {
682 		/*
683 		 * The page is now empty, so move it to the empty page list.
684 	 	 */
685 		pp->pr_nidle++;
686 
687 		ph->ph_tick = ticks;
688 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
689 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
690 		pool_update_curpage(pp);
691 	}
692 
693 	pp->pr_nout--;
694 	pp->pr_nput++;
695 
696 	/* is it time to free a page? */
697 	if (pp->pr_nidle > pp->pr_maxpages &&
698 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
699 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
700 		freeph = ph;
701 		pool_p_remove(pp, freeph);
702 	}
703 	mtx_leave(&pp->pr_mtx);
704 
705 	if (freeph != NULL)
706 		pool_p_free(pp, freeph);
707 
708 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
709 		mtx_enter(&pp->pr_requests_mtx);
710 		pool_runqueue(pp, PR_NOWAIT);
711 		mtx_leave(&pp->pr_requests_mtx);
712 	}
713 }
714 
715 /*
716  * Add N items to the pool.
717  */
718 int
719 pool_prime(struct pool *pp, int n)
720 {
721 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
722 	struct pool_item_header *ph;
723 	int newpages;
724 
725 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
726 
727 	while (newpages-- > 0) {
728 		int slowdown = 0;
729 
730 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
731 		if (ph == NULL) /* or slowdown? */
732 			break;
733 
734 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
735 	}
736 
737 	mtx_enter(&pp->pr_mtx);
738 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
739 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
740 		pool_p_insert(pp, ph);
741 	}
742 	mtx_leave(&pp->pr_mtx);
743 
744 	return (0);
745 }
746 
747 struct pool_item_header *
748 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
749 {
750 	struct pool_item_header *ph;
751 	struct pool_item *pi;
752 	caddr_t addr;
753 	int n;
754 
755 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
756 	KASSERT(pp->pr_size >= sizeof(*pi));
757 
758 	addr = pool_allocator_alloc(pp, flags, slowdown);
759 	if (addr == NULL)
760 		return (NULL);
761 
762 	if (POOL_INPGHDR(pp))
763 		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
764 	else {
765 		ph = pool_get(&phpool, flags);
766 		if (ph == NULL) {
767 			pool_allocator_free(pp, addr);
768 			return (NULL);
769 		}
770 	}
771 
772 	XSIMPLEQ_INIT(&ph->ph_itemlist);
773 	ph->ph_page = addr;
774 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
775 	ph->ph_colored = addr;
776 	ph->ph_nmissing = 0;
777 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
778 #ifdef DIAGNOSTIC
779 	/* use a bit in ph_magic to record if we poison page items */
780 	if (pool_debug)
781 		SET(ph->ph_magic, POOL_MAGICBIT);
782 	else
783 		CLR(ph->ph_magic, POOL_MAGICBIT);
784 #endif /* DIAGNOSTIC */
785 
786 	n = pp->pr_itemsperpage;
787 	while (n--) {
788 		pi = (struct pool_item *)addr;
789 		pi->pi_magic = POOL_IMAGIC(ph, pi);
790 		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
791 
792 #ifdef DIAGNOSTIC
793 		if (POOL_PHPOISON(ph))
794 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
795 #endif /* DIAGNOSTIC */
796 
797 		addr += pp->pr_size;
798 	}
799 
800 	return (ph);
801 }
802 
803 void
804 pool_p_free(struct pool *pp, struct pool_item_header *ph)
805 {
806 	struct pool_item *pi;
807 
808 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
809 	KASSERT(ph->ph_nmissing == 0);
810 
811 	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
812 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
813 			panic("%s: %s free list modified: "
814 			    "page %p; item addr %p; offset 0x%x=0x%lx",
815 			    __func__, pp->pr_wchan, ph->ph_page, pi,
816 			    0, pi->pi_magic);
817 		}
818 
819 #ifdef DIAGNOSTIC
820 		if (POOL_PHPOISON(ph)) {
821 			size_t pidx;
822 			uint32_t pval;
823 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
824 			    &pidx, &pval)) {
825 				int *ip = (int *)(pi + 1);
826 				panic("%s: %s free list modified: "
827 				    "page %p; item addr %p; offset 0x%zx=0x%x",
828 				    __func__, pp->pr_wchan, ph->ph_page, pi,
829 				    pidx * sizeof(int), ip[pidx]);
830 			}
831 		}
832 #endif
833 	}
834 
835 	pool_allocator_free(pp, ph->ph_page);
836 
837 	if (!POOL_INPGHDR(pp))
838 		pool_put(&phpool, ph);
839 }
840 
841 void
842 pool_p_insert(struct pool *pp, struct pool_item_header *ph)
843 {
844 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
845 
846 	/* If the pool was depleted, point at the new page */
847 	if (pp->pr_curpage == NULL)
848 		pp->pr_curpage = ph;
849 
850 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
851 	if (!POOL_INPGHDR(pp))
852 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
853 
854 	pp->pr_nitems += pp->pr_itemsperpage;
855 	pp->pr_nidle++;
856 
857 	pp->pr_npagealloc++;
858 	if (++pp->pr_npages > pp->pr_hiwat)
859 		pp->pr_hiwat = pp->pr_npages;
860 }
861 
862 void
863 pool_p_remove(struct pool *pp, struct pool_item_header *ph)
864 {
865 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
866 
867 	pp->pr_npagefree++;
868 	pp->pr_npages--;
869 	pp->pr_nidle--;
870 	pp->pr_nitems -= pp->pr_itemsperpage;
871 
872 	if (!POOL_INPGHDR(pp))
873 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
874 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
875 
876 	pool_update_curpage(pp);
877 }
878 
879 void
880 pool_update_curpage(struct pool *pp)
881 {
882 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
883 	if (pp->pr_curpage == NULL) {
884 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
885 	}
886 }
887 
888 void
889 pool_setlowat(struct pool *pp, int n)
890 {
891 	int prime = 0;
892 
893 	mtx_enter(&pp->pr_mtx);
894 	pp->pr_minitems = n;
895 	pp->pr_minpages = (n == 0)
896 		? 0
897 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
898 
899 	if (pp->pr_nitems < n)
900 		prime = n - pp->pr_nitems;
901 	mtx_leave(&pp->pr_mtx);
902 
903 	if (prime > 0)
904 		pool_prime(pp, prime);
905 }
906 
907 void
908 pool_sethiwat(struct pool *pp, int n)
909 {
910 	pp->pr_maxpages = (n == 0)
911 		? 0
912 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
913 }
914 
915 int
916 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
917 {
918 	int error = 0;
919 
920 	if (n < pp->pr_nout) {
921 		error = EINVAL;
922 		goto done;
923 	}
924 
925 	pp->pr_hardlimit = n;
926 	pp->pr_hardlimit_warning = warnmsg;
927 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
928 	pp->pr_hardlimit_warning_last.tv_sec = 0;
929 	pp->pr_hardlimit_warning_last.tv_usec = 0;
930 
931 done:
932 	return (error);
933 }
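
/*
 * Illustrative sketch (hypothetical values): the watermark and limit
 * knobs above are typically applied right after pool_init().
 */

void
example_tune(struct pool *pp)
{
	/* prime the pool to 64 items and keep at least that many around */
	pool_setlowat(pp, 64);

	/* start freeing idle pages beyond roughly 1024 items' worth */
	pool_sethiwat(pp, 1024);

	/* cap the number of outstanding items at 4096 */
	pool_sethardlimit(pp, 4096, "example: hard limit reached", 60);
}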
934 
935 void
936 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
937 {
938 	pp->pr_crange = mode;
939 }
940 
941 /*
942  * Release all complete pages that have not been used recently.
943  *
944  * Returns non-zero if any pages have been reclaimed.
945  */
946 int
947 pool_reclaim(struct pool *pp)
948 {
949 	struct pool_item_header *ph, *phnext;
950 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
951 
952 	mtx_enter(&pp->pr_mtx);
953 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
954 		phnext = TAILQ_NEXT(ph, ph_pagelist);
955 
956 		/* Check our minimum page claim */
957 		if (pp->pr_npages <= pp->pr_minpages)
958 			break;
959 
960 		/*
961 		 * If freeing this page would put us below
962 		 * the low water mark, stop now.
963 		 */
964 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
965 		    pp->pr_minitems)
966 			break;
967 
968 		pool_p_remove(pp, ph);
969 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
970 	}
971 	mtx_leave(&pp->pr_mtx);
972 
973 	if (TAILQ_EMPTY(&pl))
974 		return (0);
975 
976 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
977 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
978 		pool_p_free(pp, ph);
979 	}
980 
981 	return (1);
982 }
983 
984 /*
985  * Release all complete pages that have not been used recently
986  * from all pools.
987  */
988 void
989 pool_reclaim_all(void)
990 {
991 	struct pool	*pp;
992 
993 	rw_enter_read(&pool_lock);
994 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
995 		pool_reclaim(pp);
996 	rw_exit_read(&pool_lock);
997 }
998 
999 #ifdef DDB
1000 #include <machine/db_machdep.h>
1001 #include <ddb/db_output.h>
1002 
1003 /*
1004  * Diagnostic helpers.
1005  */
1006 void
1007 pool_printit(struct pool *pp, const char *modif,
1008     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1009 {
1010 	pool_print1(pp, modif, pr);
1011 }
1012 
1013 void
1014 pool_print_pagelist(struct pool_pagelist *pl,
1015     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1016 {
1017 	struct pool_item_header *ph;
1018 	struct pool_item *pi;
1019 
1020 	TAILQ_FOREACH(ph, pl, ph_pagelist) {
1021 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1022 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1023 		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1024 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1025 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1026 				    pi, pi->pi_magic);
1027 			}
1028 		}
1029 	}
1030 }
1031 
1032 void
1033 pool_print1(struct pool *pp, const char *modif,
1034     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1035 {
1036 	struct pool_item_header *ph;
1037 	int print_pagelist = 0;
1038 	char c;
1039 
1040 	while ((c = *modif++) != '\0') {
1041 		if (c == 'p')
1042 			print_pagelist = 1;
1044 	}
1045 
1046 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1047 	    pp->pr_maxcolors);
1048 	(*pr)("\talloc %p\n", pp->pr_alloc);
1049 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1050 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1051 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1052 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1053 
1054 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1055 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1056 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1057 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1058 
1059 	if (print_pagelist == 0)
1060 		return;
1061 
1062 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1063 		(*pr)("\n\tempty page list:\n");
1064 	pool_print_pagelist(&pp->pr_emptypages, pr);
1065 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1066 		(*pr)("\n\tfull page list:\n");
1067 	pool_print_pagelist(&pp->pr_fullpages, pr);
1068 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1069 		(*pr)("\n\tpartial-page list:\n");
1070 	pool_print_pagelist(&pp->pr_partpages, pr);
1071 
1072 	if (pp->pr_curpage == NULL)
1073 		(*pr)("\tno current page\n");
1074 	else
1075 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1076 }
1077 
1078 void
1079 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1080 {
1081 	struct pool *pp;
1082 	char maxp[16];
1083 	int ovflw;
1084 	char mode;
1085 
1086 	mode = modif[0];
1087 	if (mode != '\0' && mode != 'a') {
1088 		db_printf("usage: show all pools [/a]\n");
1089 		return;
1090 	}
1091 
1092 	if (mode == '\0')
1093 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1094 		    "Name",
1095 		    "Size",
1096 		    "Requests",
1097 		    "Fail",
1098 		    "Releases",
1099 		    "Pgreq",
1100 		    "Pgrel",
1101 		    "Npage",
1102 		    "Hiwat",
1103 		    "Minpg",
1104 		    "Maxpg",
1105 		    "Idle");
1106 	else
1107 		db_printf("%-12s %18s %18s\n",
1108 		    "Name", "Address", "Allocator");
1109 
1110 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1111 		if (mode == 'a') {
1112 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1113 			    pp->pr_alloc);
1114 			continue;
1115 		}
1116 
1117 		if (!pp->pr_nget)
1118 			continue;
1119 
1120 		if (pp->pr_maxpages == UINT_MAX)
1121 			snprintf(maxp, sizeof maxp, "inf");
1122 		else
1123 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1124 
1125 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1126 	(ovflw) += db_printf((fmt),			\
1127 	    (width) - (fixed) - (ovflw) > 0 ?		\
1128 	    (width) - (fixed) - (ovflw) : 0,		\
1129 	    (val)) - (width);				\
1130 	if ((ovflw) < 0)				\
1131 		(ovflw) = 0;				\
1132 } while (/* CONSTCOND */0)
1133 
1134 		ovflw = 0;
1135 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1136 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1137 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1138 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1139 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1140 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1141 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1142 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1143 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1144 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1145 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1146 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1147 
1148 		pool_chk(pp);
1149 	}
1150 }
1151 #endif /* DDB */
1152 
1153 #if defined(POOL_DEBUG) || defined(DDB)
1154 int
1155 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1156 {
1157 	struct pool_item *pi;
1158 	caddr_t page;
1159 	int n;
1160 	const char *label = pp->pr_wchan;
1161 
1162 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1163 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1164 		printf("%s: ", label);
1165 		printf("pool(%p:%s): page inconsistency: page %p; "
1166 		    "at page head addr %p (p %p)\n",
1167 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1168 		return 1;
1169 	}
1170 
1171 	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
1172 	     pi != NULL;
1173 	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
1174 		if ((caddr_t)pi < ph->ph_page ||
1175 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1176 			printf("%s: ", label);
1177 			printf("pool(%p:%s): page inconsistency: page %p;"
1178 			    " item ordinal %d; addr %p\n", pp,
1179 			    pp->pr_wchan, ph->ph_page, n, pi);
1180 			return (1);
1181 		}
1182 
1183 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1184 			printf("%s: ", label);
1185 			printf("pool(%p:%s): free list modified: "
1186 			    "page %p; item ordinal %d; addr %p "
1187 			    "(p %p); offset 0x%x=0x%lx\n",
1188 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1189 			    0, pi->pi_magic);
1190 		}
1191 
1192 #ifdef DIAGNOSTIC
1193 		if (POOL_PHPOISON(ph)) {
1194 			size_t pidx;
1195 			uint32_t pval;
1196 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1197 			    &pidx, &pval)) {
1198 				int *ip = (int *)(pi + 1);
1199 				printf("pool(%s): free list modified: "
1200 				    "page %p; item ordinal %d; addr %p "
1201 				    "(p %p); offset 0x%zx=0x%x\n",
1202 				    pp->pr_wchan, ph->ph_page, n, pi,
1203 				    page, pidx * sizeof(int), ip[pidx]);
1204 			}
1205 		}
1206 #endif /* DIAGNOSTIC */
1207 	}
1208 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1209 		printf("pool(%p:%s): page inconsistency: page %p;"
1210 		    " %d on list, %d missing, %d items per page\n", pp,
1211 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1212 		    pp->pr_itemsperpage);
1213 		return 1;
1214 	}
1215 	if (expected >= 0 && n != expected) {
1216 		printf("pool(%p:%s): page inconsistency: page %p;"
1217 		    " %d on list, %d missing, %d expected\n", pp,
1218 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1219 		    expected);
1220 		return 1;
1221 	}
1222 	return 0;
1223 }
1224 
1225 int
1226 pool_chk(struct pool *pp)
1227 {
1228 	struct pool_item_header *ph;
1229 	int r = 0;
1230 
1231 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1232 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1233 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1234 		r += pool_chk_page(pp, ph, 0);
1235 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1236 		r += pool_chk_page(pp, ph, -1);
1237 
1238 	return (r);
1239 }
1240 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1241 
1242 #ifdef DDB
1243 void
1244 pool_walk(struct pool *pp, int full,
1245     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1246     void (*func)(void *, int, int (*)(const char *, ...)
1247 	    __attribute__((__format__(__kprintf__,1,2)))))
1248 {
1249 	struct pool_item_header *ph;
1250 	struct pool_item *pi;
1251 	caddr_t cp;
1252 	int n;
1253 
1254 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1255 		cp = ph->ph_colored;
1256 		n = ph->ph_nmissing;
1257 
1258 		while (n--) {
1259 			func(cp, full, pr);
1260 			cp += pp->pr_size;
1261 		}
1262 	}
1263 
1264 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1265 		cp = ph->ph_colored;
1266 		n = ph->ph_nmissing;
1267 
1268 		do {
1269 			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1270 				if (cp == (caddr_t)pi)
1271 					break;
1272 			}
1273 			if (cp != (caddr_t)pi) {
1274 				func(cp, full, pr);
1275 				n--;
1276 			}
1277 
1278 			cp += pp->pr_size;
1279 		} while (n > 0);
1280 	}
1281 }
1282 #endif
1283 
1284 /*
1285  * We have three different sysctls.
1286  * kern.pool.npools - the number of pools.
1287  * kern.pool.pool.<pool#> - the struct kinfo_pool for pool#.
1288  * kern.pool.name.<pool#> - the name for pool#.
1289  */
1290 int
1291 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1292 {
1293 	struct kinfo_pool pi;
1294 	struct pool *pp;
1295 	int rv = ENOENT;
1296 
1297 	switch (name[0]) {
1298 	case KERN_POOL_NPOOLS:
1299 		if (namelen != 1)
1300 			return (ENOTDIR);
1301 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1302 
1303 	case KERN_POOL_NAME:
1304 	case KERN_POOL_POOL:
1305 		break;
1306 	default:
1307 		return (EOPNOTSUPP);
1308 	}
1309 
1310 	if (namelen != 2)
1311 		return (ENOTDIR);
1312 
1313 	rw_enter_read(&pool_lock);
1314 
1315 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1316 		if (name[1] == pp->pr_serial)
1317 			break;
1318 	}
1319 
1320 	if (pp == NULL)
1321 		goto done;
1322 
1323 	switch (name[0]) {
1324 	case KERN_POOL_NAME:
1325 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1326 		break;
1327 	case KERN_POOL_POOL:
1328 		memset(&pi, 0, sizeof(pi));
1329 
1330 		if (pp->pr_ipl != -1)
1331 			mtx_enter(&pp->pr_mtx);
1332 		pi.pr_size = pp->pr_size;
1333 		pi.pr_pgsize = pp->pr_pgsize;
1334 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1335 		pi.pr_npages = pp->pr_npages;
1336 		pi.pr_minpages = pp->pr_minpages;
1337 		pi.pr_maxpages = pp->pr_maxpages;
1338 		pi.pr_hardlimit = pp->pr_hardlimit;
1339 		pi.pr_nout = pp->pr_nout;
1340 		pi.pr_nitems = pp->pr_nitems;
1341 		pi.pr_nget = pp->pr_nget;
1342 		pi.pr_nput = pp->pr_nput;
1343 		pi.pr_nfail = pp->pr_nfail;
1344 		pi.pr_npagealloc = pp->pr_npagealloc;
1345 		pi.pr_npagefree = pp->pr_npagefree;
1346 		pi.pr_hiwat = pp->pr_hiwat;
1347 		pi.pr_nidle = pp->pr_nidle;
1348 		if (pp->pr_ipl != -1)
1349 			mtx_leave(&pp->pr_mtx);
1350 
1351 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1352 		break;
1353 	}
1354 
1355 done:
1356 	rw_exit_read(&pool_lock);
1357 
1358 	return (rv);
1359 }
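
/*
 * Illustrative userland sketch (assumes the conventional CTL_KERN /
 * KERN_POOL mib layout; pool serial numbers start at 1):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *	printf("%u of %u items in use\n", pi.pr_nout, pi.pr_nitems);
 */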
1360 
1361 void
1362 pool_gc_sched(void *null)
1363 {
1364 	task_add(systqmp, &pool_gc_task);
1365 }
1366 
1367 void
1368 pool_gc_pages(void *null)
1369 {
1370 	struct pool *pp;
1371 	struct pool_item_header *ph, *freeph;
1372 	int s;
1373 
1374 	rw_enter_read(&pool_lock);
1375 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1376 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1377 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1378 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1379 			continue;
1380 
1381 		/* is it time to free a page? */
1382 		if (pp->pr_nidle > pp->pr_minpages &&
1383 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1384 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1385 			freeph = ph;
1386 			pool_p_remove(pp, freeph);
1387 		} else
1388 			freeph = NULL;
1389 
1390 		mtx_leave(&pp->pr_mtx);
1391 
1392 		if (freeph != NULL)
1393 			pool_p_free(pp, freeph);
1394 	}
1395 	splx(s);
1396 	rw_exit_read(&pool_lock);
1397 
1398 	timeout_add_sec(&pool_gc_tick, 1);
1399 }
1400 
1401 /*
1402  * Pool backend allocators.
1403  */
1404 
1405 void *
1406 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1407 {
1408 	void *v;
1409 
1410 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1411 
1412 #ifdef DIAGNOSTIC
1413 	if (v != NULL && POOL_INPGHDR(pp)) {
1414 		vaddr_t addr = (vaddr_t)v;
1415 		if ((addr & pp->pr_pgmask) != addr) {
1416 			panic("%s: %s page address %p isn't aligned to %u",
1417 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1418 		}
1419 	}
1420 #endif
1421 
1422 	return (v);
1423 }
1424 
1425 void
1426 pool_allocator_free(struct pool *pp, void *v)
1427 {
1428 	struct pool_allocator *pa = pp->pr_alloc;
1429 
1430 	(*pa->pa_free)(pp, v);
1431 }
1432 
1433 void *
1434 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1435 {
1436 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1437 
1438 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1439 	kd.kd_slowdown = slowdown;
1440 
1441 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1442 }
1443 
1444 void
1445 pool_page_free(struct pool *pp, void *v)
1446 {
1447 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1448 }
1449 
1450 void *
1451 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1452 {
1453 	struct kmem_va_mode kv = kv_intrsafe;
1454 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1455 	void *v;
1456 	int s;
1457 
1458 	if (POOL_INPGHDR(pp))
1459 		kv.kv_align = pp->pr_pgsize;
1460 
1461 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1462 	kd.kd_slowdown = slowdown;
1463 
1464 	s = splvm();
1465 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1466 	splx(s);
1467 
1468 	return (v);
1469 }
1470 
1471 void
1472 pool_multi_free(struct pool *pp, void *v)
1473 {
1474 	struct kmem_va_mode kv = kv_intrsafe;
1475 	int s;
1476 
1477 	if (POOL_INPGHDR(pp))
1478 		kv.kv_align = pp->pr_pgsize;
1479 
1480 	s = splvm();
1481 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1482 	splx(s);
1483 }
1484 
1485 void *
1486 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1487 {
1488 	struct kmem_va_mode kv = kv_any;
1489 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1490 	void *v;
1491 
1492 	if (POOL_INPGHDR(pp))
1493 		kv.kv_align = pp->pr_pgsize;
1494 
1495 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1496 	kd.kd_slowdown = slowdown;
1497 
1498 	KERNEL_LOCK();
1499 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1500 	KERNEL_UNLOCK();
1501 
1502 	return (v);
1503 }
1504 
1505 void
1506 pool_multi_free_ni(struct pool *pp, void *v)
1507 {
1508 	struct kmem_va_mode kv = kv_any;
1509 
1510 	if (POOL_INPGHDR(pp))
1511 		kv.kv_align = pp->pr_pgsize;
1512 
1513 	KERNEL_LOCK();
1514 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1515 	KERNEL_UNLOCK();
1516 }
1517