xref: /openbsd-src/sys/kern/subr_pool.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: subr_pool.c,v 1.194 2016/01/15 11:21:58 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 
46 #include <uvm/uvm_extern.h>
47 
48 /*
49  * Pool resource management utility.
50  *
51  * Memory is allocated in pages which are split into pieces according to
52  * the pool item size. Each page is kept on one of three lists in the
53  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
54  * for empty, full and partially-full pages respectively. The individual
55  * pool items are on a linked list headed by `ph_itemlist' in each page
56  * header. The memory for building the page list is either taken from
57  * the allocated pages themselves (for small pool items) or taken from
58  * an internal pool of page headers (`phpool').
59  */
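/*
 * Illustrative usage sketch (not part of this file): a consumer
 * typically declares a pool, initializes it once, and then gets and
 * puts fixed-size items.  "struct myobj", "myobj_pool" and IPL_NET
 * are hypothetical names chosen only for this example.
 *
 *	struct pool myobj_pool;
 *
 *	pool_init(&myobj_pool, sizeof(struct myobj), 0, 0, 0,
 *	    "myobjpl", NULL);
 *	pool_setipl(&myobj_pool, IPL_NET);
 *
 *	struct myobj *o = pool_get(&myobj_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&myobj_pool, o);
 */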
60 
61 /* List of all pools */
62 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
63 
64 /*
65  * Every pool gets a unique serial number assigned to it. If this counter
66  * wraps, we're screwed, but we shouldn't create so many pools anyway.
67  */
68 unsigned int pool_serial;
69 unsigned int pool_count;
70 
71 /* Lock protecting the preceding variables that make up the global pool state */
72 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
73 
74 /* Private pool for page header structures */
75 struct pool phpool;
76 
77 struct pool_item_header {
78 	/* Page headers */
79 	TAILQ_ENTRY(pool_item_header)
80 				ph_pagelist;	/* pool page list */
81 	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
82 	RB_ENTRY(pool_item_header)
83 				ph_node;	/* Off-page page headers */
84 	int			ph_nmissing;	/* # of chunks in use */
85 	caddr_t			ph_page;	/* this page's address */
86 	caddr_t			ph_colored;	/* page's colored address */
87 	u_long			ph_magic;
88 	int			ph_tick;
89 };
90 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
91 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
92 
93 struct pool_item {
94 	u_long				pi_magic;
95 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
96 };
97 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
98 
99 #ifdef POOL_DEBUG
100 int	pool_debug = 1;
101 #else
102 int	pool_debug = 0;
103 #endif
104 
105 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
106 
107 struct pool_item_header *
108 	 pool_p_alloc(struct pool *, int, int *);
109 void	 pool_p_insert(struct pool *, struct pool_item_header *);
110 void	 pool_p_remove(struct pool *, struct pool_item_header *);
111 void	 pool_p_free(struct pool *, struct pool_item_header *);
112 
113 void	 pool_update_curpage(struct pool *);
114 void	*pool_do_get(struct pool *, int, int *);
115 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
116 int	 pool_chk(struct pool *);
117 void	 pool_get_done(void *, void *);
118 void	 pool_runqueue(struct pool *, int);
119 
120 void	*pool_allocator_alloc(struct pool *, int, int *);
121 void	 pool_allocator_free(struct pool *, void *);
122 
123 /*
124  * The default pool allocator.
125  */
126 void	*pool_page_alloc(struct pool *, int, int *);
127 void	pool_page_free(struct pool *, void *);
128 
129 /*
130  * safe for interrupts; this is the default allocator
131  */
132 struct pool_allocator pool_allocator_single = {
133 	pool_page_alloc,
134 	pool_page_free
135 };
136 
137 void	*pool_multi_alloc(struct pool *, int, int *);
138 void	pool_multi_free(struct pool *, void *);
139 
140 struct pool_allocator pool_allocator_multi = {
141 	pool_multi_alloc,
142 	pool_multi_free
143 };
144 
145 void	*pool_multi_alloc_ni(struct pool *, int, int *);
146 void	pool_multi_free_ni(struct pool *, void *);
147 
148 struct pool_allocator pool_allocator_multi_ni = {
149 	pool_multi_alloc_ni,
150 	pool_multi_free_ni
151 };
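/*
 * Note on allocator selection (see pool_init() below): when no
 * allocator is passed in, the page size is grown until at least eight
 * items fit per page.  If that still fits within PAGE_SIZE the
 * single-page allocator above is used; otherwise pool_allocator_multi
 * serves interrupt-safe pools and pool_allocator_multi_ni (which may
 * sleep under the kernel lock) serves pools created with PR_WAITOK.
 */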
152 
153 #ifdef DDB
154 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
155 	     __attribute__((__format__(__kprintf__,1,2))));
156 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
157 	     __attribute__((__format__(__kprintf__,1,2))));
158 #endif
159 
160 /* stale page garbage collectors */
161 void	pool_gc_sched(void *);
162 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
163 void	pool_gc_pages(void *);
164 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
165 int pool_wait_free = 1;
166 int pool_wait_gc = 8;
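/*
 * Empty pages are not returned to the backend allocator immediately:
 * pool_put() frees a page only after it has sat idle for at least
 * pool_wait_free seconds, and the timeout-driven garbage collector
 * below uses pool_wait_gc seconds; see pool_put() and pool_gc_pages().
 */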
167 
168 static inline int
169 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
170 {
171 	vaddr_t va = (vaddr_t)a->ph_page;
172 	vaddr_t vb = (vaddr_t)b->ph_page;
173 
174 	/* the comparisons must be done in this order for RB_NFIND to work */
175 	if (vb < va)
176 		return (-1);
177 	if (vb > va)
178 		return (1);
179 
180 	return (0);
181 }
182 
183 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
184 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
185 
186 /*
187  * Return the pool page header based on page address.
188  */
189 static inline struct pool_item_header *
190 pr_find_pagehead(struct pool *pp, void *v)
191 {
192 	struct pool_item_header *ph, key;
193 
194 	if (POOL_INPGHDR(pp)) {
195 		caddr_t page;
196 
197 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
198 
199 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
200 	}
201 
202 	key.ph_page = v;
203 	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
204 	if (ph == NULL)
205 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
206 
207 	KASSERT(ph->ph_page <= (caddr_t)v);
208 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
209 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
210 
211 	return (ph);
212 }
213 
214 /*
215  * Initialize the given pool resource structure.
216  *
217  * We export this routine to allow other kernel parts to declare
218  * static pools that must be initialized before malloc() is available.
219  */
220 void
221 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
222     const char *wchan, struct pool_allocator *palloc)
223 {
224 	int off = 0, space;
225 	unsigned int pgsize = PAGE_SIZE, items;
226 #ifdef DIAGNOSTIC
227 	struct pool *iter;
228 	KASSERT(ioff == 0);
229 #endif
230 
231 	if (align == 0)
232 		align = ALIGN(1);
233 
234 	if (size < sizeof(struct pool_item))
235 		size = sizeof(struct pool_item);
236 
237 	size = roundup(size, align);
238 
239 	if (palloc == NULL) {
240 		while (size * 8 > pgsize)
241 			pgsize <<= 1;
242 
243 		if (pgsize > PAGE_SIZE) {
244 			palloc = ISSET(flags, PR_WAITOK) ?
245 			    &pool_allocator_multi_ni : &pool_allocator_multi;
246 		} else
247 			palloc = &pool_allocator_single;
248 	} else
249 		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
250 
251 	items = pgsize / size;
252 
253 	/*
254 	 * Decide whether to put the page header off page to avoid
255 	 * wasting too large a part of the page. Off-page page headers
256 	 * go into an RB tree, so we can match a returned item with
257 	 * its header based on the page address.
258 	 */
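	/*
	 * For example (illustrative, assuming a 4096-byte page): with
	 * 64-byte items the page is packed full, but two headers exceed
	 * one item, so space for the header is carved off the end of
	 * the page and the item count reduced accordingly.  With
	 * 1024-byte items four of them fill the page exactly and each
	 * item dwarfs the header, so the header comes from phpool
	 * instead and is found again through the pr_phtree RB tree.
	 */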
259 	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
260 		off = pgsize - sizeof(struct pool_item_header);
261 	} else if (sizeof(struct pool_item_header) * 2 >= size) {
262 		off = pgsize - sizeof(struct pool_item_header);
263 		items = off / size;
264 	}
265 
266 	KASSERT(items > 0);
267 
268 	/*
269 	 * Initialize the pool structure.
270 	 */
271 	memset(pp, 0, sizeof(*pp));
272 	TAILQ_INIT(&pp->pr_emptypages);
273 	TAILQ_INIT(&pp->pr_fullpages);
274 	TAILQ_INIT(&pp->pr_partpages);
275 	pp->pr_curpage = NULL;
276 	pp->pr_npages = 0;
277 	pp->pr_minitems = 0;
278 	pp->pr_minpages = 0;
279 	pp->pr_maxpages = 8;
280 	pp->pr_size = size;
281 	pp->pr_pgsize = pgsize;
282 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
283 	pp->pr_phoffset = off;
284 	pp->pr_itemsperpage = items;
285 	pp->pr_wchan = wchan;
286 	pp->pr_alloc = palloc;
287 	pp->pr_nitems = 0;
288 	pp->pr_nout = 0;
289 	pp->pr_hardlimit = UINT_MAX;
290 	pp->pr_hardlimit_warning = NULL;
291 	pp->pr_hardlimit_ratecap.tv_sec = 0;
292 	pp->pr_hardlimit_ratecap.tv_usec = 0;
293 	pp->pr_hardlimit_warning_last.tv_sec = 0;
294 	pp->pr_hardlimit_warning_last.tv_usec = 0;
295 	RB_INIT(&pp->pr_phtree);
296 
297 	/*
298 	 * Use the space between the chunks and the page header
299 	 * for cache coloring.
300 	 */
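	/*
	 * Each new page starts its items at a slightly different offset
	 * within that slack (its "color"; see ph_colored in
	 * pool_p_alloc()), so that the same item index on different
	 * pages does not always map to the same cache lines.
	 * pr_maxcolors is the number of such offsets that fit.
	 */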
301 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
302 	space -= pp->pr_itemsperpage * pp->pr_size;
303 	pp->pr_align = align;
304 	pp->pr_maxcolors = (space / align) + 1;
305 
306 	pp->pr_nget = 0;
307 	pp->pr_nfail = 0;
308 	pp->pr_nput = 0;
309 	pp->pr_npagealloc = 0;
310 	pp->pr_npagefree = 0;
311 	pp->pr_hiwat = 0;
312 	pp->pr_nidle = 0;
313 
314 	pp->pr_ipl = -1;
315 	mtx_init(&pp->pr_mtx, IPL_NONE);
316 	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
317 	TAILQ_INIT(&pp->pr_requests);
318 
319 	if (phpool.pr_size == 0) {
320 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
321 		    0, "phpool", NULL);
322 		pool_setipl(&phpool, IPL_HIGH);
323 
324 		/* make sure phpool won't "recurse" */
325 		KASSERT(POOL_INPGHDR(&phpool));
326 	}
327 
328 	/* pglistalloc/constraint parameters */
329 	pp->pr_crange = &kp_dirty;
330 
331 	/* Insert this into the list of all pools. */
332 	rw_enter_write(&pool_lock);
333 #ifdef DIAGNOSTIC
334 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
335 		if (iter == pp)
336 			panic("%s: pool %s already on list", __func__, wchan);
337 	}
338 #endif
339 
340 	pp->pr_serial = ++pool_serial;
341 	if (pool_serial == 0)
342 		panic("%s: too much uptime", __func__);
343 
344 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
345 	pool_count++;
346 	rw_exit_write(&pool_lock);
347 }
348 
349 void
350 pool_setipl(struct pool *pp, int ipl)
351 {
352 	pp->pr_ipl = ipl;
353 	mtx_init(&pp->pr_mtx, ipl);
354 	mtx_init(&pp->pr_requests_mtx, ipl);
355 }
356 
357 /*
358  * Decommission a pool resource.
359  */
360 void
361 pool_destroy(struct pool *pp)
362 {
363 	struct pool_item_header *ph;
364 	struct pool *prev, *iter;
365 
366 #ifdef DIAGNOSTIC
367 	if (pp->pr_nout != 0)
368 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
369 #endif
370 
371 	/* Remove from global pool list */
372 	rw_enter_write(&pool_lock);
373 	pool_count--;
374 	if (pp == SIMPLEQ_FIRST(&pool_head))
375 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
376 	else {
377 		prev = SIMPLEQ_FIRST(&pool_head);
378 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
379 			if (iter == pp) {
380 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
381 				    pr_poollist);
382 				break;
383 			}
384 			prev = iter;
385 		}
386 	}
387 	rw_exit_write(&pool_lock);
388 
389 	/* Remove all pages */
390 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
391 		mtx_enter(&pp->pr_mtx);
392 		pool_p_remove(pp, ph);
393 		mtx_leave(&pp->pr_mtx);
394 		pool_p_free(pp, ph);
395 	}
396 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
397 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
398 }
399 
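/*
 * Asynchronous item requests: pool_request() queues a pool_request and
 * immediately tries to service the queue with PR_NOWAIT.  Requests that
 * cannot be satisfied stay queued and are completed from pool_put()
 * (via pool_runqueue()) as items come back, at which point the handler
 * is called with the cookie and the freshly allocated item.
 * pool_get() itself relies on this (see pool_get_done()) to sleep until
 * memory becomes available for PR_WAITOK allocations.
 */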
400 void
401 pool_request_init(struct pool_request *pr,
402     void (*handler)(void *, void *), void *cookie)
403 {
404 	pr->pr_handler = handler;
405 	pr->pr_cookie = cookie;
406 	pr->pr_item = NULL;
407 }
408 
409 void
410 pool_request(struct pool *pp, struct pool_request *pr)
411 {
412 	mtx_enter(&pp->pr_requests_mtx);
413 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
414 	pool_runqueue(pp, PR_NOWAIT);
415 	mtx_leave(&pp->pr_requests_mtx);
416 }
417 
418 struct pool_get_memory {
419 	struct mutex mtx;
420 	void * volatile v;
421 };
422 
423 /*
424  * Grab an item from the pool.
425  */
426 void *
427 pool_get(struct pool *pp, int flags)
428 {
429 	void *v = NULL;
430 	int slowdown = 0;
431 
432 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
433 
435 	mtx_enter(&pp->pr_mtx);
436 	if (pp->pr_nout >= pp->pr_hardlimit) {
437 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
438 			goto fail;
439 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
440 		if (ISSET(flags, PR_NOWAIT))
441 			goto fail;
442 	}
443 	mtx_leave(&pp->pr_mtx);
444 
445 	if (slowdown && ISSET(flags, PR_WAITOK))
446 		yield();
447 
448 	if (v == NULL) {
449 		struct pool_get_memory mem = {
450 		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
451 		    IPL_NONE : pp->pr_ipl), NULL };
452 		struct pool_request pr;
453 
454 		pool_request_init(&pr, pool_get_done, &mem);
455 		pool_request(pp, &pr);
456 
457 		mtx_enter(&mem.mtx);
458 		while (mem.v == NULL)
459 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
460 		mtx_leave(&mem.mtx);
461 
462 		v = mem.v;
463 	}
464 
465 	if (ISSET(flags, PR_ZERO))
466 		memset(v, 0, pp->pr_size);
467 
468 	return (v);
469 
470 fail:
471 	pp->pr_nfail++;
472 	mtx_leave(&pp->pr_mtx);
473 	return (NULL);
474 }
475 
476 void
477 pool_get_done(void *xmem, void *v)
478 {
479 	struct pool_get_memory *mem = xmem;
480 
481 	mtx_enter(&mem->mtx);
482 	mem->v = v;
483 	mtx_leave(&mem->mtx);
484 
485 	wakeup_one(mem);
486 }
487 
488 void
489 pool_runqueue(struct pool *pp, int flags)
490 {
491 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
492 	struct pool_request *pr;
493 
494 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
495 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
496 
497 	if (pp->pr_requesting++)
498 		return;
499 
500 	do {
501 		pp->pr_requesting = 1;
502 
503 		/* no TAILQ_JOIN? :( */
504 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
505 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
506 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
507 		}
508 		if (TAILQ_EMPTY(&prl))
509 			continue;
510 
511 		mtx_leave(&pp->pr_requests_mtx);
512 
513 		mtx_enter(&pp->pr_mtx);
514 		pr = TAILQ_FIRST(&prl);
515 		while (pr != NULL) {
516 			int slowdown = 0;
517 
518 			if (pp->pr_nout >= pp->pr_hardlimit)
519 				break;
520 
521 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
522 			if (pr->pr_item == NULL) /* || slowdown ? */
523 				break;
524 
525 			pr = TAILQ_NEXT(pr, pr_entry);
526 		}
527 		mtx_leave(&pp->pr_mtx);
528 
529 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
530 		    pr->pr_item != NULL) {
531 			TAILQ_REMOVE(&prl, pr, pr_entry);
532 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
533 		}
534 
535 		mtx_enter(&pp->pr_requests_mtx);
536 	} while (--pp->pr_requesting);
537 
538 	/* no TAILQ_JOIN :( */
539 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
540 		TAILQ_REMOVE(&prl, pr, pr_entry);
541 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
542 	}
543 }
544 
545 void *
546 pool_do_get(struct pool *pp, int flags, int *slowdown)
547 {
548 	struct pool_item *pi;
549 	struct pool_item_header *ph;
550 
551 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
552 
553 	if (pp->pr_ipl != -1)
554 		splassert(pp->pr_ipl);
555 
556 	/*
557 	 * Account for this item now to avoid races if we need to give up
558 	 * pr_mtx to allocate a page.
559 	 */
560 	pp->pr_nout++;
561 
562 	if (pp->pr_curpage == NULL) {
563 		mtx_leave(&pp->pr_mtx);
564 		ph = pool_p_alloc(pp, flags, slowdown);
565 		mtx_enter(&pp->pr_mtx);
566 
567 		if (ph == NULL) {
568 			pp->pr_nout--;
569 			return (NULL);
570 		}
571 
572 		pool_p_insert(pp, ph);
573 	}
574 
575 	ph = pp->pr_curpage;
576 	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
577 	if (__predict_false(pi == NULL))
578 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
579 
580 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
581 		panic("%s: %s free list modified: "
582 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
583 		    __func__, pp->pr_wchan, ph->ph_page, pi,
584 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
585 	}
586 
587 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
588 
589 #ifdef DIAGNOSTIC
590 	if (pool_debug && POOL_PHPOISON(ph)) {
591 		size_t pidx;
592 		uint32_t pval;
593 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
594 		    &pidx, &pval)) {
595 			int *ip = (int *)(pi + 1);
596 			panic("%s: %s free list modified: "
597 			    "page %p; item addr %p; offset 0x%zx=0x%x",
598 			    __func__, pp->pr_wchan, ph->ph_page, pi,
599 			    pidx * sizeof(int), ip[pidx]);
600 		}
601 	}
602 #endif /* DIAGNOSTIC */
603 
604 	if (ph->ph_nmissing++ == 0) {
605 		/*
606 		 * This page was previously empty.  Move it to the list of
607 		 * partially-full pages.  This page is already curpage.
608 		 */
609 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
610 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
611 
612 		pp->pr_nidle--;
613 	}
614 
615 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
616 		/*
617 		 * This page is now full.  Move it to the full list
618 		 * and select a new current page.
619 		 */
620 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
621 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
622 		pool_update_curpage(pp);
623 	}
624 
625 	pp->pr_nget++;
626 
627 	return (pi);
628 }
629 
630 /*
631  * Return resource to the pool.
632  */
633 void
634 pool_put(struct pool *pp, void *v)
635 {
636 	struct pool_item *pi = v;
637 	struct pool_item_header *ph, *freeph = NULL;
638 
639 #ifdef DIAGNOSTIC
640 	if (v == NULL)
641 		panic("%s: NULL item", __func__);
642 #endif
643 
644 	mtx_enter(&pp->pr_mtx);
645 
646 	if (pp->pr_ipl != -1)
647 		splassert(pp->pr_ipl);
648 
649 	ph = pr_find_pagehead(pp, v);
650 
651 #ifdef DIAGNOSTIC
652 	if (pool_debug) {
653 		struct pool_item *qi;
654 		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
655 			if (pi == qi) {
656 				panic("%s: %s: double pool_put: %p", __func__,
657 				    pp->pr_wchan, pi);
658 			}
659 		}
660 	}
661 #endif /* DIAGNOSTIC */
662 
663 	pi->pi_magic = POOL_IMAGIC(ph, pi);
664 	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
665 #ifdef DIAGNOSTIC
666 	if (POOL_PHPOISON(ph))
667 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
668 #endif /* DIAGNOSTIC */
669 
670 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
671 		/*
672 		 * The page was previously completely full, move it to the
673 		 * partially-full list.
674 		 */
675 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
676 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
677 	}
678 
679 	if (ph->ph_nmissing == 0) {
680 		/*
681 		 * The page is now empty, so move it to the empty page list.
682 		 */
683 		pp->pr_nidle++;
684 
685 		ph->ph_tick = ticks;
686 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
687 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
688 		pool_update_curpage(pp);
689 	}
690 
691 	pp->pr_nout--;
692 	pp->pr_nput++;
693 
694 	/* is it time to free a page? */
695 	if (pp->pr_nidle > pp->pr_maxpages &&
696 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
697 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
698 		freeph = ph;
699 		pool_p_remove(pp, freeph);
700 	}
701 	mtx_leave(&pp->pr_mtx);
702 
703 	if (freeph != NULL)
704 		pool_p_free(pp, freeph);
705 
706 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
707 		mtx_enter(&pp->pr_requests_mtx);
708 		pool_runqueue(pp, PR_NOWAIT);
709 		mtx_leave(&pp->pr_requests_mtx);
710 	}
711 }
712 
713 /*
714  * Add N items to the pool.
715  */
716 int
717 pool_prime(struct pool *pp, int n)
718 {
719 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
720 	struct pool_item_header *ph;
721 	int newpages;
722 
723 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
724 
725 	while (newpages-- > 0) {
726 		int slowdown = 0;
727 
728 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
729 		if (ph == NULL) /* or slowdown? */
730 			break;
731 
732 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
733 	}
734 
735 	mtx_enter(&pp->pr_mtx);
736 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
737 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
738 		pool_p_insert(pp, ph);
739 	}
740 	mtx_leave(&pp->pr_mtx);
741 
742 	return (0);
743 }
744 
745 struct pool_item_header *
746 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
747 {
748 	struct pool_item_header *ph;
749 	struct pool_item *pi;
750 	caddr_t addr;
751 	int n;
752 
753 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
754 	KASSERT(pp->pr_size >= sizeof(*pi));
755 
756 	addr = pool_allocator_alloc(pp, flags, slowdown);
757 	if (addr == NULL)
758 		return (NULL);
759 
760 	if (POOL_INPGHDR(pp))
761 		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
762 	else {
763 		ph = pool_get(&phpool, flags);
764 		if (ph == NULL) {
765 			pool_allocator_free(pp, addr);
766 			return (NULL);
767 		}
768 	}
769 
770 	XSIMPLEQ_INIT(&ph->ph_itemlist);
771 	ph->ph_page = addr;
772 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
773 	ph->ph_colored = addr;
774 	ph->ph_nmissing = 0;
775 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
776 #ifdef DIAGNOSTIC
777 	/* use a bit in ph_magic to record if we poison page items */
778 	if (pool_debug)
779 		SET(ph->ph_magic, POOL_MAGICBIT);
780 	else
781 		CLR(ph->ph_magic, POOL_MAGICBIT);
782 #endif /* DIAGNOSTIC */
783 
784 	n = pp->pr_itemsperpage;
785 	while (n--) {
786 		pi = (struct pool_item *)addr;
787 		pi->pi_magic = POOL_IMAGIC(ph, pi);
788 		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
789 
790 #ifdef DIAGNOSTIC
791 		if (POOL_PHPOISON(ph))
792 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
793 #endif /* DIAGNOSTIC */
794 
795 		addr += pp->pr_size;
796 	}
797 
798 	return (ph);
799 }
800 
801 void
802 pool_p_free(struct pool *pp, struct pool_item_header *ph)
803 {
804 	struct pool_item *pi;
805 
806 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
807 	KASSERT(ph->ph_nmissing == 0);
808 
809 	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
810 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
811 			panic("%s: %s free list modified: "
812 			    "page %p; item addr %p; offset 0x%x=0x%lx",
813 			    __func__, pp->pr_wchan, ph->ph_page, pi,
814 			    0, pi->pi_magic);
815 		}
816 
817 #ifdef DIAGNOSTIC
818 		if (POOL_PHPOISON(ph)) {
819 			size_t pidx;
820 			uint32_t pval;
821 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
822 			    &pidx, &pval)) {
823 				int *ip = (int *)(pi + 1);
824 				panic("%s: %s free list modified: "
825 				    "page %p; item addr %p; offset 0x%zx=0x%x",
826 				    __func__, pp->pr_wchan, ph->ph_page, pi,
827 				    pidx * sizeof(int), ip[pidx]);
828 			}
829 		}
830 #endif
831 	}
832 
833 	pool_allocator_free(pp, ph->ph_page);
834 
835 	if (!POOL_INPGHDR(pp))
836 		pool_put(&phpool, ph);
837 }
838 
839 void
840 pool_p_insert(struct pool *pp, struct pool_item_header *ph)
841 {
842 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
843 
844 	/* If the pool was depleted, point at the new page */
845 	if (pp->pr_curpage == NULL)
846 		pp->pr_curpage = ph;
847 
848 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
849 	if (!POOL_INPGHDR(pp))
850 		RB_INSERT(phtree, &pp->pr_phtree, ph);
851 
852 	pp->pr_nitems += pp->pr_itemsperpage;
853 	pp->pr_nidle++;
854 
855 	pp->pr_npagealloc++;
856 	if (++pp->pr_npages > pp->pr_hiwat)
857 		pp->pr_hiwat = pp->pr_npages;
858 }
859 
860 void
861 pool_p_remove(struct pool *pp, struct pool_item_header *ph)
862 {
863 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
864 
865 	pp->pr_npagefree++;
866 	pp->pr_npages--;
867 	pp->pr_nidle--;
868 	pp->pr_nitems -= pp->pr_itemsperpage;
869 
870 	if (!POOL_INPGHDR(pp))
871 		RB_REMOVE(phtree, &pp->pr_phtree, ph);
872 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
873 
874 	pool_update_curpage(pp);
875 }
876 
877 void
878 pool_update_curpage(struct pool *pp)
879 {
880 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
881 	if (pp->pr_curpage == NULL) {
882 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
883 	}
884 }
885 
886 void
887 pool_setlowat(struct pool *pp, int n)
888 {
889 	int prime = 0;
890 
891 	mtx_enter(&pp->pr_mtx);
892 	pp->pr_minitems = n;
893 	pp->pr_minpages = (n == 0)
894 		? 0
895 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
896 
897 	if (pp->pr_nitems < n)
898 		prime = n - pp->pr_nitems;
899 	mtx_leave(&pp->pr_mtx);
900 
901 	if (prime > 0)
902 		pool_prime(pp, prime);
903 }
904 
905 void
906 pool_sethiwat(struct pool *pp, int n)
907 {
908 	pp->pr_maxpages = (n == 0)
909 		? 0
910 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
911 }
912 
913 int
914 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
915 {
916 	int error = 0;
917 
918 	if (n < pp->pr_nout) {
919 		error = EINVAL;
920 		goto done;
921 	}
922 
923 	pp->pr_hardlimit = n;
924 	pp->pr_hardlimit_warning = warnmsg;
925 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
926 	pp->pr_hardlimit_warning_last.tv_sec = 0;
927 	pp->pr_hardlimit_warning_last.tv_usec = 0;
928 
929 done:
930 	return (error);
931 }
932 
933 void
934 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
935 {
936 	pp->pr_crange = mode;
937 }
938 
939 /*
940  * Release all complete pages that have not been used recently.
941  *
942  * Returns non-zero if any pages have been reclaimed.
943  */
944 int
945 pool_reclaim(struct pool *pp)
946 {
947 	struct pool_item_header *ph, *phnext;
948 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
949 
950 	mtx_enter(&pp->pr_mtx);
951 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
952 		phnext = TAILQ_NEXT(ph, ph_pagelist);
953 
954 		/* Check our minimum page claim */
955 		if (pp->pr_npages <= pp->pr_minpages)
956 			break;
957 
958 		/*
959 		 * If freeing this page would put us below
960 		 * the low water mark, stop now.
961 		 */
962 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
963 		    pp->pr_minitems)
964 			break;
965 
966 		pool_p_remove(pp, ph);
967 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
968 	}
969 	mtx_leave(&pp->pr_mtx);
970 
971 	if (TAILQ_EMPTY(&pl))
972 		return (0);
973 
974 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
975 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
976 		pool_p_free(pp, ph);
977 	}
978 
979 	return (1);
980 }
981 
982 /*
983  * Release all complete pages that have not been used recently
984  * from all pools.
985  */
986 void
987 pool_reclaim_all(void)
988 {
989 	struct pool	*pp;
990 
991 	rw_enter_read(&pool_lock);
992 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
993 		pool_reclaim(pp);
994 	rw_exit_read(&pool_lock);
995 }
996 
997 #ifdef DDB
998 #include <machine/db_machdep.h>
999 #include <ddb/db_output.h>
1000 
1001 /*
1002  * Diagnostic helpers.
1003  */
1004 void
1005 pool_printit(struct pool *pp, const char *modif,
1006     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1007 {
1008 	pool_print1(pp, modif, pr);
1009 }
1010 
1011 void
1012 pool_print_pagelist(struct pool_pagelist *pl,
1013     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1014 {
1015 	struct pool_item_header *ph;
1016 	struct pool_item *pi;
1017 
1018 	TAILQ_FOREACH(ph, pl, ph_pagelist) {
1019 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1020 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1021 		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1022 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1023 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1024 				    pi, pi->pi_magic);
1025 			}
1026 		}
1027 	}
1028 }
1029 
1030 void
1031 pool_print1(struct pool *pp, const char *modif,
1032     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1033 {
1034 	struct pool_item_header *ph;
1035 	int print_pagelist = 0;
1036 	char c;
1037 
1038 	while ((c = *modif++) != '\0') {
1039 		if (c == 'p')
1040 			print_pagelist = 1;
1042 	}
1043 
1044 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1045 	    pp->pr_maxcolors);
1046 	(*pr)("\talloc %p\n", pp->pr_alloc);
1047 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1048 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1049 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1050 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1051 
1052 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1053 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1054 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1055 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1056 
1057 	if (print_pagelist == 0)
1058 		return;
1059 
1060 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1061 		(*pr)("\n\tempty page list:\n");
1062 	pool_print_pagelist(&pp->pr_emptypages, pr);
1063 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1064 		(*pr)("\n\tfull page list:\n");
1065 	pool_print_pagelist(&pp->pr_fullpages, pr);
1066 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1067 		(*pr)("\n\tpartial-page list:\n");
1068 	pool_print_pagelist(&pp->pr_partpages, pr);
1069 
1070 	if (pp->pr_curpage == NULL)
1071 		(*pr)("\tno current page\n");
1072 	else
1073 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1074 }
1075 
1076 void
1077 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1078 {
1079 	struct pool *pp;
1080 	char maxp[16];
1081 	int ovflw;
1082 	char mode;
1083 
1084 	mode = modif[0];
1085 	if (mode != '\0' && mode != 'a') {
1086 		db_printf("usage: show all pools [/a]\n");
1087 		return;
1088 	}
1089 
1090 	if (mode == '\0')
1091 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1092 		    "Name",
1093 		    "Size",
1094 		    "Requests",
1095 		    "Fail",
1096 		    "Releases",
1097 		    "Pgreq",
1098 		    "Pgrel",
1099 		    "Npage",
1100 		    "Hiwat",
1101 		    "Minpg",
1102 		    "Maxpg",
1103 		    "Idle");
1104 	else
1105 		db_printf("%-12s %18s %18s\n",
1106 		    "Name", "Address", "Allocator");
1107 
1108 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1109 		if (mode == 'a') {
1110 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1111 			    pp->pr_alloc);
1112 			continue;
1113 		}
1114 
1115 		if (!pp->pr_nget)
1116 			continue;
1117 
1118 		if (pp->pr_maxpages == UINT_MAX)
1119 			snprintf(maxp, sizeof maxp, "inf");
1120 		else
1121 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1122 
1123 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1124 	(ovflw) += db_printf((fmt),			\
1125 	    (width) - (fixed) - (ovflw) > 0 ?		\
1126 	    (width) - (fixed) - (ovflw) : 0,		\
1127 	    (val)) - (width);				\
1128 	if ((ovflw) < 0)				\
1129 		(ovflw) = 0;				\
1130 } while (/* CONSTCOND */0)
1131 
1132 		ovflw = 0;
1133 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1134 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1135 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1136 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1137 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1138 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1139 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1140 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1141 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1142 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1143 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1144 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1145 
1146 		pool_chk(pp);
1147 	}
1148 }
1149 #endif /* DDB */
1150 
1151 #if defined(POOL_DEBUG) || defined(DDB)
1152 int
1153 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1154 {
1155 	struct pool_item *pi;
1156 	caddr_t page;
1157 	int n;
1158 	const char *label = pp->pr_wchan;
1159 
1160 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1161 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1162 		printf("%s: ", label);
1163 		printf("pool(%p:%s): page inconsistency: page %p; "
1164 		    "at page head addr %p (p %p)\n",
1165 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1166 		return 1;
1167 	}
1168 
1169 	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
1170 	     pi != NULL;
1171 	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
1172 		if ((caddr_t)pi < ph->ph_page ||
1173 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1174 			printf("%s: ", label);
1175 			printf("pool(%p:%s): page inconsistency: page %p;"
1176 			    " item ordinal %d; addr %p\n", pp,
1177 			    pp->pr_wchan, ph->ph_page, n, pi);
1178 			return (1);
1179 		}
1180 
1181 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1182 			printf("%s: ", label);
1183 			printf("pool(%p:%s): free list modified: "
1184 			    "page %p; item ordinal %d; addr %p "
1185 			    "(p %p); offset 0x%x=0x%lx\n",
1186 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1187 			    0, pi->pi_magic);
1188 		}
1189 
1190 #ifdef DIAGNOSTIC
1191 		if (POOL_PHPOISON(ph)) {
1192 			size_t pidx;
1193 			uint32_t pval;
1194 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1195 			    &pidx, &pval)) {
1196 				int *ip = (int *)(pi + 1);
1197 				printf("pool(%s): free list modified: "
1198 				    "page %p; item ordinal %d; addr %p "
1199 				    "(p %p); offset 0x%zx=0x%x\n",
1200 				    pp->pr_wchan, ph->ph_page, n, pi,
1201 				    page, pidx * sizeof(int), ip[pidx]);
1202 			}
1203 		}
1204 #endif /* DIAGNOSTIC */
1205 	}
1206 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1207 		printf("pool(%p:%s): page inconsistency: page %p;"
1208 		    " %d on list, %d missing, %d items per page\n", pp,
1209 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1210 		    pp->pr_itemsperpage);
1211 		return 1;
1212 	}
1213 	if (expected >= 0 && n != expected) {
1214 		printf("pool(%p:%s): page inconsistency: page %p;"
1215 		    " %d on list, %d missing, %d expected\n", pp,
1216 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1217 		    expected);
1218 		return 1;
1219 	}
1220 	return 0;
1221 }
1222 
1223 int
1224 pool_chk(struct pool *pp)
1225 {
1226 	struct pool_item_header *ph;
1227 	int r = 0;
1228 
1229 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1230 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1231 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1232 		r += pool_chk_page(pp, ph, 0);
1233 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1234 		r += pool_chk_page(pp, ph, -1);
1235 
1236 	return (r);
1237 }
1238 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1239 
1240 #ifdef DDB
1241 void
1242 pool_walk(struct pool *pp, int full,
1243     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1244     void (*func)(void *, int, int (*)(const char *, ...)
1245 	    __attribute__((__format__(__kprintf__,1,2)))))
1246 {
1247 	struct pool_item_header *ph;
1248 	struct pool_item *pi;
1249 	caddr_t cp;
1250 	int n;
1251 
1252 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1253 		cp = ph->ph_colored;
1254 		n = ph->ph_nmissing;
1255 
1256 		while (n--) {
1257 			func(cp, full, pr);
1258 			cp += pp->pr_size;
1259 		}
1260 	}
1261 
1262 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1263 		cp = ph->ph_colored;
1264 		n = ph->ph_nmissing;
1265 
1266 		do {
1267 			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1268 				if (cp == (caddr_t)pi)
1269 					break;
1270 			}
1271 			if (cp != (caddr_t)pi) {
1272 				func(cp, full, pr);
1273 				n--;
1274 			}
1275 
1276 			cp += pp->pr_size;
1277 		} while (n > 0);
1278 	}
1279 }
1280 #endif
1281 
1282 /*
1283  * We have three different sysctls.
1284  * kern.pool.npools - the number of pools.
1285  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1286  * kern.pool.name.<pool#> - the name for pool#.
1287  */
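/*
 * Illustrative userland sketch (not part of this file): reading one
 * pool's statistics through the MIB, assuming <sys/sysctl.h> and a
 * known pool serial number "serial".
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *	printf("%u of %u items in use\n", pi.pr_nout, pi.pr_nitems);
 */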
1288 int
1289 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1290 {
1291 	struct kinfo_pool pi;
1292 	struct pool *pp;
1293 	int rv = ENOENT;
1294 
1295 	switch (name[0]) {
1296 	case KERN_POOL_NPOOLS:
1297 		if (namelen != 1)
1298 			return (ENOTDIR);
1299 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1300 
1301 	case KERN_POOL_NAME:
1302 	case KERN_POOL_POOL:
1303 		break;
1304 	default:
1305 		return (EOPNOTSUPP);
1306 	}
1307 
1308 	if (namelen != 2)
1309 		return (ENOTDIR);
1310 
1311 	rw_enter_read(&pool_lock);
1312 
1313 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1314 		if (name[1] == pp->pr_serial)
1315 			break;
1316 	}
1317 
1318 	if (pp == NULL)
1319 		goto done;
1320 
1321 	switch (name[0]) {
1322 	case KERN_POOL_NAME:
1323 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1324 		break;
1325 	case KERN_POOL_POOL:
1326 		memset(&pi, 0, sizeof(pi));
1327 
1328 		if (pp->pr_ipl != -1)
1329 			mtx_enter(&pp->pr_mtx);
1330 		pi.pr_size = pp->pr_size;
1331 		pi.pr_pgsize = pp->pr_pgsize;
1332 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1333 		pi.pr_npages = pp->pr_npages;
1334 		pi.pr_minpages = pp->pr_minpages;
1335 		pi.pr_maxpages = pp->pr_maxpages;
1336 		pi.pr_hardlimit = pp->pr_hardlimit;
1337 		pi.pr_nout = pp->pr_nout;
1338 		pi.pr_nitems = pp->pr_nitems;
1339 		pi.pr_nget = pp->pr_nget;
1340 		pi.pr_nput = pp->pr_nput;
1341 		pi.pr_nfail = pp->pr_nfail;
1342 		pi.pr_npagealloc = pp->pr_npagealloc;
1343 		pi.pr_npagefree = pp->pr_npagefree;
1344 		pi.pr_hiwat = pp->pr_hiwat;
1345 		pi.pr_nidle = pp->pr_nidle;
1346 		if (pp->pr_ipl != -1)
1347 			mtx_leave(&pp->pr_mtx);
1348 
1349 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1350 		break;
1351 	}
1352 
1353 done:
1354 	rw_exit_read(&pool_lock);
1355 
1356 	return (rv);
1357 }
1358 
1359 void
1360 pool_gc_sched(void *null)
1361 {
1362 	task_add(systqmp, &pool_gc_task);
1363 }
1364 
1365 void
1366 pool_gc_pages(void *null)
1367 {
1368 	struct pool *pp;
1369 	struct pool_item_header *ph, *freeph;
1370 	int s;
1371 
1372 	rw_enter_read(&pool_lock);
1373 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1374 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1375 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1376 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1377 			continue;
1378 
1379 		/* is it time to free a page? */
1380 		if (pp->pr_nidle > pp->pr_minpages &&
1381 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1382 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1383 			freeph = ph;
1384 			pool_p_remove(pp, freeph);
1385 		} else
1386 			freeph = NULL;
1387 
1388 		mtx_leave(&pp->pr_mtx);
1389 
1390 		if (freeph != NULL)
1391 			pool_p_free(pp, freeph);
1392 	}
1393 	splx(s);
1394 	rw_exit_read(&pool_lock);
1395 
1396 	timeout_add_sec(&pool_gc_tick, 1);
1397 }
1398 
1399 /*
1400  * Pool backend allocators.
1401  */
1402 
1403 void *
1404 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1405 {
1406 	void *v;
1407 
1408 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1409 
1410 #ifdef DIAGNOSTIC
1411 	if (v != NULL && POOL_INPGHDR(pp)) {
1412 		vaddr_t addr = (vaddr_t)v;
1413 		if ((addr & pp->pr_pgmask) != addr) {
1414 			panic("%s: %s page address %p isn't aligned to %u",
1415 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1416 		}
1417 	}
1418 #endif
1419 
1420 	return (v);
1421 }
1422 
1423 void
1424 pool_allocator_free(struct pool *pp, void *v)
1425 {
1426 	struct pool_allocator *pa = pp->pr_alloc;
1427 
1428 	(*pa->pa_free)(pp, v);
1429 }
1430 
1431 void *
1432 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1433 {
1434 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1435 
1436 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1437 	kd.kd_slowdown = slowdown;
1438 
1439 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1440 }
1441 
1442 void
1443 pool_page_free(struct pool *pp, void *v)
1444 {
1445 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1446 }
1447 
1448 void *
1449 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1450 {
1451 	struct kmem_va_mode kv = kv_intrsafe;
1452 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1453 	void *v;
1454 	int s;
1455 
1456 	if (POOL_INPGHDR(pp))
1457 		kv.kv_align = pp->pr_pgsize;
1458 
1459 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1460 	kd.kd_slowdown = slowdown;
1461 
1462 	s = splvm();
1463 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1464 	splx(s);
1465 
1466 	return (v);
1467 }
1468 
1469 void
1470 pool_multi_free(struct pool *pp, void *v)
1471 {
1472 	struct kmem_va_mode kv = kv_intrsafe;
1473 	int s;
1474 
1475 	if (POOL_INPGHDR(pp))
1476 		kv.kv_align = pp->pr_pgsize;
1477 
1478 	s = splvm();
1479 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1480 	splx(s);
1481 }
1482 
1483 void *
1484 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1485 {
1486 	struct kmem_va_mode kv = kv_any;
1487 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1488 	void *v;
1489 
1490 	if (POOL_INPGHDR(pp))
1491 		kv.kv_align = pp->pr_pgsize;
1492 
1493 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1494 	kd.kd_slowdown = slowdown;
1495 
1496 	KERNEL_LOCK();
1497 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1498 	KERNEL_UNLOCK();
1499 
1500 	return (v);
1501 }
1502 
1503 void
1504 pool_multi_free_ni(struct pool *pp, void *v)
1505 {
1506 	struct kmem_va_mode kv = kv_any;
1507 
1508 	if (POOL_INPGHDR(pp))
1509 		kv.kv_align = pp->pr_pgsize;
1510 
1511 	KERNEL_LOCK();
1512 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1513 	KERNEL_UNLOCK();
1514 }
1515