xref: /openbsd-src/sys/kern/subr_pool.c (revision cb39b41371628601fbe4c618205356d538b9d08a)
1 /*	$OpenBSD: subr_pool.c,v 1.185 2015/04/21 13:15:54 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 
46 #include <uvm/uvm_extern.h>
47 
48 /*
49  * Pool resource management utility.
50  *
51  * Memory is allocated in pages which are split into pieces according to
52  * the pool item size. Each page is kept on one of three lists in the
53  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
54  * for empty, full and partially-full pages respectively. The individual
55  * pool items are on a linked list headed by `ph_itemlist' in each page
56  * header. The memory for the page headers is either taken from
57  * the allocated pages themselves (for small pool items) or from
58  * an internal pool of page headers (`phpool').
59  */
60 
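/*
 * A typical consumer of this API looks roughly like the sketch below.
 * The names (struct foo, foo_pool, "foopl", IPL_NET) are illustrative
 * only and are not part of this file:
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_NET);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */
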
61 /* List of all pools */
62 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
63 
64 /*
65  * Every pool gets a unique serial number assigned to it. If this counter
66  * wraps, we're screwed, but we shouldn't create so many pools anyway.
67  */
68 unsigned int pool_serial;
69 unsigned int pool_count;
70 
71 /* Lock protecting the previous variables, which make up the global pool state */
72 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
73 
74 /* Private pool for page header structures */
75 struct pool phpool;
76 
77 struct pool_item_header {
78 	/* Page headers */
79 	TAILQ_ENTRY(pool_item_header)
80 				ph_pagelist;	/* pool page list */
81 	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
82 	RB_ENTRY(pool_item_header)
83 				ph_node;	/* Off-page page headers */
84 	int			ph_nmissing;	/* # of chunks in use */
85 	caddr_t			ph_page;	/* this page's address */
86 	caddr_t			ph_colored;	/* page's colored address */
87 	u_long			ph_magic;
88 	int			ph_tick;
89 };
90 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
91 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
92 
93 struct pool_item {
94 	u_long				pi_magic;
95 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
96 };
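/*
 * Each free item's pi_magic is its own address xor'd with the owning
 * page's random ph_magic; pool_do_get(), pool_p_free() and
 * pool_chk_page() recompute POOL_IMAGIC() to detect writes to items
 * sitting on the free list.
 */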
97 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
98 
99 #ifdef POOL_DEBUG
100 int	pool_debug = 1;
101 #else
102 int	pool_debug = 0;
103 #endif
104 
105 #define	POOL_NEEDS_CATCHUP(pp)						\
106 	((pp)->pr_nitems < (pp)->pr_minitems)
107 
108 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
109 
110 struct pool_item_header *
111 	 pool_p_alloc(struct pool *, int, int *);
112 void	 pool_p_insert(struct pool *, struct pool_item_header *);
113 void	 pool_p_remove(struct pool *, struct pool_item_header *);
114 void	 pool_p_free(struct pool *, struct pool_item_header *);
115 
116 void	 pool_update_curpage(struct pool *);
117 void	*pool_do_get(struct pool *, int, int *);
118 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
119 int	 pool_chk(struct pool *);
120 void	 pool_get_done(void *, void *);
121 void	 pool_runqueue(struct pool *, int);
122 
123 void	*pool_allocator_alloc(struct pool *, int, int *);
124 void	 pool_allocator_free(struct pool *, void *);
125 
126 /*
127  * The default pool allocator.
128  */
129 void	*pool_page_alloc(struct pool *, int, int *);
130 void	pool_page_free(struct pool *, void *);
131 
132 /*
133  * safe for interrupts; name preserved for compat. this is the default
134  * allocator.
135  */
136 struct pool_allocator pool_allocator_nointr = {
137 	pool_page_alloc,
138 	pool_page_free
139 };
140 
141 void	*pool_large_alloc(struct pool *, int, int *);
142 void	pool_large_free(struct pool *, void *);
143 
144 struct pool_allocator pool_allocator_large = {
145 	pool_large_alloc,
146 	pool_large_free
147 };
148 
149 void	*pool_large_alloc_ni(struct pool *, int, int *);
150 void	pool_large_free_ni(struct pool *, void *);
151 
152 struct pool_allocator pool_allocator_large_ni = {
153 	pool_large_alloc_ni,
154 	pool_large_free_ni
155 };
156 
157 #ifdef DDB
158 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
159 	     __attribute__((__format__(__kprintf__,1,2))));
160 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
161 	     __attribute__((__format__(__kprintf__,1,2))));
162 #endif
163 
164 /* stale page garbage collectors */
165 void	pool_gc_sched(void *);
166 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
167 void	pool_gc_pages(void *);
168 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
169 int pool_wait_free = 1;
170 int pool_wait_gc = 8;
171 
172 static inline int
173 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
174 {
175 	vaddr_t va = (vaddr_t)a->ph_page;
176 	vaddr_t vb = (vaddr_t)b->ph_page;
177 
178 	/* the comparisons must be done in this order for RB_NFIND to work */
179 	if (vb < va)
180 		return (-1);
181 	if (vb > va)
182 		return (1);
183 
184 	return (0);
185 }
186 
187 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
188 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
189 
190 /*
191  * Return the pool page header based on page address.
192  */
193 static inline struct pool_item_header *
194 pr_find_pagehead(struct pool *pp, void *v)
195 {
196 	struct pool_item_header *ph, key;
197 
198 	if (POOL_INPGHDR(pp)) {
199 		caddr_t page;
200 
201 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
202 
203 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
204 	}
205 
206 	key.ph_page = v;
207 	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
208 	if (ph == NULL)
209 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
210 
211 	KASSERT(ph->ph_page <= (caddr_t)v);
212 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
213 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
214 
215 	return (ph);
216 }
217 
218 /*
219  * Initialize the given pool resource structure.
220  *
221  * We export this routine to allow other kernel parts to declare
222  * static pools that must be initialized before malloc() is available.
223  */
224 void
225 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
226     const char *wchan, struct pool_allocator *palloc)
227 {
228 	int off = 0, space;
229 	unsigned int pgsize = PAGE_SIZE, items;
230 #ifdef DIAGNOSTIC
231 	struct pool *iter;
232 	KASSERT(ioff == 0);
233 #endif
234 
235 	if (align == 0)
236 		align = ALIGN(1);
237 
238 	if (size < sizeof(struct pool_item))
239 		size = sizeof(struct pool_item);
240 
241 	size = roundup(size, align);
242 
243 	if (palloc == NULL) {
244 		while (size > pgsize)
245 			pgsize <<= 1;
246 
247 		if (pgsize > PAGE_SIZE) {
248 			palloc = ISSET(flags, PR_WAITOK) ?
249 			    &pool_allocator_large_ni : &pool_allocator_large;
250 		} else
251 			palloc = &pool_allocator_nointr;
252 	} else
253 		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
254 
255 	items = pgsize / size;
256 
257 	/*
258 	 * Decide whether to put the page header off page to avoid
259 	 * wasting too large a part of the page. Off-page page headers
260 	 * go into an RB tree, so we can match a returned item with
261 	 * its header based on the page address.
262 	 */
263 	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
264 		off = pgsize - sizeof(struct pool_item_header);
265 	} else if (sizeof(struct pool_item_header) * 2 >= size) {
266 		off = pgsize - sizeof(struct pool_item_header);
267 		items = off / size;
268 	}
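	/*
	 * For illustration (sizes are approximate and platform dependent):
	 * with 4 KB pages and 64-byte items the items fill the page
	 * exactly, but the header (roughly a hundred bytes) is at least
	 * half the size of an item, so a couple of items are given up and
	 * the header is kept at the end of the page.  With 512-byte items
	 * neither branch fires, so the header comes from phpool instead.
	 */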
269 
270 	KASSERT(items > 0);
271 
272 	/*
273 	 * Initialize the pool structure.
274 	 */
275 	memset(pp, 0, sizeof(*pp));
276 	TAILQ_INIT(&pp->pr_emptypages);
277 	TAILQ_INIT(&pp->pr_fullpages);
278 	TAILQ_INIT(&pp->pr_partpages);
279 	pp->pr_curpage = NULL;
280 	pp->pr_npages = 0;
281 	pp->pr_minitems = 0;
282 	pp->pr_minpages = 0;
283 	pp->pr_maxpages = 8;
284 	pp->pr_size = size;
285 	pp->pr_pgsize = pgsize;
286 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
287 	pp->pr_phoffset = off;
288 	pp->pr_itemsperpage = items;
289 	pp->pr_wchan = wchan;
290 	pp->pr_alloc = palloc;
291 	pp->pr_nitems = 0;
292 	pp->pr_nout = 0;
293 	pp->pr_hardlimit = UINT_MAX;
294 	pp->pr_hardlimit_warning = NULL;
295 	pp->pr_hardlimit_ratecap.tv_sec = 0;
296 	pp->pr_hardlimit_ratecap.tv_usec = 0;
297 	pp->pr_hardlimit_warning_last.tv_sec = 0;
298 	pp->pr_hardlimit_warning_last.tv_usec = 0;
299 	RB_INIT(&pp->pr_phtree);
300 
301 	/*
302 	 * Use the space between the chunks and the page header
303 	 * for cache coloring.
304 	 */
305 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
306 	space -= pp->pr_itemsperpage * pp->pr_size;
307 	pp->pr_align = align;
308 	pp->pr_maxcolors = (space / align) + 1;
309 
310 	pp->pr_nget = 0;
311 	pp->pr_nfail = 0;
312 	pp->pr_nput = 0;
313 	pp->pr_npagealloc = 0;
314 	pp->pr_npagefree = 0;
315 	pp->pr_hiwat = 0;
316 	pp->pr_nidle = 0;
317 
318 	pp->pr_ipl = -1;
319 	mtx_init(&pp->pr_mtx, IPL_NONE);
320 	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
321 	TAILQ_INIT(&pp->pr_requests);
322 
323 	if (phpool.pr_size == 0) {
324 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
325 		    0, "phpool", NULL);
326 		pool_setipl(&phpool, IPL_HIGH);
327 
328 		/* make sure phpool won't "recurse" */
329 		KASSERT(POOL_INPGHDR(&phpool));
330 	}
331 
332 	/* pglistalloc/constraint parameters */
333 	pp->pr_crange = &kp_dirty;
334 
335 	/* Insert this into the list of all pools. */
336 	rw_enter_write(&pool_lock);
337 #ifdef DIAGNOSTIC
338 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
339 		if (iter == pp)
340 			panic("%s: pool %s already on list", __func__, wchan);
341 	}
342 #endif
343 
344 	pp->pr_serial = ++pool_serial;
345 	if (pool_serial == 0)
346 		panic("%s: too much uptime", __func__);
347 
348 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
349 	pool_count++;
350 	rw_exit_write(&pool_lock);
351 }
352 
353 void
354 pool_setipl(struct pool *pp, int ipl)
355 {
356 	pp->pr_ipl = ipl;
357 	mtx_init(&pp->pr_mtx, ipl);
358 	mtx_init(&pp->pr_requests_mtx, ipl);
359 }
360 
361 /*
362  * Decommission a pool resource.
363  */
364 void
365 pool_destroy(struct pool *pp)
366 {
367 	struct pool_item_header *ph;
368 	struct pool *prev, *iter;
369 
370 #ifdef DIAGNOSTIC
371 	if (pp->pr_nout != 0)
372 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
373 #endif
374 
375 	/* Remove from global pool list */
376 	rw_enter_write(&pool_lock);
377 	pool_count--;
378 	if (pp == SIMPLEQ_FIRST(&pool_head))
379 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
380 	else {
381 		prev = SIMPLEQ_FIRST(&pool_head);
382 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
383 			if (iter == pp) {
384 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
385 				    pr_poollist);
386 				break;
387 			}
388 			prev = iter;
389 		}
390 	}
391 	rw_exit_write(&pool_lock);
392 
393 	/* Remove all pages */
394 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
395 		mtx_enter(&pp->pr_mtx);
396 		pool_p_remove(pp, ph);
397 		mtx_leave(&pp->pr_mtx);
398 		pool_p_free(pp, ph);
399 	}
400 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
401 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
402 }
403 
404 void
405 pool_request_init(struct pool_request *pr,
406     void (*handler)(void *, void *), void *cookie)
407 {
408 	pr->pr_handler = handler;
409 	pr->pr_cookie = cookie;
410 	pr->pr_item = NULL;
411 }
412 
413 void
414 pool_request(struct pool *pp, struct pool_request *pr)
415 {
416 	mtx_enter(&pp->pr_requests_mtx);
417 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
418 	pool_runqueue(pp, PR_NOWAIT);
419 	mtx_leave(&pp->pr_requests_mtx);
420 }
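
/*
 * pool_get() below uses this request mechanism itself via pool_get_done().
 * An external consumer could do the same; a minimal sketch with
 * illustrative names (struct mumble and mumble_ready() are not part of
 * this file):
 *
 *	void
 *	mumble_ready(void *cookie, void *item)
 *	{
 *		struct mumble *m = cookie;
 *
 *		m->m_buf = item;
 *		...
 *	}
 *
 *	pool_request_init(&m->m_request, mumble_ready, m);
 *	pool_request(&mumble_pool, &m->m_request);
 */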
421 
422 struct pool_get_memory {
423 	struct mutex mtx;
424 	void * volatile v;
425 };
426 
427 /*
428  * Grab an item from the pool.
429  */
430 void *
431 pool_get(struct pool *pp, int flags)
432 {
433 	void *v = NULL;
434 	int slowdown = 0;
435 
436 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
437 
439 	mtx_enter(&pp->pr_mtx);
440 	if (pp->pr_nout >= pp->pr_hardlimit) {
441 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
442 			goto fail;
443 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
444 		if (ISSET(flags, PR_NOWAIT))
445 			goto fail;
446 	}
447 	mtx_leave(&pp->pr_mtx);
448 
449 	if (slowdown && ISSET(flags, PR_WAITOK))
450 		yield();
451 
452 	if (v == NULL) {
453 		struct pool_get_memory mem = {
454 		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
455 		    IPL_NONE : pp->pr_ipl), NULL };
456 		struct pool_request pr;
457 
458 		pool_request_init(&pr, pool_get_done, &mem);
459 		pool_request(pp, &pr);
460 
461 		mtx_enter(&mem.mtx);
462 		while (mem.v == NULL)
463 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
464 		mtx_leave(&mem.mtx);
465 
466 		v = mem.v;
467 	}
468 
469 	if (ISSET(flags, PR_ZERO))
470 		memset(v, 0, pp->pr_size);
471 
472 	return (v);
473 
474 fail:
475 	pp->pr_nfail++;
476 	mtx_leave(&pp->pr_mtx);
477 	return (NULL);
478 }
479 
480 void
481 pool_get_done(void *xmem, void *v)
482 {
483 	struct pool_get_memory *mem = xmem;
484 
485 	mtx_enter(&mem->mtx);
486 	mem->v = v;
487 	mtx_leave(&mem->mtx);
488 
489 	wakeup_one(mem);
490 }
491 
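/*
 * pool_runqueue() is entered with pr_requests_mtx held.  Only one context
 * services the queue at a time: pr_requesting acts as a counter, so
 * concurrent callers just bump it and return, while the servicing caller
 * keeps looping until the count drains back to zero.
 */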
492 void
493 pool_runqueue(struct pool *pp, int flags)
494 {
495 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
496 	struct pool_request *pr;
497 
498 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
499 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
500 
501 	if (pp->pr_requesting++)
502 		return;
503 
504 	do {
505 		pp->pr_requesting = 1;
506 
507 		/* no TAILQ_JOIN? :( */
508 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
509 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
510 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
511 		}
512 		if (TAILQ_EMPTY(&prl))
513 			continue;
514 
515 		mtx_leave(&pp->pr_requests_mtx);
516 
517 		mtx_enter(&pp->pr_mtx);
518 		pr = TAILQ_FIRST(&prl);
519 		while (pr != NULL) {
520 			int slowdown = 0;
521 
522 			if (pp->pr_nout >= pp->pr_hardlimit)
523 				break;
524 
525 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
526 			if (pr->pr_item == NULL) /* || slowdown ? */
527 				break;
528 
529 			pr = TAILQ_NEXT(pr, pr_entry);
530 		}
531 		mtx_leave(&pp->pr_mtx);
532 
533 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
534 		    pr->pr_item != NULL) {
535 			TAILQ_REMOVE(&prl, pr, pr_entry);
536 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
537 		}
538 
539 		mtx_enter(&pp->pr_requests_mtx);
540 	} while (--pp->pr_requesting);
541 
542 	/* no TAILQ_JOIN :( */
543 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
544 		TAILQ_REMOVE(&prl, pr, pr_entry);
545 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
546 	}
547 }
548 
549 void *
550 pool_do_get(struct pool *pp, int flags, int *slowdown)
551 {
552 	struct pool_item *pi;
553 	struct pool_item_header *ph;
554 
555 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
556 
557 	if (pp->pr_ipl != -1)
558 		splassert(pp->pr_ipl);
559 
560 	/*
561 	 * Account for this item now to avoid races if we need to give up
562 	 * pr_mtx to allocate a page.
563 	 */
564 	pp->pr_nout++;
565 
566 	if (pp->pr_curpage == NULL) {
567 		mtx_leave(&pp->pr_mtx);
568 		ph = pool_p_alloc(pp, flags, slowdown);
569 		mtx_enter(&pp->pr_mtx);
570 
571 		if (ph == NULL) {
572 			pp->pr_nout--;
573 			return (NULL);
574 		}
575 
576 		pool_p_insert(pp, ph);
577 	}
578 
579 	ph = pp->pr_curpage;
580 	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
581 	if (__predict_false(pi == NULL))
582 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
583 
584 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
585 		panic("%s: %s free list modified: "
586 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
587 		    __func__, pp->pr_wchan, ph->ph_page, pi,
588 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
589 	}
590 
591 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
592 
593 #ifdef DIAGNOSTIC
594 	if (pool_debug && POOL_PHPOISON(ph)) {
595 		size_t pidx;
596 		uint32_t pval;
597 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
598 		    &pidx, &pval)) {
599 			int *ip = (int *)(pi + 1);
600 			panic("%s: %s free list modified: "
601 			    "page %p; item addr %p; offset 0x%zx=0x%x",
602 			    __func__, pp->pr_wchan, ph->ph_page, pi,
603 			    pidx * sizeof(int), ip[pidx]);
604 		}
605 	}
606 #endif /* DIAGNOSTIC */
607 
608 	if (ph->ph_nmissing++ == 0) {
609 		/*
610 		 * This page was previously empty.  Move it to the list of
611 		 * partially-full pages.  This page is already curpage.
612 		 */
613 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
614 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
615 
616 		pp->pr_nidle--;
617 	}
618 
619 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
620 		/*
621 		 * This page is now full.  Move it to the full list
622 		 * and select a new current page.
623 		 */
624 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
625 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
626 		pool_update_curpage(pp);
627 	}
628 
629 	pp->pr_nget++;
630 
631 	return (pi);
632 }
633 
634 /*
635  * Return resource to the pool.
636  */
637 void
638 pool_put(struct pool *pp, void *v)
639 {
640 	struct pool_item *pi = v;
641 	struct pool_item_header *ph, *freeph = NULL;
642 	extern int ticks;
643 
644 #ifdef DIAGNOSTIC
645 	if (v == NULL)
646 		panic("%s: NULL item", __func__);
647 #endif
648 
649 	mtx_enter(&pp->pr_mtx);
650 
651 	if (pp->pr_ipl != -1)
652 		splassert(pp->pr_ipl);
653 
654 	ph = pr_find_pagehead(pp, v);
655 
656 #ifdef DIAGNOSTIC
657 	if (pool_debug) {
658 		struct pool_item *qi;
659 		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
660 			if (pi == qi) {
661 				panic("%s: %s: double pool_put: %p", __func__,
662 				    pp->pr_wchan, pi);
663 			}
664 		}
665 	}
666 #endif /* DIAGNOSTIC */
667 
668 	pi->pi_magic = POOL_IMAGIC(ph, pi);
669 	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
670 #ifdef DIAGNOSTIC
671 	if (POOL_PHPOISON(ph))
672 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
673 #endif /* DIAGNOSTIC */
674 
675 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
676 		/*
677 		 * The page was previously completely full, move it to the
678 		 * partially-full list.
679 		 */
680 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
681 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
682 	}
683 
684 	if (ph->ph_nmissing == 0) {
685 		/*
686 		 * The page is now empty, so move it to the empty page list.
687 		 */
688 		pp->pr_nidle++;
689 
690 		ph->ph_tick = ticks;
691 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
692 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
693 		pool_update_curpage(pp);
694 	}
695 
696 	pp->pr_nout--;
697 	pp->pr_nput++;
698 
699 	/* is it time to free a page? */
700 	if (pp->pr_nidle > pp->pr_maxpages &&
701 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
702 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
703 		freeph = ph;
704 		pool_p_remove(pp, freeph);
705 	}
706 	mtx_leave(&pp->pr_mtx);
707 
708 	if (freeph != NULL)
709 		pool_p_free(pp, freeph);
710 
711 	mtx_enter(&pp->pr_requests_mtx);
712 	pool_runqueue(pp, PR_NOWAIT);
713 	mtx_leave(&pp->pr_requests_mtx);
714 }
715 
716 /*
717  * Add enough pages to the pool to hold N more items.
718  */
719 int
720 pool_prime(struct pool *pp, int n)
721 {
722 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
723 	struct pool_item_header *ph;
724 	int newpages;
725 
726 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
727 
728 	while (newpages-- > 0) {
729 		int slowdown = 0;
730 
731 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
732 		if (ph == NULL) /* or slowdown? */
733 			break;
734 
735 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
736 	}
737 
738 	mtx_enter(&pp->pr_mtx);
739 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
740 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
741 		pool_p_insert(pp, ph);
742 	}
743 	mtx_leave(&pp->pr_mtx);
744 
745 	return (0);
746 }
747 
748 struct pool_item_header *
749 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
750 {
751 	struct pool_item_header *ph;
752 	struct pool_item *pi;
753 	caddr_t addr;
754 	int n;
755 
756 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
757 	KASSERT(pp->pr_size >= sizeof(*pi));
758 
759 	addr = pool_allocator_alloc(pp, flags, slowdown);
760 	if (addr == NULL)
761 		return (NULL);
762 
763 	if (POOL_INPGHDR(pp))
764 		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
765 	else {
766 		ph = pool_get(&phpool, flags);
767 		if (ph == NULL) {
768 			pool_allocator_free(pp, addr);
769 			return (NULL);
770 		}
771 	}
772 
773 	XSIMPLEQ_INIT(&ph->ph_itemlist);
774 	ph->ph_page = addr;
775 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
776 	ph->ph_colored = addr;
777 	ph->ph_nmissing = 0;
778 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
779 #ifdef DIAGNOSTIC
780 	/* use a bit in ph_magic to record if we poison page items */
781 	if (pool_debug)
782 		SET(ph->ph_magic, POOL_MAGICBIT);
783 	else
784 		CLR(ph->ph_magic, POOL_MAGICBIT);
785 #endif /* DIAGNOSTIC */
786 
787 	n = pp->pr_itemsperpage;
788 	while (n--) {
789 		pi = (struct pool_item *)addr;
790 		pi->pi_magic = POOL_IMAGIC(ph, pi);
791 		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
792 
793 #ifdef DIAGNOSTIC
794 		if (POOL_PHPOISON(ph))
795 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
796 #endif /* DIAGNOSTIC */
797 
798 		addr += pp->pr_size;
799 	}
800 
801 	return (ph);
802 }
803 
804 void
805 pool_p_free(struct pool *pp, struct pool_item_header *ph)
806 {
807 	struct pool_item *pi;
808 
809 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
810 	KASSERT(ph->ph_nmissing == 0);
811 
812 	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
813 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
814 			panic("%s: %s free list modified: "
815 			    "page %p; item addr %p; offset 0x%x=0x%lx",
816 			    __func__, pp->pr_wchan, ph->ph_page, pi,
817 			    0, pi->pi_magic);
818 		}
819 
820 #ifdef DIAGNOSTIC
821 		if (POOL_PHPOISON(ph)) {
822 			size_t pidx;
823 			uint32_t pval;
824 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
825 			    &pidx, &pval)) {
826 				int *ip = (int *)(pi + 1);
827 				panic("%s: %s free list modified: "
828 				    "page %p; item addr %p; offset 0x%zx=0x%x",
829 				    __func__, pp->pr_wchan, ph->ph_page, pi,
830 				    pidx * sizeof(int), ip[pidx]);
831 			}
832 		}
833 #endif
834 	}
835 
836 	pool_allocator_free(pp, ph->ph_page);
837 
838 	if (!POOL_INPGHDR(pp))
839 		pool_put(&phpool, ph);
840 }
841 
842 void
843 pool_p_insert(struct pool *pp, struct pool_item_header *ph)
844 {
845 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
846 
847 	/* If the pool was depleted, point at the new page */
848 	if (pp->pr_curpage == NULL)
849 		pp->pr_curpage = ph;
850 
851 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
852 	if (!POOL_INPGHDR(pp))
853 		RB_INSERT(phtree, &pp->pr_phtree, ph);
854 
855 	pp->pr_nitems += pp->pr_itemsperpage;
856 	pp->pr_nidle++;
857 
858 	pp->pr_npagealloc++;
859 	if (++pp->pr_npages > pp->pr_hiwat)
860 		pp->pr_hiwat = pp->pr_npages;
861 }
862 
863 void
864 pool_p_remove(struct pool *pp, struct pool_item_header *ph)
865 {
866 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
867 
868 	pp->pr_npagefree++;
869 	pp->pr_npages--;
870 	pp->pr_nidle--;
871 	pp->pr_nitems -= pp->pr_itemsperpage;
872 
873 	if (!POOL_INPGHDR(pp))
874 		RB_REMOVE(phtree, &pp->pr_phtree, ph);
875 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
876 
877 	pool_update_curpage(pp);
878 }
879 
880 void
881 pool_update_curpage(struct pool *pp)
882 {
883 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
884 	if (pp->pr_curpage == NULL) {
885 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
886 	}
887 }
888 
889 void
890 pool_setlowat(struct pool *pp, int n)
891 {
892 	int prime = 0;
893 
894 	mtx_enter(&pp->pr_mtx);
895 	pp->pr_minitems = n;
896 	pp->pr_minpages = (n == 0)
897 		? 0
898 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
899 
900 	if (pp->pr_nitems < n)
901 		prime = n - pp->pr_nitems;
902 	mtx_leave(&pp->pr_mtx);
903 
904 	if (prime > 0)
905 		pool_prime(pp, prime);
906 }
907 
908 void
909 pool_sethiwat(struct pool *pp, int n)
910 {
911 	pp->pr_maxpages = (n == 0)
912 		? 0
913 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
914 }
915 
916 int
917 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
918 {
919 	int error = 0;
920 
921 	if (n < pp->pr_nout) {
922 		error = EINVAL;
923 		goto done;
924 	}
925 
926 	pp->pr_hardlimit = n;
927 	pp->pr_hardlimit_warning = warnmsg;
928 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
929 	pp->pr_hardlimit_warning_last.tv_sec = 0;
930 	pp->pr_hardlimit_warning_last.tv_usec = 0;
931 
932 done:
933 	return (error);
934 }
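
/*
 * Taken together, the tuning knobs above might be used like this (a
 * sketch with illustrative values; foo_pool is not part of this file):
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 1024);
 *	pool_sethardlimit(&foo_pool, 4096, "foo_pool limit reached", 60);
 *
 * pool_setlowat() primes the pool with pages for at least 32 items and
 * keeps pool_reclaim() from going below that; pool_sethiwat() lets
 * pool_put() start freeing idle pages once more than 1024 items' worth
 * sit unused; pool_sethardlimit() makes non-sleeping pool_get() calls
 * fail once 4096 items are outstanding, with the warning message rate
 * capped to once a minute.
 */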
935 
936 void
937 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
938 {
939 	pp->pr_crange = mode;
940 }
941 
942 /*
943  * Release all complete pages that have not been used recently.
944  *
945  * Returns non-zero if any pages have been reclaimed.
946  */
947 int
948 pool_reclaim(struct pool *pp)
949 {
950 	struct pool_item_header *ph, *phnext;
951 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
952 
953 	mtx_enter(&pp->pr_mtx);
954 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
955 		phnext = TAILQ_NEXT(ph, ph_pagelist);
956 
957 		/* Check our minimum page claim */
958 		if (pp->pr_npages <= pp->pr_minpages)
959 			break;
960 
961 		/*
962 		 * If freeing this page would put us below
963 		 * the low water mark, stop now.
964 		 */
965 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
966 		    pp->pr_minitems)
967 			break;
968 
969 		pool_p_remove(pp, ph);
970 		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
971 	}
972 	mtx_leave(&pp->pr_mtx);
973 
974 	if (TAILQ_EMPTY(&pl))
975 		return (0);
976 
977 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
978 		TAILQ_REMOVE(&pl, ph, ph_pagelist);
979 		pool_p_free(pp, ph);
980 	}
981 
982 	return (1);
983 }
984 
985 /*
986  * Release all complete pages that have not been used recently
987  * from all pools.
988  */
989 void
990 pool_reclaim_all(void)
991 {
992 	struct pool	*pp;
993 
994 	rw_enter_read(&pool_lock);
995 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
996 		pool_reclaim(pp);
997 	rw_exit_read(&pool_lock);
998 }
999 
1000 #ifdef DDB
1001 #include <machine/db_machdep.h>
1002 #include <ddb/db_output.h>
1003 
1004 /*
1005  * Diagnostic helpers.
1006  */
1007 void
1008 pool_printit(struct pool *pp, const char *modif,
1009     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1010 {
1011 	pool_print1(pp, modif, pr);
1012 }
1013 
1014 void
1015 pool_print_pagelist(struct pool_pagelist *pl,
1016     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1017 {
1018 	struct pool_item_header *ph;
1019 	struct pool_item *pi;
1020 
1021 	TAILQ_FOREACH(ph, pl, ph_pagelist) {
1022 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1023 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1024 		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1025 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1026 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1027 				    pi, pi->pi_magic);
1028 			}
1029 		}
1030 	}
1031 }
1032 
1033 void
1034 pool_print1(struct pool *pp, const char *modif,
1035     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1036 {
1037 	struct pool_item_header *ph;
1038 	int print_pagelist = 0;
1039 	char c;
1040 
1041 	while ((c = *modif++) != '\0') {
1042 		if (c == 'p')
1043 			print_pagelist = 1;
1045 	}
1046 
1047 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1048 	    pp->pr_maxcolors);
1049 	(*pr)("\talloc %p\n", pp->pr_alloc);
1050 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1051 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1052 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1053 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1054 
1055 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1056 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1057 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1058 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1059 
1060 	if (print_pagelist == 0)
1061 		return;
1062 
1063 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1064 		(*pr)("\n\tempty page list:\n");
1065 	pool_print_pagelist(&pp->pr_emptypages, pr);
1066 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1067 		(*pr)("\n\tfull page list:\n");
1068 	pool_print_pagelist(&pp->pr_fullpages, pr);
1069 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1070 		(*pr)("\n\tpartial-page list:\n");
1071 	pool_print_pagelist(&pp->pr_partpages, pr);
1072 
1073 	if (pp->pr_curpage == NULL)
1074 		(*pr)("\tno current page\n");
1075 	else
1076 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1077 }
1078 
1079 void
1080 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1081 {
1082 	struct pool *pp;
1083 	char maxp[16];
1084 	int ovflw;
1085 	char mode;
1086 
1087 	mode = modif[0];
1088 	if (mode != '\0' && mode != 'a') {
1089 		db_printf("usage: show all pools [/a]\n");
1090 		return;
1091 	}
1092 
1093 	if (mode == '\0')
1094 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1095 		    "Name",
1096 		    "Size",
1097 		    "Requests",
1098 		    "Fail",
1099 		    "Releases",
1100 		    "Pgreq",
1101 		    "Pgrel",
1102 		    "Npage",
1103 		    "Hiwat",
1104 		    "Minpg",
1105 		    "Maxpg",
1106 		    "Idle");
1107 	else
1108 		db_printf("%-12s %18s %18s\n",
1109 		    "Name", "Address", "Allocator");
1110 
1111 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1112 		if (mode == 'a') {
1113 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1114 			    pp->pr_alloc);
1115 			continue;
1116 		}
1117 
1118 		if (!pp->pr_nget)
1119 			continue;
1120 
1121 		if (pp->pr_maxpages == UINT_MAX)
1122 			snprintf(maxp, sizeof maxp, "inf");
1123 		else
1124 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1125 
1126 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1127 	(ovflw) += db_printf((fmt),			\
1128 	    (width) - (fixed) - (ovflw) > 0 ?		\
1129 	    (width) - (fixed) - (ovflw) : 0,		\
1130 	    (val)) - (width);				\
1131 	if ((ovflw) < 0)				\
1132 		(ovflw) = 0;				\
1133 } while (/* CONSTCOND */0)
1134 
1135 		ovflw = 0;
1136 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1137 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1138 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1139 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1140 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1141 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1142 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1143 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1144 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1145 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1146 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1147 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1148 
1149 		pool_chk(pp);
1150 	}
1151 }
1152 #endif /* DDB */
1153 
1154 #if defined(POOL_DEBUG) || defined(DDB)
1155 int
1156 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1157 {
1158 	struct pool_item *pi;
1159 	caddr_t page;
1160 	int n;
1161 	const char *label = pp->pr_wchan;
1162 
1163 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1164 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1165 		printf("%s: ", label);
1166 		printf("pool(%p:%s): page inconsistency: page %p; "
1167 		    "at page head addr %p (p %p)\n",
1168 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1169 		return 1;
1170 	}
1171 
1172 	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
1173 	     pi != NULL;
1174 	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
1175 		if ((caddr_t)pi < ph->ph_page ||
1176 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1177 			printf("%s: ", label);
1178 			printf("pool(%p:%s): page inconsistency: page %p;"
1179 			    " item ordinal %d; addr %p\n", pp,
1180 			    pp->pr_wchan, ph->ph_page, n, pi);
1181 			return (1);
1182 		}
1183 
1184 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1185 			printf("%s: ", label);
1186 			printf("pool(%p:%s): free list modified: "
1187 			    "page %p; item ordinal %d; addr %p "
1188 			    "(p %p); offset 0x%x=0x%lx\n",
1189 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1190 			    0, pi->pi_magic);
1191 		}
1192 
1193 #ifdef DIAGNOSTIC
1194 		if (POOL_PHPOISON(ph)) {
1195 			size_t pidx;
1196 			uint32_t pval;
1197 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1198 			    &pidx, &pval)) {
1199 				int *ip = (int *)(pi + 1);
1200 				printf("pool(%s): free list modified: "
1201 				    "page %p; item ordinal %d; addr %p "
1202 				    "(p %p); offset 0x%zx=0x%x\n",
1203 				    pp->pr_wchan, ph->ph_page, n, pi,
1204 				    page, pidx * sizeof(int), ip[pidx]);
1205 			}
1206 		}
1207 #endif /* DIAGNOSTIC */
1208 	}
1209 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1210 		printf("pool(%p:%s): page inconsistency: page %p;"
1211 		    " %d on list, %d missing, %d items per page\n", pp,
1212 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1213 		    pp->pr_itemsperpage);
1214 		return 1;
1215 	}
1216 	if (expected >= 0 && n != expected) {
1217 		printf("pool(%p:%s): page inconsistency: page %p;"
1218 		    " %d on list, %d missing, %d expected\n", pp,
1219 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1220 		    expected);
1221 		return 1;
1222 	}
1223 	return 0;
1224 }
1225 
1226 int
1227 pool_chk(struct pool *pp)
1228 {
1229 	struct pool_item_header *ph;
1230 	int r = 0;
1231 
1232 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1233 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1234 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1235 		r += pool_chk_page(pp, ph, 0);
1236 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1237 		r += pool_chk_page(pp, ph, -1);
1238 
1239 	return (r);
1240 }
1241 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1242 
1243 #ifdef DDB
1244 void
1245 pool_walk(struct pool *pp, int full,
1246     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1247     void (*func)(void *, int, int (*)(const char *, ...)
1248 	    __attribute__((__format__(__kprintf__,1,2)))))
1249 {
1250 	struct pool_item_header *ph;
1251 	struct pool_item *pi;
1252 	caddr_t cp;
1253 	int n;
1254 
1255 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1256 		cp = ph->ph_colored;
1257 		n = ph->ph_nmissing;
1258 
1259 		while (n--) {
1260 			func(cp, full, pr);
1261 			cp += pp->pr_size;
1262 		}
1263 	}
1264 
1265 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1266 		cp = ph->ph_colored;
1267 		n = ph->ph_nmissing;
1268 
1269 		do {
1270 			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1271 				if (cp == (caddr_t)pi)
1272 					break;
1273 			}
1274 			if (cp != (caddr_t)pi) {
1275 				func(cp, full, pr);
1276 				n--;
1277 			}
1278 
1279 			cp += pp->pr_size;
1280 		} while (n > 0);
1281 	}
1282 }
1283 #endif
1284 
1285 /*
1286  * We have three different sysctls.
1287  * kern.pool.npools - the number of pools.
1288  * kern.pool.pool.<pool#> - a struct kinfo_pool describing pool #<pool#>.
1289  * kern.pool.name.<pool#> - the name of pool #<pool#>.
1290  */
1291 int
1292 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1293 {
1294 	struct kinfo_pool pi;
1295 	struct pool *pp;
1296 	int rv = ENOENT;
1297 
1298 	switch (name[0]) {
1299 	case KERN_POOL_NPOOLS:
1300 		if (namelen != 1)
1301 			return (ENOTDIR);
1302 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1303 
1304 	case KERN_POOL_NAME:
1305 	case KERN_POOL_POOL:
1306 		break;
1307 	default:
1308 		return (EOPNOTSUPP);
1309 	}
1310 
1311 	if (namelen != 2)
1312 		return (ENOTDIR);
1313 
1314 	rw_enter_read(&pool_lock);
1315 
1316 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1317 		if (name[1] == pp->pr_serial)
1318 			break;
1319 	}
1320 
1321 	if (pp == NULL)
1322 		goto done;
1323 
1324 	switch (name[0]) {
1325 	case KERN_POOL_NAME:
1326 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1327 		break;
1328 	case KERN_POOL_POOL:
1329 		memset(&pi, 0, sizeof(pi));
1330 
1331 		if (pp->pr_ipl != -1)
1332 			mtx_enter(&pp->pr_mtx);
1333 		pi.pr_size = pp->pr_size;
1334 		pi.pr_pgsize = pp->pr_pgsize;
1335 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1336 		pi.pr_npages = pp->pr_npages;
1337 		pi.pr_minpages = pp->pr_minpages;
1338 		pi.pr_maxpages = pp->pr_maxpages;
1339 		pi.pr_hardlimit = pp->pr_hardlimit;
1340 		pi.pr_nout = pp->pr_nout;
1341 		pi.pr_nitems = pp->pr_nitems;
1342 		pi.pr_nget = pp->pr_nget;
1343 		pi.pr_nput = pp->pr_nput;
1344 		pi.pr_nfail = pp->pr_nfail;
1345 		pi.pr_npagealloc = pp->pr_npagealloc;
1346 		pi.pr_npagefree = pp->pr_npagefree;
1347 		pi.pr_hiwat = pp->pr_hiwat;
1348 		pi.pr_nidle = pp->pr_nidle;
1349 		if (pp->pr_ipl != -1)
1350 			mtx_leave(&pp->pr_mtx);
1351 
1352 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1353 		break;
1354 	}
1355 
1356 done:
1357 	rw_exit_read(&pool_lock);
1358 
1359 	return (rv);
1360 }
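
/*
 * From userland these are reached through sysctl(2); a hedged sketch,
 * assuming the usual CTL_KERN/KERN_POOL mib prefix from <sys/sysctl.h>
 * and ignoring error handling:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */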
1361 
1362 void
1363 pool_gc_sched(void *null)
1364 {
1365 	task_add(systqmp, &pool_gc_task);
1366 }
1367 
1368 void
1369 pool_gc_pages(void *null)
1370 {
1371 	extern int ticks;
1372 	struct pool *pp;
1373 	struct pool_item_header *ph, *freeph;
1374 	int s;
1375 
1376 	rw_enter_read(&pool_lock);
1377 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1378 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1379 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1380 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1381 			continue;
1382 
1383 		/* is it time to free a page? */
1384 		if (pp->pr_nidle > pp->pr_minpages &&
1385 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1386 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1387 			freeph = ph;
1388 			pool_p_remove(pp, freeph);
1389 		} else
1390 			freeph = NULL;
1391 
1392 		mtx_leave(&pp->pr_mtx);
1393 
1394 		if (freeph != NULL)
1395 			pool_p_free(pp, freeph);
1396 	}
1397 	splx(s);
1398 	rw_exit_read(&pool_lock);
1399 
1400 	timeout_add_sec(&pool_gc_tick, 1);
1401 }
1402 
1403 /*
1404  * Pool backend allocators.
1405  */
1406 
1407 void *
1408 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1409 {
1410 	void *v;
1411 
1412 	KERNEL_LOCK();
1413 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1414 	KERNEL_UNLOCK();
1415 
1416 #ifdef DIAGNOSTIC
1417 	if (v != NULL && POOL_INPGHDR(pp)) {
1418 		vaddr_t addr = (vaddr_t)v;
1419 		if ((addr & pp->pr_pgmask) != addr) {
1420 			panic("%s: %s page address %p isn't aligned to %u",
1421 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1422 		}
1423 	}
1424 #endif
1425 
1426 	return (v);
1427 }
1428 
1429 void
1430 pool_allocator_free(struct pool *pp, void *v)
1431 {
1432 	struct pool_allocator *pa = pp->pr_alloc;
1433 
1434 	KERNEL_LOCK();
1435 	(*pa->pa_free)(pp, v);
1436 	KERNEL_UNLOCK();
1437 }
1438 
1439 void *
1440 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1441 {
1442 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1443 
1444 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1445 	kd.kd_slowdown = slowdown;
1446 
1447 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1448 }
1449 
1450 void
1451 pool_page_free(struct pool *pp, void *v)
1452 {
1453 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1454 }
1455 
1456 void *
1457 pool_large_alloc(struct pool *pp, int flags, int *slowdown)
1458 {
1459 	struct kmem_va_mode kv = kv_intrsafe;
1460 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1461 	void *v;
1462 	int s;
1463 
1464 	if (POOL_INPGHDR(pp))
1465 		kv.kv_align = pp->pr_pgsize;
1466 
1467 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1468 	kd.kd_slowdown = slowdown;
1469 
1470 	s = splvm();
1471 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1472 	splx(s);
1473 
1474 	return (v);
1475 }
1476 
1477 void
1478 pool_large_free(struct pool *pp, void *v)
1479 {
1480 	struct kmem_va_mode kv = kv_intrsafe;
1481 	int s;
1482 
1483 	if (POOL_INPGHDR(pp))
1484 		kv.kv_align = pp->pr_pgsize;
1485 
1486 	s = splvm();
1487 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1488 	splx(s);
1489 }
1490 
1491 void *
1492 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
1493 {
1494 	struct kmem_va_mode kv = kv_any;
1495 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1496 
1497 	if (POOL_INPGHDR(pp))
1498 		kv.kv_align = pp->pr_pgsize;
1499 
1500 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1501 	kd.kd_slowdown = slowdown;
1502 
1503 	return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
1504 }
1505 
1506 void
1507 pool_large_free_ni(struct pool *pp, void *v)
1508 {
1509 	struct kmem_va_mode kv = kv_any;
1510 
1511 	if (POOL_INPGHDR(pp))
1512 		kv.kv_align = pp->pr_pgsize;
1513 
1514 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1515 }
1516