xref: /openbsd-src/sys/kern/subr_pool.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: subr_pool.c,v 1.220 2017/08/13 20:26:33 guenther Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/sysctl.h>
42 #include <sys/task.h>
43 #include <sys/timeout.h>
44 #include <sys/percpu.h>
45 
46 #include <uvm/uvm_extern.h>
47 
48 /*
49  * Pool resource management utility.
50  *
51  * Memory is allocated in pages which are split into pieces according to
52  * the pool item size. Each page is kept on one of three lists in the
53  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
54  * for empty, full and partially-full pages respectively. The individual
55  * pool items are on a linked list headed by `ph_items' in each page
56  * header. The memory for building the page list is either taken from
57  * the allocated pages themselves (for small pool items) or taken from
58  * an internal pool of page headers (`phpool').
59  */
60 
61 /* List of all pools */
62 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
63 
64 /*
65  * Every pool gets a unique serial number assigned to it. If this counter
66  * wraps, we're screwed, but we shouldn't create so many pools anyway.
67  */
68 unsigned int pool_serial;
69 unsigned int pool_count;
70 
71 /* Lock the previous variables making up the global pool state */
72 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
73 
74 /* Private pool for page header structures */
75 struct pool phpool;
76 
/*
 * Table of locking primitives.  Every pool points at one such table
 * through pr_lock_ops, so the pool code itself does not care which
 * kind of lock protects it.
 */
struct pool_lock_ops {
	/* set up a lock belonging to the given pool */
	void	(*pl_init)(struct pool *, union pool_lock *,
		    struct lock_type *);

	/* acquire, try to acquire, and release */
	void	(*pl_enter)(union pool_lock * LOCK_FL_VARS);
	int	(*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
	void	(*pl_leave)(union pool_lock * LOCK_FL_VARS);

	/* lock state assertions */
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);

	/* sleep on an ident with the lock passed along */
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
};

/* The two tables a pool can select from; pool_init() picks one. */
static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;
90 
/*
 * Initialize pool lock `pl' through the lock ops of pool `pp'.  With
 * WITNESS a static lock_type named after the lock expression is handed
 * to the init routine; without it the lock type argument is NULL.
 */
#ifndef WITNESS
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#else /* WITNESS */
#define pl_init(pp, pl) do {						\
	static struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#endif /* WITNESS */
99 
100 static inline void
101 pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
102 {
103 	pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
104 }
105 static inline int
106 pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
107 {
108 	return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
109 }
110 static inline void
111 pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
112 {
113 	pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
114 }
115 static inline void
116 pl_assert_locked(struct pool *pp, union pool_lock *pl)
117 {
118 	pp->pr_lock_ops->pl_assert_locked(pl);
119 }
120 static inline void
121 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
122 {
123 	pp->pr_lock_ops->pl_assert_unlocked(pl);
124 }
125 static inline int
126 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
127     const char *wmesg, int timo)
128 {
129 	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
130 }
131 
132 #ifdef WITNESS
133 # define pl_enter(pp,pl)	pl_enter(pp,pl LOCK_FILE_LINE)
134 # define pl_enter_try(pp,pl)	pl_enter_try(pp,pl LOCK_FILE_LINE)
135 # define pl_leave(pp,pl)	pl_leave(pp,pl LOCK_FILE_LINE)
136 #endif
137 
138 struct pool_item {
139 	u_long				pi_magic;
140 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
141 };
142 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
143 
144 struct pool_page_header {
145 	/* Page headers */
146 	TAILQ_ENTRY(pool_page_header)
147 				ph_entry;	/* pool page list */
148 	XSIMPLEQ_HEAD(, pool_item)
149 				ph_items;	/* free items on the page */
150 	RBT_ENTRY(pool_page_header)
151 				ph_node;	/* off-page page headers */
152 	unsigned int		ph_nmissing;	/* # of chunks in use */
153 	caddr_t			ph_page;	/* this page's address */
154 	caddr_t			ph_colored;	/* page's colored address */
155 	unsigned long		ph_magic;
156 	int			ph_tick;
157 };
158 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
159 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
160 
161 #ifdef MULTIPROCESSOR
162 struct pool_cache_item {
163 	struct pool_cache_item	*ci_next;	/* next item in list */
164 	unsigned long		 ci_nitems;	/* number of items in list */
165 	TAILQ_ENTRY(pool_cache_item)
166 				 ci_nextl;	/* entry in list of lists */
167 };
168 
169 /* we store whether the cached item is poisoned in the high bit of nitems */
170 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
171 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
172 
173 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
174     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
175 
176 #define POOL_CACHE_ITEM_POISONED(_ci)					\
177     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
178 
179 struct pool_cache {
180 	struct pool_cache_item	*pc_actv;	/* active list of items */
181 	unsigned long		 pc_nactv;	/* actv head nitems cache */
182 	struct pool_cache_item	*pc_prev;	/* previous list of items */
183 
184 	uint64_t		 pc_gen;	/* generation number */
185 	uint64_t		 pc_nget;	/* # of successful requests */
186 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
187 	uint64_t		 pc_nput;	/* # of releases */
188 	uint64_t		 pc_nlget;	/* # of list requests */
189 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
190 	uint64_t		 pc_nlput;	/* # of list releases */
191 
192 	int			 pc_nout;
193 };
194 
195 void	*pool_cache_get(struct pool *);
196 void	 pool_cache_put(struct pool *, void *);
197 void	 pool_cache_destroy(struct pool *);
198 void	 pool_cache_gc(struct pool *);
199 #endif
200 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
201 int	 pool_cache_info(struct pool *, void *, size_t *);
202 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
203 
/* Pool debugging starts enabled only in kernels built with POOL_DEBUG. */
#ifndef POOL_DEBUG
int	pool_debug = 0;
#else
int	pool_debug = 1;
#endif

/* True when the pool keeps its page headers inside the pages. */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
211 
/* page header lifecycle: allocate, link, unlink, release */
struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

/* item get/put internals and request queue handling */
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

/* consistency checks */
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);

/* entry points into the pool's backend page allocator */
void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);
228 
229 /*
230  * The default pool allocator.
231  */
232 void	*pool_page_alloc(struct pool *, int, int *);
233 void	pool_page_free(struct pool *, void *);
234 
235 /*
236  * safe for interrupts; this is the default allocator
237  */
238 struct pool_allocator pool_allocator_single = {
239 	pool_page_alloc,
240 	pool_page_free,
241 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
242 };
243 
244 void	*pool_multi_alloc(struct pool *, int, int *);
245 void	pool_multi_free(struct pool *, void *);
246 
247 struct pool_allocator pool_allocator_multi = {
248 	pool_multi_alloc,
249 	pool_multi_free,
250 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
251 };
252 
253 void	*pool_multi_alloc_ni(struct pool *, int, int *);
254 void	pool_multi_free_ni(struct pool *, void *);
255 
256 struct pool_allocator pool_allocator_multi_ni = {
257 	pool_multi_alloc_ni,
258 	pool_multi_free_ni,
259 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
260 };
261 
262 #ifdef DDB
263 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
264 	     __attribute__((__format__(__kprintf__,1,2))));
265 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
266 	     __attribute__((__format__(__kprintf__,1,2))));
267 #endif
268 
269 /* stale page garbage collectors */
270 void	pool_gc_sched(void *);
271 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
272 void	pool_gc_pages(void *);
273 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
274 int pool_wait_free = 1;
275 int pool_wait_gc = 8;
276 
277 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
278 
279 static inline int
280 phtree_compare(const struct pool_page_header *a,
281     const struct pool_page_header *b)
282 {
283 	vaddr_t va = (vaddr_t)a->ph_page;
284 	vaddr_t vb = (vaddr_t)b->ph_page;
285 
286 	/* the compares in this order are important for the NFIND to work */
287 	if (vb < va)
288 		return (-1);
289 	if (vb > va)
290 		return (1);
291 
292 	return (0);
293 }
294 
295 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
296 
297 /*
298  * Return the pool page header based on page address.
299  */
300 static inline struct pool_page_header *
301 pr_find_pagehead(struct pool *pp, void *v)
302 {
303 	struct pool_page_header *ph, key;
304 
305 	if (POOL_INPGHDR(pp)) {
306 		caddr_t page;
307 
308 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
309 
310 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
311 	}
312 
313 	key.ph_page = v;
314 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
315 	if (ph == NULL)
316 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
317 
318 	KASSERT(ph->ph_page <= (caddr_t)v);
319 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
320 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
321 
322 	return (ph);
323 }
324 
325 /*
326  * Initialize the given pool resource structure.
327  *
328  * We export this routine to allow other kernel parts to declare
329  * static pools that must be initialized before malloc() is available.
330  */
331 void
332 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
333     const char *wchan, struct pool_allocator *palloc)
334 {
335 	int off = 0, space;
336 	unsigned int pgsize = PAGE_SIZE, items;
337 	size_t pa_pagesz;
338 #ifdef DIAGNOSTIC
339 	struct pool *iter;
340 #endif
341 
342 	if (align == 0)
343 		align = ALIGN(1);
344 
345 	if (size < sizeof(struct pool_item))
346 		size = sizeof(struct pool_item);
347 
348 	size = roundup(size, align);
349 
350 	while (size * 8 > pgsize)
351 		pgsize <<= 1;
352 
353 	if (palloc == NULL) {
354 		if (pgsize > PAGE_SIZE) {
355 			palloc = ISSET(flags, PR_WAITOK) ?
356 			    &pool_allocator_multi_ni : &pool_allocator_multi;
357 		} else
358 			palloc = &pool_allocator_single;
359 
360 		pa_pagesz = palloc->pa_pagesz;
361 	} else {
362 		size_t pgsizes;
363 
364 		pa_pagesz = palloc->pa_pagesz;
365 		if (pa_pagesz == 0)
366 			pa_pagesz = POOL_ALLOC_DEFAULT;
367 
368 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
369 
370 		/* make sure the allocator can fit at least one item */
371 		if (size > pgsizes) {
372 			panic("%s: pool %s item size 0x%zx > "
373 			    "allocator %p sizes 0x%zx", __func__, wchan,
374 			    size, palloc, pgsizes);
375 		}
376 
377 		/* shrink pgsize until it fits into the range */
378 		while (!ISSET(pgsizes, pgsize))
379 			pgsize >>= 1;
380 	}
381 	KASSERT(ISSET(pa_pagesz, pgsize));
382 
383 	items = pgsize / size;
384 
385 	/*
386 	 * Decide whether to put the page header off page to avoid
387 	 * wasting too large a part of the page. Off-page page headers
388 	 * go into an RB tree, so we can match a returned item with
389 	 * its header based on the page address.
390 	 */
391 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
392 		if (pgsize - (size * items) >
393 		    sizeof(struct pool_page_header)) {
394 			off = pgsize - sizeof(struct pool_page_header);
395 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
396 			off = pgsize - sizeof(struct pool_page_header);
397 			items = off / size;
398 		}
399 	}
400 
401 	KASSERT(items > 0);
402 
403 	/*
404 	 * Initialize the pool structure.
405 	 */
406 	memset(pp, 0, sizeof(*pp));
407 	if (ISSET(flags, PR_RWLOCK)) {
408 		KASSERT(flags & PR_WAITOK);
409 		pp->pr_lock_ops = &pool_lock_ops_rw;
410 	} else
411 		pp->pr_lock_ops = &pool_lock_ops_mtx;
412 	TAILQ_INIT(&pp->pr_emptypages);
413 	TAILQ_INIT(&pp->pr_fullpages);
414 	TAILQ_INIT(&pp->pr_partpages);
415 	pp->pr_curpage = NULL;
416 	pp->pr_npages = 0;
417 	pp->pr_minitems = 0;
418 	pp->pr_minpages = 0;
419 	pp->pr_maxpages = 8;
420 	pp->pr_size = size;
421 	pp->pr_pgsize = pgsize;
422 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
423 	pp->pr_phoffset = off;
424 	pp->pr_itemsperpage = items;
425 	pp->pr_wchan = wchan;
426 	pp->pr_alloc = palloc;
427 	pp->pr_nitems = 0;
428 	pp->pr_nout = 0;
429 	pp->pr_hardlimit = UINT_MAX;
430 	pp->pr_hardlimit_warning = NULL;
431 	pp->pr_hardlimit_ratecap.tv_sec = 0;
432 	pp->pr_hardlimit_ratecap.tv_usec = 0;
433 	pp->pr_hardlimit_warning_last.tv_sec = 0;
434 	pp->pr_hardlimit_warning_last.tv_usec = 0;
435 	RBT_INIT(phtree, &pp->pr_phtree);
436 
437 	/*
438 	 * Use the space between the chunks and the page header
439 	 * for cache coloring.
440 	 */
441 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
442 	space -= pp->pr_itemsperpage * pp->pr_size;
443 	pp->pr_align = align;
444 	pp->pr_maxcolors = (space / align) + 1;
445 
446 	pp->pr_nget = 0;
447 	pp->pr_nfail = 0;
448 	pp->pr_nput = 0;
449 	pp->pr_npagealloc = 0;
450 	pp->pr_npagefree = 0;
451 	pp->pr_hiwat = 0;
452 	pp->pr_nidle = 0;
453 
454 	pp->pr_ipl = ipl;
455 	pp->pr_flags = flags;
456 
457 	pl_init(pp, &pp->pr_lock);
458 	pl_init(pp, &pp->pr_requests_lock);
459 	TAILQ_INIT(&pp->pr_requests);
460 
461 	if (phpool.pr_size == 0) {
462 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
463 		    IPL_HIGH, 0, "phpool", NULL);
464 
465 		/* make sure phpool wont "recurse" */
466 		KASSERT(POOL_INPGHDR(&phpool));
467 	}
468 
469 	/* pglistalloc/constraint parameters */
470 	pp->pr_crange = &kp_dirty;
471 
472 	/* Insert this into the list of all pools. */
473 	rw_enter_write(&pool_lock);
474 #ifdef DIAGNOSTIC
475 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
476 		if (iter == pp)
477 			panic("%s: pool %s already on list", __func__, wchan);
478 	}
479 #endif
480 
481 	pp->pr_serial = ++pool_serial;
482 	if (pool_serial == 0)
483 		panic("%s: too much uptime", __func__);
484 
485 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
486 	pool_count++;
487 	rw_exit_write(&pool_lock);
488 }
489 
490 /*
491  * Decommission a pool resource.
492  */
493 void
494 pool_destroy(struct pool *pp)
495 {
496 	struct pool_page_header *ph;
497 	struct pool *prev, *iter;
498 
499 #ifdef MULTIPROCESSOR
500 	if (pp->pr_cache != NULL)
501 		pool_cache_destroy(pp);
502 #endif
503 
504 #ifdef DIAGNOSTIC
505 	if (pp->pr_nout != 0)
506 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
507 #endif
508 
509 	/* Remove from global pool list */
510 	rw_enter_write(&pool_lock);
511 	pool_count--;
512 	if (pp == SIMPLEQ_FIRST(&pool_head))
513 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
514 	else {
515 		prev = SIMPLEQ_FIRST(&pool_head);
516 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
517 			if (iter == pp) {
518 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
519 				    pr_poollist);
520 				break;
521 			}
522 			prev = iter;
523 		}
524 	}
525 	rw_exit_write(&pool_lock);
526 
527 	/* Remove all pages */
528 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
529 		pl_enter(pp, &pp->pr_lock);
530 		pool_p_remove(pp, ph);
531 		pl_leave(pp, &pp->pr_lock);
532 		pool_p_free(pp, ph);
533 	}
534 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
535 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
536 }
537 
538 void
539 pool_request_init(struct pool_request *pr,
540     void (*handler)(struct pool *, void *, void *), void *cookie)
541 {
542 	pr->pr_handler = handler;
543 	pr->pr_cookie = cookie;
544 	pr->pr_item = NULL;
545 }
546 
547 void
548 pool_request(struct pool *pp, struct pool_request *pr)
549 {
550 	pl_enter(pp, &pp->pr_requests_lock);
551 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
552 	pool_runqueue(pp, PR_NOWAIT);
553 	pl_leave(pp, &pp->pr_requests_lock);
554 }
555 
556 struct pool_get_memory {
557 	union pool_lock lock;
558 	void * volatile v;
559 };
560 
561 /*
562  * Grab an item from the pool.
563  */
564 void *
565 pool_get(struct pool *pp, int flags)
566 {
567 	void *v = NULL;
568 	int slowdown = 0;
569 
570 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
571 	if (pp->pr_flags & PR_RWLOCK)
572 		KASSERT(flags & PR_WAITOK);
573 
574 #ifdef MULTIPROCESSOR
575 	if (pp->pr_cache != NULL) {
576 		v = pool_cache_get(pp);
577 		if (v != NULL)
578 			goto good;
579 	}
580 #endif
581 
582 	pl_enter(pp, &pp->pr_lock);
583 	if (pp->pr_nout >= pp->pr_hardlimit) {
584 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
585 			goto fail;
586 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
587 		if (ISSET(flags, PR_NOWAIT))
588 			goto fail;
589 	}
590 	pl_leave(pp, &pp->pr_lock);
591 
592 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
593 		yield();
594 
595 	if (v == NULL) {
596 		struct pool_get_memory mem = { .v = NULL };
597 		struct pool_request pr;
598 
599 		pl_init(pp, &mem.lock);
600 		pool_request_init(&pr, pool_get_done, &mem);
601 		pool_request(pp, &pr);
602 
603 		pl_enter(pp, &mem.lock);
604 		while (mem.v == NULL)
605 			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
606 		pl_leave(pp, &mem.lock);
607 
608 		v = mem.v;
609 	}
610 
611 #ifdef MULTIPROCESSOR
612 good:
613 #endif
614 	if (ISSET(flags, PR_ZERO))
615 		memset(v, 0, pp->pr_size);
616 
617 	return (v);
618 
619 fail:
620 	pp->pr_nfail++;
621 	pl_leave(pp, &pp->pr_lock);
622 	return (NULL);
623 }
624 
625 void
626 pool_get_done(struct pool *pp, void *xmem, void *v)
627 {
628 	struct pool_get_memory *mem = xmem;
629 
630 	pl_enter(pp, &mem->lock);
631 	mem->v = v;
632 	pl_leave(pp, &mem->lock);
633 
634 	wakeup_one(mem);
635 }
636 
637 void
638 pool_runqueue(struct pool *pp, int flags)
639 {
640 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
641 	struct pool_request *pr;
642 
643 	pl_assert_unlocked(pp, &pp->pr_lock);
644 	pl_assert_locked(pp, &pp->pr_requests_lock);
645 
646 	if (pp->pr_requesting++)
647 		return;
648 
649 	do {
650 		pp->pr_requesting = 1;
651 
652 		/* no TAILQ_JOIN? :( */
653 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
654 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
655 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
656 		}
657 		if (TAILQ_EMPTY(&prl))
658 			continue;
659 
660 		pl_leave(pp, &pp->pr_requests_lock);
661 
662 		pl_enter(pp, &pp->pr_lock);
663 		pr = TAILQ_FIRST(&prl);
664 		while (pr != NULL) {
665 			int slowdown = 0;
666 
667 			if (pp->pr_nout >= pp->pr_hardlimit)
668 				break;
669 
670 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
671 			if (pr->pr_item == NULL) /* || slowdown ? */
672 				break;
673 
674 			pr = TAILQ_NEXT(pr, pr_entry);
675 		}
676 		pl_leave(pp, &pp->pr_lock);
677 
678 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
679 		    pr->pr_item != NULL) {
680 			TAILQ_REMOVE(&prl, pr, pr_entry);
681 			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
682 		}
683 
684 		pl_enter(pp, &pp->pr_requests_lock);
685 	} while (--pp->pr_requesting);
686 
687 	/* no TAILQ_JOIN :( */
688 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
689 		TAILQ_REMOVE(&prl, pr, pr_entry);
690 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
691 	}
692 }
693 
694 void *
695 pool_do_get(struct pool *pp, int flags, int *slowdown)
696 {
697 	struct pool_item *pi;
698 	struct pool_page_header *ph;
699 
700 	pl_assert_locked(pp, &pp->pr_lock);
701 
702 	splassert(pp->pr_ipl);
703 
704 	/*
705 	 * Account for this item now to avoid races if we need to give up
706 	 * pr_lock to allocate a page.
707 	 */
708 	pp->pr_nout++;
709 
710 	if (pp->pr_curpage == NULL) {
711 		pl_leave(pp, &pp->pr_lock);
712 		ph = pool_p_alloc(pp, flags, slowdown);
713 		pl_enter(pp, &pp->pr_lock);
714 
715 		if (ph == NULL) {
716 			pp->pr_nout--;
717 			return (NULL);
718 		}
719 
720 		pool_p_insert(pp, ph);
721 	}
722 
723 	ph = pp->pr_curpage;
724 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
725 	if (__predict_false(pi == NULL))
726 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
727 
728 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
729 		panic("%s: %s free list modified: "
730 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
731 		    __func__, pp->pr_wchan, ph->ph_page, pi,
732 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
733 	}
734 
735 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
736 
737 #ifdef DIAGNOSTIC
738 	if (pool_debug && POOL_PHPOISON(ph)) {
739 		size_t pidx;
740 		uint32_t pval;
741 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
742 		    &pidx, &pval)) {
743 			int *ip = (int *)(pi + 1);
744 			panic("%s: %s free list modified: "
745 			    "page %p; item addr %p; offset 0x%zx=0x%x",
746 			    __func__, pp->pr_wchan, ph->ph_page, pi,
747 			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
748 		}
749 	}
750 #endif /* DIAGNOSTIC */
751 
752 	if (ph->ph_nmissing++ == 0) {
753 		/*
754 		 * This page was previously empty.  Move it to the list of
755 		 * partially-full pages.  This page is already curpage.
756 		 */
757 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
758 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
759 
760 		pp->pr_nidle--;
761 	}
762 
763 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
764 		/*
765 		 * This page is now full.  Move it to the full list
766 		 * and select a new current page.
767 		 */
768 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
769 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
770 		pool_update_curpage(pp);
771 	}
772 
773 	pp->pr_nget++;
774 
775 	return (pi);
776 }
777 
778 /*
779  * Return resource to the pool.
780  */
781 void
782 pool_put(struct pool *pp, void *v)
783 {
784 	struct pool_page_header *ph, *freeph = NULL;
785 
786 #ifdef DIAGNOSTIC
787 	if (v == NULL)
788 		panic("%s: NULL item", __func__);
789 #endif
790 
791 #ifdef MULTIPROCESSOR
792 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
793 		pool_cache_put(pp, v);
794 		return;
795 	}
796 #endif
797 
798 	pl_enter(pp, &pp->pr_lock);
799 
800 	pool_do_put(pp, v);
801 
802 	pp->pr_nout--;
803 	pp->pr_nput++;
804 
805 	/* is it time to free a page? */
806 	if (pp->pr_nidle > pp->pr_maxpages &&
807 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
808 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
809 		freeph = ph;
810 		pool_p_remove(pp, freeph);
811 	}
812 
813 	pl_leave(pp, &pp->pr_lock);
814 
815 	if (freeph != NULL)
816 		pool_p_free(pp, freeph);
817 
818 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
819 		pl_enter(pp, &pp->pr_requests_lock);
820 		pool_runqueue(pp, PR_NOWAIT);
821 		pl_leave(pp, &pp->pr_requests_lock);
822 	}
823 }
824 
825 void
826 pool_do_put(struct pool *pp, void *v)
827 {
828 	struct pool_item *pi = v;
829 	struct pool_page_header *ph;
830 
831 	splassert(pp->pr_ipl);
832 
833 	ph = pr_find_pagehead(pp, v);
834 
835 #ifdef DIAGNOSTIC
836 	if (pool_debug) {
837 		struct pool_item *qi;
838 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
839 			if (pi == qi) {
840 				panic("%s: %s: double pool_put: %p", __func__,
841 				    pp->pr_wchan, pi);
842 			}
843 		}
844 	}
845 #endif /* DIAGNOSTIC */
846 
847 	pi->pi_magic = POOL_IMAGIC(ph, pi);
848 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
849 #ifdef DIAGNOSTIC
850 	if (POOL_PHPOISON(ph))
851 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
852 #endif /* DIAGNOSTIC */
853 
854 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
855 		/*
856 		 * The page was previously completely full, move it to the
857 		 * partially-full list.
858 		 */
859 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
860 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
861 	}
862 
863 	if (ph->ph_nmissing == 0) {
864 		/*
865 		 * The page is now empty, so move it to the empty page list.
866 		 */
867 		pp->pr_nidle++;
868 
869 		ph->ph_tick = ticks;
870 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
871 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
872 		pool_update_curpage(pp);
873 	}
874 }
875 
876 /*
877  * Add N items to the pool.
878  */
879 int
880 pool_prime(struct pool *pp, int n)
881 {
882 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
883 	struct pool_page_header *ph;
884 	int newpages;
885 
886 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
887 
888 	while (newpages-- > 0) {
889 		int slowdown = 0;
890 
891 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
892 		if (ph == NULL) /* or slowdown? */
893 			break;
894 
895 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
896 	}
897 
898 	pl_enter(pp, &pp->pr_lock);
899 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
900 		TAILQ_REMOVE(&pl, ph, ph_entry);
901 		pool_p_insert(pp, ph);
902 	}
903 	pl_leave(pp, &pp->pr_lock);
904 
905 	return (0);
906 }
907 
908 struct pool_page_header *
909 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
910 {
911 	struct pool_page_header *ph;
912 	struct pool_item *pi;
913 	caddr_t addr;
914 	int n;
915 
916 	pl_assert_unlocked(pp, &pp->pr_lock);
917 	KASSERT(pp->pr_size >= sizeof(*pi));
918 
919 	addr = pool_allocator_alloc(pp, flags, slowdown);
920 	if (addr == NULL)
921 		return (NULL);
922 
923 	if (POOL_INPGHDR(pp))
924 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
925 	else {
926 		ph = pool_get(&phpool, flags);
927 		if (ph == NULL) {
928 			pool_allocator_free(pp, addr);
929 			return (NULL);
930 		}
931 	}
932 
933 	XSIMPLEQ_INIT(&ph->ph_items);
934 	ph->ph_page = addr;
935 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
936 	ph->ph_colored = addr;
937 	ph->ph_nmissing = 0;
938 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
939 #ifdef DIAGNOSTIC
940 	/* use a bit in ph_magic to record if we poison page items */
941 	if (pool_debug)
942 		SET(ph->ph_magic, POOL_MAGICBIT);
943 	else
944 		CLR(ph->ph_magic, POOL_MAGICBIT);
945 #endif /* DIAGNOSTIC */
946 
947 	n = pp->pr_itemsperpage;
948 	while (n--) {
949 		pi = (struct pool_item *)addr;
950 		pi->pi_magic = POOL_IMAGIC(ph, pi);
951 		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
952 
953 #ifdef DIAGNOSTIC
954 		if (POOL_PHPOISON(ph))
955 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
956 #endif /* DIAGNOSTIC */
957 
958 		addr += pp->pr_size;
959 	}
960 
961 	return (ph);
962 }
963 
964 void
965 pool_p_free(struct pool *pp, struct pool_page_header *ph)
966 {
967 	struct pool_item *pi;
968 
969 	pl_assert_unlocked(pp, &pp->pr_lock);
970 	KASSERT(ph->ph_nmissing == 0);
971 
972 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
973 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
974 			panic("%s: %s free list modified: "
975 			    "page %p; item addr %p; offset 0x%x=0x%lx",
976 			    __func__, pp->pr_wchan, ph->ph_page, pi,
977 			    0, pi->pi_magic);
978 		}
979 
980 #ifdef DIAGNOSTIC
981 		if (POOL_PHPOISON(ph)) {
982 			size_t pidx;
983 			uint32_t pval;
984 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
985 			    &pidx, &pval)) {
986 				int *ip = (int *)(pi + 1);
987 				panic("%s: %s free list modified: "
988 				    "page %p; item addr %p; offset 0x%zx=0x%x",
989 				    __func__, pp->pr_wchan, ph->ph_page, pi,
990 				    pidx * sizeof(int), ip[pidx]);
991 			}
992 		}
993 #endif
994 	}
995 
996 	pool_allocator_free(pp, ph->ph_page);
997 
998 	if (!POOL_INPGHDR(pp))
999 		pool_put(&phpool, ph);
1000 }
1001 
1002 void
1003 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1004 {
1005 	pl_assert_locked(pp, &pp->pr_lock);
1006 
1007 	/* If the pool was depleted, point at the new page */
1008 	if (pp->pr_curpage == NULL)
1009 		pp->pr_curpage = ph;
1010 
1011 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1012 	if (!POOL_INPGHDR(pp))
1013 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
1014 
1015 	pp->pr_nitems += pp->pr_itemsperpage;
1016 	pp->pr_nidle++;
1017 
1018 	pp->pr_npagealloc++;
1019 	if (++pp->pr_npages > pp->pr_hiwat)
1020 		pp->pr_hiwat = pp->pr_npages;
1021 }
1022 
1023 void
1024 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1025 {
1026 	pl_assert_locked(pp, &pp->pr_lock);
1027 
1028 	pp->pr_npagefree++;
1029 	pp->pr_npages--;
1030 	pp->pr_nidle--;
1031 	pp->pr_nitems -= pp->pr_itemsperpage;
1032 
1033 	if (!POOL_INPGHDR(pp))
1034 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1035 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1036 
1037 	pool_update_curpage(pp);
1038 }
1039 
1040 void
1041 pool_update_curpage(struct pool *pp)
1042 {
1043 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1044 	if (pp->pr_curpage == NULL) {
1045 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1046 	}
1047 }
1048 
1049 void
1050 pool_setlowat(struct pool *pp, int n)
1051 {
1052 	int prime = 0;
1053 
1054 	pl_enter(pp, &pp->pr_lock);
1055 	pp->pr_minitems = n;
1056 	pp->pr_minpages = (n == 0)
1057 		? 0
1058 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1059 
1060 	if (pp->pr_nitems < n)
1061 		prime = n - pp->pr_nitems;
1062 	pl_leave(pp, &pp->pr_lock);
1063 
1064 	if (prime > 0)
1065 		pool_prime(pp, prime);
1066 }
1067 
1068 void
1069 pool_sethiwat(struct pool *pp, int n)
1070 {
1071 	pp->pr_maxpages = (n == 0)
1072 		? 0
1073 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1074 }
1075 
1076 int
1077 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1078 {
1079 	int error = 0;
1080 
1081 	if (n < pp->pr_nout) {
1082 		error = EINVAL;
1083 		goto done;
1084 	}
1085 
1086 	pp->pr_hardlimit = n;
1087 	pp->pr_hardlimit_warning = warnmsg;
1088 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1089 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1090 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1091 
1092 done:
1093 	return (error);
1094 }
1095 
1096 void
1097 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1098 {
1099 	pp->pr_crange = mode;
1100 }
1101 
1102 /*
1103  * Release all complete pages that have not been used recently.
1104  *
1105  * Returns non-zero if any pages have been reclaimed.
1106  */
1107 int
1108 pool_reclaim(struct pool *pp)
1109 {
1110 	struct pool_page_header *ph, *phnext;
1111 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1112 
1113 	pl_enter(pp, &pp->pr_lock);
1114 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1115 		phnext = TAILQ_NEXT(ph, ph_entry);
1116 
1117 		/* Check our minimum page claim */
1118 		if (pp->pr_npages <= pp->pr_minpages)
1119 			break;
1120 
1121 		/*
1122 		 * If freeing this page would put us below
1123 		 * the low water mark, stop now.
1124 		 */
1125 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1126 		    pp->pr_minitems)
1127 			break;
1128 
1129 		pool_p_remove(pp, ph);
1130 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1131 	}
1132 	pl_leave(pp, &pp->pr_lock);
1133 
1134 	if (TAILQ_EMPTY(&pl))
1135 		return (0);
1136 
1137 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1138 		TAILQ_REMOVE(&pl, ph, ph_entry);
1139 		pool_p_free(pp, ph);
1140 	}
1141 
1142 	return (1);
1143 }
1144 
1145 /*
1146  * Release all complete pages that have not been used recently
1147  * from all pools.
1148  */
1149 void
1150 pool_reclaim_all(void)
1151 {
1152 	struct pool	*pp;
1153 
1154 	rw_enter_read(&pool_lock);
1155 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1156 		pool_reclaim(pp);
1157 	rw_exit_read(&pool_lock);
1158 }
1159 
1160 #ifdef DDB
1161 #include <machine/db_machdep.h>
1162 #include <ddb/db_output.h>
1163 
1164 /*
1165  * Diagnostic helpers.
1166  */
/*
 * Print a description of the pool via pr.  This is a direct
 * pass-through to pool_print1() with the same arguments.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	/* all of the work happens in pool_print1() */
	pool_print1(pp, modif, pr);
}
1173 
1174 void
1175 pool_print_pagelist(struct pool_pagelist *pl,
1176     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1177 {
1178 	struct pool_page_header *ph;
1179 	struct pool_item *pi;
1180 
1181 	TAILQ_FOREACH(ph, pl, ph_entry) {
1182 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1183 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1184 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1185 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1186 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1187 				    pi, pi->pi_magic);
1188 			}
1189 		}
1190 	}
1191 }
1192 
1193 void
1194 pool_print1(struct pool *pp, const char *modif,
1195     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1196 {
1197 	struct pool_page_header *ph;
1198 	int print_pagelist = 0;
1199 	char c;
1200 
1201 	while ((c = *modif++) != '\0') {
1202 		if (c == 'p')
1203 			print_pagelist = 1;
1204 		modif++;
1205 	}
1206 
1207 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1208 	    pp->pr_maxcolors);
1209 	(*pr)("\talloc %p\n", pp->pr_alloc);
1210 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1211 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1212 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1213 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1214 
1215 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1216 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1217 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1218 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1219 
1220 	if (print_pagelist == 0)
1221 		return;
1222 
1223 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1224 		(*pr)("\n\tempty page list:\n");
1225 	pool_print_pagelist(&pp->pr_emptypages, pr);
1226 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1227 		(*pr)("\n\tfull page list:\n");
1228 	pool_print_pagelist(&pp->pr_fullpages, pr);
1229 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1230 		(*pr)("\n\tpartial-page list:\n");
1231 	pool_print_pagelist(&pp->pr_partpages, pr);
1232 
1233 	if (pp->pr_curpage == NULL)
1234 		(*pr)("\tno current page\n");
1235 	else
1236 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1237 }
1238 
1239 void
1240 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1241 {
1242 	struct pool *pp;
1243 	char maxp[16];
1244 	int ovflw;
1245 	char mode;
1246 
1247 	mode = modif[0];
1248 	if (mode != '\0' && mode != 'a') {
1249 		db_printf("usage: show all pools [/a]\n");
1250 		return;
1251 	}
1252 
1253 	if (mode == '\0')
1254 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1255 		    "Name",
1256 		    "Size",
1257 		    "Requests",
1258 		    "Fail",
1259 		    "Releases",
1260 		    "Pgreq",
1261 		    "Pgrel",
1262 		    "Npage",
1263 		    "Hiwat",
1264 		    "Minpg",
1265 		    "Maxpg",
1266 		    "Idle");
1267 	else
1268 		db_printf("%-12s %18s %18s\n",
1269 		    "Name", "Address", "Allocator");
1270 
1271 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1272 		if (mode == 'a') {
1273 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1274 			    pp->pr_alloc);
1275 			continue;
1276 		}
1277 
1278 		if (!pp->pr_nget)
1279 			continue;
1280 
1281 		if (pp->pr_maxpages == UINT_MAX)
1282 			snprintf(maxp, sizeof maxp, "inf");
1283 		else
1284 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1285 
1286 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1287 	(ovflw) += db_printf((fmt),			\
1288 	    (width) - (fixed) - (ovflw) > 0 ?		\
1289 	    (width) - (fixed) - (ovflw) : 0,		\
1290 	    (val)) - (width);				\
1291 	if ((ovflw) < 0)				\
1292 		(ovflw) = 0;				\
1293 } while (/* CONSTCOND */0)
1294 
1295 		ovflw = 0;
1296 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1297 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1298 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1299 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1300 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1301 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1302 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1303 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1304 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1305 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1306 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1307 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1308 
1309 		pool_chk(pp);
1310 	}
1311 }
1312 #endif /* DDB */
1313 
1314 #if defined(POOL_DEBUG) || defined(DDB)
1315 int
1316 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1317 {
1318 	struct pool_item *pi;
1319 	caddr_t page;
1320 	int n;
1321 	const char *label = pp->pr_wchan;
1322 
1323 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1324 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1325 		printf("%s: ", label);
1326 		printf("pool(%p:%s): page inconsistency: page %p; "
1327 		    "at page head addr %p (p %p)\n",
1328 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1329 		return 1;
1330 	}
1331 
1332 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1333 	     pi != NULL;
1334 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1335 		if ((caddr_t)pi < ph->ph_page ||
1336 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1337 			printf("%s: ", label);
1338 			printf("pool(%p:%s): page inconsistency: page %p;"
1339 			    " item ordinal %d; addr %p\n", pp,
1340 			    pp->pr_wchan, ph->ph_page, n, pi);
1341 			return (1);
1342 		}
1343 
1344 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1345 			printf("%s: ", label);
1346 			printf("pool(%p:%s): free list modified: "
1347 			    "page %p; item ordinal %d; addr %p "
1348 			    "(p %p); offset 0x%x=0x%lx\n",
1349 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1350 			    0, pi->pi_magic);
1351 		}
1352 
1353 #ifdef DIAGNOSTIC
1354 		if (POOL_PHPOISON(ph)) {
1355 			size_t pidx;
1356 			uint32_t pval;
1357 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1358 			    &pidx, &pval)) {
1359 				int *ip = (int *)(pi + 1);
1360 				printf("pool(%s): free list modified: "
1361 				    "page %p; item ordinal %d; addr %p "
1362 				    "(p %p); offset 0x%zx=0x%x\n",
1363 				    pp->pr_wchan, ph->ph_page, n, pi,
1364 				    page, pidx * sizeof(int), ip[pidx]);
1365 			}
1366 		}
1367 #endif /* DIAGNOSTIC */
1368 	}
1369 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1370 		printf("pool(%p:%s): page inconsistency: page %p;"
1371 		    " %d on list, %d missing, %d items per page\n", pp,
1372 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1373 		    pp->pr_itemsperpage);
1374 		return 1;
1375 	}
1376 	if (expected >= 0 && n != expected) {
1377 		printf("pool(%p:%s): page inconsistency: page %p;"
1378 		    " %d on list, %d missing, %d expected\n", pp,
1379 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1380 		    expected);
1381 		return 1;
1382 	}
1383 	return 0;
1384 }
1385 
1386 int
1387 pool_chk(struct pool *pp)
1388 {
1389 	struct pool_page_header *ph;
1390 	int r = 0;
1391 
1392 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1393 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1394 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1395 		r += pool_chk_page(pp, ph, 0);
1396 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1397 		r += pool_chk_page(pp, ph, -1);
1398 
1399 	return (r);
1400 }
1401 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1402 
1403 #ifdef DDB
1404 void
1405 pool_walk(struct pool *pp, int full,
1406     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1407     void (*func)(void *, int, int (*)(const char *, ...)
1408 	    __attribute__((__format__(__kprintf__,1,2)))))
1409 {
1410 	struct pool_page_header *ph;
1411 	struct pool_item *pi;
1412 	caddr_t cp;
1413 	int n;
1414 
1415 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1416 		cp = ph->ph_colored;
1417 		n = ph->ph_nmissing;
1418 
1419 		while (n--) {
1420 			func(cp, full, pr);
1421 			cp += pp->pr_size;
1422 		}
1423 	}
1424 
1425 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1426 		cp = ph->ph_colored;
1427 		n = ph->ph_nmissing;
1428 
1429 		do {
1430 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1431 				if (cp == (caddr_t)pi)
1432 					break;
1433 			}
1434 			if (cp != (caddr_t)pi) {
1435 				func(cp, full, pr);
1436 				n--;
1437 			}
1438 
1439 			cp += pp->pr_size;
1440 		} while (n > 0);
1441 	}
1442 }
1443 #endif
1444 
1445 /*
1446  * We have three different sysctls.
1447  * kern.pool.npools - the number of pools.
1448  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1449  * kern.pool.name.<pool#> - the name for pool#.
1450  */
1451 int
1452 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1453 {
1454 	struct kinfo_pool pi;
1455 	struct pool *pp;
1456 	int rv = ENOENT;
1457 
1458 	switch (name[0]) {
1459 	case KERN_POOL_NPOOLS:
1460 		if (namelen != 1)
1461 			return (ENOTDIR);
1462 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1463 
1464 	case KERN_POOL_NAME:
1465 	case KERN_POOL_POOL:
1466 	case KERN_POOL_CACHE:
1467 	case KERN_POOL_CACHE_CPUS:
1468 		break;
1469 	default:
1470 		return (EOPNOTSUPP);
1471 	}
1472 
1473 	if (namelen != 2)
1474 		return (ENOTDIR);
1475 
1476 	rw_enter_read(&pool_lock);
1477 
1478 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1479 		if (name[1] == pp->pr_serial)
1480 			break;
1481 	}
1482 
1483 	if (pp == NULL)
1484 		goto done;
1485 
1486 	switch (name[0]) {
1487 	case KERN_POOL_NAME:
1488 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1489 		break;
1490 	case KERN_POOL_POOL:
1491 		memset(&pi, 0, sizeof(pi));
1492 
1493 		pl_enter(pp, &pp->pr_lock);
1494 		pi.pr_size = pp->pr_size;
1495 		pi.pr_pgsize = pp->pr_pgsize;
1496 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1497 		pi.pr_npages = pp->pr_npages;
1498 		pi.pr_minpages = pp->pr_minpages;
1499 		pi.pr_maxpages = pp->pr_maxpages;
1500 		pi.pr_hardlimit = pp->pr_hardlimit;
1501 		pi.pr_nout = pp->pr_nout;
1502 		pi.pr_nitems = pp->pr_nitems;
1503 		pi.pr_nget = pp->pr_nget;
1504 		pi.pr_nput = pp->pr_nput;
1505 		pi.pr_nfail = pp->pr_nfail;
1506 		pi.pr_npagealloc = pp->pr_npagealloc;
1507 		pi.pr_npagefree = pp->pr_npagefree;
1508 		pi.pr_hiwat = pp->pr_hiwat;
1509 		pi.pr_nidle = pp->pr_nidle;
1510 		pl_leave(pp, &pp->pr_lock);
1511 
1512 		pool_cache_pool_info(pp, &pi);
1513 
1514 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1515 		break;
1516 
1517 	case KERN_POOL_CACHE:
1518 		rv = pool_cache_info(pp, oldp, oldlenp);
1519 		break;
1520 
1521 	case KERN_POOL_CACHE_CPUS:
1522 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1523 		break;
1524 	}
1525 
1526 done:
1527 	rw_exit_read(&pool_lock);
1528 
1529 	return (rv);
1530 }
1531 
1532 void
1533 pool_gc_sched(void *null)
1534 {
1535 	task_add(systqmp, &pool_gc_task);
1536 }
1537 
1538 void
1539 pool_gc_pages(void *null)
1540 {
1541 	struct pool *pp;
1542 	struct pool_page_header *ph, *freeph;
1543 	int s;
1544 
1545 	rw_enter_read(&pool_lock);
1546 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1547 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1548 #ifdef MULTIPROCESSOR
1549 		if (pp->pr_cache != NULL)
1550 			pool_cache_gc(pp);
1551 #endif
1552 
1553 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1554 		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
1555 			continue;
1556 
1557 		/* is it time to free a page? */
1558 		if (pp->pr_nidle > pp->pr_minpages &&
1559 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1560 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1561 			freeph = ph;
1562 			pool_p_remove(pp, freeph);
1563 		} else
1564 			freeph = NULL;
1565 
1566 		pl_leave(pp, &pp->pr_lock);
1567 
1568 		if (freeph != NULL)
1569 			pool_p_free(pp, freeph);
1570 	}
1571 	splx(s);
1572 	rw_exit_read(&pool_lock);
1573 
1574 	timeout_add_sec(&pool_gc_tick, 1);
1575 }
1576 
1577 /*
1578  * Pool backend allocators.
1579  */
1580 
1581 void *
1582 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1583 {
1584 	void *v;
1585 
1586 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1587 
1588 #ifdef DIAGNOSTIC
1589 	if (v != NULL && POOL_INPGHDR(pp)) {
1590 		vaddr_t addr = (vaddr_t)v;
1591 		if ((addr & pp->pr_pgmask) != addr) {
1592 			panic("%s: %s page address %p isnt aligned to %u",
1593 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1594 		}
1595 	}
1596 #endif
1597 
1598 	return (v);
1599 }
1600 
1601 void
1602 pool_allocator_free(struct pool *pp, void *v)
1603 {
1604 	struct pool_allocator *pa = pp->pr_alloc;
1605 
1606 	(*pa->pa_free)(pp, v);
1607 }
1608 
1609 void *
1610 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1611 {
1612 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1613 
1614 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1615 	kd.kd_slowdown = slowdown;
1616 
1617 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1618 }
1619 
1620 void
1621 pool_page_free(struct pool *pp, void *v)
1622 {
1623 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1624 }
1625 
1626 void *
1627 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1628 {
1629 	struct kmem_va_mode kv = kv_intrsafe;
1630 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1631 	void *v;
1632 	int s;
1633 
1634 	if (POOL_INPGHDR(pp))
1635 		kv.kv_align = pp->pr_pgsize;
1636 
1637 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1638 	kd.kd_slowdown = slowdown;
1639 
1640 	s = splvm();
1641 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1642 	splx(s);
1643 
1644 	return (v);
1645 }
1646 
1647 void
1648 pool_multi_free(struct pool *pp, void *v)
1649 {
1650 	struct kmem_va_mode kv = kv_intrsafe;
1651 	int s;
1652 
1653 	if (POOL_INPGHDR(pp))
1654 		kv.kv_align = pp->pr_pgsize;
1655 
1656 	s = splvm();
1657 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1658 	splx(s);
1659 }
1660 
1661 void *
1662 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1663 {
1664 	struct kmem_va_mode kv = kv_any;
1665 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1666 	void *v;
1667 
1668 	if (POOL_INPGHDR(pp))
1669 		kv.kv_align = pp->pr_pgsize;
1670 
1671 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1672 	kd.kd_slowdown = slowdown;
1673 
1674 	KERNEL_LOCK();
1675 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1676 	KERNEL_UNLOCK();
1677 
1678 	return (v);
1679 }
1680 
1681 void
1682 pool_multi_free_ni(struct pool *pp, void *v)
1683 {
1684 	struct kmem_va_mode kv = kv_any;
1685 
1686 	if (POOL_INPGHDR(pp))
1687 		kv.kv_align = pp->pr_pgsize;
1688 
1689 	KERNEL_LOCK();
1690 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1691 	KERNEL_UNLOCK();
1692 }
1693 
1694 #ifdef MULTIPROCESSOR
1695 
1696 struct pool pool_caches; /* per cpu cache entries */
1697 
1698 void
1699 pool_cache_init(struct pool *pp)
1700 {
1701 	struct cpumem *cm;
1702 	struct pool_cache *pc;
1703 	struct cpumem_iter i;
1704 
1705 	if (pool_caches.pr_size == 0) {
1706 		pool_init(&pool_caches, sizeof(struct pool_cache),
1707 		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
1708 		    "plcache", NULL);
1709 	}
1710 
1711 	/* must be able to use the pool items as cache list items */
1712 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1713 
1714 	cm = cpumem_get(&pool_caches);
1715 
1716 	pl_init(pp, &pp->pr_cache_lock);
1717 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1718 	TAILQ_INIT(&pp->pr_cache_lists);
1719 	pp->pr_cache_nitems = 0;
1720 	pp->pr_cache_tick = ticks;
1721 	pp->pr_cache_items = 8;
1722 	pp->pr_cache_contention = 0;
1723 	pp->pr_cache_ngc = 0;
1724 
1725 	CPUMEM_FOREACH(pc, &i, cm) {
1726 		pc->pc_actv = NULL;
1727 		pc->pc_nactv = 0;
1728 		pc->pc_prev = NULL;
1729 
1730 		pc->pc_nget = 0;
1731 		pc->pc_nfail = 0;
1732 		pc->pc_nput = 0;
1733 		pc->pc_nlget = 0;
1734 		pc->pc_nlfail = 0;
1735 		pc->pc_nlput = 0;
1736 		pc->pc_nout = 0;
1737 	}
1738 
1739 	membar_producer();
1740 
1741 	pp->pr_cache = cm;
1742 }
1743 
1744 static inline void
1745 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1746 {
1747 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1748 
1749 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1750 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1751 }
1752 
1753 static inline void
1754 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1755 {
1756 	unsigned long *entry;
1757 	unsigned long val;
1758 
1759 	entry = (unsigned long *)&ci->ci_nextl;
1760 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1761 	if (*entry != val)
1762 		goto fail;
1763 
1764 	entry++;
1765 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1766 	if (*entry != val)
1767 		goto fail;
1768 
1769 	return;
1770 
1771 fail:
1772 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1773 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1774 	    *entry, val);
1775 }
1776 
1777 static inline void
1778 pool_list_enter(struct pool *pp)
1779 {
1780 	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1781 		pl_enter(pp, &pp->pr_cache_lock);
1782 		pp->pr_cache_contention++;
1783 	}
1784 }
1785 
1786 static inline void
1787 pool_list_leave(struct pool *pp)
1788 {
1789 	pl_leave(pp, &pp->pr_cache_lock);
1790 }
1791 
1792 static inline struct pool_cache_item *
1793 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1794 {
1795 	struct pool_cache_item *pl;
1796 
1797 	pool_list_enter(pp);
1798 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1799 	if (pl != NULL) {
1800 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1801 		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1802 
1803 		pool_cache_item_magic(pp, pl);
1804 
1805 		pc->pc_nlget++;
1806 	} else
1807 		pc->pc_nlfail++;
1808 
1809 	/* fold this cpus nout into the global while we have the lock */
1810 	pp->pr_cache_nout += pc->pc_nout;
1811 	pc->pc_nout = 0;
1812 	pool_list_leave(pp);
1813 
1814 	return (pl);
1815 }
1816 
1817 static inline void
1818 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1819     struct pool_cache_item *ci)
1820 {
1821 	pool_list_enter(pp);
1822 	if (TAILQ_EMPTY(&pp->pr_cache_lists))
1823 		pp->pr_cache_tick = ticks;
1824 
1825 	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1826 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1827 
1828 	pc->pc_nlput++;
1829 
1830 	/* fold this cpus nout into the global while we have the lock */
1831 	pp->pr_cache_nout += pc->pc_nout;
1832 	pc->pc_nout = 0;
1833 	pool_list_leave(pp);
1834 }
1835 
1836 static inline struct pool_cache *
1837 pool_cache_enter(struct pool *pp, int *s)
1838 {
1839 	struct pool_cache *pc;
1840 
1841 	pc = cpumem_enter(pp->pr_cache);
1842 	*s = splraise(pp->pr_ipl);
1843 	pc->pc_gen++;
1844 
1845 	return (pc);
1846 }
1847 
1848 static inline void
1849 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1850 {
1851 	pc->pc_gen++;
1852 	splx(s);
1853 	cpumem_leave(pp->pr_cache, pc);
1854 }
1855 
1856 void *
1857 pool_cache_get(struct pool *pp)
1858 {
1859 	struct pool_cache *pc;
1860 	struct pool_cache_item *ci;
1861 	int s;
1862 
1863 	pc = pool_cache_enter(pp, &s);
1864 
1865 	if (pc->pc_actv != NULL) {
1866 		ci = pc->pc_actv;
1867 	} else if (pc->pc_prev != NULL) {
1868 		ci = pc->pc_prev;
1869 		pc->pc_prev = NULL;
1870 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1871 		pc->pc_nfail++;
1872 		goto done;
1873 	}
1874 
1875 	pool_cache_item_magic_check(pp, ci);
1876 #ifdef DIAGNOSTIC
1877 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1878 		size_t pidx;
1879 		uint32_t pval;
1880 
1881 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1882 		    &pidx, &pval)) {
1883 			int *ip = (int *)(ci + 1);
1884 			ip += pidx;
1885 
1886 			panic("%s: %s cpu free list modified: "
1887 			    "item addr %p+%zu 0x%x!=0x%x",
1888 			    __func__, pp->pr_wchan, ci,
1889 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1890 		}
1891 	}
1892 #endif
1893 
1894 	pc->pc_actv = ci->ci_next;
1895 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1896 	pc->pc_nget++;
1897 	pc->pc_nout++;
1898 
1899 done:
1900 	pool_cache_leave(pp, pc, s);
1901 
1902 	return (ci);
1903 }
1904 
1905 void
1906 pool_cache_put(struct pool *pp, void *v)
1907 {
1908 	struct pool_cache *pc;
1909 	struct pool_cache_item *ci = v;
1910 	unsigned long nitems;
1911 	int s;
1912 #ifdef DIAGNOSTIC
1913 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1914 
1915 	if (poison)
1916 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1917 #endif
1918 
1919 	pc = pool_cache_enter(pp, &s);
1920 
1921 	nitems = pc->pc_nactv;
1922 	if (nitems >= pp->pr_cache_items) {
1923 		if (pc->pc_prev != NULL)
1924 			pool_cache_list_free(pp, pc, pc->pc_prev);
1925 
1926 		pc->pc_prev = pc->pc_actv;
1927 
1928 		pc->pc_actv = NULL;
1929 		pc->pc_nactv = 0;
1930 		nitems = 0;
1931 	}
1932 
1933 	ci->ci_next = pc->pc_actv;
1934 	ci->ci_nitems = ++nitems;
1935 #ifdef DIAGNOSTIC
1936 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1937 #endif
1938 	pool_cache_item_magic(pp, ci);
1939 
1940 	pc->pc_actv = ci;
1941 	pc->pc_nactv = nitems;
1942 
1943 	pc->pc_nput++;
1944 	pc->pc_nout--;
1945 
1946 	pool_cache_leave(pp, pc, s);
1947 }
1948 
1949 struct pool_cache_item *
1950 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1951 {
1952 	struct pool_cache_item *rpl, *next;
1953 
1954 	if (pl == NULL)
1955 		return (NULL);
1956 
1957 	rpl = TAILQ_NEXT(pl, ci_nextl);
1958 
1959 	pl_enter(pp, &pp->pr_lock);
1960 	do {
1961 		next = pl->ci_next;
1962 		pool_do_put(pp, pl);
1963 		pl = next;
1964 	} while (pl != NULL);
1965 	pl_leave(pp, &pp->pr_lock);
1966 
1967 	return (rpl);
1968 }
1969 
1970 void
1971 pool_cache_destroy(struct pool *pp)
1972 {
1973 	struct pool_cache *pc;
1974 	struct pool_cache_item *pl;
1975 	struct cpumem_iter i;
1976 	struct cpumem *cm;
1977 
1978 	rw_enter_write(&pool_lock); /* serialise with the gc */
1979 	cm = pp->pr_cache;
1980 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1981 	rw_exit_write(&pool_lock);
1982 
1983 	CPUMEM_FOREACH(pc, &i, cm) {
1984 		pool_cache_list_put(pp, pc->pc_actv);
1985 		pool_cache_list_put(pp, pc->pc_prev);
1986 	}
1987 
1988 	cpumem_put(&pool_caches, cm);
1989 
1990 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1991 	while (pl != NULL)
1992 		pl = pool_cache_list_put(pp, pl);
1993 }
1994 
1995 void
1996 pool_cache_gc(struct pool *pp)
1997 {
1998 	unsigned int contention, delta;
1999 
2000 	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
2001 	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
2002 	    pl_enter_try(pp, &pp->pr_cache_lock)) {
2003 		struct pool_cache_item *pl = NULL;
2004 
2005 		pl = TAILQ_FIRST(&pp->pr_cache_lists);
2006 		if (pl != NULL) {
2007 			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
2008 			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
2009 			pp->pr_cache_tick = ticks;
2010 
2011 			pp->pr_cache_ngc++;
2012 		}
2013 
2014 		pl_leave(pp, &pp->pr_cache_lock);
2015 
2016 		pool_cache_list_put(pp, pl);
2017 	}
2018 
2019 	/*
2020 	 * if there's a lot of contention on the pr_cache_mtx then consider
2021 	 * growing the length of the list to reduce the need to access the
2022 	 * global pool.
2023 	 */
2024 
2025 	contention = pp->pr_cache_contention;
2026 	delta = contention - pp->pr_cache_contention_prev;
2027 	if (delta > 8 /* magic */) {
2028 		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
2029 			pp->pr_cache_items += 8;
2030 	} else if (delta == 0) {
2031 		if (pp->pr_cache_items > 8)
2032 			pp->pr_cache_items--;
2033 	}
2034 	pp->pr_cache_contention_prev = contention;
2035 }
2036 
2037 void
2038 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2039 {
2040 	struct pool_cache *pc;
2041 	struct cpumem_iter i;
2042 
2043 	if (pp->pr_cache == NULL)
2044 		return;
2045 
2046 	/* loop through the caches twice to collect stats */
2047 
2048 	/* once without the lock so we can yield while reading nget/nput */
2049 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2050 		uint64_t gen, nget, nput;
2051 
2052 		do {
2053 			while ((gen = pc->pc_gen) & 1)
2054 				yield();
2055 
2056 			nget = pc->pc_nget;
2057 			nput = pc->pc_nput;
2058 		} while (gen != pc->pc_gen);
2059 
2060 		pi->pr_nget += nget;
2061 		pi->pr_nput += nput;
2062 	}
2063 
2064 	/* and once with the mtx so we can get consistent nout values */
2065 	pl_enter(pp, &pp->pr_cache_lock);
2066 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
2067 		pi->pr_nout += pc->pc_nout;
2068 
2069 	pi->pr_nout += pp->pr_cache_nout;
2070 	pl_leave(pp, &pp->pr_cache_lock);
2071 }
2072 
2073 int
2074 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2075 {
2076 	struct kinfo_pool_cache kpc;
2077 
2078 	if (pp->pr_cache == NULL)
2079 		return (EOPNOTSUPP);
2080 
2081 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
2082 
2083 	pl_enter(pp, &pp->pr_cache_lock);
2084 	kpc.pr_ngc = pp->pr_cache_ngc;
2085 	kpc.pr_len = pp->pr_cache_items;
2086 	kpc.pr_nitems = pp->pr_cache_nitems;
2087 	kpc.pr_contention = pp->pr_cache_contention;
2088 	pl_leave(pp, &pp->pr_cache_lock);
2089 
2090 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
2091 }
2092 
2093 int
2094 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2095 {
2096 	struct pool_cache *pc;
2097 	struct kinfo_pool_cache_cpu *kpcc, *info;
2098 	unsigned int cpu = 0;
2099 	struct cpumem_iter i;
2100 	int error = 0;
2101 	size_t len;
2102 
2103 	if (pp->pr_cache == NULL)
2104 		return (EOPNOTSUPP);
2105 	if (*oldlenp % sizeof(*kpcc))
2106 		return (EINVAL);
2107 
2108 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
2109 	    M_WAITOK|M_CANFAIL|M_ZERO);
2110 	if (kpcc == NULL)
2111 		return (EIO);
2112 
2113 	len = ncpusfound * sizeof(*kpcc);
2114 
2115 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2116 		uint64_t gen;
2117 
2118 		if (cpu >= ncpusfound) {
2119 			error = EIO;
2120 			goto err;
2121 		}
2122 
2123 		info = &kpcc[cpu];
2124 		info->pr_cpu = cpu;
2125 
2126 		do {
2127 			while ((gen = pc->pc_gen) & 1)
2128 				yield();
2129 
2130 			info->pr_nget = pc->pc_nget;
2131 			info->pr_nfail = pc->pc_nfail;
2132 			info->pr_nput = pc->pc_nput;
2133 			info->pr_nlget = pc->pc_nlget;
2134 			info->pr_nlfail = pc->pc_nlfail;
2135 			info->pr_nlput = pc->pc_nlput;
2136 		} while (gen != pc->pc_gen);
2137 
2138 		cpu++;
2139 	}
2140 
2141 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2142 err:
2143 	free(kpcc, M_TEMP, len);
2144 
2145 	return (error);
2146 }
2147 #else /* MULTIPROCESSOR */
/*
 * Non-MULTIPROCESSOR stub: there are no per-cpu caches to set up.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nothing to do */
}
2153 
/*
 * Non-MULTIPROCESSOR stub: there are no cache statistics to add, so
 * *pi is left untouched.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nothing to do */
}
2159 
2160 int
2161 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2162 {
2163 	return (EOPNOTSUPP);
2164 }
2165 
2166 int
2167 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2168 {
2169 	return (EOPNOTSUPP);
2170 }
2171 #endif /* MULTIPROCESSOR */
2172 
2173 
2174 void
2175 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
2176     struct lock_type *type)
2177 {
2178 	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
2179 }
2180 
2181 void
2182 pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS)
2183 {
2184 	_mtx_enter(&lock->prl_mtx LOCK_FL_ARGS);
2185 }
2186 
2187 int
2188 pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS)
2189 {
2190 	return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS));
2191 }
2192 
2193 void
2194 pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS)
2195 {
2196 	_mtx_leave(&lock->prl_mtx LOCK_FL_ARGS);
2197 }
2198 
2199 void
2200 pool_lock_mtx_assert_locked(union pool_lock *lock)
2201 {
2202 	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
2203 }
2204 
2205 void
2206 pool_lock_mtx_assert_unlocked(union pool_lock *lock)
2207 {
2208 	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
2209 }
2210 
2211 int
2212 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
2213     const char *wmesg, int timo)
2214 {
2215 	return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
2216 }
2217 
2218 static const struct pool_lock_ops pool_lock_ops_mtx = {
2219 	pool_lock_mtx_init,
2220 	pool_lock_mtx_enter,
2221 	pool_lock_mtx_enter_try,
2222 	pool_lock_mtx_leave,
2223 	pool_lock_mtx_assert_locked,
2224 	pool_lock_mtx_assert_unlocked,
2225 	pool_lock_mtx_sleep,
2226 };
2227 
2228 void
2229 pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
2230     struct lock_type *type)
2231 {
2232 	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
2233 }
2234 
2235 void
2236 pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS)
2237 {
2238 	_rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS);
2239 }
2240 
2241 int
2242 pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS)
2243 {
2244 	return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP
2245 	    LOCK_FL_ARGS) == 0);
2246 }
2247 
2248 void
2249 pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS)
2250 {
2251 	_rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS);
2252 }
2253 
2254 void
2255 pool_lock_rw_assert_locked(union pool_lock *lock)
2256 {
2257 	rw_assert_wrlock(&lock->prl_rwlock);
2258 }
2259 
2260 void
2261 pool_lock_rw_assert_unlocked(union pool_lock *lock)
2262 {
2263 	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
2264 }
2265 
2266 int
2267 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
2268     const char *wmesg, int timo)
2269 {
2270 	return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
2271 }
2272 
2273 static const struct pool_lock_ops pool_lock_ops_rw = {
2274 	pool_lock_rw_init,
2275 	pool_lock_rw_enter,
2276 	pool_lock_rw_enter_try,
2277 	pool_lock_rw_leave,
2278 	pool_lock_rw_assert_locked,
2279 	pool_lock_rw_assert_unlocked,
2280 	pool_lock_rw_sleep,
2281 };
2282