xref: /openbsd-src/sys/kern/subr_pool.c (revision 6f31b16b9589b822b677516478fd56b65f41c3dd)
1 /*	$OpenBSD: subr_pool.c,v 1.221 2018/01/18 18:08:51 bluhm Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/proc.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
61 
62 /* List of all pools; pool_init() inserts at the head, pool_destroy() unlinks */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, pool_init() panics, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;	/* number of pools currently on pool_head */
71 
72 /* Lock the previous variables making up the global pool state */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures (set up lazily by pool_init()) */
76 struct pool phpool;
77 
/*
 * Table of lock operations used for a pool's locks.  pool_init() points
 * pp->pr_lock_ops at pool_lock_ops_rw when PR_RWLOCK is set in the pool
 * flags and at pool_lock_ops_mtx otherwise; the pl_*() wrappers below
 * dispatch through it.
 */
78 struct pool_lock_ops {
79 	void	(*pl_init)(struct pool *, union pool_lock *,
80 		    struct lock_type *);
81 	void	(*pl_enter)(union pool_lock * LOCK_FL_VARS);
82 	int	(*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
83 	void	(*pl_leave)(union pool_lock * LOCK_FL_VARS);
84 	void	(*pl_assert_locked)(union pool_lock *);
85 	void	(*pl_assert_unlocked)(union pool_lock *);
86 	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
87 };
88 
/* The two implementations; only declared here, defined outside this excerpt. */
89 static const struct pool_lock_ops pool_lock_ops_mtx;
90 static const struct pool_lock_ops pool_lock_ops_rw;
91 
/*
 * Initialise lock `pl' of pool `pp' through the pool's lock ops.
 * With WITNESS a static struct lock_type, named after the text of the
 * `pl' argument, is handed to the op; without it NULL is passed.
 */
92 #ifdef WITNESS
93 #define pl_init(pp, pl) do {						\
94 	static struct lock_type __lock_type = { .lt_name = #pl };	\
95 	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
96 } while (0)
97 #else /* WITNESS */
98 #define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
99 #endif /* WITNESS */
100 
/*
 * Thin wrappers that forward a lock operation on `pl' to the
 * implementation selected in pp->pr_lock_ops.
 */
101 static inline void
102 pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
103 {
104 	pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
105 }
/* Returns whatever the underlying pl_enter_try op returns. */
106 static inline int
107 pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
108 {
109 	return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
110 }
111 static inline void
112 pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
113 {
114 	pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
115 }
116 static inline void
117 pl_assert_locked(struct pool *pp, union pool_lock *pl)
118 {
119 	pp->pr_lock_ops->pl_assert_locked(pl);
120 }
121 static inline void
122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
123 {
124 	pp->pr_lock_ops->pl_assert_unlocked(pl);
125 }
/* Sleep on `ident' via the pool's pl_sleep op; arguments are passed through. */
126 static inline int
127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
128     const char *wmesg, int timo)
129 {
130 	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
131 }
132 
/*
 * With WITNESS the enter/leave wrappers are re-exposed as macros that
 * append LOCK_FILE_LINE to the argument list (presumably the caller's
 * file and line -- confirm against the LOCK_FL_* definitions).
 */
133 #ifdef WITNESS
134 # define pl_enter(pp,pl)	pl_enter(pp,pl LOCK_FILE_LINE)
135 # define pl_enter_try(pp,pl)	pl_enter_try(pp,pl LOCK_FILE_LINE)
136 # define pl_leave(pp,pl)	pl_leave(pp,pl LOCK_FILE_LINE)
137 #endif
138 
/*
 * Header overlaid on the start of every free item.  pi_magic is set to
 * POOL_IMAGIC() when the item goes on a page's free list and is checked
 * again when it is taken off (pool_do_get) or the page is released
 * (pool_p_free); a mismatch panics.
 */
139 struct pool_item {
140 	u_long				pi_magic;
141 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
142 };
/* Expected magic for item `pi' on page `ph': item address XOR page magic. */
143 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
144 
/* Per-page bookkeeping, kept either inside the page or in phpool. */
145 struct pool_page_header {
146 	/* Page headers */
147 	TAILQ_ENTRY(pool_page_header)
148 				ph_entry;	/* pool page list */
149 	XSIMPLEQ_HEAD(, pool_item)
150 				ph_items;	/* free items on the page */
151 	RBT_ENTRY(pool_page_header)
152 				ph_node;	/* off-page page headers */
153 	unsigned int		ph_nmissing;	/* # of chunks in use */
154 	caddr_t			ph_page;	/* this page's address */
155 	caddr_t			ph_colored;	/* page's colored address */
156 	unsigned long		ph_magic;	/* random value, see POOL_IMAGIC */
157 	int			ph_tick;	/* `ticks' when page last became empty */
158 };
159 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/* True if the free items on this page are poisoned (bit set by pool_p_alloc). */
160 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
161 
162 #ifdef MULTIPROCESSOR
/* Header overlaid on an item while it sits on a cache list. */
163 struct pool_cache_item {
164 	struct pool_cache_item	*ci_next;	/* next item in list */
165 	unsigned long		 ci_nitems;	/* number of items in list */
166 	TAILQ_ENTRY(pool_cache_item)
167 				 ci_nextl;	/* entry in list of lists */
168 };
169 
170 /* the poisoned flag is kept in bit 0x8000000 of nitems, above the count bits */
171 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
172 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
173 
/* Item count of a cache list head with the poison flag masked off. */
174 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
175     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
176 
/* Non-zero if the poison flag is set on this cached item. */
177 #define POOL_CACHE_ITEM_POISONED(_ci)					\
178     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
179 
/* Item cache state and statistics (presumably one per CPU -- confirm). */
180 struct pool_cache {
181 	struct pool_cache_item	*pc_actv;	/* active list of items */
182 	unsigned long		 pc_nactv;	/* actv head nitems cache */
183 	struct pool_cache_item	*pc_prev;	/* previous list of items */
184 
185 	uint64_t		 pc_gen;	/* generation number */
186 	uint64_t		 pc_nget;	/* # of successful requests */
187 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
188 	uint64_t		 pc_nput;	/* # of releases */
189 	uint64_t		 pc_nlget;	/* # of list requests */
190 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
191 	uint64_t		 pc_nlput;	/* # of list releases */
192 
193 	int			 pc_nout;
194 };
195 
/* Cache entry points used by pool_get(), pool_put() and pool_destroy(). */
196 void	*pool_cache_get(struct pool *);
197 void	 pool_cache_put(struct pool *, void *);
198 void	 pool_cache_destroy(struct pool *);
199 void	 pool_cache_gc(struct pool *);
200 #endif
/* Declared regardless of MULTIPROCESSOR; defined outside this excerpt. */
201 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
202 int	 pool_cache_info(struct pool *, void *, size_t *);
203 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
204 
/*
 * Debug level: defaults to 1 when built with POOL_DEBUG, else 0.
 * Under DIAGNOSTIC a non-zero value makes pool_p_alloc() mark new pages
 * for item poisoning; the value 2 additionally makes pool_get() yield
 * on every PR_WAITOK request.
 */
205 #ifdef POOL_DEBUG
206 int	pool_debug = 1;
207 #else
208 int	pool_debug = 0;
209 #endif
210 
/* True when page headers live inside the page itself (non-zero offset). */
211 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
212 
/* Page-level helpers: allocate, link into, unlink from and release a page. */
213 struct pool_page_header *
214 	 pool_p_alloc(struct pool *, int, int *);
215 void	 pool_p_insert(struct pool *, struct pool_page_header *);
216 void	 pool_p_remove(struct pool *, struct pool_page_header *);
217 void	 pool_p_free(struct pool *, struct pool_page_header *);
218 
/* Item-level helpers and consistency checks. */
219 void	 pool_update_curpage(struct pool *);
220 void	*pool_do_get(struct pool *, int, int *);
221 void	 pool_do_put(struct pool *, void *);
222 int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
223 int	 pool_chk(struct pool *);
224 void	 pool_get_done(struct pool *, void *, void *);
225 void	 pool_runqueue(struct pool *, int);
226 
/* Wrappers around the pool's backend page allocator. */
227 void	*pool_allocator_alloc(struct pool *, int, int *);
228 void	 pool_allocator_free(struct pool *, void *);
229 
230 /*
231  * The default pool allocator.
232  */
233 void	*pool_page_alloc(struct pool *, int, int *);
234 void	pool_page_free(struct pool *, void *);
235 
236 /*
237  * safe for interrupts; this is the default allocator
 * (pool_init() selects it when no allocator is supplied and the
 * computed page size does not exceed PAGE_SIZE)
238  */
239 struct pool_allocator pool_allocator_single = {
240 	pool_page_alloc,
241 	pool_page_free,
242 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
243 };
244 
245 void	*pool_multi_alloc(struct pool *, int, int *);
246 void	pool_multi_free(struct pool *, void *);
247 
/*
 * Multi-page allocator: pool_init() selects it when no allocator is
 * supplied, the computed page size exceeds PAGE_SIZE and the pool flags
 * do not include PR_WAITOK.
 */
248 struct pool_allocator pool_allocator_multi = {
249 	pool_multi_alloc,
250 	pool_multi_free,
251 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
252 };
253 
254 void	*pool_multi_alloc_ni(struct pool *, int, int *);
255 void	pool_multi_free_ni(struct pool *, void *);
256 
/*
 * As pool_allocator_multi, but chosen by pool_init() when the pool
 * flags include PR_WAITOK.
 */
257 struct pool_allocator pool_allocator_multi_ni = {
258 	pool_multi_alloc_ni,
259 	pool_multi_free_ni,
260 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
261 };
262 
/* Debugger print helpers; they take a printf-like output callback. */
263 #ifdef DDB
264 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
265 	     __attribute__((__format__(__kprintf__,1,2))));
266 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
267 	     __attribute__((__format__(__kprintf__,1,2))));
268 #endif
269 
270 /* stale page garbage collectors */
271 void	pool_gc_sched(void *);
272 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
273 void	pool_gc_pages(void *);
274 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
/*
 * pool_put() releases an idle page once it has been empty for more than
 * hz * pool_wait_free ticks (and the pool holds more than pr_maxpages
 * idle pages).
 */
275 int pool_wait_free = 1;
/* NOTE(review): not referenced in this excerpt; presumably the gc's idle threshold -- confirm. */
276 int pool_wait_gc = 8;
277 
278 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
279 
280 static inline int
281 phtree_compare(const struct pool_page_header *a,
282     const struct pool_page_header *b)
283 {
284 	vaddr_t va = (vaddr_t)a->ph_page;
285 	vaddr_t vb = (vaddr_t)b->ph_page;
286 
287 	/* the compares in this order are important for the NFIND to work */
288 	if (vb < va)
289 		return (-1);
290 	if (vb > va)
291 		return (1);
292 
293 	return (0);
294 }
295 
296 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
297 
298 /*
299  * Return the pool page header based on page address.
300  */
301 static inline struct pool_page_header *
302 pr_find_pagehead(struct pool *pp, void *v)
303 {
304 	struct pool_page_header *ph, key;
305 
306 	if (POOL_INPGHDR(pp)) {
307 		caddr_t page;
308 
309 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
310 
311 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
312 	}
313 
314 	key.ph_page = v;
315 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
316 	if (ph == NULL)
317 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
318 
319 	KASSERT(ph->ph_page <= (caddr_t)v);
320 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
321 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
322 
323 	return (ph);
324 }
325 
326 /*
327  * Initialize the given pool resource structure.
328  *
329  * We export this routine to allow other kernel parts to declare
330  * static pools that must be initialized before malloc() is available.
 *
 *	pp	pool to initialise; it is zeroed and fully overwritten
 *	size	item size; raised to sizeof(struct pool_item) if smaller
 *		and rounded up to a multiple of `align'
 *	align	item alignment; 0 selects ALIGN(1)
 *	ipl	stored in pr_ipl (checked with splassert() on get/put)
 *	flags	PR_* flags stored in pr_flags; PR_RWLOCK selects the
 *		rwlock lock ops and requires PR_WAITOK
 *	wchan	pool name, used in panic messages and as sleep message
 *	palloc	backend page allocator, or NULL to pick a default one
 *
 * The first call also initialises the private page-header pool, phpool.
331  */
332 void
333 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
334     const char *wchan, struct pool_allocator *palloc)
335 {
336 	int off = 0, space;
337 	unsigned int pgsize = PAGE_SIZE, items;
338 	size_t pa_pagesz;
339 #ifdef DIAGNOSTIC
340 	struct pool *iter;
341 #endif
342 
343 	if (align == 0)
344 		align = ALIGN(1);
345 
	/* a free item must be able to hold the free-list header */
346 	if (size < sizeof(struct pool_item))
347 		size = sizeof(struct pool_item);
348 
349 	size = roundup(size, align);
350 
	/* grow the page size until at least eight items fit on a page */
351 	while (size * 8 > pgsize)
352 		pgsize <<= 1;
353 
354 	if (palloc == NULL) {
355 		if (pgsize > PAGE_SIZE) {
356 			palloc = ISSET(flags, PR_WAITOK) ?
357 			    &pool_allocator_multi_ni : &pool_allocator_multi;
358 		} else
359 			palloc = &pool_allocator_single;
360 
361 		pa_pagesz = palloc->pa_pagesz;
362 	} else {
363 		size_t pgsizes;
364 
365 		pa_pagesz = palloc->pa_pagesz;
366 		if (pa_pagesz == 0)
367 			pa_pagesz = POOL_ALLOC_DEFAULT;
368 
		/* bitmask of supported page sizes, without the aligned flag */
369 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
370 
371 		/* make sure the allocator can fit at least one item */
372 		if (size > pgsizes) {
373 			panic("%s: pool %s item size 0x%zx > "
374 			    "allocator %p sizes 0x%zx", __func__, wchan,
375 			    size, palloc, pgsizes);
376 		}
377 
378 		/* shrink pgsize until it fits into the range */
379 		while (!ISSET(pgsizes, pgsize))
380 			pgsize >>= 1;
381 	}
382 	KASSERT(ISSET(pa_pagesz, pgsize));
383 
384 	items = pgsize / size;
385 
386 	/*
387 	 * Decide whether to put the page header off page to avoid
388 	 * wasting too large a part of the page. Off-page page headers
389 	 * go into an RB tree, so we can match a returned item with
390 	 * its header based on the page address.
	 *
	 * `off' stays 0 (off-page header) unless the allocator returns
	 * aligned pages and either the slack after the items already
	 * holds a header, or items are small enough (at most two
	 * headers in size) that giving up item space is acceptable.
391 	 */
392 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
393 		if (pgsize - (size * items) >
394 		    sizeof(struct pool_page_header)) {
395 			off = pgsize - sizeof(struct pool_page_header);
396 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
397 			off = pgsize - sizeof(struct pool_page_header);
398 			items = off / size;
399 		}
400 	}
401 
402 	KASSERT(items > 0);
403 
404 	/*
405 	 * Initialize the pool structure.
406 	 */
407 	memset(pp, 0, sizeof(*pp));
408 	if (ISSET(flags, PR_RWLOCK)) {
409 		KASSERT(flags & PR_WAITOK);
410 		pp->pr_lock_ops = &pool_lock_ops_rw;
411 	} else
412 		pp->pr_lock_ops = &pool_lock_ops_mtx;
413 	TAILQ_INIT(&pp->pr_emptypages);
414 	TAILQ_INIT(&pp->pr_fullpages);
415 	TAILQ_INIT(&pp->pr_partpages);
416 	pp->pr_curpage = NULL;
417 	pp->pr_npages = 0;
418 	pp->pr_minitems = 0;
419 	pp->pr_minpages = 0;
420 	pp->pr_maxpages = 8;
421 	pp->pr_size = size;
422 	pp->pr_pgsize = pgsize;
423 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);	/* clears the in-page offset bits */
424 	pp->pr_phoffset = off;
425 	pp->pr_itemsperpage = items;
426 	pp->pr_wchan = wchan;
427 	pp->pr_alloc = palloc;
428 	pp->pr_nitems = 0;
429 	pp->pr_nout = 0;
430 	pp->pr_hardlimit = UINT_MAX;	/* effectively no hard limit */
431 	pp->pr_hardlimit_warning = NULL;
432 	pp->pr_hardlimit_ratecap.tv_sec = 0;
433 	pp->pr_hardlimit_ratecap.tv_usec = 0;
434 	pp->pr_hardlimit_warning_last.tv_sec = 0;
435 	pp->pr_hardlimit_warning_last.tv_usec = 0;
436 	RBT_INIT(phtree, &pp->pr_phtree);
437 
438 	/*
439 	 * Use the space between the chunks and the page header
440 	 * for cache coloring.
441 	 */
442 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
443 	space -= pp->pr_itemsperpage * pp->pr_size;
444 	pp->pr_align = align;
445 	pp->pr_maxcolors = (space / align) + 1;
446 
447 	pp->pr_nget = 0;
448 	pp->pr_nfail = 0;
449 	pp->pr_nput = 0;
450 	pp->pr_npagealloc = 0;
451 	pp->pr_npagefree = 0;
452 	pp->pr_hiwat = 0;
453 	pp->pr_nidle = 0;
454 
455 	pp->pr_ipl = ipl;
456 	pp->pr_flags = flags;
457 
458 	pl_init(pp, &pp->pr_lock);
459 	pl_init(pp, &pp->pr_requests_lock);
460 	TAILQ_INIT(&pp->pr_requests);
461 
	/* pr_size is only non-zero once phpool itself has been set up */
462 	if (phpool.pr_size == 0) {
463 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
464 		    IPL_HIGH, 0, "phpool", NULL);
465 
466 		/* make sure phpool wont "recurse" */
467 		KASSERT(POOL_INPGHDR(&phpool));
468 	}
469 
470 	/* pglistalloc/constraint parameters */
471 	pp->pr_crange = &kp_dirty;
472 
473 	/* Insert this into the list of all pools. */
474 	rw_enter_write(&pool_lock);
475 #ifdef DIAGNOSTIC
476 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
477 		if (iter == pp)
478 			panic("%s: pool %s already on list", __func__, wchan);
479 	}
480 #endif
481 
482 	pp->pr_serial = ++pool_serial;
	/* the serial counter wrapped around */
483 	if (pool_serial == 0)
484 		panic("%s: too much uptime", __func__);
485 
486 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
487 	pool_count++;
488 	rw_exit_write(&pool_lock);
489 }
490 
491 /*
492  * Decommission a pool resource.
493  */
494 void
495 pool_destroy(struct pool *pp)
496 {
497 	struct pool_page_header *ph;
498 	struct pool *prev, *iter;
499 
500 #ifdef MULTIPROCESSOR
501 	if (pp->pr_cache != NULL)
502 		pool_cache_destroy(pp);
503 #endif
504 
505 #ifdef DIAGNOSTIC
506 	if (pp->pr_nout != 0)
507 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
508 #endif
509 
510 	/* Remove from global pool list */
511 	rw_enter_write(&pool_lock);
512 	pool_count--;
513 	if (pp == SIMPLEQ_FIRST(&pool_head))
514 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
515 	else {
516 		prev = SIMPLEQ_FIRST(&pool_head);
517 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
518 			if (iter == pp) {
519 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
520 				    pr_poollist);
521 				break;
522 			}
523 			prev = iter;
524 		}
525 	}
526 	rw_exit_write(&pool_lock);
527 
528 	/* Remove all pages */
529 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
530 		pl_enter(pp, &pp->pr_lock);
531 		pool_p_remove(pp, ph);
532 		pl_leave(pp, &pp->pr_lock);
533 		pool_p_free(pp, ph);
534 	}
535 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
536 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
537 }
538 
539 void
540 pool_request_init(struct pool_request *pr,
541     void (*handler)(struct pool *, void *, void *), void *cookie)
542 {
543 	pr->pr_handler = handler;
544 	pr->pr_cookie = cookie;
545 	pr->pr_item = NULL;
546 }
547 
/*
 * Queue request `pr' on the pool and immediately try to service the
 * request queue without sleeping.  The request's handler may therefore
 * be called before this function returns.
 */
548 void
549 pool_request(struct pool *pp, struct pool_request *pr)
550 {
551 	pl_enter(pp, &pp->pr_requests_lock);
552 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	/* pool_runqueue() expects pr_requests_lock held and pr_lock not held */
553 	pool_runqueue(pp, PR_NOWAIT);
554 	pl_leave(pp, &pp->pr_requests_lock);
555 }
556 
/*
 * Rendezvous between a sleeping pool_get() and pool_get_done():
 * `v' receives the item, `lock' protects it, and the structure's
 * address is the sleep/wakeup channel.
 */
557 struct pool_get_memory {
558 	union pool_lock lock;
559 	void * volatile v;
560 };
561 
562 /*
563  * Grab an item from the pool.
 *
 * `flags' must contain PR_WAITOK or PR_NOWAIT (PR_WAITOK is mandatory
 * for PR_RWLOCK pools); PR_ZERO zeroes the item before it is returned.
 *
 * Returns the item, or NULL (after bumping pr_nfail) when
 *  - the hard limit is reached and PR_NOWAIT or PR_LIMITFAIL is set, or
 *  - no item could be obtained and PR_NOWAIT is set.
 * In every other case the caller sleeps until an item is handed over
 * through the pool's request queue.
564  */
565 void *
566 pool_get(struct pool *pp, int flags)
567 {
568 	void *v = NULL;
569 	int slowdown = 0;
570 
571 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
572 	if (pp->pr_flags & PR_RWLOCK)
573 		KASSERT(flags & PR_WAITOK);
574 
575 #ifdef MULTIPROCESSOR
	/* try the item cache first; it is consulted before taking pr_lock */
576 	if (pp->pr_cache != NULL) {
577 		v = pool_cache_get(pp);
578 		if (v != NULL)
579 			goto good;
580 	}
581 #endif
582 
583 	pl_enter(pp, &pp->pr_lock);
584 	if (pp->pr_nout >= pp->pr_hardlimit) {
585 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
586 			goto fail;
587 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
588 		if (ISSET(flags, PR_NOWAIT))
589 			goto fail;
590 	}
591 	pl_leave(pp, &pp->pr_lock);
592 
	/* back off if the page allocator asked for it, or always at debug level 2 */
593 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
594 		yield();
595 
596 	if (v == NULL) {
		/* nothing available now: queue a request and sleep on it */
597 		struct pool_get_memory mem = { .v = NULL };
598 		struct pool_request pr;
599 
600 #ifdef DIAGNOSTIC
601 		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
602 			panic("%s: cannot sleep for memory during boot",
603 			    __func__);
604 #endif
605 		pl_init(pp, &mem.lock);
606 		pool_request_init(&pr, pool_get_done, &mem);
607 		pool_request(pp, &pr);
608 
		/* pool_get_done() stores the item in mem.v and wakes us up */
609 		pl_enter(pp, &mem.lock);
610 		while (mem.v == NULL)
611 			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
612 		pl_leave(pp, &mem.lock);
613 
614 		v = mem.v;
615 	}
616 
617 #ifdef MULTIPROCESSOR
618 good:
619 #endif
620 	if (ISSET(flags, PR_ZERO))
621 		memset(v, 0, pp->pr_size);
622 
623 	return (v);
624 
625 fail:
	/* reached with pr_lock still held */
626 	pp->pr_nfail++;
627 	pl_leave(pp, &pp->pr_lock);
628 	return (NULL);
629 }
630 
631 void
632 pool_get_done(struct pool *pp, void *xmem, void *v)
633 {
634 	struct pool_get_memory *mem = xmem;
635 
636 	pl_enter(pp, &mem->lock);
637 	mem->v = v;
638 	pl_leave(pp, &mem->lock);
639 
640 	wakeup_one(mem);
641 }
642 
/*
 * Service the pool's queue of pending requests.
 *
 * Must be called with pr_requests_lock held and pr_lock not held; the
 * requests lock is dropped while items are fetched and handlers run,
 * and is held again on return.  `flags' is passed to pool_do_get().
 *
 * pr_requesting counts callers: if another caller is already inside
 * the loop, this call only bumps the counter and returns, which makes
 * the running caller go around once more.  Requests that could not be
 * satisfied are put back on pr_requests before returning.
 */
643 void
644 pool_runqueue(struct pool *pp, int flags)
645 {
646 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
647 	struct pool_request *pr;
648 
649 	pl_assert_unlocked(pp, &pp->pr_lock);
650 	pl_assert_locked(pp, &pp->pr_requests_lock);
651 
	/* someone else is already running the queue; just signal them */
652 	if (pp->pr_requesting++)
653 		return;
654 
655 	do {
		/* collapse any signals received so far into a single pass */
656 		pp->pr_requesting = 1;
657 
658 		/* no TAILQ_JOIN? :( */
659 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
660 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
661 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
662 		}
		/* `continue' jumps to the loop condition below */
663 		if (TAILQ_EMPTY(&prl))
664 			continue;
665 
666 		pl_leave(pp, &pp->pr_requests_lock);
667 
		/* attach items to as many requests as possible, in order */
668 		pl_enter(pp, &pp->pr_lock);
669 		pr = TAILQ_FIRST(&prl);
670 		while (pr != NULL) {
671 			int slowdown = 0;
672 
673 			if (pp->pr_nout >= pp->pr_hardlimit)
674 				break;
675 
676 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
677 			if (pr->pr_item == NULL) /* || slowdown ? */
678 				break;
679 
680 			pr = TAILQ_NEXT(pr, pr_entry);
681 		}
682 		pl_leave(pp, &pp->pr_lock);
683 
		/* run the handlers of satisfied requests with no pool lock held */
684 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
685 		    pr->pr_item != NULL) {
686 			TAILQ_REMOVE(&prl, pr, pr_entry);
687 			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
688 		}
689 
690 		pl_enter(pp, &pp->pr_requests_lock);
691 	} while (--pp->pr_requesting);
692 
	/* requeue whatever is still unsatisfied */
693 	/* no TAILQ_JOIN :( */
694 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
695 		TAILQ_REMOVE(&prl, pr, pr_entry);
696 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
697 	}
698 }
699 
/*
 * Take one item off the pool's current page.
 *
 * Called with pr_lock held.  If the pool has no current page the lock
 * is dropped while a new page is allocated (`flags' and `slowdown' are
 * passed on to pool_p_alloc()) and then retaken.  Returns the item with
 * pr_nout and pr_nget already updated, or NULL if no page could be
 * allocated.  Panics if the free list of the page looks corrupted.
 */
700 void *
701 pool_do_get(struct pool *pp, int flags, int *slowdown)
702 {
703 	struct pool_item *pi;
704 	struct pool_page_header *ph;
705 
706 	pl_assert_locked(pp, &pp->pr_lock);
707 
708 	splassert(pp->pr_ipl);
709 
710 	/*
711 	 * Account for this item now to avoid races if we need to give up
712 	 * pr_lock to allocate a page.
713 	 */
714 	pp->pr_nout++;
715 
716 	if (pp->pr_curpage == NULL) {
717 		pl_leave(pp, &pp->pr_lock);
718 		ph = pool_p_alloc(pp, flags, slowdown);
719 		pl_enter(pp, &pp->pr_lock);
720 
721 		if (ph == NULL) {
			/* undo the early accounting */
722 			pp->pr_nout--;
723 			return (NULL);
724 		}
725 
726 		pool_p_insert(pp, ph);
727 	}
728 
	/*
	 * Use pr_curpage rather than the page just allocated: the lock
	 * was dropped, so another page may have become current meanwhile.
	 */
729 	ph = pp->pr_curpage;
730 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
731 	if (__predict_false(pi == NULL))
732 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
733 
	/* the magic was written when the item was put on the free list */
734 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
735 		panic("%s: %s free list modified: "
736 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
737 		    __func__, pp->pr_wchan, ph->ph_page, pi,
738 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
739 	}
740 
741 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
742 
743 #ifdef DIAGNOSTIC
	/* verify the poison behind the item header is still intact */
744 	if (pool_debug && POOL_PHPOISON(ph)) {
745 		size_t pidx;
746 		uint32_t pval;
747 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
748 		    &pidx, &pval)) {
749 			int *ip = (int *)(pi + 1);
			/* offset is reported relative to the item address */
750 			panic("%s: %s free list modified: "
751 			    "page %p; item addr %p; offset 0x%zx=0x%x",
752 			    __func__, pp->pr_wchan, ph->ph_page, pi,
753 			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
754 		}
755 	}
756 #endif /* DIAGNOSTIC */
757 
758 	if (ph->ph_nmissing++ == 0) {
759 		/*
760 		 * This page was previously empty.  Move it to the list of
761 		 * partially-full pages.  This page is already curpage.
762 		 */
763 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
764 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
765 
766 		pp->pr_nidle--;
767 	}
768 
769 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
770 		/*
771 		 * This page is now full.  Move it to the full list
772 		 * and select a new current page.
773 		 */
774 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
775 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
776 		pool_update_curpage(pp);
777 	}
778 
779 	pp->pr_nget++;
780 
781 	return (pi);
782 }
783 
784 /*
785  * Return resource to the pool.
 *
 * `v' must be an item obtained from `pp' (DIAGNOSTIC kernels panic on
 * NULL).  With an item cache and no pending requests the item goes to
 * the cache.  Otherwise it is returned to its page, at most one
 * long-idle page is released, and pending requests are serviced.
786  */
787 void
788 pool_put(struct pool *pp, void *v)
789 {
790 	struct pool_page_header *ph, *freeph = NULL;
791 
792 #ifdef DIAGNOSTIC
793 	if (v == NULL)
794 		panic("%s: NULL item", __func__);
795 #endif
796 
797 #ifdef MULTIPROCESSOR
	/* bypass the cache while requests are waiting for items */
798 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
799 		pool_cache_put(pp, v);
800 		return;
801 	}
802 #endif
803 
804 	pl_enter(pp, &pp->pr_lock);
805 
806 	pool_do_put(pp, v);
807 
808 	pp->pr_nout--;
809 	pp->pr_nput++;
810 
811 	/* is it time to free a page? */
	/*
	 * Only when more than pr_maxpages pages are idle and the first
	 * page on the empty list has been idle for longer than
	 * pool_wait_free seconds' worth of ticks.
	 */
812 	if (pp->pr_nidle > pp->pr_maxpages &&
813 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
814 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
815 		freeph = ph;
816 		pool_p_remove(pp, freeph);
817 	}
818 
819 	pl_leave(pp, &pp->pr_lock);
820 
	/* pool_p_free() must run without pr_lock */
821 	if (freeph != NULL)
822 		pool_p_free(pp, freeph);
823 
	/* an item is available again; hand it to a waiting request */
824 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
825 		pl_enter(pp, &pp->pr_requests_lock);
826 		pool_runqueue(pp, PR_NOWAIT);
827 		pl_leave(pp, &pp->pr_requests_lock);
828 	}
829 }
830 
831 void
832 pool_do_put(struct pool *pp, void *v)
833 {
834 	struct pool_item *pi = v;
835 	struct pool_page_header *ph;
836 
837 	splassert(pp->pr_ipl);
838 
839 	ph = pr_find_pagehead(pp, v);
840 
841 #ifdef DIAGNOSTIC
842 	if (pool_debug) {
843 		struct pool_item *qi;
844 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
845 			if (pi == qi) {
846 				panic("%s: %s: double pool_put: %p", __func__,
847 				    pp->pr_wchan, pi);
848 			}
849 		}
850 	}
851 #endif /* DIAGNOSTIC */
852 
853 	pi->pi_magic = POOL_IMAGIC(ph, pi);
854 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
855 #ifdef DIAGNOSTIC
856 	if (POOL_PHPOISON(ph))
857 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
858 #endif /* DIAGNOSTIC */
859 
860 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
861 		/*
862 		 * The page was previously completely full, move it to the
863 		 * partially-full list.
864 		 */
865 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
866 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
867 	}
868 
869 	if (ph->ph_nmissing == 0) {
870 		/*
871 		 * The page is now empty, so move it to the empty page list.
872 		 */
873 		pp->pr_nidle++;
874 
875 		ph->ph_tick = ticks;
876 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
877 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
878 		pool_update_curpage(pp);
879 	}
880 }
881 
882 /*
883  * Add N items to the pool.
884  */
885 int
886 pool_prime(struct pool *pp, int n)
887 {
888 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
889 	struct pool_page_header *ph;
890 	int newpages;
891 
892 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
893 
894 	while (newpages-- > 0) {
895 		int slowdown = 0;
896 
897 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
898 		if (ph == NULL) /* or slowdown? */
899 			break;
900 
901 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
902 	}
903 
904 	pl_enter(pp, &pp->pr_lock);
905 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
906 		TAILQ_REMOVE(&pl, ph, ph_entry);
907 		pool_p_insert(pp, ph);
908 	}
909 	pl_leave(pp, &pp->pr_lock);
910 
911 	return (0);
912 }
913 
914 struct pool_page_header *
915 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
916 {
917 	struct pool_page_header *ph;
918 	struct pool_item *pi;
919 	caddr_t addr;
920 	int n;
921 
922 	pl_assert_unlocked(pp, &pp->pr_lock);
923 	KASSERT(pp->pr_size >= sizeof(*pi));
924 
925 	addr = pool_allocator_alloc(pp, flags, slowdown);
926 	if (addr == NULL)
927 		return (NULL);
928 
929 	if (POOL_INPGHDR(pp))
930 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
931 	else {
932 		ph = pool_get(&phpool, flags);
933 		if (ph == NULL) {
934 			pool_allocator_free(pp, addr);
935 			return (NULL);
936 		}
937 	}
938 
939 	XSIMPLEQ_INIT(&ph->ph_items);
940 	ph->ph_page = addr;
941 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
942 	ph->ph_colored = addr;
943 	ph->ph_nmissing = 0;
944 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
945 #ifdef DIAGNOSTIC
946 	/* use a bit in ph_magic to record if we poison page items */
947 	if (pool_debug)
948 		SET(ph->ph_magic, POOL_MAGICBIT);
949 	else
950 		CLR(ph->ph_magic, POOL_MAGICBIT);
951 #endif /* DIAGNOSTIC */
952 
953 	n = pp->pr_itemsperpage;
954 	while (n--) {
955 		pi = (struct pool_item *)addr;
956 		pi->pi_magic = POOL_IMAGIC(ph, pi);
957 		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
958 
959 #ifdef DIAGNOSTIC
960 		if (POOL_PHPOISON(ph))
961 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
962 #endif /* DIAGNOSTIC */
963 
964 		addr += pp->pr_size;
965 	}
966 
967 	return (ph);
968 }
969 
970 void
971 pool_p_free(struct pool *pp, struct pool_page_header *ph)
972 {
973 	struct pool_item *pi;
974 
975 	pl_assert_unlocked(pp, &pp->pr_lock);
976 	KASSERT(ph->ph_nmissing == 0);
977 
978 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
979 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
980 			panic("%s: %s free list modified: "
981 			    "page %p; item addr %p; offset 0x%x=0x%lx",
982 			    __func__, pp->pr_wchan, ph->ph_page, pi,
983 			    0, pi->pi_magic);
984 		}
985 
986 #ifdef DIAGNOSTIC
987 		if (POOL_PHPOISON(ph)) {
988 			size_t pidx;
989 			uint32_t pval;
990 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
991 			    &pidx, &pval)) {
992 				int *ip = (int *)(pi + 1);
993 				panic("%s: %s free list modified: "
994 				    "page %p; item addr %p; offset 0x%zx=0x%x",
995 				    __func__, pp->pr_wchan, ph->ph_page, pi,
996 				    pidx * sizeof(int), ip[pidx]);
997 			}
998 		}
999 #endif
1000 	}
1001 
1002 	pool_allocator_free(pp, ph->ph_page);
1003 
1004 	if (!POOL_INPGHDR(pp))
1005 		pool_put(&phpool, ph);
1006 }
1007 
1008 void
1009 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1010 {
1011 	pl_assert_locked(pp, &pp->pr_lock);
1012 
1013 	/* If the pool was depleted, point at the new page */
1014 	if (pp->pr_curpage == NULL)
1015 		pp->pr_curpage = ph;
1016 
1017 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1018 	if (!POOL_INPGHDR(pp))
1019 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
1020 
1021 	pp->pr_nitems += pp->pr_itemsperpage;
1022 	pp->pr_nidle++;
1023 
1024 	pp->pr_npagealloc++;
1025 	if (++pp->pr_npages > pp->pr_hiwat)
1026 		pp->pr_hiwat = pp->pr_npages;
1027 }
1028 
1029 void
1030 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1031 {
1032 	pl_assert_locked(pp, &pp->pr_lock);
1033 
1034 	pp->pr_npagefree++;
1035 	pp->pr_npages--;
1036 	pp->pr_nidle--;
1037 	pp->pr_nitems -= pp->pr_itemsperpage;
1038 
1039 	if (!POOL_INPGHDR(pp))
1040 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1041 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1042 
1043 	pool_update_curpage(pp);
1044 }
1045 
1046 void
1047 pool_update_curpage(struct pool *pp)
1048 {
1049 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1050 	if (pp->pr_curpage == NULL) {
1051 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1052 	}
1053 }
1054 
1055 void
1056 pool_setlowat(struct pool *pp, int n)
1057 {
1058 	int prime = 0;
1059 
1060 	pl_enter(pp, &pp->pr_lock);
1061 	pp->pr_minitems = n;
1062 	pp->pr_minpages = (n == 0)
1063 		? 0
1064 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1065 
1066 	if (pp->pr_nitems < n)
1067 		prime = n - pp->pr_nitems;
1068 	pl_leave(pp, &pp->pr_lock);
1069 
1070 	if (prime > 0)
1071 		pool_prime(pp, prime);
1072 }
1073 
1074 void
1075 pool_sethiwat(struct pool *pp, int n)
1076 {
1077 	pp->pr_maxpages = (n == 0)
1078 		? 0
1079 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1080 }
1081 
1082 int
1083 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1084 {
1085 	int error = 0;
1086 
1087 	if (n < pp->pr_nout) {
1088 		error = EINVAL;
1089 		goto done;
1090 	}
1091 
1092 	pp->pr_hardlimit = n;
1093 	pp->pr_hardlimit_warning = warnmsg;
1094 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1095 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1096 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1097 
1098 done:
1099 	return (error);
1100 }
1101 
1102 void
1103 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1104 {
1105 	pp->pr_crange = mode;
1106 }
1107 
1108 /*
1109  * Release all complete pages that have not been used recently.
1110  *
1111  * Returns non-zero if any pages have been reclaimed.
1112  */
1113 int
1114 pool_reclaim(struct pool *pp)
1115 {
1116 	struct pool_page_header *ph, *phnext;
1117 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1118 
1119 	pl_enter(pp, &pp->pr_lock);
1120 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1121 		phnext = TAILQ_NEXT(ph, ph_entry);
1122 
1123 		/* Check our minimum page claim */
1124 		if (pp->pr_npages <= pp->pr_minpages)
1125 			break;
1126 
1127 		/*
1128 		 * If freeing this page would put us below
1129 		 * the low water mark, stop now.
1130 		 */
1131 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1132 		    pp->pr_minitems)
1133 			break;
1134 
1135 		pool_p_remove(pp, ph);
1136 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1137 	}
1138 	pl_leave(pp, &pp->pr_lock);
1139 
1140 	if (TAILQ_EMPTY(&pl))
1141 		return (0);
1142 
1143 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1144 		TAILQ_REMOVE(&pl, ph, ph_entry);
1145 		pool_p_free(pp, ph);
1146 	}
1147 
1148 	return (1);
1149 }
1150 
1151 /*
1152  * Release all complete pages that have not been used recently
1153  * from all pools.
1154  */
1155 void
1156 pool_reclaim_all(void)
1157 {
1158 	struct pool	*pp;
1159 
1160 	rw_enter_read(&pool_lock);
1161 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1162 		pool_reclaim(pp);
1163 	rw_exit_read(&pool_lock);
1164 }
1165 
1166 #ifdef DDB
1167 #include <machine/db_machdep.h>
1168 #include <ddb/db_output.h>
1169 
1170 /*
1171  * Diagnostic helpers.
1172  */
/*
 * Entry point for printing a single pool; forwards all of its arguments
 * unchanged to pool_print1().
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}
1179 
1180 void
1181 pool_print_pagelist(struct pool_pagelist *pl,
1182     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1183 {
1184 	struct pool_page_header *ph;
1185 	struct pool_item *pi;
1186 
1187 	TAILQ_FOREACH(ph, pl, ph_entry) {
1188 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1189 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1190 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1191 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1192 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1193 				    pi, pi->pi_magic);
1194 			}
1195 		}
1196 	}
1197 }
1198 
1199 void
1200 pool_print1(struct pool *pp, const char *modif,
1201     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1202 {
1203 	struct pool_page_header *ph;
1204 	int print_pagelist = 0;
1205 	char c;
1206 
1207 	while ((c = *modif++) != '\0') {
1208 		if (c == 'p')
1209 			print_pagelist = 1;
1210 		modif++;
1211 	}
1212 
1213 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1214 	    pp->pr_maxcolors);
1215 	(*pr)("\talloc %p\n", pp->pr_alloc);
1216 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1217 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1218 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1219 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1220 
1221 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1222 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1223 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1224 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1225 
1226 	if (print_pagelist == 0)
1227 		return;
1228 
1229 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1230 		(*pr)("\n\tempty page list:\n");
1231 	pool_print_pagelist(&pp->pr_emptypages, pr);
1232 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1233 		(*pr)("\n\tfull page list:\n");
1234 	pool_print_pagelist(&pp->pr_fullpages, pr);
1235 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1236 		(*pr)("\n\tpartial-page list:\n");
1237 	pool_print_pagelist(&pp->pr_partpages, pr);
1238 
1239 	if (pp->pr_curpage == NULL)
1240 		(*pr)("\tno current page\n");
1241 	else
1242 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1243 }
1244 
1245 void
1246 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1247 {
1248 	struct pool *pp;
1249 	char maxp[16];
1250 	int ovflw;
1251 	char mode;
1252 
1253 	mode = modif[0];
1254 	if (mode != '\0' && mode != 'a') {
1255 		db_printf("usage: show all pools [/a]\n");
1256 		return;
1257 	}
1258 
1259 	if (mode == '\0')
1260 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1261 		    "Name",
1262 		    "Size",
1263 		    "Requests",
1264 		    "Fail",
1265 		    "Releases",
1266 		    "Pgreq",
1267 		    "Pgrel",
1268 		    "Npage",
1269 		    "Hiwat",
1270 		    "Minpg",
1271 		    "Maxpg",
1272 		    "Idle");
1273 	else
1274 		db_printf("%-12s %18s %18s\n",
1275 		    "Name", "Address", "Allocator");
1276 
1277 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1278 		if (mode == 'a') {
1279 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1280 			    pp->pr_alloc);
1281 			continue;
1282 		}
1283 
1284 		if (!pp->pr_nget)
1285 			continue;
1286 
1287 		if (pp->pr_maxpages == UINT_MAX)
1288 			snprintf(maxp, sizeof maxp, "inf");
1289 		else
1290 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1291 
1292 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1293 	(ovflw) += db_printf((fmt),			\
1294 	    (width) - (fixed) - (ovflw) > 0 ?		\
1295 	    (width) - (fixed) - (ovflw) : 0,		\
1296 	    (val)) - (width);				\
1297 	if ((ovflw) < 0)				\
1298 		(ovflw) = 0;				\
1299 } while (/* CONSTCOND */0)
1300 
1301 		ovflw = 0;
1302 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1303 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1304 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1305 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1306 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1307 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1308 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1309 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1310 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1311 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1312 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1313 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1314 
1315 		pool_chk(pp);
1316 	}
1317 }
1318 #endif /* DDB */
1319 
1320 #if defined(POOL_DEBUG) || defined(DDB)
1321 int
1322 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1323 {
1324 	struct pool_item *pi;
1325 	caddr_t page;
1326 	int n;
1327 	const char *label = pp->pr_wchan;
1328 
1329 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1330 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1331 		printf("%s: ", label);
1332 		printf("pool(%p:%s): page inconsistency: page %p; "
1333 		    "at page head addr %p (p %p)\n",
1334 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1335 		return 1;
1336 	}
1337 
1338 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1339 	     pi != NULL;
1340 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1341 		if ((caddr_t)pi < ph->ph_page ||
1342 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1343 			printf("%s: ", label);
1344 			printf("pool(%p:%s): page inconsistency: page %p;"
1345 			    " item ordinal %d; addr %p\n", pp,
1346 			    pp->pr_wchan, ph->ph_page, n, pi);
1347 			return (1);
1348 		}
1349 
1350 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1351 			printf("%s: ", label);
1352 			printf("pool(%p:%s): free list modified: "
1353 			    "page %p; item ordinal %d; addr %p "
1354 			    "(p %p); offset 0x%x=0x%lx\n",
1355 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1356 			    0, pi->pi_magic);
1357 		}
1358 
1359 #ifdef DIAGNOSTIC
1360 		if (POOL_PHPOISON(ph)) {
1361 			size_t pidx;
1362 			uint32_t pval;
1363 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1364 			    &pidx, &pval)) {
1365 				int *ip = (int *)(pi + 1);
1366 				printf("pool(%s): free list modified: "
1367 				    "page %p; item ordinal %d; addr %p "
1368 				    "(p %p); offset 0x%zx=0x%x\n",
1369 				    pp->pr_wchan, ph->ph_page, n, pi,
1370 				    page, pidx * sizeof(int), ip[pidx]);
1371 			}
1372 		}
1373 #endif /* DIAGNOSTIC */
1374 	}
1375 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1376 		printf("pool(%p:%s): page inconsistency: page %p;"
1377 		    " %d on list, %d missing, %d items per page\n", pp,
1378 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1379 		    pp->pr_itemsperpage);
1380 		return 1;
1381 	}
1382 	if (expected >= 0 && n != expected) {
1383 		printf("pool(%p:%s): page inconsistency: page %p;"
1384 		    " %d on list, %d missing, %d expected\n", pp,
1385 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1386 		    expected);
1387 		return 1;
1388 	}
1389 	return 0;
1390 }
1391 
1392 int
1393 pool_chk(struct pool *pp)
1394 {
1395 	struct pool_page_header *ph;
1396 	int r = 0;
1397 
1398 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1399 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1400 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1401 		r += pool_chk_page(pp, ph, 0);
1402 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1403 		r += pool_chk_page(pp, ph, -1);
1404 
1405 	return (r);
1406 }
1407 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1408 
1409 #ifdef DDB
1410 void
1411 pool_walk(struct pool *pp, int full,
1412     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1413     void (*func)(void *, int, int (*)(const char *, ...)
1414 	    __attribute__((__format__(__kprintf__,1,2)))))
1415 {
1416 	struct pool_page_header *ph;
1417 	struct pool_item *pi;
1418 	caddr_t cp;
1419 	int n;
1420 
1421 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1422 		cp = ph->ph_colored;
1423 		n = ph->ph_nmissing;
1424 
1425 		while (n--) {
1426 			func(cp, full, pr);
1427 			cp += pp->pr_size;
1428 		}
1429 	}
1430 
1431 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1432 		cp = ph->ph_colored;
1433 		n = ph->ph_nmissing;
1434 
1435 		do {
1436 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1437 				if (cp == (caddr_t)pi)
1438 					break;
1439 			}
1440 			if (cp != (caddr_t)pi) {
1441 				func(cp, full, pr);
1442 				n--;
1443 			}
1444 
1445 			cp += pp->pr_size;
1446 		} while (n > 0);
1447 	}
1448 }
1449 #endif
1450 
1451 /*
1452  * We have three different sysctls.
1453  * kern.pool.npools - the number of pools.
1454  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1455  * kern.pool.name.<pool#> - the name for pool#.
1456  */
1457 int
1458 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1459 {
1460 	struct kinfo_pool pi;
1461 	struct pool *pp;
1462 	int rv = ENOENT;
1463 
1464 	switch (name[0]) {
1465 	case KERN_POOL_NPOOLS:
1466 		if (namelen != 1)
1467 			return (ENOTDIR);
1468 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1469 
1470 	case KERN_POOL_NAME:
1471 	case KERN_POOL_POOL:
1472 	case KERN_POOL_CACHE:
1473 	case KERN_POOL_CACHE_CPUS:
1474 		break;
1475 	default:
1476 		return (EOPNOTSUPP);
1477 	}
1478 
1479 	if (namelen != 2)
1480 		return (ENOTDIR);
1481 
1482 	rw_enter_read(&pool_lock);
1483 
1484 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1485 		if (name[1] == pp->pr_serial)
1486 			break;
1487 	}
1488 
1489 	if (pp == NULL)
1490 		goto done;
1491 
1492 	switch (name[0]) {
1493 	case KERN_POOL_NAME:
1494 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1495 		break;
1496 	case KERN_POOL_POOL:
1497 		memset(&pi, 0, sizeof(pi));
1498 
1499 		pl_enter(pp, &pp->pr_lock);
1500 		pi.pr_size = pp->pr_size;
1501 		pi.pr_pgsize = pp->pr_pgsize;
1502 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1503 		pi.pr_npages = pp->pr_npages;
1504 		pi.pr_minpages = pp->pr_minpages;
1505 		pi.pr_maxpages = pp->pr_maxpages;
1506 		pi.pr_hardlimit = pp->pr_hardlimit;
1507 		pi.pr_nout = pp->pr_nout;
1508 		pi.pr_nitems = pp->pr_nitems;
1509 		pi.pr_nget = pp->pr_nget;
1510 		pi.pr_nput = pp->pr_nput;
1511 		pi.pr_nfail = pp->pr_nfail;
1512 		pi.pr_npagealloc = pp->pr_npagealloc;
1513 		pi.pr_npagefree = pp->pr_npagefree;
1514 		pi.pr_hiwat = pp->pr_hiwat;
1515 		pi.pr_nidle = pp->pr_nidle;
1516 		pl_leave(pp, &pp->pr_lock);
1517 
1518 		pool_cache_pool_info(pp, &pi);
1519 
1520 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1521 		break;
1522 
1523 	case KERN_POOL_CACHE:
1524 		rv = pool_cache_info(pp, oldp, oldlenp);
1525 		break;
1526 
1527 	case KERN_POOL_CACHE_CPUS:
1528 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1529 		break;
1530 	}
1531 
1532 done:
1533 	rw_exit_read(&pool_lock);
1534 
1535 	return (rv);
1536 }
1537 
1538 void
1539 pool_gc_sched(void *null)
1540 {
1541 	task_add(systqmp, &pool_gc_task);
1542 }
1543 
1544 void
1545 pool_gc_pages(void *null)
1546 {
1547 	struct pool *pp;
1548 	struct pool_page_header *ph, *freeph;
1549 	int s;
1550 
1551 	rw_enter_read(&pool_lock);
1552 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1553 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1554 #ifdef MULTIPROCESSOR
1555 		if (pp->pr_cache != NULL)
1556 			pool_cache_gc(pp);
1557 #endif
1558 
1559 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1560 		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
1561 			continue;
1562 
1563 		/* is it time to free a page? */
1564 		if (pp->pr_nidle > pp->pr_minpages &&
1565 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1566 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1567 			freeph = ph;
1568 			pool_p_remove(pp, freeph);
1569 		} else
1570 			freeph = NULL;
1571 
1572 		pl_leave(pp, &pp->pr_lock);
1573 
1574 		if (freeph != NULL)
1575 			pool_p_free(pp, freeph);
1576 	}
1577 	splx(s);
1578 	rw_exit_read(&pool_lock);
1579 
1580 	timeout_add_sec(&pool_gc_tick, 1);
1581 }
1582 
1583 /*
1584  * Pool backend allocators.
1585  */
1586 
1587 void *
1588 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1589 {
1590 	void *v;
1591 
1592 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1593 
1594 #ifdef DIAGNOSTIC
1595 	if (v != NULL && POOL_INPGHDR(pp)) {
1596 		vaddr_t addr = (vaddr_t)v;
1597 		if ((addr & pp->pr_pgmask) != addr) {
1598 			panic("%s: %s page address %p isnt aligned to %u",
1599 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1600 		}
1601 	}
1602 #endif
1603 
1604 	return (v);
1605 }
1606 
1607 void
1608 pool_allocator_free(struct pool *pp, void *v)
1609 {
1610 	struct pool_allocator *pa = pp->pr_alloc;
1611 
1612 	(*pa->pa_free)(pp, v);
1613 }
1614 
1615 void *
1616 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1617 {
1618 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1619 
1620 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1621 	kd.kd_slowdown = slowdown;
1622 
1623 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1624 }
1625 
1626 void
1627 pool_page_free(struct pool *pp, void *v)
1628 {
1629 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1630 }
1631 
1632 void *
1633 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1634 {
1635 	struct kmem_va_mode kv = kv_intrsafe;
1636 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1637 	void *v;
1638 	int s;
1639 
1640 	if (POOL_INPGHDR(pp))
1641 		kv.kv_align = pp->pr_pgsize;
1642 
1643 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1644 	kd.kd_slowdown = slowdown;
1645 
1646 	s = splvm();
1647 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1648 	splx(s);
1649 
1650 	return (v);
1651 }
1652 
1653 void
1654 pool_multi_free(struct pool *pp, void *v)
1655 {
1656 	struct kmem_va_mode kv = kv_intrsafe;
1657 	int s;
1658 
1659 	if (POOL_INPGHDR(pp))
1660 		kv.kv_align = pp->pr_pgsize;
1661 
1662 	s = splvm();
1663 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1664 	splx(s);
1665 }
1666 
1667 void *
1668 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1669 {
1670 	struct kmem_va_mode kv = kv_any;
1671 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1672 	void *v;
1673 
1674 	if (POOL_INPGHDR(pp))
1675 		kv.kv_align = pp->pr_pgsize;
1676 
1677 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1678 	kd.kd_slowdown = slowdown;
1679 
1680 	KERNEL_LOCK();
1681 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1682 	KERNEL_UNLOCK();
1683 
1684 	return (v);
1685 }
1686 
1687 void
1688 pool_multi_free_ni(struct pool *pp, void *v)
1689 {
1690 	struct kmem_va_mode kv = kv_any;
1691 
1692 	if (POOL_INPGHDR(pp))
1693 		kv.kv_align = pp->pr_pgsize;
1694 
1695 	KERNEL_LOCK();
1696 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1697 	KERNEL_UNLOCK();
1698 }
1699 
1700 #ifdef MULTIPROCESSOR
1701 
1702 struct pool pool_caches; /* per cpu cache entries */
1703 
1704 void
1705 pool_cache_init(struct pool *pp)
1706 {
1707 	struct cpumem *cm;
1708 	struct pool_cache *pc;
1709 	struct cpumem_iter i;
1710 
1711 	if (pool_caches.pr_size == 0) {
1712 		pool_init(&pool_caches, sizeof(struct pool_cache),
1713 		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
1714 		    "plcache", NULL);
1715 	}
1716 
1717 	/* must be able to use the pool items as cache list items */
1718 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1719 
1720 	cm = cpumem_get(&pool_caches);
1721 
1722 	pl_init(pp, &pp->pr_cache_lock);
1723 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1724 	TAILQ_INIT(&pp->pr_cache_lists);
1725 	pp->pr_cache_nitems = 0;
1726 	pp->pr_cache_tick = ticks;
1727 	pp->pr_cache_items = 8;
1728 	pp->pr_cache_contention = 0;
1729 	pp->pr_cache_ngc = 0;
1730 
1731 	CPUMEM_FOREACH(pc, &i, cm) {
1732 		pc->pc_actv = NULL;
1733 		pc->pc_nactv = 0;
1734 		pc->pc_prev = NULL;
1735 
1736 		pc->pc_nget = 0;
1737 		pc->pc_nfail = 0;
1738 		pc->pc_nput = 0;
1739 		pc->pc_nlget = 0;
1740 		pc->pc_nlfail = 0;
1741 		pc->pc_nlput = 0;
1742 		pc->pc_nout = 0;
1743 	}
1744 
1745 	membar_producer();
1746 
1747 	pp->pr_cache = cm;
1748 }
1749 
1750 static inline void
1751 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1752 {
1753 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1754 
1755 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1756 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1757 }
1758 
1759 static inline void
1760 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1761 {
1762 	unsigned long *entry;
1763 	unsigned long val;
1764 
1765 	entry = (unsigned long *)&ci->ci_nextl;
1766 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1767 	if (*entry != val)
1768 		goto fail;
1769 
1770 	entry++;
1771 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1772 	if (*entry != val)
1773 		goto fail;
1774 
1775 	return;
1776 
1777 fail:
1778 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1779 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1780 	    *entry, val);
1781 }
1782 
1783 static inline void
1784 pool_list_enter(struct pool *pp)
1785 {
1786 	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1787 		pl_enter(pp, &pp->pr_cache_lock);
1788 		pp->pr_cache_contention++;
1789 	}
1790 }
1791 
1792 static inline void
1793 pool_list_leave(struct pool *pp)
1794 {
1795 	pl_leave(pp, &pp->pr_cache_lock);
1796 }
1797 
1798 static inline struct pool_cache_item *
1799 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1800 {
1801 	struct pool_cache_item *pl;
1802 
1803 	pool_list_enter(pp);
1804 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1805 	if (pl != NULL) {
1806 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1807 		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1808 
1809 		pool_cache_item_magic(pp, pl);
1810 
1811 		pc->pc_nlget++;
1812 	} else
1813 		pc->pc_nlfail++;
1814 
1815 	/* fold this cpus nout into the global while we have the lock */
1816 	pp->pr_cache_nout += pc->pc_nout;
1817 	pc->pc_nout = 0;
1818 	pool_list_leave(pp);
1819 
1820 	return (pl);
1821 }
1822 
1823 static inline void
1824 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1825     struct pool_cache_item *ci)
1826 {
1827 	pool_list_enter(pp);
1828 	if (TAILQ_EMPTY(&pp->pr_cache_lists))
1829 		pp->pr_cache_tick = ticks;
1830 
1831 	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1832 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1833 
1834 	pc->pc_nlput++;
1835 
1836 	/* fold this cpus nout into the global while we have the lock */
1837 	pp->pr_cache_nout += pc->pc_nout;
1838 	pc->pc_nout = 0;
1839 	pool_list_leave(pp);
1840 }
1841 
1842 static inline struct pool_cache *
1843 pool_cache_enter(struct pool *pp, int *s)
1844 {
1845 	struct pool_cache *pc;
1846 
1847 	pc = cpumem_enter(pp->pr_cache);
1848 	*s = splraise(pp->pr_ipl);
1849 	pc->pc_gen++;
1850 
1851 	return (pc);
1852 }
1853 
1854 static inline void
1855 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1856 {
1857 	pc->pc_gen++;
1858 	splx(s);
1859 	cpumem_leave(pp->pr_cache, pc);
1860 }
1861 
1862 void *
1863 pool_cache_get(struct pool *pp)
1864 {
1865 	struct pool_cache *pc;
1866 	struct pool_cache_item *ci;
1867 	int s;
1868 
1869 	pc = pool_cache_enter(pp, &s);
1870 
1871 	if (pc->pc_actv != NULL) {
1872 		ci = pc->pc_actv;
1873 	} else if (pc->pc_prev != NULL) {
1874 		ci = pc->pc_prev;
1875 		pc->pc_prev = NULL;
1876 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1877 		pc->pc_nfail++;
1878 		goto done;
1879 	}
1880 
1881 	pool_cache_item_magic_check(pp, ci);
1882 #ifdef DIAGNOSTIC
1883 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1884 		size_t pidx;
1885 		uint32_t pval;
1886 
1887 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1888 		    &pidx, &pval)) {
1889 			int *ip = (int *)(ci + 1);
1890 			ip += pidx;
1891 
1892 			panic("%s: %s cpu free list modified: "
1893 			    "item addr %p+%zu 0x%x!=0x%x",
1894 			    __func__, pp->pr_wchan, ci,
1895 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1896 		}
1897 	}
1898 #endif
1899 
1900 	pc->pc_actv = ci->ci_next;
1901 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1902 	pc->pc_nget++;
1903 	pc->pc_nout++;
1904 
1905 done:
1906 	pool_cache_leave(pp, pc, s);
1907 
1908 	return (ci);
1909 }
1910 
1911 void
1912 pool_cache_put(struct pool *pp, void *v)
1913 {
1914 	struct pool_cache *pc;
1915 	struct pool_cache_item *ci = v;
1916 	unsigned long nitems;
1917 	int s;
1918 #ifdef DIAGNOSTIC
1919 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1920 
1921 	if (poison)
1922 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1923 #endif
1924 
1925 	pc = pool_cache_enter(pp, &s);
1926 
1927 	nitems = pc->pc_nactv;
1928 	if (nitems >= pp->pr_cache_items) {
1929 		if (pc->pc_prev != NULL)
1930 			pool_cache_list_free(pp, pc, pc->pc_prev);
1931 
1932 		pc->pc_prev = pc->pc_actv;
1933 
1934 		pc->pc_actv = NULL;
1935 		pc->pc_nactv = 0;
1936 		nitems = 0;
1937 	}
1938 
1939 	ci->ci_next = pc->pc_actv;
1940 	ci->ci_nitems = ++nitems;
1941 #ifdef DIAGNOSTIC
1942 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1943 #endif
1944 	pool_cache_item_magic(pp, ci);
1945 
1946 	pc->pc_actv = ci;
1947 	pc->pc_nactv = nitems;
1948 
1949 	pc->pc_nput++;
1950 	pc->pc_nout--;
1951 
1952 	pool_cache_leave(pp, pc, s);
1953 }
1954 
1955 struct pool_cache_item *
1956 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1957 {
1958 	struct pool_cache_item *rpl, *next;
1959 
1960 	if (pl == NULL)
1961 		return (NULL);
1962 
1963 	rpl = TAILQ_NEXT(pl, ci_nextl);
1964 
1965 	pl_enter(pp, &pp->pr_lock);
1966 	do {
1967 		next = pl->ci_next;
1968 		pool_do_put(pp, pl);
1969 		pl = next;
1970 	} while (pl != NULL);
1971 	pl_leave(pp, &pp->pr_lock);
1972 
1973 	return (rpl);
1974 }
1975 
1976 void
1977 pool_cache_destroy(struct pool *pp)
1978 {
1979 	struct pool_cache *pc;
1980 	struct pool_cache_item *pl;
1981 	struct cpumem_iter i;
1982 	struct cpumem *cm;
1983 
1984 	rw_enter_write(&pool_lock); /* serialise with the gc */
1985 	cm = pp->pr_cache;
1986 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1987 	rw_exit_write(&pool_lock);
1988 
1989 	CPUMEM_FOREACH(pc, &i, cm) {
1990 		pool_cache_list_put(pp, pc->pc_actv);
1991 		pool_cache_list_put(pp, pc->pc_prev);
1992 	}
1993 
1994 	cpumem_put(&pool_caches, cm);
1995 
1996 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1997 	while (pl != NULL)
1998 		pl = pool_cache_list_put(pp, pl);
1999 }
2000 
2001 void
2002 pool_cache_gc(struct pool *pp)
2003 {
2004 	unsigned int contention, delta;
2005 
2006 	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
2007 	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
2008 	    pl_enter_try(pp, &pp->pr_cache_lock)) {
2009 		struct pool_cache_item *pl = NULL;
2010 
2011 		pl = TAILQ_FIRST(&pp->pr_cache_lists);
2012 		if (pl != NULL) {
2013 			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
2014 			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
2015 			pp->pr_cache_tick = ticks;
2016 
2017 			pp->pr_cache_ngc++;
2018 		}
2019 
2020 		pl_leave(pp, &pp->pr_cache_lock);
2021 
2022 		pool_cache_list_put(pp, pl);
2023 	}
2024 
2025 	/*
2026 	 * if there's a lot of contention on the pr_cache_mtx then consider
2027 	 * growing the length of the list to reduce the need to access the
2028 	 * global pool.
2029 	 */
2030 
2031 	contention = pp->pr_cache_contention;
2032 	delta = contention - pp->pr_cache_contention_prev;
2033 	if (delta > 8 /* magic */) {
2034 		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
2035 			pp->pr_cache_items += 8;
2036 	} else if (delta == 0) {
2037 		if (pp->pr_cache_items > 8)
2038 			pp->pr_cache_items--;
2039 	}
2040 	pp->pr_cache_contention_prev = contention;
2041 }
2042 
2043 void
2044 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2045 {
2046 	struct pool_cache *pc;
2047 	struct cpumem_iter i;
2048 
2049 	if (pp->pr_cache == NULL)
2050 		return;
2051 
2052 	/* loop through the caches twice to collect stats */
2053 
2054 	/* once without the lock so we can yield while reading nget/nput */
2055 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2056 		uint64_t gen, nget, nput;
2057 
2058 		do {
2059 			while ((gen = pc->pc_gen) & 1)
2060 				yield();
2061 
2062 			nget = pc->pc_nget;
2063 			nput = pc->pc_nput;
2064 		} while (gen != pc->pc_gen);
2065 
2066 		pi->pr_nget += nget;
2067 		pi->pr_nput += nput;
2068 	}
2069 
2070 	/* and once with the mtx so we can get consistent nout values */
2071 	pl_enter(pp, &pp->pr_cache_lock);
2072 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
2073 		pi->pr_nout += pc->pc_nout;
2074 
2075 	pi->pr_nout += pp->pr_cache_nout;
2076 	pl_leave(pp, &pp->pr_cache_lock);
2077 }
2078 
2079 int
2080 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2081 {
2082 	struct kinfo_pool_cache kpc;
2083 
2084 	if (pp->pr_cache == NULL)
2085 		return (EOPNOTSUPP);
2086 
2087 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
2088 
2089 	pl_enter(pp, &pp->pr_cache_lock);
2090 	kpc.pr_ngc = pp->pr_cache_ngc;
2091 	kpc.pr_len = pp->pr_cache_items;
2092 	kpc.pr_nitems = pp->pr_cache_nitems;
2093 	kpc.pr_contention = pp->pr_cache_contention;
2094 	pl_leave(pp, &pp->pr_cache_lock);
2095 
2096 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
2097 }
2098 
2099 int
2100 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2101 {
2102 	struct pool_cache *pc;
2103 	struct kinfo_pool_cache_cpu *kpcc, *info;
2104 	unsigned int cpu = 0;
2105 	struct cpumem_iter i;
2106 	int error = 0;
2107 	size_t len;
2108 
2109 	if (pp->pr_cache == NULL)
2110 		return (EOPNOTSUPP);
2111 	if (*oldlenp % sizeof(*kpcc))
2112 		return (EINVAL);
2113 
2114 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
2115 	    M_WAITOK|M_CANFAIL|M_ZERO);
2116 	if (kpcc == NULL)
2117 		return (EIO);
2118 
2119 	len = ncpusfound * sizeof(*kpcc);
2120 
2121 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2122 		uint64_t gen;
2123 
2124 		if (cpu >= ncpusfound) {
2125 			error = EIO;
2126 			goto err;
2127 		}
2128 
2129 		info = &kpcc[cpu];
2130 		info->pr_cpu = cpu;
2131 
2132 		do {
2133 			while ((gen = pc->pc_gen) & 1)
2134 				yield();
2135 
2136 			info->pr_nget = pc->pc_nget;
2137 			info->pr_nfail = pc->pc_nfail;
2138 			info->pr_nput = pc->pc_nput;
2139 			info->pr_nlget = pc->pc_nlget;
2140 			info->pr_nlfail = pc->pc_nlfail;
2141 			info->pr_nlput = pc->pc_nlput;
2142 		} while (gen != pc->pc_gen);
2143 
2144 		cpu++;
2145 	}
2146 
2147 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2148 err:
2149 	free(kpcc, M_TEMP, len);
2150 
2151 	return (error);
2152 }
2153 #else /* MULTIPROCESSOR */
/*
 * Variant built when MULTIPROCESSOR is not defined: no per cpu cache is
 * set up and pp is left untouched.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}
2159 
/*
 * Variant built when MULTIPROCESSOR is not defined: there are no cache
 * statistics to add, so pi is left untouched.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
2165 
2166 int
2167 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2168 {
2169 	return (EOPNOTSUPP);
2170 }
2171 
2172 int
2173 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2174 {
2175 	return (EOPNOTSUPP);
2176 }
2177 #endif /* MULTIPROCESSOR */
2178 
2179 
2180 void
2181 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
2182     struct lock_type *type)
2183 {
2184 	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
2185 }
2186 
2187 void
2188 pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS)
2189 {
2190 	_mtx_enter(&lock->prl_mtx LOCK_FL_ARGS);
2191 }
2192 
2193 int
2194 pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS)
2195 {
2196 	return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS));
2197 }
2198 
2199 void
2200 pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS)
2201 {
2202 	_mtx_leave(&lock->prl_mtx LOCK_FL_ARGS);
2203 }
2204 
2205 void
2206 pool_lock_mtx_assert_locked(union pool_lock *lock)
2207 {
2208 	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
2209 }
2210 
2211 void
2212 pool_lock_mtx_assert_unlocked(union pool_lock *lock)
2213 {
2214 	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
2215 }
2216 
2217 int
2218 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
2219     const char *wmesg, int timo)
2220 {
2221 	return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
2222 }
2223 
/*
 * Lock operations table made of the mutex based wrappers above.
 * The initializer is positional, so the entries must stay in the order
 * in which struct pool_lock_ops declares its members (declaration not
 * visible here -- verify before reordering).
 */
static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};
2233 
2234 void
2235 pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
2236     struct lock_type *type)
2237 {
2238 	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
2239 }
2240 
2241 void
2242 pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS)
2243 {
2244 	_rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS);
2245 }
2246 
2247 int
2248 pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS)
2249 {
2250 	return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP
2251 	    LOCK_FL_ARGS) == 0);
2252 }
2253 
2254 void
2255 pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS)
2256 {
2257 	_rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS);
2258 }
2259 
2260 void
2261 pool_lock_rw_assert_locked(union pool_lock *lock)
2262 {
2263 	rw_assert_wrlock(&lock->prl_rwlock);
2264 }
2265 
2266 void
2267 pool_lock_rw_assert_unlocked(union pool_lock *lock)
2268 {
2269 	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
2270 }
2271 
2272 int
2273 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
2274     const char *wmesg, int timo)
2275 {
2276 	return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
2277 }
2278 
/*
 * Lock operations table made of the rwlock based wrappers above.
 * The initializer is positional, so the entries must stay in the order
 * in which struct pool_lock_ops declares its members (declaration not
 * visible here -- verify before reordering).
 */
static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};
2288