xref: /openbsd-src/sys/kern/subr_pool.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: subr_pool.c,v 1.227 2019/04/23 13:35:12 visa Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/proc.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
61 
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, we're screwed, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71 
72 /* Lock the previous variables making up the global pool state */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures */
76 struct pool phpool;
77 
/*
 * Lock operations vector.  pool_init() points pr_lock_ops at
 * pool_lock_ops_rw when PR_RWLOCK is set and at pool_lock_ops_mtx
 * otherwise; the pl_*() wrappers below dispatch through it.
 */
struct pool_lock_ops {
	/* initialise a lock; the lock_type is NULL unless WITNESS is on */
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);

	/* take the lock */
	void	(*pl_enter)(union pool_lock *);

	/* try to take the lock; NOTE(review): return convention not shown here */
	int	(*pl_enter_try)(union pool_lock *);

	/* drop the lock */
	void	(*pl_leave)(union pool_lock *);

	/* back ends for pl_assert_locked() and pl_assert_unlocked() */
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);

	/* sleep; arguments are ident, lock, priority, wmesg, timo */
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
};

/* The two implementations; their contents are not in this part of the file. */
static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;
91 
/*
 * pl_init(pp, pl): initialise lock `pl' using the pool's lock ops.
 * With WITNESS each call site gets its own static lock_type whose
 * name is the stringified `pl' argument; without it NULL is passed.
 */
#ifndef WITNESS
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#else /* WITNESS */
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#endif /* WITNESS */
100 
101 static inline void
102 pl_enter(struct pool *pp, union pool_lock *pl)
103 {
104 	pp->pr_lock_ops->pl_enter(pl);
105 }
106 static inline int
107 pl_enter_try(struct pool *pp, union pool_lock *pl)
108 {
109 	return pp->pr_lock_ops->pl_enter_try(pl);
110 }
111 static inline void
112 pl_leave(struct pool *pp, union pool_lock *pl)
113 {
114 	pp->pr_lock_ops->pl_leave(pl);
115 }
116 static inline void
117 pl_assert_locked(struct pool *pp, union pool_lock *pl)
118 {
119 	pp->pr_lock_ops->pl_assert_locked(pl);
120 }
121 static inline void
122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
123 {
124 	pp->pr_lock_ops->pl_assert_unlocked(pl);
125 }
126 static inline int
127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
128     const char *wmesg, int timo)
129 {
130 	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
131 }
132 
133 struct pool_item {
134 	u_long				pi_magic;
135 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
136 };
137 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
138 
139 struct pool_page_header {
140 	/* Page headers */
141 	TAILQ_ENTRY(pool_page_header)
142 				ph_entry;	/* pool page list */
143 	XSIMPLEQ_HEAD(, pool_item)
144 				ph_items;	/* free items on the page */
145 	RBT_ENTRY(pool_page_header)
146 				ph_node;	/* off-page page headers */
147 	unsigned int		ph_nmissing;	/* # of chunks in use */
148 	caddr_t			ph_page;	/* this page's address */
149 	caddr_t			ph_colored;	/* page's colored address */
150 	unsigned long		ph_magic;
151 	int			ph_tick;
152 };
153 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
154 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
155 
156 #ifdef MULTIPROCESSOR
157 struct pool_cache_item {
158 	struct pool_cache_item	*ci_next;	/* next item in list */
159 	unsigned long		 ci_nitems;	/* number of items in list */
160 	TAILQ_ENTRY(pool_cache_item)
161 				 ci_nextl;	/* entry in list of lists */
162 };
163 
164 /* we store whether the cached item is poisoned in the high bit of nitems */
165 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
166 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
167 
168 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
169     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
170 
171 #define POOL_CACHE_ITEM_POISONED(_ci)					\
172     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
173 
174 struct pool_cache {
175 	struct pool_cache_item	*pc_actv;	/* active list of items */
176 	unsigned long		 pc_nactv;	/* actv head nitems cache */
177 	struct pool_cache_item	*pc_prev;	/* previous list of items */
178 
179 	uint64_t		 pc_gen;	/* generation number */
180 	uint64_t		 pc_nget;	/* # of successful requests */
181 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
182 	uint64_t		 pc_nput;	/* # of releases */
183 	uint64_t		 pc_nlget;	/* # of list requests */
184 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
185 	uint64_t		 pc_nlput;	/* # of list releases */
186 
187 	int			 pc_nout;
188 };
189 
/* Item cache entry points used by pool_get(), pool_put(), pool_destroy(). */
void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
/* NOTE(review): presumably driven by the page garbage collector -- confirm */
void	 pool_cache_gc(struct pool *);
194 #endif
195 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
196 int	 pool_cache_info(struct pool *, void *, size_t *);
197 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
198 
/*
 * Debug level: defaults to 1 when built with POOL_DEBUG, else 0.
 * Non-zero enables item poisoning on newly allocated pages (DIAGNOSTIC);
 * the value 2 additionally makes PR_WAITOK pool_get() calls yield.
 */
#ifndef POOL_DEBUG
int	pool_debug = 0;
#else
int	pool_debug = 1;
#endif
204 
/* True when the pool keeps its page headers inside the pages themselves. */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

/* page life cycle: allocate, link into the pool, unlink, release */
struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

/* item get/put internals and consistency checks */
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);

/* queued request handling */
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

/* wrappers around the pool's page allocator */
void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);
223 
224 /*
225  * The default pool allocator.
226  */
227 void	*pool_page_alloc(struct pool *, int, int *);
228 void	pool_page_free(struct pool *, void *);
229 
230 /*
231  * safe for interrupts; this is the default allocator
232  */
233 struct pool_allocator pool_allocator_single = {
234 	pool_page_alloc,
235 	pool_page_free,
236 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
237 };
238 
239 void	*pool_multi_alloc(struct pool *, int, int *);
240 void	pool_multi_free(struct pool *, void *);
241 
242 struct pool_allocator pool_allocator_multi = {
243 	pool_multi_alloc,
244 	pool_multi_free,
245 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
246 };
247 
248 void	*pool_multi_alloc_ni(struct pool *, int, int *);
249 void	pool_multi_free_ni(struct pool *, void *);
250 
251 struct pool_allocator pool_allocator_multi_ni = {
252 	pool_multi_alloc_ni,
253 	pool_multi_free_ni,
254 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
255 };
256 
#ifdef DDB
/* DDB output helpers; the last argument is a printf-like callback. */
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif
263 
264 /* stale page garbage collectors */
265 void	pool_gc_sched(void *);
266 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
267 void	pool_gc_pages(void *);
268 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
269 int pool_wait_free = 1;
270 int pool_wait_gc = 8;
271 
272 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
273 
274 static inline int
275 phtree_compare(const struct pool_page_header *a,
276     const struct pool_page_header *b)
277 {
278 	vaddr_t va = (vaddr_t)a->ph_page;
279 	vaddr_t vb = (vaddr_t)b->ph_page;
280 
281 	/* the compares in this order are important for the NFIND to work */
282 	if (vb < va)
283 		return (-1);
284 	if (vb > va)
285 		return (1);
286 
287 	return (0);
288 }
289 
290 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
291 
292 /*
293  * Return the pool page header based on page address.
294  */
295 static inline struct pool_page_header *
296 pr_find_pagehead(struct pool *pp, void *v)
297 {
298 	struct pool_page_header *ph, key;
299 
300 	if (POOL_INPGHDR(pp)) {
301 		caddr_t page;
302 
303 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
304 
305 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
306 	}
307 
308 	key.ph_page = v;
309 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
310 	if (ph == NULL)
311 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
312 
313 	KASSERT(ph->ph_page <= (caddr_t)v);
314 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
315 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
316 
317 	return (ph);
318 }
319 
320 /*
321  * Initialize the given pool resource structure.
322  *
323  * We export this routine to allow other kernel parts to declare
324  * static pools that must be initialized before malloc() is available.
325  */
326 void
327 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
328     const char *wchan, struct pool_allocator *palloc)
329 {
330 	int off = 0, space;
331 	unsigned int pgsize = PAGE_SIZE, items;
332 	size_t pa_pagesz;
333 #ifdef DIAGNOSTIC
334 	struct pool *iter;
335 #endif
336 
337 	if (align == 0)
338 		align = ALIGN(1);
339 
340 	if (size < sizeof(struct pool_item))
341 		size = sizeof(struct pool_item);
342 
343 	size = roundup(size, align);
344 
345 	while (size * 8 > pgsize)
346 		pgsize <<= 1;
347 
348 	if (palloc == NULL) {
349 		if (pgsize > PAGE_SIZE) {
350 			palloc = ISSET(flags, PR_WAITOK) ?
351 			    &pool_allocator_multi_ni : &pool_allocator_multi;
352 		} else
353 			palloc = &pool_allocator_single;
354 
355 		pa_pagesz = palloc->pa_pagesz;
356 	} else {
357 		size_t pgsizes;
358 
359 		pa_pagesz = palloc->pa_pagesz;
360 		if (pa_pagesz == 0)
361 			pa_pagesz = POOL_ALLOC_DEFAULT;
362 
363 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
364 
365 		/* make sure the allocator can fit at least one item */
366 		if (size > pgsizes) {
367 			panic("%s: pool %s item size 0x%zx > "
368 			    "allocator %p sizes 0x%zx", __func__, wchan,
369 			    size, palloc, pgsizes);
370 		}
371 
372 		/* shrink pgsize until it fits into the range */
373 		while (!ISSET(pgsizes, pgsize))
374 			pgsize >>= 1;
375 	}
376 	KASSERT(ISSET(pa_pagesz, pgsize));
377 
378 	items = pgsize / size;
379 
380 	/*
381 	 * Decide whether to put the page header off page to avoid
382 	 * wasting too large a part of the page. Off-page page headers
383 	 * go into an RB tree, so we can match a returned item with
384 	 * its header based on the page address.
385 	 */
386 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
387 		if (pgsize - (size * items) >
388 		    sizeof(struct pool_page_header)) {
389 			off = pgsize - sizeof(struct pool_page_header);
390 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
391 			off = pgsize - sizeof(struct pool_page_header);
392 			items = off / size;
393 		}
394 	}
395 
396 	KASSERT(items > 0);
397 
398 	/*
399 	 * Initialize the pool structure.
400 	 */
401 	memset(pp, 0, sizeof(*pp));
402 	if (ISSET(flags, PR_RWLOCK)) {
403 		KASSERT(flags & PR_WAITOK);
404 		pp->pr_lock_ops = &pool_lock_ops_rw;
405 	} else
406 		pp->pr_lock_ops = &pool_lock_ops_mtx;
407 	TAILQ_INIT(&pp->pr_emptypages);
408 	TAILQ_INIT(&pp->pr_fullpages);
409 	TAILQ_INIT(&pp->pr_partpages);
410 	pp->pr_curpage = NULL;
411 	pp->pr_npages = 0;
412 	pp->pr_minitems = 0;
413 	pp->pr_minpages = 0;
414 	pp->pr_maxpages = 8;
415 	pp->pr_size = size;
416 	pp->pr_pgsize = pgsize;
417 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
418 	pp->pr_phoffset = off;
419 	pp->pr_itemsperpage = items;
420 	pp->pr_wchan = wchan;
421 	pp->pr_alloc = palloc;
422 	pp->pr_nitems = 0;
423 	pp->pr_nout = 0;
424 	pp->pr_hardlimit = UINT_MAX;
425 	pp->pr_hardlimit_warning = NULL;
426 	pp->pr_hardlimit_ratecap.tv_sec = 0;
427 	pp->pr_hardlimit_ratecap.tv_usec = 0;
428 	pp->pr_hardlimit_warning_last.tv_sec = 0;
429 	pp->pr_hardlimit_warning_last.tv_usec = 0;
430 	RBT_INIT(phtree, &pp->pr_phtree);
431 
432 	/*
433 	 * Use the space between the chunks and the page header
434 	 * for cache coloring.
435 	 */
436 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
437 	space -= pp->pr_itemsperpage * pp->pr_size;
438 	pp->pr_align = align;
439 	pp->pr_maxcolors = (space / align) + 1;
440 
441 	pp->pr_nget = 0;
442 	pp->pr_nfail = 0;
443 	pp->pr_nput = 0;
444 	pp->pr_npagealloc = 0;
445 	pp->pr_npagefree = 0;
446 	pp->pr_hiwat = 0;
447 	pp->pr_nidle = 0;
448 
449 	pp->pr_ipl = ipl;
450 	pp->pr_flags = flags;
451 
452 	pl_init(pp, &pp->pr_lock);
453 	pl_init(pp, &pp->pr_requests_lock);
454 	TAILQ_INIT(&pp->pr_requests);
455 
456 	if (phpool.pr_size == 0) {
457 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
458 		    IPL_HIGH, 0, "phpool", NULL);
459 
460 		/* make sure phpool wont "recurse" */
461 		KASSERT(POOL_INPGHDR(&phpool));
462 	}
463 
464 	/* pglistalloc/constraint parameters */
465 	pp->pr_crange = &kp_dirty;
466 
467 	/* Insert this into the list of all pools. */
468 	rw_enter_write(&pool_lock);
469 #ifdef DIAGNOSTIC
470 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
471 		if (iter == pp)
472 			panic("%s: pool %s already on list", __func__, wchan);
473 	}
474 #endif
475 
476 	pp->pr_serial = ++pool_serial;
477 	if (pool_serial == 0)
478 		panic("%s: too much uptime", __func__);
479 
480 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
481 	pool_count++;
482 	rw_exit_write(&pool_lock);
483 }
484 
485 /*
486  * Decommission a pool resource.
487  */
488 void
489 pool_destroy(struct pool *pp)
490 {
491 	struct pool_page_header *ph;
492 	struct pool *prev, *iter;
493 
494 #ifdef MULTIPROCESSOR
495 	if (pp->pr_cache != NULL)
496 		pool_cache_destroy(pp);
497 #endif
498 
499 #ifdef DIAGNOSTIC
500 	if (pp->pr_nout != 0)
501 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
502 #endif
503 
504 	/* Remove from global pool list */
505 	rw_enter_write(&pool_lock);
506 	pool_count--;
507 	if (pp == SIMPLEQ_FIRST(&pool_head))
508 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
509 	else {
510 		prev = SIMPLEQ_FIRST(&pool_head);
511 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
512 			if (iter == pp) {
513 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
514 				    pr_poollist);
515 				break;
516 			}
517 			prev = iter;
518 		}
519 	}
520 	rw_exit_write(&pool_lock);
521 
522 	/* Remove all pages */
523 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
524 		pl_enter(pp, &pp->pr_lock);
525 		pool_p_remove(pp, ph);
526 		pl_leave(pp, &pp->pr_lock);
527 		pool_p_free(pp, ph);
528 	}
529 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
530 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
531 }
532 
533 void
534 pool_request_init(struct pool_request *pr,
535     void (*handler)(struct pool *, void *, void *), void *cookie)
536 {
537 	pr->pr_handler = handler;
538 	pr->pr_cookie = cookie;
539 	pr->pr_item = NULL;
540 }
541 
542 void
543 pool_request(struct pool *pp, struct pool_request *pr)
544 {
545 	pl_enter(pp, &pp->pr_requests_lock);
546 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
547 	pool_runqueue(pp, PR_NOWAIT);
548 	pl_leave(pp, &pp->pr_requests_lock);
549 }
550 
551 struct pool_get_memory {
552 	union pool_lock lock;
553 	void * volatile v;
554 };
555 
556 /*
557  * Grab an item from the pool.
558  */
559 void *
560 pool_get(struct pool *pp, int flags)
561 {
562 	void *v = NULL;
563 	int slowdown = 0;
564 
565 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
566 	if (pp->pr_flags & PR_RWLOCK)
567 		KASSERT(flags & PR_WAITOK);
568 
569 #ifdef MULTIPROCESSOR
570 	if (pp->pr_cache != NULL) {
571 		v = pool_cache_get(pp);
572 		if (v != NULL)
573 			goto good;
574 	}
575 #endif
576 
577 	pl_enter(pp, &pp->pr_lock);
578 	if (pp->pr_nout >= pp->pr_hardlimit) {
579 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
580 			goto fail;
581 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
582 		if (ISSET(flags, PR_NOWAIT))
583 			goto fail;
584 	}
585 	pl_leave(pp, &pp->pr_lock);
586 
587 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
588 		yield();
589 
590 	if (v == NULL) {
591 		struct pool_get_memory mem = { .v = NULL };
592 		struct pool_request pr;
593 
594 #ifdef DIAGNOSTIC
595 		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
596 			panic("%s: cannot sleep for memory during boot",
597 			    __func__);
598 #endif
599 		pl_init(pp, &mem.lock);
600 		pool_request_init(&pr, pool_get_done, &mem);
601 		pool_request(pp, &pr);
602 
603 		pl_enter(pp, &mem.lock);
604 		while (mem.v == NULL)
605 			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
606 		pl_leave(pp, &mem.lock);
607 
608 		v = mem.v;
609 	}
610 
611 #ifdef MULTIPROCESSOR
612 good:
613 #endif
614 	if (ISSET(flags, PR_ZERO))
615 		memset(v, 0, pp->pr_size);
616 
617 	return (v);
618 
619 fail:
620 	pp->pr_nfail++;
621 	pl_leave(pp, &pp->pr_lock);
622 	return (NULL);
623 }
624 
625 void
626 pool_get_done(struct pool *pp, void *xmem, void *v)
627 {
628 	struct pool_get_memory *mem = xmem;
629 
630 	pl_enter(pp, &mem->lock);
631 	mem->v = v;
632 	pl_leave(pp, &mem->lock);
633 
634 	wakeup_one(mem);
635 }
636 
637 void
638 pool_runqueue(struct pool *pp, int flags)
639 {
640 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
641 	struct pool_request *pr;
642 
643 	pl_assert_unlocked(pp, &pp->pr_lock);
644 	pl_assert_locked(pp, &pp->pr_requests_lock);
645 
646 	if (pp->pr_requesting++)
647 		return;
648 
649 	do {
650 		pp->pr_requesting = 1;
651 
652 		/* no TAILQ_JOIN? :( */
653 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
654 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
655 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
656 		}
657 		if (TAILQ_EMPTY(&prl))
658 			continue;
659 
660 		pl_leave(pp, &pp->pr_requests_lock);
661 
662 		pl_enter(pp, &pp->pr_lock);
663 		pr = TAILQ_FIRST(&prl);
664 		while (pr != NULL) {
665 			int slowdown = 0;
666 
667 			if (pp->pr_nout >= pp->pr_hardlimit)
668 				break;
669 
670 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
671 			if (pr->pr_item == NULL) /* || slowdown ? */
672 				break;
673 
674 			pr = TAILQ_NEXT(pr, pr_entry);
675 		}
676 		pl_leave(pp, &pp->pr_lock);
677 
678 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
679 		    pr->pr_item != NULL) {
680 			TAILQ_REMOVE(&prl, pr, pr_entry);
681 			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
682 		}
683 
684 		pl_enter(pp, &pp->pr_requests_lock);
685 	} while (--pp->pr_requesting);
686 
687 	/* no TAILQ_JOIN :( */
688 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
689 		TAILQ_REMOVE(&prl, pr, pr_entry);
690 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
691 	}
692 }
693 
694 void *
695 pool_do_get(struct pool *pp, int flags, int *slowdown)
696 {
697 	struct pool_item *pi;
698 	struct pool_page_header *ph;
699 
700 	pl_assert_locked(pp, &pp->pr_lock);
701 
702 	splassert(pp->pr_ipl);
703 
704 	/*
705 	 * Account for this item now to avoid races if we need to give up
706 	 * pr_lock to allocate a page.
707 	 */
708 	pp->pr_nout++;
709 
710 	if (pp->pr_curpage == NULL) {
711 		pl_leave(pp, &pp->pr_lock);
712 		ph = pool_p_alloc(pp, flags, slowdown);
713 		pl_enter(pp, &pp->pr_lock);
714 
715 		if (ph == NULL) {
716 			pp->pr_nout--;
717 			return (NULL);
718 		}
719 
720 		pool_p_insert(pp, ph);
721 	}
722 
723 	ph = pp->pr_curpage;
724 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
725 	if (__predict_false(pi == NULL))
726 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
727 
728 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
729 		panic("%s: %s free list modified: "
730 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
731 		    __func__, pp->pr_wchan, ph->ph_page, pi,
732 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
733 	}
734 
735 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
736 
737 #ifdef DIAGNOSTIC
738 	if (pool_debug && POOL_PHPOISON(ph)) {
739 		size_t pidx;
740 		uint32_t pval;
741 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
742 		    &pidx, &pval)) {
743 			int *ip = (int *)(pi + 1);
744 			panic("%s: %s free list modified: "
745 			    "page %p; item addr %p; offset 0x%zx=0x%x",
746 			    __func__, pp->pr_wchan, ph->ph_page, pi,
747 			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
748 		}
749 	}
750 #endif /* DIAGNOSTIC */
751 
752 	if (ph->ph_nmissing++ == 0) {
753 		/*
754 		 * This page was previously empty.  Move it to the list of
755 		 * partially-full pages.  This page is already curpage.
756 		 */
757 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
758 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
759 
760 		pp->pr_nidle--;
761 	}
762 
763 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
764 		/*
765 		 * This page is now full.  Move it to the full list
766 		 * and select a new current page.
767 		 */
768 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
769 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
770 		pool_update_curpage(pp);
771 	}
772 
773 	pp->pr_nget++;
774 
775 	return (pi);
776 }
777 
778 /*
779  * Return resource to the pool.
780  */
781 void
782 pool_put(struct pool *pp, void *v)
783 {
784 	struct pool_page_header *ph, *freeph = NULL;
785 
786 #ifdef DIAGNOSTIC
787 	if (v == NULL)
788 		panic("%s: NULL item", __func__);
789 #endif
790 
791 #ifdef MULTIPROCESSOR
792 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
793 		pool_cache_put(pp, v);
794 		return;
795 	}
796 #endif
797 
798 	pl_enter(pp, &pp->pr_lock);
799 
800 	pool_do_put(pp, v);
801 
802 	pp->pr_nout--;
803 	pp->pr_nput++;
804 
805 	/* is it time to free a page? */
806 	if (pp->pr_nidle > pp->pr_maxpages &&
807 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
808 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
809 		freeph = ph;
810 		pool_p_remove(pp, freeph);
811 	}
812 
813 	pl_leave(pp, &pp->pr_lock);
814 
815 	if (freeph != NULL)
816 		pool_p_free(pp, freeph);
817 
818 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
819 		pl_enter(pp, &pp->pr_requests_lock);
820 		pool_runqueue(pp, PR_NOWAIT);
821 		pl_leave(pp, &pp->pr_requests_lock);
822 	}
823 }
824 
825 void
826 pool_do_put(struct pool *pp, void *v)
827 {
828 	struct pool_item *pi = v;
829 	struct pool_page_header *ph;
830 
831 	splassert(pp->pr_ipl);
832 
833 	ph = pr_find_pagehead(pp, v);
834 
835 #ifdef DIAGNOSTIC
836 	if (pool_debug) {
837 		struct pool_item *qi;
838 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
839 			if (pi == qi) {
840 				panic("%s: %s: double pool_put: %p", __func__,
841 				    pp->pr_wchan, pi);
842 			}
843 		}
844 	}
845 #endif /* DIAGNOSTIC */
846 
847 	pi->pi_magic = POOL_IMAGIC(ph, pi);
848 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
849 #ifdef DIAGNOSTIC
850 	if (POOL_PHPOISON(ph))
851 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
852 #endif /* DIAGNOSTIC */
853 
854 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
855 		/*
856 		 * The page was previously completely full, move it to the
857 		 * partially-full list.
858 		 */
859 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
860 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
861 	}
862 
863 	if (ph->ph_nmissing == 0) {
864 		/*
865 		 * The page is now empty, so move it to the empty page list.
866 		 */
867 		pp->pr_nidle++;
868 
869 		ph->ph_tick = ticks;
870 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
871 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
872 		pool_update_curpage(pp);
873 	}
874 }
875 
876 /*
877  * Add N items to the pool.
878  */
879 int
880 pool_prime(struct pool *pp, int n)
881 {
882 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
883 	struct pool_page_header *ph;
884 	int newpages;
885 
886 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
887 
888 	while (newpages-- > 0) {
889 		int slowdown = 0;
890 
891 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
892 		if (ph == NULL) /* or slowdown? */
893 			break;
894 
895 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
896 	}
897 
898 	pl_enter(pp, &pp->pr_lock);
899 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
900 		TAILQ_REMOVE(&pl, ph, ph_entry);
901 		pool_p_insert(pp, ph);
902 	}
903 	pl_leave(pp, &pp->pr_lock);
904 
905 	return (0);
906 }
907 
908 struct pool_page_header *
909 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
910 {
911 	struct pool_page_header *ph;
912 	struct pool_item *pi;
913 	caddr_t addr;
914 	unsigned int order;
915 	int o;
916 	int n;
917 
918 	pl_assert_unlocked(pp, &pp->pr_lock);
919 	KASSERT(pp->pr_size >= sizeof(*pi));
920 
921 	addr = pool_allocator_alloc(pp, flags, slowdown);
922 	if (addr == NULL)
923 		return (NULL);
924 
925 	if (POOL_INPGHDR(pp))
926 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
927 	else {
928 		ph = pool_get(&phpool, flags);
929 		if (ph == NULL) {
930 			pool_allocator_free(pp, addr);
931 			return (NULL);
932 		}
933 	}
934 
935 	XSIMPLEQ_INIT(&ph->ph_items);
936 	ph->ph_page = addr;
937 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
938 	ph->ph_colored = addr;
939 	ph->ph_nmissing = 0;
940 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
941 #ifdef DIAGNOSTIC
942 	/* use a bit in ph_magic to record if we poison page items */
943 	if (pool_debug)
944 		SET(ph->ph_magic, POOL_MAGICBIT);
945 	else
946 		CLR(ph->ph_magic, POOL_MAGICBIT);
947 #endif /* DIAGNOSTIC */
948 
949 	n = pp->pr_itemsperpage;
950 	o = 32;
951 	while (n--) {
952 		pi = (struct pool_item *)addr;
953 		pi->pi_magic = POOL_IMAGIC(ph, pi);
954 
955 		if (o == 32) {
956 			order = arc4random();
957 			o = 0;
958 		}
959 		if (ISSET(order, 1 << o++))
960 			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
961 		else
962 			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
963 
964 #ifdef DIAGNOSTIC
965 		if (POOL_PHPOISON(ph))
966 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
967 #endif /* DIAGNOSTIC */
968 
969 		addr += pp->pr_size;
970 	}
971 
972 	return (ph);
973 }
974 
975 void
976 pool_p_free(struct pool *pp, struct pool_page_header *ph)
977 {
978 	struct pool_item *pi;
979 
980 	pl_assert_unlocked(pp, &pp->pr_lock);
981 	KASSERT(ph->ph_nmissing == 0);
982 
983 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
984 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
985 			panic("%s: %s free list modified: "
986 			    "page %p; item addr %p; offset 0x%x=0x%lx",
987 			    __func__, pp->pr_wchan, ph->ph_page, pi,
988 			    0, pi->pi_magic);
989 		}
990 
991 #ifdef DIAGNOSTIC
992 		if (POOL_PHPOISON(ph)) {
993 			size_t pidx;
994 			uint32_t pval;
995 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
996 			    &pidx, &pval)) {
997 				int *ip = (int *)(pi + 1);
998 				panic("%s: %s free list modified: "
999 				    "page %p; item addr %p; offset 0x%zx=0x%x",
1000 				    __func__, pp->pr_wchan, ph->ph_page, pi,
1001 				    pidx * sizeof(int), ip[pidx]);
1002 			}
1003 		}
1004 #endif
1005 	}
1006 
1007 	pool_allocator_free(pp, ph->ph_page);
1008 
1009 	if (!POOL_INPGHDR(pp))
1010 		pool_put(&phpool, ph);
1011 }
1012 
1013 void
1014 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1015 {
1016 	pl_assert_locked(pp, &pp->pr_lock);
1017 
1018 	/* If the pool was depleted, point at the new page */
1019 	if (pp->pr_curpage == NULL)
1020 		pp->pr_curpage = ph;
1021 
1022 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1023 	if (!POOL_INPGHDR(pp))
1024 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
1025 
1026 	pp->pr_nitems += pp->pr_itemsperpage;
1027 	pp->pr_nidle++;
1028 
1029 	pp->pr_npagealloc++;
1030 	if (++pp->pr_npages > pp->pr_hiwat)
1031 		pp->pr_hiwat = pp->pr_npages;
1032 }
1033 
1034 void
1035 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1036 {
1037 	pl_assert_locked(pp, &pp->pr_lock);
1038 
1039 	pp->pr_npagefree++;
1040 	pp->pr_npages--;
1041 	pp->pr_nidle--;
1042 	pp->pr_nitems -= pp->pr_itemsperpage;
1043 
1044 	if (!POOL_INPGHDR(pp))
1045 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1046 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1047 
1048 	pool_update_curpage(pp);
1049 }
1050 
1051 void
1052 pool_update_curpage(struct pool *pp)
1053 {
1054 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1055 	if (pp->pr_curpage == NULL) {
1056 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1057 	}
1058 }
1059 
1060 void
1061 pool_setlowat(struct pool *pp, int n)
1062 {
1063 	int prime = 0;
1064 
1065 	pl_enter(pp, &pp->pr_lock);
1066 	pp->pr_minitems = n;
1067 	pp->pr_minpages = (n == 0)
1068 		? 0
1069 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1070 
1071 	if (pp->pr_nitems < n)
1072 		prime = n - pp->pr_nitems;
1073 	pl_leave(pp, &pp->pr_lock);
1074 
1075 	if (prime > 0)
1076 		pool_prime(pp, prime);
1077 }
1078 
1079 void
1080 pool_sethiwat(struct pool *pp, int n)
1081 {
1082 	pp->pr_maxpages = (n == 0)
1083 		? 0
1084 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1085 }
1086 
1087 int
1088 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1089 {
1090 	int error = 0;
1091 
1092 	if (n < pp->pr_nout) {
1093 		error = EINVAL;
1094 		goto done;
1095 	}
1096 
1097 	pp->pr_hardlimit = n;
1098 	pp->pr_hardlimit_warning = warnmsg;
1099 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1100 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1101 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1102 
1103 done:
1104 	return (error);
1105 }
1106 
/*
 * Record mode as the physical address constraints (pr_crange) of pool pp.
 * The page allocators in this file pass pr_crange on to km_alloc() and
 * km_free().  Only the pointer is stored; mode is not copied.
 */
void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}
1112 
1113 /*
1114  * Release all complete pages that have not been used recently.
1115  *
1116  * Returns non-zero if any pages have been reclaimed.
1117  */
1118 int
1119 pool_reclaim(struct pool *pp)
1120 {
1121 	struct pool_page_header *ph, *phnext;
1122 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1123 
1124 	pl_enter(pp, &pp->pr_lock);
1125 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1126 		phnext = TAILQ_NEXT(ph, ph_entry);
1127 
1128 		/* Check our minimum page claim */
1129 		if (pp->pr_npages <= pp->pr_minpages)
1130 			break;
1131 
1132 		/*
1133 		 * If freeing this page would put us below
1134 		 * the low water mark, stop now.
1135 		 */
1136 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1137 		    pp->pr_minitems)
1138 			break;
1139 
1140 		pool_p_remove(pp, ph);
1141 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1142 	}
1143 	pl_leave(pp, &pp->pr_lock);
1144 
1145 	if (TAILQ_EMPTY(&pl))
1146 		return (0);
1147 
1148 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1149 		TAILQ_REMOVE(&pl, ph, ph_entry);
1150 		pool_p_free(pp, ph);
1151 	}
1152 
1153 	return (1);
1154 }
1155 
1156 /*
1157  * Release all complete pages that have not been used recently
1158  * from all pools.
1159  */
1160 void
1161 pool_reclaim_all(void)
1162 {
1163 	struct pool	*pp;
1164 
1165 	rw_enter_read(&pool_lock);
1166 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1167 		pool_reclaim(pp);
1168 	rw_exit_read(&pool_lock);
1169 }
1170 
1171 #ifdef DDB
1172 #include <machine/db_machdep.h>
1173 #include <ddb/db_output.h>
1174 
/*
 * Diagnostic helpers.
 */

/*
 * Print a description of pool pp through the printf-like callback pr.
 * This is a plain wrapper around pool_print1(); see there for the meaning
 * of modif.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}
1184 
1185 void
1186 pool_print_pagelist(struct pool_pagelist *pl,
1187     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1188 {
1189 	struct pool_page_header *ph;
1190 	struct pool_item *pi;
1191 
1192 	TAILQ_FOREACH(ph, pl, ph_entry) {
1193 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1194 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1195 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1196 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1197 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1198 				    pi, pi->pi_magic);
1199 			}
1200 		}
1201 	}
1202 }
1203 
1204 void
1205 pool_print1(struct pool *pp, const char *modif,
1206     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1207 {
1208 	struct pool_page_header *ph;
1209 	int print_pagelist = 0;
1210 	char c;
1211 
1212 	while ((c = *modif++) != '\0') {
1213 		if (c == 'p')
1214 			print_pagelist = 1;
1215 		modif++;
1216 	}
1217 
1218 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1219 	    pp->pr_maxcolors);
1220 	(*pr)("\talloc %p\n", pp->pr_alloc);
1221 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1222 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1223 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1224 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1225 
1226 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1227 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1228 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1229 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1230 
1231 	if (print_pagelist == 0)
1232 		return;
1233 
1234 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1235 		(*pr)("\n\tempty page list:\n");
1236 	pool_print_pagelist(&pp->pr_emptypages, pr);
1237 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1238 		(*pr)("\n\tfull page list:\n");
1239 	pool_print_pagelist(&pp->pr_fullpages, pr);
1240 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1241 		(*pr)("\n\tpartial-page list:\n");
1242 	pool_print_pagelist(&pp->pr_partpages, pr);
1243 
1244 	if (pp->pr_curpage == NULL)
1245 		(*pr)("\tno current page\n");
1246 	else
1247 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1248 }
1249 
/*
 * ddb helper: print one line per pool on the global pool list.
 *
 * Only the first character of modif is examined:
 *  - none: print a statistics table; pools whose pr_nget is zero are
 *    left out, and pool_chk() is run on every pool that is printed.
 *  - 'a':  print the name, address and allocator of every pool.
 *  - anything else: print a usage message and return.
 *
 * expr, haddr and count are not used.
 */
void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	/* table heading; the widths match the PRWORD() calls below */
	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		/* skip pools that have not served a request */
		if (!pp->pr_nget)
			continue;

		/* a page limit of UINT_MAX is shown as "inf" */
		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

/*
 * Print val with format fmt in a column of the given width, of which
 * `fixed' characters are literal text in fmt.  ovflw carries the number of
 * characters by which earlier columns exceeded their width; it is taken
 * out of this column's padding and updated from db_printf()'s return
 * value, never going below zero.
 */
#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		/* report any inconsistencies right below the pool's line */
		pool_chk(pp);
	}
}
1323 #endif /* DDB */
1324 
1325 #if defined(POOL_DEBUG) || defined(DDB)
/*
 * Check the consistency of one page of pool pp and report problems with
 * printf().
 *
 * expected is the number of items that should be on the page's item list;
 * a negative value disables that check.
 *
 * Returns 1 if the page header address does not match the page (pools
 * with in-page headers only), if an item on the list lies outside the
 * page, or if the item count does not add up; returns 0 otherwise.  A bad
 * item magic or modified poison is reported but does not by itself make
 * the function return 1.
 */
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	/* with an in-page header, masking ph must give the page address */
	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	/* walk the page's item list; n counts the items seen so far */
	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		/* every item on the list has to lie within the page */
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		/* reported only; the walk continues */
		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		/* on poisoned pages, check the item body after the header */
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	/* items on the list plus missing items must fill the page */
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	/* the caller may know how many items the list has to hold */
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}
1396 
1397 int
1398 pool_chk(struct pool *pp)
1399 {
1400 	struct pool_page_header *ph;
1401 	int r = 0;
1402 
1403 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1404 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1405 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1406 		r += pool_chk_page(pp, ph, 0);
1407 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1408 		r += pool_chk_page(pp, ph, -1);
1409 
1410 	return (r);
1411 }
1412 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1413 
1414 #ifdef DDB
/*
 * Call func(item, full, pr) for the items of pool pp that are not on a
 * page's item list, looking at the full and partial page lists.
 *
 * On full pages, ph_nmissing consecutive slots of pr_size bytes starting
 * at ph_colored are visited.  On partial pages the slots are visited in
 * the same order, but a slot whose address is found on the page's item
 * list is skipped; the walk of a page stops once ph_nmissing slots have
 * been passed to func.
 */
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			/* is the slot at cp on this page's item list? */
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			/*
			 * pi equals cp only if the search above stopped early.
			 * NOTE(review): this relies on pi being NULL after a
			 * complete traversal - confirm against XSIMPLEQ_FOREACH.
			 */
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
1454 #endif
1455 
/*
 * Handler for the pool sysctls.  name[0] selects the node:
 * KERN_POOL_NPOOLS - the number of pools (pool_count); takes no pool#.
 * KERN_POOL_POOL.<pool#> - a struct kinfo_pool snapshot of pool#.
 * KERN_POOL_NAME.<pool#> - the name (pr_wchan) of pool#.
 * KERN_POOL_CACHE.<pool#> - the result of pool_cache_info() for pool#.
 * KERN_POOL_CACHE_CPUS.<pool#> - the result of pool_cache_cpus_info()
 *   for pool#.
 *
 * <pool#> (name[1]) is matched against the pr_serial of the pools on the
 * global pool list.  All nodes are read-only.
 *
 * Returns ENOTDIR when namelen does not fit the node, EOPNOTSUPP for an
 * unknown name[0], ENOENT when no pool has the requested serial, and
 * otherwise the result of the sysctl or cache helper that was called.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	/* the per-pool nodes take exactly one more component: the pool# */
	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	/* pp is left NULL if the loop finishes without a match */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		/* zero the whole struct before it is filled in */
		memset(&pi, 0, sizeof(pi));

		/* snapshot the counters with the pool lock held */
		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		/* add the per-CPU cache statistics, if the pool has a cache */
		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
1542 
/*
 * Queue pool_gc_task on the systqmp task queue.  The argument is not
 * used.
 */
void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}
1548 
/*
 * Garbage collect idle pages: walk all pools and free at most one page
 * per pool per run.
 *
 * A page is freed when the pool has more idle pages than its minimum page
 * count and the first page on its empty list has a ph_tick more than
 * pool_wait_gc seconds (hz * pool_wait_gc ticks) in the past.  Pools whose
 * lock cannot be taken without waiting are skipped.  On MULTIPROCESSOR
 * kernels the pool's per-CPU cache, if any, is collected first.
 *
 * Before returning, the pool_gc_tick timeout is armed for one second
 * later.  The argument is not used.
 */
void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		/* cheap unlocked check first, then try for the lock */
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		/* the page is freed after the pool lock has been dropped */
		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}
1587 
1588 /*
1589  * Pool backend allocators.
1590  */
1591 
/*
 * Allocate a page for pool pp through the pool's allocator (pa_alloc).
 * flags and slowdown are passed on unchanged.  Returns the allocator's
 * result, which may be NULL.
 *
 * With DIAGNOSTIC, a page for a pool with in-page headers must be aligned
 * such that masking it with pr_pgmask leaves the address unchanged;
 * otherwise the kernel panics.
 */
void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}
1611 
1612 void
1613 pool_allocator_free(struct pool *pp, void *v)
1614 {
1615 	struct pool_allocator *pa = pp->pr_alloc;
1616 
1617 	(*pa->pa_free)(pp, v);
1618 }
1619 
1620 void *
1621 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1622 {
1623 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1624 
1625 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1626 	kd.kd_slowdown = slowdown;
1627 
1628 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1629 }
1630 
/*
 * Counterpart of pool_page_alloc(): release the pr_pgsize bytes at v with
 * km_free(), using the same kv_page mode and pr_crange constraints.
 */
void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}
1636 
1637 void *
1638 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1639 {
1640 	struct kmem_va_mode kv = kv_intrsafe;
1641 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1642 	void *v;
1643 	int s;
1644 
1645 	if (POOL_INPGHDR(pp))
1646 		kv.kv_align = pp->pr_pgsize;
1647 
1648 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1649 	kd.kd_slowdown = slowdown;
1650 
1651 	s = splvm();
1652 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1653 	splx(s);
1654 
1655 	return (v);
1656 }
1657 
/*
 * Counterpart of pool_multi_alloc(): release the pr_pgsize bytes at v with
 * km_free() at splvm, using a copy of the kv_intrsafe mode set up the same
 * way as for the allocation (aligned to pr_pgsize for pools with in-page
 * headers).
 */
void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}
1671 
1672 void *
1673 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1674 {
1675 	struct kmem_va_mode kv = kv_any;
1676 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1677 	void *v;
1678 
1679 	if (POOL_INPGHDR(pp))
1680 		kv.kv_align = pp->pr_pgsize;
1681 
1682 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1683 	kd.kd_slowdown = slowdown;
1684 
1685 	KERNEL_LOCK();
1686 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1687 	KERNEL_UNLOCK();
1688 
1689 	return (v);
1690 }
1691 
/*
 * Counterpart of pool_multi_alloc_ni(): release the pr_pgsize bytes at v
 * with km_free() while holding the kernel lock, using a copy of the kv_any
 * mode set up the same way as for the allocation (aligned to pr_pgsize for
 * pools with in-page headers).
 */
void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}
1704 
1705 #ifdef MULTIPROCESSOR
1706 
/*
 * Pool of struct pool_cache; the per-CPU cache memory of every pool comes
 * from here.  It is set up by the first call to pool_cache_init().
 */
struct pool pool_caches; /* per cpu cache entries */
1708 
/*
 * Give pool pp a per-CPU item cache.
 *
 * The shared pool_caches pool is initialised on first use.  The cache lock,
 * the random magic values, the list of cached item lists and the cache
 * counters of pp are set up, and every CPU's struct pool_cache is reset.
 * Only then is the cache made visible by storing it in pp->pr_cache.
 *
 * Items of pp must be at least as large as struct pool_cache_item, since
 * free items are used as cache list entries.
 */
void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	/* a pr_size of zero means pool_caches has not been set up yet */
	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_tick = ticks;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	/* the initialisation above is issued before the cache is published */
	membar_producer();

	pp->pr_cache = cm;
}
1754 
/*
 * Stamp the cached item ci with check values: the two unsigned longs
 * starting at ci->ci_nextl are overwritten with the pool's cache magic
 * values XORed with the address of ci and with ci->ci_next respectively.
 * pool_cache_item_magic_check() verifies them again.
 */
static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}
1763 
/*
 * Verify the check values written by pool_cache_item_magic() into the
 * cached item ci.  Panics if either of the two words no longer matches
 * the value recomputed from the pool's cache magic, the address of ci and
 * ci->ci_next.
 */
static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	/* entry and val describe the word that failed the comparison */
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}
1787 
1788 static inline void
1789 pool_list_enter(struct pool *pp)
1790 {
1791 	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1792 		pl_enter(pp, &pp->pr_cache_lock);
1793 		pp->pr_cache_contention++;
1794 	}
1795 }
1796 
/*
 * Release the cache lock of pool pp taken by pool_list_enter().
 */
static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}
1802 
1803 static inline struct pool_cache_item *
1804 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1805 {
1806 	struct pool_cache_item *pl;
1807 
1808 	pool_list_enter(pp);
1809 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1810 	if (pl != NULL) {
1811 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1812 		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1813 
1814 		pool_cache_item_magic(pp, pl);
1815 
1816 		pc->pc_nlget++;
1817 	} else
1818 		pc->pc_nlfail++;
1819 
1820 	/* fold this cpus nout into the global while we have the lock */
1821 	pp->pr_cache_nout += pc->pc_nout;
1822 	pc->pc_nout = 0;
1823 	pool_list_leave(pp);
1824 
1825 	return (pl);
1826 }
1827 
/*
 * Append the list of cached items headed by ci to the pool-wide cache
 * lists of pp on behalf of the per-CPU cache pc.
 *
 * If the pool-wide lists were empty, pr_cache_tick is set to the current
 * tick.  The operation is counted in pc_nlput, and the CPU's pc_nout is
 * folded into the pool-wide pr_cache_nout while the cache lock is held.
 */
static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_tick = ticks;

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}
1846 
/*
 * Begin working on the current CPU's cache of pool pp.
 *
 * Gets the CPU's struct pool_cache with cpumem_enter(), raises the
 * interrupt priority level to the pool's pr_ipl (the previous level is
 * stored in *s) and increments the cache's generation number pc_gen.
 * pool_cache_leave() increments it again, so readers such as
 * pool_cache_pool_info() treat an odd pc_gen as "update in progress".
 */
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}
1858 
/*
 * Finish working on the per-CPU cache pc of pool pp: undo the steps of
 * pool_cache_enter() in reverse order.  The generation number is
 * incremented once more, the interrupt priority level is restored to s
 * and the cache is released with cpumem_leave().
 */
static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}
1866 
/*
 * Get an item of pool pp from the current CPU's cache.
 *
 * The item is taken from the CPU's active list if it has one, else from
 * its previous list, else from a list fetched from the pool-wide cache
 * lists.  Returns the item, or NULL (counted in pc_nfail) when none of
 * the three has an item to offer.
 *
 * The item's check values are verified before it is handed out, and with
 * DIAGNOSTIC and pool_debug set the poison of items marked as poisoned is
 * checked as well; a mismatch panics.
 */
void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		/* the poisoned area starts right after the item header */
		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	/* the rest of the list ci was the head of becomes the active list */
	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}
1915 
/*
 * Put the item v of pool pp back onto the current CPU's cache.
 *
 * The item becomes the new head of the CPU's active list.  If the active
 * list already holds pr_cache_items items, it becomes the previous list
 * first, and a previous list that was there before is moved to the
 * pool-wide cache lists.
 *
 * With DIAGNOSTIC and pool_debug set, items larger than the item header
 * are poisoned behind the header and marked as such in ci_nitems.
 */
void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		/* the active list is full: rotate the lists */
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	/* the head item records the length of the list it heads */
	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}
1959 
1960 struct pool_cache_item *
1961 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1962 {
1963 	struct pool_cache_item *rpl, *next;
1964 
1965 	if (pl == NULL)
1966 		return (NULL);
1967 
1968 	rpl = TAILQ_NEXT(pl, ci_nextl);
1969 
1970 	pl_enter(pp, &pp->pr_lock);
1971 	do {
1972 		next = pl->ci_next;
1973 		pool_do_put(pp, pl);
1974 		pl = next;
1975 	} while (pl != NULL);
1976 	pl_leave(pp, &pp->pr_lock);
1977 
1978 	return (rpl);
1979 }
1980 
1981 void
1982 pool_cache_destroy(struct pool *pp)
1983 {
1984 	struct pool_cache *pc;
1985 	struct pool_cache_item *pl;
1986 	struct cpumem_iter i;
1987 	struct cpumem *cm;
1988 
1989 	rw_enter_write(&pool_lock); /* serialise with the gc */
1990 	cm = pp->pr_cache;
1991 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1992 	rw_exit_write(&pool_lock);
1993 
1994 	CPUMEM_FOREACH(pc, &i, cm) {
1995 		pool_cache_list_put(pp, pc->pc_actv);
1996 		pool_cache_list_put(pp, pc->pc_prev);
1997 	}
1998 
1999 	cpumem_put(&pool_caches, cm);
2000 
2001 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
2002 	while (pl != NULL)
2003 		pl = pool_cache_list_put(pp, pl);
2004 }
2005 
/*
 * Garbage collect the cache of pool pp and tune its list length.
 *
 * If pr_cache_tick is more than pool_wait_gc seconds (hz * pool_wait_gc
 * ticks) in the past, the pool-wide cache lists are not empty and the
 * cache lock can be taken without waiting, the first list is removed and
 * its items are returned to the pool.
 *
 * Independently of that, pr_cache_items is adjusted according to how much
 * pr_cache_contention has grown since the previous call.
 */
void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		/* look again now that the lock is held */
		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_tick = ticks;

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		/* a NULL pl is handled by pool_cache_list_put() */
		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on the pr_cache_lock then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.  if there was no contention at all, shrink the
	 * length again, one item at a time, but not below 8.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}
2047 
/*
 * Add the per-CPU cache statistics of pool pp to pi: the nget and nput
 * counters of every CPU's cache, the pc_nout of every CPU's cache and the
 * pool-wide pr_cache_nout.  Does nothing if the pool has no cache.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		/*
		 * pc_gen is odd while the cache is being updated (see
		 * pool_cache_enter() and pool_cache_leave()).  Wait for an
		 * even value and retry if it changed while reading.
		 */
		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the cache lock so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}
2083 
/*
 * sysctl helper: export the pool-wide cache statistics of pool pp (number
 * of gc runs, list length, number of cached items, lock contention) as a
 * struct kinfo_pool_cache.
 *
 * Returns EOPNOTSUPP if the pool has no cache, otherwise the result of
 * sysctl_rdstruct().
 */
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	/* take a consistent snapshot with the cache lock held */
	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}
2103 
/*
 * sysctl helper: export the per-CPU cache counters of pool pp as an array
 * of ncpusfound struct kinfo_pool_cache_cpu, one entry per CPU cache in
 * iteration order.
 *
 * Returns EOPNOTSUPP if the pool has no cache, EINVAL if *oldlenp is not
 * a multiple of the entry size, EIO if the temporary array cannot be
 * allocated or the pool has more CPU caches than ncpusfound, and
 * otherwise the result of sysctl_rdstruct().
 */
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	/* zeroed, so nothing uninitialised is copied out */
	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		/* never write past the end of the array */
		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		/*
		 * pc_gen is odd while the cache is being updated (see
		 * pool_cache_enter() and pool_cache_leave()).  Wait for an
		 * even value and retry if it changed while reading.
		 */
		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
2158 #else /* MULTIPROCESSOR */
/*
 * Without MULTIPROCESSOR there are no per-CPU caches; this does nothing.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}
2164 
/*
 * Without MULTIPROCESSOR there are no cache statistics to add; pi is left
 * unchanged.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
2170 
/*
 * Without MULTIPROCESSOR there are no per-CPU caches; always returns
 * EOPNOTSUPP.
 */
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
2176 
/*
 * Without MULTIPROCESSOR there are no per-CPU caches; always returns
 * EOPNOTSUPP.
 */
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
2182 #endif /* MULTIPROCESSOR */
2183 
2184 
/*
 * Mutex flavour of the pool lock operations: initialise the mutex in lock
 * with the pool's interrupt priority level (pr_ipl), the pool's name
 * (pr_wchan) and the given lock type.
 */
void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}
2191 
/*
 * Mutex flavour: acquire the mutex in lock with mtx_enter().
 */
void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}
2197 
/*
 * Mutex flavour: try to acquire the mutex in lock without waiting.
 * Returns the result of mtx_enter_try(); callers in this file treat a
 * non-zero value as "lock acquired".
 */
int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}
2203 
/*
 * Mutex flavour: release the mutex in lock with mtx_leave().
 */
void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}
2209 
/*
 * Mutex flavour: assert that the mutex in lock is held.
 */
void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}
2215 
/*
 * Mutex flavour: assert that the mutex in lock is not held.
 */
void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}
2221 
/*
 * Mutex flavour: sleep on ident with msleep(), passing the mutex in lock
 * along with priority, wmesg and timo.  Returns the result of msleep().
 */
int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
}
2228 
/*
 * Pool lock operations implemented with a mutex (prl_mtx).  The entries
 * are positional: init, enter, enter_try, leave, assert_locked,
 * assert_unlocked and sleep, in the same order as in pool_lock_ops_rw.
 */
static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};
2238 
/*
 * rwlock flavour of the pool lock operations: initialise the rwlock in
 * lock with the pool's name (pr_wchan) and the given lock type.
 */
void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}
2245 
/*
 * rwlock flavour: acquire the rwlock in lock for writing.
 */
void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}
2251 
/*
 * rwlock flavour: try to acquire the rwlock in lock for writing without
 * sleeping (RW_NOSLEEP).  Returns non-zero if rw_enter() returned 0, and
 * 0 otherwise, matching how callers in this file use the mutex flavour.
 */
int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}
2257 
/*
 * rwlock flavour: release the write lock on the rwlock in lock.
 */
void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}
2263 
/*
 * rwlock flavour: assert that the rwlock in lock is write-locked.
 */
void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}
2269 
/*
 * rwlock flavour: assert that rw_status() does not report RW_WRITE for
 * the rwlock in lock.
 */
void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}
2275 
/*
 * rwlock flavour: sleep on ident with rwsleep(), passing the rwlock in
 * lock along with priority, wmesg and timo.  Returns the result of
 * rwsleep().
 */
int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
}
2282 
/*
 * Pool lock operations implemented with an rwlock (prl_rwlock) that is
 * always taken for writing.  The entries are positional: init, enter,
 * enter_try, leave, assert_locked, assert_unlocked and sleep, in the same
 * order as in pool_lock_ops_mtx.
 */
static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};
2292