/*	$OpenBSD: subr_pool.c,v 1.234 2021/06/15 05:24:46 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/percpu.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
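
/*
 * Usage sketch (illustrative only; "struct foo" and "foopl" are
 * made-up names, not taken from a real consumer):
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 */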

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock protecting the previous variables that make up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};
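
/*
 * Two implementations back these ops: pool_lock_ops_mtx (the default,
 * a mutex at the pool's IPL) and pool_lock_ops_rw (a write rwlock,
 * selected with PR_RWLOCK in pool_init(), which requires PR_WAITOK
 * since rwlocks may sleep).  The pl_*() wrappers below dispatch
 * through pp->pr_lock_ops so the rest of the code stays agnostic of
 * the lock flavour.
 */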

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
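
/*
 * While an item sits on a page's free list, its pi_magic holds the
 * item's own address XORed with the page's random ph_magic.  A stray
 * write to freed memory is therefore likely to break this invariant,
 * which pool_do_get() and pool_p_free() verify before trusting the
 * item.
 */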

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;
	uint64_t		ph_timestamp;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};
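
/*
 * Per-cpu caching in a nutshell: each cpu accumulates frees on pc_actv
 * until it holds pr_cache_items entries, then parks that list in
 * pc_prev; when both slots are taken, the older list is pushed onto
 * the pool-wide pr_cache_lists under pr_cache_lock.  pc_gen is bumped
 * on entry to and exit from the cache, so an observer that sees an odd
 * value, or a change across a read, knows the stats were in flux.
 */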

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

#define POOL_WAIT_FREE	SEC_TO_NSEC(1)
#define POOL_WAIT_GC	SEC_TO_NSEC(8)
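
/*
 * An empty page is only released once it has sat idle for
 * POOL_WAIT_FREE (checked in pool_put()) or POOL_WAIT_GC (checked by
 * the periodic garbage collector), which avoids bouncing pages back
 * and forth to the page allocator.
 */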

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}
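
/*
 * The inverted comparison sorts the tree by descending page address,
 * so RBT_NFIND() on an item's address lands on the header with the
 * greatest ph_page that is <= the item, i.e. the page the item lives
 * on.  pr_find_pagehead() below relies on this.
 */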

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};
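
/*
 * When pool_get() must sleep, it goes through this request queue: it
 * enqueues a pool_request whose handler (pool_get_done()) stores the
 * item in a pool_get_memory and wakes the sleeper.  Requests are
 * queued at the tail and serviced from the head, so waiters are
 * satisfied in FIFO order as items become available.
 */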

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	TRACEPOINT(uvm, pool_get, pp, v, flags);

	return (v);

fail:
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}
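
/*
 * pr_requesting serialises the queue below: the caller that increments
 * it from zero becomes the runner, while concurrent callers only bump
 * the counter and return.  The runner re-reads the queue until the
 * counter it decrements drains to zero, so requests queued while it
 * was busy are not left behind.
 */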

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	TRACEPOINT(uvm, pool_put, pp, v);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	pool_wakeup(pp);
}

void
pool_wakeup(struct pool *pp)
{
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_timestamp = getnsecuptime();
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	pl_enter(pp, &pp->pr_lock);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	pl_leave(pp, &pp->pr_lock);

	return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	unsigned int order;
	int o;
	int n;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	o = 32;
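	/*
	 * Link the items into the free list in a randomised order: one
	 * bit of arc4random() output per item decides between head and
	 * tail insertion, and the 32-bit reservoir is refilled as it
	 * runs out.
	 */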
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);

		if (o == 32) {
			order = arc4random();
			o = 0;
		}
		if (ISSET(order, 1 << o++))
			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
		else
			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	pl_enter(pp, &pp->pr_lock);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	pl_leave(pp, &pp->pr_lock);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}
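
/*
 * Illustrative use (a sketch, not lifted from a real caller): a pool
 * whose items must be reachable by DMA could be constrained with
 *
 *	pool_set_constraints(&foo_pool, &kp_dma_contig);
 *
 * kp_dirty, the default set in pool_init(), applies no constraint.
 */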

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	pl_enter(pp, &pp->pr_lock);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	pl_leave(pp, &pp->pr_lock);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool	*pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have several pool sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS additionally export the
 * per-cpu cache statistics for pool#, in aggregate and per cpu.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_timestamp = getnsecuptime();
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	membar_producer();

	pp->pr_cache = cm;
}

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}
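
/*
 * While an item is on a cpu free list its ci_nextl entry is unused, so
 * the two words are reused as check values: the pool's random
 * pr_cache_magic[] XORed with the item's address and with its ci_next
 * pointer.  pool_cache_item_magic_check() below verifies both words
 * before the item is handed out again.
 */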

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
		pl_enter(pp, &pp->pr_cache_lock);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this CPU's nout into the global count while we hold the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_timestamp = getnsecuptime();

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this CPU's nout into the global count while we hold the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	pl_enter(pp, &pp->pr_lock);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	pl_leave(pp, &pp->pr_lock);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * If there's a lot of contention on pr_cache_lock, consider
	 * growing the length of the per-cpu lists to reduce how often
	 * the global pool has to be accessed.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};