1 /*	$OpenBSD: subr_pool.c,v 1.213 2017/06/16 01:33:20 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
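
/*
 * Illustrative use of this API (a minimal sketch; "struct foo",
 * "foo_pool" and the "foopl" wait channel are made-up example names,
 * not part of this file):
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */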
61 
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, we're screwed, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71 
72 /* Lock protecting the global pool state variables above */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures */
76 struct pool phpool;
77 
78 struct pool_item {
79 	u_long				pi_magic;
80 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
81 };
82 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
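
/*
 * A free item's pi_magic is its own address xor'd with the page's
 * random ph_magic cookie; pool_do_get() and pool_p_free() recompute
 * it and panic on a mismatch, which cheaply catches stray writes to
 * the first word of a freed item.
 */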
83 
84 struct pool_page_header {
85 	/* Page headers */
86 	TAILQ_ENTRY(pool_page_header)
87 				ph_entry;	/* pool page list */
88 	XSIMPLEQ_HEAD(, pool_item)
89 				ph_items;	/* free items on the page */
90 	RBT_ENTRY(pool_page_header)
91 				ph_node;	/* off-page page headers */
92 	unsigned int		ph_nmissing;	/* # of chunks in use */
93 	caddr_t			ph_page;	/* this page's address */
94 	caddr_t			ph_colored;	/* page's colored address */
95 	unsigned long		ph_magic;
96 	int			ph_tick;
97 };
98 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
99 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
100 
101 #ifdef MULTIPROCESSOR
102 struct pool_cache_item {
103 	struct pool_cache_item	*ci_next;	/* next item in list */
104 	unsigned long		 ci_nitems;	/* number of items in list */
105 	TAILQ_ENTRY(pool_cache_item)
106 				 ci_nextl;	/* entry in list of lists */
107 };
108 
109 /* we store whether the cached item is poisoned in the high bit of nitems */
110 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
111 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
112 
113 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
114     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
115 
116 #define POOL_CACHE_ITEM_POISONED(_ci)					\
117     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
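
/*
 * For illustration: with ci_nitems == (POOL_CACHE_ITEM_NITEMS_POISON | 3),
 * POOL_CACHE_ITEM_NITEMS() yields 3 and POOL_CACHE_ITEM_POISONED() is true.
 */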
118 
119 struct pool_cache {
120 	struct pool_cache_item	*pc_actv;	/* active list of items */
121 	unsigned long		 pc_nactv;	/* actv head nitems cache */
122 	struct pool_cache_item	*pc_prev;	/* previous list of items */
123 
124 	uint64_t		 pc_gen;	/* generation number */
125 	uint64_t		 pc_nget;	/* # of successful requests */
126 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
127 	uint64_t		 pc_nput;	/* # of releases */
128 	uint64_t		 pc_nlget;	/* # of list requests */
129 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
130 	uint64_t		 pc_nlput;	/* # of list releases */
131 
132 	int			 pc_nout;
133 };
134 
135 void	*pool_cache_get(struct pool *);
136 void	 pool_cache_put(struct pool *, void *);
137 void	 pool_cache_destroy(struct pool *);
138 #endif
139 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
140 int	 pool_cache_info(struct pool *, void *, size_t *);
141 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
142 
143 #ifdef POOL_DEBUG
144 int	pool_debug = 1;
145 #else
146 int	pool_debug = 0;
147 #endif
148 
149 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
150 
151 struct pool_page_header *
152 	 pool_p_alloc(struct pool *, int, int *);
153 void	 pool_p_insert(struct pool *, struct pool_page_header *);
154 void	 pool_p_remove(struct pool *, struct pool_page_header *);
155 void	 pool_p_free(struct pool *, struct pool_page_header *);
156 
157 void	 pool_update_curpage(struct pool *);
158 void	*pool_do_get(struct pool *, int, int *);
159 void	 pool_do_put(struct pool *, void *);
160 int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
161 int	 pool_chk(struct pool *);
162 void	 pool_get_done(void *, void *);
163 void	 pool_runqueue(struct pool *, int);
164 
165 void	*pool_allocator_alloc(struct pool *, int, int *);
166 void	 pool_allocator_free(struct pool *, void *);
167 
168 /*
169  * The default pool allocator.
170  */
171 void	*pool_page_alloc(struct pool *, int, int *);
172 void	pool_page_free(struct pool *, void *);
173 
174 /*
175  * safe for interrupts; this is the default allocator
176  */
177 struct pool_allocator pool_allocator_single = {
178 	pool_page_alloc,
179 	pool_page_free,
180 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
181 };
182 
183 void	*pool_multi_alloc(struct pool *, int, int *);
184 void	pool_multi_free(struct pool *, void *);
185 
186 struct pool_allocator pool_allocator_multi = {
187 	pool_multi_alloc,
188 	pool_multi_free,
189 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
190 };
191 
192 void	*pool_multi_alloc_ni(struct pool *, int, int *);
193 void	pool_multi_free_ni(struct pool *, void *);
194 
195 struct pool_allocator pool_allocator_multi_ni = {
196 	pool_multi_alloc_ni,
197 	pool_multi_free_ni,
198 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
199 };
200 
201 #ifdef DDB
202 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
203 	     __attribute__((__format__(__kprintf__,1,2))));
204 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
205 	     __attribute__((__format__(__kprintf__,1,2))));
206 #endif
207 
208 /* stale page garbage collectors */
209 void	pool_gc_sched(void *);
210 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
211 void	pool_gc_pages(void *);
212 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
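/* seconds an empty page must sit idle before pool_put() or the gc frees it */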
213 int pool_wait_free = 1;
214 int pool_wait_gc = 8;
215 
216 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
217 
218 static inline int
219 phtree_compare(const struct pool_page_header *a,
220     const struct pool_page_header *b)
221 {
222 	vaddr_t va = (vaddr_t)a->ph_page;
223 	vaddr_t vb = (vaddr_t)b->ph_page;
224 
225 	/* the compares in this order are important for the NFIND to work */
226 	if (vb < va)
227 		return (-1);
228 	if (vb > va)
229 		return (1);
230 
231 	return (0);
232 }
233 
234 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
235 
236 /*
237  * Return the pool page header based on page address.
238  */
239 static inline struct pool_page_header *
240 pr_find_pagehead(struct pool *pp, void *v)
241 {
242 	struct pool_page_header *ph, key;
243 
244 	if (POOL_INPGHDR(pp)) {
245 		caddr_t page;
246 
247 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
248 
249 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
250 	}
251 
252 	key.ph_page = v;
253 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
254 	if (ph == NULL)
255 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
256 
257 	KASSERT(ph->ph_page <= (caddr_t)v);
258 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
259 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
260 
261 	return (ph);
262 }
263 
264 /*
265  * Initialize the given pool resource structure.
266  *
267  * We export this routine to allow other kernel parts to declare
268  * static pools that must be initialized before malloc() is available.
269  */
270 void
271 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
272     const char *wchan, struct pool_allocator *palloc)
273 {
274 	int off = 0, space;
275 	unsigned int pgsize = PAGE_SIZE, items;
276 	size_t pa_pagesz;
277 #ifdef DIAGNOSTIC
278 	struct pool *iter;
279 #endif
280 
281 	if (align == 0)
282 		align = ALIGN(1);
283 
284 	if (size < sizeof(struct pool_item))
285 		size = sizeof(struct pool_item);
286 
287 	size = roundup(size, align);
288 
289 	while (size * 8 > pgsize)
290 		pgsize <<= 1;
291 
292 	if (palloc == NULL) {
293 		if (pgsize > PAGE_SIZE) {
294 			palloc = ISSET(flags, PR_WAITOK) ?
295 			    &pool_allocator_multi_ni : &pool_allocator_multi;
296 		} else
297 			palloc = &pool_allocator_single;
298 
299 		pa_pagesz = palloc->pa_pagesz;
300 	} else {
301 		size_t pgsizes;
302 
303 		pa_pagesz = palloc->pa_pagesz;
304 		if (pa_pagesz == 0)
305 			pa_pagesz = POOL_ALLOC_DEFAULT;
306 
307 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
308 
309 		/* make sure the allocator can fit at least one item */
310 		if (size > pgsizes) {
311 			panic("%s: pool %s item size 0x%zx > "
312 			    "allocator %p sizes 0x%zx", __func__, wchan,
313 			    size, palloc, pgsizes);
314 		}
315 
316 		/* shrink pgsize until it fits into the range */
317 		while (!ISSET(pgsizes, pgsize))
318 			pgsize >>= 1;
319 	}
320 	KASSERT(ISSET(pa_pagesz, pgsize));
321 
322 	items = pgsize / size;
323 
324 	/*
325 	 * Decide whether to put the page header off page to avoid
326 	 * wasting too large a part of the page. Off-page page headers
327 	 * go into an RB tree, so we can match a returned item with
328 	 * its header based on the page address.
329 	 */
330 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
331 		if (pgsize - (size * items) >
332 		    sizeof(struct pool_page_header)) {
333 			off = pgsize - sizeof(struct pool_page_header);
334 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
335 			off = pgsize - sizeof(struct pool_page_header);
336 			items = off / size;
337 		}
338 	}
339 
340 	KASSERT(items > 0);
341 
342 	/*
343 	 * Initialize the pool structure.
344 	 */
345 	memset(pp, 0, sizeof(*pp));
346 	TAILQ_INIT(&pp->pr_emptypages);
347 	TAILQ_INIT(&pp->pr_fullpages);
348 	TAILQ_INIT(&pp->pr_partpages);
349 	pp->pr_curpage = NULL;
350 	pp->pr_npages = 0;
351 	pp->pr_minitems = 0;
352 	pp->pr_minpages = 0;
353 	pp->pr_maxpages = 8;
354 	pp->pr_size = size;
355 	pp->pr_pgsize = pgsize;
356 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
357 	pp->pr_phoffset = off;
358 	pp->pr_itemsperpage = items;
359 	pp->pr_wchan = wchan;
360 	pp->pr_alloc = palloc;
361 	pp->pr_nitems = 0;
362 	pp->pr_nout = 0;
363 	pp->pr_hardlimit = UINT_MAX;
364 	pp->pr_hardlimit_warning = NULL;
365 	pp->pr_hardlimit_ratecap.tv_sec = 0;
366 	pp->pr_hardlimit_ratecap.tv_usec = 0;
367 	pp->pr_hardlimit_warning_last.tv_sec = 0;
368 	pp->pr_hardlimit_warning_last.tv_usec = 0;
369 	RBT_INIT(phtree, &pp->pr_phtree);
370 
371 	/*
372 	 * Use the space between the chunks and the page header
373 	 * for cache coloring.
374 	 */
375 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
376 	space -= pp->pr_itemsperpage * pp->pr_size;
377 	pp->pr_align = align;
378 	pp->pr_maxcolors = (space / align) + 1;
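	/*
	 * Worked example with illustrative numbers only (assume a
	 * 128 byte page header; the real size is platform dependent):
	 * 96 byte items with align 16 on a 4096 byte page keep the
	 * header in-page at offset 3968, so itemsperpage = 3968 / 96
	 * = 41, the colorable slack is 3968 - 41 * 96 = 32 bytes and
	 * maxcolors = 32 / 16 + 1 = 3; successive pages then place
	 * their first item at ph_colored offsets 0, 16 and 32.
	 */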
379 
380 	pp->pr_nget = 0;
381 	pp->pr_nfail = 0;
382 	pp->pr_nput = 0;
383 	pp->pr_npagealloc = 0;
384 	pp->pr_npagefree = 0;
385 	pp->pr_hiwat = 0;
386 	pp->pr_nidle = 0;
387 
388 	pp->pr_ipl = ipl;
389 	mtx_init_flags(&pp->pr_mtx, pp->pr_ipl, wchan, 0);
390 	mtx_init_flags(&pp->pr_requests_mtx, pp->pr_ipl, wchan, 0);
391 	TAILQ_INIT(&pp->pr_requests);
392 
393 	if (phpool.pr_size == 0) {
394 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
395 		    IPL_HIGH, 0, "phpool", NULL);
396 
397 		/* make sure phpool won't "recurse" */
398 		KASSERT(POOL_INPGHDR(&phpool));
399 	}
400 
401 	/* pglistalloc/constraint parameters */
402 	pp->pr_crange = &kp_dirty;
403 
404 	/* Insert this into the list of all pools. */
405 	rw_enter_write(&pool_lock);
406 #ifdef DIAGNOSTIC
407 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
408 		if (iter == pp)
409 			panic("%s: pool %s already on list", __func__, wchan);
410 	}
411 #endif
412 
413 	pp->pr_serial = ++pool_serial;
414 	if (pool_serial == 0)
415 		panic("%s: too much uptime", __func__);
416 
417 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
418 	pool_count++;
419 	rw_exit_write(&pool_lock);
420 }
421 
422 /*
423  * Decommission a pool resource.
424  */
425 void
426 pool_destroy(struct pool *pp)
427 {
428 	struct pool_page_header *ph;
429 	struct pool *prev, *iter;
430 
431 #ifdef MULTIPROCESSOR
432 	if (pp->pr_cache != NULL)
433 		pool_cache_destroy(pp);
434 #endif
435 
436 #ifdef DIAGNOSTIC
437 	if (pp->pr_nout != 0)
438 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
439 #endif
440 
441 	/* Remove from global pool list */
442 	rw_enter_write(&pool_lock);
443 	pool_count--;
444 	if (pp == SIMPLEQ_FIRST(&pool_head))
445 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
446 	else {
447 		prev = SIMPLEQ_FIRST(&pool_head);
448 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
449 			if (iter == pp) {
450 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
451 				    pr_poollist);
452 				break;
453 			}
454 			prev = iter;
455 		}
456 	}
457 	rw_exit_write(&pool_lock);
458 
459 	/* Remove all pages */
460 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
461 		mtx_enter(&pp->pr_mtx);
462 		pool_p_remove(pp, ph);
463 		mtx_leave(&pp->pr_mtx);
464 		pool_p_free(pp, ph);
465 	}
466 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
467 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
468 }
469 
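/*
 * Illustrative asynchronous use (a sketch with made-up names, not code
 * from this file): a caller that must not sleep can queue a request and
 * have the item handed to a callback once a page becomes available.
 *
 *	void
 *	foo_get_done(void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *
 *		sc->sc_buf = item;
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_get_done, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 *
 * pool_get() with PR_WAITOK uses the same mechanism internally via
 * pool_get_done() below.
 */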
470 void
471 pool_request_init(struct pool_request *pr,
472     void (*handler)(void *, void *), void *cookie)
473 {
474 	pr->pr_handler = handler;
475 	pr->pr_cookie = cookie;
476 	pr->pr_item = NULL;
477 }
478 
479 void
480 pool_request(struct pool *pp, struct pool_request *pr)
481 {
482 	mtx_enter(&pp->pr_requests_mtx);
483 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
484 	pool_runqueue(pp, PR_NOWAIT);
485 	mtx_leave(&pp->pr_requests_mtx);
486 }
487 
488 struct pool_get_memory {
489 	struct mutex mtx;
490 	void * volatile v;
491 };
492 
493 /*
494  * Grab an item from the pool.
495  */
496 void *
497 pool_get(struct pool *pp, int flags)
498 {
499 	void *v = NULL;
500 	int slowdown = 0;
501 
502 #ifdef MULTIPROCESSOR
503 	if (pp->pr_cache != NULL) {
504 		v = pool_cache_get(pp);
505 		if (v != NULL)
506 			goto good;
507 	}
508 #endif
509 
510 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
511 
512 	mtx_enter(&pp->pr_mtx);
513 	if (pp->pr_nout >= pp->pr_hardlimit) {
514 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
515 			goto fail;
516 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
517 		if (ISSET(flags, PR_NOWAIT))
518 			goto fail;
519 	}
520 	mtx_leave(&pp->pr_mtx);
521 
522 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
523 		yield();
524 
525 	if (v == NULL) {
526 		struct pool_get_memory mem = {
527 		    MUTEX_INITIALIZER(pp->pr_ipl),
528 		    NULL };
529 		struct pool_request pr;
530 
531 		pool_request_init(&pr, pool_get_done, &mem);
532 		pool_request(pp, &pr);
533 
534 		mtx_enter(&mem.mtx);
535 		while (mem.v == NULL)
536 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
537 		mtx_leave(&mem.mtx);
538 
539 		v = mem.v;
540 	}
541 
542 #ifdef MULTIPROCESSOR
543 good:
544 #endif
545 	if (ISSET(flags, PR_ZERO))
546 		memset(v, 0, pp->pr_size);
547 
548 	return (v);
549 
550 fail:
551 	pp->pr_nfail++;
552 	mtx_leave(&pp->pr_mtx);
553 	return (NULL);
554 }
555 
556 void
557 pool_get_done(void *xmem, void *v)
558 {
559 	struct pool_get_memory *mem = xmem;
560 
561 	mtx_enter(&mem->mtx);
562 	mem->v = v;
563 	mtx_leave(&mem->mtx);
564 
565 	wakeup_one(mem);
566 }
567 
568 void
569 pool_runqueue(struct pool *pp, int flags)
570 {
571 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
572 	struct pool_request *pr;
573 
574 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
575 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
576 
577 	if (pp->pr_requesting++)
578 		return;
579 
580 	do {
581 		pp->pr_requesting = 1;
582 
583 		/* no TAILQ_JOIN? :( */
584 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
585 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
586 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
587 		}
588 		if (TAILQ_EMPTY(&prl))
589 			continue;
590 
591 		mtx_leave(&pp->pr_requests_mtx);
592 
593 		mtx_enter(&pp->pr_mtx);
594 		pr = TAILQ_FIRST(&prl);
595 		while (pr != NULL) {
596 			int slowdown = 0;
597 
598 			if (pp->pr_nout >= pp->pr_hardlimit)
599 				break;
600 
601 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
602 			if (pr->pr_item == NULL) /* || slowdown ? */
603 				break;
604 
605 			pr = TAILQ_NEXT(pr, pr_entry);
606 		}
607 		mtx_leave(&pp->pr_mtx);
608 
609 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
610 		    pr->pr_item != NULL) {
611 			TAILQ_REMOVE(&prl, pr, pr_entry);
612 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
613 		}
614 
615 		mtx_enter(&pp->pr_requests_mtx);
616 	} while (--pp->pr_requesting);
617 
618 	/* no TAILQ_JOIN :( */
619 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
620 		TAILQ_REMOVE(&prl, pr, pr_entry);
621 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
622 	}
623 }
624 
625 void *
626 pool_do_get(struct pool *pp, int flags, int *slowdown)
627 {
628 	struct pool_item *pi;
629 	struct pool_page_header *ph;
630 
631 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
632 
633 	splassert(pp->pr_ipl);
634 
635 	/*
636 	 * Account for this item now to avoid races if we need to give up
637 	 * pr_mtx to allocate a page.
638 	 */
639 	pp->pr_nout++;
640 
641 	if (pp->pr_curpage == NULL) {
642 		mtx_leave(&pp->pr_mtx);
643 		ph = pool_p_alloc(pp, flags, slowdown);
644 		mtx_enter(&pp->pr_mtx);
645 
646 		if (ph == NULL) {
647 			pp->pr_nout--;
648 			return (NULL);
649 		}
650 
651 		pool_p_insert(pp, ph);
652 	}
653 
654 	ph = pp->pr_curpage;
655 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
656 	if (__predict_false(pi == NULL))
657 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
658 
659 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
660 		panic("%s: %s free list modified: "
661 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
662 		    __func__, pp->pr_wchan, ph->ph_page, pi,
663 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
664 	}
665 
666 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
667 
668 #ifdef DIAGNOSTIC
669 	if (pool_debug && POOL_PHPOISON(ph)) {
670 		size_t pidx;
671 		uint32_t pval;
672 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
673 		    &pidx, &pval)) {
674 			int *ip = (int *)(pi + 1);
675 			panic("%s: %s free list modified: "
676 			    "page %p; item addr %p; offset 0x%zx=0x%x",
677 			    __func__, pp->pr_wchan, ph->ph_page, pi,
678 			    pidx * sizeof(int), ip[pidx]);
679 		}
680 	}
681 #endif /* DIAGNOSTIC */
682 
683 	if (ph->ph_nmissing++ == 0) {
684 		/*
685 		 * This page was previously empty.  Move it to the list of
686 		 * partially-full pages.  This page is already curpage.
687 		 */
688 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
689 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
690 
691 		pp->pr_nidle--;
692 	}
693 
694 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
695 		/*
696 		 * This page is now full.  Move it to the full list
697 		 * and select a new current page.
698 		 */
699 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
700 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
701 		pool_update_curpage(pp);
702 	}
703 
704 	pp->pr_nget++;
705 
706 	return (pi);
707 }
708 
709 /*
710  * Return resource to the pool.
711  */
712 void
713 pool_put(struct pool *pp, void *v)
714 {
715 	struct pool_page_header *ph, *freeph = NULL;
716 
717 #ifdef DIAGNOSTIC
718 	if (v == NULL)
719 		panic("%s: NULL item", __func__);
720 #endif
721 
722 #ifdef MULTIPROCESSOR
723 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
724 		pool_cache_put(pp, v);
725 		return;
726 	}
727 #endif
728 
729 	mtx_enter(&pp->pr_mtx);
730 
731 	pool_do_put(pp, v);
732 
733 	pp->pr_nout--;
734 	pp->pr_nput++;
735 
736 	/* is it time to free a page? */
737 	if (pp->pr_nidle > pp->pr_maxpages &&
738 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
739 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
740 		freeph = ph;
741 		pool_p_remove(pp, freeph);
742 	}
743 
744 	mtx_leave(&pp->pr_mtx);
745 
746 	if (freeph != NULL)
747 		pool_p_free(pp, freeph);
748 
749 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
750 		mtx_enter(&pp->pr_requests_mtx);
751 		pool_runqueue(pp, PR_NOWAIT);
752 		mtx_leave(&pp->pr_requests_mtx);
753 	}
754 }
755 
756 void
757 pool_do_put(struct pool *pp, void *v)
758 {
759 	struct pool_item *pi = v;
760 	struct pool_page_header *ph;
761 
762 	splassert(pp->pr_ipl);
763 
764 	ph = pr_find_pagehead(pp, v);
765 
766 #ifdef DIAGNOSTIC
767 	if (pool_debug) {
768 		struct pool_item *qi;
769 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
770 			if (pi == qi) {
771 				panic("%s: %s: double pool_put: %p", __func__,
772 				    pp->pr_wchan, pi);
773 			}
774 		}
775 	}
776 #endif /* DIAGNOSTIC */
777 
778 	pi->pi_magic = POOL_IMAGIC(ph, pi);
779 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
780 #ifdef DIAGNOSTIC
781 	if (POOL_PHPOISON(ph))
782 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
783 #endif /* DIAGNOSTIC */
784 
785 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
786 		/*
787 		 * The page was previously completely full, move it to the
788 		 * partially-full list.
789 		 */
790 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
791 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
792 	}
793 
794 	if (ph->ph_nmissing == 0) {
795 		/*
796 		 * The page is now empty, so move it to the empty page list.
797 		 */
798 		pp->pr_nidle++;
799 
800 		ph->ph_tick = ticks;
801 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
802 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
803 		pool_update_curpage(pp);
804 	}
805 }
806 
807 /*
808  * Add N items to the pool.
809  */
810 int
811 pool_prime(struct pool *pp, int n)
812 {
813 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
814 	struct pool_page_header *ph;
815 	int newpages;
816 
817 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
818 
819 	while (newpages-- > 0) {
820 		int slowdown = 0;
821 
822 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
823 		if (ph == NULL) /* or slowdown? */
824 			break;
825 
826 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
827 	}
828 
829 	mtx_enter(&pp->pr_mtx);
830 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
831 		TAILQ_REMOVE(&pl, ph, ph_entry);
832 		pool_p_insert(pp, ph);
833 	}
834 	mtx_leave(&pp->pr_mtx);
835 
836 	return (0);
837 }
838 
839 struct pool_page_header *
840 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
841 {
842 	struct pool_page_header *ph;
843 	struct pool_item *pi;
844 	caddr_t addr;
845 	int n;
846 
847 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
848 	KASSERT(pp->pr_size >= sizeof(*pi));
849 
850 	addr = pool_allocator_alloc(pp, flags, slowdown);
851 	if (addr == NULL)
852 		return (NULL);
853 
854 	if (POOL_INPGHDR(pp))
855 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
856 	else {
857 		ph = pool_get(&phpool, flags);
858 		if (ph == NULL) {
859 			pool_allocator_free(pp, addr);
860 			return (NULL);
861 		}
862 	}
863 
864 	XSIMPLEQ_INIT(&ph->ph_items);
865 	ph->ph_page = addr;
866 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
867 	ph->ph_colored = addr;
868 	ph->ph_nmissing = 0;
869 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
870 #ifdef DIAGNOSTIC
871 	/* use a bit in ph_magic to record if we poison page items */
872 	if (pool_debug)
873 		SET(ph->ph_magic, POOL_MAGICBIT);
874 	else
875 		CLR(ph->ph_magic, POOL_MAGICBIT);
876 #endif /* DIAGNOSTIC */
877 
878 	n = pp->pr_itemsperpage;
879 	while (n--) {
880 		pi = (struct pool_item *)addr;
881 		pi->pi_magic = POOL_IMAGIC(ph, pi);
882 		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
883 
884 #ifdef DIAGNOSTIC
885 		if (POOL_PHPOISON(ph))
886 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
887 #endif /* DIAGNOSTIC */
888 
889 		addr += pp->pr_size;
890 	}
891 
892 	return (ph);
893 }
894 
895 void
896 pool_p_free(struct pool *pp, struct pool_page_header *ph)
897 {
898 	struct pool_item *pi;
899 
900 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
901 	KASSERT(ph->ph_nmissing == 0);
902 
903 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
904 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
905 			panic("%s: %s free list modified: "
906 			    "page %p; item addr %p; offset 0x%x=0x%lx",
907 			    __func__, pp->pr_wchan, ph->ph_page, pi,
908 			    0, pi->pi_magic);
909 		}
910 
911 #ifdef DIAGNOSTIC
912 		if (POOL_PHPOISON(ph)) {
913 			size_t pidx;
914 			uint32_t pval;
915 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
916 			    &pidx, &pval)) {
917 				int *ip = (int *)(pi + 1);
918 				panic("%s: %s free list modified: "
919 				    "page %p; item addr %p; offset 0x%zx=0x%x",
920 				    __func__, pp->pr_wchan, ph->ph_page, pi,
921 				    pidx * sizeof(int), ip[pidx]);
922 			}
923 		}
924 #endif
925 	}
926 
927 	pool_allocator_free(pp, ph->ph_page);
928 
929 	if (!POOL_INPGHDR(pp))
930 		pool_put(&phpool, ph);
931 }
932 
933 void
934 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
935 {
936 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
937 
938 	/* If the pool was depleted, point at the new page */
939 	if (pp->pr_curpage == NULL)
940 		pp->pr_curpage = ph;
941 
942 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
943 	if (!POOL_INPGHDR(pp))
944 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
945 
946 	pp->pr_nitems += pp->pr_itemsperpage;
947 	pp->pr_nidle++;
948 
949 	pp->pr_npagealloc++;
950 	if (++pp->pr_npages > pp->pr_hiwat)
951 		pp->pr_hiwat = pp->pr_npages;
952 }
953 
954 void
955 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
956 {
957 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
958 
959 	pp->pr_npagefree++;
960 	pp->pr_npages--;
961 	pp->pr_nidle--;
962 	pp->pr_nitems -= pp->pr_itemsperpage;
963 
964 	if (!POOL_INPGHDR(pp))
965 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
966 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
967 
968 	pool_update_curpage(pp);
969 }
970 
971 void
972 pool_update_curpage(struct pool *pp)
973 {
974 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
975 	if (pp->pr_curpage == NULL) {
976 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
977 	}
978 }
979 
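/*
 * Illustrative arithmetic (made-up numbers): with pr_itemsperpage 41,
 * pool_setlowat(pp, 100) sets pr_minpages to roundup(100, 41) / 41 = 3
 * and, if only 60 items are currently in the pool, primes the missing
 * 40 (which pool_prime() rounds up to one more page).
 */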
980 void
981 pool_setlowat(struct pool *pp, int n)
982 {
983 	int prime = 0;
984 
985 	mtx_enter(&pp->pr_mtx);
986 	pp->pr_minitems = n;
987 	pp->pr_minpages = (n == 0)
988 		? 0
989 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
990 
991 	if (pp->pr_nitems < n)
992 		prime = n - pp->pr_nitems;
993 	mtx_leave(&pp->pr_mtx);
994 
995 	if (prime > 0)
996 		pool_prime(pp, prime);
997 }
998 
999 void
1000 pool_sethiwat(struct pool *pp, int n)
1001 {
1002 	pp->pr_maxpages = (n == 0)
1003 		? 0
1004 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1005 }
1006 
1007 int
1008 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1009 {
1010 	int error = 0;
1011 
1012 	if (n < pp->pr_nout) {
1013 		error = EINVAL;
1014 		goto done;
1015 	}
1016 
1017 	pp->pr_hardlimit = n;
1018 	pp->pr_hardlimit_warning = warnmsg;
1019 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1020 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1021 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1022 
1023 done:
1024 	return (error);
1025 }
1026 
1027 void
1028 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1029 {
1030 	pp->pr_crange = mode;
1031 }
1032 
1033 /*
1034  * Release all complete pages that have not been used recently.
1035  *
1036  * Returns non-zero if any pages have been reclaimed.
1037  */
1038 int
1039 pool_reclaim(struct pool *pp)
1040 {
1041 	struct pool_page_header *ph, *phnext;
1042 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1043 
1044 	mtx_enter(&pp->pr_mtx);
1045 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1046 		phnext = TAILQ_NEXT(ph, ph_entry);
1047 
1048 		/* Check our minimum page claim */
1049 		if (pp->pr_npages <= pp->pr_minpages)
1050 			break;
1051 
1052 		/*
1053 		 * If freeing this page would put us below
1054 		 * the low water mark, stop now.
1055 		 */
1056 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1057 		    pp->pr_minitems)
1058 			break;
1059 
1060 		pool_p_remove(pp, ph);
1061 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1062 	}
1063 	mtx_leave(&pp->pr_mtx);
1064 
1065 	if (TAILQ_EMPTY(&pl))
1066 		return (0);
1067 
1068 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1069 		TAILQ_REMOVE(&pl, ph, ph_entry);
1070 		pool_p_free(pp, ph);
1071 	}
1072 
1073 	return (1);
1074 }
1075 
1076 /*
1077  * Release all complete pages that have not been used recently
1078  * from all pools.
1079  */
1080 void
1081 pool_reclaim_all(void)
1082 {
1083 	struct pool	*pp;
1084 
1085 	rw_enter_read(&pool_lock);
1086 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1087 		pool_reclaim(pp);
1088 	rw_exit_read(&pool_lock);
1089 }
1090 
1091 #ifdef DDB
1092 #include <machine/db_machdep.h>
1093 #include <ddb/db_output.h>
1094 
1095 /*
1096  * Diagnostic helpers.
1097  */
1098 void
1099 pool_printit(struct pool *pp, const char *modif,
1100     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1101 {
1102 	pool_print1(pp, modif, pr);
1103 }
1104 
1105 void
1106 pool_print_pagelist(struct pool_pagelist *pl,
1107     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1108 {
1109 	struct pool_page_header *ph;
1110 	struct pool_item *pi;
1111 
1112 	TAILQ_FOREACH(ph, pl, ph_entry) {
1113 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1114 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1115 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1116 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1117 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1118 				    pi, pi->pi_magic);
1119 			}
1120 		}
1121 	}
1122 }
1123 
1124 void
1125 pool_print1(struct pool *pp, const char *modif,
1126     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1127 {
1128 	struct pool_page_header *ph;
1129 	int print_pagelist = 0;
1130 	char c;
1131 
1132 	while ((c = *modif++) != '\0') {
1133 		if (c == 'p')
1134 			print_pagelist = 1;
1136 	}
1137 
1138 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1139 	    pp->pr_maxcolors);
1140 	(*pr)("\talloc %p\n", pp->pr_alloc);
1141 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1142 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1143 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1144 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1145 
1146 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1147 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1148 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1149 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1150 
1151 	if (print_pagelist == 0)
1152 		return;
1153 
1154 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1155 		(*pr)("\n\tempty page list:\n");
1156 	pool_print_pagelist(&pp->pr_emptypages, pr);
1157 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1158 		(*pr)("\n\tfull page list:\n");
1159 	pool_print_pagelist(&pp->pr_fullpages, pr);
1160 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1161 		(*pr)("\n\tpartial-page list:\n");
1162 	pool_print_pagelist(&pp->pr_partpages, pr);
1163 
1164 	if (pp->pr_curpage == NULL)
1165 		(*pr)("\tno current page\n");
1166 	else
1167 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1168 }
1169 
1170 void
1171 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1172 {
1173 	struct pool *pp;
1174 	char maxp[16];
1175 	int ovflw;
1176 	char mode;
1177 
1178 	mode = modif[0];
1179 	if (mode != '\0' && mode != 'a') {
1180 		db_printf("usage: show all pools [/a]\n");
1181 		return;
1182 	}
1183 
1184 	if (mode == '\0')
1185 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1186 		    "Name",
1187 		    "Size",
1188 		    "Requests",
1189 		    "Fail",
1190 		    "Releases",
1191 		    "Pgreq",
1192 		    "Pgrel",
1193 		    "Npage",
1194 		    "Hiwat",
1195 		    "Minpg",
1196 		    "Maxpg",
1197 		    "Idle");
1198 	else
1199 		db_printf("%-12s %18s %18s\n",
1200 		    "Name", "Address", "Allocator");
1201 
1202 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1203 		if (mode == 'a') {
1204 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1205 			    pp->pr_alloc);
1206 			continue;
1207 		}
1208 
1209 		if (!pp->pr_nget)
1210 			continue;
1211 
1212 		if (pp->pr_maxpages == UINT_MAX)
1213 			snprintf(maxp, sizeof maxp, "inf");
1214 		else
1215 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1216 
1217 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1218 	(ovflw) += db_printf((fmt),			\
1219 	    (width) - (fixed) - (ovflw) > 0 ?		\
1220 	    (width) - (fixed) - (ovflw) : 0,		\
1221 	    (val)) - (width);				\
1222 	if ((ovflw) < 0)				\
1223 		(ovflw) = 0;				\
1224 } while (/* CONSTCOND */0)
1225 
1226 		ovflw = 0;
1227 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1228 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1229 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1230 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1231 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1232 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1233 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1234 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1235 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1236 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1237 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1238 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1239 
1240 		pool_chk(pp);
1241 	}
1242 }
1243 #endif /* DDB */
1244 
1245 #if defined(POOL_DEBUG) || defined(DDB)
1246 int
1247 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1248 {
1249 	struct pool_item *pi;
1250 	caddr_t page;
1251 	int n;
1252 	const char *label = pp->pr_wchan;
1253 
1254 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1255 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1256 		printf("%s: ", label);
1257 		printf("pool(%p:%s): page inconsistency: page %p; "
1258 		    "at page head addr %p (p %p)\n",
1259 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1260 		return 1;
1261 	}
1262 
1263 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1264 	     pi != NULL;
1265 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1266 		if ((caddr_t)pi < ph->ph_page ||
1267 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1268 			printf("%s: ", label);
1269 			printf("pool(%p:%s): page inconsistency: page %p;"
1270 			    " item ordinal %d; addr %p\n", pp,
1271 			    pp->pr_wchan, ph->ph_page, n, pi);
1272 			return (1);
1273 		}
1274 
1275 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1276 			printf("%s: ", label);
1277 			printf("pool(%p:%s): free list modified: "
1278 			    "page %p; item ordinal %d; addr %p "
1279 			    "(p %p); offset 0x%x=0x%lx\n",
1280 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1281 			    0, pi->pi_magic);
1282 		}
1283 
1284 #ifdef DIAGNOSTIC
1285 		if (POOL_PHPOISON(ph)) {
1286 			size_t pidx;
1287 			uint32_t pval;
1288 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1289 			    &pidx, &pval)) {
1290 				int *ip = (int *)(pi + 1);
1291 				printf("pool(%s): free list modified: "
1292 				    "page %p; item ordinal %d; addr %p "
1293 				    "(p %p); offset 0x%zx=0x%x\n",
1294 				    pp->pr_wchan, ph->ph_page, n, pi,
1295 				    page, pidx * sizeof(int), ip[pidx]);
1296 			}
1297 		}
1298 #endif /* DIAGNOSTIC */
1299 	}
1300 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1301 		printf("pool(%p:%s): page inconsistency: page %p;"
1302 		    " %d on list, %d missing, %d items per page\n", pp,
1303 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1304 		    pp->pr_itemsperpage);
1305 		return 1;
1306 	}
1307 	if (expected >= 0 && n != expected) {
1308 		printf("pool(%p:%s): page inconsistency: page %p;"
1309 		    " %d on list, %d missing, %d expected\n", pp,
1310 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1311 		    expected);
1312 		return 1;
1313 	}
1314 	return 0;
1315 }
1316 
1317 int
1318 pool_chk(struct pool *pp)
1319 {
1320 	struct pool_page_header *ph;
1321 	int r = 0;
1322 
1323 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1324 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1325 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1326 		r += pool_chk_page(pp, ph, 0);
1327 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1328 		r += pool_chk_page(pp, ph, -1);
1329 
1330 	return (r);
1331 }
1332 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1333 
1334 #ifdef DDB
1335 void
1336 pool_walk(struct pool *pp, int full,
1337     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1338     void (*func)(void *, int, int (*)(const char *, ...)
1339 	    __attribute__((__format__(__kprintf__,1,2)))))
1340 {
1341 	struct pool_page_header *ph;
1342 	struct pool_item *pi;
1343 	caddr_t cp;
1344 	int n;
1345 
1346 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1347 		cp = ph->ph_colored;
1348 		n = ph->ph_nmissing;
1349 
1350 		while (n--) {
1351 			func(cp, full, pr);
1352 			cp += pp->pr_size;
1353 		}
1354 	}
1355 
1356 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1357 		cp = ph->ph_colored;
1358 		n = ph->ph_nmissing;
1359 
1360 		do {
1361 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1362 				if (cp == (caddr_t)pi)
1363 					break;
1364 			}
1365 			if (cp != (caddr_t)pi) {
1366 				func(cp, full, pr);
1367 				n--;
1368 			}
1369 
1370 			cp += pp->pr_size;
1371 		} while (n > 0);
1372 	}
1373 }
1374 #endif
1375 
1376 /*
1377  * We have five different sysctls.
1378  * kern.pool.npools - the number of pools.
1379  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1380  * kern.pool.name.<pool#> - the name for pool#.
 * The KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS nodes export the pool
 * cache statistics for pool#, globally and per CPU respectively.
1381  */
1382 int
1383 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1384 {
1385 	struct kinfo_pool pi;
1386 	struct pool *pp;
1387 	int rv = ENOENT;
1388 
1389 	switch (name[0]) {
1390 	case KERN_POOL_NPOOLS:
1391 		if (namelen != 1)
1392 			return (ENOTDIR);
1393 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1394 
1395 	case KERN_POOL_NAME:
1396 	case KERN_POOL_POOL:
1397 	case KERN_POOL_CACHE:
1398 	case KERN_POOL_CACHE_CPUS:
1399 		break;
1400 	default:
1401 		return (EOPNOTSUPP);
1402 	}
1403 
1404 	if (namelen != 2)
1405 		return (ENOTDIR);
1406 
1407 	rw_enter_read(&pool_lock);
1408 
1409 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1410 		if (name[1] == pp->pr_serial)
1411 			break;
1412 	}
1413 
1414 	if (pp == NULL)
1415 		goto done;
1416 
1417 	switch (name[0]) {
1418 	case KERN_POOL_NAME:
1419 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1420 		break;
1421 	case KERN_POOL_POOL:
1422 		memset(&pi, 0, sizeof(pi));
1423 
1424 		mtx_enter(&pp->pr_mtx);
1425 		pi.pr_size = pp->pr_size;
1426 		pi.pr_pgsize = pp->pr_pgsize;
1427 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1428 		pi.pr_npages = pp->pr_npages;
1429 		pi.pr_minpages = pp->pr_minpages;
1430 		pi.pr_maxpages = pp->pr_maxpages;
1431 		pi.pr_hardlimit = pp->pr_hardlimit;
1432 		pi.pr_nout = pp->pr_nout;
1433 		pi.pr_nitems = pp->pr_nitems;
1434 		pi.pr_nget = pp->pr_nget;
1435 		pi.pr_nput = pp->pr_nput;
1436 		pi.pr_nfail = pp->pr_nfail;
1437 		pi.pr_npagealloc = pp->pr_npagealloc;
1438 		pi.pr_npagefree = pp->pr_npagefree;
1439 		pi.pr_hiwat = pp->pr_hiwat;
1440 		pi.pr_nidle = pp->pr_nidle;
1441 		mtx_leave(&pp->pr_mtx);
1442 
1443 		pool_cache_pool_info(pp, &pi);
1444 
1445 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1446 		break;
1447 
1448 	case KERN_POOL_CACHE:
1449 		rv = pool_cache_info(pp, oldp, oldlenp);
1450 		break;
1451 
1452 	case KERN_POOL_CACHE_CPUS:
1453 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1454 		break;
1455 	}
1456 
1457 done:
1458 	rw_exit_read(&pool_lock);
1459 
1460 	return (rv);
1461 }
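/*
 * Illustrative userland read of one pool via sysctl(2) (a sketch, error
 * handling omitted; "serial" is a pool serial number the caller already
 * knows, e.g. from walking 1 to kern.pool.npools):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */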
1462 
1463 void
1464 pool_gc_sched(void *null)
1465 {
1466 	task_add(systqmp, &pool_gc_task);
1467 }
1468 
1469 void
1470 pool_gc_pages(void *null)
1471 {
1472 	struct pool *pp;
1473 	struct pool_page_header *ph, *freeph;
1474 	int s;
1475 
1476 	rw_enter_read(&pool_lock);
1477 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1478 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1479 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1480 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1481 			continue;
1482 
1483 		/* is it time to free a page? */
1484 		if (pp->pr_nidle > pp->pr_minpages &&
1485 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1486 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1487 			freeph = ph;
1488 			pool_p_remove(pp, freeph);
1489 		} else
1490 			freeph = NULL;
1491 
1492 		mtx_leave(&pp->pr_mtx);
1493 
1494 		if (freeph != NULL)
1495 			pool_p_free(pp, freeph);
1496 	}
1497 	splx(s);
1498 	rw_exit_read(&pool_lock);
1499 
1500 	timeout_add_sec(&pool_gc_tick, 1);
1501 }
1502 
1503 /*
1504  * Pool backend allocators.
1505  */
1506 
1507 void *
1508 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1509 {
1510 	void *v;
1511 
1512 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1513 
1514 #ifdef DIAGNOSTIC
1515 	if (v != NULL && POOL_INPGHDR(pp)) {
1516 		vaddr_t addr = (vaddr_t)v;
1517 		if ((addr & pp->pr_pgmask) != addr) {
1518 			panic("%s: %s page address %p isn't aligned to %u",
1519 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1520 		}
1521 	}
1522 #endif
1523 
1524 	return (v);
1525 }
1526 
1527 void
1528 pool_allocator_free(struct pool *pp, void *v)
1529 {
1530 	struct pool_allocator *pa = pp->pr_alloc;
1531 
1532 	(*pa->pa_free)(pp, v);
1533 }
1534 
1535 void *
1536 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1537 {
1538 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1539 
1540 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1541 	kd.kd_slowdown = slowdown;
1542 
1543 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1544 }
1545 
1546 void
1547 pool_page_free(struct pool *pp, void *v)
1548 {
1549 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1550 }
1551 
1552 void *
1553 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1554 {
1555 	struct kmem_va_mode kv = kv_intrsafe;
1556 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1557 	void *v;
1558 	int s;
1559 
1560 	if (POOL_INPGHDR(pp))
1561 		kv.kv_align = pp->pr_pgsize;
1562 
1563 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1564 	kd.kd_slowdown = slowdown;
1565 
1566 	s = splvm();
1567 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1568 	splx(s);
1569 
1570 	return (v);
1571 }
1572 
1573 void
1574 pool_multi_free(struct pool *pp, void *v)
1575 {
1576 	struct kmem_va_mode kv = kv_intrsafe;
1577 	int s;
1578 
1579 	if (POOL_INPGHDR(pp))
1580 		kv.kv_align = pp->pr_pgsize;
1581 
1582 	s = splvm();
1583 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1584 	splx(s);
1585 }
1586 
1587 void *
1588 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1589 {
1590 	struct kmem_va_mode kv = kv_any;
1591 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1592 	void *v;
1593 
1594 	if (POOL_INPGHDR(pp))
1595 		kv.kv_align = pp->pr_pgsize;
1596 
1597 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1598 	kd.kd_slowdown = slowdown;
1599 
1600 	KERNEL_LOCK();
1601 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1602 	KERNEL_UNLOCK();
1603 
1604 	return (v);
1605 }
1606 
1607 void
1608 pool_multi_free_ni(struct pool *pp, void *v)
1609 {
1610 	struct kmem_va_mode kv = kv_any;
1611 
1612 	if (POOL_INPGHDR(pp))
1613 		kv.kv_align = pp->pr_pgsize;
1614 
1615 	KERNEL_LOCK();
1616 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1617 	KERNEL_UNLOCK();
1618 }
1619 
1620 #ifdef MULTIPROCESSOR
1621 
1622 struct pool pool_caches; /* per cpu cache entries */
1623 
1624 void
1625 pool_cache_init(struct pool *pp)
1626 {
1627 	struct cpumem *cm;
1628 	struct pool_cache *pc;
1629 	struct cpumem_iter i;
1630 
1631 	if (pool_caches.pr_size == 0) {
1632 		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
1633 		    IPL_NONE, PR_WAITOK, "plcache", NULL);
1634 	}
1635 
1636 	/* must be able to use the pool items as cache list items */
1637 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1638 
1639 	cm = cpumem_get(&pool_caches);
1640 
1641 	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
1642 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1643 	TAILQ_INIT(&pp->pr_cache_lists);
1644 	pp->pr_cache_nlist = 0;
1645 	pp->pr_cache_items = 8;
1646 	pp->pr_cache_contention = 0;
1647 
1648 	CPUMEM_FOREACH(pc, &i, cm) {
1649 		pc->pc_actv = NULL;
1650 		pc->pc_nactv = 0;
1651 		pc->pc_prev = NULL;
1652 
1653 		pc->pc_nget = 0;
1654 		pc->pc_nfail = 0;
1655 		pc->pc_nput = 0;
1656 		pc->pc_nlget = 0;
1657 		pc->pc_nlfail = 0;
1658 		pc->pc_nlput = 0;
1659 		pc->pc_nout = 0;
1660 	}
1661 
1662 	pp->pr_cache = cm;
1663 }
1664 
1665 static inline void
1666 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1667 {
1668 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1669 
1670 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1671 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1672 }
1673 
1674 static inline void
1675 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1676 {
1677 	unsigned long *entry;
1678 	unsigned long val;
1679 
1680 	entry = (unsigned long *)&ci->ci_nextl;
1681 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1682 	if (*entry != val)
1683 		goto fail;
1684 
1685 	entry++;
1686 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1687 	if (*entry != val)
1688 		goto fail;
1689 
1690 	return;
1691 
1692 fail:
1693 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1694 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1695 	    *entry, val);
1696 }
1697 
1698 static inline void
1699 pool_list_enter(struct pool *pp)
1700 {
1701 	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
1702 		mtx_enter(&pp->pr_cache_mtx);
1703 		pp->pr_cache_contention++;
1704 	}
1705 }
1706 
1707 static inline void
1708 pool_list_leave(struct pool *pp)
1709 {
1710 	mtx_leave(&pp->pr_cache_mtx);
1711 }
1712 
1713 static inline struct pool_cache_item *
1714 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1715 {
1716 	struct pool_cache_item *pl;
1717 
1718 	pool_list_enter(pp);
1719 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1720 	if (pl != NULL) {
1721 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1722 		pp->pr_cache_nlist--;
1723 
1724 		pool_cache_item_magic(pp, pl);
1725 
1726 		pc->pc_nlget++;
1727 	} else
1728 		pc->pc_nlfail++;
1729 
1730 	/* fold this CPU's nout into the global count while we have the lock */
1731 	pp->pr_cache_nout += pc->pc_nout;
1732 	pc->pc_nout = 0;
1733 	pool_list_leave(pp);
1734 
1735 	return (pl);
1736 }
1737 
1738 static inline void
1739 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1740     struct pool_cache_item *ci)
1741 {
1742 	pool_list_enter(pp);
1743 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1744 	pp->pr_cache_nlist++;
1745 
1746 	pc->pc_nlput++;
1747 
1748 	/* fold this CPU's nout into the global count while we have the lock */
1749 	pp->pr_cache_nout += pc->pc_nout;
1750 	pc->pc_nout = 0;
1751 	pool_list_leave(pp);
1752 }
1753 
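/*
 * pc_gen behaves like a sequence lock: pool_cache_enter() bumps it to
 * an odd value while this CPU is working on its cache and
 * pool_cache_leave() bumps it back to even.  The sysctl readers below
 * spin while it is odd and retry if it changed across their reads, so
 * they never report a half-updated set of counters.
 */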
1754 static inline struct pool_cache *
1755 pool_cache_enter(struct pool *pp, int *s)
1756 {
1757 	struct pool_cache *pc;
1758 
1759 	pc = cpumem_enter(pp->pr_cache);
1760 	*s = splraise(pp->pr_ipl);
1761 	pc->pc_gen++;
1762 
1763 	return (pc);
1764 }
1765 
1766 static inline void
1767 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1768 {
1769 	pc->pc_gen++;
1770 	splx(s);
1771 	cpumem_leave(pp->pr_cache, pc);
1772 }
1773 
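/*
 * Each CPU keeps up to two lists of free items: pc_actv, which gets and
 * puts operate on, and pc_prev, the previously filled list.  When
 * pc_actv grows past pr_cache_items, pc_prev is pushed onto the shared
 * pr_cache_lists (under pr_cache_mtx) and the full list takes its
 * place.  Gets drain pc_actv, then pc_prev, then pull a whole list off
 * the shared queue, so the common case touches only per-CPU state.
 */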
1774 void *
1775 pool_cache_get(struct pool *pp)
1776 {
1777 	struct pool_cache *pc;
1778 	struct pool_cache_item *ci;
1779 	int s;
1780 
1781 	pc = pool_cache_enter(pp, &s);
1782 
1783 	if (pc->pc_actv != NULL) {
1784 		ci = pc->pc_actv;
1785 	} else if (pc->pc_prev != NULL) {
1786 		ci = pc->pc_prev;
1787 		pc->pc_prev = NULL;
1788 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1789 		pc->pc_nfail++;
1790 		goto done;
1791 	}
1792 
1793 	pool_cache_item_magic_check(pp, ci);
1794 #ifdef DIAGNOSTIC
1795 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1796 		size_t pidx;
1797 		uint32_t pval;
1798 
1799 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1800 		    &pidx, &pval)) {
1801 			int *ip = (int *)(ci + 1);
1802 			ip += pidx;
1803 
1804 			panic("%s: %s cpu free list modified: "
1805 			    "item addr %p+%zu 0x%x!=0x%x",
1806 			    __func__, pp->pr_wchan, ci,
1807 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1808 		}
1809 	}
1810 #endif
1811 
1812 	pc->pc_actv = ci->ci_next;
1813 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1814 	pc->pc_nget++;
1815 	pc->pc_nout++;
1816 
1817 done:
1818 	pool_cache_leave(pp, pc, s);
1819 
1820 	return (ci);
1821 }
1822 
1823 void
1824 pool_cache_put(struct pool *pp, void *v)
1825 {
1826 	struct pool_cache *pc;
1827 	struct pool_cache_item *ci = v;
1828 	unsigned long nitems;
1829 	int s;
1830 #ifdef DIAGNOSTIC
1831 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1832 
1833 	if (poison)
1834 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1835 #endif
1836 
1837 	pc = pool_cache_enter(pp, &s);
1838 
1839 	nitems = pc->pc_nactv;
1840 	if (nitems >= pp->pr_cache_items) {
1841 		if (pc->pc_prev != NULL)
1842 			pool_cache_list_free(pp, pc, pc->pc_prev);
1843 
1844 		pc->pc_prev = pc->pc_actv;
1845 
1846 		pc->pc_actv = NULL;
1847 		pc->pc_nactv = 0;
1848 		nitems = 0;
1849 	}
1850 
1851 	ci->ci_next = pc->pc_actv;
1852 	ci->ci_nitems = ++nitems;
1853 #ifdef DIAGNOSTIC
1854 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1855 #endif
1856 	pool_cache_item_magic(pp, ci);
1857 
1858 	pc->pc_actv = ci;
1859 	pc->pc_nactv = nitems;
1860 
1861 	pc->pc_nput++;
1862 	pc->pc_nout--;
1863 
1864 	pool_cache_leave(pp, pc, s);
1865 }
1866 
1867 struct pool_cache_item *
1868 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1869 {
1870 	struct pool_cache_item *rpl, *next;
1871 
1872 	if (pl == NULL)
1873 		return (NULL);
1874 
1875 	rpl = TAILQ_NEXT(pl, ci_nextl);
1876 
1877 	mtx_enter(&pp->pr_mtx);
1878 	do {
1879 		next = pl->ci_next;
1880 		pool_do_put(pp, pl);
1881 		pl = next;
1882 	} while (pl != NULL);
1883 	mtx_leave(&pp->pr_mtx);
1884 
1885 	return (rpl);
1886 }
1887 
1888 void
1889 pool_cache_destroy(struct pool *pp)
1890 {
1891 	struct pool_cache *pc;
1892 	struct pool_cache_item *pl;
1893 	struct cpumem_iter i;
1894 	struct cpumem *cm;
1895 
1896 	cm = pp->pr_cache;
1897 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1898 
1899 	CPUMEM_FOREACH(pc, &i, cm) {
1900 		pool_cache_list_put(pp, pc->pc_actv);
1901 		pool_cache_list_put(pp, pc->pc_prev);
1902 	}
1903 
1904 	cpumem_put(&pool_caches, cm);
1905 
1906 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1907 	while (pl != NULL)
1908 		pl = pool_cache_list_put(pp, pl);
1909 }
1910 
1911 void
1912 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
1913 {
1914 	struct pool_cache *pc;
1915 	struct cpumem_iter i;
1916 
1917 	if (pp->pr_cache == NULL)
1918 		return;
1919 
1920 	/* loop through the caches twice to collect stats */
1921 
1922 	/* once without the mtx so we can yield while reading nget/nput */
1923 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
1924 		uint64_t gen, nget, nput;
1925 
1926 		do {
1927 			while ((gen = pc->pc_gen) & 1)
1928 				yield();
1929 
1930 			nget = pc->pc_nget;
1931 			nput = pc->pc_nput;
1932 		} while (gen != pc->pc_gen);
1933 
1934 		pi->pr_nget += nget;
1935 		pi->pr_nput += nput;
1936 	}
1937 
1938 	/* and once with the mtx so we can get consistent nout values */
1939 	mtx_enter(&pp->pr_cache_mtx);
1940 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
1941 		pi->pr_nout += pc->pc_nout;
1942 
1943 	pi->pr_nout += pp->pr_cache_nout;
1944 	mtx_leave(&pp->pr_cache_mtx);
1945 }
1946 
1947 int
1948 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
1949 {
1950 	struct kinfo_pool_cache kpc;
1951 
1952 	if (pp->pr_cache == NULL)
1953 		return (EOPNOTSUPP);
1954 
1955 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
1956 
1957 	mtx_enter(&pp->pr_cache_mtx);
1958 	kpc.pr_ngc = 0; /* notyet */
1959 	kpc.pr_len = pp->pr_cache_items;
1960 	kpc.pr_nlist = pp->pr_cache_nlist;
1961 	kpc.pr_contention = pp->pr_cache_contention;
1962 	mtx_leave(&pp->pr_cache_mtx);
1963 
1964 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
1965 }
1966 
1967 int
1968 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
1969 {
1970 	struct pool_cache *pc;
1971 	struct kinfo_pool_cache_cpu *kpcc, *info;
1972 	unsigned int cpu = 0;
1973 	struct cpumem_iter i;
1974 	int error = 0;
1975 	size_t len;
1976 
1977 	if (pp->pr_cache == NULL)
1978 		return (EOPNOTSUPP);
1979 	if (*oldlenp % sizeof(*kpcc))
1980 		return (EINVAL);
1981 
1982 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
1983 	    M_WAITOK|M_CANFAIL|M_ZERO);
1984 	if (kpcc == NULL)
1985 		return (EIO);
1986 
1987 	len = ncpusfound * sizeof(*kpcc);
1988 
1989 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
1990 		uint64_t gen;
1991 
1992 		if (cpu >= ncpusfound) {
1993 			error = EIO;
1994 			goto err;
1995 		}
1996 
1997 		info = &kpcc[cpu];
1998 		info->pr_cpu = cpu;
1999 
2000 		do {
2001 			while ((gen = pc->pc_gen) & 1)
2002 				yield();
2003 
2004 			info->pr_nget = pc->pc_nget;
2005 			info->pr_nfail = pc->pc_nfail;
2006 			info->pr_nput = pc->pc_nput;
2007 			info->pr_nlget = pc->pc_nlget;
2008 			info->pr_nlfail = pc->pc_nlfail;
2009 			info->pr_nlput = pc->pc_nlput;
2010 		} while (gen != pc->pc_gen);
2011 
2012 		cpu++;
2013 	}
2014 
2015 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2016 err:
2017 	free(kpcc, M_TEMP, len);
2018 
2019 	return (error);
2020 }
2021 #else /* MULTIPROCESSOR */
2022 void
2023 pool_cache_init(struct pool *pp)
2024 {
2025 	/* nop */
2026 }
2027 
2028 void
2029 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2030 {
2031 	/* nop */
2032 }
2033 
2034 int
2035 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2036 {
2037 	return (EOPNOTSUPP);
2038 }
2039 
2040 int
2041 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2042 {
2043 	return (EOPNOTSUPP);
2044 }
2045 #endif /* MULTIPROCESSOR */
2046