1 /*	$OpenBSD: subr_pool.c,v 1.212 2017/06/15 03:50:50 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
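/*
 * Illustrative usage sketch (not part of this file; "struct foo", "foo_pool"
 * and the foo_* functions are hypothetical):
 *
 *	struct pool foo_pool;
 *
 *	void
 *	foo_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE,
 *		    PR_WAITOK, "foopl", NULL);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 */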
61 
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, we're screwed, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71 
72 /* Lock protecting the global pool state above (pool_head, pool_serial, pool_count) */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures */
76 struct pool phpool;
77 
78 struct pool_item {
79 	u_long				pi_magic;
80 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
81 };
82 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
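/*
 * A free item's pi_magic is the XOR of its own address and the page header's
 * random ph_magic; pool_do_get() and pool_p_free() recompute POOL_IMAGIC()
 * and panic on a mismatch, which catches writes to the first word of an item
 * after it has been freed.
 */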
83 
84 struct pool_page_header {
85 	/* Page headers */
86 	TAILQ_ENTRY(pool_page_header)
87 				ph_entry;	/* pool page list */
88 	XSIMPLEQ_HEAD(, pool_item)
89 				ph_items;	/* free items on the page */
90 	RBT_ENTRY(pool_page_header)
91 				ph_node;	/* off-page page headers */
92 	unsigned int		ph_nmissing;	/* # of chunks in use */
93 	caddr_t			ph_page;	/* this page's address */
94 	caddr_t			ph_colored;	/* page's colored address */
95 	unsigned long		ph_magic;
96 	int			ph_tick;
97 };
98 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
99 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
100 
101 #ifdef MULTIPROCESSOR
102 struct pool_cache_item {
103 	struct pool_cache_item	*ci_next;	/* next item in list */
104 	unsigned long		 ci_nitems;	/* number of items in list */
105 	TAILQ_ENTRY(pool_cache_item)
106 				 ci_nextl;	/* entry in list of lists */
107 };
108 
109 /* the top bit of the nitems count records whether the cached item is poisoned */
110 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
111 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
112 
113 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
114     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
115 
116 #define POOL_CACHE_ITEM_POISONED(_ci)					\
117     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
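/*
 * e.g. a ci_nitems of (POOL_CACHE_ITEM_NITEMS_POISON | 5) describes a
 * poisoned list head holding 5 items: POOL_CACHE_ITEM_NITEMS() yields 5
 * and POOL_CACHE_ITEM_POISONED() is true.
 */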
118 
119 struct pool_cache {
120 	struct pool_cache_item	*pc_actv;	/* active list of items */
121 	unsigned long		 pc_nactv;	/* actv head nitems cache */
122 	struct pool_cache_item	*pc_prev;	/* previous list of items */
123 
124 	uint64_t		 pc_gen;	/* generation number */
125 	uint64_t		 pc_nget;	/* # of successful requests */
126 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
127 	uint64_t		 pc_nput;	/* # of releases */
128 	uint64_t		 pc_nlget;	/* # of list requests */
129 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
130 	uint64_t		 pc_nlput;	/* # of list releases */
131 
132 	int			 pc_nout;
133 };
134 
135 void	*pool_cache_get(struct pool *);
136 void	 pool_cache_put(struct pool *, void *);
137 void	 pool_cache_destroy(struct pool *);
138 #endif
139 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
140 int	 pool_cache_info(struct pool *, void *, size_t *);
141 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
142 
143 #ifdef POOL_DEBUG
144 int	pool_debug = 1;
145 #else
146 int	pool_debug = 0;
147 #endif
148 
149 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
150 
151 struct pool_page_header *
152 	 pool_p_alloc(struct pool *, int, int *);
153 void	 pool_p_insert(struct pool *, struct pool_page_header *);
154 void	 pool_p_remove(struct pool *, struct pool_page_header *);
155 void	 pool_p_free(struct pool *, struct pool_page_header *);
156 
157 void	 pool_update_curpage(struct pool *);
158 void	*pool_do_get(struct pool *, int, int *);
159 int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
160 int	 pool_chk(struct pool *);
161 void	 pool_get_done(void *, void *);
162 void	 pool_runqueue(struct pool *, int);
163 
164 void	*pool_allocator_alloc(struct pool *, int, int *);
165 void	 pool_allocator_free(struct pool *, void *);
166 
167 /*
168  * The default pool allocator.
169  */
170 void	*pool_page_alloc(struct pool *, int, int *);
171 void	pool_page_free(struct pool *, void *);
172 
173 /*
174  * safe for interrupts; this is the default allocator
175  */
176 struct pool_allocator pool_allocator_single = {
177 	pool_page_alloc,
178 	pool_page_free,
179 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
180 };
181 
182 void	*pool_multi_alloc(struct pool *, int, int *);
183 void	pool_multi_free(struct pool *, void *);
184 
185 struct pool_allocator pool_allocator_multi = {
186 	pool_multi_alloc,
187 	pool_multi_free,
188 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
189 };
190 
191 void	*pool_multi_alloc_ni(struct pool *, int, int *);
192 void	pool_multi_free_ni(struct pool *, void *);
193 
194 struct pool_allocator pool_allocator_multi_ni = {
195 	pool_multi_alloc_ni,
196 	pool_multi_free_ni,
197 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
198 };
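/*
 * A subsystem may also pass its own allocator to pool_init(); a minimal
 * sketch (the foo_* names are hypothetical):
 *
 *	void	*foo_page_alloc(struct pool *, int, int *);
 *	void	 foo_page_free(struct pool *, void *);
 *
 *	struct pool_allocator foo_allocator = {
 *		foo_page_alloc,
 *		foo_page_free,
 *		POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 *	};
 */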
199 
200 #ifdef DDB
201 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
202 	     __attribute__((__format__(__kprintf__,1,2))));
203 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
204 	     __attribute__((__format__(__kprintf__,1,2))));
205 #endif
206 
207 /* stale page garbage collectors */
208 void	pool_gc_sched(void *);
209 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
210 void	pool_gc_pages(void *);
211 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
212 int pool_wait_free = 1;
213 int pool_wait_gc = 8;
214 
215 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
216 
217 static inline int
218 phtree_compare(const struct pool_page_header *a,
219     const struct pool_page_header *b)
220 {
221 	vaddr_t va = (vaddr_t)a->ph_page;
222 	vaddr_t vb = (vaddr_t)b->ph_page;
223 
224 	/* the compares in this order are important for the NFIND to work */
225 	if (vb < va)
226 		return (-1);
227 	if (vb > va)
228 		return (1);
229 
230 	return (0);
231 }
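/*
 * With this reversed ordering, RBT_NFIND() on an item address returns the
 * header with the greatest ph_page that is still <= that address, i.e. the
 * page the item lives on; pr_find_pagehead() below depends on this.
 */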
232 
233 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
234 
235 /*
236  * Return the pool page header based on page address.
237  */
238 static inline struct pool_page_header *
239 pr_find_pagehead(struct pool *pp, void *v)
240 {
241 	struct pool_page_header *ph, key;
242 
243 	if (POOL_INPGHDR(pp)) {
244 		caddr_t page;
245 
246 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
247 
248 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
249 	}
250 
251 	key.ph_page = v;
252 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
253 	if (ph == NULL)
254 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
255 
256 	KASSERT(ph->ph_page <= (caddr_t)v);
257 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
258 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
259 
260 	return (ph);
261 }
262 
263 /*
264  * Initialize the given pool resource structure.
265  *
266  * We export this routine to allow other kernel parts to declare
267  * static pools that must be initialized before malloc() is available.
268  */
269 void
270 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
271     const char *wchan, struct pool_allocator *palloc)
272 {
273 	int off = 0, space;
274 	unsigned int pgsize = PAGE_SIZE, items;
275 	size_t pa_pagesz;
276 #ifdef DIAGNOSTIC
277 	struct pool *iter;
278 #endif
279 
280 	if (align == 0)
281 		align = ALIGN(1);
282 
283 	if (size < sizeof(struct pool_item))
284 		size = sizeof(struct pool_item);
285 
286 	size = roundup(size, align);
287 
288 	while (size * 8 > pgsize)
289 		pgsize <<= 1;
290 
291 	if (palloc == NULL) {
292 		if (pgsize > PAGE_SIZE) {
293 			palloc = ISSET(flags, PR_WAITOK) ?
294 			    &pool_allocator_multi_ni : &pool_allocator_multi;
295 		} else
296 			palloc = &pool_allocator_single;
297 
298 		pa_pagesz = palloc->pa_pagesz;
299 	} else {
300 		size_t pgsizes;
301 
302 		pa_pagesz = palloc->pa_pagesz;
303 		if (pa_pagesz == 0)
304 			pa_pagesz = POOL_ALLOC_DEFAULT;
305 
306 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
307 
308 		/* make sure the allocator can fit at least one item */
309 		if (size > pgsizes) {
310 			panic("%s: pool %s item size 0x%zx > "
311 			    "allocator %p sizes 0x%zx", __func__, wchan,
312 			    size, palloc, pgsizes);
313 		}
314 
315 		/* shrink pgsize until it fits into the range */
316 		while (!ISSET(pgsizes, pgsize))
317 			pgsize >>= 1;
318 	}
319 	KASSERT(ISSET(pa_pagesz, pgsize));
320 
321 	items = pgsize / size;
322 
323 	/*
324 	 * Decide whether to put the page header off page to avoid
325 	 * wasting too large a part of the page. Off-page page headers
326 	 * go into an RB tree, so we can match a returned item with
327 	 * its header based on the page address.
328 	 */
329 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
330 		if (pgsize - (size * items) >
331 		    sizeof(struct pool_page_header)) {
332 			off = pgsize - sizeof(struct pool_page_header);
333 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
334 			off = pgsize - sizeof(struct pool_page_header);
335 			items = off / size;
336 		}
337 	}
338 
339 	KASSERT(items > 0);
340 
341 	/*
342 	 * Initialize the pool structure.
343 	 */
344 	memset(pp, 0, sizeof(*pp));
345 	TAILQ_INIT(&pp->pr_emptypages);
346 	TAILQ_INIT(&pp->pr_fullpages);
347 	TAILQ_INIT(&pp->pr_partpages);
348 	pp->pr_curpage = NULL;
349 	pp->pr_npages = 0;
350 	pp->pr_minitems = 0;
351 	pp->pr_minpages = 0;
352 	pp->pr_maxpages = 8;
353 	pp->pr_size = size;
354 	pp->pr_pgsize = pgsize;
355 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
356 	pp->pr_phoffset = off;
357 	pp->pr_itemsperpage = items;
358 	pp->pr_wchan = wchan;
359 	pp->pr_alloc = palloc;
360 	pp->pr_nitems = 0;
361 	pp->pr_nout = 0;
362 	pp->pr_hardlimit = UINT_MAX;
363 	pp->pr_hardlimit_warning = NULL;
364 	pp->pr_hardlimit_ratecap.tv_sec = 0;
365 	pp->pr_hardlimit_ratecap.tv_usec = 0;
366 	pp->pr_hardlimit_warning_last.tv_sec = 0;
367 	pp->pr_hardlimit_warning_last.tv_usec = 0;
368 	RBT_INIT(phtree, &pp->pr_phtree);
369 
370 	/*
371 	 * Use the space between the chunks and the page header
372 	 * for cache coloring.
373 	 */
374 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
375 	space -= pp->pr_itemsperpage * pp->pr_size;
376 	pp->pr_align = align;
377 	pp->pr_maxcolors = (space / align) + 1;
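	/*
	 * e.g. 96 bytes of slack with an 8 byte alignment gives 13 colors,
	 * so successive pages start their items at offsets 0, 8, ..., 96,
	 * spreading them over more cache lines.
	 */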
378 
379 	pp->pr_nget = 0;
380 	pp->pr_nfail = 0;
381 	pp->pr_nput = 0;
382 	pp->pr_npagealloc = 0;
383 	pp->pr_npagefree = 0;
384 	pp->pr_hiwat = 0;
385 	pp->pr_nidle = 0;
386 
387 	pp->pr_ipl = ipl;
388 	mtx_init_flags(&pp->pr_mtx, pp->pr_ipl, wchan, 0);
389 	mtx_init_flags(&pp->pr_requests_mtx, pp->pr_ipl, wchan, 0);
390 	TAILQ_INIT(&pp->pr_requests);
391 
392 	if (phpool.pr_size == 0) {
393 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
394 		    IPL_HIGH, 0, "phpool", NULL);
395 
396 		/* make sure phpool won't "recurse" */
397 		KASSERT(POOL_INPGHDR(&phpool));
398 	}
399 
400 	/* pglistalloc/constraint parameters */
401 	pp->pr_crange = &kp_dirty;
402 
403 	/* Insert this into the list of all pools. */
404 	rw_enter_write(&pool_lock);
405 #ifdef DIAGNOSTIC
406 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
407 		if (iter == pp)
408 			panic("%s: pool %s already on list", __func__, wchan);
409 	}
410 #endif
411 
412 	pp->pr_serial = ++pool_serial;
413 	if (pool_serial == 0)
414 		panic("%s: too much uptime", __func__);
415 
416 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
417 	pool_count++;
418 	rw_exit_write(&pool_lock);
419 }
420 
421 /*
422  * Decommission a pool resource.
423  */
424 void
425 pool_destroy(struct pool *pp)
426 {
427 	struct pool_page_header *ph;
428 	struct pool *prev, *iter;
429 
430 #ifdef MULTIPROCESSOR
431 	if (pp->pr_cache != NULL)
432 		pool_cache_destroy(pp);
433 #endif
434 
435 #ifdef DIAGNOSTIC
436 	if (pp->pr_nout != 0)
437 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
438 #endif
439 
440 	/* Remove from global pool list */
441 	rw_enter_write(&pool_lock);
442 	pool_count--;
443 	if (pp == SIMPLEQ_FIRST(&pool_head))
444 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
445 	else {
446 		prev = SIMPLEQ_FIRST(&pool_head);
447 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
448 			if (iter == pp) {
449 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
450 				    pr_poollist);
451 				break;
452 			}
453 			prev = iter;
454 		}
455 	}
456 	rw_exit_write(&pool_lock);
457 
458 	/* Remove all pages */
459 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
460 		mtx_enter(&pp->pr_mtx);
461 		pool_p_remove(pp, ph);
462 		mtx_leave(&pp->pr_mtx);
463 		pool_p_free(pp, ph);
464 	}
465 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
466 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
467 }
468 
469 void
470 pool_request_init(struct pool_request *pr,
471     void (*handler)(void *, void *), void *cookie)
472 {
473 	pr->pr_handler = handler;
474 	pr->pr_cookie = cookie;
475 	pr->pr_item = NULL;
476 }
477 
478 void
479 pool_request(struct pool *pp, struct pool_request *pr)
480 {
481 	mtx_enter(&pp->pr_requests_mtx);
482 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
483 	pool_runqueue(pp, PR_NOWAIT);
484 	mtx_leave(&pp->pr_requests_mtx);
485 }
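/*
 * Sketch of the asynchronous interface (the foo_* names and sc_request
 * member are hypothetical): a caller that must not sleep can queue a
 * request and have the item delivered to a handler once it is available:
 *
 *	void
 *	foo_fill(void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *		... item is now owned by sc ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_fill, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 *
 * pool_get() itself uses this mechanism for its sleeping path below.
 */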
486 
487 struct pool_get_memory {
488 	struct mutex mtx;
489 	void * volatile v;
490 };
491 
492 /*
493  * Grab an item from the pool.
494  */
495 void *
496 pool_get(struct pool *pp, int flags)
497 {
498 	void *v = NULL;
499 	int slowdown = 0;
500 
501 #ifdef MULTIPROCESSOR
502 	if (pp->pr_cache != NULL) {
503 		v = pool_cache_get(pp);
504 		if (v != NULL)
505 			goto good;
506 	}
507 #endif
508 
509 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
510 
511 	mtx_enter(&pp->pr_mtx);
512 	if (pp->pr_nout >= pp->pr_hardlimit) {
513 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
514 			goto fail;
515 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
516 		if (ISSET(flags, PR_NOWAIT))
517 			goto fail;
518 	}
519 	mtx_leave(&pp->pr_mtx);
520 
521 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
522 		yield();
523 
524 	if (v == NULL) {
525 		struct pool_get_memory mem = {
526 		    MUTEX_INITIALIZER(pp->pr_ipl),
527 		    NULL };
528 		struct pool_request pr;
529 
530 		pool_request_init(&pr, pool_get_done, &mem);
531 		pool_request(pp, &pr);
532 
533 		mtx_enter(&mem.mtx);
534 		while (mem.v == NULL)
535 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
536 		mtx_leave(&mem.mtx);
537 
538 		v = mem.v;
539 	}
540 
541 #ifdef MULTIPROCESSOR
542 good:
543 #endif
544 	if (ISSET(flags, PR_ZERO))
545 		memset(v, 0, pp->pr_size);
546 
547 	return (v);
548 
549 fail:
550 	pp->pr_nfail++;
551 	mtx_leave(&pp->pr_mtx);
552 	return (NULL);
553 }
554 
555 void
556 pool_get_done(void *xmem, void *v)
557 {
558 	struct pool_get_memory *mem = xmem;
559 
560 	mtx_enter(&mem->mtx);
561 	mem->v = v;
562 	mtx_leave(&mem->mtx);
563 
564 	wakeup_one(mem);
565 }
566 
567 void
568 pool_runqueue(struct pool *pp, int flags)
569 {
570 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
571 	struct pool_request *pr;
572 
573 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
574 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
575 
576 	if (pp->pr_requesting++)
577 		return;
578 
579 	do {
580 		pp->pr_requesting = 1;
581 
582 		/* no TAILQ_JOIN? :( */
583 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
584 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
585 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
586 		}
587 		if (TAILQ_EMPTY(&prl))
588 			continue;
589 
590 		mtx_leave(&pp->pr_requests_mtx);
591 
592 		mtx_enter(&pp->pr_mtx);
593 		pr = TAILQ_FIRST(&prl);
594 		while (pr != NULL) {
595 			int slowdown = 0;
596 
597 			if (pp->pr_nout >= pp->pr_hardlimit)
598 				break;
599 
600 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
601 			if (pr->pr_item == NULL) /* || slowdown ? */
602 				break;
603 
604 			pr = TAILQ_NEXT(pr, pr_entry);
605 		}
606 		mtx_leave(&pp->pr_mtx);
607 
608 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
609 		    pr->pr_item != NULL) {
610 			TAILQ_REMOVE(&prl, pr, pr_entry);
611 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
612 		}
613 
614 		mtx_enter(&pp->pr_requests_mtx);
615 	} while (--pp->pr_requesting);
616 
617 	/* no TAILQ_JOIN :( */
618 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
619 		TAILQ_REMOVE(&prl, pr, pr_entry);
620 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
621 	}
622 }
623 
624 void *
625 pool_do_get(struct pool *pp, int flags, int *slowdown)
626 {
627 	struct pool_item *pi;
628 	struct pool_page_header *ph;
629 
630 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
631 
632 	splassert(pp->pr_ipl);
633 
634 	/*
635 	 * Account for this item now to avoid races if we need to give up
636 	 * pr_mtx to allocate a page.
637 	 */
638 	pp->pr_nout++;
639 
640 	if (pp->pr_curpage == NULL) {
641 		mtx_leave(&pp->pr_mtx);
642 		ph = pool_p_alloc(pp, flags, slowdown);
643 		mtx_enter(&pp->pr_mtx);
644 
645 		if (ph == NULL) {
646 			pp->pr_nout--;
647 			return (NULL);
648 		}
649 
650 		pool_p_insert(pp, ph);
651 	}
652 
653 	ph = pp->pr_curpage;
654 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
655 	if (__predict_false(pi == NULL))
656 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
657 
658 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
659 		panic("%s: %s free list modified: "
660 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
661 		    __func__, pp->pr_wchan, ph->ph_page, pi,
662 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
663 	}
664 
665 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
666 
667 #ifdef DIAGNOSTIC
668 	if (pool_debug && POOL_PHPOISON(ph)) {
669 		size_t pidx;
670 		uint32_t pval;
671 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
672 		    &pidx, &pval)) {
673 			int *ip = (int *)(pi + 1);
674 			panic("%s: %s free list modified: "
675 			    "page %p; item addr %p; offset 0x%zx=0x%x",
676 			    __func__, pp->pr_wchan, ph->ph_page, pi,
677 			    pidx * sizeof(int), ip[pidx]);
678 		}
679 	}
680 #endif /* DIAGNOSTIC */
681 
682 	if (ph->ph_nmissing++ == 0) {
683 		/*
684 		 * This page was previously empty.  Move it to the list of
685 		 * partially-full pages.  This page is already curpage.
686 		 */
687 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
688 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
689 
690 		pp->pr_nidle--;
691 	}
692 
693 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
694 		/*
695 		 * This page is now full.  Move it to the full list
696 		 * and select a new current page.
697 		 */
698 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
699 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
700 		pool_update_curpage(pp);
701 	}
702 
703 	pp->pr_nget++;
704 
705 	return (pi);
706 }
707 
708 /*
709  * Return resource to the pool.
710  */
711 void
712 pool_put(struct pool *pp, void *v)
713 {
714 	struct pool_item *pi = v;
715 	struct pool_page_header *ph, *freeph = NULL;
716 
717 #ifdef DIAGNOSTIC
718 	if (v == NULL)
719 		panic("%s: NULL item", __func__);
720 #endif
721 
722 #ifdef MULTIPROCESSOR
723 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
724 		pool_cache_put(pp, v);
725 		return;
726 	}
727 #endif
728 
729 	mtx_enter(&pp->pr_mtx);
730 
731 	splassert(pp->pr_ipl);
732 
733 	ph = pr_find_pagehead(pp, v);
734 
735 #ifdef DIAGNOSTIC
736 	if (pool_debug) {
737 		struct pool_item *qi;
738 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
739 			if (pi == qi) {
740 				panic("%s: %s: double pool_put: %p", __func__,
741 				    pp->pr_wchan, pi);
742 			}
743 		}
744 	}
745 #endif /* DIAGNOSTIC */
746 
747 	pi->pi_magic = POOL_IMAGIC(ph, pi);
748 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
749 #ifdef DIAGNOSTIC
750 	if (POOL_PHPOISON(ph))
751 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
752 #endif /* DIAGNOSTIC */
753 
754 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
755 		/*
756 		 * The page was previously completely full, move it to the
757 		 * partially-full list.
758 		 */
759 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
760 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
761 	}
762 
763 	if (ph->ph_nmissing == 0) {
764 		/*
765 		 * The page is now empty, so move it to the empty page list.
766 		 */
767 		pp->pr_nidle++;
768 
769 		ph->ph_tick = ticks;
770 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
771 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
772 		pool_update_curpage(pp);
773 	}
774 
775 	pp->pr_nout--;
776 	pp->pr_nput++;
777 
778 	/* is it time to free a page? */
779 	if (pp->pr_nidle > pp->pr_maxpages &&
780 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
781 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
782 		freeph = ph;
783 		pool_p_remove(pp, freeph);
784 	}
785 	mtx_leave(&pp->pr_mtx);
786 
787 	if (freeph != NULL)
788 		pool_p_free(pp, freeph);
789 
790 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
791 		mtx_enter(&pp->pr_requests_mtx);
792 		pool_runqueue(pp, PR_NOWAIT);
793 		mtx_leave(&pp->pr_requests_mtx);
794 	}
795 }
796 
797 /*
798  * Add N items to the pool.
799  */
800 int
801 pool_prime(struct pool *pp, int n)
802 {
803 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
804 	struct pool_page_header *ph;
805 	int newpages;
806 
807 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
808 
809 	while (newpages-- > 0) {
810 		int slowdown = 0;
811 
812 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
813 		if (ph == NULL) /* or slowdown? */
814 			break;
815 
816 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
817 	}
818 
819 	mtx_enter(&pp->pr_mtx);
820 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
821 		TAILQ_REMOVE(&pl, ph, ph_entry);
822 		pool_p_insert(pp, ph);
823 	}
824 	mtx_leave(&pp->pr_mtx);
825 
826 	return (0);
827 }
828 
829 struct pool_page_header *
830 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
831 {
832 	struct pool_page_header *ph;
833 	struct pool_item *pi;
834 	caddr_t addr;
835 	int n;
836 
837 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
838 	KASSERT(pp->pr_size >= sizeof(*pi));
839 
840 	addr = pool_allocator_alloc(pp, flags, slowdown);
841 	if (addr == NULL)
842 		return (NULL);
843 
844 	if (POOL_INPGHDR(pp))
845 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
846 	else {
847 		ph = pool_get(&phpool, flags);
848 		if (ph == NULL) {
849 			pool_allocator_free(pp, addr);
850 			return (NULL);
851 		}
852 	}
853 
854 	XSIMPLEQ_INIT(&ph->ph_items);
855 	ph->ph_page = addr;
856 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
857 	ph->ph_colored = addr;
858 	ph->ph_nmissing = 0;
859 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
860 #ifdef DIAGNOSTIC
861 	/* use a bit in ph_magic to record if we poison page items */
862 	if (pool_debug)
863 		SET(ph->ph_magic, POOL_MAGICBIT);
864 	else
865 		CLR(ph->ph_magic, POOL_MAGICBIT);
866 #endif /* DIAGNOSTIC */
867 
868 	n = pp->pr_itemsperpage;
869 	while (n--) {
870 		pi = (struct pool_item *)addr;
871 		pi->pi_magic = POOL_IMAGIC(ph, pi);
872 		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
873 
874 #ifdef DIAGNOSTIC
875 		if (POOL_PHPOISON(ph))
876 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
877 #endif /* DIAGNOSTIC */
878 
879 		addr += pp->pr_size;
880 	}
881 
882 	return (ph);
883 }
884 
885 void
886 pool_p_free(struct pool *pp, struct pool_page_header *ph)
887 {
888 	struct pool_item *pi;
889 
890 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
891 	KASSERT(ph->ph_nmissing == 0);
892 
893 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
894 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
895 			panic("%s: %s free list modified: "
896 			    "page %p; item addr %p; offset 0x%x=0x%lx",
897 			    __func__, pp->pr_wchan, ph->ph_page, pi,
898 			    0, pi->pi_magic);
899 		}
900 
901 #ifdef DIAGNOSTIC
902 		if (POOL_PHPOISON(ph)) {
903 			size_t pidx;
904 			uint32_t pval;
905 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
906 			    &pidx, &pval)) {
907 				int *ip = (int *)(pi + 1);
908 				panic("%s: %s free list modified: "
909 				    "page %p; item addr %p; offset 0x%zx=0x%x",
910 				    __func__, pp->pr_wchan, ph->ph_page, pi,
911 				    pidx * sizeof(int), ip[pidx]);
912 			}
913 		}
914 #endif
915 	}
916 
917 	pool_allocator_free(pp, ph->ph_page);
918 
919 	if (!POOL_INPGHDR(pp))
920 		pool_put(&phpool, ph);
921 }
922 
923 void
924 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
925 {
926 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
927 
928 	/* If the pool was depleted, point at the new page */
929 	if (pp->pr_curpage == NULL)
930 		pp->pr_curpage = ph;
931 
932 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
933 	if (!POOL_INPGHDR(pp))
934 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
935 
936 	pp->pr_nitems += pp->pr_itemsperpage;
937 	pp->pr_nidle++;
938 
939 	pp->pr_npagealloc++;
940 	if (++pp->pr_npages > pp->pr_hiwat)
941 		pp->pr_hiwat = pp->pr_npages;
942 }
943 
944 void
945 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
946 {
947 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
948 
949 	pp->pr_npagefree++;
950 	pp->pr_npages--;
951 	pp->pr_nidle--;
952 	pp->pr_nitems -= pp->pr_itemsperpage;
953 
954 	if (!POOL_INPGHDR(pp))
955 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
956 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
957 
958 	pool_update_curpage(pp);
959 }
960 
961 void
962 pool_update_curpage(struct pool *pp)
963 {
964 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
965 	if (pp->pr_curpage == NULL) {
966 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
967 	}
968 }
969 
970 void
971 pool_setlowat(struct pool *pp, int n)
972 {
973 	int prime = 0;
974 
975 	mtx_enter(&pp->pr_mtx);
976 	pp->pr_minitems = n;
977 	pp->pr_minpages = (n == 0)
978 		? 0
979 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
980 
981 	if (pp->pr_nitems < n)
982 		prime = n - pp->pr_nitems;
983 	mtx_leave(&pp->pr_mtx);
984 
985 	if (prime > 0)
986 		pool_prime(pp, prime);
987 }
988 
989 void
990 pool_sethiwat(struct pool *pp, int n)
991 {
992 	pp->pr_maxpages = (n == 0)
993 		? 0
994 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
995 }
996 
997 int
998 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
999 {
1000 	int error = 0;
1001 
1002 	if (n < pp->pr_nout) {
1003 		error = EINVAL;
1004 		goto done;
1005 	}
1006 
1007 	pp->pr_hardlimit = n;
1008 	pp->pr_hardlimit_warning = warnmsg;
1009 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1010 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1011 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1012 
1013 done:
1014 	return (error);
1015 }
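/*
 * Sketch of the watermark and limit knobs (foo_pool is hypothetical):
 * keep at least 32 items' worth of pages primed, and cap the pool at
 * 1024 outstanding items with an associated warning message and a
 * 10 second ratecap:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethardlimit(&foo_pool, 1024, "out of foos", 10);
 */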
1016 
1017 void
1018 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1019 {
1020 	pp->pr_crange = mode;
1021 }
1022 
1023 /*
1024  * Release all complete pages that have not been used recently.
1025  *
1026  * Returns non-zero if any pages have been reclaimed.
1027  */
1028 int
1029 pool_reclaim(struct pool *pp)
1030 {
1031 	struct pool_page_header *ph, *phnext;
1032 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1033 
1034 	mtx_enter(&pp->pr_mtx);
1035 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1036 		phnext = TAILQ_NEXT(ph, ph_entry);
1037 
1038 		/* Check our minimum page claim */
1039 		if (pp->pr_npages <= pp->pr_minpages)
1040 			break;
1041 
1042 		/*
1043 		 * If freeing this page would put us below
1044 		 * the low water mark, stop now.
1045 		 */
1046 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1047 		    pp->pr_minitems)
1048 			break;
1049 
1050 		pool_p_remove(pp, ph);
1051 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1052 	}
1053 	mtx_leave(&pp->pr_mtx);
1054 
1055 	if (TAILQ_EMPTY(&pl))
1056 		return (0);
1057 
1058 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1059 		TAILQ_REMOVE(&pl, ph, ph_entry);
1060 		pool_p_free(pp, ph);
1061 	}
1062 
1063 	return (1);
1064 }
1065 
1066 /*
1067  * Release all complete pages that have not been used recently
1068  * from all pools.
1069  */
1070 void
1071 pool_reclaim_all(void)
1072 {
1073 	struct pool	*pp;
1074 
1075 	rw_enter_read(&pool_lock);
1076 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1077 		pool_reclaim(pp);
1078 	rw_exit_read(&pool_lock);
1079 }
1080 
1081 #ifdef DDB
1082 #include <machine/db_machdep.h>
1083 #include <ddb/db_output.h>
1084 
1085 /*
1086  * Diagnostic helpers.
1087  */
1088 void
1089 pool_printit(struct pool *pp, const char *modif,
1090     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1091 {
1092 	pool_print1(pp, modif, pr);
1093 }
1094 
1095 void
1096 pool_print_pagelist(struct pool_pagelist *pl,
1097     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1098 {
1099 	struct pool_page_header *ph;
1100 	struct pool_item *pi;
1101 
1102 	TAILQ_FOREACH(ph, pl, ph_entry) {
1103 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1104 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1105 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1106 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1107 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1108 				    pi, pi->pi_magic);
1109 			}
1110 		}
1111 	}
1112 }
1113 
1114 void
1115 pool_print1(struct pool *pp, const char *modif,
1116     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1117 {
1118 	struct pool_page_header *ph;
1119 	int print_pagelist = 0;
1120 	char c;
1121 
1122 	while ((c = *modif++) != '\0') {
1123 		if (c == 'p')
1124 			print_pagelist = 1;
1126 	}
1127 
1128 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1129 	    pp->pr_maxcolors);
1130 	(*pr)("\talloc %p\n", pp->pr_alloc);
1131 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1132 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1133 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1134 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1135 
1136 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1137 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1138 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1139 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1140 
1141 	if (print_pagelist == 0)
1142 		return;
1143 
1144 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1145 		(*pr)("\n\tempty page list:\n");
1146 	pool_print_pagelist(&pp->pr_emptypages, pr);
1147 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1148 		(*pr)("\n\tfull page list:\n");
1149 	pool_print_pagelist(&pp->pr_fullpages, pr);
1150 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1151 		(*pr)("\n\tpartial-page list:\n");
1152 	pool_print_pagelist(&pp->pr_partpages, pr);
1153 
1154 	if (pp->pr_curpage == NULL)
1155 		(*pr)("\tno current page\n");
1156 	else
1157 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1158 }
1159 
1160 void
1161 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1162 {
1163 	struct pool *pp;
1164 	char maxp[16];
1165 	int ovflw;
1166 	char mode;
1167 
1168 	mode = modif[0];
1169 	if (mode != '\0' && mode != 'a') {
1170 		db_printf("usage: show all pools [/a]\n");
1171 		return;
1172 	}
1173 
1174 	if (mode == '\0')
1175 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1176 		    "Name",
1177 		    "Size",
1178 		    "Requests",
1179 		    "Fail",
1180 		    "Releases",
1181 		    "Pgreq",
1182 		    "Pgrel",
1183 		    "Npage",
1184 		    "Hiwat",
1185 		    "Minpg",
1186 		    "Maxpg",
1187 		    "Idle");
1188 	else
1189 		db_printf("%-12s %18s %18s\n",
1190 		    "Name", "Address", "Allocator");
1191 
1192 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1193 		if (mode == 'a') {
1194 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1195 			    pp->pr_alloc);
1196 			continue;
1197 		}
1198 
1199 		if (!pp->pr_nget)
1200 			continue;
1201 
1202 		if (pp->pr_maxpages == UINT_MAX)
1203 			snprintf(maxp, sizeof maxp, "inf");
1204 		else
1205 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1206 
1207 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1208 	(ovflw) += db_printf((fmt),			\
1209 	    (width) - (fixed) - (ovflw) > 0 ?		\
1210 	    (width) - (fixed) - (ovflw) : 0,		\
1211 	    (val)) - (width);				\
1212 	if ((ovflw) < 0)				\
1213 		(ovflw) = 0;				\
1214 } while (/* CONSTCOND */0)
1215 
1216 		ovflw = 0;
1217 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1218 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1219 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1220 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1221 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1222 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1223 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1224 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1225 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1226 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1227 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1228 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1229 
1230 		pool_chk(pp);
1231 	}
1232 }
1233 #endif /* DDB */
1234 
1235 #if defined(POOL_DEBUG) || defined(DDB)
1236 int
1237 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1238 {
1239 	struct pool_item *pi;
1240 	caddr_t page;
1241 	int n;
1242 	const char *label = pp->pr_wchan;
1243 
1244 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1245 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1246 		printf("%s: ", label);
1247 		printf("pool(%p:%s): page inconsistency: page %p; "
1248 		    "at page head addr %p (p %p)\n",
1249 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1250 		return 1;
1251 	}
1252 
1253 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1254 	     pi != NULL;
1255 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1256 		if ((caddr_t)pi < ph->ph_page ||
1257 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1258 			printf("%s: ", label);
1259 			printf("pool(%p:%s): page inconsistency: page %p;"
1260 			    " item ordinal %d; addr %p\n", pp,
1261 			    pp->pr_wchan, ph->ph_page, n, pi);
1262 			return (1);
1263 		}
1264 
1265 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1266 			printf("%s: ", label);
1267 			printf("pool(%p:%s): free list modified: "
1268 			    "page %p; item ordinal %d; addr %p "
1269 			    "(p %p); offset 0x%x=0x%lx\n",
1270 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1271 			    0, pi->pi_magic);
1272 		}
1273 
1274 #ifdef DIAGNOSTIC
1275 		if (POOL_PHPOISON(ph)) {
1276 			size_t pidx;
1277 			uint32_t pval;
1278 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1279 			    &pidx, &pval)) {
1280 				int *ip = (int *)(pi + 1);
1281 				printf("pool(%s): free list modified: "
1282 				    "page %p; item ordinal %d; addr %p "
1283 				    "(p %p); offset 0x%zx=0x%x\n",
1284 				    pp->pr_wchan, ph->ph_page, n, pi,
1285 				    page, pidx * sizeof(int), ip[pidx]);
1286 			}
1287 		}
1288 #endif /* DIAGNOSTIC */
1289 	}
1290 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1291 		printf("pool(%p:%s): page inconsistency: page %p;"
1292 		    " %d on list, %d missing, %d items per page\n", pp,
1293 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1294 		    pp->pr_itemsperpage);
1295 		return 1;
1296 	}
1297 	if (expected >= 0 && n != expected) {
1298 		printf("pool(%p:%s): page inconsistency: page %p;"
1299 		    " %d on list, %d missing, %d expected\n", pp,
1300 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1301 		    expected);
1302 		return 1;
1303 	}
1304 	return 0;
1305 }
1306 
1307 int
1308 pool_chk(struct pool *pp)
1309 {
1310 	struct pool_page_header *ph;
1311 	int r = 0;
1312 
1313 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1314 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1315 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1316 		r += pool_chk_page(pp, ph, 0);
1317 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1318 		r += pool_chk_page(pp, ph, -1);
1319 
1320 	return (r);
1321 }
1322 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1323 
1324 #ifdef DDB
1325 void
1326 pool_walk(struct pool *pp, int full,
1327     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1328     void (*func)(void *, int, int (*)(const char *, ...)
1329 	    __attribute__((__format__(__kprintf__,1,2)))))
1330 {
1331 	struct pool_page_header *ph;
1332 	struct pool_item *pi;
1333 	caddr_t cp;
1334 	int n;
1335 
1336 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1337 		cp = ph->ph_colored;
1338 		n = ph->ph_nmissing;
1339 
1340 		while (n--) {
1341 			func(cp, full, pr);
1342 			cp += pp->pr_size;
1343 		}
1344 	}
1345 
1346 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1347 		cp = ph->ph_colored;
1348 		n = ph->ph_nmissing;
1349 
1350 		do {
1351 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1352 				if (cp == (caddr_t)pi)
1353 					break;
1354 			}
1355 			if (cp != (caddr_t)pi) {
1356 				func(cp, full, pr);
1357 				n--;
1358 			}
1359 
1360 			cp += pp->pr_size;
1361 		} while (n > 0);
1362 	}
1363 }
1364 #endif
1365 
1366 /*
1367  * We export these sysctls:
1368  * kern.pool.npools - the number of pools.
1369  * kern.pool.pool/name.<pool#> - the pool struct / name for pool#.
1370  * kern.pool.cache/cache_cpus.<pool#> - cache statistics for pool#.
1371  */
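/*
 * Sketch of how userland retrieves one of these (error handling omitted;
 * serial is the pool's pr_serial, 1 through kern.pool.npools):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */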
1372 int
1373 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1374 {
1375 	struct kinfo_pool pi;
1376 	struct pool *pp;
1377 	int rv = ENOENT;
1378 
1379 	switch (name[0]) {
1380 	case KERN_POOL_NPOOLS:
1381 		if (namelen != 1)
1382 			return (ENOTDIR);
1383 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1384 
1385 	case KERN_POOL_NAME:
1386 	case KERN_POOL_POOL:
1387 	case KERN_POOL_CACHE:
1388 	case KERN_POOL_CACHE_CPUS:
1389 		break;
1390 	default:
1391 		return (EOPNOTSUPP);
1392 	}
1393 
1394 	if (namelen != 2)
1395 		return (ENOTDIR);
1396 
1397 	rw_enter_read(&pool_lock);
1398 
1399 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1400 		if (name[1] == pp->pr_serial)
1401 			break;
1402 	}
1403 
1404 	if (pp == NULL)
1405 		goto done;
1406 
1407 	switch (name[0]) {
1408 	case KERN_POOL_NAME:
1409 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1410 		break;
1411 	case KERN_POOL_POOL:
1412 		memset(&pi, 0, sizeof(pi));
1413 
1414 		mtx_enter(&pp->pr_mtx);
1415 		pi.pr_size = pp->pr_size;
1416 		pi.pr_pgsize = pp->pr_pgsize;
1417 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1418 		pi.pr_npages = pp->pr_npages;
1419 		pi.pr_minpages = pp->pr_minpages;
1420 		pi.pr_maxpages = pp->pr_maxpages;
1421 		pi.pr_hardlimit = pp->pr_hardlimit;
1422 		pi.pr_nout = pp->pr_nout;
1423 		pi.pr_nitems = pp->pr_nitems;
1424 		pi.pr_nget = pp->pr_nget;
1425 		pi.pr_nput = pp->pr_nput;
1426 		pi.pr_nfail = pp->pr_nfail;
1427 		pi.pr_npagealloc = pp->pr_npagealloc;
1428 		pi.pr_npagefree = pp->pr_npagefree;
1429 		pi.pr_hiwat = pp->pr_hiwat;
1430 		pi.pr_nidle = pp->pr_nidle;
1431 		mtx_leave(&pp->pr_mtx);
1432 
1433 		pool_cache_pool_info(pp, &pi);
1434 
1435 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1436 		break;
1437 
1438 	case KERN_POOL_CACHE:
1439 		rv = pool_cache_info(pp, oldp, oldlenp);
1440 		break;
1441 
1442 	case KERN_POOL_CACHE_CPUS:
1443 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1444 		break;
1445 	}
1446 
1447 done:
1448 	rw_exit_read(&pool_lock);
1449 
1450 	return (rv);
1451 }
1452 
1453 void
1454 pool_gc_sched(void *null)
1455 {
1456 	task_add(systqmp, &pool_gc_task);
1457 }
1458 
1459 void
1460 pool_gc_pages(void *null)
1461 {
1462 	struct pool *pp;
1463 	struct pool_page_header *ph, *freeph;
1464 	int s;
1465 
1466 	rw_enter_read(&pool_lock);
1467 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1468 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1469 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1470 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1471 			continue;
1472 
1473 		/* is it time to free a page? */
1474 		if (pp->pr_nidle > pp->pr_minpages &&
1475 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1476 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1477 			freeph = ph;
1478 			pool_p_remove(pp, freeph);
1479 		} else
1480 			freeph = NULL;
1481 
1482 		mtx_leave(&pp->pr_mtx);
1483 
1484 		if (freeph != NULL)
1485 			pool_p_free(pp, freeph);
1486 	}
1487 	splx(s);
1488 	rw_exit_read(&pool_lock);
1489 
1490 	timeout_add_sec(&pool_gc_tick, 1);
1491 }
1492 
1493 /*
1494  * Pool backend allocators.
1495  */
1496 
1497 void *
1498 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1499 {
1500 	void *v;
1501 
1502 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1503 
1504 #ifdef DIAGNOSTIC
1505 	if (v != NULL && POOL_INPGHDR(pp)) {
1506 		vaddr_t addr = (vaddr_t)v;
1507 		if ((addr & pp->pr_pgmask) != addr) {
1508 			panic("%s: %s page address %p isn't aligned to %u",
1509 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1510 		}
1511 	}
1512 #endif
1513 
1514 	return (v);
1515 }
1516 
1517 void
1518 pool_allocator_free(struct pool *pp, void *v)
1519 {
1520 	struct pool_allocator *pa = pp->pr_alloc;
1521 
1522 	(*pa->pa_free)(pp, v);
1523 }
1524 
1525 void *
1526 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1527 {
1528 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1529 
1530 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1531 	kd.kd_slowdown = slowdown;
1532 
1533 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1534 }
1535 
1536 void
1537 pool_page_free(struct pool *pp, void *v)
1538 {
1539 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1540 }
1541 
1542 void *
1543 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1544 {
1545 	struct kmem_va_mode kv = kv_intrsafe;
1546 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1547 	void *v;
1548 	int s;
1549 
1550 	if (POOL_INPGHDR(pp))
1551 		kv.kv_align = pp->pr_pgsize;
1552 
1553 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1554 	kd.kd_slowdown = slowdown;
1555 
1556 	s = splvm();
1557 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1558 	splx(s);
1559 
1560 	return (v);
1561 }
1562 
1563 void
1564 pool_multi_free(struct pool *pp, void *v)
1565 {
1566 	struct kmem_va_mode kv = kv_intrsafe;
1567 	int s;
1568 
1569 	if (POOL_INPGHDR(pp))
1570 		kv.kv_align = pp->pr_pgsize;
1571 
1572 	s = splvm();
1573 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1574 	splx(s);
1575 }
1576 
1577 void *
1578 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1579 {
1580 	struct kmem_va_mode kv = kv_any;
1581 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1582 	void *v;
1583 
1584 	if (POOL_INPGHDR(pp))
1585 		kv.kv_align = pp->pr_pgsize;
1586 
1587 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1588 	kd.kd_slowdown = slowdown;
1589 
1590 	KERNEL_LOCK();
1591 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1592 	KERNEL_UNLOCK();
1593 
1594 	return (v);
1595 }
1596 
1597 void
1598 pool_multi_free_ni(struct pool *pp, void *v)
1599 {
1600 	struct kmem_va_mode kv = kv_any;
1601 
1602 	if (POOL_INPGHDR(pp))
1603 		kv.kv_align = pp->pr_pgsize;
1604 
1605 	KERNEL_LOCK();
1606 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1607 	KERNEL_UNLOCK();
1608 }
1609 
1610 #ifdef MULTIPROCESSOR
1611 
1612 struct pool pool_caches; /* per cpu cache entries */
1613 
1614 void
1615 pool_cache_init(struct pool *pp)
1616 {
1617 	struct cpumem *cm;
1618 	struct pool_cache *pc;
1619 	struct cpumem_iter i;
1620 
1621 	if (pool_caches.pr_size == 0) {
1622 		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
1623 		    IPL_NONE, PR_WAITOK, "plcache", NULL);
1624 	}
1625 
1626 	/* must be able to use the pool items as cache list items */
1627 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1628 
1629 	cm = cpumem_get(&pool_caches);
1630 
1631 	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
1632 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1633 	TAILQ_INIT(&pp->pr_cache_lists);
1634 	pp->pr_cache_nlist = 0;
1635 	pp->pr_cache_items = 8;
1636 	pp->pr_cache_contention = 0;
1637 
1638 	CPUMEM_FOREACH(pc, &i, cm) {
1639 		pc->pc_actv = NULL;
1640 		pc->pc_nactv = 0;
1641 		pc->pc_prev = NULL;
1642 
1643 		pc->pc_nget = 0;
1644 		pc->pc_nfail = 0;
1645 		pc->pc_nput = 0;
1646 		pc->pc_nlget = 0;
1647 		pc->pc_nlfail = 0;
1648 		pc->pc_nlput = 0;
1649 		pc->pc_nout = 0;
1650 	}
1651 
1652 	pp->pr_cache = cm;
1653 }
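/*
 * Sketch: a pool with heavy concurrent pool_get()/pool_put() traffic can
 * enable the per-CPU caches after pool_init(), provided its items are at
 * least sizeof(struct pool_cache_item) bytes:
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NET, 0, "foopl", NULL);
 *	pool_cache_init(&foo_pool);
 *
 * after which pool_get() and pool_put() go through the caches transparently.
 */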
1654 
1655 static inline void
1656 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1657 {
1658 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1659 
1660 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1661 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1662 }
1663 
1664 static inline void
1665 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1666 {
1667 	unsigned long *entry;
1668 	unsigned long val;
1669 
1670 	entry = (unsigned long *)&ci->ci_nextl;
1671 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1672 	if (*entry != val)
1673 		goto fail;
1674 
1675 	entry++;
1676 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1677 	if (*entry != val)
1678 		goto fail;
1679 
1680 	return;
1681 
1682 fail:
1683 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1684 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1685 	    *entry, val);
1686 }
1687 
1688 static inline void
1689 pool_list_enter(struct pool *pp)
1690 {
1691 	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
1692 		mtx_enter(&pp->pr_cache_mtx);
1693 		pp->pr_cache_contention++;
1694 	}
1695 }
1696 
1697 static inline void
1698 pool_list_leave(struct pool *pp)
1699 {
1700 	mtx_leave(&pp->pr_cache_mtx);
1701 }
1702 
1703 static inline struct pool_cache_item *
1704 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1705 {
1706 	struct pool_cache_item *pl;
1707 
1708 	pool_list_enter(pp);
1709 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1710 	if (pl != NULL) {
1711 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1712 		pp->pr_cache_nlist--;
1713 
1714 		pool_cache_item_magic(pp, pl);
1715 
1716 		pc->pc_nlget++;
1717 	} else
1718 		pc->pc_nlfail++;
1719 
1720 	/* fold this CPU's nout into the global count while we have the lock */
1721 	pp->pr_cache_nout += pc->pc_nout;
1722 	pc->pc_nout = 0;
1723 	pool_list_leave(pp);
1724 
1725 	return (pl);
1726 }
1727 
1728 static inline void
1729 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1730     struct pool_cache_item *ci)
1731 {
1732 	pool_list_enter(pp);
1733 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1734 	pp->pr_cache_nlist++;
1735 
1736 	pc->pc_nlput++;
1737 
1738 	/* fold this CPU's nout into the global count while we have the lock */
1739 	pp->pr_cache_nout += pc->pc_nout;
1740 	pc->pc_nout = 0;
1741 	pool_list_leave(pp);
1742 }
1743 
1744 static inline struct pool_cache *
1745 pool_cache_enter(struct pool *pp, int *s)
1746 {
1747 	struct pool_cache *pc;
1748 
1749 	pc = cpumem_enter(pp->pr_cache);
1750 	*s = splraise(pp->pr_ipl);
1751 	pc->pc_gen++;
1752 
1753 	return (pc);
1754 }
1755 
1756 static inline void
1757 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1758 {
1759 	pc->pc_gen++;
1760 	splx(s);
1761 	cpumem_leave(pp->pr_cache, pc);
1762 }
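/*
 * pc_gen behaves like a seqlock generation counter: it is odd while the
 * owning CPU is between pool_cache_enter() and pool_cache_leave(), so
 * readers such as pool_cache_pool_info() spin while it is odd and retry
 * if it changed while they were copying the counters.
 */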
1763 
1764 void *
1765 pool_cache_get(struct pool *pp)
1766 {
1767 	struct pool_cache *pc;
1768 	struct pool_cache_item *ci;
1769 	int s;
1770 
1771 	pc = pool_cache_enter(pp, &s);
1772 
1773 	if (pc->pc_actv != NULL) {
1774 		ci = pc->pc_actv;
1775 	} else if (pc->pc_prev != NULL) {
1776 		ci = pc->pc_prev;
1777 		pc->pc_prev = NULL;
1778 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1779 		pc->pc_nfail++;
1780 		goto done;
1781 	}
1782 
1783 	pool_cache_item_magic_check(pp, ci);
1784 #ifdef DIAGNOSTIC
1785 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1786 		size_t pidx;
1787 		uint32_t pval;
1788 
1789 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1790 		    &pidx, &pval)) {
1791 			int *ip = (int *)(ci + 1);
1792 			ip += pidx;
1793 
1794 			panic("%s: %s cpu free list modified: "
1795 			    "item addr %p+%zu 0x%x!=0x%x",
1796 			    __func__, pp->pr_wchan, ci,
1797 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1798 		}
1799 	}
1800 #endif
1801 
1802 	pc->pc_actv = ci->ci_next;
1803 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1804 	pc->pc_nget++;
1805 	pc->pc_nout++;
1806 
1807 done:
1808 	pool_cache_leave(pp, pc, s);
1809 
1810 	return (ci);
1811 }
1812 
1813 void
1814 pool_cache_put(struct pool *pp, void *v)
1815 {
1816 	struct pool_cache *pc;
1817 	struct pool_cache_item *ci = v;
1818 	unsigned long nitems;
1819 	int s;
1820 #ifdef DIAGNOSTIC
1821 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1822 
1823 	if (poison)
1824 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1825 #endif
1826 
1827 	pc = pool_cache_enter(pp, &s);
1828 
1829 	nitems = pc->pc_nactv;
1830 	if (nitems >= pp->pr_cache_items) {
1831 		if (pc->pc_prev != NULL)
1832 			pool_cache_list_free(pp, pc, pc->pc_prev);
1833 
1834 		pc->pc_prev = pc->pc_actv;
1835 
1836 		pc->pc_actv = NULL;
1837 		pc->pc_nactv = 0;
1838 		nitems = 0;
1839 	}
1840 
1841 	ci->ci_next = pc->pc_actv;
1842 	ci->ci_nitems = ++nitems;
1843 #ifdef DIAGNOSTIC
1844 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1845 #endif
1846 	pool_cache_item_magic(pp, ci);
1847 
1848 	pc->pc_actv = ci;
1849 	pc->pc_nactv = nitems;
1850 
1851 	pc->pc_nput++;
1852 	pc->pc_nout--;
1853 
1854 	pool_cache_leave(pp, pc, s);
1855 }
1856 
1857 struct pool_cache_item *
1858 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1859 {
1860 	struct pool_cache_item *rpl, *next;
1861 
1862 	if (pl == NULL)
1863 		return (NULL);
1864 
1865 	rpl = TAILQ_NEXT(pl, ci_nextl);
1866 
1867 	do {
1868 		next = pl->ci_next;
1869 		pool_put(pp, pl);
1870 		pl = next;
1871 	} while (pl != NULL);
1872 
1873 	return (rpl);
1874 }
1875 
1876 void
1877 pool_cache_destroy(struct pool *pp)
1878 {
1879 	struct pool_cache *pc;
1880 	struct pool_cache_item *pl;
1881 	struct cpumem_iter i;
1882 	struct cpumem *cm;
1883 
1884 	cm = pp->pr_cache;
1885 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1886 
1887 	CPUMEM_FOREACH(pc, &i, cm) {
1888 		pool_cache_list_put(pp, pc->pc_actv);
1889 		pool_cache_list_put(pp, pc->pc_prev);
1890 	}
1891 
1892 	cpumem_put(&pool_caches, cm);
1893 
1894 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1895 	while (pl != NULL)
1896 		pl = pool_cache_list_put(pp, pl);
1897 }
1898 
1899 void
1900 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
1901 {
1902 	struct pool_cache *pc;
1903 	struct cpumem_iter i;
1904 
1905 	if (pp->pr_cache == NULL)
1906 		return;
1907 
1908 	/* loop through the caches twice to collect stats */
1909 
1910 	/* once without the mtx so we can yield while reading nget/nput */
1911 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
1912 		uint64_t gen, nget, nput;
1913 
1914 		do {
1915 			while ((gen = pc->pc_gen) & 1)
1916 				yield();
1917 
1918 			nget = pc->pc_nget;
1919 			nput = pc->pc_nput;
1920 		} while (gen != pc->pc_gen);
1921 
1922 		pi->pr_nget += nget;
1923 		pi->pr_nput += nput;
1924 	}
1925 
1926 	/* and once with the mtx so we can get consistent nout values */
1927 	mtx_enter(&pp->pr_cache_mtx);
1928 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
1929 		pi->pr_nout += pc->pc_nout;
1930 
1931 	pi->pr_nout += pp->pr_cache_nout;
1932 	mtx_leave(&pp->pr_cache_mtx);
1933 }
1934 
1935 int
1936 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
1937 {
1938 	struct kinfo_pool_cache kpc;
1939 
1940 	if (pp->pr_cache == NULL)
1941 		return (EOPNOTSUPP);
1942 
1943 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
1944 
1945 	mtx_enter(&pp->pr_cache_mtx);
1946 	kpc.pr_ngc = 0; /* notyet */
1947 	kpc.pr_len = pp->pr_cache_items;
1948 	kpc.pr_nlist = pp->pr_cache_nlist;
1949 	kpc.pr_contention = pp->pr_cache_contention;
1950 	mtx_leave(&pp->pr_cache_mtx);
1951 
1952 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
1953 }
1954 
1955 int
1956 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
1957 {
1958 	struct pool_cache *pc;
1959 	struct kinfo_pool_cache_cpu *kpcc, *info;
1960 	unsigned int cpu = 0;
1961 	struct cpumem_iter i;
1962 	int error = 0;
1963 	size_t len;
1964 
1965 	if (pp->pr_cache == NULL)
1966 		return (EOPNOTSUPP);
1967 	if (*oldlenp % sizeof(*kpcc))
1968 		return (EINVAL);
1969 
1970 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
1971 	    M_WAITOK|M_CANFAIL|M_ZERO);
1972 	if (kpcc == NULL)
1973 		return (EIO);
1974 
1975 	len = ncpusfound * sizeof(*kpcc);
1976 
1977 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
1978 		uint64_t gen;
1979 
1980 		if (cpu >= ncpusfound) {
1981 			error = EIO;
1982 			goto err;
1983 		}
1984 
1985 		info = &kpcc[cpu];
1986 		info->pr_cpu = cpu;
1987 
1988 		do {
1989 			while ((gen = pc->pc_gen) & 1)
1990 				yield();
1991 
1992 			info->pr_nget = pc->pc_nget;
1993 			info->pr_nfail = pc->pc_nfail;
1994 			info->pr_nput = pc->pc_nput;
1995 			info->pr_nlget = pc->pc_nlget;
1996 			info->pr_nlfail = pc->pc_nlfail;
1997 			info->pr_nlput = pc->pc_nlput;
1998 		} while (gen != pc->pc_gen);
1999 
2000 		cpu++;
2001 	}
2002 
2003 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2004 err:
2005 	free(kpcc, M_TEMP, len);
2006 
2007 	return (error);
2008 }
2009 #else /* MULTIPROCESSOR */
2010 void
2011 pool_cache_init(struct pool *pp)
2012 {
2013 	/* nop */
2014 }
2015 
2016 void
2017 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2018 {
2019 	/* nop */
2020 }
2021 
2022 int
2023 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2024 {
2025 	return (EOPNOTSUPP);
2026 }
2027 
2028 int
2029 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2030 {
2031 	return (EOPNOTSUPP);
2032 }
2033 #endif /* MULTIPROCESSOR */
2034