xref: /openbsd-src/sys/kern/subr_pool.c (revision 908b79cbbe8da7a8acef93f11e4f23059ae8d8a6)
1 /*	$OpenBSD: subr_pool.c,v 1.217 2017/06/23 01:21:55 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/syslog.h>
41 #include <sys/rwlock.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
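
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * typical use of the API described above.  "struct foo", "foo_pool" and the
 * "foopl" wait channel name are hypothetical.
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	... use f ...
 *	pool_put(&foo_pool, f);
 */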
61 
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, we're screwed, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71 
72 /* Lock protecting the variables above that make up the global pool state */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures */
76 struct pool phpool;
77 
78 struct pool_item {
79 	u_long				pi_magic;
80 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
81 };
82 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
83 
84 struct pool_page_header {
85 	/* Page headers */
86 	TAILQ_ENTRY(pool_page_header)
87 				ph_entry;	/* pool page list */
88 	XSIMPLEQ_HEAD(, pool_item)
89 				ph_items;	/* free items on the page */
90 	RBT_ENTRY(pool_page_header)
91 				ph_node;	/* off-page page headers */
92 	unsigned int		ph_nmissing;	/* # of chunks in use */
93 	caddr_t			ph_page;	/* this page's address */
94 	caddr_t			ph_colored;	/* page's colored address */
95 	unsigned long		ph_magic;
96 	int			ph_tick;
97 };
98 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
99 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
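
/*
 * Editor's note (illustrative, not part of the original file): while an item
 * sits on its page's free list, pi_magic holds the item's own address XORed
 * with the page header's random ph_magic (POOL_IMAGIC above).  A sketch of
 * the check performed by pool_do_get() and pool_p_free():
 *
 *	if (pi->pi_magic != POOL_IMAGIC(ph, pi))
 *		panic("free list modified");
 *
 * POOL_MAGICBIT additionally records, per page, whether each free item's
 * remaining bytes were filled with poison_mem() so poison_check() can verify
 * them when the item is handed out again.
 */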
100 
101 #ifdef MULTIPROCESSOR
102 struct pool_cache_item {
103 	struct pool_cache_item	*ci_next;	/* next item in list */
104 	unsigned long		 ci_nitems;	/* number of items in list */
105 	TAILQ_ENTRY(pool_cache_item)
106 				 ci_nextl;	/* entry in list of lists */
107 };
108 
109 /* we store whether the cached item is poisoned in the high bit of nitems */
110 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
111 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
112 
113 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
114     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
115 
116 #define POOL_CACHE_ITEM_POISONED(_ci)					\
117     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
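
/*
 * Editor's example (illustrative): with poisoning enabled, a list of 5 items
 * stores ci_nitems = 5 | POOL_CACHE_ITEM_NITEMS_POISON, so
 * POOL_CACHE_ITEM_NITEMS(ci) yields 5 and POOL_CACHE_ITEM_POISONED(ci) is
 * non-zero; the count itself can grow only up to POOL_CACHE_ITEM_NITEMS_MASK.
 */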
118 
119 struct pool_cache {
120 	struct pool_cache_item	*pc_actv;	/* active list of items */
121 	unsigned long		 pc_nactv;	/* actv head nitems cache */
122 	struct pool_cache_item	*pc_prev;	/* previous list of items */
123 
124 	uint64_t		 pc_gen;	/* generation number */
125 	uint64_t		 pc_nget;	/* # of successful requests */
126 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
127 	uint64_t		 pc_nput;	/* # of releases */
128 	uint64_t		 pc_nlget;	/* # of list requests */
129 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
130 	uint64_t		 pc_nlput;	/* # of list releases */
131 
132 	int			 pc_nout;
133 };
134 
135 void	*pool_cache_get(struct pool *);
136 void	 pool_cache_put(struct pool *, void *);
137 void	 pool_cache_destroy(struct pool *);
138 void	 pool_cache_gc(struct pool *);
139 #endif
140 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
141 int	 pool_cache_info(struct pool *, void *, size_t *);
142 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
143 
144 #ifdef POOL_DEBUG
145 int	pool_debug = 1;
146 #else
147 int	pool_debug = 0;
148 #endif
149 
150 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
151 
152 struct pool_page_header *
153 	 pool_p_alloc(struct pool *, int, int *);
154 void	 pool_p_insert(struct pool *, struct pool_page_header *);
155 void	 pool_p_remove(struct pool *, struct pool_page_header *);
156 void	 pool_p_free(struct pool *, struct pool_page_header *);
157 
158 void	 pool_update_curpage(struct pool *);
159 void	*pool_do_get(struct pool *, int, int *);
160 void	 pool_do_put(struct pool *, void *);
161 int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
162 int	 pool_chk(struct pool *);
163 void	 pool_get_done(void *, void *);
164 void	 pool_runqueue(struct pool *, int);
165 
166 void	*pool_allocator_alloc(struct pool *, int, int *);
167 void	 pool_allocator_free(struct pool *, void *);
168 
169 /*
170  * The default pool allocator.
171  */
172 void	*pool_page_alloc(struct pool *, int, int *);
173 void	pool_page_free(struct pool *, void *);
174 
175 /*
176  * safe for interrupts; this is the default allocator
177  */
178 struct pool_allocator pool_allocator_single = {
179 	pool_page_alloc,
180 	pool_page_free,
181 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
182 };
183 
184 void	*pool_multi_alloc(struct pool *, int, int *);
185 void	pool_multi_free(struct pool *, void *);
186 
187 struct pool_allocator pool_allocator_multi = {
188 	pool_multi_alloc,
189 	pool_multi_free,
190 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
191 };
192 
193 void	*pool_multi_alloc_ni(struct pool *, int, int *);
194 void	pool_multi_free_ni(struct pool *, void *);
195 
196 struct pool_allocator pool_allocator_multi_ni = {
197 	pool_multi_alloc_ni,
198 	pool_multi_free_ni,
199 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
200 };
201 
202 #ifdef DDB
203 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
204 	     __attribute__((__format__(__kprintf__,1,2))));
205 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
206 	     __attribute__((__format__(__kprintf__,1,2))));
207 #endif
208 
209 /* stale page garbage collectors */
210 void	pool_gc_sched(void *);
211 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
212 void	pool_gc_pages(void *);
213 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
214 int pool_wait_free = 1;
215 int pool_wait_gc = 8;
216 
217 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
218 
219 static inline int
220 phtree_compare(const struct pool_page_header *a,
221     const struct pool_page_header *b)
222 {
223 	vaddr_t va = (vaddr_t)a->ph_page;
224 	vaddr_t vb = (vaddr_t)b->ph_page;
225 
226 	/* the comparisons must be done in this order for RBT_NFIND to work */
227 	if (vb < va)
228 		return (-1);
229 	if (vb > va)
230 		return (1);
231 
232 	return (0);
233 }
234 
235 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
236 
237 /*
238  * Return the pool page header based on page address.
239  */
240 static inline struct pool_page_header *
241 pr_find_pagehead(struct pool *pp, void *v)
242 {
243 	struct pool_page_header *ph, key;
244 
245 	if (POOL_INPGHDR(pp)) {
246 		caddr_t page;
247 
248 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
249 
250 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
251 	}
252 
253 	key.ph_page = v;
254 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
255 	if (ph == NULL)
256 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
257 
258 	KASSERT(ph->ph_page <= (caddr_t)v);
259 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
260 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
261 
262 	return (ph);
263 }
264 
265 /*
266  * Initialize the given pool resource structure.
267  *
268  * We export this routine to allow other kernel parts to declare
269  * static pools that must be initialized before malloc() is available.
270  */
271 void
272 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
273     const char *wchan, struct pool_allocator *palloc)
274 {
275 	int off = 0, space;
276 	unsigned int pgsize = PAGE_SIZE, items;
277 	size_t pa_pagesz;
278 #ifdef DIAGNOSTIC
279 	struct pool *iter;
280 #endif
281 
282 	if (align == 0)
283 		align = ALIGN(1);
284 
285 	if (size < sizeof(struct pool_item))
286 		size = sizeof(struct pool_item);
287 
288 	size = roundup(size, align);
289 
290 	while (size * 8 > pgsize)
291 		pgsize <<= 1;
292 
293 	if (palloc == NULL) {
294 		if (pgsize > PAGE_SIZE) {
295 			palloc = ISSET(flags, PR_WAITOK) ?
296 			    &pool_allocator_multi_ni : &pool_allocator_multi;
297 		} else
298 			palloc = &pool_allocator_single;
299 
300 		pa_pagesz = palloc->pa_pagesz;
301 	} else {
302 		size_t pgsizes;
303 
304 		pa_pagesz = palloc->pa_pagesz;
305 		if (pa_pagesz == 0)
306 			pa_pagesz = POOL_ALLOC_DEFAULT;
307 
308 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
309 
310 		/* make sure the allocator can fit at least one item */
311 		if (size > pgsizes) {
312 			panic("%s: pool %s item size 0x%zx > "
313 			    "allocator %p sizes 0x%zx", __func__, wchan,
314 			    size, palloc, pgsizes);
315 		}
316 
317 		/* shrink pgsize until it fits into the range */
318 		while (!ISSET(pgsizes, pgsize))
319 			pgsize >>= 1;
320 	}
321 	KASSERT(ISSET(pa_pagesz, pgsize));
322 
323 	items = pgsize / size;
324 
325 	/*
326 	 * Decide whether to put the page header off page to avoid
327 	 * wasting too large a part of the page. Off-page page headers
328 	 * go into an RB tree, so we can match a returned item with
329 	 * its header based on the page address.
330 	 */
331 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
332 		if (pgsize - (size * items) >
333 		    sizeof(struct pool_page_header)) {
334 			off = pgsize - sizeof(struct pool_page_header);
335 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
336 			off = pgsize - sizeof(struct pool_page_header);
337 			items = off / size;
338 		}
339 	}
340 
341 	KASSERT(items > 0);
342 
343 	/*
344 	 * Initialize the pool structure.
345 	 */
346 	memset(pp, 0, sizeof(*pp));
347 	TAILQ_INIT(&pp->pr_emptypages);
348 	TAILQ_INIT(&pp->pr_fullpages);
349 	TAILQ_INIT(&pp->pr_partpages);
350 	pp->pr_curpage = NULL;
351 	pp->pr_npages = 0;
352 	pp->pr_minitems = 0;
353 	pp->pr_minpages = 0;
354 	pp->pr_maxpages = 8;
355 	pp->pr_size = size;
356 	pp->pr_pgsize = pgsize;
357 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
358 	pp->pr_phoffset = off;
359 	pp->pr_itemsperpage = items;
360 	pp->pr_wchan = wchan;
361 	pp->pr_alloc = palloc;
362 	pp->pr_nitems = 0;
363 	pp->pr_nout = 0;
364 	pp->pr_hardlimit = UINT_MAX;
365 	pp->pr_hardlimit_warning = NULL;
366 	pp->pr_hardlimit_ratecap.tv_sec = 0;
367 	pp->pr_hardlimit_ratecap.tv_usec = 0;
368 	pp->pr_hardlimit_warning_last.tv_sec = 0;
369 	pp->pr_hardlimit_warning_last.tv_usec = 0;
370 	RBT_INIT(phtree, &pp->pr_phtree);
371 
372 	/*
373 	 * Use the space between the chunks and the page header
374 	 * for cache coloring.
375 	 */
376 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
377 	space -= pp->pr_itemsperpage * pp->pr_size;
378 	pp->pr_align = align;
379 	pp->pr_maxcolors = (space / align) + 1;
380 
381 	pp->pr_nget = 0;
382 	pp->pr_nfail = 0;
383 	pp->pr_nput = 0;
384 	pp->pr_npagealloc = 0;
385 	pp->pr_npagefree = 0;
386 	pp->pr_hiwat = 0;
387 	pp->pr_nidle = 0;
388 
389 	pp->pr_ipl = ipl;
390 	mtx_init_flags(&pp->pr_mtx, pp->pr_ipl, wchan, 0);
391 	mtx_init_flags(&pp->pr_requests_mtx, pp->pr_ipl, wchan, 0);
392 	TAILQ_INIT(&pp->pr_requests);
393 
394 	if (phpool.pr_size == 0) {
395 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
396 		    IPL_HIGH, 0, "phpool", NULL);
397 
398 		/* make sure phpool won't "recurse" */
399 		KASSERT(POOL_INPGHDR(&phpool));
400 	}
401 
402 	/* pglistalloc/constraint parameters */
403 	pp->pr_crange = &kp_dirty;
404 
405 	/* Insert this into the list of all pools. */
406 	rw_enter_write(&pool_lock);
407 #ifdef DIAGNOSTIC
408 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
409 		if (iter == pp)
410 			panic("%s: pool %s already on list", __func__, wchan);
411 	}
412 #endif
413 
414 	pp->pr_serial = ++pool_serial;
415 	if (pool_serial == 0)
416 		panic("%s: too much uptime", __func__);
417 
418 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
419 	pool_count++;
420 	rw_exit_write(&pool_lock);
421 }
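
/*
 * Illustrative sketch (editor's addition): a subsystem with a statically
 * allocated pool can call pool_init() from its early attach/init code,
 * before malloc() is usable.  "struct bar", "bar_pool" and "barpl" are
 * hypothetical names.
 *
 *	struct pool bar_pool;
 *
 *	void
 *	bar_init(void)
 *	{
 *		pool_init(&bar_pool, sizeof(struct bar), 0, IPL_NONE, 0,
 *		    "barpl", NULL);
 *		pool_setlowat(&bar_pool, 16);
 *	}
 */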
422 
423 /*
424  * Decommission a pool resource.
425  */
426 void
427 pool_destroy(struct pool *pp)
428 {
429 	struct pool_page_header *ph;
430 	struct pool *prev, *iter;
431 
432 #ifdef MULTIPROCESSOR
433 	if (pp->pr_cache != NULL)
434 		pool_cache_destroy(pp);
435 #endif
436 
437 #ifdef DIAGNOSTIC
438 	if (pp->pr_nout != 0)
439 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
440 #endif
441 
442 	/* Remove from global pool list */
443 	rw_enter_write(&pool_lock);
444 	pool_count--;
445 	if (pp == SIMPLEQ_FIRST(&pool_head))
446 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
447 	else {
448 		prev = SIMPLEQ_FIRST(&pool_head);
449 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
450 			if (iter == pp) {
451 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
452 				    pr_poollist);
453 				break;
454 			}
455 			prev = iter;
456 		}
457 	}
458 	rw_exit_write(&pool_lock);
459 
460 	/* Remove all pages */
461 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
462 		mtx_enter(&pp->pr_mtx);
463 		pool_p_remove(pp, ph);
464 		mtx_leave(&pp->pr_mtx);
465 		pool_p_free(pp, ph);
466 	}
467 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
468 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
469 }
470 
471 void
472 pool_request_init(struct pool_request *pr,
473     void (*handler)(void *, void *), void *cookie)
474 {
475 	pr->pr_handler = handler;
476 	pr->pr_cookie = cookie;
477 	pr->pr_item = NULL;
478 }
479 
480 void
481 pool_request(struct pool *pp, struct pool_request *pr)
482 {
483 	mtx_enter(&pp->pr_requests_mtx);
484 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
485 	pool_runqueue(pp, PR_NOWAIT);
486 	mtx_leave(&pp->pr_requests_mtx);
487 }
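
/*
 * Illustrative sketch (editor's addition): pool_request() queues an
 * asynchronous allocation; the handler runs with the allocated item once
 * pool_runqueue() can satisfy the request.  The names below are
 * hypothetical.
 *
 *	void
 *	baz_fill(void *cookie, void *item)
 *	{
 *		struct baz_softc *sc = cookie;
 *		... store the item, e.g. refill an rx ring ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, baz_fill, sc);
 *	pool_request(&baz_pool, &sc->sc_request);
 *
 * pool_get() itself uses this mechanism (see pool_get_done() below) to sleep
 * until memory becomes available when PR_WAITOK is set and the pool is
 * temporarily exhausted.
 */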
488 
489 struct pool_get_memory {
490 	struct mutex mtx;
491 	void * volatile v;
492 };
493 
494 /*
495  * Grab an item from the pool.
496  */
497 void *
498 pool_get(struct pool *pp, int flags)
499 {
500 	void *v = NULL;
501 	int slowdown = 0;
502 
503 #ifdef MULTIPROCESSOR
504 	if (pp->pr_cache != NULL) {
505 		v = pool_cache_get(pp);
506 		if (v != NULL)
507 			goto good;
508 	}
509 #endif
510 
511 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
512 
513 	mtx_enter(&pp->pr_mtx);
514 	if (pp->pr_nout >= pp->pr_hardlimit) {
515 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
516 			goto fail;
517 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
518 		if (ISSET(flags, PR_NOWAIT))
519 			goto fail;
520 	}
521 	mtx_leave(&pp->pr_mtx);
522 
523 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
524 		yield();
525 
526 	if (v == NULL) {
527 		struct pool_get_memory mem = {
528 		    MUTEX_INITIALIZER(pp->pr_ipl),
529 		    NULL };
530 		struct pool_request pr;
531 
532 		pool_request_init(&pr, pool_get_done, &mem);
533 		pool_request(pp, &pr);
534 
535 		mtx_enter(&mem.mtx);
536 		while (mem.v == NULL)
537 			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
538 		mtx_leave(&mem.mtx);
539 
540 		v = mem.v;
541 	}
542 
543 #ifdef MULTIPROCESSOR
544 good:
545 #endif
546 	if (ISSET(flags, PR_ZERO))
547 		memset(v, 0, pp->pr_size);
548 
549 	return (v);
550 
551 fail:
552 	pp->pr_nfail++;
553 	mtx_leave(&pp->pr_mtx);
554 	return (NULL);
555 }
556 
557 void
558 pool_get_done(void *xmem, void *v)
559 {
560 	struct pool_get_memory *mem = xmem;
561 
562 	mtx_enter(&mem->mtx);
563 	mem->v = v;
564 	mtx_leave(&mem->mtx);
565 
566 	wakeup_one(mem);
567 }
568 
569 void
570 pool_runqueue(struct pool *pp, int flags)
571 {
572 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
573 	struct pool_request *pr;
574 
575 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
576 	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
577 
578 	if (pp->pr_requesting++)
579 		return;
580 
581 	do {
582 		pp->pr_requesting = 1;
583 
584 		/* no TAILQ_JOIN? :( */
585 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
586 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
587 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
588 		}
589 		if (TAILQ_EMPTY(&prl))
590 			continue;
591 
592 		mtx_leave(&pp->pr_requests_mtx);
593 
594 		mtx_enter(&pp->pr_mtx);
595 		pr = TAILQ_FIRST(&prl);
596 		while (pr != NULL) {
597 			int slowdown = 0;
598 
599 			if (pp->pr_nout >= pp->pr_hardlimit)
600 				break;
601 
602 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
603 			if (pr->pr_item == NULL) /* || slowdown ? */
604 				break;
605 
606 			pr = TAILQ_NEXT(pr, pr_entry);
607 		}
608 		mtx_leave(&pp->pr_mtx);
609 
610 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
611 		    pr->pr_item != NULL) {
612 			TAILQ_REMOVE(&prl, pr, pr_entry);
613 			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
614 		}
615 
616 		mtx_enter(&pp->pr_requests_mtx);
617 	} while (--pp->pr_requesting);
618 
619 	/* no TAILQ_JOIN :( */
620 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
621 		TAILQ_REMOVE(&prl, pr, pr_entry);
622 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
623 	}
624 }
625 
626 void *
627 pool_do_get(struct pool *pp, int flags, int *slowdown)
628 {
629 	struct pool_item *pi;
630 	struct pool_page_header *ph;
631 
632 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
633 
634 	splassert(pp->pr_ipl);
635 
636 	/*
637 	 * Account for this item now to avoid races if we need to give up
638 	 * pr_mtx to allocate a page.
639 	 */
640 	pp->pr_nout++;
641 
642 	if (pp->pr_curpage == NULL) {
643 		mtx_leave(&pp->pr_mtx);
644 		ph = pool_p_alloc(pp, flags, slowdown);
645 		mtx_enter(&pp->pr_mtx);
646 
647 		if (ph == NULL) {
648 			pp->pr_nout--;
649 			return (NULL);
650 		}
651 
652 		pool_p_insert(pp, ph);
653 	}
654 
655 	ph = pp->pr_curpage;
656 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
657 	if (__predict_false(pi == NULL))
658 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
659 
660 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
661 		panic("%s: %s free list modified: "
662 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
663 		    __func__, pp->pr_wchan, ph->ph_page, pi,
664 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
665 	}
666 
667 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
668 
669 #ifdef DIAGNOSTIC
670 	if (pool_debug && POOL_PHPOISON(ph)) {
671 		size_t pidx;
672 		uint32_t pval;
673 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
674 		    &pidx, &pval)) {
675 			int *ip = (int *)(pi + 1);
676 			panic("%s: %s free list modified: "
677 			    "page %p; item addr %p; offset 0x%zx=0x%x",
678 			    __func__, pp->pr_wchan, ph->ph_page, pi,
679 			    pidx * sizeof(int), ip[pidx]);
680 		}
681 	}
682 #endif /* DIAGNOSTIC */
683 
684 	if (ph->ph_nmissing++ == 0) {
685 		/*
686 		 * This page was previously empty.  Move it to the list of
687 		 * partially-full pages.  This page is already curpage.
688 		 */
689 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
690 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
691 
692 		pp->pr_nidle--;
693 	}
694 
695 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
696 		/*
697 		 * This page is now full.  Move it to the full list
698 		 * and select a new current page.
699 		 */
700 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
701 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
702 		pool_update_curpage(pp);
703 	}
704 
705 	pp->pr_nget++;
706 
707 	return (pi);
708 }
709 
710 /*
711  * Return resource to the pool.
712  */
713 void
714 pool_put(struct pool *pp, void *v)
715 {
716 	struct pool_page_header *ph, *freeph = NULL;
717 
718 #ifdef DIAGNOSTIC
719 	if (v == NULL)
720 		panic("%s: NULL item", __func__);
721 #endif
722 
723 #ifdef MULTIPROCESSOR
724 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
725 		pool_cache_put(pp, v);
726 		return;
727 	}
728 #endif
729 
730 	mtx_enter(&pp->pr_mtx);
731 
732 	pool_do_put(pp, v);
733 
734 	pp->pr_nout--;
735 	pp->pr_nput++;
736 
737 	/* is it time to free a page? */
738 	if (pp->pr_nidle > pp->pr_maxpages &&
739 	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
740 	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
741 		freeph = ph;
742 		pool_p_remove(pp, freeph);
743 	}
744 
745 	mtx_leave(&pp->pr_mtx);
746 
747 	if (freeph != NULL)
748 		pool_p_free(pp, freeph);
749 
750 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
751 		mtx_enter(&pp->pr_requests_mtx);
752 		pool_runqueue(pp, PR_NOWAIT);
753 		mtx_leave(&pp->pr_requests_mtx);
754 	}
755 }
756 
757 void
758 pool_do_put(struct pool *pp, void *v)
759 {
760 	struct pool_item *pi = v;
761 	struct pool_page_header *ph;
762 
763 	splassert(pp->pr_ipl);
764 
765 	ph = pr_find_pagehead(pp, v);
766 
767 #ifdef DIAGNOSTIC
768 	if (pool_debug) {
769 		struct pool_item *qi;
770 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
771 			if (pi == qi) {
772 				panic("%s: %s: double pool_put: %p", __func__,
773 				    pp->pr_wchan, pi);
774 			}
775 		}
776 	}
777 #endif /* DIAGNOSTIC */
778 
779 	pi->pi_magic = POOL_IMAGIC(ph, pi);
780 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
781 #ifdef DIAGNOSTIC
782 	if (POOL_PHPOISON(ph))
783 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
784 #endif /* DIAGNOSTIC */
785 
786 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
787 		/*
788 		 * The page was previously completely full, move it to the
789 		 * partially-full list.
790 		 */
791 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
792 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
793 	}
794 
795 	if (ph->ph_nmissing == 0) {
796 		/*
797 		 * The page is now empty, so move it to the empty page list.
798 		 */
799 		pp->pr_nidle++;
800 
801 		ph->ph_tick = ticks;
802 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
803 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
804 		pool_update_curpage(pp);
805 	}
806 }
807 
808 /*
809  * Add N items to the pool.
810  */
811 int
812 pool_prime(struct pool *pp, int n)
813 {
814 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
815 	struct pool_page_header *ph;
816 	int newpages;
817 
818 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
819 
820 	while (newpages-- > 0) {
821 		int slowdown = 0;
822 
823 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
824 		if (ph == NULL) /* or slowdown? */
825 			break;
826 
827 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
828 	}
829 
830 	mtx_enter(&pp->pr_mtx);
831 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
832 		TAILQ_REMOVE(&pl, ph, ph_entry);
833 		pool_p_insert(pp, ph);
834 	}
835 	mtx_leave(&pp->pr_mtx);
836 
837 	return (0);
838 }
839 
840 struct pool_page_header *
841 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
842 {
843 	struct pool_page_header *ph;
844 	struct pool_item *pi;
845 	caddr_t addr;
846 	int n;
847 
848 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
849 	KASSERT(pp->pr_size >= sizeof(*pi));
850 
851 	addr = pool_allocator_alloc(pp, flags, slowdown);
852 	if (addr == NULL)
853 		return (NULL);
854 
855 	if (POOL_INPGHDR(pp))
856 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
857 	else {
858 		ph = pool_get(&phpool, flags);
859 		if (ph == NULL) {
860 			pool_allocator_free(pp, addr);
861 			return (NULL);
862 		}
863 	}
864 
865 	XSIMPLEQ_INIT(&ph->ph_items);
866 	ph->ph_page = addr;
867 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
868 	ph->ph_colored = addr;
869 	ph->ph_nmissing = 0;
870 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
871 #ifdef DIAGNOSTIC
872 	/* use a bit in ph_magic to record if we poison page items */
873 	if (pool_debug)
874 		SET(ph->ph_magic, POOL_MAGICBIT);
875 	else
876 		CLR(ph->ph_magic, POOL_MAGICBIT);
877 #endif /* DIAGNOSTIC */
878 
879 	n = pp->pr_itemsperpage;
880 	while (n--) {
881 		pi = (struct pool_item *)addr;
882 		pi->pi_magic = POOL_IMAGIC(ph, pi);
883 		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
884 
885 #ifdef DIAGNOSTIC
886 		if (POOL_PHPOISON(ph))
887 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
888 #endif /* DIAGNOSTIC */
889 
890 		addr += pp->pr_size;
891 	}
892 
893 	return (ph);
894 }
895 
896 void
897 pool_p_free(struct pool *pp, struct pool_page_header *ph)
898 {
899 	struct pool_item *pi;
900 
901 	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
902 	KASSERT(ph->ph_nmissing == 0);
903 
904 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
905 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
906 			panic("%s: %s free list modified: "
907 			    "page %p; item addr %p; offset 0x%x=0x%lx",
908 			    __func__, pp->pr_wchan, ph->ph_page, pi,
909 			    0, pi->pi_magic);
910 		}
911 
912 #ifdef DIAGNOSTIC
913 		if (POOL_PHPOISON(ph)) {
914 			size_t pidx;
915 			uint32_t pval;
916 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
917 			    &pidx, &pval)) {
918 				int *ip = (int *)(pi + 1);
919 				panic("%s: %s free list modified: "
920 				    "page %p; item addr %p; offset 0x%zx=0x%x",
921 				    __func__, pp->pr_wchan, ph->ph_page, pi,
922 				    pidx * sizeof(int), ip[pidx]);
923 			}
924 		}
925 #endif
926 	}
927 
928 	pool_allocator_free(pp, ph->ph_page);
929 
930 	if (!POOL_INPGHDR(pp))
931 		pool_put(&phpool, ph);
932 }
933 
934 void
935 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
936 {
937 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
938 
939 	/* If the pool was depleted, point at the new page */
940 	if (pp->pr_curpage == NULL)
941 		pp->pr_curpage = ph;
942 
943 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
944 	if (!POOL_INPGHDR(pp))
945 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
946 
947 	pp->pr_nitems += pp->pr_itemsperpage;
948 	pp->pr_nidle++;
949 
950 	pp->pr_npagealloc++;
951 	if (++pp->pr_npages > pp->pr_hiwat)
952 		pp->pr_hiwat = pp->pr_npages;
953 }
954 
955 void
956 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
957 {
958 	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
959 
960 	pp->pr_npagefree++;
961 	pp->pr_npages--;
962 	pp->pr_nidle--;
963 	pp->pr_nitems -= pp->pr_itemsperpage;
964 
965 	if (!POOL_INPGHDR(pp))
966 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
967 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
968 
969 	pool_update_curpage(pp);
970 }
971 
972 void
973 pool_update_curpage(struct pool *pp)
974 {
975 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
976 	if (pp->pr_curpage == NULL) {
977 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
978 	}
979 }
980 
981 void
982 pool_setlowat(struct pool *pp, int n)
983 {
984 	int prime = 0;
985 
986 	mtx_enter(&pp->pr_mtx);
987 	pp->pr_minitems = n;
988 	pp->pr_minpages = (n == 0)
989 		? 0
990 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
991 
992 	if (pp->pr_nitems < n)
993 		prime = n - pp->pr_nitems;
994 	mtx_leave(&pp->pr_mtx);
995 
996 	if (prime > 0)
997 		pool_prime(pp, prime);
998 }
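
/*
 * Editor's note (illustrative): pool_setlowat() records the low water mark
 * and immediately primes the pool up to it, so for example
 *
 *	pool_setlowat(&foo_pool, 32);
 *
 * pre-allocates enough pages to hold 32 items and keeps pool_reclaim() and
 * the page garbage collector from shrinking the pool below that level.
 * pool_sethiwat() below instead bounds the number of idle pages that
 * pool_put() keeps around.
 */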
999 
1000 void
1001 pool_sethiwat(struct pool *pp, int n)
1002 {
1003 	pp->pr_maxpages = (n == 0)
1004 		? 0
1005 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1006 }
1007 
1008 int
1009 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1010 {
1011 	int error = 0;
1012 
1013 	if (n < pp->pr_nout) {
1014 		error = EINVAL;
1015 		goto done;
1016 	}
1017 
1018 	pp->pr_hardlimit = n;
1019 	pp->pr_hardlimit_warning = warnmsg;
1020 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1021 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1022 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1023 
1024 done:
1025 	return (error);
1026 }
1027 
1028 void
1029 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1030 {
1031 	pp->pr_crange = mode;
1032 }
1033 
1034 /*
1035  * Release all completely empty pages that have not been used recently.
1036  *
1037  * Returns non-zero if any pages have been reclaimed.
1038  */
1039 int
1040 pool_reclaim(struct pool *pp)
1041 {
1042 	struct pool_page_header *ph, *phnext;
1043 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1044 
1045 	mtx_enter(&pp->pr_mtx);
1046 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1047 		phnext = TAILQ_NEXT(ph, ph_entry);
1048 
1049 		/* Check our minimum page claim */
1050 		if (pp->pr_npages <= pp->pr_minpages)
1051 			break;
1052 
1053 		/*
1054 		 * If freeing this page would put us below
1055 		 * the low water mark, stop now.
1056 		 */
1057 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1058 		    pp->pr_minitems)
1059 			break;
1060 
1061 		pool_p_remove(pp, ph);
1062 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1063 	}
1064 	mtx_leave(&pp->pr_mtx);
1065 
1066 	if (TAILQ_EMPTY(&pl))
1067 		return (0);
1068 
1069 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1070 		TAILQ_REMOVE(&pl, ph, ph_entry);
1071 		pool_p_free(pp, ph);
1072 	}
1073 
1074 	return (1);
1075 }
1076 
1077 /*
1078  * Release all completely empty pages that have not been used recently
1079  * from all pools.
1080  */
1081 void
1082 pool_reclaim_all(void)
1083 {
1084 	struct pool	*pp;
1085 
1086 	rw_enter_read(&pool_lock);
1087 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1088 		pool_reclaim(pp);
1089 	rw_exit_read(&pool_lock);
1090 }
1091 
1092 #ifdef DDB
1093 #include <machine/db_machdep.h>
1094 #include <ddb/db_output.h>
1095 
1096 /*
1097  * Diagnostic helpers.
1098  */
1099 void
1100 pool_printit(struct pool *pp, const char *modif,
1101     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1102 {
1103 	pool_print1(pp, modif, pr);
1104 }
1105 
1106 void
1107 pool_print_pagelist(struct pool_pagelist *pl,
1108     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1109 {
1110 	struct pool_page_header *ph;
1111 	struct pool_item *pi;
1112 
1113 	TAILQ_FOREACH(ph, pl, ph_entry) {
1114 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1115 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1116 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1117 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1118 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1119 				    pi, pi->pi_magic);
1120 			}
1121 		}
1122 	}
1123 }
1124 
1125 void
1126 pool_print1(struct pool *pp, const char *modif,
1127     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1128 {
1129 	struct pool_page_header *ph;
1130 	int print_pagelist = 0;
1131 	char c;
1132 
1133 	while ((c = *modif++) != '\0') {
1134 		if (c == 'p')
1135 			print_pagelist = 1;
1136 		modif++;
1137 	}
1138 
1139 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1140 	    pp->pr_maxcolors);
1141 	(*pr)("\talloc %p\n", pp->pr_alloc);
1142 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1143 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1144 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1145 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1146 
1147 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1148 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1149 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1150 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1151 
1152 	if (print_pagelist == 0)
1153 		return;
1154 
1155 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1156 		(*pr)("\n\tempty page list:\n");
1157 	pool_print_pagelist(&pp->pr_emptypages, pr);
1158 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1159 		(*pr)("\n\tfull page list:\n");
1160 	pool_print_pagelist(&pp->pr_fullpages, pr);
1161 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1162 		(*pr)("\n\tpartial-page list:\n");
1163 	pool_print_pagelist(&pp->pr_partpages, pr);
1164 
1165 	if (pp->pr_curpage == NULL)
1166 		(*pr)("\tno current page\n");
1167 	else
1168 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1169 }
1170 
1171 void
1172 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1173 {
1174 	struct pool *pp;
1175 	char maxp[16];
1176 	int ovflw;
1177 	char mode;
1178 
1179 	mode = modif[0];
1180 	if (mode != '\0' && mode != 'a') {
1181 		db_printf("usage: show all pools [/a]\n");
1182 		return;
1183 	}
1184 
1185 	if (mode == '\0')
1186 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1187 		    "Name",
1188 		    "Size",
1189 		    "Requests",
1190 		    "Fail",
1191 		    "Releases",
1192 		    "Pgreq",
1193 		    "Pgrel",
1194 		    "Npage",
1195 		    "Hiwat",
1196 		    "Minpg",
1197 		    "Maxpg",
1198 		    "Idle");
1199 	else
1200 		db_printf("%-12s %18s %18s\n",
1201 		    "Name", "Address", "Allocator");
1202 
1203 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1204 		if (mode == 'a') {
1205 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1206 			    pp->pr_alloc);
1207 			continue;
1208 		}
1209 
1210 		if (!pp->pr_nget)
1211 			continue;
1212 
1213 		if (pp->pr_maxpages == UINT_MAX)
1214 			snprintf(maxp, sizeof maxp, "inf");
1215 		else
1216 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1217 
1218 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1219 	(ovflw) += db_printf((fmt),			\
1220 	    (width) - (fixed) - (ovflw) > 0 ?		\
1221 	    (width) - (fixed) - (ovflw) : 0,		\
1222 	    (val)) - (width);				\
1223 	if ((ovflw) < 0)				\
1224 		(ovflw) = 0;				\
1225 } while (/* CONSTCOND */0)
1226 
1227 		ovflw = 0;
1228 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1229 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1230 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1231 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1232 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1233 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1234 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1235 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1236 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1237 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1238 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1239 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1240 
1241 		pool_chk(pp);
1242 	}
1243 }
1244 #endif /* DDB */
1245 
1246 #if defined(POOL_DEBUG) || defined(DDB)
1247 int
1248 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1249 {
1250 	struct pool_item *pi;
1251 	caddr_t page;
1252 	int n;
1253 	const char *label = pp->pr_wchan;
1254 
1255 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1256 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1257 		printf("%s: ", label);
1258 		printf("pool(%p:%s): page inconsistency: page %p; "
1259 		    "at page head addr %p (p %p)\n",
1260 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1261 		return 1;
1262 	}
1263 
1264 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1265 	     pi != NULL;
1266 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1267 		if ((caddr_t)pi < ph->ph_page ||
1268 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1269 			printf("%s: ", label);
1270 			printf("pool(%p:%s): page inconsistency: page %p;"
1271 			    " item ordinal %d; addr %p\n", pp,
1272 			    pp->pr_wchan, ph->ph_page, n, pi);
1273 			return (1);
1274 		}
1275 
1276 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1277 			printf("%s: ", label);
1278 			printf("pool(%p:%s): free list modified: "
1279 			    "page %p; item ordinal %d; addr %p "
1280 			    "(p %p); offset 0x%x=0x%lx\n",
1281 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1282 			    0, pi->pi_magic);
1283 		}
1284 
1285 #ifdef DIAGNOSTIC
1286 		if (POOL_PHPOISON(ph)) {
1287 			size_t pidx;
1288 			uint32_t pval;
1289 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1290 			    &pidx, &pval)) {
1291 				int *ip = (int *)(pi + 1);
1292 				printf("pool(%s): free list modified: "
1293 				    "page %p; item ordinal %d; addr %p "
1294 				    "(p %p); offset 0x%zx=0x%x\n",
1295 				    pp->pr_wchan, ph->ph_page, n, pi,
1296 				    page, pidx * sizeof(int), ip[pidx]);
1297 			}
1298 		}
1299 #endif /* DIAGNOSTIC */
1300 	}
1301 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1302 		printf("pool(%p:%s): page inconsistency: page %p;"
1303 		    " %d on list, %d missing, %d items per page\n", pp,
1304 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1305 		    pp->pr_itemsperpage);
1306 		return 1;
1307 	}
1308 	if (expected >= 0 && n != expected) {
1309 		printf("pool(%p:%s): page inconsistency: page %p;"
1310 		    " %d on list, %d missing, %d expected\n", pp,
1311 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1312 		    expected);
1313 		return 1;
1314 	}
1315 	return 0;
1316 }
1317 
1318 int
1319 pool_chk(struct pool *pp)
1320 {
1321 	struct pool_page_header *ph;
1322 	int r = 0;
1323 
1324 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1325 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1326 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1327 		r += pool_chk_page(pp, ph, 0);
1328 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1329 		r += pool_chk_page(pp, ph, -1);
1330 
1331 	return (r);
1332 }
1333 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1334 
1335 #ifdef DDB
1336 void
1337 pool_walk(struct pool *pp, int full,
1338     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1339     void (*func)(void *, int, int (*)(const char *, ...)
1340 	    __attribute__((__format__(__kprintf__,1,2)))))
1341 {
1342 	struct pool_page_header *ph;
1343 	struct pool_item *pi;
1344 	caddr_t cp;
1345 	int n;
1346 
1347 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1348 		cp = ph->ph_colored;
1349 		n = ph->ph_nmissing;
1350 
1351 		while (n--) {
1352 			func(cp, full, pr);
1353 			cp += pp->pr_size;
1354 		}
1355 	}
1356 
1357 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1358 		cp = ph->ph_colored;
1359 		n = ph->ph_nmissing;
1360 
1361 		do {
1362 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1363 				if (cp == (caddr_t)pi)
1364 					break;
1365 			}
1366 			if (cp != (caddr_t)pi) {
1367 				func(cp, full, pr);
1368 				n--;
1369 			}
1370 
1371 			cp += pp->pr_size;
1372 		} while (n > 0);
1373 	}
1374 }
1375 #endif
1376 
1377 /*
1378  * We have several different sysctls.
1379  * kern.pool.npools - the number of pools.
1380  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1381  * kern.pool.name.<pool#> - the name for pool#.
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS - per-pool cache statistics.
1382  */
1383 int
1384 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1385 {
1386 	struct kinfo_pool pi;
1387 	struct pool *pp;
1388 	int rv = ENOENT;
1389 
1390 	switch (name[0]) {
1391 	case KERN_POOL_NPOOLS:
1392 		if (namelen != 1)
1393 			return (ENOTDIR);
1394 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1395 
1396 	case KERN_POOL_NAME:
1397 	case KERN_POOL_POOL:
1398 	case KERN_POOL_CACHE:
1399 	case KERN_POOL_CACHE_CPUS:
1400 		break;
1401 	default:
1402 		return (EOPNOTSUPP);
1403 	}
1404 
1405 	if (namelen != 2)
1406 		return (ENOTDIR);
1407 
1408 	rw_enter_read(&pool_lock);
1409 
1410 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1411 		if (name[1] == pp->pr_serial)
1412 			break;
1413 	}
1414 
1415 	if (pp == NULL)
1416 		goto done;
1417 
1418 	switch (name[0]) {
1419 	case KERN_POOL_NAME:
1420 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1421 		break;
1422 	case KERN_POOL_POOL:
1423 		memset(&pi, 0, sizeof(pi));
1424 
1425 		mtx_enter(&pp->pr_mtx);
1426 		pi.pr_size = pp->pr_size;
1427 		pi.pr_pgsize = pp->pr_pgsize;
1428 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1429 		pi.pr_npages = pp->pr_npages;
1430 		pi.pr_minpages = pp->pr_minpages;
1431 		pi.pr_maxpages = pp->pr_maxpages;
1432 		pi.pr_hardlimit = pp->pr_hardlimit;
1433 		pi.pr_nout = pp->pr_nout;
1434 		pi.pr_nitems = pp->pr_nitems;
1435 		pi.pr_nget = pp->pr_nget;
1436 		pi.pr_nput = pp->pr_nput;
1437 		pi.pr_nfail = pp->pr_nfail;
1438 		pi.pr_npagealloc = pp->pr_npagealloc;
1439 		pi.pr_npagefree = pp->pr_npagefree;
1440 		pi.pr_hiwat = pp->pr_hiwat;
1441 		pi.pr_nidle = pp->pr_nidle;
1442 		mtx_leave(&pp->pr_mtx);
1443 
1444 		pool_cache_pool_info(pp, &pi);
1445 
1446 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1447 		break;
1448 
1449 	case KERN_POOL_CACHE:
1450 		rv = pool_cache_info(pp, oldp, oldlenp);
1451 		break;
1452 
1453 	case KERN_POOL_CACHE_CPUS:
1454 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1455 		break;
1456 	}
1457 
1458 done:
1459 	rw_exit_read(&pool_lock);
1460 
1461 	return (rv);
1462 }
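
/*
 * Illustrative sketch (editor's addition): how userland might read one of
 * these nodes with sysctl(2); error handling is omitted and "serial" stands
 * for the pr_serial of the pool of interest.
 *
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 *
 * Utilities such as vmstat(8) and systat(1) gather their pool statistics
 * this way.
 */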
1463 
1464 void
1465 pool_gc_sched(void *null)
1466 {
1467 	task_add(systqmp, &pool_gc_task);
1468 }
1469 
1470 void
1471 pool_gc_pages(void *null)
1472 {
1473 	struct pool *pp;
1474 	struct pool_page_header *ph, *freeph;
1475 	int s;
1476 
1477 	rw_enter_read(&pool_lock);
1478 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1479 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1480 #ifdef MULTIPROCESSOR
1481 		if (pp->pr_cache != NULL)
1482 			pool_cache_gc(pp);
1483 #endif
1484 
1485 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1486 		    !mtx_enter_try(&pp->pr_mtx)) /* try */
1487 			continue;
1488 
1489 		/* is it time to free a page? */
1490 		if (pp->pr_nidle > pp->pr_minpages &&
1491 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1492 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1493 			freeph = ph;
1494 			pool_p_remove(pp, freeph);
1495 		} else
1496 			freeph = NULL;
1497 
1498 		mtx_leave(&pp->pr_mtx);
1499 
1500 		if (freeph != NULL)
1501 			pool_p_free(pp, freeph);
1502 	}
1503 	splx(s);
1504 	rw_exit_read(&pool_lock);
1505 
1506 	timeout_add_sec(&pool_gc_tick, 1);
1507 }
1508 
1509 /*
1510  * Pool backend allocators.
1511  */
1512 
1513 void *
1514 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1515 {
1516 	void *v;
1517 
1518 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1519 
1520 #ifdef DIAGNOSTIC
1521 	if (v != NULL && POOL_INPGHDR(pp)) {
1522 		vaddr_t addr = (vaddr_t)v;
1523 		if ((addr & pp->pr_pgmask) != addr) {
1524 			panic("%s: %s page address %p isn't aligned to %u",
1525 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1526 		}
1527 	}
1528 #endif
1529 
1530 	return (v);
1531 }
1532 
1533 void
1534 pool_allocator_free(struct pool *pp, void *v)
1535 {
1536 	struct pool_allocator *pa = pp->pr_alloc;
1537 
1538 	(*pa->pa_free)(pp, v);
1539 }
1540 
1541 void *
1542 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1543 {
1544 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1545 
1546 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1547 	kd.kd_slowdown = slowdown;
1548 
1549 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1550 }
1551 
1552 void
1553 pool_page_free(struct pool *pp, void *v)
1554 {
1555 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1556 }
1557 
1558 void *
1559 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1560 {
1561 	struct kmem_va_mode kv = kv_intrsafe;
1562 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1563 	void *v;
1564 	int s;
1565 
1566 	if (POOL_INPGHDR(pp))
1567 		kv.kv_align = pp->pr_pgsize;
1568 
1569 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1570 	kd.kd_slowdown = slowdown;
1571 
1572 	s = splvm();
1573 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1574 	splx(s);
1575 
1576 	return (v);
1577 }
1578 
1579 void
1580 pool_multi_free(struct pool *pp, void *v)
1581 {
1582 	struct kmem_va_mode kv = kv_intrsafe;
1583 	int s;
1584 
1585 	if (POOL_INPGHDR(pp))
1586 		kv.kv_align = pp->pr_pgsize;
1587 
1588 	s = splvm();
1589 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1590 	splx(s);
1591 }
1592 
1593 void *
1594 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1595 {
1596 	struct kmem_va_mode kv = kv_any;
1597 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1598 	void *v;
1599 
1600 	if (POOL_INPGHDR(pp))
1601 		kv.kv_align = pp->pr_pgsize;
1602 
1603 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1604 	kd.kd_slowdown = slowdown;
1605 
1606 	KERNEL_LOCK();
1607 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1608 	KERNEL_UNLOCK();
1609 
1610 	return (v);
1611 }
1612 
1613 void
1614 pool_multi_free_ni(struct pool *pp, void *v)
1615 {
1616 	struct kmem_va_mode kv = kv_any;
1617 
1618 	if (POOL_INPGHDR(pp))
1619 		kv.kv_align = pp->pr_pgsize;
1620 
1621 	KERNEL_LOCK();
1622 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1623 	KERNEL_UNLOCK();
1624 }
1625 
1626 #ifdef MULTIPROCESSOR
1627 
1628 struct pool pool_caches; /* per cpu cache entries */
1629 
1630 void
1631 pool_cache_init(struct pool *pp)
1632 {
1633 	struct cpumem *cm;
1634 	struct pool_cache *pc;
1635 	struct cpumem_iter i;
1636 
1637 	if (pool_caches.pr_size == 0) {
1638 		pool_init(&pool_caches, sizeof(struct pool_cache),
1639 		    CACHELINESIZE, IPL_NONE, PR_WAITOK, "plcache", NULL);
1640 	}
1641 
1642 	/* must be able to use the pool items as cache list items */
1643 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1644 
1645 	cm = cpumem_get(&pool_caches);
1646 
1647 	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
1648 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1649 	TAILQ_INIT(&pp->pr_cache_lists);
1650 	pp->pr_cache_nitems = 0;
1651 	pp->pr_cache_tick = ticks;
1652 	pp->pr_cache_items = 8;
1653 	pp->pr_cache_contention = 0;
1654 	pp->pr_cache_ngc = 0;
1655 
1656 	CPUMEM_FOREACH(pc, &i, cm) {
1657 		pc->pc_actv = NULL;
1658 		pc->pc_nactv = 0;
1659 		pc->pc_prev = NULL;
1660 
1661 		pc->pc_nget = 0;
1662 		pc->pc_nfail = 0;
1663 		pc->pc_nput = 0;
1664 		pc->pc_nlget = 0;
1665 		pc->pc_nlfail = 0;
1666 		pc->pc_nlput = 0;
1667 		pc->pc_nout = 0;
1668 	}
1669 
1670 	membar_producer();
1671 
1672 	pp->pr_cache = cm;
1673 }
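
/*
 * Illustrative sketch (editor's addition): a subsystem opts a pool into the
 * per-CPU cache after initializing it; pool_get() and pool_put() are then
 * served from per-CPU item lists whenever possible.
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, 0,
 *	    "foopl", NULL);
 *	pool_cache_init(&foo_pool);
 *
 * The pool's item size must be at least sizeof(struct pool_cache_item),
 * which the KASSERT above enforces.
 */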
1674 
1675 static inline void
1676 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1677 {
1678 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1679 
1680 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1681 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1682 }
1683 
1684 static inline void
1685 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1686 {
1687 	unsigned long *entry;
1688 	unsigned long val;
1689 
1690 	entry = (unsigned long *)&ci->ci_nextl;
1691 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1692 	if (*entry != val)
1693 		goto fail;
1694 
1695 	entry++;
1696 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1697 	if (*entry != val)
1698 		goto fail;
1699 
1700 	return;
1701 
1702 fail:
1703 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1704 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1705 	    *entry, val);
1706 }
1707 
1708 static inline void
1709 pool_list_enter(struct pool *pp)
1710 {
1711 	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
1712 		mtx_enter(&pp->pr_cache_mtx);
1713 		pp->pr_cache_contention++;
1714 	}
1715 }
1716 
1717 static inline void
1718 pool_list_leave(struct pool *pp)
1719 {
1720 	mtx_leave(&pp->pr_cache_mtx);
1721 }
1722 
1723 static inline struct pool_cache_item *
1724 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1725 {
1726 	struct pool_cache_item *pl;
1727 
1728 	pool_list_enter(pp);
1729 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1730 	if (pl != NULL) {
1731 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1732 		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1733 
1734 		pool_cache_item_magic(pp, pl);
1735 
1736 		pc->pc_nlget++;
1737 	} else
1738 		pc->pc_nlfail++;
1739 
1740 	/* fold this CPU's nout into the global while we have the lock */
1741 	pp->pr_cache_nout += pc->pc_nout;
1742 	pc->pc_nout = 0;
1743 	pool_list_leave(pp);
1744 
1745 	return (pl);
1746 }
1747 
1748 static inline void
1749 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1750     struct pool_cache_item *ci)
1751 {
1752 	pool_list_enter(pp);
1753 	if (TAILQ_EMPTY(&pp->pr_cache_lists))
1754 		pp->pr_cache_tick = ticks;
1755 
1756 	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1757 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1758 
1759 	pc->pc_nlput++;
1760 
1761 	/* fold this CPU's nout into the global while we have the lock */
1762 	pp->pr_cache_nout += pc->pc_nout;
1763 	pc->pc_nout = 0;
1764 	pool_list_leave(pp);
1765 }
1766 
1767 static inline struct pool_cache *
1768 pool_cache_enter(struct pool *pp, int *s)
1769 {
1770 	struct pool_cache *pc;
1771 
1772 	pc = cpumem_enter(pp->pr_cache);
1773 	*s = splraise(pp->pr_ipl);
1774 	pc->pc_gen++;
1775 
1776 	return (pc);
1777 }
1778 
1779 static inline void
1780 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1781 {
1782 	pc->pc_gen++;
1783 	splx(s);
1784 	cpumem_leave(pp->pr_cache, pc);
1785 }
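
/*
 * Editor's note (illustrative): pc_gen acts like a sequence lock.  It is
 * incremented on enter and again on leave, so it is odd only while the
 * owning CPU is inside its cache.  Readers such as pool_cache_pool_info()
 * spin (yielding) while the value is odd and retry if it changed across the
 * read:
 *
 *	do {
 *		while ((gen = pc->pc_gen) & 1)
 *			yield();
 *		... copy the counters ...
 *	} while (gen != pc->pc_gen);
 */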
1786 
1787 void *
1788 pool_cache_get(struct pool *pp)
1789 {
1790 	struct pool_cache *pc;
1791 	struct pool_cache_item *ci;
1792 	int s;
1793 
1794 	pc = pool_cache_enter(pp, &s);
1795 
1796 	if (pc->pc_actv != NULL) {
1797 		ci = pc->pc_actv;
1798 	} else if (pc->pc_prev != NULL) {
1799 		ci = pc->pc_prev;
1800 		pc->pc_prev = NULL;
1801 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1802 		pc->pc_nfail++;
1803 		goto done;
1804 	}
1805 
1806 	pool_cache_item_magic_check(pp, ci);
1807 #ifdef DIAGNOSTIC
1808 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1809 		size_t pidx;
1810 		uint32_t pval;
1811 
1812 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1813 		    &pidx, &pval)) {
1814 			int *ip = (int *)(ci + 1);
1815 			ip += pidx;
1816 
1817 			panic("%s: %s cpu free list modified: "
1818 			    "item addr %p+%zu 0x%x!=0x%x",
1819 			    __func__, pp->pr_wchan, ci,
1820 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1821 		}
1822 	}
1823 #endif
1824 
1825 	pc->pc_actv = ci->ci_next;
1826 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1827 	pc->pc_nget++;
1828 	pc->pc_nout++;
1829 
1830 done:
1831 	pool_cache_leave(pp, pc, s);
1832 
1833 	return (ci);
1834 }
1835 
1836 void
1837 pool_cache_put(struct pool *pp, void *v)
1838 {
1839 	struct pool_cache *pc;
1840 	struct pool_cache_item *ci = v;
1841 	unsigned long nitems;
1842 	int s;
1843 #ifdef DIAGNOSTIC
1844 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1845 
1846 	if (poison)
1847 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1848 #endif
1849 
1850 	pc = pool_cache_enter(pp, &s);
1851 
1852 	nitems = pc->pc_nactv;
1853 	if (nitems >= pp->pr_cache_items) {
1854 		if (pc->pc_prev != NULL)
1855 			pool_cache_list_free(pp, pc, pc->pc_prev);
1856 
1857 		pc->pc_prev = pc->pc_actv;
1858 
1859 		pc->pc_actv = NULL;
1860 		pc->pc_nactv = 0;
1861 		nitems = 0;
1862 	}
1863 
1864 	ci->ci_next = pc->pc_actv;
1865 	ci->ci_nitems = ++nitems;
1866 #ifdef DIAGNOSTIC
1867 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1868 #endif
1869 	pool_cache_item_magic(pp, ci);
1870 
1871 	pc->pc_actv = ci;
1872 	pc->pc_nactv = nitems;
1873 
1874 	pc->pc_nput++;
1875 	pc->pc_nout--;
1876 
1877 	pool_cache_leave(pp, pc, s);
1878 }
1879 
1880 struct pool_cache_item *
1881 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1882 {
1883 	struct pool_cache_item *rpl, *next;
1884 
1885 	if (pl == NULL)
1886 		return (NULL);
1887 
1888 	rpl = TAILQ_NEXT(pl, ci_nextl);
1889 
1890 	mtx_enter(&pp->pr_mtx);
1891 	do {
1892 		next = pl->ci_next;
1893 		pool_do_put(pp, pl);
1894 		pl = next;
1895 	} while (pl != NULL);
1896 	mtx_leave(&pp->pr_mtx);
1897 
1898 	return (rpl);
1899 }
1900 
1901 void
1902 pool_cache_destroy(struct pool *pp)
1903 {
1904 	struct pool_cache *pc;
1905 	struct pool_cache_item *pl;
1906 	struct cpumem_iter i;
1907 	struct cpumem *cm;
1908 
1909 	rw_enter_write(&pool_lock); /* serialise with the gc */
1910 	cm = pp->pr_cache;
1911 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1912 	rw_exit_write(&pool_lock);
1913 
1914 	CPUMEM_FOREACH(pc, &i, cm) {
1915 		pool_cache_list_put(pp, pc->pc_actv);
1916 		pool_cache_list_put(pp, pc->pc_prev);
1917 	}
1918 
1919 	cpumem_put(&pool_caches, cm);
1920 
1921 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1922 	while (pl != NULL)
1923 		pl = pool_cache_list_put(pp, pl);
1924 }
1925 
1926 void
1927 pool_cache_gc(struct pool *pp)
1928 {
1929 	unsigned int contention;
1930 
1931 	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
1932 	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
1933 	    mtx_enter_try(&pp->pr_cache_mtx)) {
1934 		struct pool_cache_item *pl = NULL;
1935 
1936 		pl = TAILQ_FIRST(&pp->pr_cache_lists);
1937 		if (pl != NULL) {
1938 			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1939 			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1940 			pp->pr_cache_tick = ticks;
1941 
1942 			pp->pr_cache_ngc++;
1943 		}
1944 
1945 		mtx_leave(&pp->pr_cache_mtx);
1946 
1947 		pool_cache_list_put(pp, pl);
1948 	}
1949 
1950 	/*
1951 	 * if there's a lot of contention on the pr_cache_mtx then consider
1952 	 * growing the length of the list to reduce the need to access the
1953 	 * global pool.
1954 	 */
1955 
1956 	contention = pp->pr_cache_contention;
1957 	if ((contention - pp->pr_cache_contention_prev) > 8 /* magic */) {
1958 		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
1959 			pp->pr_cache_items += 8;
1960 	}
1961 	pp->pr_cache_contention_prev = contention;
1962 }
1963 
1964 void
1965 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
1966 {
1967 	struct pool_cache *pc;
1968 	struct cpumem_iter i;
1969 
1970 	if (pp->pr_cache == NULL)
1971 		return;
1972 
1973 	/* loop through the caches twice to collect stats */
1974 
1975 	/* once without the mtx so we can yield while reading nget/nput */
1976 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
1977 		uint64_t gen, nget, nput;
1978 
1979 		do {
1980 			while ((gen = pc->pc_gen) & 1)
1981 				yield();
1982 
1983 			nget = pc->pc_nget;
1984 			nput = pc->pc_nput;
1985 		} while (gen != pc->pc_gen);
1986 
1987 		pi->pr_nget += nget;
1988 		pi->pr_nput += nput;
1989 	}
1990 
1991 	/* and once with the mtx so we can get consistent nout values */
1992 	mtx_enter(&pp->pr_cache_mtx);
1993 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
1994 		pi->pr_nout += pc->pc_nout;
1995 
1996 	pi->pr_nout += pp->pr_cache_nout;
1997 	mtx_leave(&pp->pr_cache_mtx);
1998 }
1999 
2000 int
2001 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2002 {
2003 	struct kinfo_pool_cache kpc;
2004 
2005 	if (pp->pr_cache == NULL)
2006 		return (EOPNOTSUPP);
2007 
2008 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
2009 
2010 	mtx_enter(&pp->pr_cache_mtx);
2011 	kpc.pr_ngc = pp->pr_cache_ngc;
2012 	kpc.pr_len = pp->pr_cache_items;
2013 	kpc.pr_nitems = pp->pr_cache_nitems;
2014 	kpc.pr_contention = pp->pr_cache_contention;
2015 	mtx_leave(&pp->pr_cache_mtx);
2016 
2017 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
2018 }
2019 
2020 int
2021 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2022 {
2023 	struct pool_cache *pc;
2024 	struct kinfo_pool_cache_cpu *kpcc, *info;
2025 	unsigned int cpu = 0;
2026 	struct cpumem_iter i;
2027 	int error = 0;
2028 	size_t len;
2029 
2030 	if (pp->pr_cache == NULL)
2031 		return (EOPNOTSUPP);
2032 	if (*oldlenp % sizeof(*kpcc))
2033 		return (EINVAL);
2034 
2035 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
2036 	    M_WAITOK|M_CANFAIL|M_ZERO);
2037 	if (kpcc == NULL)
2038 		return (EIO);
2039 
2040 	len = ncpusfound * sizeof(*kpcc);
2041 
2042 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2043 		uint64_t gen;
2044 
2045 		if (cpu >= ncpusfound) {
2046 			error = EIO;
2047 			goto err;
2048 		}
2049 
2050 		info = &kpcc[cpu];
2051 		info->pr_cpu = cpu;
2052 
2053 		do {
2054 			while ((gen = pc->pc_gen) & 1)
2055 				yield();
2056 
2057 			info->pr_nget = pc->pc_nget;
2058 			info->pr_nfail = pc->pc_nfail;
2059 			info->pr_nput = pc->pc_nput;
2060 			info->pr_nlget = pc->pc_nlget;
2061 			info->pr_nlfail = pc->pc_nlfail;
2062 			info->pr_nlput = pc->pc_nlput;
2063 		} while (gen != pc->pc_gen);
2064 
2065 		cpu++;
2066 	}
2067 
2068 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2069 err:
2070 	free(kpcc, M_TEMP, len);
2071 
2072 	return (error);
2073 }
2074 #else /* MULTIPROCESSOR */
2075 void
2076 pool_cache_init(struct pool *pp)
2077 {
2078 	/* nop */
2079 }
2080 
2081 void
2082 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2083 {
2084 	/* nop */
2085 }
2086 
2087 int
2088 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2089 {
2090 	return (EOPNOTSUPP);
2091 }
2092 
2093 int
2094 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2095 {
2096 	return (EOPNOTSUPP);
2097 }
2098 #endif /* MULTIPROCESSOR */
2099