xref: /openbsd-src/sys/kern/subr_pool.c (revision 7d335b5a08ebbd3bbbddac55da0218b99f53611a)
1 /*	$OpenBSD: subr_pool.c,v 1.225 2019/02/10 20:05:04 tedu Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/pool.h>
40 #include <sys/proc.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 #include <sys/task.h>
44 #include <sys/timeout.h>
45 #include <sys/percpu.h>
46 
47 #include <uvm/uvm_extern.h>
48 
49 /*
50  * Pool resource management utility.
51  *
52  * Memory is allocated in pages which are split into pieces according to
53  * the pool item size. Each page is kept on one of three lists in the
54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55  * for empty, full and partially-full pages respectively. The individual
56  * pool items are on a linked list headed by `ph_items' in each page
57  * header. The memory for building the page list is either taken from
58  * the allocated pages themselves (for small pool items) or taken from
59  * an internal pool of page headers (`phpool').
60  */
61 
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 
65 /*
66  * Every pool gets a unique serial number assigned to it. If this counter
67  * wraps, we're screwed, but we shouldn't create so many pools anyway.
68  */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71 
72 /* Lock the previous variables making up the global pool state */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 
75 /* Private pool for page header structures */
76 struct pool phpool;
77 
78 struct pool_lock_ops {
79 	void	(*pl_init)(struct pool *, union pool_lock *,
80 		    const struct lock_type *);
81 	void	(*pl_enter)(union pool_lock * LOCK_FL_VARS);
82 	int	(*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
83 	void	(*pl_leave)(union pool_lock * LOCK_FL_VARS);
84 	void	(*pl_assert_locked)(union pool_lock *);
85 	void	(*pl_assert_unlocked)(union pool_lock *);
86 	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
87 };
88 
89 static const struct pool_lock_ops pool_lock_ops_mtx;
90 static const struct pool_lock_ops pool_lock_ops_rw;
91 
92 #ifdef WITNESS
93 #define pl_init(pp, pl) do {						\
94 	static const struct lock_type __lock_type = { .lt_name = #pl };	\
95 	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
96 } while (0)
97 #else /* WITNESS */
98 #define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
99 #endif /* WITNESS */
100 
101 static inline void
102 pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
103 {
104 	pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
105 }
106 static inline int
107 pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
108 {
109 	return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
110 }
111 static inline void
112 pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
113 {
114 	pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
115 }
116 static inline void
117 pl_assert_locked(struct pool *pp, union pool_lock *pl)
118 {
119 	pp->pr_lock_ops->pl_assert_locked(pl);
120 }
121 static inline void
122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
123 {
124 	pp->pr_lock_ops->pl_assert_unlocked(pl);
125 }
126 static inline int
127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
128     const char *wmesg, int timo)
129 {
130 	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
131 }
132 
133 #ifdef WITNESS
134 # define pl_enter(pp,pl)	pl_enter(pp,pl LOCK_FILE_LINE)
135 # define pl_enter_try(pp,pl)	pl_enter_try(pp,pl LOCK_FILE_LINE)
136 # define pl_leave(pp,pl)	pl_leave(pp,pl LOCK_FILE_LINE)
137 #endif
138 
139 struct pool_item {
140 	u_long				pi_magic;
141 	XSIMPLEQ_ENTRY(pool_item)	pi_list;
142 };
143 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
144 
145 struct pool_page_header {
146 	/* Page headers */
147 	TAILQ_ENTRY(pool_page_header)
148 				ph_entry;	/* pool page list */
149 	XSIMPLEQ_HEAD(, pool_item)
150 				ph_items;	/* free items on the page */
151 	RBT_ENTRY(pool_page_header)
152 				ph_node;	/* off-page page headers */
153 	unsigned int		ph_nmissing;	/* # of chunks in use */
154 	caddr_t			ph_page;	/* this page's address */
155 	caddr_t			ph_colored;	/* page's colored address */
156 	unsigned long		ph_magic;
157 	int			ph_tick;
158 	int			ph_flags;
159 };
160 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
161 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
162 
163 #ifdef MULTIPROCESSOR
164 struct pool_cache_item {
165 	struct pool_cache_item	*ci_next;	/* next item in list */
166 	unsigned long		 ci_nitems;	/* number of items in list */
167 	TAILQ_ENTRY(pool_cache_item)
168 				 ci_nextl;	/* entry in list of lists */
169 };
170 
/*
 * we store whether the cached item is poisoned in a high bit (bit 27)
 * of ci_nitems; the bits below it hold the number of items
 */
172 #define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
173 #define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL
174 
175 #define POOL_CACHE_ITEM_NITEMS(_ci)					\
176     ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
177 
178 #define POOL_CACHE_ITEM_POISONED(_ci)					\
179     ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
180 
181 struct pool_cache {
182 	struct pool_cache_item	*pc_actv;	/* active list of items */
183 	unsigned long		 pc_nactv;	/* actv head nitems cache */
184 	struct pool_cache_item	*pc_prev;	/* previous list of items */
185 
186 	uint64_t		 pc_gen;	/* generation number */
187 	uint64_t		 pc_nget;	/* # of successful requests */
188 	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
189 	uint64_t		 pc_nput;	/* # of releases */
190 	uint64_t		 pc_nlget;	/* # of list requests */
191 	uint64_t		 pc_nlfail;	/* # of fails getting a list */
192 	uint64_t		 pc_nlput;	/* # of list releases */
193 
194 	int			 pc_nout;
195 };
196 
197 void	*pool_cache_get(struct pool *);
198 void	 pool_cache_put(struct pool *, void *);
199 void	 pool_cache_destroy(struct pool *);
200 void	 pool_cache_gc(struct pool *);
201 #endif
202 void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
203 int	 pool_cache_info(struct pool *, void *, size_t *);
204 int	 pool_cache_cpus_info(struct pool *, void *, size_t *);
205 
206 #ifdef POOL_DEBUG
207 int	pool_debug = 1;
208 #else
209 int	pool_debug = 0;
210 #endif
211 
212 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
213 
214 struct pool_page_header *
215 	 pool_p_alloc(struct pool *, int, int *);
216 void	 pool_p_insert(struct pool *, struct pool_page_header *);
217 void	 pool_p_remove(struct pool *, struct pool_page_header *);
218 void	 pool_p_free(struct pool *, struct pool_page_header *);
219 
220 void	 pool_update_curpage(struct pool *);
221 void	*pool_do_get(struct pool *, int, int *);
222 void	 pool_do_put(struct pool *, void *);
223 int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
224 int	 pool_chk(struct pool *);
225 void	 pool_get_done(struct pool *, void *, void *);
226 void	 pool_runqueue(struct pool *, int);
227 
228 void	*pool_allocator_alloc(struct pool *, int, int *);
229 void	 pool_allocator_free(struct pool *, int, void *);
230 
231 /*
232  * The default pool allocator.
233  */
234 void	*pool_page_alloc(struct pool *, int, int *);
235 void	pool_page_free(struct pool *, int, void *);
236 
237 /*
238  * safe for interrupts; this is the default allocator
239  */
240 struct pool_allocator pool_allocator_single = {
241 	pool_page_alloc,
242 	pool_page_free,
243 	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
244 };
245 
246 void	*pool_multi_alloc(struct pool *, int, int *);
247 void	pool_multi_free(struct pool *, int, void *);
248 
249 struct pool_allocator pool_allocator_multi = {
250 	pool_multi_alloc,
251 	pool_multi_free,
252 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
253 };
254 
255 void	*pool_multi_alloc_ni(struct pool *, int, int *);
256 void	pool_multi_free_ni(struct pool *, int, void *);
257 
258 struct pool_allocator pool_allocator_multi_ni = {
259 	pool_multi_alloc_ni,
260 	pool_multi_free_ni,
261 	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
262 };
263 
264 #ifdef DDB
265 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
266 	     __attribute__((__format__(__kprintf__,1,2))));
267 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
268 	     __attribute__((__format__(__kprintf__,1,2))));
269 #endif
270 
271 /* stale page garbage collectors */
272 void	pool_gc_sched(void *);
273 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
274 void	pool_gc_pages(void *);
275 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
276 int pool_wait_free = 1;
277 int pool_wait_gc = 8;
278 
279 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
280 
281 static inline int
282 phtree_compare(const struct pool_page_header *a,
283     const struct pool_page_header *b)
284 {
285 	vaddr_t va = (vaddr_t)a->ph_page;
286 	vaddr_t vb = (vaddr_t)b->ph_page;
287 
288 	/* the compares in this order are important for the NFIND to work */
289 	if (vb < va)
290 		return (-1);
291 	if (vb > va)
292 		return (1);
293 
294 	return (0);
295 }
296 
297 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
298 
299 /*
300  * Return the pool page header based on page address.
301  */
302 static inline struct pool_page_header *
303 pr_find_pagehead(struct pool *pp, void *v)
304 {
305 	struct pool_page_header *ph, key;
306 
307 	if (POOL_INPGHDR(pp)) {
308 		caddr_t page;
309 
310 		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
311 
312 		return ((struct pool_page_header *)(page + pp->pr_phoffset));
313 	}
314 
315 	key.ph_page = v;
316 	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
317 	if (ph == NULL)
318 		panic("%s: %s: page header missing", __func__, pp->pr_wchan);
319 
320 	KASSERT(ph->ph_page <= (caddr_t)v);
321 	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
322 		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
323 
324 	return (ph);
325 }
326 
327 /*
328  * Initialize the given pool resource structure.
329  *
330  * We export this routine to allow other kernel parts to declare
331  * static pools that must be initialized before malloc() is available.
332  */
333 void
334 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
335     const char *wchan, struct pool_allocator *palloc)
336 {
337 	int off = 0, space;
338 	unsigned int pgsize = PAGE_SIZE, items;
339 	size_t pa_pagesz;
340 #ifdef DIAGNOSTIC
341 	struct pool *iter;
342 #endif
343 
344 	if (align == 0)
345 		align = ALIGN(1);
346 
347 	if (size < sizeof(struct pool_item))
348 		size = sizeof(struct pool_item);
349 
350 	size = roundup(size, align);
351 
352 	while (size * 8 > pgsize)
353 		pgsize <<= 1;
354 
355 	if (palloc == NULL) {
356 		if (pgsize > PAGE_SIZE) {
357 			palloc = ISSET(flags, PR_WAITOK) ?
358 			    &pool_allocator_multi_ni : &pool_allocator_multi;
359 		} else
360 			palloc = &pool_allocator_single;
361 
362 		pa_pagesz = palloc->pa_pagesz;
363 	} else {
364 		size_t pgsizes;
365 
366 		pa_pagesz = palloc->pa_pagesz;
367 		if (pa_pagesz == 0)
368 			pa_pagesz = POOL_ALLOC_DEFAULT;
369 
370 		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
371 
372 		/* make sure the allocator can fit at least one item */
373 		if (size > pgsizes) {
374 			panic("%s: pool %s item size 0x%zx > "
375 			    "allocator %p sizes 0x%zx", __func__, wchan,
376 			    size, palloc, pgsizes);
377 		}
378 
379 		/* shrink pgsize until it fits into the range */
380 		while (!ISSET(pgsizes, pgsize))
381 			pgsize >>= 1;
382 	}
383 	KASSERT(ISSET(pa_pagesz, pgsize));
384 
385 	items = pgsize / size;
386 
387 	/*
388 	 * Decide whether to put the page header off page to avoid
389 	 * wasting too large a part of the page. Off-page page headers
390 	 * go into an RB tree, so we can match a returned item with
391 	 * its header based on the page address.
392 	 */
393 	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
394 		if (pgsize - (size * items) >
395 		    sizeof(struct pool_page_header)) {
396 			off = pgsize - sizeof(struct pool_page_header);
397 		} else if (sizeof(struct pool_page_header) * 2 >= size) {
398 			off = pgsize - sizeof(struct pool_page_header);
399 			items = off / size;
400 		}
401 	}
402 
403 	KASSERT(items > 0);
404 
405 	/*
406 	 * Initialize the pool structure.
407 	 */
408 	memset(pp, 0, sizeof(*pp));
409 	if (ISSET(flags, PR_RWLOCK)) {
410 		KASSERT(flags & PR_WAITOK);
411 		pp->pr_lock_ops = &pool_lock_ops_rw;
412 	} else
413 		pp->pr_lock_ops = &pool_lock_ops_mtx;
414 	TAILQ_INIT(&pp->pr_emptypages);
415 	TAILQ_INIT(&pp->pr_fullpages);
416 	TAILQ_INIT(&pp->pr_partpages);
417 	pp->pr_curpage = NULL;
418 	pp->pr_npages = 0;
419 	pp->pr_minitems = 0;
420 	pp->pr_minpages = 0;
421 	pp->pr_maxpages = 8;
422 	pp->pr_size = size;
423 	pp->pr_pgsize = pgsize;
424 	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
425 	pp->pr_phoffset = off;
426 	pp->pr_itemsperpage = items;
427 	pp->pr_wchan = wchan;
428 	pp->pr_alloc = palloc;
429 	pp->pr_nitems = 0;
430 	pp->pr_nout = 0;
431 	pp->pr_hardlimit = UINT_MAX;
432 	pp->pr_hardlimit_warning = NULL;
433 	pp->pr_hardlimit_ratecap.tv_sec = 0;
434 	pp->pr_hardlimit_ratecap.tv_usec = 0;
435 	pp->pr_hardlimit_warning_last.tv_sec = 0;
436 	pp->pr_hardlimit_warning_last.tv_usec = 0;
437 	RBT_INIT(phtree, &pp->pr_phtree);
438 
439 	/*
440 	 * Use the space between the chunks and the page header
441 	 * for cache coloring.
442 	 */
443 	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
444 	space -= pp->pr_itemsperpage * pp->pr_size;
445 	pp->pr_align = align;
446 	pp->pr_maxcolors = (space / align) + 1;
447 
448 	pp->pr_nget = 0;
449 	pp->pr_nfail = 0;
450 	pp->pr_nput = 0;
451 	pp->pr_npagealloc = 0;
452 	pp->pr_npagefree = 0;
453 	pp->pr_hiwat = 0;
454 	pp->pr_nidle = 0;
455 
456 	pp->pr_ipl = ipl;
457 	pp->pr_flags = flags;
458 
459 	pl_init(pp, &pp->pr_lock);
460 	pl_init(pp, &pp->pr_requests_lock);
461 	TAILQ_INIT(&pp->pr_requests);
462 
463 	if (phpool.pr_size == 0) {
464 		pool_init(&phpool, sizeof(struct pool_page_header), 0,
465 		    IPL_HIGH, 0, "phpool", NULL);
466 
467 		/* make sure phpool wont "recurse" */
468 		KASSERT(POOL_INPGHDR(&phpool));
469 	}
470 
471 	/* pglistalloc/constraint parameters */
472 	pp->pr_crange = &kp_dirty;
473 
474 	/* Insert this into the list of all pools. */
475 	rw_enter_write(&pool_lock);
476 #ifdef DIAGNOSTIC
477 	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
478 		if (iter == pp)
479 			panic("%s: pool %s already on list", __func__, wchan);
480 	}
481 #endif
482 
483 	pp->pr_serial = ++pool_serial;
484 	if (pool_serial == 0)
485 		panic("%s: too much uptime", __func__);
486 
487 	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
488 	pool_count++;
489 	rw_exit_write(&pool_lock);
490 }
491 
492 /*
493  * Decommission a pool resource.
494  */
495 void
496 pool_destroy(struct pool *pp)
497 {
498 	struct pool_page_header *ph;
499 	struct pool *prev, *iter;
500 
501 #ifdef MULTIPROCESSOR
502 	if (pp->pr_cache != NULL)
503 		pool_cache_destroy(pp);
504 #endif
505 
506 #ifdef DIAGNOSTIC
507 	if (pp->pr_nout != 0)
508 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
509 #endif
510 
511 	/* Remove from global pool list */
512 	rw_enter_write(&pool_lock);
513 	pool_count--;
514 	if (pp == SIMPLEQ_FIRST(&pool_head))
515 		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
516 	else {
517 		prev = SIMPLEQ_FIRST(&pool_head);
518 		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
519 			if (iter == pp) {
520 				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
521 				    pr_poollist);
522 				break;
523 			}
524 			prev = iter;
525 		}
526 	}
527 	rw_exit_write(&pool_lock);
528 
529 	/* Remove all pages */
530 	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
531 		pl_enter(pp, &pp->pr_lock);
532 		pool_p_remove(pp, ph);
533 		pl_leave(pp, &pp->pr_lock);
534 		pool_p_free(pp, ph);
535 	}
536 	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
537 	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
538 }
539 
540 void
541 pool_request_init(struct pool_request *pr,
542     void (*handler)(struct pool *, void *, void *), void *cookie)
543 {
544 	pr->pr_handler = handler;
545 	pr->pr_cookie = cookie;
546 	pr->pr_item = NULL;
547 }
548 
549 void
550 pool_request(struct pool *pp, struct pool_request *pr)
551 {
552 	pl_enter(pp, &pp->pr_requests_lock);
553 	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
554 	pool_runqueue(pp, PR_NOWAIT);
555 	pl_leave(pp, &pp->pr_requests_lock);
556 }
557 
558 struct pool_get_memory {
559 	union pool_lock lock;
560 	void * volatile v;
561 };
562 
563 /*
564  * Grab an item from the pool.
565  */
566 void *
567 pool_get(struct pool *pp, int flags)
568 {
569 	void *v = NULL;
570 	int slowdown = 0;
571 
572 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
573 	if (pp->pr_flags & PR_RWLOCK)
574 		KASSERT(flags & PR_WAITOK);
575 
576 #ifdef MULTIPROCESSOR
577 	if (pp->pr_cache != NULL) {
578 		v = pool_cache_get(pp);
579 		if (v != NULL)
580 			goto good;
581 	}
582 #endif
583 
584 	pl_enter(pp, &pp->pr_lock);
585 	if (pp->pr_nout >= pp->pr_hardlimit) {
586 		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
587 			goto fail;
588 	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
589 		if (ISSET(flags, PR_NOWAIT))
590 			goto fail;
591 	}
592 	pl_leave(pp, &pp->pr_lock);
593 
594 	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
595 		yield();
596 
597 	if (v == NULL) {
598 		struct pool_get_memory mem = { .v = NULL };
599 		struct pool_request pr;
600 
601 #ifdef DIAGNOSTIC
602 		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
603 			panic("%s: cannot sleep for memory during boot",
604 			    __func__);
605 #endif
606 		pl_init(pp, &mem.lock);
607 		pool_request_init(&pr, pool_get_done, &mem);
608 		pool_request(pp, &pr);
609 
610 		pl_enter(pp, &mem.lock);
611 		while (mem.v == NULL)
612 			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
613 		pl_leave(pp, &mem.lock);
614 
615 		v = mem.v;
616 	}
617 
618 #ifdef MULTIPROCESSOR
619 good:
620 #endif
621 	if (ISSET(flags, PR_ZERO))
622 		memset(v, 0, pp->pr_size);
623 
624 	return (v);
625 
626 fail:
627 	pp->pr_nfail++;
628 	pl_leave(pp, &pp->pr_lock);
629 	return (NULL);
630 }
631 
632 void
633 pool_get_done(struct pool *pp, void *xmem, void *v)
634 {
635 	struct pool_get_memory *mem = xmem;
636 
637 	pl_enter(pp, &mem->lock);
638 	mem->v = v;
639 	pl_leave(pp, &mem->lock);
640 
641 	wakeup_one(mem);
642 }
643 
644 void
645 pool_runqueue(struct pool *pp, int flags)
646 {
647 	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
648 	struct pool_request *pr;
649 
650 	pl_assert_unlocked(pp, &pp->pr_lock);
651 	pl_assert_locked(pp, &pp->pr_requests_lock);
652 
653 	if (pp->pr_requesting++)
654 		return;
655 
656 	do {
657 		pp->pr_requesting = 1;
658 
659 		/* no TAILQ_JOIN? :( */
660 		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
661 			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
662 			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
663 		}
664 		if (TAILQ_EMPTY(&prl))
665 			continue;
666 
667 		pl_leave(pp, &pp->pr_requests_lock);
668 
669 		pl_enter(pp, &pp->pr_lock);
670 		pr = TAILQ_FIRST(&prl);
671 		while (pr != NULL) {
672 			int slowdown = 0;
673 
674 			if (pp->pr_nout >= pp->pr_hardlimit)
675 				break;
676 
677 			pr->pr_item = pool_do_get(pp, flags, &slowdown);
678 			if (pr->pr_item == NULL) /* || slowdown ? */
679 				break;
680 
681 			pr = TAILQ_NEXT(pr, pr_entry);
682 		}
683 		pl_leave(pp, &pp->pr_lock);
684 
685 		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
686 		    pr->pr_item != NULL) {
687 			TAILQ_REMOVE(&prl, pr, pr_entry);
688 			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
689 		}
690 
691 		pl_enter(pp, &pp->pr_requests_lock);
692 	} while (--pp->pr_requesting);
693 
694 	/* no TAILQ_JOIN :( */
695 	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
696 		TAILQ_REMOVE(&prl, pr, pr_entry);
697 		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
698 	}
699 }
700 
701 void *
702 pool_do_get(struct pool *pp, int flags, int *slowdown)
703 {
704 	struct pool_item *pi;
705 	struct pool_page_header *ph;
706 
707 	pl_assert_locked(pp, &pp->pr_lock);
708 
709 	splassert(pp->pr_ipl);
710 
711 	/*
712 	 * Account for this item now to avoid races if we need to give up
713 	 * pr_lock to allocate a page.
714 	 */
715 	pp->pr_nout++;
716 
717 	if (pp->pr_curpage == NULL) {
718 		pl_leave(pp, &pp->pr_lock);
719 		ph = pool_p_alloc(pp, flags, slowdown);
720 		pl_enter(pp, &pp->pr_lock);
721 
722 		if (ph == NULL) {
723 			pp->pr_nout--;
724 			return (NULL);
725 		}
726 
727 		pool_p_insert(pp, ph);
728 	}
729 
730 	ph = pp->pr_curpage;
731 	pi = XSIMPLEQ_FIRST(&ph->ph_items);
732 	if (__predict_false(pi == NULL))
733 		panic("%s: %s: page empty", __func__, pp->pr_wchan);
734 
735 	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
736 		panic("%s: %s free list modified: "
737 		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
738 		    __func__, pp->pr_wchan, ph->ph_page, pi,
739 		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
740 	}
741 
742 	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
743 
744 #ifdef DIAGNOSTIC
745 	if (pool_debug && POOL_PHPOISON(ph)) {
746 		size_t pidx;
747 		uint32_t pval;
748 		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
749 		    &pidx, &pval)) {
750 			int *ip = (int *)(pi + 1);
751 			panic("%s: %s free list modified: "
752 			    "page %p; item addr %p; offset 0x%zx=0x%x",
753 			    __func__, pp->pr_wchan, ph->ph_page, pi,
754 			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
755 		}
756 	}
757 #endif /* DIAGNOSTIC */
758 
759 	if (ph->ph_nmissing++ == 0) {
760 		/*
761 		 * This page was previously empty.  Move it to the list of
762 		 * partially-full pages.  This page is already curpage.
763 		 */
764 		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
765 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
766 
767 		pp->pr_nidle--;
768 	}
769 
770 	if (ph->ph_nmissing == pp->pr_itemsperpage) {
771 		/*
772 		 * This page is now full.  Move it to the full list
773 		 * and select a new current page.
774 		 */
775 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
776 		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
777 		pool_update_curpage(pp);
778 	}
779 
780 	pp->pr_nget++;
781 
782 	return (pi);
783 }
784 
785 /*
786  * Return resource to the pool.
787  */
788 void
789 pool_put(struct pool *pp, void *v)
790 {
791 
792 #ifdef DIAGNOSTIC
793 	if (v == NULL)
794 		panic("%s: NULL item", __func__);
795 #endif
796 
797 #ifdef MULTIPROCESSOR
798 	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
799 		pool_cache_put(pp, v);
800 		return;
801 	}
802 #endif
803 
804 	pl_enter(pp, &pp->pr_lock);
805 
806 	pool_do_put(pp, v);
807 
808 	pp->pr_nout--;
809 	pp->pr_nput++;
810 
811 	pl_leave(pp, &pp->pr_lock);
812 
813 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
814 		pl_enter(pp, &pp->pr_requests_lock);
815 		pool_runqueue(pp, PR_NOWAIT);
816 		pl_leave(pp, &pp->pr_requests_lock);
817 	}
818 }
819 
820 void
821 pool_do_put(struct pool *pp, void *v)
822 {
823 	struct pool_item *pi = v;
824 	struct pool_page_header *ph;
825 
826 	splassert(pp->pr_ipl);
827 
828 	ph = pr_find_pagehead(pp, v);
829 
830 #ifdef DIAGNOSTIC
831 	if (pool_debug) {
832 		struct pool_item *qi;
833 		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
834 			if (pi == qi) {
835 				panic("%s: %s: double pool_put: %p", __func__,
836 				    pp->pr_wchan, pi);
837 			}
838 		}
839 	}
840 #endif /* DIAGNOSTIC */
841 
842 	pi->pi_magic = POOL_IMAGIC(ph, pi);
843 	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
844 #ifdef DIAGNOSTIC
845 	if (POOL_PHPOISON(ph))
846 		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
847 #endif /* DIAGNOSTIC */
848 
849 	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
850 		/*
851 		 * The page was previously completely full, move it to the
852 		 * partially-full list.
853 		 */
854 		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
855 		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
856 	}
857 
858 	if (ph->ph_nmissing == 0) {
859 		/*
860 		 * The page is now empty, so move it to the empty page list.
861 		 */
862 		pp->pr_nidle++;
863 
864 		ph->ph_tick = ticks;
865 		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
866 		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
867 		pool_update_curpage(pp);
868 	}
869 }
870 
871 /*
872  * Add N items to the pool.
873  */
874 int
875 pool_prime(struct pool *pp, int n)
876 {
877 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
878 	struct pool_page_header *ph;
879 	int newpages;
880 
881 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
882 
883 	while (newpages-- > 0) {
884 		int slowdown = 0;
885 
886 		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
887 		if (ph == NULL) /* or slowdown? */
888 			break;
889 
890 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
891 	}
892 
893 	pl_enter(pp, &pp->pr_lock);
894 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
895 		TAILQ_REMOVE(&pl, ph, ph_entry);
896 		pool_p_insert(pp, ph);
897 	}
898 	pl_leave(pp, &pp->pr_lock);
899 
900 	return (0);
901 }
902 
903 struct pool_page_header *
904 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
905 {
906 	struct pool_page_header *ph;
907 	struct pool_item *pi;
908 	caddr_t addr;
909 	unsigned int order;
910 	int o;
911 	int n;
912 
913 	pl_assert_unlocked(pp, &pp->pr_lock);
914 	KASSERT(pp->pr_size >= sizeof(*pi));
915 
916 	addr = pool_allocator_alloc(pp, flags, slowdown);
917 	if (addr == NULL)
918 		return (NULL);
919 
920 	if (POOL_INPGHDR(pp))
921 		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
922 	else {
923 		ph = pool_get(&phpool, flags);
924 		if (ph == NULL) {
925 			pool_allocator_free(pp, flags, addr);
926 			return (NULL);
927 		}
928 	}
929 	ph->ph_flags = flags;
930 
931 	XSIMPLEQ_INIT(&ph->ph_items);
932 	ph->ph_page = addr;
933 	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
934 	ph->ph_colored = addr;
935 	ph->ph_nmissing = 0;
936 	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
937 #ifdef DIAGNOSTIC
938 	/* use a bit in ph_magic to record if we poison page items */
939 	if (pool_debug)
940 		SET(ph->ph_magic, POOL_MAGICBIT);
941 	else
942 		CLR(ph->ph_magic, POOL_MAGICBIT);
943 #endif /* DIAGNOSTIC */
944 
945 	n = pp->pr_itemsperpage;
946 	o = 32;
947 	while (n--) {
948 		pi = (struct pool_item *)addr;
949 		pi->pi_magic = POOL_IMAGIC(ph, pi);
950 
951 		if (o == 32) {
952 			order = arc4random();
953 			o = 0;
954 		}
955 		if (ISSET(order, 1 << o++))
956 			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
957 		else
958 			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
959 
960 #ifdef DIAGNOSTIC
961 		if (POOL_PHPOISON(ph))
962 			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
963 #endif /* DIAGNOSTIC */
964 
965 		addr += pp->pr_size;
966 	}
967 
968 	return (ph);
969 }
970 
971 void
972 pool_p_free(struct pool *pp, struct pool_page_header *ph)
973 {
974 	struct pool_item *pi;
975 
976 	pl_assert_unlocked(pp, &pp->pr_lock);
977 	KASSERT(ph->ph_nmissing == 0);
978 
979 	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
980 		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
981 			panic("%s: %s free list modified: "
982 			    "page %p; item addr %p; offset 0x%x=0x%lx",
983 			    __func__, pp->pr_wchan, ph->ph_page, pi,
984 			    0, pi->pi_magic);
985 		}
986 
987 #ifdef DIAGNOSTIC
988 		if (POOL_PHPOISON(ph)) {
989 			size_t pidx;
990 			uint32_t pval;
991 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
992 			    &pidx, &pval)) {
993 				int *ip = (int *)(pi + 1);
994 				panic("%s: %s free list modified: "
995 				    "page %p; item addr %p; offset 0x%zx=0x%x",
996 				    __func__, pp->pr_wchan, ph->ph_page, pi,
997 				    pidx * sizeof(int), ip[pidx]);
998 			}
999 		}
1000 #endif
1001 	}
1002 
1003 	pool_allocator_free(pp, ph->ph_flags, ph->ph_page);
1004 
1005 	if (!POOL_INPGHDR(pp))
1006 		pool_put(&phpool, ph);
1007 }
1008 
1009 void
1010 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1011 {
1012 	pl_assert_locked(pp, &pp->pr_lock);
1013 
1014 	/* If the pool was depleted, point at the new page */
1015 	if (pp->pr_curpage == NULL)
1016 		pp->pr_curpage = ph;
1017 
1018 	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1019 	if (!POOL_INPGHDR(pp))
1020 		RBT_INSERT(phtree, &pp->pr_phtree, ph);
1021 
1022 	pp->pr_nitems += pp->pr_itemsperpage;
1023 	pp->pr_nidle++;
1024 
1025 	pp->pr_npagealloc++;
1026 	if (++pp->pr_npages > pp->pr_hiwat)
1027 		pp->pr_hiwat = pp->pr_npages;
1028 }
1029 
1030 void
1031 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1032 {
1033 	pl_assert_locked(pp, &pp->pr_lock);
1034 
1035 	pp->pr_npagefree++;
1036 	pp->pr_npages--;
1037 	pp->pr_nidle--;
1038 	pp->pr_nitems -= pp->pr_itemsperpage;
1039 
1040 	if (!POOL_INPGHDR(pp))
1041 		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1042 	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1043 
1044 	pool_update_curpage(pp);
1045 }
1046 
1047 void
1048 pool_update_curpage(struct pool *pp)
1049 {
1050 	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1051 	if (pp->pr_curpage == NULL) {
1052 		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1053 	}
1054 }
1055 
1056 void
1057 pool_setlowat(struct pool *pp, int n)
1058 {
1059 	int prime = 0;
1060 
1061 	pl_enter(pp, &pp->pr_lock);
1062 	pp->pr_minitems = n;
1063 	pp->pr_minpages = (n == 0)
1064 		? 0
1065 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1066 
1067 	if (pp->pr_nitems < n)
1068 		prime = n - pp->pr_nitems;
1069 	pl_leave(pp, &pp->pr_lock);
1070 
1071 	if (prime > 0)
1072 		pool_prime(pp, prime);
1073 }
1074 
1075 void
1076 pool_sethiwat(struct pool *pp, int n)
1077 {
1078 	pp->pr_maxpages = (n == 0)
1079 		? 0
1080 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1081 }
1082 
1083 int
1084 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1085 {
1086 	int error = 0;
1087 
1088 	if (n < pp->pr_nout) {
1089 		error = EINVAL;
1090 		goto done;
1091 	}
1092 
1093 	pp->pr_hardlimit = n;
1094 	pp->pr_hardlimit_warning = warnmsg;
1095 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1096 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1097 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1098 
1099 done:
1100 	return (error);
1101 }
1102 
1103 void
1104 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1105 {
1106 	pp->pr_crange = mode;
1107 }
1108 
1109 /*
1110  * Release all complete pages that have not been used recently.
1111  *
1112  * Returns non-zero if any pages have been reclaimed.
1113  */
1114 int
1115 pool_reclaim(struct pool *pp)
1116 {
1117 	struct pool_page_header *ph, *phnext;
1118 	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1119 
1120 	pl_enter(pp, &pp->pr_lock);
1121 	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1122 		phnext = TAILQ_NEXT(ph, ph_entry);
1123 
1124 		/* Check our minimum page claim */
1125 		if (pp->pr_npages <= pp->pr_minpages)
1126 			break;
1127 
1128 		/*
1129 		 * If freeing this page would put us below
1130 		 * the low water mark, stop now.
1131 		 */
1132 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1133 		    pp->pr_minitems)
1134 			break;
1135 
1136 		pool_p_remove(pp, ph);
1137 		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1138 	}
1139 	pl_leave(pp, &pp->pr_lock);
1140 
1141 	if (TAILQ_EMPTY(&pl))
1142 		return (0);
1143 
1144 	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1145 		TAILQ_REMOVE(&pl, ph, ph_entry);
1146 		pool_p_free(pp, ph);
1147 	}
1148 
1149 	return (1);
1150 }
1151 
1152 /*
1153  * Release all complete pages that have not been used recently
1154  * from all pools.
1155  */
1156 void
1157 pool_reclaim_all(void)
1158 {
1159 	struct pool	*pp;
1160 
1161 	rw_enter_read(&pool_lock);
1162 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1163 		pool_reclaim(pp);
1164 	rw_exit_read(&pool_lock);
1165 }
1166 
1167 #ifdef DDB
1168 #include <machine/db_machdep.h>
1169 #include <ddb/db_output.h>
1170 
1171 /*
1172  * Diagnostic helpers.
1173  */
/*
 * Print a description of one pool through the supplied printf-like
 * function.  Thin front end for pool_print1(); modif is passed through
 * unchanged.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}
1180 
1181 void
1182 pool_print_pagelist(struct pool_pagelist *pl,
1183     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1184 {
1185 	struct pool_page_header *ph;
1186 	struct pool_item *pi;
1187 
1188 	TAILQ_FOREACH(ph, pl, ph_entry) {
1189 		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
1190 		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1191 		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1192 			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1193 				(*pr)("\t\t\titem %p, magic 0x%lx\n",
1194 				    pi, pi->pi_magic);
1195 			}
1196 		}
1197 	}
1198 }
1199 
1200 void
1201 pool_print1(struct pool *pp, const char *modif,
1202     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1203 {
1204 	struct pool_page_header *ph;
1205 	int print_pagelist = 0;
1206 	char c;
1207 
1208 	while ((c = *modif++) != '\0') {
1209 		if (c == 'p')
1210 			print_pagelist = 1;
1211 		modif++;
1212 	}
1213 
1214 	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1215 	    pp->pr_maxcolors);
1216 	(*pr)("\talloc %p\n", pp->pr_alloc);
1217 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1218 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1219 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1220 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1221 
1222 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1223 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1224 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1225 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1226 
1227 	if (print_pagelist == 0)
1228 		return;
1229 
1230 	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1231 		(*pr)("\n\tempty page list:\n");
1232 	pool_print_pagelist(&pp->pr_emptypages, pr);
1233 	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1234 		(*pr)("\n\tfull page list:\n");
1235 	pool_print_pagelist(&pp->pr_fullpages, pr);
1236 	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1237 		(*pr)("\n\tpartial-page list:\n");
1238 	pool_print_pagelist(&pp->pr_partpages, pr);
1239 
1240 	if (pp->pr_curpage == NULL)
1241 		(*pr)("\tno current page\n");
1242 	else
1243 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1244 }
1245 
1246 void
1247 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1248 {
1249 	struct pool *pp;
1250 	char maxp[16];
1251 	int ovflw;
1252 	char mode;
1253 
1254 	mode = modif[0];
1255 	if (mode != '\0' && mode != 'a') {
1256 		db_printf("usage: show all pools [/a]\n");
1257 		return;
1258 	}
1259 
1260 	if (mode == '\0')
1261 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1262 		    "Name",
1263 		    "Size",
1264 		    "Requests",
1265 		    "Fail",
1266 		    "Releases",
1267 		    "Pgreq",
1268 		    "Pgrel",
1269 		    "Npage",
1270 		    "Hiwat",
1271 		    "Minpg",
1272 		    "Maxpg",
1273 		    "Idle");
1274 	else
1275 		db_printf("%-12s %18s %18s\n",
1276 		    "Name", "Address", "Allocator");
1277 
1278 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1279 		if (mode == 'a') {
1280 			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1281 			    pp->pr_alloc);
1282 			continue;
1283 		}
1284 
1285 		if (!pp->pr_nget)
1286 			continue;
1287 
1288 		if (pp->pr_maxpages == UINT_MAX)
1289 			snprintf(maxp, sizeof maxp, "inf");
1290 		else
1291 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1292 
1293 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1294 	(ovflw) += db_printf((fmt),			\
1295 	    (width) - (fixed) - (ovflw) > 0 ?		\
1296 	    (width) - (fixed) - (ovflw) : 0,		\
1297 	    (val)) - (width);				\
1298 	if ((ovflw) < 0)				\
1299 		(ovflw) = 0;				\
1300 } while (/* CONSTCOND */0)
1301 
1302 		ovflw = 0;
1303 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1304 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1305 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1306 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1307 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1308 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1309 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1310 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1311 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1312 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1313 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1314 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1315 
1316 		pool_chk(pp);
1317 	}
1318 }
1319 #endif /* DDB */
1320 
1321 #if defined(POOL_DEBUG) || defined(DDB)
1322 int
1323 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1324 {
1325 	struct pool_item *pi;
1326 	caddr_t page;
1327 	int n;
1328 	const char *label = pp->pr_wchan;
1329 
1330 	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1331 	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1332 		printf("%s: ", label);
1333 		printf("pool(%p:%s): page inconsistency: page %p; "
1334 		    "at page head addr %p (p %p)\n",
1335 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1336 		return 1;
1337 	}
1338 
1339 	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1340 	     pi != NULL;
1341 	     pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1342 		if ((caddr_t)pi < ph->ph_page ||
1343 		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1344 			printf("%s: ", label);
1345 			printf("pool(%p:%s): page inconsistency: page %p;"
1346 			    " item ordinal %d; addr %p\n", pp,
1347 			    pp->pr_wchan, ph->ph_page, n, pi);
1348 			return (1);
1349 		}
1350 
1351 		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1352 			printf("%s: ", label);
1353 			printf("pool(%p:%s): free list modified: "
1354 			    "page %p; item ordinal %d; addr %p "
1355 			    "(p %p); offset 0x%x=0x%lx\n",
1356 			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1357 			    0, pi->pi_magic);
1358 		}
1359 
1360 #ifdef DIAGNOSTIC
1361 		if (POOL_PHPOISON(ph)) {
1362 			size_t pidx;
1363 			uint32_t pval;
1364 			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1365 			    &pidx, &pval)) {
1366 				int *ip = (int *)(pi + 1);
1367 				printf("pool(%s): free list modified: "
1368 				    "page %p; item ordinal %d; addr %p "
1369 				    "(p %p); offset 0x%zx=0x%x\n",
1370 				    pp->pr_wchan, ph->ph_page, n, pi,
1371 				    page, pidx * sizeof(int), ip[pidx]);
1372 			}
1373 		}
1374 #endif /* DIAGNOSTIC */
1375 	}
1376 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1377 		printf("pool(%p:%s): page inconsistency: page %p;"
1378 		    " %d on list, %d missing, %d items per page\n", pp,
1379 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1380 		    pp->pr_itemsperpage);
1381 		return 1;
1382 	}
1383 	if (expected >= 0 && n != expected) {
1384 		printf("pool(%p:%s): page inconsistency: page %p;"
1385 		    " %d on list, %d missing, %d expected\n", pp,
1386 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1387 		    expected);
1388 		return 1;
1389 	}
1390 	return 0;
1391 }
1392 
1393 int
1394 pool_chk(struct pool *pp)
1395 {
1396 	struct pool_page_header *ph;
1397 	int r = 0;
1398 
1399 	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1400 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1401 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1402 		r += pool_chk_page(pp, ph, 0);
1403 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1404 		r += pool_chk_page(pp, ph, -1);
1405 
1406 	return (r);
1407 }
1408 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1409 
1410 #ifdef DDB
1411 void
1412 pool_walk(struct pool *pp, int full,
1413     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1414     void (*func)(void *, int, int (*)(const char *, ...)
1415 	    __attribute__((__format__(__kprintf__,1,2)))))
1416 {
1417 	struct pool_page_header *ph;
1418 	struct pool_item *pi;
1419 	caddr_t cp;
1420 	int n;
1421 
1422 	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1423 		cp = ph->ph_colored;
1424 		n = ph->ph_nmissing;
1425 
1426 		while (n--) {
1427 			func(cp, full, pr);
1428 			cp += pp->pr_size;
1429 		}
1430 	}
1431 
1432 	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1433 		cp = ph->ph_colored;
1434 		n = ph->ph_nmissing;
1435 
1436 		do {
1437 			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1438 				if (cp == (caddr_t)pi)
1439 					break;
1440 			}
1441 			if (cp != (caddr_t)pi) {
1442 				func(cp, full, pr);
1443 				n--;
1444 			}
1445 
1446 			cp += pp->pr_size;
1447 		} while (n > 0);
1448 	}
1449 }
1450 #endif
1451 
1452 /*
1453  * We have three different sysctls.
1454  * kern.pool.npools - the number of pools.
1455  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1456  * kern.pool.name.<pool#> - the name for pool#.
1457  */
1458 int
1459 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1460 {
1461 	struct kinfo_pool pi;
1462 	struct pool *pp;
1463 	int rv = ENOENT;
1464 
1465 	switch (name[0]) {
1466 	case KERN_POOL_NPOOLS:
1467 		if (namelen != 1)
1468 			return (ENOTDIR);
1469 		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1470 
1471 	case KERN_POOL_NAME:
1472 	case KERN_POOL_POOL:
1473 	case KERN_POOL_CACHE:
1474 	case KERN_POOL_CACHE_CPUS:
1475 		break;
1476 	default:
1477 		return (EOPNOTSUPP);
1478 	}
1479 
1480 	if (namelen != 2)
1481 		return (ENOTDIR);
1482 
1483 	rw_enter_read(&pool_lock);
1484 
1485 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1486 		if (name[1] == pp->pr_serial)
1487 			break;
1488 	}
1489 
1490 	if (pp == NULL)
1491 		goto done;
1492 
1493 	switch (name[0]) {
1494 	case KERN_POOL_NAME:
1495 		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1496 		break;
1497 	case KERN_POOL_POOL:
1498 		memset(&pi, 0, sizeof(pi));
1499 
1500 		pl_enter(pp, &pp->pr_lock);
1501 		pi.pr_size = pp->pr_size;
1502 		pi.pr_pgsize = pp->pr_pgsize;
1503 		pi.pr_itemsperpage = pp->pr_itemsperpage;
1504 		pi.pr_npages = pp->pr_npages;
1505 		pi.pr_minpages = pp->pr_minpages;
1506 		pi.pr_maxpages = pp->pr_maxpages;
1507 		pi.pr_hardlimit = pp->pr_hardlimit;
1508 		pi.pr_nout = pp->pr_nout;
1509 		pi.pr_nitems = pp->pr_nitems;
1510 		pi.pr_nget = pp->pr_nget;
1511 		pi.pr_nput = pp->pr_nput;
1512 		pi.pr_nfail = pp->pr_nfail;
1513 		pi.pr_npagealloc = pp->pr_npagealloc;
1514 		pi.pr_npagefree = pp->pr_npagefree;
1515 		pi.pr_hiwat = pp->pr_hiwat;
1516 		pi.pr_nidle = pp->pr_nidle;
1517 		pl_leave(pp, &pp->pr_lock);
1518 
1519 		pool_cache_pool_info(pp, &pi);
1520 
1521 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1522 		break;
1523 
1524 	case KERN_POOL_CACHE:
1525 		rv = pool_cache_info(pp, oldp, oldlenp);
1526 		break;
1527 
1528 	case KERN_POOL_CACHE_CPUS:
1529 		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1530 		break;
1531 	}
1532 
1533 done:
1534 	rw_exit_read(&pool_lock);
1535 
1536 	return (rv);
1537 }
1538 
1539 void
1540 pool_gc_sched(void *null)
1541 {
1542 	task_add(systqmp, &pool_gc_task);
1543 }
1544 
1545 void
1546 pool_gc_pages(void *null)
1547 {
1548 	struct pool *pp;
1549 	struct pool_page_header *ph, *freeph;
1550 	int s;
1551 
1552 	rw_enter_read(&pool_lock);
1553 	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1554 	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1555 #ifdef MULTIPROCESSOR
1556 		if (pp->pr_cache != NULL)
1557 			pool_cache_gc(pp);
1558 #endif
1559 
1560 		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1561 		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
1562 			continue;
1563 
1564 		/* is it time to free a page? */
1565 		if (pp->pr_nidle > pp->pr_minpages &&
1566 		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1567 		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1568 			freeph = ph;
1569 			pool_p_remove(pp, freeph);
1570 		} else
1571 			freeph = NULL;
1572 
1573 		pl_leave(pp, &pp->pr_lock);
1574 
1575 		if (freeph != NULL)
1576 			pool_p_free(pp, freeph);
1577 	}
1578 	splx(s);
1579 	rw_exit_read(&pool_lock);
1580 
1581 	timeout_add_sec(&pool_gc_tick, 1);
1582 }
1583 
1584 /*
1585  * Pool backend allocators.
1586  */
1587 
1588 void *
1589 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1590 {
1591 	void *v;
1592 
1593 	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1594 
1595 #ifdef DIAGNOSTIC
1596 	if (v != NULL && POOL_INPGHDR(pp)) {
1597 		vaddr_t addr = (vaddr_t)v;
1598 		if ((addr & pp->pr_pgmask) != addr) {
1599 			panic("%s: %s page address %p isnt aligned to %u",
1600 			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
1601 		}
1602 	}
1603 #endif
1604 
1605 	return (v);
1606 }
1607 
1608 void
1609 pool_allocator_free(struct pool *pp, int flags, void *v)
1610 {
1611 	struct pool_allocator *pa = pp->pr_alloc;
1612 
1613 	(*pa->pa_free)(pp, flags, v);
1614 }
1615 
1616 void *
1617 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1618 {
1619 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1620 
1621 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1622 	kd.kd_slowdown = slowdown;
1623 
1624 	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1625 }
1626 
1627 void
1628 pool_page_free(struct pool *pp, int flags, void *v)
1629 {
1630 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1631 }
1632 
1633 void *
1634 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1635 {
1636 	struct kmem_va_mode kv = kv_intrsafe;
1637 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1638 	void *v;
1639 	int s;
1640 
1641 	if (flags & PR_WAITOK)
1642 		return pool_multi_alloc_ni(pp, flags, slowdown);
1643 
1644 	if (POOL_INPGHDR(pp))
1645 		kv.kv_align = pp->pr_pgsize;
1646 
1647 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1648 	kd.kd_slowdown = slowdown;
1649 
1650 	s = splvm();
1651 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1652 	splx(s);
1653 
1654 	return (v);
1655 }
1656 
1657 void
1658 pool_multi_free(struct pool *pp, int flags, void *v)
1659 {
1660 	struct kmem_va_mode kv = kv_intrsafe;
1661 	int s;
1662 
1663 	if (flags & PR_WAITOK) {
1664 		pool_multi_free_ni(pp, flags, v);
1665 		return;
1666 	}
1667 
1668 	if (POOL_INPGHDR(pp))
1669 		kv.kv_align = pp->pr_pgsize;
1670 
1671 	s = splvm();
1672 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1673 	splx(s);
1674 }
1675 
1676 void *
1677 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1678 {
1679 	struct kmem_va_mode kv = kv_any;
1680 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1681 	void *v;
1682 
1683 	if (POOL_INPGHDR(pp))
1684 		kv.kv_align = pp->pr_pgsize;
1685 
1686 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
1687 	kd.kd_slowdown = slowdown;
1688 
1689 	KERNEL_LOCK();
1690 	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1691 	KERNEL_UNLOCK();
1692 
1693 	return (v);
1694 }
1695 
1696 void
1697 pool_multi_free_ni(struct pool *pp, int flags, void *v)
1698 {
1699 	struct kmem_va_mode kv = kv_any;
1700 
1701 	if (POOL_INPGHDR(pp))
1702 		kv.kv_align = pp->pr_pgsize;
1703 
1704 	KERNEL_LOCK();
1705 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1706 	KERNEL_UNLOCK();
1707 }
1708 
1709 #ifdef MULTIPROCESSOR
1710 
1711 struct pool pool_caches; /* per cpu cache entries */
1712 
1713 void
1714 pool_cache_init(struct pool *pp)
1715 {
1716 	struct cpumem *cm;
1717 	struct pool_cache *pc;
1718 	struct cpumem_iter i;
1719 
1720 	if (pool_caches.pr_size == 0) {
1721 		pool_init(&pool_caches, sizeof(struct pool_cache),
1722 		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
1723 		    "plcache", NULL);
1724 	}
1725 
1726 	/* must be able to use the pool items as cache list items */
1727 	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1728 
1729 	cm = cpumem_get(&pool_caches);
1730 
1731 	pl_init(pp, &pp->pr_cache_lock);
1732 	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1733 	TAILQ_INIT(&pp->pr_cache_lists);
1734 	pp->pr_cache_nitems = 0;
1735 	pp->pr_cache_tick = ticks;
1736 	pp->pr_cache_items = 8;
1737 	pp->pr_cache_contention = 0;
1738 	pp->pr_cache_ngc = 0;
1739 
1740 	CPUMEM_FOREACH(pc, &i, cm) {
1741 		pc->pc_actv = NULL;
1742 		pc->pc_nactv = 0;
1743 		pc->pc_prev = NULL;
1744 
1745 		pc->pc_nget = 0;
1746 		pc->pc_nfail = 0;
1747 		pc->pc_nput = 0;
1748 		pc->pc_nlget = 0;
1749 		pc->pc_nlfail = 0;
1750 		pc->pc_nlput = 0;
1751 		pc->pc_nout = 0;
1752 	}
1753 
1754 	membar_producer();
1755 
1756 	pp->pr_cache = cm;
1757 }
1758 
1759 static inline void
1760 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1761 {
1762 	unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1763 
1764 	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1765 	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1766 }
1767 
1768 static inline void
1769 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1770 {
1771 	unsigned long *entry;
1772 	unsigned long val;
1773 
1774 	entry = (unsigned long *)&ci->ci_nextl;
1775 	val = pp->pr_cache_magic[0] ^ (u_long)ci;
1776 	if (*entry != val)
1777 		goto fail;
1778 
1779 	entry++;
1780 	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1781 	if (*entry != val)
1782 		goto fail;
1783 
1784 	return;
1785 
1786 fail:
1787 	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1788 	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1789 	    *entry, val);
1790 }
1791 
1792 static inline void
1793 pool_list_enter(struct pool *pp)
1794 {
1795 	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1796 		pl_enter(pp, &pp->pr_cache_lock);
1797 		pp->pr_cache_contention++;
1798 	}
1799 }
1800 
1801 static inline void
1802 pool_list_leave(struct pool *pp)
1803 {
1804 	pl_leave(pp, &pp->pr_cache_lock);
1805 }
1806 
1807 static inline struct pool_cache_item *
1808 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1809 {
1810 	struct pool_cache_item *pl;
1811 
1812 	pool_list_enter(pp);
1813 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
1814 	if (pl != NULL) {
1815 		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1816 		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1817 
1818 		pool_cache_item_magic(pp, pl);
1819 
1820 		pc->pc_nlget++;
1821 	} else
1822 		pc->pc_nlfail++;
1823 
1824 	/* fold this cpus nout into the global while we have the lock */
1825 	pp->pr_cache_nout += pc->pc_nout;
1826 	pc->pc_nout = 0;
1827 	pool_list_leave(pp);
1828 
1829 	return (pl);
1830 }
1831 
1832 static inline void
1833 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1834     struct pool_cache_item *ci)
1835 {
1836 	pool_list_enter(pp);
1837 	if (TAILQ_EMPTY(&pp->pr_cache_lists))
1838 		pp->pr_cache_tick = ticks;
1839 
1840 	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1841 	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1842 
1843 	pc->pc_nlput++;
1844 
1845 	/* fold this cpus nout into the global while we have the lock */
1846 	pp->pr_cache_nout += pc->pc_nout;
1847 	pc->pc_nout = 0;
1848 	pool_list_leave(pp);
1849 }
1850 
1851 static inline struct pool_cache *
1852 pool_cache_enter(struct pool *pp, int *s)
1853 {
1854 	struct pool_cache *pc;
1855 
1856 	pc = cpumem_enter(pp->pr_cache);
1857 	*s = splraise(pp->pr_ipl);
1858 	pc->pc_gen++;
1859 
1860 	return (pc);
1861 }
1862 
1863 static inline void
1864 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1865 {
1866 	pc->pc_gen++;
1867 	splx(s);
1868 	cpumem_leave(pp->pr_cache, pc);
1869 }
1870 
1871 void *
1872 pool_cache_get(struct pool *pp)
1873 {
1874 	struct pool_cache *pc;
1875 	struct pool_cache_item *ci;
1876 	int s;
1877 
1878 	pc = pool_cache_enter(pp, &s);
1879 
1880 	if (pc->pc_actv != NULL) {
1881 		ci = pc->pc_actv;
1882 	} else if (pc->pc_prev != NULL) {
1883 		ci = pc->pc_prev;
1884 		pc->pc_prev = NULL;
1885 	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1886 		pc->pc_nfail++;
1887 		goto done;
1888 	}
1889 
1890 	pool_cache_item_magic_check(pp, ci);
1891 #ifdef DIAGNOSTIC
1892 	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1893 		size_t pidx;
1894 		uint32_t pval;
1895 
1896 		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1897 		    &pidx, &pval)) {
1898 			int *ip = (int *)(ci + 1);
1899 			ip += pidx;
1900 
1901 			panic("%s: %s cpu free list modified: "
1902 			    "item addr %p+%zu 0x%x!=0x%x",
1903 			    __func__, pp->pr_wchan, ci,
1904 			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
1905 		}
1906 	}
1907 #endif
1908 
1909 	pc->pc_actv = ci->ci_next;
1910 	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1911 	pc->pc_nget++;
1912 	pc->pc_nout++;
1913 
1914 done:
1915 	pool_cache_leave(pp, pc, s);
1916 
1917 	return (ci);
1918 }
1919 
1920 void
1921 pool_cache_put(struct pool *pp, void *v)
1922 {
1923 	struct pool_cache *pc;
1924 	struct pool_cache_item *ci = v;
1925 	unsigned long nitems;
1926 	int s;
1927 #ifdef DIAGNOSTIC
1928 	int poison = pool_debug && pp->pr_size > sizeof(*ci);
1929 
1930 	if (poison)
1931 		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1932 #endif
1933 
1934 	pc = pool_cache_enter(pp, &s);
1935 
1936 	nitems = pc->pc_nactv;
1937 	if (nitems >= pp->pr_cache_items) {
1938 		if (pc->pc_prev != NULL)
1939 			pool_cache_list_free(pp, pc, pc->pc_prev);
1940 
1941 		pc->pc_prev = pc->pc_actv;
1942 
1943 		pc->pc_actv = NULL;
1944 		pc->pc_nactv = 0;
1945 		nitems = 0;
1946 	}
1947 
1948 	ci->ci_next = pc->pc_actv;
1949 	ci->ci_nitems = ++nitems;
1950 #ifdef DIAGNOSTIC
1951 	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1952 #endif
1953 	pool_cache_item_magic(pp, ci);
1954 
1955 	pc->pc_actv = ci;
1956 	pc->pc_nactv = nitems;
1957 
1958 	pc->pc_nput++;
1959 	pc->pc_nout--;
1960 
1961 	pool_cache_leave(pp, pc, s);
1962 }
1963 
1964 struct pool_cache_item *
1965 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1966 {
1967 	struct pool_cache_item *rpl, *next;
1968 
1969 	if (pl == NULL)
1970 		return (NULL);
1971 
1972 	rpl = TAILQ_NEXT(pl, ci_nextl);
1973 
1974 	pl_enter(pp, &pp->pr_lock);
1975 	do {
1976 		next = pl->ci_next;
1977 		pool_do_put(pp, pl);
1978 		pl = next;
1979 	} while (pl != NULL);
1980 	pl_leave(pp, &pp->pr_lock);
1981 
1982 	return (rpl);
1983 }
1984 
1985 void
1986 pool_cache_destroy(struct pool *pp)
1987 {
1988 	struct pool_cache *pc;
1989 	struct pool_cache_item *pl;
1990 	struct cpumem_iter i;
1991 	struct cpumem *cm;
1992 
1993 	rw_enter_write(&pool_lock); /* serialise with the gc */
1994 	cm = pp->pr_cache;
1995 	pp->pr_cache = NULL; /* make pool_put avoid the cache */
1996 	rw_exit_write(&pool_lock);
1997 
1998 	CPUMEM_FOREACH(pc, &i, cm) {
1999 		pool_cache_list_put(pp, pc->pc_actv);
2000 		pool_cache_list_put(pp, pc->pc_prev);
2001 	}
2002 
2003 	cpumem_put(&pool_caches, cm);
2004 
2005 	pl = TAILQ_FIRST(&pp->pr_cache_lists);
2006 	while (pl != NULL)
2007 		pl = pool_cache_list_put(pp, pl);
2008 }
2009 
2010 void
2011 pool_cache_gc(struct pool *pp)
2012 {
2013 	unsigned int contention, delta;
2014 
2015 	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
2016 	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
2017 	    pl_enter_try(pp, &pp->pr_cache_lock)) {
2018 		struct pool_cache_item *pl = NULL;
2019 
2020 		pl = TAILQ_FIRST(&pp->pr_cache_lists);
2021 		if (pl != NULL) {
2022 			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
2023 			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
2024 			pp->pr_cache_tick = ticks;
2025 
2026 			pp->pr_cache_ngc++;
2027 		}
2028 
2029 		pl_leave(pp, &pp->pr_cache_lock);
2030 
2031 		pool_cache_list_put(pp, pl);
2032 	}
2033 
2034 	/*
2035 	 * if there's a lot of contention on the pr_cache_mtx then consider
2036 	 * growing the length of the list to reduce the need to access the
2037 	 * global pool.
2038 	 */
2039 
2040 	contention = pp->pr_cache_contention;
2041 	delta = contention - pp->pr_cache_contention_prev;
2042 	if (delta > 8 /* magic */) {
2043 		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
2044 			pp->pr_cache_items += 8;
2045 	} else if (delta == 0) {
2046 		if (pp->pr_cache_items > 8)
2047 			pp->pr_cache_items--;
2048 	}
2049 	pp->pr_cache_contention_prev = contention;
2050 }
2051 
2052 void
2053 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
2054 {
2055 	struct pool_cache *pc;
2056 	struct cpumem_iter i;
2057 
2058 	if (pp->pr_cache == NULL)
2059 		return;
2060 
2061 	/* loop through the caches twice to collect stats */
2062 
2063 	/* once without the lock so we can yield while reading nget/nput */
2064 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2065 		uint64_t gen, nget, nput;
2066 
2067 		do {
2068 			while ((gen = pc->pc_gen) & 1)
2069 				yield();
2070 
2071 			nget = pc->pc_nget;
2072 			nput = pc->pc_nput;
2073 		} while (gen != pc->pc_gen);
2074 
2075 		pi->pr_nget += nget;
2076 		pi->pr_nput += nput;
2077 	}
2078 
2079 	/* and once with the mtx so we can get consistent nout values */
2080 	pl_enter(pp, &pp->pr_cache_lock);
2081 	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
2082 		pi->pr_nout += pc->pc_nout;
2083 
2084 	pi->pr_nout += pp->pr_cache_nout;
2085 	pl_leave(pp, &pp->pr_cache_lock);
2086 }
2087 
2088 int
2089 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2090 {
2091 	struct kinfo_pool_cache kpc;
2092 
2093 	if (pp->pr_cache == NULL)
2094 		return (EOPNOTSUPP);
2095 
2096 	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
2097 
2098 	pl_enter(pp, &pp->pr_cache_lock);
2099 	kpc.pr_ngc = pp->pr_cache_ngc;
2100 	kpc.pr_len = pp->pr_cache_items;
2101 	kpc.pr_nitems = pp->pr_cache_nitems;
2102 	kpc.pr_contention = pp->pr_cache_contention;
2103 	pl_leave(pp, &pp->pr_cache_lock);
2104 
2105 	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
2106 }
2107 
2108 int
2109 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2110 {
2111 	struct pool_cache *pc;
2112 	struct kinfo_pool_cache_cpu *kpcc, *info;
2113 	unsigned int cpu = 0;
2114 	struct cpumem_iter i;
2115 	int error = 0;
2116 	size_t len;
2117 
2118 	if (pp->pr_cache == NULL)
2119 		return (EOPNOTSUPP);
2120 	if (*oldlenp % sizeof(*kpcc))
2121 		return (EINVAL);
2122 
2123 	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
2124 	    M_WAITOK|M_CANFAIL|M_ZERO);
2125 	if (kpcc == NULL)
2126 		return (EIO);
2127 
2128 	len = ncpusfound * sizeof(*kpcc);
2129 
2130 	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2131 		uint64_t gen;
2132 
2133 		if (cpu >= ncpusfound) {
2134 			error = EIO;
2135 			goto err;
2136 		}
2137 
2138 		info = &kpcc[cpu];
2139 		info->pr_cpu = cpu;
2140 
2141 		do {
2142 			while ((gen = pc->pc_gen) & 1)
2143 				yield();
2144 
2145 			info->pr_nget = pc->pc_nget;
2146 			info->pr_nfail = pc->pc_nfail;
2147 			info->pr_nput = pc->pc_nput;
2148 			info->pr_nlget = pc->pc_nlget;
2149 			info->pr_nlfail = pc->pc_nlfail;
2150 			info->pr_nlput = pc->pc_nlput;
2151 		} while (gen != pc->pc_gen);
2152 
2153 		cpu++;
2154 	}
2155 
2156 	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2157 err:
2158 	free(kpcc, M_TEMP, len);
2159 
2160 	return (error);
2161 }
2162 #else /* MULTIPROCESSOR */
/*
 * Without MULTIPROCESSOR there are no per-cpu caches to set up.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}
2168 
/*
 * Without MULTIPROCESSOR there is no cache layer, so the snapshot in
 * pi is left as it is.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
2174 
2175 int
2176 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2177 {
2178 	return (EOPNOTSUPP);
2179 }
2180 
2181 int
2182 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2183 {
2184 	return (EOPNOTSUPP);
2185 }
2186 #endif /* MULTIPROCESSOR */
2187 
2188 
2189 void
2190 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
2191     const struct lock_type *type)
2192 {
2193 	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
2194 }
2195 
2196 void
2197 pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS)
2198 {
2199 	_mtx_enter(&lock->prl_mtx LOCK_FL_ARGS);
2200 }
2201 
2202 int
2203 pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS)
2204 {
2205 	return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS));
2206 }
2207 
2208 void
2209 pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS)
2210 {
2211 	_mtx_leave(&lock->prl_mtx LOCK_FL_ARGS);
2212 }
2213 
2214 void
2215 pool_lock_mtx_assert_locked(union pool_lock *lock)
2216 {
2217 	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
2218 }
2219 
2220 void
2221 pool_lock_mtx_assert_unlocked(union pool_lock *lock)
2222 {
2223 	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
2224 }
2225 
2226 int
2227 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
2228     const char *wmesg, int timo)
2229 {
2230 	return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
2231 }
2232 
2233 static const struct pool_lock_ops pool_lock_ops_mtx = {
2234 	pool_lock_mtx_init,
2235 	pool_lock_mtx_enter,
2236 	pool_lock_mtx_enter_try,
2237 	pool_lock_mtx_leave,
2238 	pool_lock_mtx_assert_locked,
2239 	pool_lock_mtx_assert_unlocked,
2240 	pool_lock_mtx_sleep,
2241 };
2242 
2243 void
2244 pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
2245     const struct lock_type *type)
2246 {
2247 	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
2248 }
2249 
2250 void
2251 pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS)
2252 {
2253 	_rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS);
2254 }
2255 
2256 int
2257 pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS)
2258 {
2259 	return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP
2260 	    LOCK_FL_ARGS) == 0);
2261 }
2262 
2263 void
2264 pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS)
2265 {
2266 	_rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS);
2267 }
2268 
2269 void
2270 pool_lock_rw_assert_locked(union pool_lock *lock)
2271 {
2272 	rw_assert_wrlock(&lock->prl_rwlock);
2273 }
2274 
2275 void
2276 pool_lock_rw_assert_unlocked(union pool_lock *lock)
2277 {
2278 	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
2279 }
2280 
2281 int
2282 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
2283     const char *wmesg, int timo)
2284 {
2285 	return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
2286 }
2287 
2288 static const struct pool_lock_ops pool_lock_ops_rw = {
2289 	pool_lock_rw_init,
2290 	pool_lock_rw_enter,
2291 	pool_lock_rw_enter_try,
2292 	pool_lock_rw_leave,
2293 	pool_lock_rw_assert_locked,
2294 	pool_lock_rw_assert_unlocked,
2295 	pool_lock_rw_sleep,
2296 };
2297