1 /*	$OpenBSD: subr_pool.c,v 1.111 2011/11/23 02:05:17 dlg Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/proc.h>
37 #include <sys/errno.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/pool.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 
44 #include <uvm/uvm.h>
45 #include <dev/rndvar.h>
46 
47 /*
48  * Pool resource management utility.
49  *
50  * Memory is allocated in pages which are split into pieces according to
51  * the pool item size. Each page is kept on one of three lists in the
52  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
53  * for empty, full and partially-full pages respectively. The individual
54  * pool items are on a linked list headed by `ph_itemlist' in each page
55  * header. The memory for building the page list is either taken from
56  * the allocated pages themselves (for small pool items) or taken from
57  * an internal pool of page headers (`phpool').
58  */
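
/*
 * A minimal usage sketch (illustrative only; the "foo" consumer below is
 * hypothetical and not part of this file):
 *
 *	struct foo {
 *		int	f_id;
 *	};
 *
 *	struct pool foo_pool;
 *
 *	void
 *	foo_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *		    "foopl", NULL);
 *		pool_setipl(&foo_pool, IPL_NET);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 */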
59 
60 /* List of all pools */
61 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
62 
63 /* Private pool for page header structures */
64 struct pool phpool;
65 
66 struct pool_item_header {
67 	/* Page headers */
68 	LIST_ENTRY(pool_item_header)
69 				ph_pagelist;	/* pool page list */
70 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
71 	RB_ENTRY(pool_item_header)
72 				ph_node;	/* Off-page page headers */
73 	int			ph_nmissing;	/* # of chunks in use */
74 	caddr_t			ph_page;	/* this page's address */
75 	caddr_t			ph_colored;	/* page's colored address */
76 	int			ph_pagesize;
77 	int			ph_magic;
78 };
79 
80 struct pool_item {
81 #ifdef DIAGNOSTIC
82 	u_int32_t pi_magic;
83 #endif
84 	/* Other entries use only this list entry */
85 	TAILQ_ENTRY(pool_item)	pi_list;
86 };
87 
88 #ifdef DEADBEEF1
89 #define	PI_MAGIC DEADBEEF1
90 #else
91 #define	PI_MAGIC 0xdeafbeef
92 #endif
93 
94 #ifdef POOL_DEBUG
95 int	pool_debug = 1;
96 #else
97 int	pool_debug = 0;
98 #endif
99 
100 #define	POOL_NEEDS_CATCHUP(pp)						\
101 	((pp)->pr_nitems < (pp)->pr_minitems)
102 
103 /*
104  * Every pool gets a unique serial number assigned to it. If this counter
105  * wraps, we're screwed, but we shouldn't create so many pools anyway.
106  */
107 unsigned int pool_serial;
108 
109 int	 pool_catchup(struct pool *);
110 void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
111 void	 pool_update_curpage(struct pool *);
112 void	*pool_do_get(struct pool *, int);
113 void	 pool_do_put(struct pool *, void *);
114 void	 pr_rmpage(struct pool *, struct pool_item_header *,
115 	    struct pool_pagelist *);
116 int	pool_chk_page(struct pool *, struct pool_item_header *, int);
117 struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int);
118 
119 void	*pool_allocator_alloc(struct pool *, int, int *);
120 void	 pool_allocator_free(struct pool *, void *);
121 
122 /*
123  * XXX - quick hack. For pools with large items we want to use a special
124  *       allocator. For now, instead of having the allocator figure out
125  *       the allocation size from the pool (which can be done trivially
126  *       with round_page(pr_itemsperpage * pr_size)), which would require
127  *       changes everywhere, we simply create an allocator for each size.
128  *       We limit those to 128 pages.
129  */
130 #define POOL_LARGE_MAXPAGES 128
131 struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
132 struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
133 void	*pool_large_alloc(struct pool *, int, int *);
134 void	pool_large_free(struct pool *, void *);
135 void	*pool_large_alloc_ni(struct pool *, int, int *);
136 void	pool_large_free_ni(struct pool *, void *);
137 
138 
139 #ifdef DDB
140 void	 pool_print_pagelist(struct pool_pagelist *,
141 	    int (*)(const char *, ...));
142 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
143 #endif
144 
145 #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
146 
147 static __inline int
148 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
149 {
150 	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
151 	if (diff < 0)
152 		return -(-diff >= a->ph_pagesize);
153 	else if (diff > 0)
154 		return (diff >= b->ph_pagesize);
155 	else
156 		return (0);
157 }
158 
159 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
160 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
161 
162 /*
163  * Return the pool page header based on page address.
164  */
165 static __inline struct pool_item_header *
166 pr_find_pagehead(struct pool *pp, void *v)
167 {
168 	struct pool_item_header *ph, tmp;
169 
170 	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
171 		caddr_t page;
172 
173 		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
174 
175 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
176 	}
177 
178 	/*
179 	 * The trick we're using in the tree compare function is to treat
180 	 * two elements as equal when they overlap. We want to return the
181 	 * page header of the page that contains this address.
182 	 * We don't want the lookup key to compare equal to the next element,
183 	 * so the compare function takes the pagesize from the lower element.
184 	 * If the lookup key is the lower element, its pagesize is zero, so
185 	 * it can't overlap with the next header. But if the header we're
186 	 * looking for is lower, we'll use its pagesize, the two will overlap
187 	 * and the compare returns equal.
188 	 */
189 	tmp.ph_page = v;
190 	tmp.ph_pagesize = 0;
191 	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);
192 
193 	if (ph) {
194 		KASSERT(ph->ph_page <= (caddr_t)v);
195 		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
196 	}
197 	return ph;
198 }
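
/*
 * A worked example of the lookup above (numbers are illustrative only,
 * assuming an 8k pool page at address 0x1000):  its header has
 * ph_page == 0x1000 and ph_pagesize == 0x2000.  Looking up the item
 * address 0x1234, the key built above has ph_page == 0x1234 and
 * ph_pagesize == 0.  phtree_compare(key, header) computes diff == 0x234,
 * which is > 0, and returns (0x234 >= 0x2000) == 0, i.e. "equal", so
 * RB_FIND hands back that header.  A key below 0x1000 compares "less"
 * (the key's own pagesize of 0 never overlaps), and a key at 0x3000 or
 * above compares "greater".
 */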
199 
200 /*
201  * Remove a page from the pool.
202  */
203 void
204 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
205     struct pool_pagelist *pq)
206 {
207 
208 	/*
209 	 * If the page was idle, decrement the idle page count.
210 	 */
211 	if (ph->ph_nmissing == 0) {
212 #ifdef DIAGNOSTIC
213 		if (pp->pr_nidle == 0)
214 			panic("pr_rmpage: nidle inconsistent");
215 		if (pp->pr_nitems < pp->pr_itemsperpage)
216 			panic("pr_rmpage: nitems inconsistent");
217 #endif
218 		pp->pr_nidle--;
219 	}
220 
221 	pp->pr_nitems -= pp->pr_itemsperpage;
222 
223 	/*
224 	 * Unlink a page from the pool and release it (or queue it for release).
225 	 */
226 	LIST_REMOVE(ph, ph_pagelist);
227 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
228 		RB_REMOVE(phtree, &pp->pr_phtree, ph);
229 	pp->pr_npages--;
230 	pp->pr_npagefree++;
231 	pool_update_curpage(pp);
232 
233 	if (pq) {
234 		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
235 	} else {
236 		pool_allocator_free(pp, ph->ph_page);
237 		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
238 			pool_put(&phpool, ph);
239 	}
240 }
241 
242 /*
243  * Initialize the given pool resource structure.
244  *
245  * We export this routine to allow other kernel parts to declare
246  * static pools that must be initialized before malloc() is available.
247  */
248 void
249 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
250     const char *wchan, struct pool_allocator *palloc)
251 {
252 	int off, slack;
253 
254 #ifdef MALLOC_DEBUG
255 	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
256 		flags &= ~PR_DEBUG;
257 #endif
258 	/*
259 	 * Check arguments and construct default values.
260 	 */
261 	if (palloc == NULL) {
262 		if (size > PAGE_SIZE) {
263 			int psize;
264 
265 			/*
266 			 * XXX - should take align into account as well.
267 			 */
268 			if (size == round_page(size))
269 				psize = size / PAGE_SIZE;
270 			else
271 				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
272 				    1024);
273 			if (psize > POOL_LARGE_MAXPAGES)
274 				psize = POOL_LARGE_MAXPAGES;
275 			if (flags & PR_WAITOK)
276 				palloc = &pool_allocator_large_ni[psize-1];
277 			else
278 				palloc = &pool_allocator_large[psize-1];
279 			if (palloc->pa_pagesz == 0) {
280 				palloc->pa_pagesz = psize * PAGE_SIZE;
281 				if (flags & PR_WAITOK) {
282 					palloc->pa_alloc = pool_large_alloc_ni;
283 					palloc->pa_free = pool_large_free_ni;
284 				} else {
285 					palloc->pa_alloc = pool_large_alloc;
286 					palloc->pa_free = pool_large_free;
287 				}
288 			}
289 		} else {
290 			palloc = &pool_allocator_nointr;
291 		}
292 	}
293 	if (palloc->pa_pagesz == 0) {
294 		palloc->pa_pagesz = PAGE_SIZE;
295 	}
296 	if (palloc->pa_pagemask == 0) {
297 		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
298 		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
299 	}
300 
301 	if (align == 0)
302 		align = ALIGN(1);
303 
304 	if (size < sizeof(struct pool_item))
305 		size = sizeof(struct pool_item);
306 
307 	size = roundup(size, align);
308 #ifdef DIAGNOSTIC
309 	if (size > palloc->pa_pagesz)
310 		panic("pool_init: pool item size (%lu) too large",
311 		    (u_long)size);
312 #endif
313 
314 	/*
315 	 * Initialize the pool structure.
316 	 */
317 	LIST_INIT(&pp->pr_emptypages);
318 	LIST_INIT(&pp->pr_fullpages);
319 	LIST_INIT(&pp->pr_partpages);
320 	pp->pr_curpage = NULL;
321 	pp->pr_npages = 0;
322 	pp->pr_minitems = 0;
323 	pp->pr_minpages = 0;
324 	pp->pr_maxpages = 8;
325 	pp->pr_roflags = flags;
326 	pp->pr_flags = 0;
327 	pp->pr_size = size;
328 	pp->pr_align = align;
329 	pp->pr_wchan = wchan;
330 	pp->pr_alloc = palloc;
331 	pp->pr_nitems = 0;
332 	pp->pr_nout = 0;
333 	pp->pr_hardlimit = UINT_MAX;
334 	pp->pr_hardlimit_warning = NULL;
335 	pp->pr_hardlimit_ratecap.tv_sec = 0;
336 	pp->pr_hardlimit_ratecap.tv_usec = 0;
337 	pp->pr_hardlimit_warning_last.tv_sec = 0;
338 	pp->pr_hardlimit_warning_last.tv_usec = 0;
339 	pp->pr_serial = ++pool_serial;
340 	if (pool_serial == 0)
341 		panic("pool_init: too much uptime");
342 
343 	/* constructor, destructor, and arg */
344 	pp->pr_ctor = NULL;
345 	pp->pr_dtor = NULL;
346 	pp->pr_arg = NULL;
347 
348 	/*
349 	 * Decide whether to put the page header off page to avoid
350 	 * wasting too large a part of the page. Off-page page headers
351 	 * go into an RB tree, so we can match a returned item with
352 	 * its header based on the page address.
353 	 * We use 1/16 of the page size as the threshold (XXX: tune)
354 	 */
355 	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
356 		/* Use the end of the page for the page header */
357 		pp->pr_roflags |= PR_PHINPAGE;
358 		pp->pr_phoffset = off = palloc->pa_pagesz -
359 		    ALIGN(sizeof(struct pool_item_header));
360 	} else {
361 		/* The page header will be taken from our page header pool */
362 		pp->pr_phoffset = 0;
363 		off = palloc->pa_pagesz;
364 		RB_INIT(&pp->pr_phtree);
365 	}
366 
367 	/*
368 	 * Alignment is to take place at `ioff' within the item. This means
369 	 * we must reserve up to `align - 1' bytes on the page to allow
370 	 * appropriate positioning of each item.
371 	 *
372 	 * Silently enforce `0 <= ioff < align'.
373 	 */
374 	pp->pr_itemoffset = ioff = ioff % align;
375 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
376 	KASSERT(pp->pr_itemsperpage != 0);
377 
378 	/*
379 	 * Use the slack between the chunks and the page header
380 	 * for "cache coloring".
381 	 */
382 	slack = off - pp->pr_itemsperpage * pp->pr_size;
383 	pp->pr_maxcolor = (slack / align) * align;
384 	pp->pr_curcolor = 0;
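
	/*
	 * A worked example of the numbers above (illustrative only,
	 * assuming pa_pagesz == 4096, ALIGN(1) == 8, ioff == 0 and a
	 * 360-byte item):  360 is not below 4096 / 16, so the page
	 * header lives off-page and off == 4096.  Then pr_itemsperpage
	 * == 4096 / 360 == 11, slack == 4096 - 11 * 360 == 136 and
	 * pr_maxcolor == 136.  Successive pages start their items at
	 * offsets 0, 8, 16, ..., 136 and then wrap back to 0, so items
	 * on different pages land on different cache lines.
	 */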
385 
386 	pp->pr_nget = 0;
387 	pp->pr_nfail = 0;
388 	pp->pr_nput = 0;
389 	pp->pr_npagealloc = 0;
390 	pp->pr_npagefree = 0;
391 	pp->pr_hiwat = 0;
392 	pp->pr_nidle = 0;
393 
394 	pp->pr_ipl = -1;
395 	mtx_init(&pp->pr_mtx, IPL_NONE);
396 
397 	if (phpool.pr_size == 0) {
398 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
399 		    0, "phpool", NULL);
400 		pool_setipl(&phpool, IPL_HIGH);
401 	}
402 
403 	/* pglistalloc/constraint parameters */
404 	pp->pr_crange = &kp_dirty;
405 
406 	/* Insert this into the list of all pools. */
407 	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
408 }
409 
410 void
411 pool_setipl(struct pool *pp, int ipl)
412 {
413 	pp->pr_ipl = ipl;
414 	mtx_init(&pp->pr_mtx, ipl);
415 }
416 
417 /*
418  * Decommission a pool resource.
419  */
420 void
421 pool_destroy(struct pool *pp)
422 {
423 	struct pool_item_header *ph;
424 
425 #ifdef DIAGNOSTIC
426 	if (pp->pr_nout != 0)
427 		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
428 #endif
429 
430 	/* Remove all pages */
431 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
432 		pr_rmpage(pp, ph, NULL);
433 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
434 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
435 
436 	/* Remove from global pool list */
437 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
438 }
439 
440 struct pool_item_header *
441 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
442 {
443 	struct pool_item_header *ph;
444 
445 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
446 		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
447 	else
448 		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
449 		    PR_NOWAIT);
450 	if (pool_debug && ph != NULL)
451 		ph->ph_magic = PI_MAGIC;
452 	return (ph);
453 }
454 
455 /*
456  * Grab an item from the pool; must be called at appropriate spl level
457  */
458 void *
459 pool_get(struct pool *pp, int flags)
460 {
461 	void *v;
462 
463 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
464 
465 #ifdef DIAGNOSTIC
466 	if ((flags & PR_WAITOK) != 0)
467 		assertwaitok();
468 #endif /* DIAGNOSTIC */
469 
470 	mtx_enter(&pp->pr_mtx);
471 #ifdef POOL_DEBUG
472 	if (pp->pr_roflags & PR_DEBUGCHK) {
473 		if (pool_chk(pp))
474 			panic("before pool_get");
475 	}
476 #endif
477 	v = pool_do_get(pp, flags);
478 #ifdef POOL_DEBUG
479 	if (pp->pr_roflags & PR_DEBUGCHK) {
480 		if (pool_chk(pp))
481 			panic("after pool_get");
482 	}
483 #endif
484 	if (v != NULL)
485 		pp->pr_nget++;
486 	mtx_leave(&pp->pr_mtx);
487 	if (v == NULL)
488 		return (v);
489 
490 	if (pp->pr_ctor) {
491 		if (flags & PR_ZERO)
492 			panic("pool_get: PR_ZERO when ctor set");
493 		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
494 			mtx_enter(&pp->pr_mtx);
495 			pp->pr_nget--;
496 			pool_do_put(pp, v);
497 			mtx_leave(&pp->pr_mtx);
498 			v = NULL;
499 		}
500 	} else {
501 		if (flags & PR_ZERO)
502 			memset(v, 0, pp->pr_size);
503 	}
504 	return (v);
505 }
506 
507 void *
508 pool_do_get(struct pool *pp, int flags)
509 {
510 	struct pool_item *pi;
511 	struct pool_item_header *ph;
512 	void *v;
513 	int slowdown = 0;
514 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
515 	int i, *ip;
516 #endif
517 
518 #ifdef MALLOC_DEBUG
519 	if (pp->pr_roflags & PR_DEBUG) {
520 		void *addr;
521 
522 		addr = NULL;
523 		debug_malloc(pp->pr_size, M_DEBUG,
524 		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
525 		return (addr);
526 	}
527 #endif
528 
529 startover:
530 	/*
531 	 * Check to see if we've reached the hard limit.  If we have,
532 	 * and we can wait, then wait until an item has been returned to
533 	 * the pool.
534 	 */
535 #ifdef DIAGNOSTIC
536 	if (pp->pr_nout > pp->pr_hardlimit)
537 		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
538 #endif
539 	if (pp->pr_nout == pp->pr_hardlimit) {
540 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
541 			/*
542 			 * XXX: A warning isn't logged in this case.  Should
543 			 * it be?
544 			 */
545 			pp->pr_flags |= PR_WANTED;
546 			pool_sleep(pp);
547 			goto startover;
548 		}
549 
550 		/*
551 		 * Log a message that the hard limit has been hit.
552 		 */
553 		if (pp->pr_hardlimit_warning != NULL &&
554 		    ratecheck(&pp->pr_hardlimit_warning_last,
555 		    &pp->pr_hardlimit_ratecap))
556 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
557 
558 		pp->pr_nfail++;
559 		return (NULL);
560 	}
561 
562 	/*
563 	 * The convention we use is that if `curpage' is not NULL, then
564 	 * it points at a non-empty bucket. In particular, `curpage'
565 	 * never points at a page header which has PR_PHINPAGE set and
566 	 * has no items in its bucket.
567 	 */
568 	if ((ph = pp->pr_curpage) == NULL) {
569 #ifdef DIAGNOSTIC
570 		if (pp->pr_nitems != 0) {
571 			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
572 			    pp->pr_wchan, pp->pr_nitems);
573 			panic("pool_do_get: nitems inconsistent");
574 		}
575 #endif
576 
577 		/*
578 		 * Call the back-end page allocator for more memory.
579 		 */
580 		v = pool_allocator_alloc(pp, flags, &slowdown);
581 		if (v != NULL)
582 			ph = pool_alloc_item_header(pp, v, flags);
583 
584 		if (v == NULL || ph == NULL) {
585 			if (v != NULL)
586 				pool_allocator_free(pp, v);
587 
588 			if ((flags & PR_WAITOK) == 0) {
589 				pp->pr_nfail++;
590 				return (NULL);
591 			}
592 
593 			/*
594 			 * Wait for items to be returned to this pool.
595 			 *
596 			 * XXX: maybe we should wake up once a second and
597 			 * try again?
598 			 */
599 			pp->pr_flags |= PR_WANTED;
600 			pool_sleep(pp);
601 			goto startover;
602 		}
603 
604 		/* We have more memory; add it to the pool */
605 		pool_prime_page(pp, v, ph);
606 		pp->pr_npagealloc++;
607 
608 		if (slowdown && (flags & PR_WAITOK)) {
609 			mtx_leave(&pp->pr_mtx);
610 			yield();
611 			mtx_enter(&pp->pr_mtx);
612 		}
613 
614 		/* Start the allocation process over. */
615 		goto startover;
616 	}
617 	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
618 		panic("pool_do_get: %s: page empty", pp->pr_wchan);
619 	}
620 #ifdef DIAGNOSTIC
621 	if (pp->pr_nitems == 0) {
622 		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
623 		    pp->pr_wchan, pp->pr_nitems);
624 		panic("pool_do_get: nitems inconsistent");
625 	}
626 #endif
627 
628 #ifdef DIAGNOSTIC
629 	if (pi->pi_magic != PI_MAGIC)
630 		panic("pool_do_get(%s): free list modified: "
631 		    "page %p; item addr %p; offset 0x%x=0x%x",
632 		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
633 #ifdef POOL_DEBUG
634 	if (pool_debug && ph->ph_magic) {
635 		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
636 		    i < pp->pr_size / sizeof(int); i++) {
637 			if (ip[i] != ph->ph_magic) {
638 				panic("pool_do_get(%s): free list modified: "
639 				    "page %p; item addr %p; offset 0x%x=0x%x",
640 				    pp->pr_wchan, ph->ph_page, pi,
641 				    i * sizeof(int), ip[i]);
642 			}
643 		}
644 	}
645 #endif /* POOL_DEBUG */
646 #endif /* DIAGNOSTIC */
647 
648 	/*
649 	 * Remove from item list.
650 	 */
651 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
652 	pp->pr_nitems--;
653 	pp->pr_nout++;
654 	if (ph->ph_nmissing == 0) {
655 #ifdef DIAGNOSTIC
656 		if (pp->pr_nidle == 0)
657 			panic("pool_do_get: nidle inconsistent");
658 #endif
659 		pp->pr_nidle--;
660 
661 		/*
662 		 * This page was previously empty.  Move it to the list of
663 		 * partially-full pages.  This page is already curpage.
664 		 */
665 		LIST_REMOVE(ph, ph_pagelist);
666 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
667 	}
668 	ph->ph_nmissing++;
669 	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
670 #ifdef DIAGNOSTIC
671 		if (ph->ph_nmissing != pp->pr_itemsperpage) {
672 			panic("pool_do_get: %s: nmissing inconsistent",
673 			    pp->pr_wchan);
674 		}
675 #endif
676 		/*
677 		 * This page is now full.  Move it to the full list
678 		 * and select a new current page.
679 		 */
680 		LIST_REMOVE(ph, ph_pagelist);
681 		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
682 		pool_update_curpage(pp);
683 	}
684 
685 	/*
686 	 * If we have a low water mark and we are now below that low
687 	 * water mark, add more items to the pool.
688 	 */
689 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
690 		/*
691 		 * XXX: Should we log a warning?  Should we set up a timeout
692 		 * to try again in a second or so?  The latter could break
693 		 * a caller's assumptions about interrupt protection, etc.
694 		 */
695 	}
696 	return (v);
697 }
698 
699 /*
700  * Return resource to the pool; must be called at appropriate spl level
701  */
702 void
703 pool_put(struct pool *pp, void *v)
704 {
705 	if (pp->pr_dtor)
706 		pp->pr_dtor(pp->pr_arg, v);
707 	mtx_enter(&pp->pr_mtx);
708 #ifdef POOL_DEBUG
709 	if (pp->pr_roflags & PR_DEBUGCHK) {
710 		if (pool_chk(pp))
711 			panic("before pool_put");
712 	}
713 #endif
714 	pool_do_put(pp, v);
715 #ifdef POOL_DEBUG
716 	if (pp->pr_roflags & PR_DEBUGCHK) {
717 		if (pool_chk(pp))
718 			panic("after pool_put");
719 	}
720 #endif
721 	pp->pr_nput++;
722 	mtx_leave(&pp->pr_mtx);
723 }
724 
725 /*
726  * Internal version of pool_put().
727  */
728 void
729 pool_do_put(struct pool *pp, void *v)
730 {
731 	struct pool_item *pi = v;
732 	struct pool_item_header *ph;
733 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
734 	int i, *ip;
735 #endif
736 
737 	if (v == NULL)
738 		panic("pool_put of NULL");
739 
740 #ifdef MALLOC_DEBUG
741 	if (pp->pr_roflags & PR_DEBUG) {
742 		debug_free(v, M_DEBUG);
743 		return;
744 	}
745 #endif
746 
747 #ifdef DIAGNOSTIC
748 	if (pp->pr_ipl != -1)
749 		splassert(pp->pr_ipl);
750 
751 	if (pp->pr_nout == 0) {
752 		printf("pool %s: putting with none out\n",
753 		    pp->pr_wchan);
754 		panic("pool_do_put");
755 	}
756 #endif
757 
758 	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
759 		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
760 	}
761 
762 	/*
763 	 * Return to item list.
764 	 */
765 #ifdef DIAGNOSTIC
766 	pi->pi_magic = PI_MAGIC;
767 #ifdef POOL_DEBUG
768 	if (ph->ph_magic) {
769 		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
770 		    i < pp->pr_size / sizeof(int); i++)
771 			ip[i] = ph->ph_magic;
772 	}
773 #endif /* POOL_DEBUG */
774 #endif /* DIAGNOSTIC */
775 
776 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
777 	ph->ph_nmissing--;
778 	pp->pr_nitems++;
779 	pp->pr_nout--;
780 
781 	/* Cancel "pool empty" condition if it exists */
782 	if (pp->pr_curpage == NULL)
783 		pp->pr_curpage = ph;
784 
785 	if (pp->pr_flags & PR_WANTED) {
786 		pp->pr_flags &= ~PR_WANTED;
787 		wakeup(pp);
788 	}
789 
790 	/*
791 	 * If this page is now empty, do one of two things:
792 	 *
793 	 *	(1) If we have more idle pages than the page high water
794 	 *	    mark, free the page back to the system.
795 	 *
796 	 *	(2) Otherwise, move the page to the empty page list.
797 	 *
798 	 * Either way, select a new current page (so we use a partially-full
799 	 * page if one is available).
800 	 */
801 	if (ph->ph_nmissing == 0) {
802 		pp->pr_nidle++;
803 		if (pp->pr_nidle > pp->pr_maxpages) {
804 			pr_rmpage(pp, ph, NULL);
805 		} else {
806 			LIST_REMOVE(ph, ph_pagelist);
807 			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
808 			pool_update_curpage(pp);
809 		}
810 	}
811 
812 	/*
813 	 * If the page was previously completely full, move it to the
814 	 * partially-full list and make it the current page.  The next
815 	 * allocation will get the item from this page, instead of
816 	 * further fragmenting the pool.
817 	 */
818 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
819 		LIST_REMOVE(ph, ph_pagelist);
820 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
821 		pp->pr_curpage = ph;
822 	}
823 }
824 
825 /*
826  * Add N items to the pool.
827  */
828 int
829 pool_prime(struct pool *pp, int n)
830 {
831 	struct pool_item_header *ph;
832 	caddr_t cp;
833 	int newpages;
834 	int slowdown;
835 
836 	mtx_enter(&pp->pr_mtx);
837 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
838 
839 	while (newpages-- > 0) {
840 		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
841 		if (cp != NULL)
842 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
843 		if (cp == NULL || ph == NULL) {
844 			if (cp != NULL)
845 				pool_allocator_free(pp, cp);
846 			break;
847 		}
848 
849 		pool_prime_page(pp, cp, ph);
850 		pp->pr_npagealloc++;
851 		pp->pr_minpages++;
852 	}
853 
854 	if (pp->pr_minpages >= pp->pr_maxpages)
855 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
856 
857 	mtx_leave(&pp->pr_mtx);
858 	return (0);
859 }
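
/*
 * Example (hypothetical consumer; not part of this file):  a subsystem
 * that will allocate from interrupt context can pre-allocate a batch of
 * items at attach time, while failure is still easy to handle:
 *
 *	pool_prime(&foo_pool, 64);
 *
 * The backing pages are requested with PR_NOWAIT, so this is
 * best-effort; the current implementation always returns 0.
 */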
860 
861 /*
862  * Add a page worth of items to the pool.
863  *
864  * Note, we must be called with the pool descriptor LOCKED.
865  */
866 void
867 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
868 {
869 	struct pool_item *pi;
870 	caddr_t cp = storage;
871 	unsigned int align = pp->pr_align;
872 	unsigned int ioff = pp->pr_itemoffset;
873 	int n;
874 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
875 	int i, *ip;
876 #endif
877 
878 	/*
879 	 * Insert page header.
880 	 */
881 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
882 	TAILQ_INIT(&ph->ph_itemlist);
883 	ph->ph_page = storage;
884 	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
885 	ph->ph_nmissing = 0;
886 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
887 		RB_INSERT(phtree, &pp->pr_phtree, ph);
888 
889 	pp->pr_nidle++;
890 
891 	/*
892 	 * Color this page.
893 	 */
894 	cp = (caddr_t)(cp + pp->pr_curcolor);
895 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
896 		pp->pr_curcolor = 0;
897 
898 	/*
899 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
900 	 */
901 	if (ioff != 0)
902 		cp = (caddr_t)(cp + (align - ioff));
903 	ph->ph_colored = cp;
904 
905 	/*
906 	 * Insert remaining chunks on the bucket list.
907 	 */
908 	n = pp->pr_itemsperpage;
909 	pp->pr_nitems += n;
910 
911 	while (n--) {
912 		pi = (struct pool_item *)cp;
913 
914 		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
915 
916 		/* Insert on page list */
917 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
918 
919 #ifdef DIAGNOSTIC
920 		pi->pi_magic = PI_MAGIC;
921 #ifdef POOL_DEBUG
922 		if (ph->ph_magic) {
923 			for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
924 			    i < pp->pr_size / sizeof(int); i++)
925 				ip[i] = ph->ph_magic;
926 		}
927 #endif /* POOL_DEBUG */
928 #endif /* DIAGNOSTIC */
929 		cp = (caddr_t)(cp + pp->pr_size);
930 	}
931 
932 	/*
933 	 * If the pool was depleted, point at the new page.
934 	 */
935 	if (pp->pr_curpage == NULL)
936 		pp->pr_curpage = ph;
937 
938 	if (++pp->pr_npages > pp->pr_hiwat)
939 		pp->pr_hiwat = pp->pr_npages;
940 }
941 
942 /*
943  * Used by pool_get() when nitems drops below the low water mark.  This
944  * is used to catch up pr_nitems with the low water mark.
945  *
946  * Note we never wait for memory here, we let the caller decide what to do.
947  */
948 int
949 pool_catchup(struct pool *pp)
950 {
951 	struct pool_item_header *ph;
952 	caddr_t cp;
953 	int error = 0;
954 	int slowdown;
955 
956 	while (POOL_NEEDS_CATCHUP(pp)) {
957 		/*
958 		 * Call the page back-end allocator for more memory.
959 		 */
960 		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
961 		if (cp != NULL)
962 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
963 		if (cp == NULL || ph == NULL) {
964 			if (cp != NULL)
965 				pool_allocator_free(pp, cp);
966 			error = ENOMEM;
967 			break;
968 		}
969 		pool_prime_page(pp, cp, ph);
970 		pp->pr_npagealloc++;
971 	}
972 
973 	return (error);
974 }
975 
976 void
977 pool_update_curpage(struct pool *pp)
978 {
979 
980 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
981 	if (pp->pr_curpage == NULL) {
982 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
983 	}
984 }
985 
986 void
987 pool_setlowat(struct pool *pp, int n)
988 {
989 
990 	pp->pr_minitems = n;
991 	pp->pr_minpages = (n == 0)
992 		? 0
993 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
994 
995 	mtx_enter(&pp->pr_mtx);
996 	/* Make sure we're caught up with the newly-set low water mark. */
997 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
998 		/*
999 		 * XXX: Should we log a warning?  Should we set up a timeout
1000 		 * to try again in a second or so?  The latter could break
1001 		 * a caller's assumptions about interrupt protection, etc.
1002 		 */
1003 	}
1004 	mtx_leave(&pp->pr_mtx);
1005 }
1006 
1007 void
1008 pool_sethiwat(struct pool *pp, int n)
1009 {
1010 
1011 	pp->pr_maxpages = (n == 0)
1012 		? 0
1013 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1014 }
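
/*
 * Example (hypothetical "foo_pool"; the numbers are made up):  keep at
 * least 32 items ready at all times and start releasing completely idle
 * pages once more than roughly 128 items worth of pages sit unused:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 128);
 */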
1015 
1016 int
1017 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1018 {
1019 	int error = 0;
1020 
1021 	if (n < pp->pr_nout) {
1022 		error = EINVAL;
1023 		goto done;
1024 	}
1025 
1026 	pp->pr_hardlimit = n;
1027 	pp->pr_hardlimit_warning = warnmsg;
1028 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1029 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1030 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1031 
1032 done:
1033 	return (error);
1034 }
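
/*
 * Example (hypothetical pool; limit, message and ratecap are made up):
 * cap the pool at 1024 outstanding items and log the warning at most
 * once every 60 seconds when the limit is hit:
 *
 *	pool_sethardlimit(&foo_pool, 1024,
 *	    "WARNING: foo_pool hard limit reached", 60);
 */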
1035 
1036 void
1037 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1038 {
1039 	pp->pr_crange = mode;
1040 }
1041 
1042 void
1043 pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
1044     void (*dtor)(void *, void *), void *arg)
1045 {
1046 	pp->pr_ctor = ctor;
1047 	pp->pr_dtor = dtor;
1048 	pp->pr_arg = arg;
1049 }
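
/*
 * Example (hypothetical constructor/destructor; not part of this file):
 * the constructor runs on every item handed out by pool_get() and a
 * non-zero return makes that pool_get() fail; the destructor runs on
 * every item passed to pool_put().  PR_ZERO must not be used once a
 * constructor is set.
 *
 *	int
 *	foo_ctor(void *arg, void *v, int flags)
 *	{
 *		struct foo *f = v;
 *
 *		f->f_id = 0;
 *		return (0);
 *	}
 *
 *	void
 *	foo_dtor(void *arg, void *v)
 *	{
 *	}
 *
 *	pool_set_ctordtor(&foo_pool, foo_ctor, foo_dtor, NULL);
 */
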
1050 /*
1051  * Release all complete pages that have not been used recently.
1052  *
1053  * Returns non-zero if any pages have been reclaimed.
1054  */
1055 int
1056 pool_reclaim(struct pool *pp)
1057 {
1058 	struct pool_item_header *ph, *phnext;
1059 	struct pool_pagelist pq;
1060 
1061 	LIST_INIT(&pq);
1062 
1063 	mtx_enter(&pp->pr_mtx);
1064 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1065 		phnext = LIST_NEXT(ph, ph_pagelist);
1066 
1067 		/* Check our minimum page claim */
1068 		if (pp->pr_npages <= pp->pr_minpages)
1069 			break;
1070 
1071 		KASSERT(ph->ph_nmissing == 0);
1072 
1073 		/*
1074 		 * If freeing this page would put us below
1075 		 * the low water mark, stop now.
1076 		 */
1077 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1078 		    pp->pr_minitems)
1079 			break;
1080 
1081 		pr_rmpage(pp, ph, &pq);
1082 	}
1083 	mtx_leave(&pp->pr_mtx);
1084 
1085 	if (LIST_EMPTY(&pq))
1086 		return (0);
1087 	while ((ph = LIST_FIRST(&pq)) != NULL) {
1088 		LIST_REMOVE(ph, ph_pagelist);
1089 		pool_allocator_free(pp, ph->ph_page);
1090 		if (pp->pr_roflags & PR_PHINPAGE)
1091 			continue;
1092 		pool_put(&phpool, ph);
1093 	}
1094 
1095 	return (1);
1096 }
1097 
1098 /*
1099  * Release all complete pages that have not been used recently
1100  * from all pools.
1101  */
1102 void
1103 pool_reclaim_all(void)
1104 {
1105 	struct pool	*pp;
1106 	int		s;
1107 
1108 	s = splhigh();
1109 	TAILQ_FOREACH(pp, &pool_head, pr_poollist)
1110 		pool_reclaim(pp);
1111 	splx(s);
1112 }
1113 
1114 #ifdef DDB
1115 #include <machine/db_machdep.h>
1116 #include <ddb/db_interface.h>
1117 #include <ddb/db_output.h>
1118 
1119 /*
1120  * Diagnostic helpers.
1121  */
1122 void
1123 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1124 {
1125 	pool_print1(pp, modif, pr);
1126 }
1127 
1128 void
1129 pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
1130 {
1131 	struct pool_item_header *ph;
1132 #ifdef DIAGNOSTIC
1133 	struct pool_item *pi;
1134 #endif
1135 
1136 	LIST_FOREACH(ph, pl, ph_pagelist) {
1137 		(*pr)("\t\tpage %p, nmissing %d\n",
1138 		    ph->ph_page, ph->ph_nmissing);
1139 #ifdef DIAGNOSTIC
1140 		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1141 			if (pi->pi_magic != PI_MAGIC) {
1142 				(*pr)("\t\t\titem %p, magic 0x%x\n",
1143 				    pi, pi->pi_magic);
1144 			}
1145 		}
1146 #endif
1147 	}
1148 }
1149 
1150 void
1151 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1152 {
1153 	struct pool_item_header *ph;
1154 	int print_pagelist = 0;
1155 	char c;
1156 
1157 	while ((c = *modif++) != '\0') {
1158 		if (c == 'p')
1159 			print_pagelist = 1;
1161 	}
1162 
1163 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1164 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1165 	    pp->pr_roflags);
1166 	(*pr)("\talloc %p\n", pp->pr_alloc);
1167 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1168 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1169 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1170 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1171 
1172 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1173 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1174 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1175 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1176 
1177 	if (print_pagelist == 0)
1178 		return;
1179 
1180 	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1181 		(*pr)("\n\tempty page list:\n");
1182 	pool_print_pagelist(&pp->pr_emptypages, pr);
1183 	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1184 		(*pr)("\n\tfull page list:\n");
1185 	pool_print_pagelist(&pp->pr_fullpages, pr);
1186 	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1187 		(*pr)("\n\tpartial-page list:\n");
1188 	pool_print_pagelist(&pp->pr_partpages, pr);
1189 
1190 	if (pp->pr_curpage == NULL)
1191 		(*pr)("\tno current page\n");
1192 	else
1193 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1194 }
1195 
1196 void
1197 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1198 {
1199 	struct pool *pp;
1200 	char maxp[16];
1201 	int ovflw;
1202 	char mode;
1203 
1204 	mode = modif[0];
1205 	if (mode != '\0' && mode != 'a') {
1206 		db_printf("usage: show all pools [/a]\n");
1207 		return;
1208 	}
1209 
1210 	if (mode == '\0')
1211 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1212 		    "Name",
1213 		    "Size",
1214 		    "Requests",
1215 		    "Fail",
1216 		    "Releases",
1217 		    "Pgreq",
1218 		    "Pgrel",
1219 		    "Npage",
1220 		    "Hiwat",
1221 		    "Minpg",
1222 		    "Maxpg",
1223 		    "Idle");
1224 	else
1225 		db_printf("%-10s %18s %18s\n",
1226 		    "Name", "Address", "Allocator");
1227 
1228 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1229 		if (mode == 'a') {
1230 			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
1231 			    pp->pr_alloc);
1232 			continue;
1233 		}
1234 
1235 		if (!pp->pr_nget)
1236 			continue;
1237 
1238 		if (pp->pr_maxpages == UINT_MAX)
1239 			snprintf(maxp, sizeof maxp, "inf");
1240 		else
1241 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1242 
1243 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1244 	(ovflw) += db_printf((fmt),			\
1245 	    (width) - (fixed) - (ovflw) > 0 ?		\
1246 	    (width) - (fixed) - (ovflw) : 0,		\
1247 	    (val)) - (width);				\
1248 	if ((ovflw) < 0)				\
1249 		(ovflw) = 0;				\
1250 } while (/* CONSTCOND */0)
1251 
1252 		ovflw = 0;
1253 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1254 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1255 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1256 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1257 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1258 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1259 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1260 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1261 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1262 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1263 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1264 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1265 
1266 		pool_chk(pp);
1267 	}
1268 }
1269 
1270 int
1271 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1272 {
1273 	struct pool_item *pi;
1274 	caddr_t page;
1275 	int n;
1276 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
1277 	int i, *ip;
1278 #endif
1279 	const char *label = pp->pr_wchan;
1280 
1281 	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1282 	if (page != ph->ph_page &&
1283 	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1284 		printf("%s: ", label);
1285 		printf("pool(%p:%s): page inconsistency: page %p; "
1286 		    "at page head addr %p (p %p)\n",
1287 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1288 		return 1;
1289 	}
1290 
1291 	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1292 	     pi != NULL;
1293 	     pi = TAILQ_NEXT(pi,pi_list), n++) {
1294 
1295 #ifdef DIAGNOSTIC
1296 		if (pi->pi_magic != PI_MAGIC) {
1297 			printf("%s: ", label);
1298 			printf("pool(%s): free list modified: "
1299 			    "page %p; item ordinal %d; addr %p "
1300 			    "(p %p); offset 0x%x=0x%x\n",
1301 			    pp->pr_wchan, ph->ph_page, n, pi, page,
1302 			    0, pi->pi_magic);
1303 		}
1304 #ifdef POOL_DEBUG
1305 		if (pool_debug && ph->ph_magic) {
1306 			for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
1307 			    i < pp->pr_size / sizeof(int); i++) {
1308 				if (ip[i] != ph->ph_magic) {
1309 					printf("pool(%s): free list modified: "
1310 					    "page %p; item ordinal %d; addr %p "
1311 					    "(p %p); offset 0x%x=0x%x\n",
1312 					    pp->pr_wchan, ph->ph_page, n, pi,
1313 					    page, i * sizeof(int), ip[i]);
1314 				}
1315 			}
1316 		}
1317 
1318 #endif /* POOL_DEBUG */
1319 #endif /* DIAGNOSTIC */
1320 		page =
1321 		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1322 		if (page == ph->ph_page)
1323 			continue;
1324 
1325 		printf("%s: ", label);
1326 		printf("pool(%p:%s): page inconsistency: page %p;"
1327 		    " item ordinal %d; addr %p (p %p)\n", pp,
1328 		    pp->pr_wchan, ph->ph_page, n, pi, page);
1329 		return 1;
1330 	}
1331 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1332 		printf("pool(%p:%s): page inconsistency: page %p;"
1333 		    " %d on list, %d missing, %d items per page\n", pp,
1334 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1335 		    pp->pr_itemsperpage);
1336 		return 1;
1337 	}
1338 	if (expected >= 0 && n != expected) {
1339 		printf("pool(%p:%s): page inconsistency: page %p;"
1340 		    " %d on list, %d missing, %d expected\n", pp,
1341 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1342 		    expected);
1343 		return 1;
1344 	}
1345 	return 0;
1346 }
1347 
1348 int
1349 pool_chk(struct pool *pp)
1350 {
1351 	struct pool_item_header *ph;
1352 	int r = 0;
1353 
1354 	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1355 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1356 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1357 		r += pool_chk_page(pp, ph, 0);
1358 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1359 		r += pool_chk_page(pp, ph, -1);
1360 
1361 	return (r);
1362 }
1363 
1364 void
1365 pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...),
1366     void (*func)(void *, int, int (*)(const char *, ...)))
1367 {
1368 	struct pool_item_header *ph;
1369 	struct pool_item *pi;
1370 	caddr_t cp;
1371 	int n;
1372 
1373 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1374 		cp = ph->ph_colored;
1375 		n = ph->ph_nmissing;
1376 
1377 		while (n--) {
1378 			func(cp, full, pr);
1379 			cp += pp->pr_size;
1380 		}
1381 	}
1382 
1383 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1384 		cp = ph->ph_colored;
1385 		n = ph->ph_nmissing;
1386 
1387 		do {
1388 			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1389 				if (cp == (caddr_t)pi)
1390 					break;
1391 			}
1392 			if (cp != (caddr_t)pi) {
1393 				func(cp, full, pr);
1394 				n--;
1395 			}
1396 
1397 			cp += pp->pr_size;
1398 		} while (n > 0);
1399 	}
1400 }
1401 #endif
1402 
1403 /*
1404  * We have three different sysctls.
1405  * kern.pool.npools - the number of pools.
1406  * kern.pool.pool.<pool#> - the pool structure for the pool with that serial.
1407  * kern.pool.name.<pool#> - the name of the pool with that serial.
1408  */
1409 int
1410 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1411 {
1412 	struct pool *pp, *foundpool = NULL;
1413 	size_t buflen = where != NULL ? *sizep : 0;
1414 	int npools = 0, s;
1415 	unsigned int lookfor;
1416 	size_t len;
1417 
1418 	switch (*name) {
1419 	case KERN_POOL_NPOOLS:
1420 		if (namelen != 1 || buflen != sizeof(int))
1421 			return (EINVAL);
1422 		lookfor = 0;
1423 		break;
1424 	case KERN_POOL_NAME:
1425 		if (namelen != 2 || buflen < 1)
1426 			return (EINVAL);
1427 		lookfor = name[1];
1428 		break;
1429 	case KERN_POOL_POOL:
1430 		if (namelen != 2 || buflen != sizeof(struct pool))
1431 			return (EINVAL);
1432 		lookfor = name[1];
1433 		break;
1434 	default:
1435 		return (EINVAL);
1436 	}
1437 
1438 	s = splvm();
1439 
1440 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1441 		npools++;
1442 		if (lookfor == pp->pr_serial) {
1443 			foundpool = pp;
1444 			break;
1445 		}
1446 	}
1447 
1448 	splx(s);
1449 
1450 	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
1451 		return (ENOENT);
1452 
1453 	switch (*name) {
1454 	case KERN_POOL_NPOOLS:
1455 		return copyout(&npools, where, buflen);
1456 	case KERN_POOL_NAME:
1457 		len = strlen(foundpool->pr_wchan) + 1;
1458 		if (*sizep < len)
1459 			return (ENOMEM);
1460 		*sizep = len;
1461 		return copyout(foundpool->pr_wchan, where, len);
1462 	case KERN_POOL_POOL:
1463 		return copyout(foundpool, where, buflen);
1464 	}
1465 	/* NOTREACHED */
1466 	return (0); /* XXX - Stupid gcc */
1467 }
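
/*
 * Userland sketch (illustrative only; error handling omitted and the
 * CTL_KERN/KERN_POOL mib path assumed):  walking the pools through the
 * interface above, much as vmstat(8) does for its pool statistics:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
 *	int i, npools;
 *	struct pool pool;
 *	size_t size;
 *
 *	size = sizeof(npools);
 *	sysctl(mib, 3, &npools, &size, NULL, 0);
 *
 *	mib[2] = KERN_POOL_POOL;
 *	for (i = 1; i <= npools; i++) {
 *		mib[3] = i;
 *		size = sizeof(pool);
 *		if (sysctl(mib, 4, &pool, &size, NULL, 0) == -1)
 *			continue;
 *	}
 *
 * The <pool#> component is matched against pr_serial, so holes are
 * possible once pools have been destroyed; ENOENT is returned for them.
 */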
1468 
1469 /*
1470  * Pool backend allocators.
1471  *
1472  * Each pool has a backend allocator that handles page allocation and freeing.
1473  */
1474 void	*pool_page_alloc(struct pool *, int, int *);
1475 void	pool_page_free(struct pool *, void *);
1476 
1477 /*
1478  * Safe for interrupts; the name is preserved for compatibility.  This is
1479  * the default allocator.
1480  */
1481 struct pool_allocator pool_allocator_nointr = {
1482 	pool_page_alloc, pool_page_free, 0,
1483 };
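
/*
 * Sketch of a private back-end allocator (hypothetical; the pool code
 * masks item addresses with pa_pagemask, so an allocator is expected to
 * return naturally aligned, power-of-two sized pages):
 *
 *	void	*foo_page_alloc(struct pool *, int, int *);
 *	void	 foo_page_free(struct pool *, void *);
 *
 *	struct pool_allocator foo_allocator = {
 *		foo_page_alloc, foo_page_free, 0
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &foo_allocator);
 *
 * A pa_pagesz of 0 is filled in with PAGE_SIZE by pool_init().
 */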
1484 
1485 /*
1486  * XXX - we have at least three different resources for the same allocation
1487  *  and each resource can be depleted. First we have the ready elements in
1488  *  the pool. Then we have the resource (typically a vm_map) for this
1489  *  allocator, then we have physical memory. Waiting for any of these can
1490  *  be unnecessary when any other is freed, but the kernel doesn't support
1491  *  sleeping on multiple addresses, so we have to fake it. The caller sleeps on
1492  *  the pool (so that we can be awakened when an item is returned to the pool),
1493  *  but we set PA_WANT on the allocator. When a page is returned to
1494  *  the allocator and PA_WANT is set, pool_allocator_free will wake up all
1495  *  sleeping pools belonging to this allocator. (XXX - thundering herd).
1496  *  We also wake up the allocator in case someone without a pool (malloc)
1497  *  is sleeping waiting for this allocator.
1498  */
1499 
1500 void *
1501 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1502 {
1503 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1504 	void *v;
1505 
1506 	if (waitok)
1507 		mtx_leave(&pp->pr_mtx);
1508 	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
1509 	if (waitok)
1510 		mtx_enter(&pp->pr_mtx);
1511 
1512 	return (v);
1513 }
1514 
1515 void
1516 pool_allocator_free(struct pool *pp, void *v)
1517 {
1518 	struct pool_allocator *pa = pp->pr_alloc;
1519 
1520 	(*pa->pa_free)(pp, v);
1521 }
1522 
1523 void *
1524 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1525 {
1526 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1527 
1528 	kd.kd_waitok = (flags & PR_WAITOK);
1529 	kd.kd_slowdown = slowdown;
1530 
1531 	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
1532 }
1533 
1534 void
1535 pool_page_free(struct pool *pp, void *v)
1536 {
1537 	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
1538 }
1539 
1540 void *
1541 pool_large_alloc(struct pool *pp, int flags, int *slowdown)
1542 {
1543 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1544 	void *v;
1545 	int s;
1546 
1547 	kd.kd_waitok = (flags & PR_WAITOK);
1548 	kd.kd_slowdown = slowdown;
1549 
1550 	s = splvm();
1551 	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
1552 	    &kd);
1553 	splx(s);
1554 
1555 	return (v);
1556 }
1557 
1558 void
1559 pool_large_free(struct pool *pp, void *v)
1560 {
1561 	int s;
1562 
1563 	s = splvm();
1564 	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
1565 	splx(s);
1566 }
1567 
1568 void *
1569 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
1570 {
1571 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1572 
1573 	kd.kd_waitok = (flags & PR_WAITOK);
1574 	kd.kd_slowdown = slowdown;
1575 
1576 	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
1577 }
1578 
1579 void
1580 pool_large_free_ni(struct pool *pp, void *v)
1581 {
1582 	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
1583 }
1584