1 /*	$OpenBSD: subr_pool.c,v 1.112 2012/12/24 19:43:11 guenther Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/proc.h>
37 #include <sys/errno.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/pool.h>
41 #include <sys/syslog.h>
42 #include <sys/sysctl.h>
43 
44 #include <uvm/uvm.h>
45 #include <dev/rndvar.h>
46 
47 /*
48  * Pool resource management utility.
49  *
50  * Memory is allocated in pages which are split into pieces according to
51  * the pool item size. Each page is kept on one of three lists in the
52  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
53  * for empty, full and partially-full pages respectively. The individual
54  * pool items are on a linked list headed by `ph_itemlist' in each page
55  * header. The memory for building the page list is either taken from
56  * the allocated pages themselves (for small pool items) or taken from
57  * an internal pool of page headers (`phpool').
58  */
59 
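/*
 * A minimal usage sketch of the interface implemented below ("struct foo"
 * and "foo_pool" are hypothetical names; the calls are the public pool API
 * defined in this file):
 *
 *	struct pool foo_pool;
 *	struct foo *f;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_NET);
 *
 *	f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */
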
60 /* List of all pools */
61 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
62 
63 /* Private pool for page header structures */
64 struct pool phpool;
65 
66 struct pool_item_header {
67 	/* Page headers */
68 	LIST_ENTRY(pool_item_header)
69 				ph_pagelist;	/* pool page list */
70 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
71 	RB_ENTRY(pool_item_header)
72 				ph_node;	/* Off-page page headers */
73 	int			ph_nmissing;	/* # of chunks in use */
74 	caddr_t			ph_page;	/* this page's address */
75 	caddr_t			ph_colored;	/* page's colored address */
76 	int			ph_pagesize;
77 	int			ph_magic;
78 };
79 
80 struct pool_item {
81 #ifdef DIAGNOSTIC
82 	u_int32_t pi_magic;
83 #endif
84 	/* Other entries use only this list entry */
85 	TAILQ_ENTRY(pool_item)	pi_list;
86 };
87 
88 #ifdef DEADBEEF1
89 #define	PI_MAGIC DEADBEEF1
90 #else
91 #define	PI_MAGIC 0xdeafbeef
92 #endif
93 
94 #ifdef POOL_DEBUG
95 int	pool_debug = 1;
96 #else
97 int	pool_debug = 0;
98 #endif
99 
100 #define	POOL_NEEDS_CATCHUP(pp)						\
101 	((pp)->pr_nitems < (pp)->pr_minitems)
102 
103 /*
104  * Every pool gets a unique serial number assigned to it. If this counter
105  * wraps, we're screwed, but we shouldn't create so many pools anyway.
106  */
107 unsigned int pool_serial;
108 
109 int	 pool_catchup(struct pool *);
110 void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
111 void	 pool_update_curpage(struct pool *);
112 void	*pool_do_get(struct pool *, int);
113 void	 pool_do_put(struct pool *, void *);
114 void	 pr_rmpage(struct pool *, struct pool_item_header *,
115 	    struct pool_pagelist *);
116 int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
117 int	 pool_chk(struct pool *);
118 struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);
119 
120 void	*pool_allocator_alloc(struct pool *, int, int *);
121 void	 pool_allocator_free(struct pool *, void *);
122 
123 /*
124  * XXX - quick hack. For pools with large items we want to use a special
125  *       allocator. For now, instead of having the allocator figure out
126  *       the allocation size from the pool (which can be done trivially
127  *       with round_page(pr_itemsperpage * pr_size)) which would require
128  *	 lots of changes everywhere, we just create allocators for each
129  *	 size. We limit those to 128 pages.
130  */
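
/*
 * A worked example of the sizing scheme (assuming PAGE_SIZE == 4096):
 * a pool whose item size is exactly 12288 bytes gets psize = 3 in
 * pool_init() below, so it is wired to pool_allocator_large[2] (or the
 * _ni variant when PR_WAITOK is set) with pa_pagesz = 3 * PAGE_SIZE.
 */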
131 #define POOL_LARGE_MAXPAGES 128
132 struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
133 struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
134 void	*pool_large_alloc(struct pool *, int, int *);
135 void	pool_large_free(struct pool *, void *);
136 void	*pool_large_alloc_ni(struct pool *, int, int *);
137 void	pool_large_free_ni(struct pool *, void *);
138 
139 
140 #ifdef DDB
141 void	 pool_print_pagelist(struct pool_pagelist *,
142 	    int (*)(const char *, ...));
143 void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
144 #endif
145 
146 #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
147 
148 static __inline int
149 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
150 {
151 	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
152 	if (diff < 0)
153 		return -(-diff >= a->ph_pagesize);
154 	else if (diff > 0)
155 		return (diff >= b->ph_pagesize);
156 	else
157 		return (0);
158 }
159 
160 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
161 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
162 
163 /*
164  * Return the pool page header based on page address.
165  */
166 static __inline struct pool_item_header *
167 pr_find_pagehead(struct pool *pp, void *v)
168 {
169 	struct pool_item_header *ph, tmp;
170 
171 	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
172 		caddr_t page;
173 
174 		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
175 
176 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
177 	}
178 
179 	/*
180 	 * The trick we're using in the tree compare function is to make two
181 	 * elements compare equal when they overlap. We want to return the
182 	 * page header that belongs to the element just before this address.
183 	 * We don't want this element to compare equal to the next element,
184 	 * so the compare function takes the pagesize from the lower element.
185 	 * If this header is the lower, its pagesize is zero, so it can't
186 	 * overlap with the next header. But if the header we're looking for
187 	 * is lower, we'll use its pagesize and it will overlap and return
188 	 * equal.
189 	 */
190 	tmp.ph_page = v;
191 	tmp.ph_pagesize = 0;
192 	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);
193 
194 	if (ph) {
195 		KASSERT(ph->ph_page <= (caddr_t)v);
196 		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
197 	}
198 	return ph;
199 }
200 
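/*
 * A concrete illustration of the lookup above, with hypothetical addresses
 * and 0x1000-byte pool pages.  Looking up v == 0x15a8 builds a key with a
 * zero page size, which can only compare equal to the header whose page
 * actually contains v:
 *
 *	tmp.ph_page = (caddr_t)0x15a8; tmp.ph_pagesize = 0;
 *	phtree_compare(&tmp, header for 0x1000) == 0	(0x5a8 < 0x1000)
 *	phtree_compare(&tmp, header for 0x2000) == -1	(descend left)
 *
 * so RB_FIND returns the header for the page at 0x1000.
 */
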
201 /*
202  * Remove a page from the pool.
203  */
204 void
205 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
206     struct pool_pagelist *pq)
207 {
208 
209 	/*
210 	 * If the page was idle, decrement the idle page count.
211 	 */
212 	if (ph->ph_nmissing == 0) {
213 #ifdef DIAGNOSTIC
214 		if (pp->pr_nidle == 0)
215 			panic("pr_rmpage: nidle inconsistent");
216 		if (pp->pr_nitems < pp->pr_itemsperpage)
217 			panic("pr_rmpage: nitems inconsistent");
218 #endif
219 		pp->pr_nidle--;
220 	}
221 
222 	pp->pr_nitems -= pp->pr_itemsperpage;
223 
224 	/*
225 	 * Unlink a page from the pool and release it (or queue it for release).
226 	 */
227 	LIST_REMOVE(ph, ph_pagelist);
228 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
229 		RB_REMOVE(phtree, &pp->pr_phtree, ph);
230 	pp->pr_npages--;
231 	pp->pr_npagefree++;
232 	pool_update_curpage(pp);
233 
234 	if (pq) {
235 		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
236 	} else {
237 		pool_allocator_free(pp, ph->ph_page);
238 		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
239 			pool_put(&phpool, ph);
240 	}
241 }
242 
243 /*
244  * Initialize the given pool resource structure.
245  *
246  * We export this routine to allow other kernel parts to declare
247  * static pools that must be initialized before malloc() is available.
248  */
249 void
250 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
251     const char *wchan, struct pool_allocator *palloc)
252 {
253 	int off, slack;
254 
255 #ifdef MALLOC_DEBUG
256 	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
257 		flags &= ~PR_DEBUG;
258 #endif
259 	/*
260 	 * Check arguments and construct default values.
261 	 */
262 	if (palloc == NULL) {
263 		if (size > PAGE_SIZE) {
264 			int psize;
265 
266 			/*
267 			 * XXX - should take align into account as well.
268 			 */
269 			if (size == round_page(size))
270 				psize = size / PAGE_SIZE;
271 			else
272 				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
273 				    1024);
274 			if (psize > POOL_LARGE_MAXPAGES)
275 				psize = POOL_LARGE_MAXPAGES;
276 			if (flags & PR_WAITOK)
277 				palloc = &pool_allocator_large_ni[psize-1];
278 			else
279 				palloc = &pool_allocator_large[psize-1];
280 			if (palloc->pa_pagesz == 0) {
281 				palloc->pa_pagesz = psize * PAGE_SIZE;
282 				if (flags & PR_WAITOK) {
283 					palloc->pa_alloc = pool_large_alloc_ni;
284 					palloc->pa_free = pool_large_free_ni;
285 				} else {
286 					palloc->pa_alloc = pool_large_alloc;
287 					palloc->pa_free = pool_large_free;
288 				}
289 			}
290 		} else {
291 			palloc = &pool_allocator_nointr;
292 		}
293 	}
294 	if (palloc->pa_pagesz == 0) {
295 		palloc->pa_pagesz = PAGE_SIZE;
296 	}
297 	if (palloc->pa_pagemask == 0) {
298 		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
299 		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
300 	}
301 
302 	if (align == 0)
303 		align = ALIGN(1);
304 
305 	if (size < sizeof(struct pool_item))
306 		size = sizeof(struct pool_item);
307 
308 	size = roundup(size, align);
309 #ifdef DIAGNOSTIC
310 	if (size > palloc->pa_pagesz)
311 		panic("pool_init: pool item size (%lu) too large",
312 		    (u_long)size);
313 #endif
314 
315 	/*
316 	 * Initialize the pool structure.
317 	 */
318 	LIST_INIT(&pp->pr_emptypages);
319 	LIST_INIT(&pp->pr_fullpages);
320 	LIST_INIT(&pp->pr_partpages);
321 	pp->pr_curpage = NULL;
322 	pp->pr_npages = 0;
323 	pp->pr_minitems = 0;
324 	pp->pr_minpages = 0;
325 	pp->pr_maxpages = 8;
326 	pp->pr_roflags = flags;
327 	pp->pr_flags = 0;
328 	pp->pr_size = size;
329 	pp->pr_align = align;
330 	pp->pr_wchan = wchan;
331 	pp->pr_alloc = palloc;
332 	pp->pr_nitems = 0;
333 	pp->pr_nout = 0;
334 	pp->pr_hardlimit = UINT_MAX;
335 	pp->pr_hardlimit_warning = NULL;
336 	pp->pr_hardlimit_ratecap.tv_sec = 0;
337 	pp->pr_hardlimit_ratecap.tv_usec = 0;
338 	pp->pr_hardlimit_warning_last.tv_sec = 0;
339 	pp->pr_hardlimit_warning_last.tv_usec = 0;
340 	pp->pr_serial = ++pool_serial;
341 	if (pool_serial == 0)
342 		panic("pool_init: too much uptime");
343 
344 	/* constructor, destructor, and arg */
345 	pp->pr_ctor = NULL;
346 	pp->pr_dtor = NULL;
347 	pp->pr_arg = NULL;
348 
349 	/*
350 	 * Decide whether to put the page header off page to avoid
351 	 * wasting too large a part of the page. Off-page page headers
352 	 * go into an RB tree, so we can match a returned item with
353 	 * its header based on the page address.
354 	 * We use 1/16 of the page size as the threshold (XXX: tune)
355 	 */
356 	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
357 		/* Use the end of the page for the page header */
358 		pp->pr_roflags |= PR_PHINPAGE;
359 		pp->pr_phoffset = off = palloc->pa_pagesz -
360 		    ALIGN(sizeof(struct pool_item_header));
361 	} else {
362 		/* The page header will be taken from our page header pool */
363 		pp->pr_phoffset = 0;
364 		off = palloc->pa_pagesz;
365 		RB_INIT(&pp->pr_phtree);
366 	}
367 
368 	/*
369 	 * Alignment is to take place at `ioff' within the item. This means
370 	 * we must reserve up to `align - 1' bytes on the page to allow
371 	 * appropriate positioning of each item.
372 	 *
373 	 * Silently enforce `0 <= ioff < align'.
374 	 */
375 	pp->pr_itemoffset = ioff = ioff % align;
376 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
377 	KASSERT(pp->pr_itemsperpage != 0);
378 
379 	/*
380 	 * Use the slack between the chunks and the page header
381 	 * for "cache coloring".
382 	 */
383 	slack = off - pp->pr_itemsperpage * pp->pr_size;
384 	pp->pr_maxcolor = (slack / align) * align;
385 	pp->pr_curcolor = 0;
386 
387 	pp->pr_nget = 0;
388 	pp->pr_nfail = 0;
389 	pp->pr_nput = 0;
390 	pp->pr_npagealloc = 0;
391 	pp->pr_npagefree = 0;
392 	pp->pr_hiwat = 0;
393 	pp->pr_nidle = 0;
394 
395 	pp->pr_ipl = -1;
396 	mtx_init(&pp->pr_mtx, IPL_NONE);
397 
398 	if (phpool.pr_size == 0) {
399 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
400 		    0, "phpool", NULL);
401 		pool_setipl(&phpool, IPL_HIGH);
402 	}
403 
404 	/* pglistalloc/constraint parameters */
405 	pp->pr_crange = &kp_dirty;
406 
407 	/* Insert this into the list of all pools. */
408 	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
409 }
410 
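/*
 * Sketch of a pool that needs explicit alignment (hypothetical names; a
 * NULL allocator lets pool_init() pick one based on the item size, and
 * align == 0 falls back to ALIGN(1)):
 *
 *	pool_init(&dma_pool, sizeof(struct dma_desc), 16, 0, 0,
 *	    "dmadesc", NULL);
 *	pool_setipl(&dma_pool, IPL_BIO);
 */
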
411 void
412 pool_setipl(struct pool *pp, int ipl)
413 {
414 	pp->pr_ipl = ipl;
415 	mtx_init(&pp->pr_mtx, ipl);
416 }
417 
418 /*
419  * Decommission a pool resource.
420  */
421 void
422 pool_destroy(struct pool *pp)
423 {
424 	struct pool_item_header *ph;
425 
426 #ifdef DIAGNOSTIC
427 	if (pp->pr_nout != 0)
428 		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
429 #endif
430 
431 	/* Remove all pages */
432 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
433 		pr_rmpage(pp, ph, NULL);
434 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
435 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
436 
437 	/* Remove from global pool list */
438 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
439 }
440 
441 struct pool_item_header *
442 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
443 {
444 	struct pool_item_header *ph;
445 
446 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
447 		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
448 	else
449 		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
450 		    PR_NOWAIT);
451 	if (pool_debug && ph != NULL)
452 		ph->ph_magic = PI_MAGIC;
453 	return (ph);
454 }
455 
456 /*
457  * Grab an item from the pool; must be called at appropriate spl level
458  */
459 void *
460 pool_get(struct pool *pp, int flags)
461 {
462 	void *v;
463 
464 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
465 
466 #ifdef DIAGNOSTIC
467 	if ((flags & PR_WAITOK) != 0)
468 		assertwaitok();
469 #endif /* DIAGNOSTIC */
470 
471 	mtx_enter(&pp->pr_mtx);
472 #ifdef POOL_DEBUG
473 	if (pp->pr_roflags & PR_DEBUGCHK) {
474 		if (pool_chk(pp))
475 			panic("before pool_get");
476 	}
477 #endif
478 	v = pool_do_get(pp, flags);
479 #ifdef POOL_DEBUG
480 	if (pp->pr_roflags & PR_DEBUGCHK) {
481 		if (pool_chk(pp))
482 			panic("after pool_get");
483 	}
484 #endif
485 	if (v != NULL)
486 		pp->pr_nget++;
487 	mtx_leave(&pp->pr_mtx);
488 	if (v == NULL)
489 		return (v);
490 
491 	if (pp->pr_ctor) {
492 		if (flags & PR_ZERO)
493 			panic("pool_get: PR_ZERO when ctor set");
494 		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
495 			mtx_enter(&pp->pr_mtx);
496 			pp->pr_nget--;
497 			pool_do_put(pp, v);
498 			mtx_leave(&pp->pr_mtx);
499 			v = NULL;
500 		}
501 	} else {
502 		if (flags & PR_ZERO)
503 			memset(v, 0, pp->pr_size);
504 	}
505 	return (v);
506 }
507 
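/*
 * Callers that may not sleep pass PR_NOWAIT and must handle failure;
 * PR_ZERO may only be used on pools without a constructor (see the panic
 * above).  A sketch with a hypothetical pool:
 *
 *	struct foo *f;
 *
 *	if ((f = pool_get(&foo_pool, PR_NOWAIT | PR_ZERO)) == NULL)
 *		return (ENOMEM);
 */
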
508 void *
509 pool_do_get(struct pool *pp, int flags)
510 {
511 	struct pool_item *pi;
512 	struct pool_item_header *ph;
513 	void *v;
514 	int slowdown = 0;
515 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
516 	int i, *ip;
517 #endif
518 
519 #ifdef MALLOC_DEBUG
520 	if (pp->pr_roflags & PR_DEBUG) {
521 		void *addr;
522 
523 		addr = NULL;
524 		debug_malloc(pp->pr_size, M_DEBUG,
525 		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
526 		return (addr);
527 	}
528 #endif
529 
530 startover:
531 	/*
532 	 * Check to see if we've reached the hard limit.  If we have,
533 	 * and we can wait, then wait until an item has been returned to
534 	 * the pool.
535 	 */
536 #ifdef DIAGNOSTIC
537 	if (pp->pr_nout > pp->pr_hardlimit)
538 		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
539 #endif
540 	if (pp->pr_nout == pp->pr_hardlimit) {
541 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
542 			/*
543 			 * XXX: A warning isn't logged in this case.  Should
544 			 * it be?
545 			 */
546 			pp->pr_flags |= PR_WANTED;
547 			pool_sleep(pp);
548 			goto startover;
549 		}
550 
551 		/*
552 		 * Log a message that the hard limit has been hit.
553 		 */
554 		if (pp->pr_hardlimit_warning != NULL &&
555 		    ratecheck(&pp->pr_hardlimit_warning_last,
556 		    &pp->pr_hardlimit_ratecap))
557 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
558 
559 		pp->pr_nfail++;
560 		return (NULL);
561 	}
562 
563 	/*
564 	 * The convention we use is that if `curpage' is not NULL, then
565 	 * it points at a non-empty bucket. In particular, `curpage'
566 	 * never points at a page header which has PR_PHINPAGE set and
567 	 * has no items in its bucket.
568 	 */
569 	if ((ph = pp->pr_curpage) == NULL) {
570 #ifdef DIAGNOSTIC
571 		if (pp->pr_nitems != 0) {
572 			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
573 			    pp->pr_wchan, pp->pr_nitems);
574 			panic("pool_do_get: nitems inconsistent");
575 		}
576 #endif
577 
578 		/*
579 		 * Call the back-end page allocator for more memory.
580 		 */
581 		v = pool_allocator_alloc(pp, flags, &slowdown);
582 		if (v != NULL)
583 			ph = pool_alloc_item_header(pp, v, flags);
584 
585 		if (v == NULL || ph == NULL) {
586 			if (v != NULL)
587 				pool_allocator_free(pp, v);
588 
589 			if ((flags & PR_WAITOK) == 0) {
590 				pp->pr_nfail++;
591 				return (NULL);
592 			}
593 
594 			/*
595 			 * Wait for items to be returned to this pool.
596 			 *
597 			 * XXX: maybe we should wake up once a second and
598 			 * try again?
599 			 */
600 			pp->pr_flags |= PR_WANTED;
601 			pool_sleep(pp);
602 			goto startover;
603 		}
604 
605 		/* We have more memory; add it to the pool */
606 		pool_prime_page(pp, v, ph);
607 		pp->pr_npagealloc++;
608 
609 		if (slowdown && (flags & PR_WAITOK)) {
610 			mtx_leave(&pp->pr_mtx);
611 			yield();
612 			mtx_enter(&pp->pr_mtx);
613 		}
614 
615 		/* Start the allocation process over. */
616 		goto startover;
617 	}
618 	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
619 		panic("pool_do_get: %s: page empty", pp->pr_wchan);
620 	}
621 #ifdef DIAGNOSTIC
622 	if (pp->pr_nitems == 0) {
623 		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
624 		    pp->pr_wchan, pp->pr_nitems);
625 		panic("pool_do_get: nitems inconsistent");
626 	}
627 #endif
628 
629 #ifdef DIAGNOSTIC
630 	if (pi->pi_magic != PI_MAGIC)
631 		panic("pool_do_get(%s): free list modified: "
632 		    "page %p; item addr %p; offset 0x%x=0x%x",
633 		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
634 #ifdef POOL_DEBUG
635 	if (pool_debug && ph->ph_magic) {
636 		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
637 		    i < pp->pr_size / sizeof(int); i++) {
638 			if (ip[i] != ph->ph_magic) {
639 				panic("pool_do_get(%s): free list modified: "
640 				    "page %p; item addr %p; offset 0x%x=0x%x",
641 				    pp->pr_wchan, ph->ph_page, pi,
642 				    i * sizeof(int), ip[i]);
643 			}
644 		}
645 	}
646 #endif /* POOL_DEBUG */
647 #endif /* DIAGNOSTIC */
648 
649 	/*
650 	 * Remove from item list.
651 	 */
652 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
653 	pp->pr_nitems--;
654 	pp->pr_nout++;
655 	if (ph->ph_nmissing == 0) {
656 #ifdef DIAGNOSTIC
657 		if (pp->pr_nidle == 0)
658 			panic("pool_do_get: nidle inconsistent");
659 #endif
660 		pp->pr_nidle--;
661 
662 		/*
663 		 * This page was previously empty.  Move it to the list of
664 		 * partially-full pages.  This page is already curpage.
665 		 */
666 		LIST_REMOVE(ph, ph_pagelist);
667 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
668 	}
669 	ph->ph_nmissing++;
670 	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
671 #ifdef DIAGNOSTIC
672 		if (ph->ph_nmissing != pp->pr_itemsperpage) {
673 			panic("pool_do_get: %s: nmissing inconsistent",
674 			    pp->pr_wchan);
675 		}
676 #endif
677 		/*
678 		 * This page is now full.  Move it to the full list
679 		 * and select a new current page.
680 		 */
681 		LIST_REMOVE(ph, ph_pagelist);
682 		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
683 		pool_update_curpage(pp);
684 	}
685 
686 	/*
687 	 * If we have a low water mark and we are now below that low
688 	 * water mark, add more items to the pool.
689 	 */
690 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
691 		/*
692 		 * XXX: Should we log a warning?  Should we set up a timeout
693 		 * to try again in a second or so?  The latter could break
694 		 * a caller's assumptions about interrupt protection, etc.
695 		 */
696 	}
697 	return (v);
698 }
699 
700 /*
701  * Return resource to the pool; must be called at appropriate spl level
702  */
703 void
704 pool_put(struct pool *pp, void *v)
705 {
706 	if (pp->pr_dtor)
707 		pp->pr_dtor(pp->pr_arg, v);
708 	mtx_enter(&pp->pr_mtx);
709 #ifdef POOL_DEBUG
710 	if (pp->pr_roflags & PR_DEBUGCHK) {
711 		if (pool_chk(pp))
712 			panic("before pool_put");
713 	}
714 #endif
715 	pool_do_put(pp, v);
716 #ifdef POOL_DEBUG
717 	if (pp->pr_roflags & PR_DEBUGCHK) {
718 		if (pool_chk(pp))
719 			panic("after pool_put");
720 	}
721 #endif
722 	pp->pr_nput++;
723 	mtx_leave(&pp->pr_mtx);
724 }
725 
726 /*
727  * Internal version of pool_put().
728  */
729 void
730 pool_do_put(struct pool *pp, void *v)
731 {
732 	struct pool_item *pi = v;
733 	struct pool_item_header *ph;
734 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
735 	int i, *ip;
736 #endif
737 
738 	if (v == NULL)
739 		panic("pool_put of NULL");
740 
741 #ifdef MALLOC_DEBUG
742 	if (pp->pr_roflags & PR_DEBUG) {
743 		debug_free(v, M_DEBUG);
744 		return;
745 	}
746 #endif
747 
748 #ifdef DIAGNOSTIC
749 	if (pp->pr_ipl != -1)
750 		splassert(pp->pr_ipl);
751 
752 	if (pp->pr_nout == 0) {
753 		printf("pool %s: putting with none out\n",
754 		    pp->pr_wchan);
755 		panic("pool_do_put");
756 	}
757 #endif
758 
759 	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
760 		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
761 	}
762 
763 	/*
764 	 * Return to item list.
765 	 */
766 #ifdef DIAGNOSTIC
767 	pi->pi_magic = PI_MAGIC;
768 #ifdef POOL_DEBUG
769 	if (ph->ph_magic) {
770 		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
771 		    i < pp->pr_size / sizeof(int); i++)
772 			ip[i] = ph->ph_magic;
773 	}
774 #endif /* POOL_DEBUG */
775 #endif /* DIAGNOSTIC */
776 
777 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
778 	ph->ph_nmissing--;
779 	pp->pr_nitems++;
780 	pp->pr_nout--;
781 
782 	/* Cancel "pool empty" condition if it exists */
783 	if (pp->pr_curpage == NULL)
784 		pp->pr_curpage = ph;
785 
786 	if (pp->pr_flags & PR_WANTED) {
787 		pp->pr_flags &= ~PR_WANTED;
788 		wakeup(pp);
789 	}
790 
791 	/*
792 	 * If this page is now empty, do one of two things:
793 	 *
794 	 *	(1) If we have more pages than the page high water mark,
795 	 *	    free the page back to the system.
796 	 *
797 	 *	(2) Otherwise, move the page to the empty page list.
798 	 *
799 	 * Either way, select a new current page (so we use a partially-full
800 	 * page if one is available).
801 	 */
802 	if (ph->ph_nmissing == 0) {
803 		pp->pr_nidle++;
804 		if (pp->pr_nidle > pp->pr_maxpages) {
805 			pr_rmpage(pp, ph, NULL);
806 		} else {
807 			LIST_REMOVE(ph, ph_pagelist);
808 			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
809 			pool_update_curpage(pp);
810 		}
811 	}
812 
813 	/*
814 	 * If the page was previously completely full, move it to the
815 	 * partially-full list and make it the current page.  The next
816 	 * allocation will get the item from this page, instead of
817 	 * further fragmenting the pool.
818 	 */
819 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
820 		LIST_REMOVE(ph, ph_pagelist);
821 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
822 		pp->pr_curpage = ph;
823 	}
824 }
825 
826 /*
827  * Add N items to the pool.
828  */
829 int
830 pool_prime(struct pool *pp, int n)
831 {
832 	struct pool_item_header *ph;
833 	caddr_t cp;
834 	int newpages;
835 	int slowdown;
836 
837 	mtx_enter(&pp->pr_mtx);
838 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
839 
840 	while (newpages-- > 0) {
841 		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
842 		if (cp != NULL)
843 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
844 		if (cp == NULL || ph == NULL) {
845 			if (cp != NULL)
846 				pool_allocator_free(pp, cp);
847 			break;
848 		}
849 
850 		pool_prime_page(pp, cp, ph);
851 		pp->pr_npagealloc++;
852 		pp->pr_minpages++;
853 	}
854 
855 	if (pp->pr_minpages >= pp->pr_maxpages)
856 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
857 
858 	mtx_leave(&pp->pr_mtx);
859 	return (0);
860 }
861 
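/*
 * A typical use of pool_prime() above, with a hypothetical pool: fill it
 * at attach time so that early PR_NOWAIT allocations are less likely to
 * fail for lack of pages:
 *
 *	pool_prime(&rxbuf_pool, 64);
 */
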
862 /*
863  * Add a page worth of items to the pool.
864  *
865  * Note, we must be called with the pool descriptor LOCKED.
866  */
867 void
868 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
869 {
870 	struct pool_item *pi;
871 	caddr_t cp = storage;
872 	unsigned int align = pp->pr_align;
873 	unsigned int ioff = pp->pr_itemoffset;
874 	int n;
875 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
876 	int i, *ip;
877 #endif
878 
879 	/*
880 	 * Insert page header.
881 	 */
882 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
883 	TAILQ_INIT(&ph->ph_itemlist);
884 	ph->ph_page = storage;
885 	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
886 	ph->ph_nmissing = 0;
887 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
888 		RB_INSERT(phtree, &pp->pr_phtree, ph);
889 
890 	pp->pr_nidle++;
891 
892 	/*
893 	 * Color this page.
894 	 */
895 	cp = (caddr_t)(cp + pp->pr_curcolor);
896 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
897 		pp->pr_curcolor = 0;
898 
899 	/*
900 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
901 	 */
902 	if (ioff != 0)
903 		cp = (caddr_t)(cp + (align - ioff));
904 	ph->ph_colored = cp;
905 
906 	/*
907 	 * Insert remaining chunks on the bucket list.
908 	 */
909 	n = pp->pr_itemsperpage;
910 	pp->pr_nitems += n;
911 
912 	while (n--) {
913 		pi = (struct pool_item *)cp;
914 
915 		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
916 
917 		/* Insert on page list */
918 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
919 
920 #ifdef DIAGNOSTIC
921 		pi->pi_magic = PI_MAGIC;
922 #ifdef POOL_DEBUG
923 		if (ph->ph_magic) {
924 			for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
925 			    i < pp->pr_size / sizeof(int); i++)
926 				ip[i] = ph->ph_magic;
927 		}
928 #endif /* POOL_DEBUG */
929 #endif /* DIAGNOSTIC */
930 		cp = (caddr_t)(cp + pp->pr_size);
931 	}
932 
933 	/*
934 	 * If the pool was depleted, point at the new page.
935 	 */
936 	if (pp->pr_curpage == NULL)
937 		pp->pr_curpage = ph;
938 
939 	if (++pp->pr_npages > pp->pr_hiwat)
940 		pp->pr_hiwat = pp->pr_npages;
941 }
942 
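/*
 * Cache coloring above, worked through with hypothetical numbers: with
 * 4096-byte pages, a 360-byte item, ALIGN(1) == 8 and the page header
 * kept off page, pool_init() computed pr_itemsperpage = 11, leaving
 * 4096 - 11 * 360 = 136 bytes of slack, all of which becomes pr_maxcolor
 * here.  Successive pages therefore start their first item at offsets
 * 0, 8, 16, ... up to 136 before wrapping back to 0, so equivalent items
 * on different pages do not all map to the same cache lines.
 */
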
943 /*
944  * Used by pool_get() when nitems drops below the low water mark; it
945  * allocates pages until pr_nitems reaches the low water mark again.
946  *
947  * Note we never wait for memory here, we let the caller decide what to do.
948  */
949 int
950 pool_catchup(struct pool *pp)
951 {
952 	struct pool_item_header *ph;
953 	caddr_t cp;
954 	int error = 0;
955 	int slowdown;
956 
957 	while (POOL_NEEDS_CATCHUP(pp)) {
958 		/*
959 		 * Call the page back-end allocator for more memory.
960 		 */
961 		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
962 		if (cp != NULL)
963 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
964 		if (cp == NULL || ph == NULL) {
965 			if (cp != NULL)
966 				pool_allocator_free(pp, cp);
967 			error = ENOMEM;
968 			break;
969 		}
970 		pool_prime_page(pp, cp, ph);
971 		pp->pr_npagealloc++;
972 	}
973 
974 	return (error);
975 }
976 
977 void
978 pool_update_curpage(struct pool *pp)
979 {
980 
981 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
982 	if (pp->pr_curpage == NULL) {
983 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
984 	}
985 }
986 
987 void
988 pool_setlowat(struct pool *pp, int n)
989 {
990 
991 	pp->pr_minitems = n;
992 	pp->pr_minpages = (n == 0)
993 		? 0
994 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
995 
996 	mtx_enter(&pp->pr_mtx);
997 	/* Make sure we're caught up with the newly-set low water mark. */
998 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
999 		/*
1000 		 * XXX: Should we log a warning?  Should we set up a timeout
1001 		 * to try again in a second or so?  The latter could break
1002 		 * a caller's assumptions about interrupt protection, etc.
1003 		 */
1004 	}
1005 	mtx_leave(&pp->pr_mtx);
1006 }
1007 
1008 void
1009 pool_sethiwat(struct pool *pp, int n)
1010 {
1011 
1012 	pp->pr_maxpages = (n == 0)
1013 		? 0
1014 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1015 }
1016 
1017 int
1018 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1019 {
1020 	int error = 0;
1021 
1022 	if (n < pp->pr_nout) {
1023 		error = EINVAL;
1024 		goto done;
1025 	}
1026 
1027 	pp->pr_hardlimit = n;
1028 	pp->pr_hardlimit_warning = warnmsg;
1029 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1030 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1031 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1032 
1033 done:
1034 	return (error);
1035 }
1036 
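/*
 * The water marks and the hard limit combined, as a sketch with a
 * hypothetical pool and numbers: keep roughly 32 items' worth of pages
 * primed, free completely idle pages once more than about 128 items'
 * worth sit unused, and refuse to hand out more than 1024 items, logging
 * the warning at most once per 60 seconds:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 128);
 *	pool_sethardlimit(&foo_pool, 1024, "foo_pool limit reached", 60);
 */
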
1037 void
1038 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1039 {
1040 	pp->pr_crange = mode;
1041 }
1042 
1043 void
1044 pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
1045     void (*dtor)(void *, void *), void *arg)
1046 {
1047 	pp->pr_ctor = ctor;
1048 	pp->pr_dtor = dtor;
1049 	pp->pr_arg = arg;
1050 }
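
/*
 * Constructor/destructor sketch (hypothetical "foo" functions).  The
 * constructor runs outside the pool mutex with the caller's flags and must
 * return non-zero on failure, which makes pool_get() fail; PR_ZERO cannot
 * be combined with a constructor.  The destructor runs on every pool_put()
 * before the item goes back on the free list.
 *
 *	int
 *	foo_ctor(void *arg, void *v, int flags)
 *	{
 *		struct foo *f = v;
 *
 *		f->f_softc = arg;
 *		return (0);
 *	}
 *
 *	pool_set_ctordtor(&foo_pool, foo_ctor, foo_dtor, sc);
 */
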
1051 /*
1052  * Release all complete pages that have not been used recently.
1053  *
1054  * Returns non-zero if any pages have been reclaimed.
1055  */
1056 int
1057 pool_reclaim(struct pool *pp)
1058 {
1059 	struct pool_item_header *ph, *phnext;
1060 	struct pool_pagelist pq;
1061 
1062 	LIST_INIT(&pq);
1063 
1064 	mtx_enter(&pp->pr_mtx);
1065 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1066 		phnext = LIST_NEXT(ph, ph_pagelist);
1067 
1068 		/* Check our minimum page claim */
1069 		if (pp->pr_npages <= pp->pr_minpages)
1070 			break;
1071 
1072 		KASSERT(ph->ph_nmissing == 0);
1073 
1074 		/*
1075 		 * If freeing this page would put us below
1076 		 * the low water mark, stop now.
1077 		 */
1078 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1079 		    pp->pr_minitems)
1080 			break;
1081 
1082 		pr_rmpage(pp, ph, &pq);
1083 	}
1084 	mtx_leave(&pp->pr_mtx);
1085 
1086 	if (LIST_EMPTY(&pq))
1087 		return (0);
1088 	while ((ph = LIST_FIRST(&pq)) != NULL) {
1089 		LIST_REMOVE(ph, ph_pagelist);
1090 		pool_allocator_free(pp, ph->ph_page);
1091 		if (pp->pr_roflags & PR_PHINPAGE)
1092 			continue;
1093 		pool_put(&phpool, ph);
1094 	}
1095 
1096 	return (1);
1097 }
1098 
1099 /*
1100  * Release all complete pages that have not been used recently
1101  * from all pools.
1102  */
1103 void
1104 pool_reclaim_all(void)
1105 {
1106 	struct pool	*pp;
1107 	int		s;
1108 
1109 	s = splhigh();
1110 	TAILQ_FOREACH(pp, &pool_head, pr_poollist)
1111 		pool_reclaim(pp);
1112 	splx(s);
1113 }
1114 
1115 #ifdef DDB
1116 #include <machine/db_machdep.h>
1117 #include <ddb/db_interface.h>
1118 #include <ddb/db_output.h>
1119 
1120 /*
1121  * Diagnostic helpers.
1122  */
1123 void
1124 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1125 {
1126 	pool_print1(pp, modif, pr);
1127 }
1128 
1129 void
1130 pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
1131 {
1132 	struct pool_item_header *ph;
1133 #ifdef DIAGNOSTIC
1134 	struct pool_item *pi;
1135 #endif
1136 
1137 	LIST_FOREACH(ph, pl, ph_pagelist) {
1138 		(*pr)("\t\tpage %p, nmissing %d\n",
1139 		    ph->ph_page, ph->ph_nmissing);
1140 #ifdef DIAGNOSTIC
1141 		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1142 			if (pi->pi_magic != PI_MAGIC) {
1143 				(*pr)("\t\t\titem %p, magic 0x%x\n",
1144 				    pi, pi->pi_magic);
1145 			}
1146 		}
1147 #endif
1148 	}
1149 }
1150 
1151 void
1152 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1153 {
1154 	struct pool_item_header *ph;
1155 	int print_pagelist = 0;
1156 	char c;
1157 
1158 	while ((c = *modif++) != '\0') {
1159 		if (c == 'p')
1160 			print_pagelist = 1;
1162 	}
1163 
1164 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1165 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1166 	    pp->pr_roflags);
1167 	(*pr)("\talloc %p\n", pp->pr_alloc);
1168 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1169 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1170 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1171 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1172 
1173 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1174 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1175 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1176 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1177 
1178 	if (print_pagelist == 0)
1179 		return;
1180 
1181 	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1182 		(*pr)("\n\tempty page list:\n");
1183 	pool_print_pagelist(&pp->pr_emptypages, pr);
1184 	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1185 		(*pr)("\n\tfull page list:\n");
1186 	pool_print_pagelist(&pp->pr_fullpages, pr);
1187 	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1188 		(*pr)("\n\tpartial-page list:\n");
1189 	pool_print_pagelist(&pp->pr_partpages, pr);
1190 
1191 	if (pp->pr_curpage == NULL)
1192 		(*pr)("\tno current page\n");
1193 	else
1194 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1195 }
1196 
1197 void
1198 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1199 {
1200 	struct pool *pp;
1201 	char maxp[16];
1202 	int ovflw;
1203 	char mode;
1204 
1205 	mode = modif[0];
1206 	if (mode != '\0' && mode != 'a') {
1207 		db_printf("usage: show all pools [/a]\n");
1208 		return;
1209 	}
1210 
1211 	if (mode == '\0')
1212 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1213 		    "Name",
1214 		    "Size",
1215 		    "Requests",
1216 		    "Fail",
1217 		    "Releases",
1218 		    "Pgreq",
1219 		    "Pgrel",
1220 		    "Npage",
1221 		    "Hiwat",
1222 		    "Minpg",
1223 		    "Maxpg",
1224 		    "Idle");
1225 	else
1226 		db_printf("%-10s %18s %18s\n",
1227 		    "Name", "Address", "Allocator");
1228 
1229 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1230 		if (mode == 'a') {
1231 			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
1232 			    pp->pr_alloc);
1233 			continue;
1234 		}
1235 
1236 		if (!pp->pr_nget)
1237 			continue;
1238 
1239 		if (pp->pr_maxpages == UINT_MAX)
1240 			snprintf(maxp, sizeof maxp, "inf");
1241 		else
1242 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1243 
1244 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1245 	(ovflw) += db_printf((fmt),			\
1246 	    (width) - (fixed) - (ovflw) > 0 ?		\
1247 	    (width) - (fixed) - (ovflw) : 0,		\
1248 	    (val)) - (width);				\
1249 	if ((ovflw) < 0)				\
1250 		(ovflw) = 0;				\
1251 } while (/* CONSTCOND */0)
1252 
1253 		ovflw = 0;
1254 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1255 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1256 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1257 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1258 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1259 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1260 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1261 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1262 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1263 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1264 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1265 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1266 
1267 		pool_chk(pp);
1268 	}
1269 }
1270 #endif /* DDB */
1271 
1272 #if defined(POOL_DEBUG) || defined(DDB)
1273 int
1274 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
1275 {
1276 	struct pool_item *pi;
1277 	caddr_t page;
1278 	int n;
1279 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
1280 	int i, *ip;
1281 #endif
1282 	const char *label = pp->pr_wchan;
1283 
1284 	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1285 	if (page != ph->ph_page &&
1286 	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1287 		printf("%s: ", label);
1288 		printf("pool(%p:%s): page inconsistency: page %p; "
1289 		    "at page head addr %p (p %p)\n",
1290 		    pp, pp->pr_wchan, ph->ph_page, ph, page);
1291 		return 1;
1292 	}
1293 
1294 	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1295 	     pi != NULL;
1296 	     pi = TAILQ_NEXT(pi,pi_list), n++) {
1297 
1298 #ifdef DIAGNOSTIC
1299 		if (pi->pi_magic != PI_MAGIC) {
1300 			printf("%s: ", label);
1301 			printf("pool(%s): free list modified: "
1302 			    "page %p; item ordinal %d; addr %p "
1303 			    "(p %p); offset 0x%x=0x%x\n",
1304 			    pp->pr_wchan, ph->ph_page, n, pi, page,
1305 			    0, pi->pi_magic);
1306 		}
1307 #ifdef POOL_DEBUG
1308 		if (pool_debug && ph->ph_magic) {
1309 			for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
1310 			    i < pp->pr_size / sizeof(int); i++) {
1311 				if (ip[i] != ph->ph_magic) {
1312 					printf("pool(%s): free list modified: "
1313 					    "page %p; item ordinal %d; addr %p "
1314 					    "(p %p); offset 0x%x=0x%x\n",
1315 					    pp->pr_wchan, ph->ph_page, n, pi,
1316 					    page, i * sizeof(int), ip[i]);
1317 				}
1318 			}
1319 		}
1320 
1321 #endif /* POOL_DEBUG */
1322 #endif /* DIAGNOSTIC */
1323 		page =
1324 		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1325 		if (page == ph->ph_page)
1326 			continue;
1327 
1328 		printf("%s: ", label);
1329 		printf("pool(%p:%s): page inconsistency: page %p;"
1330 		    " item ordinal %d; addr %p (p %p)\n", pp,
1331 		    pp->pr_wchan, ph->ph_page, n, pi, page);
1332 		return 1;
1333 	}
1334 	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1335 		printf("pool(%p:%s): page inconsistency: page %p;"
1336 		    " %d on list, %d missing, %d items per page\n", pp,
1337 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1338 		    pp->pr_itemsperpage);
1339 		return 1;
1340 	}
1341 	if (expected >= 0 && n != expected) {
1342 		printf("pool(%p:%s): page inconsistency: page %p;"
1343 		    " %d on list, %d missing, %d expected\n", pp,
1344 		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1345 		    expected);
1346 		return 1;
1347 	}
1348 	return 0;
1349 }
1350 
1351 int
1352 pool_chk(struct pool *pp)
1353 {
1354 	struct pool_item_header *ph;
1355 	int r = 0;
1356 
1357 	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
1358 		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1359 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
1360 		r += pool_chk_page(pp, ph, 0);
1361 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
1362 		r += pool_chk_page(pp, ph, -1);
1363 
1364 	return (r);
1365 }
1366 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1367 
1368 #ifdef DDB
1369 void
1370 pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...),
1371     void (*func)(void *, int, int (*)(const char *, ...)))
1372 {
1373 	struct pool_item_header *ph;
1374 	struct pool_item *pi;
1375 	caddr_t cp;
1376 	int n;
1377 
1378 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1379 		cp = ph->ph_colored;
1380 		n = ph->ph_nmissing;
1381 
1382 		while (n--) {
1383 			func(cp, full, pr);
1384 			cp += pp->pr_size;
1385 		}
1386 	}
1387 
1388 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1389 		cp = ph->ph_colored;
1390 		n = ph->ph_nmissing;
1391 
1392 		do {
1393 			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1394 				if (cp == (caddr_t)pi)
1395 					break;
1396 			}
1397 			if (cp != (caddr_t)pi) {
1398 				func(cp, full, pr);
1399 				n--;
1400 			}
1401 
1402 			cp += pp->pr_size;
1403 		} while (n > 0);
1404 	}
1405 }
1406 #endif
1407 
1408 /*
1409  * We have three different sysctls.
1410  * kern.pool.npools - the number of pools.
1411  * kern.pool.pool.<pool#> - the struct pool for pool <pool#>.
1412  * kern.pool.name.<pool#> - the name for pool#.
1413  */
1414 int
1415 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1416 {
1417 	struct pool *pp, *foundpool = NULL;
1418 	size_t buflen = where != NULL ? *sizep : 0;
1419 	int npools = 0, s;
1420 	unsigned int lookfor;
1421 	size_t len;
1422 
1423 	switch (*name) {
1424 	case KERN_POOL_NPOOLS:
1425 		if (namelen != 1 || buflen != sizeof(int))
1426 			return (EINVAL);
1427 		lookfor = 0;
1428 		break;
1429 	case KERN_POOL_NAME:
1430 		if (namelen != 2 || buflen < 1)
1431 			return (EINVAL);
1432 		lookfor = name[1];
1433 		break;
1434 	case KERN_POOL_POOL:
1435 		if (namelen != 2 || buflen != sizeof(struct pool))
1436 			return (EINVAL);
1437 		lookfor = name[1];
1438 		break;
1439 	default:
1440 		return (EINVAL);
1441 	}
1442 
1443 	s = splvm();
1444 
1445 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1446 		npools++;
1447 		if (lookfor == pp->pr_serial) {
1448 			foundpool = pp;
1449 			break;
1450 		}
1451 	}
1452 
1453 	splx(s);
1454 
1455 	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
1456 		return (ENOENT);
1457 
1458 	switch (*name) {
1459 	case KERN_POOL_NPOOLS:
1460 		return copyout(&npools, where, buflen);
1461 	case KERN_POOL_NAME:
1462 		len = strlen(foundpool->pr_wchan) + 1;
1463 		if (*sizep < len)
1464 			return (ENOMEM);
1465 		*sizep = len;
1466 		return copyout(foundpool->pr_wchan, where, len);
1467 	case KERN_POOL_POOL:
1468 		return copyout(foundpool, where, buflen);
1469 	}
1470 	/* NOTREACHED */
1471 	return (0); /* XXX - Stupid gcc */
1472 }
1473 
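/*
 * How userland reaches sysctl_dopool() above, as a hedged sketch (this is
 * roughly what vmstat(8) does; the per-pool index is the pool's pr_serial,
 * and serials of destroyed pools return ENOENT):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
 *	int npools;
 *	struct pool pi;
 *	size_t len;
 *
 *	len = sizeof(npools);
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 *	mib[2] = KERN_POOL_POOL;
 *	mib[3] = 1;			/* first pool's serial */
 *	len = sizeof(pi);
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */
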
1474 /*
1475  * Pool backend allocators.
1476  *
1477  * Each pool has a backend allocator that allocates and frees its pages.
1478  */
1479 void	*pool_page_alloc(struct pool *, int, int *);
1480 void	pool_page_free(struct pool *, void *);
1481 
1482 /*
1483  * Safe for interrupts; the name is preserved for compatibility.
1484  * This is the default allocator.
1485  */
1486 struct pool_allocator pool_allocator_nointr = {
1487 	pool_page_alloc, pool_page_free, 0,
1488 };
1489 
1490 /*
1491  * XXX - we have at least three different resources for the same allocation
1492  *  and each resource can be depleted. First we have the ready elements in
1493  *  the pool. Then we have the resource (typically a vm_map) for this
1494  *  allocator, then we have physical memory. Waiting for any of these can
1495  *  be unnecessary when any other is freed, but the kernel doesn't support
1496  *  sleeping on multiple addresses, so we have to fake it: the caller sleeps on
1497  *  the pool (so that we can be awakened when an item is returned to the pool),
1498  *  but we set PA_WANT on the allocator. When a page is returned to
1499  *  the allocator and PA_WANT is set pool_allocator_free will wakeup all
1500  *  sleeping pools belonging to this allocator. (XXX - thundering herd).
1501  *  We also wake up the allocator in case someone without a pool (malloc)
1502  *  is sleeping waiting for this allocator.
1503  */
1504 
1505 void *
1506 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1507 {
1508 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1509 	void *v;
1510 
1511 	if (waitok)
1512 		mtx_leave(&pp->pr_mtx);
1513 	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
1514 	if (waitok)
1515 		mtx_enter(&pp->pr_mtx);
1516 
1517 	return (v);
1518 }
1519 
1520 void
1521 pool_allocator_free(struct pool *pp, void *v)
1522 {
1523 	struct pool_allocator *pa = pp->pr_alloc;
1524 
1525 	(*pa->pa_free)(pp, v);
1526 }
1527 
1528 void *
1529 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1530 {
1531 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1532 
1533 	kd.kd_waitok = (flags & PR_WAITOK);
1534 	kd.kd_slowdown = slowdown;
1535 
1536 	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
1537 }
1538 
1539 void
1540 pool_page_free(struct pool *pp, void *v)
1541 {
1542 	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
1543 }
1544 
1545 void *
1546 pool_large_alloc(struct pool *pp, int flags, int *slowdown)
1547 {
1548 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1549 	void *v;
1550 	int s;
1551 
1552 	kd.kd_waitok = (flags & PR_WAITOK);
1553 	kd.kd_slowdown = slowdown;
1554 
1555 	s = splvm();
1556 	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
1557 	    &kd);
1558 	splx(s);
1559 
1560 	return (v);
1561 }
1562 
1563 void
1564 pool_large_free(struct pool *pp, void *v)
1565 {
1566 	int s;
1567 
1568 	s = splvm();
1569 	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
1570 	splx(s);
1571 }
1572 
1573 void *
1574 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
1575 {
1576 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1577 
1578 	kd.kd_waitok = (flags & PR_WAITOK);
1579 	kd.kd_slowdown = slowdown;
1580 
1581 	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
1582 }
1583 
1584 void
1585 pool_large_free_ni(struct pool *pp, void *v)
1586 {
1587 	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
1588 }
1589