1 /*	$OpenBSD: subr_pool.c,v 1.48 2006/11/17 11:50:09 jmc Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the NetBSD
23  *	Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/errno.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/lock.h>
48 #include <sys/pool.h>
49 #include <sys/syslog.h>
50 #include <sys/sysctl.h>
51 
52 #include <uvm/uvm.h>
53 
54 /*
55  * XXX - for now.
56  */
57 #ifdef LOCKDEBUG
58 #define simple_lock_freecheck(a, s) do { /* nothing */ } while (0)
59 #define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0)
60 #endif
61 
62 /*
63  * Pool resource management utility.
64  *
65  * Memory is allocated in pages which are split into pieces according to
66  * the pool item size. Each page is kept on one of three lists in the
67  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
68  * for empty, full and partially-full pages respectively. The individual
69  * pool items are on a linked list headed by `ph_itemlist' in each page
70  * header. The memory for building the page list is either taken from
71  * the allocated pages themselves (for small pool items) or taken from
72  * an internal pool of page headers (`phpool').
73  */
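/*
 * Illustrative sketch (not part of this file): the basic pool API described
 * above.  The item type `struct foo', the pool `foo_pool' and the name
 * "foopl" are hypothetical.
 */
#if 0
struct foo { int f_val; };
struct pool foo_pool;

void
foo_pool_example(void)
{
	struct foo *f;

	/* One-time setup: items of sizeof(struct foo), default alignment,
	 * no item offset, no flags, default allocator. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);

	/* PR_WAITOK may sleep until an item is available;
	 * PR_NOWAIT would return NULL on failure instead. */
	f = pool_get(&foo_pool, PR_WAITOK);
	f->f_val = 1;

	/* Hand the item back to its pool. */
	pool_put(&foo_pool, f);
}
#endif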
74 
75 /* List of all pools */
76 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
77 
78 /* Private pool for page header structures */
79 static struct pool phpool;
80 
81 /* # of seconds to retain page after last use */
82 int pool_inactive_time = 10;
83 
84 /* This spin lock protects the pool_head list */
85 struct simplelock pool_head_slock;
86 
87 struct pool_item_header {
88 	/* Page headers */
89 	LIST_ENTRY(pool_item_header)
90 				ph_pagelist;	/* pool page list */
91 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
92 	SPLAY_ENTRY(pool_item_header)
93 				ph_node;	/* Off-page page headers */
94 	int			ph_nmissing;	/* # of chunks in use */
95 	caddr_t			ph_page;	/* this page's address */
96 	struct timeval		ph_time;	/* last referenced */
97 };
98 
99 struct pool_item {
100 #ifdef DIAGNOSTIC
101 	int pi_magic;
102 #endif
103 #define	PI_MAGIC 0xdeafbeef
104 	/* Other entries use only this list entry */
105 	TAILQ_ENTRY(pool_item)	pi_list;
106 };
107 
108 #define	POOL_NEEDS_CATCHUP(pp)						\
109 	((pp)->pr_nitems < (pp)->pr_minitems)
110 
111 /*
112  * Every pool gets a unique serial number assigned to it. If this counter
113  * wraps, we're screwed, but we shouldn't create so many pools anyway.
114  */
115 unsigned int pool_serial;
116 
117 /*
118  * Pool cache management.
119  *
120  * Pool caches provide a way for constructed objects to be cached by the
121  * pool subsystem.  This can lead to performance improvements by avoiding
122  * needless object construction/destruction; it is deferred until absolutely
123  * necessary.
124  *
125  * Caches are grouped into cache groups.  Each cache group references
126  * up to 16 constructed objects.  When a cache allocates an object
127  * from the pool, it calls the object's constructor and places it into
128  * a cache group.  When a cache group frees an object back to the pool,
129  * it first calls the object's destructor.  This allows the object to
130  * persist in constructed form while freed to the cache.
131  *
132  * Multiple caches may exist for each pool.  This allows a single
133  * object type to have multiple constructed forms.  The pool references
134  * each cache, so that when a pool is drained by the pagedaemon, it can
135  * drain each individual cache as well.  Each time a cache is drained,
136  * the most idle cache group is freed to the pool in its entirety.
137  *
138  * Pool caches are layered on top of pools.  By layering them, we can avoid
139  * the complexity of cache management for pools which would not benefit
140  * from it.
141  */
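/*
 * Illustrative sketch (not part of this file): layering a cache of
 * constructed objects on top of a pool, as described above.  The names
 * `foo_pool', `foo_cache', `foo_ctor' and `foo_dtor' are hypothetical.
 */
#if 0
struct foo { int f_refs; };
struct pool foo_pool;
struct pool_cache foo_cache;

int
foo_ctor(void *arg, void *obj, int flags)
{
	/* Expensive one-time construction goes here; return 0 on success. */
	((struct foo *)obj)->f_refs = 0;
	return (0);
}

void
foo_dtor(void *arg, void *obj)
{
	/* Undo whatever foo_ctor() built. */
}

void
foo_cache_example(void)
{
	struct foo *f;

	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);

	/* A cache hit hands back an already-constructed object; a miss
	 * falls through to pool_get() followed by foo_ctor(). */
	f = pool_cache_get(&foo_cache, PR_WAITOK);

	/* The object stays constructed while it sits in the cache. */
	pool_cache_put(&foo_cache, f);
}
#endif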
142 
143 /* The cache group pool. */
144 static struct pool pcgpool;
145 
146 /* The pool cache group. */
147 #define	PCG_NOBJECTS		16
148 struct pool_cache_group {
149 	TAILQ_ENTRY(pool_cache_group)
150 		pcg_list;	/* link in the pool cache's group list */
151 	u_int	pcg_avail;	/* # available objects */
152 				/* pointers to the objects */
153 	void	*pcg_objects[PCG_NOBJECTS];
154 };
155 
156 void	pool_cache_reclaim(struct pool_cache *);
157 void	pool_cache_do_invalidate(struct pool_cache *, int,
158     void (*)(struct pool *, void *));
159 
160 int	pool_catchup(struct pool *);
161 void	pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
162 void	pool_update_curpage(struct pool *);
163 void	pool_do_put(struct pool *, void *);
164 void	pr_rmpage(struct pool *, struct pool_item_header *,
165     struct pool_pagelist *);
166 int	pool_chk_page(struct pool *, const char *, struct pool_item_header *);
167 
168 void	*pool_allocator_alloc(struct pool *, int);
169 void	pool_allocator_free(struct pool *, void *);
170 
171 #ifdef DDB
172 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...));
173 void pool_print1(struct pool *, const char *, int (*)(const char *, ...));
174 #endif
175 
176 
177 /*
178  * Pool log entry. An array of these is allocated in pool_init().
179  */
180 struct pool_log {
181 	const char	*pl_file;
182 	long		pl_line;
183 	int		pl_action;
184 #define	PRLOG_GET	1
185 #define	PRLOG_PUT	2
186 	void		*pl_addr;
187 };
188 
189 /* Number of entries in pool log buffers */
190 #ifndef POOL_LOGSIZE
191 #define	POOL_LOGSIZE	10
192 #endif
193 
194 int pool_logsize = POOL_LOGSIZE;
195 
196 #ifdef POOL_DIAGNOSTIC
197 static __inline void
198 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
199 {
200 	int n = pp->pr_curlogentry;
201 	struct pool_log *pl;
202 
203 	if ((pp->pr_roflags & PR_LOGGING) == 0)
204 		return;
205 
206 	/*
207 	 * Fill in the current entry. Wrap around and overwrite
208 	 * the oldest entry if necessary.
209 	 */
210 	pl = &pp->pr_log[n];
211 	pl->pl_file = file;
212 	pl->pl_line = line;
213 	pl->pl_action = action;
214 	pl->pl_addr = v;
215 	if (++n >= pp->pr_logsize)
216 		n = 0;
217 	pp->pr_curlogentry = n;
218 }
219 
220 static void
221 pr_printlog(struct pool *pp, struct pool_item *pi,
222     int (*pr)(const char *, ...))
223 {
224 	int i = pp->pr_logsize;
225 	int n = pp->pr_curlogentry;
226 
227 	if ((pp->pr_roflags & PR_LOGGING) == 0)
228 		return;
229 
230 	/*
231 	 * Print all entries in this pool's log.
232 	 */
233 	while (i-- > 0) {
234 		struct pool_log *pl = &pp->pr_log[n];
235 		if (pl->pl_action != 0) {
236 			if (pi == NULL || pi == pl->pl_addr) {
237 				(*pr)("\tlog entry %d:\n", i);
238 				(*pr)("\t\taction = %s, addr = %p\n",
239 				    pl->pl_action == PRLOG_GET ? "get" : "put",
240 				    pl->pl_addr);
241 				(*pr)("\t\tfile: %s at line %lu\n",
242 				    pl->pl_file, pl->pl_line);
243 			}
244 		}
245 		if (++n >= pp->pr_logsize)
246 			n = 0;
247 	}
248 }
249 
250 static __inline void
251 pr_enter(struct pool *pp, const char *file, long line)
252 {
253 
254 	if (__predict_false(pp->pr_entered_file != NULL)) {
255 		printf("pool %s: reentrancy at file %s line %ld\n",
256 		    pp->pr_wchan, file, line);
257 		printf("         previous entry at file %s line %ld\n",
258 		    pp->pr_entered_file, pp->pr_entered_line);
259 		panic("pr_enter");
260 	}
261 
262 	pp->pr_entered_file = file;
263 	pp->pr_entered_line = line;
264 }
265 
266 static __inline void
267 pr_leave(struct pool *pp)
268 {
269 
270 	if (__predict_false(pp->pr_entered_file == NULL)) {
271 		printf("pool %s not entered?\n", pp->pr_wchan);
272 		panic("pr_leave");
273 	}
274 
275 	pp->pr_entered_file = NULL;
276 	pp->pr_entered_line = 0;
277 }
278 
279 static __inline void
280 pr_enter_check(struct pool *pp, int (*pr)(const char *, ...))
281 {
282 
283 	if (pp->pr_entered_file != NULL)
284 		(*pr)("\n\tcurrently entered from file %s line %ld\n",
285 		    pp->pr_entered_file, pp->pr_entered_line);
286 }
287 #else
288 #define	pr_log(pp, v, action, file, line)
289 #define	pr_printlog(pp, pi, pr)
290 #define	pr_enter(pp, file, line)
291 #define	pr_leave(pp)
292 #define	pr_enter_check(pp, pr)
293 #endif /* POOL_DIAGNOSTIC */
294 
295 static __inline int
296 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
297 {
298 	if (a->ph_page < b->ph_page)
299 		return (-1);
300 	else if (a->ph_page > b->ph_page)
301 		return (1);
302 	else
303 		return (0);
304 }
305 
306 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
307 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
308 
309 /*
310  * Return the pool page header based on page address.
311  */
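/*
 * Callers obtain `page' by masking the item address with the allocator's
 * pa_pagemask (see pool_do_put() below): with PR_PHINPAGE the header sits
 * at a fixed offset inside that page, otherwise it is looked up in the
 * pr_phtree splay tree populated by pool_prime_page().
 */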
312 static __inline struct pool_item_header *
313 pr_find_pagehead(struct pool *pp, caddr_t page)
314 {
315 	struct pool_item_header *ph, tmp;
316 
317 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
318 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
319 
320 	tmp.ph_page = page;
321 	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
322 	return ph;
323 }
324 
325 /*
326  * Remove a page from the pool.
327  */
328 void
329 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
330      struct pool_pagelist *pq)
331 {
332 	int s;
333 
334 	/*
335 	 * If the page was idle, decrement the idle page count.
336 	 */
337 	if (ph->ph_nmissing == 0) {
338 #ifdef DIAGNOSTIC
339 		if (pp->pr_nidle == 0)
340 			panic("pr_rmpage: nidle inconsistent");
341 		if (pp->pr_nitems < pp->pr_itemsperpage)
342 			panic("pr_rmpage: nitems inconsistent");
343 #endif
344 		pp->pr_nidle--;
345 	}
346 
347 	pp->pr_nitems -= pp->pr_itemsperpage;
348 
349 	/*
350 	 * Unlink a page from the pool and release it (or queue it for release).
351 	 */
352 	LIST_REMOVE(ph, ph_pagelist);
353 	if (pq) {
354 		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
355 	} else {
356 		pool_allocator_free(pp, ph->ph_page);
357 		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
358 			SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
359 			s = splhigh();
360 			pool_put(&phpool, ph);
361 			splx(s);
362 		}
363 	}
364 	pp->pr_npages--;
365 	pp->pr_npagefree++;
366 
367 	pool_update_curpage(pp);
368 }
369 
370 /*
371  * Initialize the given pool resource structure.
372  *
373  * We export this routine to allow other kernel parts to declare
374  * static pools that must be initialized before malloc() is available.
375  */
376 void
377 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
378     const char *wchan, struct pool_allocator *palloc)
379 {
380 	int off, slack;
381 
382 #ifdef POOL_DIAGNOSTIC
383 	/*
384 	 * Always log if POOL_DIAGNOSTIC is defined.
385 	 */
386 	if (pool_logsize != 0)
387 		flags |= PR_LOGGING;
388 #endif
389 
390 #ifdef MALLOC_DEBUG
391 	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
392 		flags &= ~PR_DEBUG;
393 #endif
394 	/*
395 	 * Check arguments and construct default values.
396 	 */
397 	if (palloc == NULL)
398 		palloc = &pool_allocator_nointr;
399 	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
400 		if (palloc->pa_pagesz == 0)
401 			palloc->pa_pagesz = PAGE_SIZE;
402 
403 		TAILQ_INIT(&palloc->pa_list);
404 
405 		simple_lock_init(&palloc->pa_slock);
406 		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
407 		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
408 		palloc->pa_flags |= PA_INITIALIZED;
409 	}
410 
411 	if (align == 0)
412 		align = ALIGN(1);
413 
414 	if (size < sizeof(struct pool_item))
415 		size = sizeof(struct pool_item);
416 
417 	size = roundup(size, align);
418 #ifdef DIAGNOSTIC
419 	if (size > palloc->pa_pagesz)
420 		panic("pool_init: pool item size (%lu) too large",
421 		      (u_long)size);
422 #endif
423 
424 	/*
425 	 * Initialize the pool structure.
426 	 */
427 	LIST_INIT(&pp->pr_emptypages);
428 	LIST_INIT(&pp->pr_fullpages);
429 	LIST_INIT(&pp->pr_partpages);
430 	TAILQ_INIT(&pp->pr_cachelist);
431 	pp->pr_curpage = NULL;
432 	pp->pr_npages = 0;
433 	pp->pr_minitems = 0;
434 	pp->pr_minpages = 0;
435 	pp->pr_maxpages = 8;
436 	pp->pr_roflags = flags;
437 	pp->pr_flags = 0;
438 	pp->pr_size = size;
439 	pp->pr_align = align;
440 	pp->pr_wchan = wchan;
441 	pp->pr_alloc = palloc;
442 	pp->pr_nitems = 0;
443 	pp->pr_nout = 0;
444 	pp->pr_hardlimit = UINT_MAX;
445 	pp->pr_hardlimit_warning = NULL;
446 	pp->pr_hardlimit_ratecap.tv_sec = 0;
447 	pp->pr_hardlimit_ratecap.tv_usec = 0;
448 	pp->pr_hardlimit_warning_last.tv_sec = 0;
449 	pp->pr_hardlimit_warning_last.tv_usec = 0;
450 	pp->pr_serial = ++pool_serial;
451 	if (pool_serial == 0)
452 		panic("pool_init: too much uptime");
453 
454 	/*
455 	 * Decide whether to put the page header off page to avoid
456 	 * wasting too large a part of the page. Off-page page headers
457 	 * go into a splay tree, so we can match a returned item
458 	 * with its header based on the page address.
459 	 * We use 1/16 of the page size as the threshold (XXX: tune)
460 	 */
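	/*
	 * Worked example, assuming a 4096-byte page size: items smaller than
	 * 4096/16 = 256 bytes keep their header inside the page (PR_PHINPAGE);
	 * items of 256 bytes or more get an off-page header from phpool.
	 */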
461 	if (pp->pr_size < palloc->pa_pagesz/16) {
462 		/* Use the end of the page for the page header */
463 		pp->pr_roflags |= PR_PHINPAGE;
464 		pp->pr_phoffset = off = palloc->pa_pagesz -
465 		    ALIGN(sizeof(struct pool_item_header));
466 	} else {
467 		/* The page header will be taken from our page header pool */
468 		pp->pr_phoffset = 0;
469 		off = palloc->pa_pagesz;
470 		SPLAY_INIT(&pp->pr_phtree);
471 	}
472 
473 	/*
474 	 * Alignment is to take place at `ioff' within the item. This means
475 	 * we must reserve up to `align - 1' bytes on the page to allow
476 	 * appropriate positioning of each item.
477 	 *
478 	 * Silently enforce `0 <= ioff < align'.
479 	 */
480 	pp->pr_itemoffset = ioff = ioff % align;
481 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
482 	KASSERT(pp->pr_itemsperpage != 0);
483 
484 	/*
485 	 * Use the slack between the chunks and the page header
486 	 * for "cache coloring".
487 	 */
488 	slack = off - pp->pr_itemsperpage * pp->pr_size;
489 	pp->pr_maxcolor = (slack / align) * align;
490 	pp->pr_curcolor = 0;
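	/*
	 * Worked example, assuming 4096-byte pages, 8-byte alignment, ioff 0
	 * and a 320-byte item (header off page, so off == 4096):
	 * pr_itemsperpage = 4096 / 320 = 12, which occupies 3840 bytes and
	 * leaves slack = 256, giving pr_maxcolor = 256; successive pages then
	 * start their items at offsets 0, 8, 16, ..., 256 within the page.
	 */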
491 
492 	pp->pr_nget = 0;
493 	pp->pr_nfail = 0;
494 	pp->pr_nput = 0;
495 	pp->pr_npagealloc = 0;
496 	pp->pr_npagefree = 0;
497 	pp->pr_hiwat = 0;
498 	pp->pr_nidle = 0;
499 
500 #ifdef POOL_DIAGNOSTIC
501 	if (flags & PR_LOGGING) {
502 		if (kmem_map == NULL ||
503 		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
504 		     M_TEMP, M_NOWAIT)) == NULL)
505 			pp->pr_roflags &= ~PR_LOGGING;
506 		pp->pr_curlogentry = 0;
507 		pp->pr_logsize = pool_logsize;
508 	}
509 #endif
510 
511 	pp->pr_entered_file = NULL;
512 	pp->pr_entered_line = 0;
513 
514 	simple_lock_init(&pp->pr_slock);
515 
516 	/*
517 	 * Initialize private page header pool and cache magazine pool if we
518 	 * haven't done so yet.
519 	 * XXX LOCKING.
520 	 */
521 	if (phpool.pr_size == 0) {
522 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
523 		    0, "phpool", NULL);
524 		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
525 		    0, "pcgpool", NULL);
526 	}
527 
528 	simple_lock_init(&pool_head_slock);
529 
530 	/* Insert this into the list of all pools. */
531 	simple_lock(&pool_head_slock);
532 	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
533 	simple_unlock(&pool_head_slock);
534 
535 	/* Insert into the list of pools using this allocator. */
536 	simple_lock(&palloc->pa_slock);
537 	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
538 	simple_unlock(&palloc->pa_slock);
539 }
540 
541 /*
542  * Decommission a pool resource.
543  */
544 void
545 pool_destroy(struct pool *pp)
546 {
547 	struct pool_item_header *ph;
548 	struct pool_cache *pc;
549 
550 	/* Locking order: pool_allocator -> pool */
551 	simple_lock(&pp->pr_alloc->pa_slock);
552 	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
553 	simple_unlock(&pp->pr_alloc->pa_slock);
554 
555 	/* Destroy all caches for this pool. */
556 	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
557 		pool_cache_destroy(pc);
558 
559 #ifdef DIAGNOSTIC
560 	if (pp->pr_nout != 0) {
561 		pr_printlog(pp, NULL, printf);
562 		panic("pool_destroy: pool busy: still out: %u",
563 		    pp->pr_nout);
564 	}
565 #endif
566 
567 	/* Remove all pages */
568 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
569 		pr_rmpage(pp, ph, NULL);
570 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
571 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
572 
573 	/* Remove from global pool list */
574 	simple_lock(&pool_head_slock);
575 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
576 	simple_unlock(&pool_head_slock);
577 
578 #ifdef POOL_DIAGNOSTIC
579 	if ((pp->pr_roflags & PR_LOGGING) != 0)
580 		free(pp->pr_log, M_TEMP);
581 #endif
582 }
583 
584 static struct pool_item_header *
585 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
586 {
587 	struct pool_item_header *ph;
588 	int s;
589 
590 	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
591 
592 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
593 		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
594 	else {
595 		s = splhigh();
596 		ph = pool_get(&phpool, flags);
597 		splx(s);
598 	}
599 
600 	return (ph);
601 }
602 
603 /*
604  * Grab an item from the pool; must be called at appropriate spl level
605  */
606 void *
607 #ifdef POOL_DIAGNOSTIC
608 _pool_get(struct pool *pp, int flags, const char *file, long line)
609 #else
610 pool_get(struct pool *pp, int flags)
611 #endif
612 {
613 	struct pool_item *pi;
614 	struct pool_item_header *ph;
615 	void *v;
616 
617 #ifdef DIAGNOSTIC
618 	if ((flags & PR_WAITOK) != 0)
619 		splassert(IPL_NONE);
620 	if (__predict_false(curproc == NULL && /* doing_shutdown == 0 && XXX*/
621 			    (flags & PR_WAITOK) != 0))
622 		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);
623 
624 #ifdef LOCKDEBUG
625 	if (flags & PR_WAITOK)
626 		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
627 #endif
628 #endif /* DIAGNOSTIC */
629 
630 #ifdef MALLOC_DEBUG
631 	if (pp->pr_roflags & PR_DEBUG) {
632 		void *addr;
633 
634 		addr = NULL;
635 		debug_malloc(pp->pr_size, M_DEBUG,
636 		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
637 		return (addr);
638 	}
639 #endif
640 
641 	simple_lock(&pp->pr_slock);
642 	pr_enter(pp, file, line);
643 
644  startover:
645 	/*
646 	 * Check to see if we've reached the hard limit.  If we have,
647 	 * and we can wait, then wait until an item has been returned to
648 	 * the pool.
649 	 */
650 #ifdef DIAGNOSTIC
651 	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
652 		pr_leave(pp);
653 		simple_unlock(&pp->pr_slock);
654 		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
655 	}
656 #endif
657 	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
658 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
659 			/*
660 			 * XXX: A warning isn't logged in this case.  Should
661 			 * it be?
662 			 */
663 			pp->pr_flags |= PR_WANTED;
664 			pr_leave(pp);
665 			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
666 			pr_enter(pp, file, line);
667 			goto startover;
668 		}
669 
670 		/*
671 		 * Log a message that the hard limit has been hit.
672 		 */
673 		if (pp->pr_hardlimit_warning != NULL &&
674 		    ratecheck(&pp->pr_hardlimit_warning_last,
675 			      &pp->pr_hardlimit_ratecap))
676 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
677 
678 		pp->pr_nfail++;
679 
680 		pr_leave(pp);
681 		simple_unlock(&pp->pr_slock);
682 		return (NULL);
683 	}
684 
685 	/*
686 	 * The convention we use is that if `curpage' is not NULL, then
687 	 * it points at a non-empty bucket. In particular, `curpage'
688 	 * never points at a page header which has PR_PHINPAGE set and
689 	 * has no items in its bucket.
690 	 */
691 	if ((ph = pp->pr_curpage) == NULL) {
692 #ifdef DIAGNOSTIC
693 		if (pp->pr_nitems != 0) {
694 			simple_unlock(&pp->pr_slock);
695 			printf("pool_get: %s: curpage NULL, nitems %u\n",
696 			    pp->pr_wchan, pp->pr_nitems);
697 			panic("pool_get: nitems inconsistent");
698 		}
699 #endif
700 
701 		/*
702 		 * Call the back-end page allocator for more memory.
703 		 * Release the pool lock, as the back-end page allocator
704 		 * may block.
705 		 */
706 		pr_leave(pp);
707 		simple_unlock(&pp->pr_slock);
708 		v = pool_allocator_alloc(pp, flags);
709 		if (__predict_true(v != NULL))
710 			ph = pool_alloc_item_header(pp, v, flags);
711 		simple_lock(&pp->pr_slock);
712 		pr_enter(pp, file, line);
713 
714 		if (__predict_false(v == NULL || ph == NULL)) {
715 			if (v != NULL)
716 				pool_allocator_free(pp, v);
717 
718 			/*
719 			 * We were unable to allocate a page or item
720 			 * header, but we released the lock during
721 			 * allocation, so perhaps items were freed
722 			 * back to the pool.  Check for this case.
723 			 */
724 			if (pp->pr_curpage != NULL)
725 				goto startover;
726 
727 			if ((flags & PR_WAITOK) == 0) {
728 				pp->pr_nfail++;
729 				pr_leave(pp);
730 				simple_unlock(&pp->pr_slock);
731 				return (NULL);
732 			}
733 
734 			/*
735 			 * Wait for items to be returned to this pool.
736 			 *
737 			 * XXX: maybe we should wake up once a second and
738 			 * try again?
739 			 */
740 			pp->pr_flags |= PR_WANTED;
741 			/* PA_WANTED is already set on the allocator. */
742 			pr_leave(pp);
743 			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
744 			pr_enter(pp, file, line);
745 			goto startover;
746 		}
747 
748 		/* We have more memory; add it to the pool */
749 		pool_prime_page(pp, v, ph);
750 		pp->pr_npagealloc++;
751 
752 		/* Start the allocation process over. */
753 		goto startover;
754 	}
755 	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
756 		pr_leave(pp);
757 		simple_unlock(&pp->pr_slock);
758 		panic("pool_get: %s: page empty", pp->pr_wchan);
759 	}
760 #ifdef DIAGNOSTIC
761 	if (__predict_false(pp->pr_nitems == 0)) {
762 		pr_leave(pp);
763 		simple_unlock(&pp->pr_slock);
764 		printf("pool_get: %s: items on itemlist, nitems %u\n",
765 		    pp->pr_wchan, pp->pr_nitems);
766 		panic("pool_get: nitems inconsistent");
767 	}
768 #endif
769 
770 #ifdef POOL_DIAGNOSTIC
771 	pr_log(pp, v, PRLOG_GET, file, line);
772 #endif
773 
774 #ifdef DIAGNOSTIC
775 	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
776 		pr_printlog(pp, pi, printf);
777 		panic("pool_get(%s): free list modified: magic=%x; page %p;"
778 		       " item addr %p",
779 			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
780 	}
781 #endif
782 
783 	/*
784 	 * Remove from item list.
785 	 */
786 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
787 	pp->pr_nitems--;
788 	pp->pr_nout++;
789 	if (ph->ph_nmissing == 0) {
790 #ifdef DIAGNOSTIC
791 		if (__predict_false(pp->pr_nidle == 0))
792 			panic("pool_get: nidle inconsistent");
793 #endif
794 		pp->pr_nidle--;
795 
796 		/*
797 		 * This page was previously empty.  Move it to the list of
798 		 * partially-full pages.  This page is already curpage.
799 		 */
800 		LIST_REMOVE(ph, ph_pagelist);
801 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
802 	}
803 	ph->ph_nmissing++;
804 	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
805 #ifdef DIAGNOSTIC
806 		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
807 			pr_leave(pp);
808 			simple_unlock(&pp->pr_slock);
809 			panic("pool_get: %s: nmissing inconsistent",
810 			    pp->pr_wchan);
811 		}
812 #endif
813 		/*
814 		 * This page is now full.  Move it to the full list
815 		 * and select a new current page.
816 		 */
817 		LIST_REMOVE(ph, ph_pagelist);
818 		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
819 		pool_update_curpage(pp);
820 	}
821 
822 	pp->pr_nget++;
823 
824 	/*
825 	 * If we have a low water mark and we are now below that low
826 	 * water mark, add more items to the pool.
827 	 */
828 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
829 		/*
830 		 * XXX: Should we log a warning?  Should we set up a timeout
831 		 * to try again in a second or so?  The latter could break
832 		 * a caller's assumptions about interrupt protection, etc.
833 		 */
834 	}
835 
836 	pr_leave(pp);
837 	simple_unlock(&pp->pr_slock);
838 	return (v);
839 }
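/*
 * Illustrative sketch (not part of this file): "appropriate spl level"
 * means a pool that is also used from interrupt context must be accessed
 * with that interrupt blocked.  splvm() is shown as one typical choice;
 * the pool pointer is hypothetical.
 */
#if 0
void *
intr_safe_pool_get(struct pool *pp)
{
	void *v;
	int s;

	s = splvm();			/* block interrupts that also use this pool */
	v = pool_get(pp, PR_NOWAIT);	/* never sleep while at raised spl */
	splx(s);

	return (v);
}
#endif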
840 
841 /*
842  * Internal version of pool_put().  Pool is already locked/entered.
843  */
844 void
845 pool_do_put(struct pool *pp, void *v)
846 {
847 	struct pool_item *pi = v;
848 	struct pool_item_header *ph;
849 	caddr_t page;
850 
851 #ifdef MALLOC_DEBUG
852 	if (pp->pr_roflags & PR_DEBUG) {
853 		debug_free(v, M_DEBUG);
854 		return;
855 	}
856 #endif
857 
858 	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
859 
860 	page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
861 
862 #ifdef DIAGNOSTIC
863 	if (__predict_false(pp->pr_nout == 0)) {
864 		printf("pool %s: putting with none out\n",
865 		    pp->pr_wchan);
866 		panic("pool_put");
867 	}
868 #endif
869 
870 	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
871 		pr_printlog(pp, NULL, printf);
872 		panic("pool_put: %s: page header missing", pp->pr_wchan);
873 	}
874 
875 #ifdef LOCKDEBUG
876 	/*
877 	 * Check if we're freeing a locked simple lock.
878 	 */
879 	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
880 #endif
881 
882 	/*
883 	 * Return to item list.
884 	 */
885 #ifdef DIAGNOSTIC
886 	pi->pi_magic = PI_MAGIC;
887 #endif
888 #ifdef DEBUG
889 	{
890 		int i, *ip = v;
891 
892 		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
893 			*ip++ = PI_MAGIC;
894 		}
895 	}
896 #endif
897 
898 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
899 	ph->ph_nmissing--;
900 	pp->pr_nput++;
901 	pp->pr_nitems++;
902 	pp->pr_nout--;
903 
904 	/* Cancel "pool empty" condition if it exists */
905 	if (pp->pr_curpage == NULL)
906 		pp->pr_curpage = ph;
907 
908 	if (pp->pr_flags & PR_WANTED) {
909 		pp->pr_flags &= ~PR_WANTED;
910 		if (ph->ph_nmissing == 0)
911 			pp->pr_nidle++;
912 		wakeup(pp);
913 		return;
914 	}
915 
916 	/*
917 	 * If this page is now empty, do one of two things:
918 	 *
919 	 *	(1) If we have more pages than the page high water mark,
920 	 *	    free the page back to the system.
921 	 *
922 	 *	(2) Otherwise, move the page to the empty page list.
923 	 *
924 	 * Either way, select a new current page (so we use a partially-full
925 	 * page if one is available).
926 	 */
927 	if (ph->ph_nmissing == 0) {
928 		pp->pr_nidle++;
929 		if (pp->pr_nidle > pp->pr_maxpages ||
930 		    (pp->pr_alloc->pa_flags & PA_WANT) != 0) {
931 			pr_rmpage(pp, ph, NULL);
932 		} else {
933 			LIST_REMOVE(ph, ph_pagelist);
934 			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
935 
936 			/*
937 			 * Update the timestamp on the page.  A page must
938 			 * be idle for some period of time before it can
939 			 * be reclaimed by the pagedaemon.  This minimizes
940 			 * ping-pong'ing for memory.
941 			 */
942 			microuptime(&ph->ph_time);
943 		}
944 		pool_update_curpage(pp);
945 	}
946 
947 	/*
948 	 * If the page was previously completely full, move it to the
949 	 * partially-full list and make it the current page.  The next
950 	 * allocation will get the item from this page, instead of
951 	 * further fragmenting the pool.
952 	 */
953 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
954 		LIST_REMOVE(ph, ph_pagelist);
955 		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
956 		pp->pr_curpage = ph;
957 	}
958 }
959 
960 /*
961  * Return resource to the pool; must be called at appropriate spl level
962  */
963 #ifdef POOL_DIAGNOSTIC
964 void
965 _pool_put(struct pool *pp, void *v, const char *file, long line)
966 {
967 
968 	simple_lock(&pp->pr_slock);
969 	pr_enter(pp, file, line);
970 
971 	pr_log(pp, v, PRLOG_PUT, file, line);
972 
973 	pool_do_put(pp, v);
974 
975 	pr_leave(pp);
976 	simple_unlock(&pp->pr_slock);
977 }
978 #undef pool_put
979 #endif /* POOL_DIAGNOSTIC */
980 
981 void
982 pool_put(struct pool *pp, void *v)
983 {
984 
985 	simple_lock(&pp->pr_slock);
986 
987 	pool_do_put(pp, v);
988 
989 	simple_unlock(&pp->pr_slock);
990 }
991 
992 #ifdef POOL_DIAGNOSTIC
993 #define		pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
994 #endif
995 
996 /*
997  * Add N items to the pool.
998  */
999 int
1000 pool_prime(struct pool *pp, int n)
1001 {
1002 	struct pool_item_header *ph;
1003 	caddr_t cp;
1004 	int newpages;
1005 
1006 	simple_lock(&pp->pr_slock);
1007 
1008 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1009 
1010 	while (newpages-- > 0) {
1011 		simple_unlock(&pp->pr_slock);
1012 		cp = pool_allocator_alloc(pp, PR_NOWAIT);
1013 		if (__predict_true(cp != NULL))
1014 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1015 		simple_lock(&pp->pr_slock);
1016 
1017 		if (__predict_false(cp == NULL || ph == NULL)) {
1018 			if (cp != NULL)
1019 				pool_allocator_free(pp, cp);
1020 			break;
1021 		}
1022 
1023 		pool_prime_page(pp, cp, ph);
1024 		pp->pr_npagealloc++;
1025 		pp->pr_minpages++;
1026 	}
1027 
1028 	if (pp->pr_minpages >= pp->pr_maxpages)
1029 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
1030 
1031 	simple_unlock(&pp->pr_slock);
1032 	return (0);
1033 }
1034 
1035 /*
1036  * Add a page worth of items to the pool.
1037  *
1038  * Note, we must be called with the pool descriptor LOCKED.
1039  */
1040 void
1041 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1042 {
1043 	struct pool_item *pi;
1044 	caddr_t cp = storage;
1045 	unsigned int align = pp->pr_align;
1046 	unsigned int ioff = pp->pr_itemoffset;
1047 	int n;
1048 
1049 #ifdef DIAGNOSTIC
1050 	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
1051 		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1052 #endif
1053 
1054 	/*
1055 	 * Insert page header.
1056 	 */
1057 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1058 	TAILQ_INIT(&ph->ph_itemlist);
1059 	ph->ph_page = storage;
1060 	ph->ph_nmissing = 0;
1061 	memset(&ph->ph_time, 0, sizeof(ph->ph_time));
1062 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1063 		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
1064 
1065 	pp->pr_nidle++;
1066 
1067 	/*
1068 	 * Color this page.
1069 	 */
1070 	cp = (caddr_t)(cp + pp->pr_curcolor);
1071 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1072 		pp->pr_curcolor = 0;
1073 
1074 	/*
1075 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1076 	 */
1077 	if (ioff != 0)
1078 		cp = (caddr_t)(cp + (align - ioff));
1079 
1080 	/*
1081 	 * Insert remaining chunks on the bucket list.
1082 	 */
1083 	n = pp->pr_itemsperpage;
1084 	pp->pr_nitems += n;
1085 
1086 	while (n--) {
1087 		pi = (struct pool_item *)cp;
1088 
1089 		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
1090 
1091 		/* Insert on page list */
1092 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1093 #ifdef DIAGNOSTIC
1094 		pi->pi_magic = PI_MAGIC;
1095 #endif
1096 		cp = (caddr_t)(cp + pp->pr_size);
1097 	}
1098 
1099 	/*
1100 	 * If the pool was depleted, point at the new page.
1101 	 */
1102 	if (pp->pr_curpage == NULL)
1103 		pp->pr_curpage = ph;
1104 
1105 	if (++pp->pr_npages > pp->pr_hiwat)
1106 		pp->pr_hiwat = pp->pr_npages;
1107 }
1108 
1109 /*
1110  * Used by pool_get() when nitems drops below the low water mark.  This
1111  * brings pr_nitems back up to the low water mark.
1112  *
1113  * Note 1, we never wait for memory here, we let the caller decide what to do.
1114  *
1115  * Note 2, we must be called with the pool already locked, and we return
1116  * with it locked.
1117  */
1118 int
1119 pool_catchup(struct pool *pp)
1120 {
1121 	struct pool_item_header *ph;
1122 	caddr_t cp;
1123 	int error = 0;
1124 
1125 	while (POOL_NEEDS_CATCHUP(pp)) {
1126 		/*
1127 		 * Call the page back-end allocator for more memory.
1128 		 *
1129 		 * XXX: We never wait, so should we bother unlocking
1130 		 * the pool descriptor?
1131 		 */
1132 		simple_unlock(&pp->pr_slock);
1133 		cp = pool_allocator_alloc(pp, PR_NOWAIT);
1134 		if (__predict_true(cp != NULL))
1135 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1136 		simple_lock(&pp->pr_slock);
1137 		if (__predict_false(cp == NULL || ph == NULL)) {
1138 			if (cp != NULL)
1139 				pool_allocator_free(pp, cp);
1140 			error = ENOMEM;
1141 			break;
1142 		}
1143 		pool_prime_page(pp, cp, ph);
1144 		pp->pr_npagealloc++;
1145 	}
1146 
1147 	return (error);
1148 }
1149 
1150 void
1151 pool_update_curpage(struct pool *pp)
1152 {
1153 
1154 	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
1155 	if (pp->pr_curpage == NULL) {
1156 		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
1157 	}
1158 }
1159 
1160 void
1161 pool_setlowat(struct pool *pp, int n)
1162 {
1163 
1164 	simple_lock(&pp->pr_slock);
1165 
1166 	pp->pr_minitems = n;
1167 	pp->pr_minpages = (n == 0)
1168 		? 0
1169 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1170 
1171 	/* Make sure we're caught up with the newly-set low water mark. */
1172 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1173 		/*
1174 		 * XXX: Should we log a warning?  Should we set up a timeout
1175 		 * to try again in a second or so?  The latter could break
1176 		 * a caller's assumptions about interrupt protection, etc.
1177 		 */
1178 	}
1179 
1180 	simple_unlock(&pp->pr_slock);
1181 }
1182 
1183 void
1184 pool_sethiwat(struct pool *pp, int n)
1185 {
1186 
1187 	simple_lock(&pp->pr_slock);
1188 
1189 	pp->pr_maxpages = (n == 0)
1190 		? 0
1191 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1192 
1193 	simple_unlock(&pp->pr_slock);
1194 }
1195 
1196 int
1197 pool_sethardlimit(struct pool *pp, unsigned n, const char *warnmess, int ratecap)
1198 {
1199 	int error = 0;
1200 
1201 	simple_lock(&pp->pr_slock);
1202 
1203 	if (n < pp->pr_nout) {
1204 		error = EINVAL;
1205 		goto done;
1206 	}
1207 
1208 	pp->pr_hardlimit = n;
1209 	pp->pr_hardlimit_warning = warnmess;
1210 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1211 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1212 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1213 
1214 	/*
1215 	 * In-line version of pool_sethiwat(), because we don't want to
1216 	 * release the lock.
1217 	 */
1218 	pp->pr_maxpages = (n == 0 || n == UINT_MAX)
1219 		? n
1220 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1221 
1222  done:
1223 	simple_unlock(&pp->pr_slock);
1224 
1225 	return (error);
1226 }
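/*
 * Illustrative sketch (not part of this file): tuning a pool with the
 * water-mark and hard-limit knobs above.  `foo_pool', the numbers and the
 * warning text are hypothetical.
 */
#if 0
void
foo_pool_tune(void)
{
	/* Keep at least 32 items on hand; pool_catchup() maintains this. */
	pool_setlowat(&foo_pool, 32);

	/* Start freeing empty pages once more than ~256 items worth sit idle. */
	pool_sethiwat(&foo_pool, 256);

	/* Fail allocations beyond 1024 outstanding items, logging the
	 * warning at most once every 60 seconds. */
	pool_sethardlimit(&foo_pool, 1024, "WARNING: foo_pool limit reached", 60);
}
#endif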
1227 
1228 /*
1229  * Release all complete pages that have not been used recently.
1230  *
1231  * Returns non-zero if any pages have been reclaimed.
1232  */
1233 int
1234 #ifdef POOL_DIAGNOSTIC
1235 _pool_reclaim(struct pool *pp, const char *file, long line)
1236 #else
1237 pool_reclaim(struct pool *pp)
1238 #endif
1239 {
1240 	struct pool_item_header *ph, *phnext;
1241 	struct pool_cache *pc;
1242 	struct timeval curtime;
1243 	struct pool_pagelist pq;
1244 	struct timeval diff;
1245 	int s;
1246 
1247 	if (simple_lock_try(&pp->pr_slock) == 0)
1248 		return (0);
1249 	pr_enter(pp, file, line);
1250 
1251 	LIST_INIT(&pq);
1252 
1253 	/*
1254 	 * Reclaim items from the pool's caches.
1255 	 */
1256 	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
1257 		pool_cache_reclaim(pc);
1258 
1259 	microuptime(&curtime);
1260 
1261 	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1262 		phnext = LIST_NEXT(ph, ph_pagelist);
1263 
1264 		/* Check our minimum page claim */
1265 		if (pp->pr_npages <= pp->pr_minpages)
1266 			break;
1267 
1268 		KASSERT(ph->ph_nmissing == 0);
1269 		timersub(&curtime, &ph->ph_time, &diff);
1270 		if (diff.tv_sec < pool_inactive_time)
1271 			continue;
1272 
1273 		/*
1274 		 * If freeing this page would put us below
1275 		 * the low water mark, stop now.
1276 		 */
1277 		if ((pp->pr_nitems - pp->pr_itemsperpage) <
1278 		    pp->pr_minitems)
1279 			break;
1280 
1281 		pr_rmpage(pp, ph, &pq);
1282 	}
1283 
1284 	pr_leave(pp);
1285 	simple_unlock(&pp->pr_slock);
1286 	if (LIST_EMPTY(&pq))
1287 		return (0);
1288 	while ((ph = LIST_FIRST(&pq)) != NULL) {
1289 		LIST_REMOVE(ph, ph_pagelist);
1290 		pool_allocator_free(pp, ph->ph_page);
1291 		if (pp->pr_roflags & PR_PHINPAGE) {
1292 			continue;
1293 		}
1294 		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
1295 		s = splhigh();
1296 		pool_put(&phpool, ph);
1297 		splx(s);
1298 	}
1299 
1300 	return (1);
1301 }
1302 
1303 #ifdef DDB
1304 #include <machine/db_machdep.h>
1305 #include <ddb/db_interface.h>
1306 #include <ddb/db_output.h>
1307 
1308 /*
1309  * Diagnostic helpers.
1310  */
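/*
 * From ddb, `show all pools' prints the per-pool statistics table produced
 * by db_show_all_pools() below, and `show all pools /a' lists each pool's
 * address and allocator instead.
 */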
1311 void
1312 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1313 {
1314 	int s;
1315 
1316 	s = splvm();
1317 	if (simple_lock_try(&pp->pr_slock) == 0) {
1318 		pr("pool %s is locked; try again later\n",
1319 		    pp->pr_wchan);
1320 		splx(s);
1321 		return;
1322 	}
1323 	pool_print1(pp, modif, pr);
1324 	simple_unlock(&pp->pr_slock);
1325 	splx(s);
1326 }
1327 
1328 void
1329 pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
1330 {
1331 	struct pool_item_header *ph;
1332 #ifdef DIAGNOSTIC
1333 	struct pool_item *pi;
1334 #endif
1335 
1336 	LIST_FOREACH(ph, pl, ph_pagelist) {
1337 		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1338 		    ph->ph_page, ph->ph_nmissing,
1339 		    (u_long)ph->ph_time.tv_sec,
1340 		    (u_long)ph->ph_time.tv_usec);
1341 #ifdef DIAGNOSTIC
1342 		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1343 			if (pi->pi_magic != PI_MAGIC) {
1344 				(*pr)("\t\t\titem %p, magic 0x%x\n",
1345 				    pi, pi->pi_magic);
1346 			}
1347 		}
1348 #endif
1349 	}
1350 }
1351 
1352 void
1353 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1354 {
1355 	struct pool_item_header *ph;
1356 	struct pool_cache *pc;
1357 	struct pool_cache_group *pcg;
1358 	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1359 	char c;
1360 
1361 	while ((c = *modif++) != '\0') {
1362 		if (c == 'l')
1363 			print_log = 1;
1364 		if (c == 'p')
1365 			print_pagelist = 1;
1366 		if (c == 'c')
1367 			print_cache = 1;
1369 	}
1370 
1371 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1372 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1373 	    pp->pr_roflags);
1374 	(*pr)("\talloc %p\n", pp->pr_alloc);
1375 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1376 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1377 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1378 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1379 
1380 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1381 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1382 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1383 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1384 
1385 	if (print_pagelist == 0)
1386 		goto skip_pagelist;
1387 
1388 	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1389 		(*pr)("\n\tempty page list:\n");
1390 	pool_print_pagelist(&pp->pr_emptypages, pr);
1391 	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1392 		(*pr)("\n\tfull page list:\n");
1393 	pool_print_pagelist(&pp->pr_fullpages, pr);
1394 	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1395 		(*pr)("\n\tpartial-page list:\n");
1396 	pool_print_pagelist(&pp->pr_partpages, pr);
1397 
1398 	if (pp->pr_curpage == NULL)
1399 		(*pr)("\tno current page\n");
1400 	else
1401 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1402 
1403 skip_pagelist:
1404 	if (print_log == 0)
1405 		goto skip_log;
1406 
1407 	(*pr)("\n");
1408 	if ((pp->pr_roflags & PR_LOGGING) == 0)
1409 		(*pr)("\tno log\n");
1410 	else
1411 		pr_printlog(pp, NULL, pr);
1412 
1413 skip_log:
1414 	if (print_cache == 0)
1415 		goto skip_cache;
1416 
1417 	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
1418 		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1419 		    pc->pc_allocfrom, pc->pc_freeto);
1420 		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
1421 		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1422 		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1423 			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1424 			for (i = 0; i < PCG_NOBJECTS; i++)
1425 				(*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
1426 		}
1427 	}
1428 
1429 skip_cache:
1430 	pr_enter_check(pp, pr);
1431 }
1432 
1433 void
1434 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1435 {
1436 	struct pool *pp;
1437 	char maxp[16];
1438 	int ovflw;
1439 	char mode;
1440 
1441 	mode = modif[0];
1442 	if (mode != '\0' && mode != 'a') {
1443 		db_printf("usage: show all pools [/a]\n");
1444 		return;
1445 	}
1446 
1447 	if (mode == '\0')
1448 		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1449 		    "Name",
1450 		    "Size",
1451 		    "Requests",
1452 		    "Fail",
1453 		    "Releases",
1454 		    "Pgreq",
1455 		    "Pgrel",
1456 		    "Npage",
1457 		    "Hiwat",
1458 		    "Minpg",
1459 		    "Maxpg",
1460 		    "Idle");
1461 	else
1462 		db_printf("%-10s %18s %18s\n",
1463 		    "Name", "Address", "Allocator");
1464 
1465 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1466 		if (mode == 'a') {
1467 			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
1468 			    pp->pr_alloc);
1469 			continue;
1470 		}
1471 
1472 		if (!pp->pr_nget)
1473 			continue;
1474 
1475 		if (pp->pr_maxpages == UINT_MAX)
1476 			snprintf(maxp, sizeof maxp, "inf");
1477 		else
1478 			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1479 
1480 #define PRWORD(ovflw, fmt, width, fixed, val) do {	\
1481 	(ovflw) += db_printf((fmt),			\
1482 	    (width) - (fixed) - (ovflw) > 0 ?		\
1483 	    (width) - (fixed) - (ovflw) : 0,		\
1484 	    (val)) - (width);				\
1485 	if ((ovflw) < 0)				\
1486 		(ovflw) = 0;				\
1487 } while (/* CONSTCOND */0)
1488 
1489 		ovflw = 0;
1490 		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1491 		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1492 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1493 		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1494 		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1495 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1496 		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1497 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1498 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1499 		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1500 		PRWORD(ovflw, " %*s", 6, 1, maxp);
1501 		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1502 	}
1503 }
1504 
1505 int
1506 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1507 {
1508 	struct pool_item *pi;
1509 	caddr_t page;
1510 	int n;
1511 
1512 	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1513 	if (page != ph->ph_page &&
1514 	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1515 		if (label != NULL)
1516 			printf("%s: ", label);
1517 		printf("pool(%p:%s): page inconsistency: page %p;"
1518 		       " at page head addr %p (p %p)\n", pp,
1519 			pp->pr_wchan, ph->ph_page,
1520 			ph, page);
1521 		return 1;
1522 	}
1523 
1524 	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1525 	     pi != NULL;
1526 	     pi = TAILQ_NEXT(pi,pi_list), n++) {
1527 
1528 #ifdef DIAGNOSTIC
1529 		if (pi->pi_magic != PI_MAGIC) {
1530 			if (label != NULL)
1531 				printf("%s: ", label);
1532 			printf("pool(%s): free list modified: magic=%x;"
1533 			       " page %p; item ordinal %d;"
1534 			       " addr %p (p %p)\n",
1535 				pp->pr_wchan, pi->pi_magic, ph->ph_page,
1536 				n, pi, page);
1537 			panic("pool");
1538 		}
1539 #endif
1540 		page =
1541 		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1542 		if (page == ph->ph_page)
1543 			continue;
1544 
1545 		if (label != NULL)
1546 			printf("%s: ", label);
1547 		printf("pool(%p:%s): page inconsistency: page %p;"
1548 		       " item ordinal %d; addr %p (p %p)\n", pp,
1549 			pp->pr_wchan, ph->ph_page,
1550 			n, pi, page);
1551 		return 1;
1552 	}
1553 	return 0;
1554 }
1555 
1556 int
1557 pool_chk(struct pool *pp, const char *label)
1558 {
1559 	struct pool_item_header *ph;
1560 	int r = 0;
1561 
1562 	simple_lock(&pp->pr_slock);
1563 	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1564 		r = pool_chk_page(pp, label, ph);
1565 		if (r) {
1566 			goto out;
1567 		}
1568 	}
1569 	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1570 		r = pool_chk_page(pp, label, ph);
1571 		if (r) {
1572 			goto out;
1573 		}
1574 	}
1575 	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1576 		r = pool_chk_page(pp, label, ph);
1577 		if (r) {
1578 			goto out;
1579 		}
1580 	}
1581 
1582 out:
1583 	simple_unlock(&pp->pr_slock);
1584 	return (r);
1585 }
1586 #endif
1587 
1588 /*
1589  * pool_cache_init:
1590  *
1591  *	Initialize a pool cache.
1592  *
1593  *	NOTE: If the pool must be protected from interrupts, we expect
1594  *	to be called at the appropriate interrupt priority level.
1595  */
1596 void
1597 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1598     int (*ctor)(void *, void *, int),
1599     void (*dtor)(void *, void *),
1600     void *arg)
1601 {
1602 
1603 	TAILQ_INIT(&pc->pc_grouplist);
1604 	simple_lock_init(&pc->pc_slock);
1605 
1606 	pc->pc_allocfrom = NULL;
1607 	pc->pc_freeto = NULL;
1608 	pc->pc_pool = pp;
1609 
1610 	pc->pc_ctor = ctor;
1611 	pc->pc_dtor = dtor;
1612 	pc->pc_arg  = arg;
1613 
1614 	pc->pc_hits   = 0;
1615 	pc->pc_misses = 0;
1616 
1617 	pc->pc_ngroups = 0;
1618 
1619 	pc->pc_nitems = 0;
1620 
1621 	simple_lock(&pp->pr_slock);
1622 	TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1623 	simple_unlock(&pp->pr_slock);
1624 }
1625 
1626 /*
1627  * pool_cache_destroy:
1628  *
1629  *	Destroy a pool cache.
1630  */
1631 void
1632 pool_cache_destroy(struct pool_cache *pc)
1633 {
1634 	struct pool *pp = pc->pc_pool;
1635 
1636 	/* First, invalidate the entire cache. */
1637 	pool_cache_invalidate(pc);
1638 
1639 	/* ...and remove it from the pool's cache list. */
1640 	simple_lock(&pp->pr_slock);
1641 	TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1642 	simple_unlock(&pp->pr_slock);
1643 }
1644 
1645 static __inline void *
1646 pcg_get(struct pool_cache_group *pcg)
1647 {
1648 	void *object;
1649 	u_int idx;
1650 
1651 	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1652 	KASSERT(pcg->pcg_avail != 0);
1653 	idx = --pcg->pcg_avail;
1654 
1655 	KASSERT(pcg->pcg_objects[idx] != NULL);
1656 	object = pcg->pcg_objects[idx];
1657 	pcg->pcg_objects[idx] = NULL;
1658 
1659 	return (object);
1660 }
1661 
1662 static __inline void
1663 pcg_put(struct pool_cache_group *pcg, void *object)
1664 {
1665 	u_int idx;
1666 
1667 	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1668 	idx = pcg->pcg_avail++;
1669 
1670 	KASSERT(pcg->pcg_objects[idx] == NULL);
1671 	pcg->pcg_objects[idx] = object;
1672 }
1673 
1674 /*
1675  * pool_cache_get:
1676  *
1677  *	Get an object from a pool cache.
1678  */
1679 void *
1680 pool_cache_get(struct pool_cache *pc, int flags)
1681 {
1682 	struct pool_cache_group *pcg;
1683 	void *object;
1684 
1685 #ifdef LOCKDEBUG
1686 	if (flags & PR_WAITOK)
1687 		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1688 #endif
1689 
1690 	simple_lock(&pc->pc_slock);
1691 
1692 	if ((pcg = pc->pc_allocfrom) == NULL) {
1693 		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1694 			if (pcg->pcg_avail != 0) {
1695 				pc->pc_allocfrom = pcg;
1696 				goto have_group;
1697 			}
1698 		}
1699 
1700 		/*
1701 		 * No groups with any available objects.  Allocate
1702 		 * a new object, construct it, and return it to
1703 		 * the caller.  We will allocate a group, if necessary,
1704 		 * when the object is freed back to the cache.
1705 		 */
1706 		pc->pc_misses++;
1707 		simple_unlock(&pc->pc_slock);
1708 		object = pool_get(pc->pc_pool, flags);
1709 		if (object != NULL && pc->pc_ctor != NULL) {
1710 			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1711 				pool_put(pc->pc_pool, object);
1712 				return (NULL);
1713 			}
1714 		}
1715 		return (object);
1716 	}
1717 
1718  have_group:
1719 	pc->pc_hits++;
1720 	pc->pc_nitems--;
1721 	object = pcg_get(pcg);
1722 
1723 	if (pcg->pcg_avail == 0)
1724 		pc->pc_allocfrom = NULL;
1725 
1726 	simple_unlock(&pc->pc_slock);
1727 
1728 	return (object);
1729 }
1730 
1731 /*
1732  * pool_cache_put:
1733  *
1734  *	Put an object back to the pool cache.
1735  */
1736 void
1737 pool_cache_put(struct pool_cache *pc, void *object)
1738 {
1739 	struct pool_cache_group *pcg;
1740 	int s;
1741 
1742 	simple_lock(&pc->pc_slock);
1743 
1744 	if ((pcg = pc->pc_freeto) == NULL) {
1745 		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1746 			if (pcg->pcg_avail != PCG_NOBJECTS) {
1747 				pc->pc_freeto = pcg;
1748 				goto have_group;
1749 			}
1750 		}
1751 
1752 		/*
1753 		 * No group with room to free the object to.  Attempt to
1754 		 * allocate one.
1755 		 */
1756 		simple_unlock(&pc->pc_slock);
1757 		s = splvm();
1758 		pcg = pool_get(&pcgpool, PR_NOWAIT);
1759 		splx(s);
1760 		if (pcg != NULL) {
1761 			memset(pcg, 0, sizeof(*pcg));
1762 			simple_lock(&pc->pc_slock);
1763 			pc->pc_ngroups++;
1764 			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1765 			if (pc->pc_freeto == NULL)
1766 				pc->pc_freeto = pcg;
1767 			goto have_group;
1768 		}
1769 
1770 		/*
1771 		 * Unable to allocate a cache group; destruct the object
1772 		 * and free it back to the pool.
1773 		 */
1774 		pool_cache_destruct_object(pc, object);
1775 		return;
1776 	}
1777 
1778  have_group:
1779 	pc->pc_nitems++;
1780 	pcg_put(pcg, object);
1781 
1782 	if (pcg->pcg_avail == PCG_NOBJECTS)
1783 		pc->pc_freeto = NULL;
1784 
1785 	simple_unlock(&pc->pc_slock);
1786 }
1787 
1788 /*
1789  * pool_cache_destruct_object:
1790  *
1791  *	Force destruction of an object and its release back into
1792  *	the pool.
1793  */
1794 void
1795 pool_cache_destruct_object(struct pool_cache *pc, void *object)
1796 {
1797 
1798 	if (pc->pc_dtor != NULL)
1799 		(*pc->pc_dtor)(pc->pc_arg, object);
1800 	pool_put(pc->pc_pool, object);
1801 }
1802 
1803 /*
1804  * pool_cache_do_invalidate:
1805  *
1806  *	This internal function implements pool_cache_invalidate() and
1807  *	pool_cache_reclaim().
1808  */
1809 void
1810 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1811     void (*putit)(struct pool *, void *))
1812 {
1813 	struct pool_cache_group *pcg, *npcg;
1814 	void *object;
1815 	int s;
1816 
1817 	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1818 	     pcg = npcg) {
1819 		npcg = TAILQ_NEXT(pcg, pcg_list);
1820 		while (pcg->pcg_avail != 0) {
1821 			pc->pc_nitems--;
1822 			object = pcg_get(pcg);
1823 			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1824 				pc->pc_allocfrom = NULL;
1825 			if (pc->pc_dtor != NULL)
1826 				(*pc->pc_dtor)(pc->pc_arg, object);
1827 			(*putit)(pc->pc_pool, object);
1828 		}
1829 		if (free_groups) {
1830 			pc->pc_ngroups--;
1831 			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1832 			if (pc->pc_freeto == pcg)
1833 				pc->pc_freeto = NULL;
1834 			s = splvm();
1835 			pool_put(&pcgpool, pcg);
1836 			splx(s);
1837 		}
1838 	}
1839 }
1840 
1841 /*
1842  * pool_cache_invalidate:
1843  *
1844  *	Invalidate a pool cache (destruct and release all of the
1845  *	cached objects).
1846  */
1847 void
1848 pool_cache_invalidate(struct pool_cache *pc)
1849 {
1850 
1851 	simple_lock(&pc->pc_slock);
1852 	pool_cache_do_invalidate(pc, 0, pool_put);
1853 	simple_unlock(&pc->pc_slock);
1854 }
1855 
1856 /*
1857  * pool_cache_reclaim:
1858  *
1859  *	Reclaim a pool cache for pool_reclaim().
1860  */
1861 void
1862 pool_cache_reclaim(struct pool_cache *pc)
1863 {
1864 
1865 	simple_lock(&pc->pc_slock);
1866 	pool_cache_do_invalidate(pc, 1, pool_do_put);
1867 	simple_unlock(&pc->pc_slock);
1868 }
1869 
1870 /*
1871  * We have three different sysctls.
1872  * kern.pool.npools - the number of pools.
1873  * kern.pool.pool.<pool#> - the struct pool for pool <pool#>.
1874  * kern.pool.name.<pool#> - the name of pool <pool#>.
1875  */
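/*
 * Illustrative sketch (not part of this file): reading these nodes from
 * userland with sysctl(2).  The {CTL_KERN, KERN_POOL, ...} MIB path and
 * the buffer size are assumptions; see <sys/sysctl.h>.
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
print_pool_info(int serial)
{
	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
	int npools;
	char name[32];
	size_t len;

	/* kern.pool.npools - how many pools currently exist. */
	len = sizeof(npools);
	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
		return (-1);
	printf("%d pools\n", npools);

	/* kern.pool.name.<pool#> - the wchan name of one pool, by serial. */
	mib[2] = KERN_POOL_NAME;
	mib[3] = serial;
	len = sizeof(name);
	if (sysctl(mib, 4, name, &len, NULL, 0) == -1)
		return (-1);
	printf("pool %d is \"%s\"\n", serial, name);
	return (0);
}
#endif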
1876 int
1877 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1878 {
1879 	struct pool *pp, *foundpool = NULL;
1880 	size_t buflen = where != NULL ? *sizep : 0;
1881 	int npools = 0, s;
1882 	unsigned int lookfor;
1883 	size_t len;
1884 
1885 	switch (*name) {
1886 	case KERN_POOL_NPOOLS:
1887 		if (namelen != 1 || buflen != sizeof(int))
1888 			return (EINVAL);
1889 		lookfor = 0;
1890 		break;
1891 	case KERN_POOL_NAME:
1892 		if (namelen != 2 || buflen < 1)
1893 			return (EINVAL);
1894 		lookfor = name[1];
1895 		break;
1896 	case KERN_POOL_POOL:
1897 		if (namelen != 2 || buflen != sizeof(struct pool))
1898 			return (EINVAL);
1899 		lookfor = name[1];
1900 		break;
1901 	default:
1902 		return (EINVAL);
1903 	}
1904 
1905 	s = splvm();
1906 	simple_lock(&pool_head_slock);
1907 
1908 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1909 		npools++;
1910 		if (lookfor == pp->pr_serial) {
1911 			foundpool = pp;
1912 			break;
1913 		}
1914 	}
1915 
1916 	simple_unlock(&pool_head_slock);
1917 	splx(s);
1918 
1919 	if (lookfor != 0 && foundpool == NULL)
1920 		return (ENOENT);
1921 
1922 	switch (*name) {
1923 	case KERN_POOL_NPOOLS:
1924 		return copyout(&npools, where, buflen);
1925 	case KERN_POOL_NAME:
1926 		len = strlen(foundpool->pr_wchan) + 1;
1927 		if (*sizep < len)
1928 			return (ENOMEM);
1929 		*sizep = len;
1930 		return copyout(foundpool->pr_wchan, where, len);
1931 	case KERN_POOL_POOL:
1932 		return copyout(foundpool, where, buflen);
1933 	}
1934 	/* NOTREACHED */
1935 	return (0); /* XXX - Stupid gcc */
1936 }
1937 
1938 /*
1939  * Pool backend allocators.
1940  *
1941  * Each pool has a backend allocator that handles page allocation and deallocation.
1942  */
1943 void	*pool_page_alloc_kmem(struct pool *, int);
1944 void	pool_page_free_kmem(struct pool *, void *);
1945 void	*pool_page_alloc_oldnointr(struct pool *, int);
1946 void	pool_page_free_oldnointr(struct pool *, void *);
1947 void	*pool_page_alloc(struct pool *, int);
1948 void	pool_page_free(struct pool *, void *);
1949 
1950 /* old default allocator, interrupt safe */
1951 struct pool_allocator pool_allocator_kmem = {
1952 	pool_page_alloc_kmem, pool_page_free_kmem, 0,
1953 };
1954 /* the previous nointr allocator; handles large allocations safely */
1955 struct pool_allocator pool_allocator_oldnointr = {
1956 	pool_page_alloc_oldnointr, pool_page_free_oldnointr, 0,
1957 };
1958 /* safe for interrupts; name preserved for compatibility.
1959  * This is the default allocator. */
1960 struct pool_allocator pool_allocator_nointr = {
1961 	pool_page_alloc, pool_page_free, 0,
1962 };
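/*
 * Illustrative sketch (not part of this file): a pool can be given its own
 * backend by passing a struct pool_allocator to pool_init().  The names
 * `foo_page_alloc', `foo_page_free' and `foo_allocator' are hypothetical;
 * a pa_pagesz of 0 means "use the default page size".
 */
#if 0
void *
foo_page_alloc(struct pool *pp, int flags)
{
	return (uvm_km_getpage((flags & PR_WAITOK) ? TRUE : FALSE));
}

void
foo_page_free(struct pool *pp, void *v)
{
	uvm_km_putpage(v);
}

struct pool_allocator foo_allocator = {
	foo_page_alloc, foo_page_free, 0,
};

/* Later: pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", &foo_allocator); */
#endif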
1963 
1964 /*
1965  * XXX - we have at least three different resources for the same allocation
1966  *  and each resource can be depleted. First we have the ready elements in
1967  *  the pool. Then we have the resource (typically a vm_map) for this
1968  *  allocator, then we have physical memory. Waiting for any of these can
1969  *  be unnecessary when any other is freed, but the kernel doesn't support
1970  *  sleeping on multiple addresses, so we have to fake it. The caller sleeps on
1971  *  the pool (so that we can be awakened when an item is returned to the pool),
1972  *  but we set PA_WANT on the allocator. When a page is returned to
1973  *  the allocator and PA_WANT is set, pool_allocator_free will wake up all
1974  *  sleeping pools belonging to this allocator. (XXX - thundering herd).
1975  *  We also wake up the allocator in case someone without a pool (malloc)
1976  *  is sleeping waiting for this allocator.
1977  */
1978 
1979 void *
1980 pool_allocator_alloc(struct pool *pp, int flags)
1981 {
1982 
1983 	return (pp->pr_alloc->pa_alloc(pp, flags));
1984 }
1985 
1986 void
1987 pool_allocator_free(struct pool *pp, void *v)
1988 {
1989 	struct pool_allocator *pa = pp->pr_alloc;
1990 	int s;
1991 
1992 	(*pa->pa_free)(pp, v);
1993 
1994 	s = splvm();
1995 	simple_lock(&pa->pa_slock);
1996 	if ((pa->pa_flags & PA_WANT) == 0) {
1997 		simple_unlock(&pa->pa_slock);
1998 		splx(s);
1999 		return;
2000 	}
2001 
2002 	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
2003 		simple_lock(&pp->pr_slock);
2004 		if ((pp->pr_flags & PR_WANTED) != 0) {
2005 			pp->pr_flags &= ~PR_WANTED;
2006 			wakeup(pp);
2007 		}
2008 		simple_unlock(&pp->pr_slock);
2009 	}
2010 	pa->pa_flags &= ~PA_WANT;
2011 	simple_unlock(&pa->pa_slock);
2012 	splx(s);
2013 }
2014 
2015 void *
2016 pool_page_alloc(struct pool *pp, int flags)
2017 {
2018 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2019 
2020 	return (uvm_km_getpage(waitok));
2021 }
2022 
2023 void
2024 pool_page_free(struct pool *pp, void *v)
2025 {
2026 
2027 	uvm_km_putpage(v);
2028 }
2029 
2030 void *
2031 pool_page_alloc_kmem(struct pool *pp, int flags)
2032 {
2033 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2034 
2035 	return ((void *)uvm_km_alloc_poolpage1(kmem_map, uvmexp.kmem_object,
2036 	    waitok));
2037 }
2038 
2039 void
2040 pool_page_free_kmem(struct pool *pp, void *v)
2041 {
2042 
2043 	uvm_km_free_poolpage1(kmem_map, (vaddr_t)v);
2044 }
2045 
2046 void *
2047 pool_page_alloc_oldnointr(struct pool *pp, int flags)
2048 {
2049 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2050 
2051 	splassert(IPL_NONE);
2052 
2053 	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
2054 	    waitok));
2055 }
2056 
2057 void
2058 pool_page_free_oldnointr(struct pool *pp, void *v)
2059 {
2060 	splassert(IPL_NONE);
2061 
2062 	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
2063 }
2064