xref: /openbsd-src/sys/kern/subr_pool.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: subr_pool.c,v 1.12 2001/08/07 21:02:22 art Exp $	*/
2 /*	$NetBSD: subr_pool.c,v 1.59 2001/06/05 18:51:04 thorpej Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the NetBSD
23  *	Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/errno.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/lock.h>
48 #include <sys/pool.h>
49 #include <sys/syslog.h>
50 #include <sys/sysctl.h>
51 
52 #include <vm/vm.h>
53 #include <vm/vm_kern.h>
54 
55 #include <uvm/uvm.h>
56 
57 /*
58  * XXX - for now.
59  */
60 #define SIMPLELOCK_INITIALIZER { SLOCK_UNLOCKED }
61 #ifdef LOCKDEBUG
62 #define simple_lock_freecheck(a, s) do { /* nothing */ } while (0)
63 #define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0)
64 #endif
65 #define LOCK_ASSERT(x) /* nothing */
66 
67 /*
68  * Pool resource management utility.
69  *
70  * Memory is allocated in pages which are split into pieces according
71  * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
72  * in the pool structure and the individual pool items are on a linked list
73  * headed by `ph_itemlist' in each page header. The memory for building
74  * the page list is either taken from the allocated pages themselves (for
75  * small pool items) or taken from an internal pool of page headers (`phpool').
76  */
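/*
 * Illustrative usage sketch (not part of this file, kept under #if 0):
 * a minimal consumer of the pool API below.  "struct foo", "foo_pool"
 * and the M_DEVBUF tag are assumptions made for the example only; the
 * default page allocator is selected by passing NULL alloc/release.
 */
#if 0
struct foo {
	int	f_state;
	char	f_name[16];
};

static struct pool foo_pool;

void
foo_pool_init(void)
{
	/* One pool per object type; PAGE_SIZE pages, default allocator. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
	    0, NULL, NULL, M_DEVBUF);
	pool_setlowat(&foo_pool, 16);	/* keep at least 16 items primed */
}

struct foo *
foo_alloc(int canwait)
{
	/* PR_WAITOK may sleep; PR_NOWAIT returns NULL on shortage. */
	return (pool_get(&foo_pool, canwait ? PR_WAITOK : PR_NOWAIT));
}

void
foo_free(struct foo *fp)
{
	pool_put(&foo_pool, fp);
}
#endif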
77 
78 /* List of all pools */
79 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
80 
81 /* Private pool for page header structures */
82 static struct pool phpool;
83 
84 /* # of seconds to retain page after last use */
85 int pool_inactive_time = 10;
86 
87 /* Next candidate for drainage (see pool_drain()) */
88 static struct pool	*drainpp;
89 
90 /* This spin lock protects both pool_head and drainpp. */
91 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
92 
93 struct pool_item_header {
94 	/* Page headers */
95 	TAILQ_ENTRY(pool_item_header)
96 				ph_pagelist;	/* pool page list */
97 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
98 	LIST_ENTRY(pool_item_header)
99 				ph_hashlist;	/* Off-page page headers */
100 	int			ph_nmissing;	/* # of chunks in use */
101 	caddr_t			ph_page;	/* this page's address */
102 	struct timeval		ph_time;	/* last referenced */
103 };
104 
105 struct pool_item {
106 #ifdef DIAGNOSTIC
107 	int pi_magic;
108 #endif
109 #define	PI_MAGIC 0xdeadbeef
110 	/* Other entries use only this list entry */
111 	TAILQ_ENTRY(pool_item)	pi_list;
112 };
113 
114 
115 #define	PR_HASH_INDEX(pp,addr) \
116 	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
117 
118 #define	POOL_NEEDS_CATCHUP(pp)						\
119 	((pp)->pr_nitems < (pp)->pr_minitems)
120 
121 /*
122  * Every pool gets a unique serial number assigned to it. If this counter
123  * wraps, we're screwed, but we shouldn't create so many pools anyway.
124  */
125 unsigned int pool_serial;
126 
127 /*
128  * Pool cache management.
129  *
130  * Pool caches provide a way for constructed objects to be cached by the
131  * pool subsystem.  This can lead to performance improvements by avoiding
132  * needless object construction/destruction; it is deferred until absolutely
133  * necessary.
134  *
135  * Caches are grouped into cache groups.  Each cache group references
136  * up to 16 constructed objects.  When a cache allocates an object
137  * from the pool, it calls the object's constructor and places it into
138  * a cache group.  When a cache group frees an object back to the pool,
139  * it first calls the object's destructor.  This allows the object to
140  * persist in constructed form while freed to the cache.
141  *
142  * Multiple caches may exist for each pool.  This allows a single
143  * object type to have multiple constructed forms.  The pool references
144  * each cache, so that when a pool is drained by the pagedaemon, it can
145  * drain each individual cache as well.  Each time a cache is drained,
146  * the most idle cache group is freed to the pool in its entirety.
147  *
148  * Pool caches are laid on top of pools.  By layering them, we can avoid
149  * the complexity of cache management for pools which would not benefit
150  * from it.
151  */
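/*
 * Illustrative usage sketch (not part of this file, kept under #if 0):
 * layering a pool_cache over a pool so objects keep their constructed
 * state between uses.  "struct conn", CONN_BUFSZ and the M_DEVBUF tag
 * are assumptions made for the example only.
 */
#if 0
#define	CONN_BUFSZ	512

struct conn {
	caddr_t	c_buf;
};

static struct pool conn_pool;
static struct pool_cache conn_cache;

static int
conn_ctor(void *arg, void *obj, int flags)
{
	struct conn *c = obj;

	/* Expensive one-time setup; runs only on a cache miss. */
	c->c_buf = malloc(CONN_BUFSZ, M_DEVBUF,
	    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT);
	return (c->c_buf == NULL ? ENOMEM : 0);
}

static void
conn_dtor(void *arg, void *obj)
{
	struct conn *c = obj;

	/* Runs only when an object leaves the cache for the pool. */
	free(c->c_buf, M_DEVBUF);
}

void
conn_pool_init(void)
{
	pool_init(&conn_pool, sizeof(struct conn), 0, 0, 0, "connpl",
	    0, NULL, NULL, M_DEVBUF);
	pool_cache_init(&conn_cache, &conn_pool, conn_ctor, conn_dtor, NULL);
}

struct conn *
conn_get(void)
{
	/* A hit returns a still-constructed object; a miss constructs one. */
	return (pool_cache_get(&conn_cache, PR_WAITOK));
}

void
conn_rele(struct conn *c)
{
	/* The object stays constructed unless no cache group has room. */
	pool_cache_put(&conn_cache, c);
}
#endif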
152 
153 /* The cache group pool. */
154 static struct pool pcgpool;
155 
156 /* The pool cache group. */
157 #define	PCG_NOBJECTS		16
158 struct pool_cache_group {
159 	TAILQ_ENTRY(pool_cache_group)
160 		pcg_list;	/* link in the pool cache's group list */
161 	u_int	pcg_avail;	/* # available objects */
162 				/* pointers to the objects */
163 	void	*pcg_objects[PCG_NOBJECTS];
164 };
165 
166 static void	pool_cache_reclaim(struct pool_cache *);
167 
168 static int	pool_catchup(struct pool *);
169 static void	pool_prime_page(struct pool *, caddr_t,
170 		    struct pool_item_header *);
171 static void	*pool_page_alloc(unsigned long, int, int);
172 static void	pool_page_free(void *, unsigned long, int);
173 
174 static void pool_print1(struct pool *, const char *,
175 	int (*)(const char *, ...));
176 
177 /*
178  * Pool log entry. An array of these is allocated in pool_init().
179  */
180 struct pool_log {
181 	const char	*pl_file;
182 	long		pl_line;
183 	int		pl_action;
184 #define	PRLOG_GET	1
185 #define	PRLOG_PUT	2
186 	void		*pl_addr;
187 };
188 
189 /* Number of entries in pool log buffers */
190 #ifndef POOL_LOGSIZE
191 #define	POOL_LOGSIZE	10
192 #endif
193 
194 int pool_logsize = POOL_LOGSIZE;
195 
196 #ifdef POOL_DIAGNOSTIC
197 static __inline void
198 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
199 {
200 	int n = pp->pr_curlogentry;
201 	struct pool_log *pl;
202 
203 	if ((pp->pr_roflags & PR_LOGGING) == 0)
204 		return;
205 
206 	/*
207 	 * Fill in the current entry. Wrap around and overwrite
208 	 * the oldest entry if necessary.
209 	 */
210 	pl = &pp->pr_log[n];
211 	pl->pl_file = file;
212 	pl->pl_line = line;
213 	pl->pl_action = action;
214 	pl->pl_addr = v;
215 	if (++n >= pp->pr_logsize)
216 		n = 0;
217 	pp->pr_curlogentry = n;
218 }
219 
220 static void
221 pr_printlog(struct pool *pp, struct pool_item *pi,
222     int (*pr)(const char *, ...))
223 {
224 	int i = pp->pr_logsize;
225 	int n = pp->pr_curlogentry;
226 
227 	if ((pp->pr_roflags & PR_LOGGING) == 0)
228 		return;
229 
230 	/*
231 	 * Print all entries in this pool's log.
232 	 */
233 	while (i-- > 0) {
234 		struct pool_log *pl = &pp->pr_log[n];
235 		if (pl->pl_action != 0) {
236 			if (pi == NULL || pi == pl->pl_addr) {
237 				(*pr)("\tlog entry %d:\n", i);
238 				(*pr)("\t\taction = %s, addr = %p\n",
239 				    pl->pl_action == PRLOG_GET ? "get" : "put",
240 				    pl->pl_addr);
241 				(*pr)("\t\tfile: %s at line %lu\n",
242 				    pl->pl_file, pl->pl_line);
243 			}
244 		}
245 		if (++n >= pp->pr_logsize)
246 			n = 0;
247 	}
248 }
249 
250 static __inline void
251 pr_enter(struct pool *pp, const char *file, long line)
252 {
253 
254 	if (__predict_false(pp->pr_entered_file != NULL)) {
255 		printf("pool %s: reentrancy at file %s line %ld\n",
256 		    pp->pr_wchan, file, line);
257 		printf("         previous entry at file %s line %ld\n",
258 		    pp->pr_entered_file, pp->pr_entered_line);
259 		panic("pr_enter");
260 	}
261 
262 	pp->pr_entered_file = file;
263 	pp->pr_entered_line = line;
264 }
265 
266 static __inline void
267 pr_leave(struct pool *pp)
268 {
269 
270 	if (__predict_false(pp->pr_entered_file == NULL)) {
271 		printf("pool %s not entered?\n", pp->pr_wchan);
272 		panic("pr_leave");
273 	}
274 
275 	pp->pr_entered_file = NULL;
276 	pp->pr_entered_line = 0;
277 }
278 
279 static __inline void
280 pr_enter_check(struct pool *pp, int (*pr)(const char *, ...))
281 {
282 
283 	if (pp->pr_entered_file != NULL)
284 		(*pr)("\n\tcurrently entered from file %s line %ld\n",
285 		    pp->pr_entered_file, pp->pr_entered_line);
286 }
287 #else
288 #define	pr_log(pp, v, action, file, line)
289 #define	pr_printlog(pp, pi, pr)
290 #define	pr_enter(pp, file, line)
291 #define	pr_leave(pp)
292 #define	pr_enter_check(pp, pr)
293 #endif /* POOL_DIAGNOSTIC */
294 
295 /*
296  * Return the pool page header based on page address.
297  */
298 static __inline struct pool_item_header *
299 pr_find_pagehead(struct pool *pp, caddr_t page)
300 {
301 	struct pool_item_header *ph;
302 
303 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
304 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
305 
306 	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
307 	     ph != NULL;
308 	     ph = LIST_NEXT(ph, ph_hashlist)) {
309 		if (ph->ph_page == page)
310 			return (ph);
311 	}
312 	return (NULL);
313 }
314 
315 /*
316  * Remove a page from the pool.
317  */
318 static __inline void
319 pr_rmpage(struct pool *pp, struct pool_item_header *ph)
320 {
321 
322 	/*
323 	 * If the page was idle, decrement the idle page count.
324 	 */
325 	if (ph->ph_nmissing == 0) {
326 #ifdef DIAGNOSTIC
327 		if (pp->pr_nidle == 0)
328 			panic("pr_rmpage: nidle inconsistent");
329 		if (pp->pr_nitems < pp->pr_itemsperpage)
330 			panic("pr_rmpage: nitems inconsistent");
331 #endif
332 		pp->pr_nidle--;
333 	}
334 
335 	pp->pr_nitems -= pp->pr_itemsperpage;
336 
337 	/*
338 	 * Unlink a page from the pool and release it.
339 	 */
340 	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
341 	(*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
342 	pp->pr_npages--;
343 	pp->pr_npagefree++;
344 
345 	if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
346 		int s;
347 		LIST_REMOVE(ph, ph_hashlist);
348 		s = splhigh();
349 		pool_put(&phpool, ph);
350 		splx(s);
351 	}
352 
353 	if (pp->pr_curpage == ph) {
354 		/*
355 		 * Find a new non-empty page header, if any.
356 		 * Start search from the page head, to increase the
357 		 * chance for "high water" pages to be freed.
358 		 */
359 		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
360 		     ph = TAILQ_NEXT(ph, ph_pagelist))
361 			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
362 				break;
363 
364 		pp->pr_curpage = ph;
365 	}
366 }
367 
368 /*
369  * Initialize the given pool resource structure.
370  *
371  * We export this routine to allow other kernel parts to declare
372  * static pools that must be initialized before malloc() is available.
373  */
374 void
375 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
376     const char *wchan, size_t pagesz,
377     void *(*alloc)(unsigned long, int, int),
378     void (*release)(void *, unsigned long, int),
379     int mtype)
380 {
381 	int off, slack, i;
382 
383 #ifdef POOL_DIAGNOSTIC
384 	/*
385 	 * Always log if POOL_DIAGNOSTIC is defined.
386 	 */
387 	if (pool_logsize != 0)
388 		flags |= PR_LOGGING;
389 #endif
390 
391 	/*
392 	 * Check arguments and construct default values.
393 	 */
394 	if (!powerof2(pagesz))
395 		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
396 
397 	if (alloc == NULL && release == NULL) {
398 		alloc = pool_page_alloc;
399 		release = pool_page_free;
400 		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
401 	} else if ((alloc != NULL && release != NULL) == 0) {
402 		/* If you specify one, must specify both. */
403 		panic("pool_init: must specify alloc and release together");
404 	}
405 
406 	if (pagesz == 0)
407 		pagesz = PAGE_SIZE;
408 
409 	if (align == 0)
410 		align = ALIGN(1);
411 
412 	if (size < sizeof(struct pool_item))
413 		size = sizeof(struct pool_item);
414 
415 	size = ALIGN(size);
416 	if (size > pagesz)
417 		panic("pool_init: pool item size (%lu) too large",
418 		      (u_long)size);
419 
420 	/*
421 	 * Initialize the pool structure.
422 	 */
423 	TAILQ_INIT(&pp->pr_pagelist);
424 	TAILQ_INIT(&pp->pr_cachelist);
425 	pp->pr_curpage = NULL;
426 	pp->pr_npages = 0;
427 	pp->pr_minitems = 0;
428 	pp->pr_minpages = 0;
429 	pp->pr_maxpages = UINT_MAX;
430 	pp->pr_roflags = flags;
431 	pp->pr_flags = 0;
432 	pp->pr_size = size;
433 	pp->pr_align = align;
434 	pp->pr_wchan = wchan;
435 	pp->pr_mtype = mtype;
436 	pp->pr_alloc = alloc;
437 	pp->pr_free = release;
438 	pp->pr_pagesz = pagesz;
439 	pp->pr_pagemask = ~(pagesz - 1);
440 	pp->pr_pageshift = ffs(pagesz) - 1;
441 	pp->pr_nitems = 0;
442 	pp->pr_nout = 0;
443 	pp->pr_hardlimit = UINT_MAX;
444 	pp->pr_hardlimit_warning = NULL;
445 	pp->pr_hardlimit_ratecap.tv_sec = 0;
446 	pp->pr_hardlimit_ratecap.tv_usec = 0;
447 	pp->pr_hardlimit_warning_last.tv_sec = 0;
448 	pp->pr_hardlimit_warning_last.tv_usec = 0;
449 	pp->pr_serial = ++pool_serial;
450 	if (pool_serial == 0)
451 		panic("pool_init: too much uptime");
452 
453 	/*
454 	 * Decide whether to put the page header off page to avoid
455 	 * wasting too large a part of the page. Off-page page headers
456 	 * go on a hash table, so we can match a returned item
457 	 * with its header based on the page address.
458 	 * We use 1/16 of the page size as the threshold (XXX: tune)
459 	 */
460 	if (pp->pr_size < pagesz/16) {
461 		/* Use the end of the page for the page header */
462 		pp->pr_roflags |= PR_PHINPAGE;
463 		pp->pr_phoffset = off =
464 			pagesz - ALIGN(sizeof(struct pool_item_header));
465 	} else {
466 		/* The page header will be taken from our page header pool */
467 		pp->pr_phoffset = 0;
468 		off = pagesz;
469 		for (i = 0; i < PR_HASHTABSIZE; i++) {
470 			LIST_INIT(&pp->pr_hashtab[i]);
471 		}
472 	}
473 
474 	/*
475 	 * Alignment is to take place at `ioff' within the item. This means
476 	 * we must reserve up to `align - 1' bytes on the page to allow
477 	 * appropriate positioning of each item.
478 	 *
479 	 * Silently enforce `0 <= ioff < align'.
480 	 */
481 	pp->pr_itemoffset = ioff = ioff % align;
482 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
483 	KASSERT(pp->pr_itemsperpage != 0);
484 
485 	/*
486 	 * Use the slack between the chunks and the page header
487 	 * for "cache coloring".
488 	 */
489 	slack = off - pp->pr_itemsperpage * pp->pr_size;
490 	pp->pr_maxcolor = (slack / align) * align;
491 	pp->pr_curcolor = 0;
492 
493 	pp->pr_nget = 0;
494 	pp->pr_nfail = 0;
495 	pp->pr_nput = 0;
496 	pp->pr_npagealloc = 0;
497 	pp->pr_npagefree = 0;
498 	pp->pr_hiwat = 0;
499 	pp->pr_nidle = 0;
500 
501 #ifdef POOL_DIAGNOSTIC
502 	if (flags & PR_LOGGING) {
503 		if (kmem_map == NULL ||
504 		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
505 		     M_TEMP, M_NOWAIT)) == NULL)
506 			pp->pr_roflags &= ~PR_LOGGING;
507 		pp->pr_curlogentry = 0;
508 		pp->pr_logsize = pool_logsize;
509 	}
510 #endif
511 
512 	pp->pr_entered_file = NULL;
513 	pp->pr_entered_line = 0;
514 
515 	simple_lock_init(&pp->pr_slock);
516 
517 	/*
518 	 * Initialize private page header pool and cache magazine pool if we
519 	 * haven't done so yet.
520 	 * XXX LOCKING.
521 	 */
522 	if (phpool.pr_size == 0) {
523 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
524 		    0, "phpool", 0, 0, 0, 0);
525 		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
526 		    0, "pcgpool", 0, 0, 0, 0);
527 	}
528 
529 	/* Insert into the list of all pools. */
530 	simple_lock(&pool_head_slock);
531 	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
532 	simple_unlock(&pool_head_slock);
533 }
534 
535 /*
536  * De-commission a pool resource.
537  */
538 void
539 pool_destroy(struct pool *pp)
540 {
541 	struct pool_item_header *ph;
542 	struct pool_cache *pc;
543 
544 	/* Destroy all caches for this pool. */
545 	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
546 		pool_cache_destroy(pc);
547 
548 #ifdef DIAGNOSTIC
549 	if (pp->pr_nout != 0) {
550 		pr_printlog(pp, NULL, printf);
551 		panic("pool_destroy: pool busy: still out: %u\n",
552 		    pp->pr_nout);
553 	}
554 #endif
555 
556 	/* Remove all pages */
557 	if ((pp->pr_roflags & PR_STATIC) == 0)
558 		while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
559 			pr_rmpage(pp, ph);
560 
561 	/* Remove from global pool list */
562 	simple_lock(&pool_head_slock);
563 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
564 	/* XXX Only clear this if we were drainpp? */
565 	drainpp = NULL;
566 	simple_unlock(&pool_head_slock);
567 
568 #ifdef POOL_DIAGNOSTIC
569 	if ((pp->pr_roflags & PR_LOGGING) != 0)
570 		free(pp->pr_log, M_TEMP);
571 #endif
572 
573 	if (pp->pr_roflags & PR_FREEHEADER)
574 		free(pp, M_POOL);
575 }
576 
577 static __inline struct pool_item_header *
578 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
579 {
580 	struct pool_item_header *ph;
581 	int s;
582 
583 	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
584 
585 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
586 		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
587 	else {
588 		s = splhigh();
589 		ph = pool_get(&phpool, flags);
590 		splx(s);
591 	}
592 
593 	return (ph);
594 }
595 
596 /*
597  * Grab an item from the pool; must be called at appropriate spl level
598  */
599 void *
600 #ifdef POOL_DIAGNOSTIC
601 _pool_get(struct pool *pp, int flags, const char *file, long line)
602 #else
603 pool_get(struct pool *pp, int flags)
604 #endif
605 {
606 	struct pool_item *pi;
607 	struct pool_item_header *ph;
608 	void *v;
609 
610 #ifdef DIAGNOSTIC
611 	if (__predict_false((pp->pr_roflags & PR_STATIC) &&
612 			    (flags & PR_MALLOCOK))) {
613 		pr_printlog(pp, NULL, printf);
614 		panic("pool_get: static");
615 	}
616 
617 	if (__predict_false(curproc == NULL && /* doing_shutdown == 0 && XXX*/
618 			    (flags & PR_WAITOK) != 0))
619 		panic("pool_get: must have NOWAIT");
620 
621 #endif
622 	simple_lock(&pp->pr_slock);
623 	pr_enter(pp, file, line);
624 
625  startover:
626 	/*
627 	 * Check to see if we've reached the hard limit.  If we have,
628 	 * and we can wait, then wait until an item has been returned to
629 	 * the pool.
630 	 */
631 #ifdef DIAGNOSTIC
632 	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
633 		pr_leave(pp);
634 		simple_unlock(&pp->pr_slock);
635 		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
636 	}
637 #endif
638 	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
639 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
640 			/*
641 			 * XXX: A warning isn't logged in this case.  Should
642 			 * it be?
643 			 */
644 			pp->pr_flags |= PR_WANTED;
645 			pr_leave(pp);
646 			simple_unlock(&pp->pr_slock);
647 			tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0);
648 			simple_lock(&pp->pr_slock);
649 			pr_enter(pp, file, line);
650 			goto startover;
651 		}
652 
653 		/*
654 		 * Log a message that the hard limit has been hit.
655 		 */
656 		if (pp->pr_hardlimit_warning != NULL &&
657 		    ratecheck(&pp->pr_hardlimit_warning_last,
658 			      &pp->pr_hardlimit_ratecap))
659 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
660 
661 		if (flags & PR_URGENT)
662 			panic("pool_get: urgent");
663 
664 		pp->pr_nfail++;
665 
666 		pr_leave(pp);
667 		simple_unlock(&pp->pr_slock);
668 		return (NULL);
669 	}
670 
671 	/*
672 	 * The convention we use is that if `curpage' is not NULL, then
673 	 * it points at a non-empty bucket. In particular, `curpage'
674 	 * never points at a page header which has PR_PHINPAGE set and
675 	 * has no items in its bucket.
676 	 */
677 	if ((ph = pp->pr_curpage) == NULL) {
678 #ifdef DIAGNOSTIC
679 		if (pp->pr_nitems != 0) {
680 			simple_unlock(&pp->pr_slock);
681 			printf("pool_get: %s: curpage NULL, nitems %u\n",
682 			    pp->pr_wchan, pp->pr_nitems);
683 			panic("pool_get: nitems inconsistent\n");
684 		}
685 #endif
686 
687 		/*
688 		 * Call the back-end page allocator for more memory.
689 		 * Release the pool lock, as the back-end page allocator
690 		 * may block.
691 		 */
692 		pr_leave(pp);
693 		simple_unlock(&pp->pr_slock);
694 		v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
695 		if (__predict_true(v != NULL))
696 			ph = pool_alloc_item_header(pp, v, flags);
697 		simple_lock(&pp->pr_slock);
698 		pr_enter(pp, file, line);
699 
700 		if (__predict_false(v == NULL || ph == NULL)) {
701 			if (v != NULL)
702 				(*pp->pr_free)(v, pp->pr_pagesz, pp->pr_mtype);
703 
704 			/*
705 			 * We were unable to allocate a page or item
706 			 * header, but we released the lock during
707 			 * allocation, so perhaps items were freed
708 			 * back to the pool.  Check for this case.
709 			 */
710 			if (pp->pr_curpage != NULL)
711 				goto startover;
712 
713 			if (flags & PR_URGENT)
714 				panic("pool_get: urgent");
715 
716 			if ((flags & PR_WAITOK) == 0) {
717 				pp->pr_nfail++;
718 				pr_leave(pp);
719 				simple_unlock(&pp->pr_slock);
720 				return (NULL);
721 			}
722 
723 			/*
724 			 * Wait for items to be returned to this pool.
725 			 *
726 			 * XXX: we actually want to wait just until
727 			 * the page allocator has memory again. Depending
728 			 * on this pool's usage, we might get stuck here
729 			 * for a long time.
730 			 *
731 			 * XXX: maybe we should wake up once a second and
732 			 * try again?
733 			 */
734 			pp->pr_flags |= PR_WANTED;
735 			pr_leave(pp);
736 			simple_unlock(&pp->pr_slock);
737 			tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0);
738 			simple_lock(&pp->pr_slock);
739 			pr_enter(pp, file, line);
740 			goto startover;
741 		}
742 
743 		/* We have more memory; add it to the pool */
744 		pp->pr_npagealloc++;
745 		pool_prime_page(pp, v, ph);
746 
747 		/* Start the allocation process over. */
748 		goto startover;
749 	}
750 
751 	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
752 		pr_leave(pp);
753 		simple_unlock(&pp->pr_slock);
754 		panic("pool_get: %s: page empty", pp->pr_wchan);
755 	}
756 #ifdef DIAGNOSTIC
757 	if (__predict_false(pp->pr_nitems == 0)) {
758 		pr_leave(pp);
759 		simple_unlock(&pp->pr_slock);
760 		printf("pool_get: %s: items on itemlist, nitems %u\n",
761 		    pp->pr_wchan, pp->pr_nitems);
762 		panic("pool_get: nitems inconsistent\n");
763 	}
764 
765 	pr_log(pp, v, PRLOG_GET, file, line);
766 
767 	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
768 		pr_printlog(pp, pi, printf);
769 		panic("pool_get(%s): free list modified: magic=%x; page %p;"
770 		       " item addr %p\n",
771 			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
772 	}
773 #endif
774 
775 	/*
776 	 * Remove from item list.
777 	 */
778 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
779 	pp->pr_nitems--;
780 	pp->pr_nout++;
781 	if (ph->ph_nmissing == 0) {
782 #ifdef DIAGNOSTIC
783 		if (__predict_false(pp->pr_nidle == 0))
784 			panic("pool_get: nidle inconsistent");
785 #endif
786 		pp->pr_nidle--;
787 	}
788 	ph->ph_nmissing++;
789 	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
790 #ifdef DIAGNOSTIC
791 		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
792 			pr_leave(pp);
793 			simple_unlock(&pp->pr_slock);
794 			panic("pool_get: %s: nmissing inconsistent",
795 			    pp->pr_wchan);
796 		}
797 #endif
798 		/*
799 		 * Find a new non-empty page header, if any.
800 		 * Start search from the page head, to increase
801 		 * the chance for "high water" pages to be freed.
802 		 *
803 		 * Migrate empty pages to the end of the list.  This
804 		 * will speed the update of curpage as pages become
805 		 * idle.  Empty pages intermingled with idle pages
806 		 * is no big deal.  As soon as a page becomes un-empty,
807 		 * it will move back to the head of the list.
808 		 */
809 		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
810 		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
811 		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
812 		     ph = TAILQ_NEXT(ph, ph_pagelist))
813 			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
814 				break;
815 
816 		pp->pr_curpage = ph;
817 	}
818 
819 	pp->pr_nget++;
820 
821 	/*
822 	 * If we have a low water mark and we are now below that low
823 	 * water mark, add more items to the pool.
824 	 */
825 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
826 		/*
827 		 * XXX: Should we log a warning?  Should we set up a timeout
828 		 * to try again in a second or so?  The latter could break
829 		 * a caller's assumptions about interrupt protection, etc.
830 		 */
831 	}
832 
833 	pr_leave(pp);
834 	simple_unlock(&pp->pr_slock);
835 	return (v);
836 }
837 
838 /*
839  * Internal version of pool_put().  Pool is already locked/entered.
840  */
841 static void
842 pool_do_put(struct pool *pp, void *v)
843 {
844 	struct pool_item *pi = v;
845 	struct pool_item_header *ph;
846 	caddr_t page;
847 	int s;
848 
849 	page = (caddr_t)((u_long)v & pp->pr_pagemask);
850 
851 #ifdef DIAGNOSTIC
852 	if (__predict_false(pp->pr_nout == 0)) {
853 		printf("pool %s: putting with none out\n",
854 		    pp->pr_wchan);
855 		panic("pool_put");
856 	}
857 #endif
858 
859 	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
860 		pr_printlog(pp, NULL, printf);
861 		panic("pool_put: %s: page header missing", pp->pr_wchan);
862 	}
863 
864 #ifdef LOCKDEBUG
865 	/*
866 	 * Check if we're freeing a locked simple lock.
867 	 */
868 	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
869 #endif
870 
871 	/*
872 	 * Return to item list.
873 	 */
874 #ifdef DIAGNOSTIC
875 	pi->pi_magic = PI_MAGIC;
876 #endif
877 #ifdef DEBUG
878 	{
879 		int i, *ip = v;
880 
881 		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
882 			*ip++ = PI_MAGIC;
883 		}
884 	}
885 #endif
886 
887 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
888 	ph->ph_nmissing--;
889 	pp->pr_nput++;
890 	pp->pr_nitems++;
891 	pp->pr_nout--;
892 
893 	/* Cancel "pool empty" condition if it exists */
894 	if (pp->pr_curpage == NULL)
895 		pp->pr_curpage = ph;
896 
897 	if (pp->pr_flags & PR_WANTED) {
898 		pp->pr_flags &= ~PR_WANTED;
899 		if (ph->ph_nmissing == 0)
900 			pp->pr_nidle++;
901 		wakeup((caddr_t)pp);
902 		return;
903 	}
904 
905 	/*
906 	 * If this page is now complete, do one of two things:
907 	 *
908 	 *	(1) If we have more pages than the page high water
909 	 *	    mark, free the page back to the system.
910 	 *
911 	 *	(2) Move it to the end of the page list, so that
912 	 *	    we minimize our chances of fragmenting the
913 	 *	    pool.  Idle pages migrate to the end (along with
914 	 *	    completely empty pages, so that we find un-empty
915 	 *	    pages more quickly when we update curpage) of the
916 	 *	    list so they can be more easily swept up by
917 	 *	    the pagedaemon when pages are scarce.
918 	 */
919 	if (ph->ph_nmissing == 0) {
920 		pp->pr_nidle++;
921 		if (pp->pr_npages > pp->pr_maxpages) {
922 			pr_rmpage(pp, ph);
923 		} else {
924 			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
925 			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
926 
927 			/*
928 			 * Update the timestamp on the page.  A page must
929 			 * be idle for some period of time before it can
930 			 * be reclaimed by the pagedaemon.  This minimizes
931 			 * ping-pong'ing for memory.
932 			 */
933 			s = splclock();
934 			ph->ph_time = mono_time;
935 			splx(s);
936 
937 			/*
938 			 * Update the current page pointer.  Just look for
939 			 * the first page with any free items.
940 			 *
941 			 * XXX: Maybe we want an option to look for the
942 			 * page with the fewest available items, to minimize
943 			 * fragmentation?
944 			 */
945 			for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
946 			     ph = TAILQ_NEXT(ph, ph_pagelist))
947 				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
948 					break;
949 
950 			pp->pr_curpage = ph;
951 		}
952 	}
953 	/*
954 	 * If the page has just become un-empty, move it to the head of
955 	 * the list, and make it the current page.  The next allocation
956 	 * will get the item from this page, instead of further fragmenting
957 	 * the pool.
958 	 */
959 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
960 		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
961 		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
962 		pp->pr_curpage = ph;
963 	}
964 }
965 
966 /*
967  * Return resource to the pool; must be called at appropriate spl level
968  */
969 #ifdef POOL_DIAGNOSTIC
970 void
971 _pool_put(struct pool *pp, void *v, const char *file, long line)
972 {
973 
974 	simple_lock(&pp->pr_slock);
975 	pr_enter(pp, file, line);
976 
977 	pr_log(pp, v, PRLOG_PUT, file, line);
978 
979 	pool_do_put(pp, v);
980 
981 	pr_leave(pp);
982 	simple_unlock(&pp->pr_slock);
983 }
984 #undef pool_put
985 #endif /* POOL_DIAGNOSTIC */
986 
987 void
988 pool_put(struct pool *pp, void *v)
989 {
990 
991 	simple_lock(&pp->pr_slock);
992 
993 	pool_do_put(pp, v);
994 
995 	simple_unlock(&pp->pr_slock);
996 }
997 
998 #ifdef POOL_DIAGNOSTIC
999 #define		pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
1000 #endif
1001 
1002 /*
1003  * Add N items to the pool.
1004  */
1005 int
1006 pool_prime(struct pool *pp, int n)
1007 {
1008 	struct pool_item_header *ph;
1009 	caddr_t cp;
1010 	int newpages, error = 0;
1011 
1012 	simple_lock(&pp->pr_slock);
1013 
1014 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1015 
1016 	while (newpages-- > 0) {
1017 		simple_unlock(&pp->pr_slock);
1018 		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
1019 		if (__predict_true(cp != NULL))
1020 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1021 		simple_lock(&pp->pr_slock);
1022 
1023 		if (__predict_false(cp == NULL || ph == NULL)) {
1024 			error = ENOMEM;
1025 			if (cp != NULL)
1026 				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1027 			break;
1028 		}
1029 
1030 		pool_prime_page(pp, cp, ph);
1031 		pp->pr_npagealloc++;
1032 		pp->pr_minpages++;
1033 	}
1034 
1035 	if (pp->pr_minpages >= pp->pr_maxpages)
1036 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
1037 
1038 	simple_unlock(&pp->pr_slock);
1039 	return (error);
1040 }
1041 
1042 /*
1043  * Add a page worth of items to the pool.
1044  *
1045  * Note, we must be called with the pool descriptor LOCKED.
1046  */
1047 static void
1048 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1049 {
1050 	struct pool_item *pi;
1051 	caddr_t cp = storage;
1052 	unsigned int align = pp->pr_align;
1053 	unsigned int ioff = pp->pr_itemoffset;
1054 	int n;
1055 
1056 	if (((u_long)cp & (pp->pr_pagesz - 1)) != 0)
1057 		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1058 
1059 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1060 		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
1061 		    ph, ph_hashlist);
1062 
1063 	/*
1064 	 * Insert page header.
1065 	 */
1066 	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
1067 	TAILQ_INIT(&ph->ph_itemlist);
1068 	ph->ph_page = storage;
1069 	ph->ph_nmissing = 0;
1070 	memset(&ph->ph_time, 0, sizeof(ph->ph_time));
1071 
1072 	pp->pr_nidle++;
1073 
1074 	/*
1075 	 * Color this page.
1076 	 */
1077 	cp = (caddr_t)(cp + pp->pr_curcolor);
1078 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1079 		pp->pr_curcolor = 0;
1080 
1081 	/*
1082 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1083 	 */
1084 	if (ioff != 0)
1085 		cp = (caddr_t)(cp + (align - ioff));
1086 
1087 	/*
1088 	 * Insert remaining chunks on the bucket list.
1089 	 */
1090 	n = pp->pr_itemsperpage;
1091 	pp->pr_nitems += n;
1092 
1093 	while (n--) {
1094 		pi = (struct pool_item *)cp;
1095 
1096 		/* Insert on page list */
1097 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1098 #ifdef DIAGNOSTIC
1099 		pi->pi_magic = PI_MAGIC;
1100 #endif
1101 		cp = (caddr_t)(cp + pp->pr_size);
1102 	}
1103 
1104 	/*
1105 	 * If the pool was depleted, point at the new page.
1106 	 */
1107 	if (pp->pr_curpage == NULL)
1108 		pp->pr_curpage = ph;
1109 
1110 	if (++pp->pr_npages > pp->pr_hiwat)
1111 		pp->pr_hiwat = pp->pr_npages;
1112 }
1113 
1114 /*
1115  * Used by pool_get() when nitems drops below the low water mark.  This
1116  * is used to catch up nitems with the low water mark.
1117  *
1118  * Note 1, we never wait for memory here, we let the caller decide what to do.
1119  *
1120  * Note 2, this doesn't work with static pools.
1121  *
1122  * Note 3, we must be called with the pool already locked, and we return
1123  * with it locked.
1124  */
1125 static int
1126 pool_catchup(struct pool *pp)
1127 {
1128 	struct pool_item_header *ph;
1129 	caddr_t cp;
1130 	int error = 0;
1131 
1132 	if (pp->pr_roflags & PR_STATIC) {
1133 		/*
1134 		 * We dropped below the low water mark, and this is not a
1135 		 * good thing.  Log a warning.
1136 		 *
1137 		 * XXX: rate-limit this?
1138 		 */
1139 		printf("WARNING: static pool `%s' dropped below low water "
1140 		    "mark\n", pp->pr_wchan);
1141 		return (0);
1142 	}
1143 
1144 	while (POOL_NEEDS_CATCHUP(pp)) {
1145 		/*
1146 		 * Call the page back-end allocator for more memory.
1147 		 *
1148 		 * XXX: We never wait, so should we bother unlocking
1149 		 * the pool descriptor?
1150 		 */
1151 		simple_unlock(&pp->pr_slock);
1152 		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
1153 		if (__predict_true(cp != NULL))
1154 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1155 		simple_lock(&pp->pr_slock);
1156 		if (__predict_false(cp == NULL || ph == NULL)) {
1157 			if (cp != NULL)
1158 				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1159 			error = ENOMEM;
1160 			break;
1161 		}
1162 		pool_prime_page(pp, cp, ph);
1163 		pp->pr_npagealloc++;
1164 	}
1165 
1166 	return (error);
1167 }
1168 
1169 void
1170 pool_setlowat(struct pool *pp, int n)
1171 {
1172 	int error;
1173 
1174 	simple_lock(&pp->pr_slock);
1175 
1176 	pp->pr_minitems = n;
1177 	pp->pr_minpages = (n == 0)
1178 		? 0
1179 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1180 
1181 	/* Make sure we're caught up with the newly-set low water mark. */
1182 	if (POOL_NEEDS_CATCHUP(pp) && (error = pool_catchup(pp)) != 0) {
1183 		/*
1184 		 * XXX: Should we log a warning?  Should we set up a timeout
1185 		 * to try again in a second or so?  The latter could break
1186 		 * a caller's assumptions about interrupt protection, etc.
1187 		 */
1188 	}
1189 
1190 	simple_unlock(&pp->pr_slock);
1191 }
1192 
1193 void
1194 pool_sethiwat(struct pool *pp, int n)
1195 {
1196 
1197 	simple_lock(&pp->pr_slock);
1198 
1199 	pp->pr_maxpages = (n == 0)
1200 		? 0
1201 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1202 
1203 	simple_unlock(&pp->pr_slock);
1204 }
1205 
1206 void
1207 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1208 {
1209 
1210 	simple_lock(&pp->pr_slock);
1211 
1212 	pp->pr_hardlimit = n;
1213 	pp->pr_hardlimit_warning = warnmess;
1214 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1215 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1216 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1217 
1218 	/*
1219 	 * In-line version of pool_sethiwat(), because we don't want to
1220 	 * release the lock.
1221 	 */
1222 	pp->pr_maxpages = (n == 0)
1223 		? 0
1224 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1225 
1226 	simple_unlock(&pp->pr_slock);
1227 }
1228 
1229 /*
1230  * Default page allocator.
1231  */
1232 static void *
1233 pool_page_alloc(unsigned long sz, int flags, int mtype)
1234 {
1235 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1236 
1237 	return ((void *)uvm_km_alloc_poolpage(waitok));
1238 }
1239 
1240 static void
1241 pool_page_free(void *v, unsigned long sz, int mtype)
1242 {
1243 	uvm_km_free_poolpage((vaddr_t)v);
1244 }
1245 
1246 /*
1247  * Alternate pool page allocator for pools that know they will
1248  * never be accessed in interrupt context.
1249  */
1250 void *
1251 pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
1252 {
1253 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1254 
1255 	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1256 	    waitok));
1257 }
1258 
1259 void
1260 pool_page_free_nointr(void *v, unsigned long sz, int mtype)
1261 {
1262 
1263 	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1264 }
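/*
 * Illustrative usage sketch (not part of this file, kept under #if 0):
 * a pool whose items are only touched from process context can pass the
 * "nointr" allocator pair above to pool_init().  "struct xctx" is an
 * assumption made for the example only; M_TEMP is reused from above.
 */
#if 0
struct xctx {
	int	xc_busy;
};

static struct pool xctx_pool;

void
xctx_pool_init(void)
{
	pool_init(&xctx_pool, sizeof(struct xctx), 0, 0, 0, "xctxpl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
}
#endif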
1265 
1266 
1267 /*
1268  * Release all complete pages that have not been used recently.
1269  */
1270 void
1271 #ifdef POOL_DIAGNOSTIC
1272 _pool_reclaim(struct pool *pp, const char *file, long line)
1273 #else
1274 pool_reclaim(struct pool *pp)
1275 #endif
1276 {
1277 	struct pool_item_header *ph, *phnext;
1278 	struct pool_cache *pc;
1279 	struct timeval curtime;
1280 	int s;
1281 
1282 	if (pp->pr_roflags & PR_STATIC)
1283 		return;
1284 
1285 	if (simple_lock_try(&pp->pr_slock) == 0)
1286 		return;
1287 	pr_enter(pp, file, line);
1288 
1289 	/*
1290 	 * Reclaim items from the pool's caches.
1291 	 */
1292 	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
1293 	     pc = TAILQ_NEXT(pc, pc_poollist))
1294 		pool_cache_reclaim(pc);
1295 
1296 	s = splclock();
1297 	curtime = mono_time;
1298 	splx(s);
1299 
1300 	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
1301 		phnext = TAILQ_NEXT(ph, ph_pagelist);
1302 
1303 		/* Check our minimum page claim */
1304 		if (pp->pr_npages <= pp->pr_minpages)
1305 			break;
1306 
1307 		if (ph->ph_nmissing == 0) {
1308 			struct timeval diff;
1309 			timersub(&curtime, &ph->ph_time, &diff);
1310 			if (diff.tv_sec < pool_inactive_time)
1311 				continue;
1312 
1313 			/*
1314 			 * If freeing this page would put us below
1315 			 * the low water mark, stop now.
1316 			 */
1317 			if ((pp->pr_nitems - pp->pr_itemsperpage) <
1318 			    pp->pr_minitems)
1319 				break;
1320 
1321 			pr_rmpage(pp, ph);
1322 		}
1323 	}
1324 
1325 	pr_leave(pp);
1326 	simple_unlock(&pp->pr_slock);
1327 }
1328 
1329 
1330 /*
1331  * Drain pools, one at a time.
1332  *
1333  * Note, we must never be called from an interrupt context.
1334  */
1335 void
1336 pool_drain(void *arg)
1337 {
1338 	struct pool *pp;
1339 	int s;
1340 
1341 	s = splvm();
1342 	simple_lock(&pool_head_slock);
1343 
1344 	if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL)
1345 		goto out;
1346 
1347 	pp = drainpp;
1348 	drainpp = TAILQ_NEXT(pp, pr_poollist);
1349 
1350 	pool_reclaim(pp);
1351 
1352  out:
1353 	simple_unlock(&pool_head_slock);
1354 	splx(s);
1355 }
1356 
1357 
1358 /*
1359  * Diagnostic helpers.
1360  */
1361 void
1362 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1363 {
1364 	int s;
1365 
1366 	s = splvm();
1367 	if (simple_lock_try(&pp->pr_slock) == 0) {
1368 		printf("pool %s is locked; try again later\n",
1369 		    pp->pr_wchan);
1370 		splx(s);
1371 		return;
1372 	}
1373 	pool_print1(pp, modif, printf);
1374 	simple_unlock(&pp->pr_slock);
1375 	splx(s);
1376 }
1377 
1378 static void
1379 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
1380 {
1381 	struct pool_item_header *ph;
1382 	struct pool_cache *pc;
1383 	struct pool_cache_group *pcg;
1384 #ifdef DIAGNOSTIC
1385 	struct pool_item *pi;
1386 #endif
1387 	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1388 	char c;
1389 
1390 	while ((c = *modif++) != '\0') {
1391 		if (c == 'l')
1392 			print_log = 1;
1393 		if (c == 'p')
1394 			print_pagelist = 1;
1395 		if (c == 'c')
1396 			print_cache = 1;
1398 	}
1399 
1400 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1401 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1402 	    pp->pr_roflags);
1403 	(*pr)("\tpagesz %u, mtype %d\n", pp->pr_pagesz, pp->pr_mtype);
1404 	(*pr)("\talloc %p, release %p\n", pp->pr_alloc, pp->pr_free);
1405 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1406 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1407 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1408 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1409 
1410 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1411 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1412 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1413 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1414 
1415 	if (print_pagelist == 0)
1416 		goto skip_pagelist;
1417 
1418 	if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
1419 		(*pr)("\n\tpage list:\n");
1420 	for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) {
1421 		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1422 		    ph->ph_page, ph->ph_nmissing,
1423 		    (u_long)ph->ph_time.tv_sec,
1424 		    (u_long)ph->ph_time.tv_usec);
1425 #ifdef DIAGNOSTIC
1426 		for (pi = TAILQ_FIRST(&ph->ph_itemlist); pi != NULL;
1427 		     pi = TAILQ_NEXT(pi, pi_list)) {
1428 			if (pi->pi_magic != PI_MAGIC) {
1429 				(*pr)("\t\t\titem %p, magic 0x%x\n",
1430 				    pi, pi->pi_magic);
1431 			}
1432 		}
1433 #endif
1434 	}
1435 	if (pp->pr_curpage == NULL)
1436 		(*pr)("\tno current page\n");
1437 	else
1438 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1439 
1440  skip_pagelist:
1441 
1442 	if (print_log == 0)
1443 		goto skip_log;
1444 
1445 	(*pr)("\n");
1446 	if ((pp->pr_roflags & PR_LOGGING) == 0)
1447 		(*pr)("\tno log\n");
1448 	else
1449 		pr_printlog(pp, NULL, pr);
1450 
1451  skip_log:
1452 
1453 	if (print_cache == 0)
1454 		goto skip_cache;
1455 
1456 	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
1457 	     pc = TAILQ_NEXT(pc, pc_poollist)) {
1458 		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1459 		    pc->pc_allocfrom, pc->pc_freeto);
1460 		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
1461 		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1462 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1463 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1464 			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1465 			for (i = 0; i < PCG_NOBJECTS; i++)
1466 				(*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
1467 		}
1468 	}
1469 
1470  skip_cache:
1471 
1472 	pr_enter_check(pp, pr);
1473 }
1474 
1475 int
1476 pool_chk(struct pool *pp, const char *label)
1477 {
1478 	struct pool_item_header *ph;
1479 	int r = 0;
1480 
1481 	simple_lock(&pp->pr_slock);
1482 
1483 	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
1484 	     ph = TAILQ_NEXT(ph, ph_pagelist)) {
1485 
1486 		struct pool_item *pi;
1487 		int n;
1488 		caddr_t page;
1489 
1490 		page = (caddr_t)((u_long)ph & pp->pr_pagemask);
1491 		if (page != ph->ph_page &&
1492 		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1493 			if (label != NULL)
1494 				printf("%s: ", label);
1495 			printf("pool(%p:%s): page inconsistency: page %p;"
1496 			       " at page head addr %p (p %p)\n", pp,
1497 				pp->pr_wchan, ph->ph_page,
1498 				ph, page);
1499 			r++;
1500 			goto out;
1501 		}
1502 
1503 		for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1504 		     pi != NULL;
1505 		     pi = TAILQ_NEXT(pi,pi_list), n++) {
1506 
1507 #ifdef DIAGNOSTIC
1508 			if (pi->pi_magic != PI_MAGIC) {
1509 				if (label != NULL)
1510 					printf("%s: ", label);
1511 				printf("pool(%s): free list modified: magic=%x;"
1512 				       " page %p; item ordinal %d;"
1513 				       " addr %p (p %p)\n",
1514 					pp->pr_wchan, pi->pi_magic, ph->ph_page,
1515 					n, pi, page);
1516 				panic("pool");
1517 			}
1518 #endif
1519 			page = (caddr_t)((u_long)pi & pp->pr_pagemask);
1520 			if (page == ph->ph_page)
1521 				continue;
1522 
1523 			if (label != NULL)
1524 				printf("%s: ", label);
1525 			printf("pool(%p:%s): page inconsistency: page %p;"
1526 			       " item ordinal %d; addr %p (p %p)\n", pp,
1527 				pp->pr_wchan, ph->ph_page,
1528 				n, pi, page);
1529 			r++;
1530 			goto out;
1531 		}
1532 	}
1533 out:
1534 	simple_unlock(&pp->pr_slock);
1535 	return (r);
1536 }
1537 
1538 /*
1539  * pool_cache_init:
1540  *
1541  *	Initialize a pool cache.
1542  *
1543  *	NOTE: If the pool must be protected from interrupts, we expect
1544  *	to be called at the appropriate interrupt priority level.
1545  */
1546 void
1547 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1548     int (*ctor)(void *, void *, int),
1549     void (*dtor)(void *, void *),
1550     void *arg)
1551 {
1552 
1553 	TAILQ_INIT(&pc->pc_grouplist);
1554 	simple_lock_init(&pc->pc_slock);
1555 
1556 	pc->pc_allocfrom = NULL;
1557 	pc->pc_freeto = NULL;
1558 	pc->pc_pool = pp;
1559 
1560 	pc->pc_ctor = ctor;
1561 	pc->pc_dtor = dtor;
1562 	pc->pc_arg  = arg;
1563 
1564 	pc->pc_hits   = 0;
1565 	pc->pc_misses = 0;
1566 
1567 	pc->pc_ngroups = 0;
1568 
1569 	pc->pc_nitems = 0;
1570 
1571 	simple_lock(&pp->pr_slock);
1572 	TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1573 	simple_unlock(&pp->pr_slock);
1574 }
1575 
1576 /*
1577  * pool_cache_destroy:
1578  *
1579  *	Destroy a pool cache.
1580  */
1581 void
1582 pool_cache_destroy(struct pool_cache *pc)
1583 {
1584 	struct pool *pp = pc->pc_pool;
1585 
1586 	/* First, invalidate the entire cache. */
1587 	pool_cache_invalidate(pc);
1588 
1589 	/* ...and remove it from the pool's cache list. */
1590 	simple_lock(&pp->pr_slock);
1591 	TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1592 	simple_unlock(&pp->pr_slock);
1593 }
1594 
1595 static __inline void *
1596 pcg_get(struct pool_cache_group *pcg)
1597 {
1598 	void *object;
1599 	u_int idx;
1600 
1601 	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1602 	KASSERT(pcg->pcg_avail != 0);
1603 	idx = --pcg->pcg_avail;
1604 
1605 	KASSERT(pcg->pcg_objects[idx] != NULL);
1606 	object = pcg->pcg_objects[idx];
1607 	pcg->pcg_objects[idx] = NULL;
1608 
1609 	return (object);
1610 }
1611 
1612 static __inline void
1613 pcg_put(struct pool_cache_group *pcg, void *object)
1614 {
1615 	u_int idx;
1616 
1617 	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1618 	idx = pcg->pcg_avail++;
1619 
1620 	KASSERT(pcg->pcg_objects[idx] == NULL);
1621 	pcg->pcg_objects[idx] = object;
1622 }
1623 
1624 /*
1625  * pool_cache_get:
1626  *
1627  *	Get an object from a pool cache.
1628  */
1629 void *
1630 pool_cache_get(struct pool_cache *pc, int flags)
1631 {
1632 	struct pool_cache_group *pcg;
1633 	void *object;
1634 
1635 #ifdef LOCKDEBUG
1636 	if (flags & PR_WAITOK)
1637 		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1638 #endif
1639 
1640 	simple_lock(&pc->pc_slock);
1641 
1642 	if ((pcg = pc->pc_allocfrom) == NULL) {
1643 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1644 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1645 			if (pcg->pcg_avail != 0) {
1646 				pc->pc_allocfrom = pcg;
1647 				goto have_group;
1648 			}
1649 		}
1650 
1651 		/*
1652 		 * No groups with any available objects.  Allocate
1653 		 * a new object, construct it, and return it to
1654 		 * the caller.  We will allocate a group, if necessary,
1655 		 * when the object is freed back to the cache.
1656 		 */
1657 		pc->pc_misses++;
1658 		simple_unlock(&pc->pc_slock);
1659 		object = pool_get(pc->pc_pool, flags);
1660 		if (object != NULL && pc->pc_ctor != NULL) {
1661 			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1662 				pool_put(pc->pc_pool, object);
1663 				return (NULL);
1664 			}
1665 		}
1666 		return (object);
1667 	}
1668 
1669  have_group:
1670 	pc->pc_hits++;
1671 	pc->pc_nitems--;
1672 	object = pcg_get(pcg);
1673 
1674 	if (pcg->pcg_avail == 0)
1675 		pc->pc_allocfrom = NULL;
1676 
1677 	simple_unlock(&pc->pc_slock);
1678 
1679 	return (object);
1680 }
1681 
1682 /*
1683  * pool_cache_put:
1684  *
1685  *	Put an object back to the pool cache.
1686  */
1687 void
1688 pool_cache_put(struct pool_cache *pc, void *object)
1689 {
1690 	struct pool_cache_group *pcg;
1691 
1692 	simple_lock(&pc->pc_slock);
1693 
1694 	if ((pcg = pc->pc_freeto) == NULL) {
1695 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1696 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1697 			if (pcg->pcg_avail != PCG_NOBJECTS) {
1698 				pc->pc_freeto = pcg;
1699 				goto have_group;
1700 			}
1701 		}
1702 
1703 		/*
1704 		 * No empty groups to free the object to.  Attempt to
1705 		 * allocate one.
1706 		 */
1707 		simple_unlock(&pc->pc_slock);
1708 		pcg = pool_get(&pcgpool, PR_NOWAIT);
1709 		if (pcg != NULL) {
1710 			memset(pcg, 0, sizeof(*pcg));
1711 			simple_lock(&pc->pc_slock);
1712 			pc->pc_ngroups++;
1713 			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1714 			if (pc->pc_freeto == NULL)
1715 				pc->pc_freeto = pcg;
1716 			goto have_group;
1717 		}
1718 
1719 		/*
1720 		 * Unable to allocate a cache group; destruct the object
1721 		 * and free it back to the pool.
1722 		 */
1723 		pool_cache_destruct_object(pc, object);
1724 		return;
1725 	}
1726 
1727  have_group:
1728 	pc->pc_nitems++;
1729 	pcg_put(pcg, object);
1730 
1731 	if (pcg->pcg_avail == PCG_NOBJECTS)
1732 		pc->pc_freeto = NULL;
1733 
1734 	simple_unlock(&pc->pc_slock);
1735 }
1736 
1737 /*
1738  * pool_cache_destruct_object:
1739  *
1740  *	Force destruction of an object and its release back into
1741  *	the pool.
1742  */
1743 void
1744 pool_cache_destruct_object(struct pool_cache *pc, void *object)
1745 {
1746 
1747 	if (pc->pc_dtor != NULL)
1748 		(*pc->pc_dtor)(pc->pc_arg, object);
1749 	pool_put(pc->pc_pool, object);
1750 }
1751 
1752 /*
1753  * pool_cache_do_invalidate:
1754  *
1755  *	This internal function implements pool_cache_invalidate() and
1756  *	pool_cache_reclaim().
1757  */
1758 static void
1759 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1760     void (*putit)(struct pool *, void *))
1761 {
1762 	struct pool_cache_group *pcg, *npcg;
1763 	void *object;
1764 
1765 	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1766 	     pcg = npcg) {
1767 		npcg = TAILQ_NEXT(pcg, pcg_list);
1768 		while (pcg->pcg_avail != 0) {
1769 			pc->pc_nitems--;
1770 			object = pcg_get(pcg);
1771 			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1772 				pc->pc_allocfrom = NULL;
1773 			if (pc->pc_dtor != NULL)
1774 				(*pc->pc_dtor)(pc->pc_arg, object);
1775 			(*putit)(pc->pc_pool, object);
1776 		}
1777 		if (free_groups) {
1778 			pc->pc_ngroups--;
1779 			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1780 			if (pc->pc_freeto == pcg)
1781 				pc->pc_freeto = NULL;
1782 			pool_put(&pcgpool, pcg);
1783 		}
1784 	}
1785 }
1786 
1787 /*
1788  * pool_cache_invalidate:
1789  *
1790  *	Invalidate a pool cache (destruct and release all of the
1791  *	cached objects).
1792  */
1793 void
1794 pool_cache_invalidate(struct pool_cache *pc)
1795 {
1796 
1797 	simple_lock(&pc->pc_slock);
1798 	pool_cache_do_invalidate(pc, 0, pool_put);
1799 	simple_unlock(&pc->pc_slock);
1800 }
1801 
1802 /*
1803  * pool_cache_reclaim:
1804  *
1805  *	Reclaim a pool cache for pool_reclaim().
1806  */
1807 static void
1808 pool_cache_reclaim(struct pool_cache *pc)
1809 {
1810 
1811 	simple_lock(&pc->pc_slock);
1812 	pool_cache_do_invalidate(pc, 1, pool_do_put);
1813 	simple_unlock(&pc->pc_slock);
1814 }
1815 
1816 /*
1817  * We have three different sysctls.
1818  * kern.pool.npools - the number of pools.
1819  * kern.pool.pool.<pool#> - the pool struct for the pool#.
1820  * kern.pool.name.<pool#> - the name for pool#.
1821  */
1822 int
1823 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
1824 {
1825 	struct pool *pp, *foundpool = NULL;
1826 	size_t buflen = where != NULL ? *sizep : 0;
1827 	int npools = 0, s;
1828 	unsigned int lookfor;
1829 	size_t len;
1830 
1831 	switch (*name) {
1832 	case KERN_POOL_NPOOLS:
1833 		if (namelen != 1 || buflen != sizeof(int))
1834 			return (EINVAL);
1835 		lookfor = 0;
1836 		break;
1837 	case KERN_POOL_NAME:
1838 		if (namelen != 2 || buflen < 1)
1839 			return (EINVAL);
1840 		lookfor = name[1];
1841 		break;
1842 	case KERN_POOL_POOL:
1843 		if (namelen != 2 || buflen != sizeof(struct pool))
1844 			return (EINVAL);
1845 		lookfor = name[1];
1846 		break;
1847 	default:
1848 		return (EINVAL);
1849 	}
1850 
1851 	s = splvm();
1852 	simple_lock(&pool_head_slock);
1853 
1854 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1855 		npools++;
1856 		if (lookfor == pp->pr_serial) {
1857 			foundpool = pp;
1858 			break;
1859 		}
1860 	}
1861 
1862 	simple_unlock(&pool_head_slock);
1863 	splx(s);
1864 
1865 	if (lookfor != 0 && foundpool == NULL)
1866 		return (ENOENT);
1867 
1868 	switch (*name) {
1869 	case KERN_POOL_NPOOLS:
1870 		return copyout(&npools, where, buflen);
1871 	case KERN_POOL_NAME:
1872 		len = strlen(foundpool->pr_wchan) + 1;
1873 		if (*sizep < len)
1874 			return (ENOMEM);
1875 		*sizep = len;
1876 		return copyout(foundpool->pr_wchan, where, len);
1877 	case KERN_POOL_POOL:
1878 		return copyout(foundpool, where, buflen);
1879 	}
1880 	/* NOTREACHED */
1881 	return (0); /* XXX - Stupid gcc */
1882 }
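/*
 * Illustrative userland sketch (not part of this file, kept under #if 0):
 * walking the kern.pool nodes above with sysctl(2).  It assumes the node
 * is rooted at CTL_KERN/KERN_POOL, that struct pool is visible through
 * <sys/pool.h>, and that pool serial numbers 1..npools are all in use
 * (destroyed pools would leave gaps); error handling is minimal.
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/pool.h>

#include <stdio.h>

void
list_pools(void)
{
	struct pool pool;
	char name[32];
	size_t len;
	int mib[4], npools, i;

	mib[0] = CTL_KERN;
	mib[1] = KERN_POOL;
	mib[2] = KERN_POOL_NPOOLS;
	len = sizeof(npools);
	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
		return;

	for (i = 1; i <= npools; i++) {
		mib[2] = KERN_POOL_NAME;
		mib[3] = i;		/* pool serial number */
		len = sizeof(name);
		if (sysctl(mib, 4, name, &len, NULL, 0) == -1)
			continue;
		mib[2] = KERN_POOL_POOL;
		len = sizeof(pool);
		if (sysctl(mib, 4, &pool, &len, NULL, 0) == -1)
			continue;
		printf("%-11s %9lu gets %9lu puts\n",
		    name, pool.pr_nget, pool.pr_nput);
	}
}
#endif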
1883