xref: /netbsd-src/sys/kern/subr_pool.c (revision 17306b8fd0952c7489f93f0230818481e5a1e2c9)
1 /*	$NetBSD: subr_pool.c,v 1.59 2001/06/05 18:51:04 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9  * Simulation Facility, NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 #include "opt_pool.h"
41 #include "opt_poollog.h"
42 #include "opt_lockdebug.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/errno.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/lock.h>
51 #include <sys/pool.h>
52 #include <sys/syslog.h>
53 
54 #include <uvm/uvm.h>
55 
56 /*
57  * Pool resource management utility.
58  *
59  * Memory is allocated in pages which are split into pieces according
60  * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
61  * in the pool structure and the individual pool items are on a linked list
62  * headed by `ph_itemlist' in each page header. The memory for building
63  * the page list is either taken from the allocated pages themselves (for
64  * small pool items) or taken from an internal pool of page headers (`phpool').
65  */
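
/*
 * Illustrative usage sketch (hypothetical names, not code from this file):
 * a subsystem with a hypothetical "struct foo" typically declares a static
 * pool, initializes it once, and then gets and puts items at the
 * appropriate spl level.  Passing NULL for the allocator and release
 * functions selects the default page allocator defined below; the malloc
 * type shown is only an example.
 *
 *	static struct pool foo_pool;
 *
 *	void
 *	foo_subsystem_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *		    "foopl", 0, NULL, NULL, M_DEVBUF);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 */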
66 
67 /* List of all pools */
68 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
69 
70 /* Private pool for page header structures */
71 static struct pool phpool;
72 
73 /* # of seconds to retain page after last use */
74 int pool_inactive_time = 10;
75 
76 /* Next candidate for drainage (see pool_drain()) */
77 static struct pool	*drainpp;
78 
79 /* This spin lock protects both pool_head and drainpp. */
80 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
81 
82 struct pool_item_header {
83 	/* Page headers */
84 	TAILQ_ENTRY(pool_item_header)
85 				ph_pagelist;	/* pool page list */
86 	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
87 	LIST_ENTRY(pool_item_header)
88 				ph_hashlist;	/* Off-page page headers */
89 	int			ph_nmissing;	/* # of chunks in use */
90 	caddr_t			ph_page;	/* this page's address */
91 	struct timeval		ph_time;	/* last referenced */
92 };
93 
94 struct pool_item {
95 #ifdef DIAGNOSTIC
96 	int pi_magic;
97 #endif
98 #define	PI_MAGIC 0xdeadbeef
99 	/* Other entries use only this list entry */
100 	TAILQ_ENTRY(pool_item)	pi_list;
101 };
102 
103 #define	PR_HASH_INDEX(pp,addr) \
104 	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
105 
106 #define	POOL_NEEDS_CATCHUP(pp)						\
107 	((pp)->pr_nitems < (pp)->pr_minitems)
108 
109 /*
110  * Pool cache management.
111  *
112  * Pool caches provide a way for constructed objects to be cached by the
113  * pool subsystem.  This can lead to performance improvements by avoiding
114  * needless object construction/destruction; construction and destruction
115  * are deferred until absolutely necessary.
116  *
117  * Caches are grouped into cache groups.  Each cache group references
118  * up to 16 constructed objects.  When a cache allocates an object
119  * from the pool, it calls the object's constructor and places it into
120  * a cache group.  When a cache group frees an object back to the pool,
121  * it first calls the object's destructor.  This allows the object to
122  * persist in constructed form while freed to the cache.
123  *
124  * Multiple caches may exist for each pool.  This allows a single
125  * object type to have multiple constructed forms.  The pool references
126  * each cache, so that when a pool is drained by the pagedaemon, it can
127  * drain each individual cache as well.  Each time a cache is drained,
128  * the most idle cache group is freed to the pool in its entirety.
129  *
130  * Pool caches are laid on top of pools.  By layering them, we can avoid
131  * the complexity of cache management for pools which would not benefit
132  * from it.
133  */
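
/*
 * Illustrative sketch of the cache interface (hypothetical names, not part
 * of this file): a pool cache is layered on an existing pool and given an
 * optional constructor/destructor pair.  pool_cache_get() then hands back
 * an already-constructed object when a cache group has one available, and
 * only falls back to pool_get() plus the constructor on a miss.
 *
 *	static struct pool foo_pool;
 *	static struct pool_cache foo_cache;
 *
 *	int  foo_ctor(void *arg, void *obj, int flags);
 *	void foo_dtor(void *arg, void *obj);
 *
 *	void
 *	foo_cache_setup(void)
 *	{
 *		pool_cache_init(&foo_cache, &foo_pool,
 *		    foo_ctor, foo_dtor, NULL);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_cache_get(&foo_cache, PR_WAITOK));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_cache_put(&foo_cache, f);
 *	}
 */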
134 
135 /* The cache group pool. */
136 static struct pool pcgpool;
137 
138 /* The pool cache group. */
139 #define	PCG_NOBJECTS		16
140 struct pool_cache_group {
141 	TAILQ_ENTRY(pool_cache_group)
142 		pcg_list;	/* link in the pool cache's group list */
143 	u_int	pcg_avail;	/* # available objects */
144 				/* pointers to the objects */
145 	void	*pcg_objects[PCG_NOBJECTS];
146 };
147 
148 static void	pool_cache_reclaim(struct pool_cache *);
149 
150 static int	pool_catchup(struct pool *);
151 static void	pool_prime_page(struct pool *, caddr_t,
152 		    struct pool_item_header *);
153 static void	*pool_page_alloc(unsigned long, int, int);
154 static void	pool_page_free(void *, unsigned long, int);
155 
156 static void pool_print1(struct pool *, const char *,
157 	void (*)(const char *, ...));
158 
159 /*
160  * Pool log entry. An array of these is allocated in pool_init().
161  */
162 struct pool_log {
163 	const char	*pl_file;
164 	long		pl_line;
165 	int		pl_action;
166 #define	PRLOG_GET	1
167 #define	PRLOG_PUT	2
168 	void		*pl_addr;
169 };
170 
171 /* Number of entries in pool log buffers */
172 #ifndef POOL_LOGSIZE
173 #define	POOL_LOGSIZE	10
174 #endif
175 
176 int pool_logsize = POOL_LOGSIZE;
177 
178 #ifdef POOL_DIAGNOSTIC
179 static __inline void
180 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
181 {
182 	int n = pp->pr_curlogentry;
183 	struct pool_log *pl;
184 
185 	if ((pp->pr_roflags & PR_LOGGING) == 0)
186 		return;
187 
188 	/*
189 	 * Fill in the current entry. Wrap around and overwrite
190 	 * the oldest entry if necessary.
191 	 */
192 	pl = &pp->pr_log[n];
193 	pl->pl_file = file;
194 	pl->pl_line = line;
195 	pl->pl_action = action;
196 	pl->pl_addr = v;
197 	if (++n >= pp->pr_logsize)
198 		n = 0;
199 	pp->pr_curlogentry = n;
200 }
201 
202 static void
203 pr_printlog(struct pool *pp, struct pool_item *pi,
204     void (*pr)(const char *, ...))
205 {
206 	int i = pp->pr_logsize;
207 	int n = pp->pr_curlogentry;
208 
209 	if ((pp->pr_roflags & PR_LOGGING) == 0)
210 		return;
211 
212 	/*
213 	 * Print all entries in this pool's log.
214 	 */
215 	while (i-- > 0) {
216 		struct pool_log *pl = &pp->pr_log[n];
217 		if (pl->pl_action != 0) {
218 			if (pi == NULL || pi == pl->pl_addr) {
219 				(*pr)("\tlog entry %d:\n", i);
220 				(*pr)("\t\taction = %s, addr = %p\n",
221 				    pl->pl_action == PRLOG_GET ? "get" : "put",
222 				    pl->pl_addr);
223 				(*pr)("\t\tfile: %s at line %ld\n",
224 				    pl->pl_file, pl->pl_line);
225 			}
226 		}
227 		if (++n >= pp->pr_logsize)
228 			n = 0;
229 	}
230 }
231 
232 static __inline void
233 pr_enter(struct pool *pp, const char *file, long line)
234 {
235 
236 	if (__predict_false(pp->pr_entered_file != NULL)) {
237 		printf("pool %s: reentrancy at file %s line %ld\n",
238 		    pp->pr_wchan, file, line);
239 		printf("         previous entry at file %s line %ld\n",
240 		    pp->pr_entered_file, pp->pr_entered_line);
241 		panic("pr_enter");
242 	}
243 
244 	pp->pr_entered_file = file;
245 	pp->pr_entered_line = line;
246 }
247 
248 static __inline void
249 pr_leave(struct pool *pp)
250 {
251 
252 	if (__predict_false(pp->pr_entered_file == NULL)) {
253 		printf("pool %s not entered?\n", pp->pr_wchan);
254 		panic("pr_leave");
255 	}
256 
257 	pp->pr_entered_file = NULL;
258 	pp->pr_entered_line = 0;
259 }
260 
261 static __inline void
262 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
263 {
264 
265 	if (pp->pr_entered_file != NULL)
266 		(*pr)("\n\tcurrently entered from file %s line %ld\n",
267 		    pp->pr_entered_file, pp->pr_entered_line);
268 }
269 #else
270 #define	pr_log(pp, v, action, file, line)
271 #define	pr_printlog(pp, pi, pr)
272 #define	pr_enter(pp, file, line)
273 #define	pr_leave(pp)
274 #define	pr_enter_check(pp, pr)
275 #endif /* POOL_DIAGNOSTIC */
276 
277 /*
278  * Return the pool page header based on page address.
279  */
280 static __inline struct pool_item_header *
281 pr_find_pagehead(struct pool *pp, caddr_t page)
282 {
283 	struct pool_item_header *ph;
284 
285 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
286 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
287 
288 	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
289 	     ph != NULL;
290 	     ph = LIST_NEXT(ph, ph_hashlist)) {
291 		if (ph->ph_page == page)
292 			return (ph);
293 	}
294 	return (NULL);
295 }
296 
297 /*
298  * Remove a page from the pool.
299  */
300 static __inline void
301 pr_rmpage(struct pool *pp, struct pool_item_header *ph)
302 {
303 
304 	/*
305 	 * If the page was idle, decrement the idle page count.
306 	 */
307 	if (ph->ph_nmissing == 0) {
308 #ifdef DIAGNOSTIC
309 		if (pp->pr_nidle == 0)
310 			panic("pr_rmpage: nidle inconsistent");
311 		if (pp->pr_nitems < pp->pr_itemsperpage)
312 			panic("pr_rmpage: nitems inconsistent");
313 #endif
314 		pp->pr_nidle--;
315 	}
316 
317 	pp->pr_nitems -= pp->pr_itemsperpage;
318 
319 	/*
320 	 * Unlink a page from the pool and release it.
321 	 */
322 	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
323 	(*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
324 	pp->pr_npages--;
325 	pp->pr_npagefree++;
326 
327 	if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
328 		int s;
329 		LIST_REMOVE(ph, ph_hashlist);
330 		s = splhigh();
331 		pool_put(&phpool, ph);
332 		splx(s);
333 	}
334 
335 	if (pp->pr_curpage == ph) {
336 		/*
337 		 * Find a new non-empty page header, if any.
338 		 * Start search from the page head, to increase the
339 		 * chance for "high water" pages to be freed.
340 		 */
341 		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
342 		     ph = TAILQ_NEXT(ph, ph_pagelist))
343 			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
344 				break;
345 
346 		pp->pr_curpage = ph;
347 	}
348 }
349 
350 /*
351  * Initialize the given pool resource structure.
352  *
353  * We export this routine to allow other kernel parts to declare
354  * static pools that must be initialized before malloc() is available.
355  */
356 void
357 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
358     const char *wchan, size_t pagesz,
359     void *(*alloc)(unsigned long, int, int),
360     void (*release)(void *, unsigned long, int),
361     int mtype)
362 {
363 	int off, slack, i;
364 
365 #ifdef POOL_DIAGNOSTIC
366 	/*
367 	 * Always log if POOL_DIAGNOSTIC is defined.
368 	 */
369 	if (pool_logsize != 0)
370 		flags |= PR_LOGGING;
371 #endif
372 
373 	/*
374 	 * Check arguments and construct default values.
375 	 */
376 	if (!powerof2(pagesz))
377 		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
378 
379 	if (alloc == NULL && release == NULL) {
380 		alloc = pool_page_alloc;
381 		release = pool_page_free;
382 		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
383 	} else if ((alloc != NULL && release != NULL) == 0) {
384 		/* If you specify one, you must specify both. */
385 		panic("pool_init: must specify alloc and release together");
386 	}
387 
388 	if (pagesz == 0)
389 		pagesz = PAGE_SIZE;
390 
391 	if (align == 0)
392 		align = ALIGN(1);
393 
394 	if (size < sizeof(struct pool_item))
395 		size = sizeof(struct pool_item);
396 
397 	size = ALIGN(size);
398 	if (size > pagesz)
399 		panic("pool_init: pool item size (%lu) too large",
400 		      (u_long)size);
401 
402 	/*
403 	 * Initialize the pool structure.
404 	 */
405 	TAILQ_INIT(&pp->pr_pagelist);
406 	TAILQ_INIT(&pp->pr_cachelist);
407 	pp->pr_curpage = NULL;
408 	pp->pr_npages = 0;
409 	pp->pr_minitems = 0;
410 	pp->pr_minpages = 0;
411 	pp->pr_maxpages = UINT_MAX;
412 	pp->pr_roflags = flags;
413 	pp->pr_flags = 0;
414 	pp->pr_size = size;
415 	pp->pr_align = align;
416 	pp->pr_wchan = wchan;
417 	pp->pr_mtype = mtype;
418 	pp->pr_alloc = alloc;
419 	pp->pr_free = release;
420 	pp->pr_pagesz = pagesz;
421 	pp->pr_pagemask = ~(pagesz - 1);
422 	pp->pr_pageshift = ffs(pagesz) - 1;
423 	pp->pr_nitems = 0;
424 	pp->pr_nout = 0;
425 	pp->pr_hardlimit = UINT_MAX;
426 	pp->pr_hardlimit_warning = NULL;
427 	pp->pr_hardlimit_ratecap.tv_sec = 0;
428 	pp->pr_hardlimit_ratecap.tv_usec = 0;
429 	pp->pr_hardlimit_warning_last.tv_sec = 0;
430 	pp->pr_hardlimit_warning_last.tv_usec = 0;
431 
432 	/*
433 	 * Decide whether to put the page header off page to avoid
434 	 * wasting too large a part of the page. Off-page page headers
435 	 * go on a hash table, so we can match a returned item
436 	 * with its header based on the page address.
437 	 * We use 1/16 of the page size as the threshold (XXX: tune)
438 	 */
439 	if (pp->pr_size < pagesz/16) {
440 		/* Use the end of the page for the page header */
441 		pp->pr_roflags |= PR_PHINPAGE;
442 		pp->pr_phoffset = off =
443 			pagesz - ALIGN(sizeof(struct pool_item_header));
444 	} else {
445 		/* The page header will be taken from our page header pool */
446 		pp->pr_phoffset = 0;
447 		off = pagesz;
448 		for (i = 0; i < PR_HASHTABSIZE; i++) {
449 			LIST_INIT(&pp->pr_hashtab[i]);
450 		}
451 	}
452 
453 	/*
454 	 * Alignment is to take place at `ioff' within the item. This means
455 	 * we must reserve up to `align - 1' bytes on the page to allow
456 	 * appropriate positioning of each item.
457 	 *
458 	 * Silently enforce `0 <= ioff < align'.
459 	 */
460 	pp->pr_itemoffset = ioff = ioff % align;
461 	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
462 	KASSERT(pp->pr_itemsperpage != 0);
463 
464 	/*
465 	 * Use the slack between the chunks and the page header
466 	 * for "cache coloring".
467 	 */
468 	slack = off - pp->pr_itemsperpage * pp->pr_size;
469 	pp->pr_maxcolor = (slack / align) * align;
470 	pp->pr_curcolor = 0;
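
	/*
	 * Worked example of the arithmetic above (illustrative numbers,
	 * assuming a 4096-byte page, a 96-byte item, align = 8, ioff = 0
	 * and an in-page header of 40 bytes):
	 *
	 *	off          = 4096 - 40 = 4056
	 *	itemsperpage = 4056 / 96 = 42
	 *	slack        = 4056 - 42 * 96 = 24
	 *	maxcolor     = (24 / 8) * 8 = 24
	 *
	 * so successive pages start their items at offsets 0, 8, 16 and 24,
	 * spreading items across cache lines.
	 */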
471 
472 	pp->pr_nget = 0;
473 	pp->pr_nfail = 0;
474 	pp->pr_nput = 0;
475 	pp->pr_npagealloc = 0;
476 	pp->pr_npagefree = 0;
477 	pp->pr_hiwat = 0;
478 	pp->pr_nidle = 0;
479 
480 #ifdef POOL_DIAGNOSTIC
481 	if (flags & PR_LOGGING) {
482 		if (kmem_map == NULL ||
483 		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
484 		     M_TEMP, M_NOWAIT)) == NULL)
485 			pp->pr_roflags &= ~PR_LOGGING;
486 		pp->pr_curlogentry = 0;
487 		pp->pr_logsize = pool_logsize;
488 	}
489 #endif
490 
491 	pp->pr_entered_file = NULL;
492 	pp->pr_entered_line = 0;
493 
494 	simple_lock_init(&pp->pr_slock);
495 
496 	/*
497 	 * Initialize private page header pool and cache magazine pool if we
498 	 * haven't done so yet.
499 	 * XXX LOCKING.
500 	 */
501 	if (phpool.pr_size == 0) {
502 		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
503 		    0, "phpool", 0, 0, 0, 0);
504 		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
505 		    0, "pcgpool", 0, 0, 0, 0);
506 	}
507 
508 	/* Insert into the list of all pools. */
509 	simple_lock(&pool_head_slock);
510 	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
511 	simple_unlock(&pool_head_slock);
512 }
513 
514 /*
515  * De-commission a pool resource.
516  */
517 void
518 pool_destroy(struct pool *pp)
519 {
520 	struct pool_item_header *ph;
521 	struct pool_cache *pc;
522 
523 	/* Destroy all caches for this pool. */
524 	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
525 		pool_cache_destroy(pc);
526 
527 #ifdef DIAGNOSTIC
528 	if (pp->pr_nout != 0) {
529 		pr_printlog(pp, NULL, printf);
530 		panic("pool_destroy: pool busy: still out: %u\n",
531 		    pp->pr_nout);
532 	}
533 #endif
534 
535 	/* Remove all pages */
536 	if ((pp->pr_roflags & PR_STATIC) == 0)
537 		while ((ph = pp->pr_pagelist.tqh_first) != NULL)
538 			pr_rmpage(pp, ph);
539 
540 	/* Remove from global pool list */
541 	simple_lock(&pool_head_slock);
542 	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
543 	/* XXX Only clear this if we were drainpp? */
544 	drainpp = NULL;
545 	simple_unlock(&pool_head_slock);
546 
547 #ifdef POOL_DIAGNOSTIC
548 	if ((pp->pr_roflags & PR_LOGGING) != 0)
549 		free(pp->pr_log, M_TEMP);
550 #endif
551 
552 	if (pp->pr_roflags & PR_FREEHEADER)
553 		free(pp, M_POOL);
554 }
555 
556 static __inline struct pool_item_header *
557 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
558 {
559 	struct pool_item_header *ph;
560 	int s;
561 
562 	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
563 
564 	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
565 		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
566 	else {
567 		s = splhigh();
568 		ph = pool_get(&phpool, flags);
569 		splx(s);
570 	}
571 
572 	return (ph);
573 }
574 
575 /*
576  * Grab an item from the pool; must be called at appropriate spl level
577  */
578 void *
579 #ifdef POOL_DIAGNOSTIC
580 _pool_get(struct pool *pp, int flags, const char *file, long line)
581 #else
582 pool_get(struct pool *pp, int flags)
583 #endif
584 {
585 	struct pool_item *pi;
586 	struct pool_item_header *ph;
587 	void *v;
588 
589 #ifdef DIAGNOSTIC
590 	if (__predict_false((pp->pr_roflags & PR_STATIC) &&
591 			    (flags & PR_MALLOCOK))) {
592 		pr_printlog(pp, NULL, printf);
593 		panic("pool_get: static");
594 	}
595 
596 	if (__predict_false(curproc == NULL && doing_shutdown == 0 &&
597 			    (flags & PR_WAITOK) != 0))
598 		panic("pool_get: must have NOWAIT");
599 
600 #ifdef LOCKDEBUG
601 	if (flags & PR_WAITOK)
602 		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
603 #endif
604 #endif /* DIAGNOSTIC */
605 
606 	simple_lock(&pp->pr_slock);
607 	pr_enter(pp, file, line);
608 
609  startover:
610 	/*
611 	 * Check to see if we've reached the hard limit.  If we have,
612 	 * and we can wait, then wait until an item has been returned to
613 	 * the pool.
614 	 */
615 #ifdef DIAGNOSTIC
616 	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
617 		pr_leave(pp);
618 		simple_unlock(&pp->pr_slock);
619 		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
620 	}
621 #endif
622 	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
623 		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
624 			/*
625 			 * XXX: A warning isn't logged in this case.  Should
626 			 * it be?
627 			 */
628 			pp->pr_flags |= PR_WANTED;
629 			pr_leave(pp);
630 			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
631 			pr_enter(pp, file, line);
632 			goto startover;
633 		}
634 
635 		/*
636 		 * Log a message that the hard limit has been hit.
637 		 */
638 		if (pp->pr_hardlimit_warning != NULL &&
639 		    ratecheck(&pp->pr_hardlimit_warning_last,
640 			      &pp->pr_hardlimit_ratecap))
641 			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
642 
643 		if (flags & PR_URGENT)
644 			panic("pool_get: urgent");
645 
646 		pp->pr_nfail++;
647 
648 		pr_leave(pp);
649 		simple_unlock(&pp->pr_slock);
650 		return (NULL);
651 	}
652 
653 	/*
654 	 * The convention we use is that if `curpage' is not NULL, then
655 	 * it points at a non-empty bucket. In particular, `curpage'
656 	 * never points at a page header which has PR_PHINPAGE set and
657 	 * has no items in its bucket.
658 	 */
659 	if ((ph = pp->pr_curpage) == NULL) {
660 #ifdef DIAGNOSTIC
661 		if (pp->pr_nitems != 0) {
662 			simple_unlock(&pp->pr_slock);
663 			printf("pool_get: %s: curpage NULL, nitems %u\n",
664 			    pp->pr_wchan, pp->pr_nitems);
665 			panic("pool_get: nitems inconsistent\n");
666 		}
667 #endif
668 
669 		/*
670 		 * Call the back-end page allocator for more memory.
671 		 * Release the pool lock, as the back-end page allocator
672 		 * may block.
673 		 */
674 		pr_leave(pp);
675 		simple_unlock(&pp->pr_slock);
676 		v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
677 		if (__predict_true(v != NULL))
678 			ph = pool_alloc_item_header(pp, v, flags);
679 		simple_lock(&pp->pr_slock);
680 		pr_enter(pp, file, line);
681 
682 		if (__predict_false(v == NULL || ph == NULL)) {
683 			if (v != NULL)
684 				(*pp->pr_free)(v, pp->pr_pagesz, pp->pr_mtype);
685 
686 			/*
687 			 * We were unable to allocate a page or item
688 			 * header, but we released the lock during
689 			 * allocation, so perhaps items were freed
690 			 * back to the pool.  Check for this case.
691 			 */
692 			if (pp->pr_curpage != NULL)
693 				goto startover;
694 
695 			if (flags & PR_URGENT)
696 				panic("pool_get: urgent");
697 
698 			if ((flags & PR_WAITOK) == 0) {
699 				pp->pr_nfail++;
700 				pr_leave(pp);
701 				simple_unlock(&pp->pr_slock);
702 				return (NULL);
703 			}
704 
705 			/*
706 			 * Wait for items to be returned to this pool.
707 			 *
708 			 * XXX: we actually want to wait just until
709 			 * the page allocator has memory again. Depending
710 			 * on this pool's usage, we might get stuck here
711 			 * for a long time.
712 			 *
713 			 * XXX: maybe we should wake up once a second and
714 			 * try again?
715 			 */
716 			pp->pr_flags |= PR_WANTED;
717 			pr_leave(pp);
718 			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
719 			pr_enter(pp, file, line);
720 			goto startover;
721 		}
722 
723 		/* We have more memory; add it to the pool */
724 		pool_prime_page(pp, v, ph);
725 		pp->pr_npagealloc++;
726 
727 		/* Start the allocation process over. */
728 		goto startover;
729 	}
730 
731 	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
732 		pr_leave(pp);
733 		simple_unlock(&pp->pr_slock);
734 		panic("pool_get: %s: page empty", pp->pr_wchan);
735 	}
736 #ifdef DIAGNOSTIC
737 	if (__predict_false(pp->pr_nitems == 0)) {
738 		pr_leave(pp);
739 		simple_unlock(&pp->pr_slock);
740 		printf("pool_get: %s: items on itemlist, nitems %u\n",
741 		    pp->pr_wchan, pp->pr_nitems);
742 		panic("pool_get: nitems inconsistent\n");
743 	}
744 
745 	pr_log(pp, v, PRLOG_GET, file, line);
746 
747 	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
748 		pr_printlog(pp, pi, printf);
749 		panic("pool_get(%s): free list modified: magic=%x; page %p;"
750 		       " item addr %p\n",
751 			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
752 	}
753 #endif
754 
755 	/*
756 	 * Remove from item list.
757 	 */
758 	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
759 	pp->pr_nitems--;
760 	pp->pr_nout++;
761 	if (ph->ph_nmissing == 0) {
762 #ifdef DIAGNOSTIC
763 		if (__predict_false(pp->pr_nidle == 0))
764 			panic("pool_get: nidle inconsistent");
765 #endif
766 		pp->pr_nidle--;
767 	}
768 	ph->ph_nmissing++;
769 	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
770 #ifdef DIAGNOSTIC
771 		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
772 			pr_leave(pp);
773 			simple_unlock(&pp->pr_slock);
774 			panic("pool_get: %s: nmissing inconsistent",
775 			    pp->pr_wchan);
776 		}
777 #endif
778 		/*
779 		 * Find a new non-empty page header, if any.
780 		 * Start search from the page head, to increase
781 		 * the chance for "high water" pages to be freed.
782 		 *
783 		 * Migrate empty pages to the end of the list.  This
784 		 * will speed the update of curpage as pages become
785 		 * idle.  Empty pages intermingled with idle pages
786 		 * is no big deal.  As soon as a page becomes un-empty,
787 		 * it will move back to the head of the list.
788 		 */
789 		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
790 		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
791 		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
792 		     ph = TAILQ_NEXT(ph, ph_pagelist))
793 			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
794 				break;
795 
796 		pp->pr_curpage = ph;
797 	}
798 
799 	pp->pr_nget++;
800 
801 	/*
802 	 * If we have a low water mark and we are now below that low
803 	 * water mark, add more items to the pool.
804 	 */
805 	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
806 		/*
807 		 * XXX: Should we log a warning?  Should we set up a timeout
808 		 * to try again in a second or so?  The latter could break
809 		 * a caller's assumptions about interrupt protection, etc.
810 		 */
811 	}
812 
813 	pr_leave(pp);
814 	simple_unlock(&pp->pr_slock);
815 	return (v);
816 }
817 
818 /*
819  * Internal version of pool_put().  Pool is already locked/entered.
820  */
821 static void
822 pool_do_put(struct pool *pp, void *v)
823 {
824 	struct pool_item *pi = v;
825 	struct pool_item_header *ph;
826 	caddr_t page;
827 	int s;
828 
829 	page = (caddr_t)((u_long)v & pp->pr_pagemask);
830 
831 #ifdef DIAGNOSTIC
832 	if (__predict_false(pp->pr_nout == 0)) {
833 		printf("pool %s: putting with none out\n",
834 		    pp->pr_wchan);
835 		panic("pool_put");
836 	}
837 #endif
838 
839 	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
840 		pr_printlog(pp, NULL, printf);
841 		panic("pool_put: %s: page header missing", pp->pr_wchan);
842 	}
843 
844 #ifdef LOCKDEBUG
845 	/*
846 	 * Check if we're freeing a locked simple lock.
847 	 */
848 	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
849 #endif
850 
851 	/*
852 	 * Return to item list.
853 	 */
854 #ifdef DIAGNOSTIC
855 	pi->pi_magic = PI_MAGIC;
856 #endif
857 #ifdef DEBUG
858 	{
859 		int i, *ip = v;
860 
861 		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
862 			*ip++ = PI_MAGIC;
863 		}
864 	}
865 #endif
866 
867 	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
868 	ph->ph_nmissing--;
869 	pp->pr_nput++;
870 	pp->pr_nitems++;
871 	pp->pr_nout--;
872 
873 	/* Cancel "pool empty" condition if it exists */
874 	if (pp->pr_curpage == NULL)
875 		pp->pr_curpage = ph;
876 
877 	if (pp->pr_flags & PR_WANTED) {
878 		pp->pr_flags &= ~PR_WANTED;
879 		if (ph->ph_nmissing == 0)
880 			pp->pr_nidle++;
881 		wakeup((caddr_t)pp);
882 		return;
883 	}
884 
885 	/*
886 	 * If this page is now complete, do one of two things:
887 	 *
888 	 *	(1) If we have more pages than the page high water
889 	 *	    mark, free the page back to the system.
890 	 *
891 	 *	(2) Move it to the end of the page list, so that
892 	 *	    we minimize our chances of fragmenting the
893 	 *	    pool.  Idle pages migrate to the end of the list
894 	 *	    (along with completely empty pages, so that we
895 	 *	    find un-empty pages more quickly when we update
896 	 *	    curpage), where they can be more easily swept up
897 	 *	    by the pagedaemon when pages are scarce.
898 	 */
899 	if (ph->ph_nmissing == 0) {
900 		pp->pr_nidle++;
901 		if (pp->pr_npages > pp->pr_maxpages) {
902 			pr_rmpage(pp, ph);
903 		} else {
904 			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
905 			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
906 
907 			/*
908 			 * Update the timestamp on the page.  A page must
909 			 * be idle for some period of time before it can
910 			 * be reclaimed by the pagedaemon.  This minimizes
911 			 * ping-pong'ing for memory.
912 			 */
913 			s = splclock();
914 			ph->ph_time = mono_time;
915 			splx(s);
916 
917 			/*
918 			 * Update the current page pointer.  Just look for
919 			 * the first page with any free items.
920 			 *
921 			 * XXX: Maybe we want an option to look for the
922 			 * page with the fewest available items, to minimize
923 			 * fragmentation?
924 			 */
925 			for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
926 			     ph = TAILQ_NEXT(ph, ph_pagelist))
927 				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
928 					break;
929 
930 			pp->pr_curpage = ph;
931 		}
932 	}
933 	/*
934 	 * If the page has just become un-empty, move it to the head of
935 	 * the list, and make it the current page.  The next allocation
936 	 * will get the item from this page, instead of further fragmenting
937 	 * the pool.
938 	 */
939 	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
940 		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
941 		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
942 		pp->pr_curpage = ph;
943 	}
944 }
945 
946 /*
947  * Return resource to the pool; must be called at appropriate spl level
948  */
949 #ifdef POOL_DIAGNOSTIC
950 void
951 _pool_put(struct pool *pp, void *v, const char *file, long line)
952 {
953 
954 	simple_lock(&pp->pr_slock);
955 	pr_enter(pp, file, line);
956 
957 	pr_log(pp, v, PRLOG_PUT, file, line);
958 
959 	pool_do_put(pp, v);
960 
961 	pr_leave(pp);
962 	simple_unlock(&pp->pr_slock);
963 }
964 #undef pool_put
965 #endif /* POOL_DIAGNOSTIC */
966 
967 void
968 pool_put(struct pool *pp, void *v)
969 {
970 
971 	simple_lock(&pp->pr_slock);
972 
973 	pool_do_put(pp, v);
974 
975 	simple_unlock(&pp->pr_slock);
976 }
977 
978 #ifdef POOL_DIAGNOSTIC
979 #define		pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
980 #endif
981 
982 /*
983  * Add N items to the pool.
984  */
985 int
986 pool_prime(struct pool *pp, int n)
987 {
988 	struct pool_item_header *ph;
989 	caddr_t cp;
990 	int newpages, error = 0;
991 
992 	simple_lock(&pp->pr_slock);
993 
994 	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
995 
996 	while (newpages-- > 0) {
997 		simple_unlock(&pp->pr_slock);
998 		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
999 		if (__predict_true(cp != NULL))
1000 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1001 		simple_lock(&pp->pr_slock);
1002 
1003 		if (__predict_false(cp == NULL || ph == NULL)) {
1004 			error = ENOMEM;
1005 			if (cp != NULL)
1006 				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1007 			break;
1008 		}
1009 
1010 		pool_prime_page(pp, cp, ph);
1011 		pp->pr_npagealloc++;
1012 		pp->pr_minpages++;
1013 	}
1014 
1015 	if (pp->pr_minpages >= pp->pr_maxpages)
1016 		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */
1017 
1018 	simple_unlock(&pp->pr_slock);
1019 	return (error);
1020 }
1021 
1022 /*
1023  * Add a page worth of items to the pool.
1024  *
1025  * Note, we must be called with the pool descriptor LOCKED.
1026  */
1027 static void
1028 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1029 {
1030 	struct pool_item *pi;
1031 	caddr_t cp = storage;
1032 	unsigned int align = pp->pr_align;
1033 	unsigned int ioff = pp->pr_itemoffset;
1034 	int n;
1035 
1036 	if (((u_long)cp & (pp->pr_pagesz - 1)) != 0)
1037 		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1038 
1039 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1040 		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
1041 		    ph, ph_hashlist);
1042 
1043 	/*
1044 	 * Insert page header.
1045 	 */
1046 	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
1047 	TAILQ_INIT(&ph->ph_itemlist);
1048 	ph->ph_page = storage;
1049 	ph->ph_nmissing = 0;
1050 	memset(&ph->ph_time, 0, sizeof(ph->ph_time));
1051 
1052 	pp->pr_nidle++;
1053 
1054 	/*
1055 	 * Color this page.
1056 	 */
1057 	cp = (caddr_t)(cp + pp->pr_curcolor);
1058 	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1059 		pp->pr_curcolor = 0;
1060 
1061 	/*
1062 	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1063 	 */
1064 	if (ioff != 0)
1065 		cp = (caddr_t)(cp + (align - ioff));
1066 
1067 	/*
1068 	 * Insert remaining chunks on the bucket list.
1069 	 */
1070 	n = pp->pr_itemsperpage;
1071 	pp->pr_nitems += n;
1072 
1073 	while (n--) {
1074 		pi = (struct pool_item *)cp;
1075 
1076 		/* Insert on page list */
1077 		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1078 #ifdef DIAGNOSTIC
1079 		pi->pi_magic = PI_MAGIC;
1080 #endif
1081 		cp = (caddr_t)(cp + pp->pr_size);
1082 	}
1083 
1084 	/*
1085 	 * If the pool was depleted, point at the new page.
1086 	 */
1087 	if (pp->pr_curpage == NULL)
1088 		pp->pr_curpage = ph;
1089 
1090 	if (++pp->pr_npages > pp->pr_hiwat)
1091 		pp->pr_hiwat = pp->pr_npages;
1092 }
1093 
1094 /*
1095  * Used by pool_get() when nitems drops below the low water mark.  This
1096  * is used to catch nitems up to the low water mark.
1097  *
1098  * Note 1, we never wait for memory here, we let the caller decide what to do.
1099  *
1100  * Note 2, this doesn't work with static pools.
1101  *
1102  * Note 3, we must be called with the pool already locked, and we return
1103  * with it locked.
1104  */
1105 static int
1106 pool_catchup(struct pool *pp)
1107 {
1108 	struct pool_item_header *ph;
1109 	caddr_t cp;
1110 	int error = 0;
1111 
1112 	if (pp->pr_roflags & PR_STATIC) {
1113 		/*
1114 		 * We dropped below the low water mark, and this is not a
1115 		 * good thing.  Log a warning.
1116 		 *
1117 		 * XXX: rate-limit this?
1118 		 */
1119 		printf("WARNING: static pool `%s' dropped below low water "
1120 		    "mark\n", pp->pr_wchan);
1121 		return (0);
1122 	}
1123 
1124 	while (POOL_NEEDS_CATCHUP(pp)) {
1125 		/*
1126 		 * Call the page back-end allocator for more memory.
1127 		 *
1128 		 * XXX: We never wait, so should we bother unlocking
1129 		 * the pool descriptor?
1130 		 */
1131 		simple_unlock(&pp->pr_slock);
1132 		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
1133 		if (__predict_true(cp != NULL))
1134 			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1135 		simple_lock(&pp->pr_slock);
1136 		if (__predict_false(cp == NULL || ph == NULL)) {
1137 			if (cp != NULL)
1138 				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1139 			error = ENOMEM;
1140 			break;
1141 		}
1142 		pool_prime_page(pp, cp, ph);
1143 		pp->pr_npagealloc++;
1144 	}
1145 
1146 	return (error);
1147 }
1148 
1149 void
1150 pool_setlowat(struct pool *pp, int n)
1151 {
1152 	int error;
1153 
1154 	simple_lock(&pp->pr_slock);
1155 
1156 	pp->pr_minitems = n;
1157 	pp->pr_minpages = (n == 0)
1158 		? 0
1159 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1160 
1161 	/* Make sure we're caught up with the newly-set low water mark. */
1162 	if (POOL_NEEDS_CATCHUP(pp) && ((error = pool_catchup(pp)) != 0)) {
1163 		/*
1164 		 * XXX: Should we log a warning?  Should we set up a timeout
1165 		 * to try again in a second or so?  The latter could break
1166 		 * a caller's assumptions about interrupt protection, etc.
1167 		 */
1168 	}
1169 
1170 	simple_unlock(&pp->pr_slock);
1171 }
1172 
1173 void
1174 pool_sethiwat(struct pool *pp, int n)
1175 {
1176 
1177 	simple_lock(&pp->pr_slock);
1178 
1179 	pp->pr_maxpages = (n == 0)
1180 		? 0
1181 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1182 
1183 	simple_unlock(&pp->pr_slock);
1184 }
1185 
1186 void
1187 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1188 {
1189 
1190 	simple_lock(&pp->pr_slock);
1191 
1192 	pp->pr_hardlimit = n;
1193 	pp->pr_hardlimit_warning = warnmess;
1194 	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1195 	pp->pr_hardlimit_warning_last.tv_sec = 0;
1196 	pp->pr_hardlimit_warning_last.tv_usec = 0;
1197 
1198 	/*
1199 	 * In-line version of pool_sethiwat(), because we don't want to
1200 	 * release the lock.
1201 	 */
1202 	pp->pr_maxpages = (n == 0)
1203 		? 0
1204 		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1205 
1206 	simple_unlock(&pp->pr_slock);
1207 }
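
/*
 * Illustrative tuning sketch (hypothetical pool and numbers, not part of
 * this file): after initializing a pool, a subsystem may pre-set its water
 * marks and hard limit.  pool_setlowat() makes pool_catchup()/pool_reclaim()
 * keep at least that many items resident, pool_sethiwat() bounds how many
 * pages the pool keeps around, and pool_sethardlimit() makes pool_get()
 * sleep or fail (logging the warning at most once per ratecap seconds)
 * once that many items are outstanding.
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *	    "foopl", 0, NULL, NULL, M_DEVBUF);
 *	pool_setlowat(&foo_pool, 16);
 *	pool_sethiwat(&foo_pool, 1024);
 *	pool_sethardlimit(&foo_pool, 2048,
 *	    "WARNING: out of foo structures", 60);
 */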
1208 
1209 /*
1210  * Default page allocator.
1211  */
1212 static void *
1213 pool_page_alloc(unsigned long sz, int flags, int mtype)
1214 {
1215 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1216 
1217 	return ((void *)uvm_km_alloc_poolpage(waitok));
1218 }
1219 
1220 static void
1221 pool_page_free(void *v, unsigned long sz, int mtype)
1222 {
1223 
1224 	uvm_km_free_poolpage((vaddr_t)v);
1225 }
1226 
1227 /*
1228  * Alternate pool page allocator for pools that know they will
1229  * never be accessed in interrupt context.
1230  */
1231 void *
1232 pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
1233 {
1234 	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1235 
1236 	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1237 	    waitok));
1238 }
1239 
1240 void
1241 pool_page_free_nointr(void *v, unsigned long sz, int mtype)
1242 {
1243 
1244 	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1245 }
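
/*
 * Illustrative sketch (hypothetical pool, not part of this file): a pool
 * whose items are only ever used from process context can pass the
 * "nointr" allocator pair above to pool_init(), so that its backing pages
 * are mapped through kernel_map rather than the interrupt-safe path used
 * by the default allocator:
 *
 *	pool_init(&bar_pool, sizeof(struct bar), 0, 0, 0,
 *	    "barpl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
 *	    M_DEVBUF);
 */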
1246 
1247 
1248 /*
1249  * Release all complete pages that have not been used recently.
1250  */
1251 void
1252 #ifdef POOL_DIAGNOSTIC
1253 _pool_reclaim(struct pool *pp, const char *file, long line)
1254 #else
1255 pool_reclaim(struct pool *pp)
1256 #endif
1257 {
1258 	struct pool_item_header *ph, *phnext;
1259 	struct pool_cache *pc;
1260 	struct timeval curtime;
1261 	int s;
1262 
1263 	if (pp->pr_roflags & PR_STATIC)
1264 		return;
1265 
1266 	if (simple_lock_try(&pp->pr_slock) == 0)
1267 		return;
1268 	pr_enter(pp, file, line);
1269 
1270 	/*
1271 	 * Reclaim items from the pool's caches.
1272 	 */
1273 	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
1274 	     pc = TAILQ_NEXT(pc, pc_poollist))
1275 		pool_cache_reclaim(pc);
1276 
1277 	s = splclock();
1278 	curtime = mono_time;
1279 	splx(s);
1280 
1281 	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
1282 		phnext = TAILQ_NEXT(ph, ph_pagelist);
1283 
1284 		/* Check our minimum page claim */
1285 		if (pp->pr_npages <= pp->pr_minpages)
1286 			break;
1287 
1288 		if (ph->ph_nmissing == 0) {
1289 			struct timeval diff;
1290 			timersub(&curtime, &ph->ph_time, &diff);
1291 			if (diff.tv_sec < pool_inactive_time)
1292 				continue;
1293 
1294 			/*
1295 			 * If freeing this page would put us below
1296 			 * the low water mark, stop now.
1297 			 */
1298 			if ((pp->pr_nitems - pp->pr_itemsperpage) <
1299 			    pp->pr_minitems)
1300 				break;
1301 
1302 			pr_rmpage(pp, ph);
1303 		}
1304 	}
1305 
1306 	pr_leave(pp);
1307 	simple_unlock(&pp->pr_slock);
1308 }
1309 
1310 
1311 /*
1312  * Drain pools, one at a time.
1313  *
1314  * Note, we must never be called from an interrupt context.
1315  */
1316 void
1317 pool_drain(void *arg)
1318 {
1319 	struct pool *pp;
1320 	int s;
1321 
1322 	s = splvm();
1323 	simple_lock(&pool_head_slock);
1324 
1325 	if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL)
1326 		goto out;
1327 
1328 	pp = drainpp;
1329 	drainpp = TAILQ_NEXT(pp, pr_poollist);
1330 
1331 	pool_reclaim(pp);
1332 
1333  out:
1334 	simple_unlock(&pool_head_slock);
1335 	splx(s);
1336 }
1337 
1338 
1339 /*
1340  * Diagnostic helpers.
1341  */
1342 void
1343 pool_print(struct pool *pp, const char *modif)
1344 {
1345 	int s;
1346 
1347 	s = splvm();
1348 	if (simple_lock_try(&pp->pr_slock) == 0) {
1349 		printf("pool %s is locked; try again later\n",
1350 		    pp->pr_wchan);
1351 		splx(s);
1352 		return;
1353 	}
1354 	pool_print1(pp, modif, printf);
1355 	simple_unlock(&pp->pr_slock);
1356 	splx(s);
1357 }
1358 
1359 void
1360 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1361 {
1362 	int didlock = 0;
1363 
1364 	if (pp == NULL) {
1365 		(*pr)("Must specify a pool to print.\n");
1366 		return;
1367 	}
1368 
1369 	/*
1370 	 * Called from DDB; interrupts should be blocked, and all
1371 	 * other processors should be paused.  We can skip locking
1372 	 * the pool in this case.
1373 	 *
1374 	 * We do a simple_lock_try() just to print the lock
1375 	 * status, however.
1376 	 */
1377 
1378 	if (simple_lock_try(&pp->pr_slock) == 0)
1379 		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
1380 	else
1381 		didlock = 1;
1382 
1383 	pool_print1(pp, modif, pr);
1384 
1385 	if (didlock)
1386 		simple_unlock(&pp->pr_slock);
1387 }
1388 
1389 static void
1390 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1391 {
1392 	struct pool_item_header *ph;
1393 	struct pool_cache *pc;
1394 	struct pool_cache_group *pcg;
1395 #ifdef DIAGNOSTIC
1396 	struct pool_item *pi;
1397 #endif
1398 	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1399 	char c;
1400 
1401 	while ((c = *modif++) != '\0') {
1402 		if (c == 'l')
1403 			print_log = 1;
1404 		if (c == 'p')
1405 			print_pagelist = 1;
1406 		if (c == 'c')
1407 			print_cache = 1;
1409 	}
1410 
1411 	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1412 	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1413 	    pp->pr_roflags);
1414 	(*pr)("\tpagesz %u, mtype %d\n", pp->pr_pagesz, pp->pr_mtype);
1415 	(*pr)("\talloc %p, release %p\n", pp->pr_alloc, pp->pr_free);
1416 	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1417 	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1418 	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1419 	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1420 
1421 	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1422 	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1423 	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1424 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1425 
1426 	if (print_pagelist == 0)
1427 		goto skip_pagelist;
1428 
1429 	if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
1430 		(*pr)("\n\tpage list:\n");
1431 	for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) {
1432 		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1433 		    ph->ph_page, ph->ph_nmissing,
1434 		    (u_long)ph->ph_time.tv_sec,
1435 		    (u_long)ph->ph_time.tv_usec);
1436 #ifdef DIAGNOSTIC
1437 		for (pi = TAILQ_FIRST(&ph->ph_itemlist); pi != NULL;
1438 		     pi = TAILQ_NEXT(pi, pi_list)) {
1439 			if (pi->pi_magic != PI_MAGIC) {
1440 				(*pr)("\t\t\titem %p, magic 0x%x\n",
1441 				    pi, pi->pi_magic);
1442 			}
1443 		}
1444 #endif
1445 	}
1446 	if (pp->pr_curpage == NULL)
1447 		(*pr)("\tno current page\n");
1448 	else
1449 		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1450 
1451  skip_pagelist:
1452 
1453 	if (print_log == 0)
1454 		goto skip_log;
1455 
1456 	(*pr)("\n");
1457 	if ((pp->pr_roflags & PR_LOGGING) == 0)
1458 		(*pr)("\tno log\n");
1459 	else
1460 		pr_printlog(pp, NULL, pr);
1461 
1462  skip_log:
1463 
1464 	if (print_cache == 0)
1465 		goto skip_cache;
1466 
1467 	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
1468 	     pc = TAILQ_NEXT(pc, pc_poollist)) {
1469 		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1470 		    pc->pc_allocfrom, pc->pc_freeto);
1471 		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
1472 		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1473 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1474 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1475 			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1476 			for (i = 0; i < PCG_NOBJECTS; i++)
1477 				(*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
1478 		}
1479 	}
1480 
1481  skip_cache:
1482 
1483 	pr_enter_check(pp, pr);
1484 }
1485 
1486 int
1487 pool_chk(struct pool *pp, const char *label)
1488 {
1489 	struct pool_item_header *ph;
1490 	int r = 0;
1491 
1492 	simple_lock(&pp->pr_slock);
1493 
1494 	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
1495 	     ph = TAILQ_NEXT(ph, ph_pagelist)) {
1496 
1497 		struct pool_item *pi;
1498 		int n;
1499 		caddr_t page;
1500 
1501 		page = (caddr_t)((u_long)ph & pp->pr_pagemask);
1502 		if (page != ph->ph_page &&
1503 		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
1504 			if (label != NULL)
1505 				printf("%s: ", label);
1506 			printf("pool(%p:%s): page inconsistency: page %p;"
1507 			       " at page head addr %p (p %p)\n", pp,
1508 				pp->pr_wchan, ph->ph_page,
1509 				ph, page);
1510 			r++;
1511 			goto out;
1512 		}
1513 
1514 		for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1515 		     pi != NULL;
1516 		     pi = TAILQ_NEXT(pi,pi_list), n++) {
1517 
1518 #ifdef DIAGNOSTIC
1519 			if (pi->pi_magic != PI_MAGIC) {
1520 				if (label != NULL)
1521 					printf("%s: ", label);
1522 				printf("pool(%s): free list modified: magic=%x;"
1523 				       " page %p; item ordinal %d;"
1524 				       " addr %p (p %p)\n",
1525 					pp->pr_wchan, pi->pi_magic, ph->ph_page,
1526 					n, pi, page);
1527 				panic("pool");
1528 			}
1529 #endif
1530 			page = (caddr_t)((u_long)pi & pp->pr_pagemask);
1531 			if (page == ph->ph_page)
1532 				continue;
1533 
1534 			if (label != NULL)
1535 				printf("%s: ", label);
1536 			printf("pool(%p:%s): page inconsistency: page %p;"
1537 			       " item ordinal %d; addr %p (p %p)\n", pp,
1538 				pp->pr_wchan, ph->ph_page,
1539 				n, pi, page);
1540 			r++;
1541 			goto out;
1542 		}
1543 	}
1544 out:
1545 	simple_unlock(&pp->pr_slock);
1546 	return (r);
1547 }
1548 
1549 /*
1550  * pool_cache_init:
1551  *
1552  *	Initialize a pool cache.
1553  *
1554  *	NOTE: If the pool must be protected from interrupts, we expect
1555  *	to be called at the appropriate interrupt priority level.
1556  */
1557 void
1558 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1559     int (*ctor)(void *, void *, int),
1560     void (*dtor)(void *, void *),
1561     void *arg)
1562 {
1563 
1564 	TAILQ_INIT(&pc->pc_grouplist);
1565 	simple_lock_init(&pc->pc_slock);
1566 
1567 	pc->pc_allocfrom = NULL;
1568 	pc->pc_freeto = NULL;
1569 	pc->pc_pool = pp;
1570 
1571 	pc->pc_ctor = ctor;
1572 	pc->pc_dtor = dtor;
1573 	pc->pc_arg  = arg;
1574 
1575 	pc->pc_hits   = 0;
1576 	pc->pc_misses = 0;
1577 
1578 	pc->pc_ngroups = 0;
1579 
1580 	pc->pc_nitems = 0;
1581 
1582 	simple_lock(&pp->pr_slock);
1583 	TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1584 	simple_unlock(&pp->pr_slock);
1585 }
1586 
1587 /*
1588  * pool_cache_destroy:
1589  *
1590  *	Destroy a pool cache.
1591  */
1592 void
1593 pool_cache_destroy(struct pool_cache *pc)
1594 {
1595 	struct pool *pp = pc->pc_pool;
1596 
1597 	/* First, invalidate the entire cache. */
1598 	pool_cache_invalidate(pc);
1599 
1600 	/* ...and remove it from the pool's cache list. */
1601 	simple_lock(&pp->pr_slock);
1602 	TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1603 	simple_unlock(&pp->pr_slock);
1604 }
1605 
1606 static __inline void *
1607 pcg_get(struct pool_cache_group *pcg)
1608 {
1609 	void *object;
1610 	u_int idx;
1611 
1612 	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1613 	KASSERT(pcg->pcg_avail != 0);
1614 	idx = --pcg->pcg_avail;
1615 
1616 	KASSERT(pcg->pcg_objects[idx] != NULL);
1617 	object = pcg->pcg_objects[idx];
1618 	pcg->pcg_objects[idx] = NULL;
1619 
1620 	return (object);
1621 }
1622 
1623 static __inline void
1624 pcg_put(struct pool_cache_group *pcg, void *object)
1625 {
1626 	u_int idx;
1627 
1628 	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1629 	idx = pcg->pcg_avail++;
1630 
1631 	KASSERT(pcg->pcg_objects[idx] == NULL);
1632 	pcg->pcg_objects[idx] = object;
1633 }
1634 
1635 /*
1636  * pool_cache_get:
1637  *
1638  *	Get an object from a pool cache.
1639  */
1640 void *
1641 pool_cache_get(struct pool_cache *pc, int flags)
1642 {
1643 	struct pool_cache_group *pcg;
1644 	void *object;
1645 
1646 #ifdef LOCKDEBUG
1647 	if (flags & PR_WAITOK)
1648 		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1649 #endif
1650 
1651 	simple_lock(&pc->pc_slock);
1652 
1653 	if ((pcg = pc->pc_allocfrom) == NULL) {
1654 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1655 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1656 			if (pcg->pcg_avail != 0) {
1657 				pc->pc_allocfrom = pcg;
1658 				goto have_group;
1659 			}
1660 		}
1661 
1662 		/*
1663 		 * No groups with any available objects.  Allocate
1664 		 * a new object, construct it, and return it to
1665 		 * the caller.  We will allocate a group, if necessary,
1666 		 * when the object is freed back to the cache.
1667 		 */
1668 		pc->pc_misses++;
1669 		simple_unlock(&pc->pc_slock);
1670 		object = pool_get(pc->pc_pool, flags);
1671 		if (object != NULL && pc->pc_ctor != NULL) {
1672 			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1673 				pool_put(pc->pc_pool, object);
1674 				return (NULL);
1675 			}
1676 		}
1677 		return (object);
1678 	}
1679 
1680  have_group:
1681 	pc->pc_hits++;
1682 	pc->pc_nitems--;
1683 	object = pcg_get(pcg);
1684 
1685 	if (pcg->pcg_avail == 0)
1686 		pc->pc_allocfrom = NULL;
1687 
1688 	simple_unlock(&pc->pc_slock);
1689 
1690 	return (object);
1691 }
1692 
1693 /*
1694  * pool_cache_put:
1695  *
1696  *	Put an object back to the pool cache.
1697  */
1698 void
1699 pool_cache_put(struct pool_cache *pc, void *object)
1700 {
1701 	struct pool_cache_group *pcg;
1702 
1703 	simple_lock(&pc->pc_slock);
1704 
1705 	if ((pcg = pc->pc_freeto) == NULL) {
1706 		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1707 		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
1708 			if (pcg->pcg_avail != PCG_NOBJECTS) {
1709 				pc->pc_freeto = pcg;
1710 				goto have_group;
1711 			}
1712 		}
1713 
1714 		/*
1715 		 * No group has room for the object.  Attempt to
1716 		 * allocate a new group.
1717 		 */
1718 		simple_unlock(&pc->pc_slock);
1719 		pcg = pool_get(&pcgpool, PR_NOWAIT);
1720 		if (pcg != NULL) {
1721 			memset(pcg, 0, sizeof(*pcg));
1722 			simple_lock(&pc->pc_slock);
1723 			pc->pc_ngroups++;
1724 			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1725 			if (pc->pc_freeto == NULL)
1726 				pc->pc_freeto = pcg;
1727 			goto have_group;
1728 		}
1729 
1730 		/*
1731 		 * Unable to allocate a cache group; destruct the object
1732 		 * and free it back to the pool.
1733 		 */
1734 		pool_cache_destruct_object(pc, object);
1735 		return;
1736 	}
1737 
1738  have_group:
1739 	pc->pc_nitems++;
1740 	pcg_put(pcg, object);
1741 
1742 	if (pcg->pcg_avail == PCG_NOBJECTS)
1743 		pc->pc_freeto = NULL;
1744 
1745 	simple_unlock(&pc->pc_slock);
1746 }
1747 
1748 /*
1749  * pool_cache_destruct_object:
1750  *
1751  *	Force destruction of an object and its release back into
1752  *	the pool.
1753  */
1754 void
1755 pool_cache_destruct_object(struct pool_cache *pc, void *object)
1756 {
1757 
1758 	if (pc->pc_dtor != NULL)
1759 		(*pc->pc_dtor)(pc->pc_arg, object);
1760 	pool_put(pc->pc_pool, object);
1761 }
1762 
1763 /*
1764  * pool_cache_do_invalidate:
1765  *
1766  *	This internal function implements pool_cache_invalidate() and
1767  *	pool_cache_reclaim().
1768  */
1769 static void
1770 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1771     void (*putit)(struct pool *, void *))
1772 {
1773 	struct pool_cache_group *pcg, *npcg;
1774 	void *object;
1775 
1776 	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1777 	     pcg = npcg) {
1778 		npcg = TAILQ_NEXT(pcg, pcg_list);
1779 		while (pcg->pcg_avail != 0) {
1780 			pc->pc_nitems--;
1781 			object = pcg_get(pcg);
1782 			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1783 				pc->pc_allocfrom = NULL;
1784 			if (pc->pc_dtor != NULL)
1785 				(*pc->pc_dtor)(pc->pc_arg, object);
1786 			(*putit)(pc->pc_pool, object);
1787 		}
1788 		if (free_groups) {
1789 			pc->pc_ngroups--;
1790 			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1791 			if (pc->pc_freeto == pcg)
1792 				pc->pc_freeto = NULL;
1793 			pool_put(&pcgpool, pcg);
1794 		}
1795 	}
1796 }
1797 
1798 /*
1799  * pool_cache_invalidate:
1800  *
1801  *	Invalidate a pool cache (destruct and release all of the
1802  *	cached objects).
1803  */
1804 void
1805 pool_cache_invalidate(struct pool_cache *pc)
1806 {
1807 
1808 	simple_lock(&pc->pc_slock);
1809 	pool_cache_do_invalidate(pc, 0, pool_put);
1810 	simple_unlock(&pc->pc_slock);
1811 }
1812 
1813 /*
1814  * pool_cache_reclaim:
1815  *
1816  *	Reclaim a pool cache for pool_reclaim().
1817  */
1818 static void
1819 pool_cache_reclaim(struct pool_cache *pc)
1820 {
1821 
1822 	simple_lock(&pc->pc_slock);
1823 	pool_cache_do_invalidate(pc, 1, pool_do_put);
1824 	simple_unlock(&pc->pc_slock);
1825 }
1826