/*	$OpenBSD: subr_pool.c,v 1.12 2001/08/07 21:02:22 art Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.59 2001/06/05 18:51:04 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 * XXX - for now.
 */
#define SIMPLELOCK_INITIALIZER	{ SLOCK_UNLOCKED }
#ifdef LOCKDEBUG
#define simple_lock_freecheck(a, s)	do { /* nothing */ } while (0)
#define simple_lock_only_held(lkp, str)	do { /* nothing */ } while (0)
#endif
#define LOCK_ASSERT(x)	/* nothing */

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `ph_itemlist' in each page header. The memory for building
 * the page list is either taken from the allocated pages themselves (for
 * small pool items) or taken from an internal pool of page headers (`phpool').
 */
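/*
 * Example: typical pool usage (an illustrative sketch added for this
 * edit, not part of the original file; `struct foo', foo_pool and the
 * M_DEVBUF malloc type are assumed names).  A subsystem declares one
 * pool per object type, initializes it once, and then gets and puts
 * fixed-size items instead of calling malloc()/free():
 *
 *	static struct pool foo_pool;
 *
 *	void
 *	foo_subsystem_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *		    "foopl", 0, NULL, NULL, M_DEVBUF);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 *
 * Passing 0 for pagesz and NULL for the alloc/release hooks selects
 * the default page-sized back-end allocator defined below.
 */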
/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
static struct pool phpool;

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
				ph_hashlist;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
};

struct pool_item {
#ifdef DIAGNOSTIC
	int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeef
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};


#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem. This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups. Each cache group references
 * up to 16 constructed objects. When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group. When a cache group frees an object back to the pool,
 * it first calls the object's destructor. This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool. This allows a single
 * object type to have multiple constructed forms. The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well. Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools. By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

/* The cache group pool. */
static struct pool pcgpool;

/* The pool cache group. */
#define	PCG_NOBJECTS		16
struct pool_cache_group {
	TAILQ_ENTRY(pool_cache_group)
		pcg_list;	/* link in the pool cache's group list */
	u_int	pcg_avail;	/* # available objects */
			/* pointers to the objects */
	void	*pcg_objects[PCG_NOBJECTS];
};
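/*
 * Example: typical pool cache usage (an illustrative sketch added for
 * this edit, not part of the original file; the foo_* names are
 * assumed).  A cache is bound to an existing pool and hands out
 * already-constructed objects when it can; the constructor must
 * return 0 on success:
 *
 *	static struct pool foo_pool;
 *	static struct pool_cache foo_cache;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *
 *	obj = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, obj);
 *
 * A hit in the cache skips both the pool and the constructor; a miss
 * falls through to pool_get() and runs foo_ctor() on the fresh item.
 */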
static void	pool_cache_reclaim(struct pool_cache *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	*pool_page_alloc(unsigned long, int, int);
static void	pool_page_free(void *, unsigned long, int);

static void pool_print1(struct pool *, const char *,
	int (*)(const char *, ...));

/*
 * Pool log entry. An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

#ifdef POOL_DIAGNOSTIC
static __inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    int (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ?
				    "get" : "put", pl->pl_addr);
				(*pr)("\t\tfile: %s at line %lu\n",
				    pl->pl_file, pl->pl_line);
			}
		}
		if (++n >= pp->pr_logsize)
			n = 0;
	}
}

static __inline void
pr_enter(struct pool *pp, const char *file, long line)
{

	if (__predict_false(pp->pr_entered_file != NULL)) {
		printf("pool %s: reentrancy at file %s line %ld\n",
		    pp->pr_wchan, file, line);
		printf("         previous entry at file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
		panic("pr_enter");
	}

	pp->pr_entered_file = file;
	pp->pr_entered_line = line;
}

static __inline void
pr_leave(struct pool *pp)
{

	if (__predict_false(pp->pr_entered_file == NULL)) {
		printf("pool %s not entered?\n", pp->pr_wchan);
		panic("pr_leave");
	}

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;
}

static __inline void
pr_enter_check(struct pool *pp, int (*pr)(const char *, ...))
{

	if (pp->pr_entered_file != NULL)
		(*pr)("\n\tcurrently entered from file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
}
#else
#define	pr_log(pp, v, action, file, line)
#define	pr_printlog(pp, pi, pr)
#define	pr_enter(pp, file, line)
#define	pr_leave(pp)
#define	pr_enter_check(pp, pr)
#endif /* POOL_DIAGNOSTIC */

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
	     ph != NULL;
	     ph = LIST_NEXT(ph, ph_hashlist)) {
		if (ph->ph_page == page)
			return (ph);
	}
	return (NULL);
}
/*
 * Remove a page from the pool.
 */
static __inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it.
	 */
	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
	(*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
	pp->pr_npages--;
	pp->pr_npagefree++;

	if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		int s;
		LIST_REMOVE(ph, ph_hashlist);
		s = splhigh();
		pool_put(&phpool, ph);
		splx(s);
	}

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
		     ph = TAILQ_NEXT(ph, ph_pagelist))
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, size_t pagesz,
    void *(*alloc)(unsigned long, int, int),
    void (*release)(void *, unsigned long, int),
    int mtype)
{
	int off, slack, i;

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

	/*
	 * Check arguments and construct default values.
	 */
	if (!powerof2(pagesz))
		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);

	if (alloc == NULL && release == NULL) {
		alloc = pool_page_alloc;
		release = pool_page_free;
		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
	} else if ((alloc != NULL && release != NULL) == 0) {
		/* If you specify one, must specify both. */
		panic("pool_init: must specify alloc and release together");
	}

	if (pagesz == 0)
		pagesz = PAGE_SIZE;

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = ALIGN(size);
	if (size > pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);

	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INIT(&pp->pr_pagelist);
	TAILQ_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_mtype = mtype;
	pp->pr_alloc = alloc;
	pp->pr_free = release;
	pp->pr_pagesz = pagesz;
	pp->pr_pagemask = ~(pagesz - 1);
	pp->pr_pageshift = ffs(pagesz) - 1;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go on a hash table, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < pagesz/16) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off =
		    pagesz - ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = pagesz;
		for (i = 0; i < PR_HASHTABSIZE; i++) {
			LIST_INIT(&pp->pr_hashtab[i]);
		}
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", 0, 0, 0, 0);
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", 0, 0, 0, 0);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);
}
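/*
 * Worked example of the layout decisions above (added for this edit;
 * the concrete numbers are assumptions for illustration).  Take a
 * 96-byte item on a 4096-byte page, ALIGN(1) == 4, and a page header
 * of 40 bytes once aligned:
 *
 *  - 96 < 4096/16, so PR_PHINPAGE is set and the header is placed at
 *    the end of the page: off = pr_phoffset = 4096 - 40 = 4056.
 *  - With ioff == 0, pr_itemsperpage = 4056 / 96 = 42, leaving
 *    slack = 4056 - 42*96 = 24 bytes, so pr_maxcolor = (24/4)*4 = 24;
 *    successive pages start their items at offsets 0, 4, ..., 24
 *    ("cache coloring") rather than aliasing the same cache lines.
 *  - A 512-byte item instead fails the 1/16 test, so its header comes
 *    from phpool and is found again via the hash table: an item at,
 *    say, 0xd5a2c8f0 masks with pr_pagemask to page 0xd5a2c000, and
 *    PR_HASH_INDEX() hashes that to bucket (0xd5a2c000 >> 12) &
 *    (PR_HASHTABSIZE - 1) for the pr_find_pagehead() walk.
 */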
/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool_cache *pc;

	/* Destroy all caches for this pool. */
	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
		pool_cache_destroy(pc);

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u\n",
		    pp->pr_nout);
	}
#endif

	/* Remove all pages */
	if ((pp->pr_roflags & PR_STATIC) == 0)
		while ((ph = pp->pr_pagelist.tqh_first) != NULL)
			pr_rmpage(pp, ph);

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	/* XXX Only clear this if we were drainpp? */
	drainpp = NULL;
	simple_unlock(&pool_head_slock);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif

	if (pp->pr_roflags & PR_FREEHEADER)
		free(pp, M_POOL);
}

static __inline struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else {
		s = splhigh();
		ph = pool_get(&phpool, flags);
		splx(s);
	}

	return (ph);
}
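/*
 * Example: supplying a custom page allocator pair (an illustrative
 * sketch added for this edit, not part of the original file; the
 * foo_* names are assumed).  As pool_init() enforces above, alloc and
 * release must be given together, and they deal in whole pages of the
 * pagesz passed to pool_init():
 *
 *	void *
 *	foo_page_alloc(unsigned long sz, int flags, int mtype)
 *	{
 *		boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 *
 *		return ((void *)uvm_km_alloc_poolpage(waitok));
 *	}
 *
 *	void
 *	foo_page_free(void *v, unsigned long sz, int mtype)
 *	{
 *		uvm_km_free_poolpage((vaddr_t)v);
 *	}
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    0, foo_page_alloc, foo_page_free, M_DEVBUF);
 *
 * pool_page_alloc_nointr()/pool_page_free_nointr() below are a real
 * in-tree instance of such a pair.
 */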
/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false((pp->pr_roflags & PR_STATIC) &&
	    (flags & PR_MALLOCOK))) {
		pr_printlog(pp, NULL, printf);
		panic("pool_get: static");
	}

	if (__predict_false(curproc == NULL && /* doing_shutdown == 0 && XXX*/
	    (flags & PR_WAITOK) != 0))
		panic("pool_get: must have NOWAIT");

#endif
	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		if (flags & PR_URGENT)
			panic("pool_get: urgent");

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent\n");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);
		simple_lock(&pp->pr_slock);
		pr_enter(pp, file, line);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				(*pp->pr_free)(v, pp->pr_pagesz, pp->pr_mtype);

			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool. Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if (flags & PR_URGENT)
				panic("pool_get: urgent");

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: we actually want to wait just until
			 * the page allocator has memory again. Depending
			 * on this pool's usage, we might get stuck here
			 * for a long time.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pp->pr_npagealloc++;
		pool_prime_page(pp, v, ph);

		/* Start the allocation process over. */
		goto startover;
	}

	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		printf("pool_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_get: nitems inconsistent\n");
	}

	pr_log(pp, v, PRLOG_GET, file, line);

	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
		pr_printlog(pp, pi, printf);
		panic("pool_get(%s): free list modified: magic=%x; page %p;"
		    " item addr %p\n",
		    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
	}
#endif

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;
	}
	ph->ph_nmissing++;
	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase
		 * the chance for "high water" pages to be freed.
		 *
		 * Migrate empty pages to the end of the list. This
		 * will speed the update of curpage as pages become
		 * idle. Empty pages intermingled with idle pages
		 * is no big deal. As soon as a page becomes un-empty,
		 * it will move back to the head of the list.
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
		     ph = TAILQ_NEXT(ph, ph_pagelist))
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}

/*
 * Internal version of pool_put(). Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	page = (caddr_t)((u_long)v & pp->pr_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
	{
		int i, *ip = v;

		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
			*ip++ = PI_MAGIC;
		}
	}
#endif

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now complete, do one of two things:
	 *
	 * (1) If we have more pages than the page high water
	 *     mark, free the page back to the system.
	 *
	 * (2) Move it to the end of the page list, so that
	 *     we minimize our chances of fragmenting the
	 *     pool. Idle pages migrate to the end (along with
	 *     completely empty pages, so that we find un-empty
	 *     pages more quickly when we update curpage) of the
	 *     list so they can be more easily swept up by
	 *     the pagedaemon when pages are scarce.
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_maxpages) {
			pr_rmpage(pp, ph);
		} else {
			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page. A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon. This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);

			/*
			 * Update the current page pointer. Just look for
			 * the first page with any free items.
			 *
			 * XXX: Maybe we want an option to look for the
			 * page with the fewest available items, to minimize
			 * fragmentation?
			 */
			for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
			     ph = TAILQ_NEXT(ph, ph_pagelist))
				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
					break;

			pp->pr_curpage = ph;
		}
	}
	/*
	 * If the page has just become un-empty, move it to the head of
	 * the list, and make it the current page. The next allocation
	 * will get the item from this page, instead of further fragmenting
	 * the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}
/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{

	simple_lock(&pp->pr_slock);

	pool_do_put(pp, v);

	simple_unlock(&pp->pr_slock);
}

#ifdef POOL_DIAGNOSTIC
#define	pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages, error = 0;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		simple_unlock(&pp->pr_slock);
		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);

		if (__predict_false(cp == NULL || ph == NULL)) {
			error = ENOMEM;
			if (cp != NULL)
				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return (error);
}
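/*
 * Example: pre-allocating pages with pool_prime() (an illustrative
 * sketch added for this edit; the foo_* names are assumed).  A driver
 * that must not fail its first allocations at interrupt time can
 * prime the pool at attach time, when sleeping for memory is still
 * allowed:
 *
 *	if (pool_prime(&foo_pool, 64) != 0)
 *		printf("foo: could not preallocate descriptors\n");
 *
 * Note that pool_prime() rounds N up to whole pages and also raises
 * pr_minpages, so the primed pages are not reclaimed behind the
 * caller's back.
 */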
/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

	if (((u_long)cp & (pp->pr_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);

	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
		    ph, ph_hashlist);

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	memset(&ph->ph_time, 0, sizeof(ph->ph_time));

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, this doesn't work with static pools.
 *
 * Note 3, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;

	if (pp->pr_roflags & PR_STATIC) {
		/*
		 * We dropped below the low water mark, and this is not a
		 * good thing. Log a warning.
		 *
		 * XXX: rate-limit this?
		 */
		printf("WARNING: static pool `%s' dropped below low water "
		    "mark\n", pp->pr_wchan);
		return (0);
	}

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
		 */
		simple_unlock(&pp->pr_slock);
		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_setlowat(struct pool *pp, int n)
{
	int error;

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && (error = pool_catchup(pp)) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}
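/*
 * Example: tuning a pool's water marks (an illustrative sketch added
 * for this edit; the numbers and the foo_pool name are assumptions).
 * Low and high water marks and the hard limit are expressed in items:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 512);
 *	pool_sethardlimit(&foo_pool, 1024,
 *	    "WARNING: foo pool limit reached", 60);
 *
 * The low water mark keeps at least 32 items' worth of pages primed;
 * above the high water mark, completely idle pages are freed back to
 * the system; at the hard limit, pool_get() fails (or sleeps, with
 * PR_WAITOK and without PR_LIMITFAIL) and the warning message is
 * rate-limited to once per 60 seconds.
 */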
/*
 * Default page allocator.
 */
static void *
pool_page_alloc(unsigned long sz, int flags, int mtype)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage(waitok));
}

static void
pool_page_free(void *v, unsigned long sz, int mtype)
{
	uvm_km_free_poolpage((vaddr_t)v);
}

/*
 * Alternate pool page allocator for pools that know they will
 * never be accessed in interrupt context.
 */
void *
pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
	    waitok));
}

void
pool_page_free_nointr(void *v, unsigned long sz, int mtype)
{

	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
}


/*
 * Release all complete pages that have not been used recently.
 */
void
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct timeval curtime;
	int s;

	if (pp->pr_roflags & PR_STATIC)
		return;

	if (simple_lock_try(&pp->pr_slock) == 0)
		return;
	pr_enter(pp, file, line);

	/*
	 * Reclaim items from the pool's caches.
	 */
	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
	     pc = TAILQ_NEXT(pc, pc_poollist))
		pool_cache_reclaim(pc);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		if (ph->ph_nmissing == 0) {
			struct timeval diff;
			timersub(&curtime, &ph->ph_time, &diff);
			if (diff.tv_sec < pool_inactive_time)
				continue;

			/*
			 * If freeing this page would put us below
			 * the low water mark, stop now.
			 */
			if ((pp->pr_nitems - pp->pr_itemsperpage) <
			    pp->pr_minitems)
				break;

			pr_rmpage(pp, ph);
		}
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
}


/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	s = splvm();
	simple_lock(&pool_head_slock);

	if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL)
		goto out;

	pp = drainpp;
	drainpp = TAILQ_NEXT(pp, pr_poollist);

	pool_reclaim(pp);

 out:
	simple_unlock(&pool_head_slock);
	splx(s);
}
/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

static void
pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\tpagesz %u, mtype %d\n", pp->pr_pagesz, pp->pr_mtype);
	(*pr)("\talloc %p, release %p\n", pp->pr_alloc, pp->pr_free);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
		(*pr)("\n\tpage list:\n");
	for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		for (pi = TAILQ_FIRST(&ph->ph_itemlist); pi != NULL;
		     pi = TAILQ_NEXT(pi, pi_list)) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:

	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:

	if (print_cache == 0)
		goto skip_cache;

	for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL;
	     pc = TAILQ_NEXT(pc, pc_poollist)) {
		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
		    pc->pc_allocfrom, pc->pc_freeto);
		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
			for (i = 0; i < PCG_NOBJECTS; i++)
				(*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
		}
	}

 skip_cache:

	pr_enter_check(pp, pr);
}
int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
	     ph = TAILQ_NEXT(ph, ph_pagelist)) {

		struct pool_item *pi;
		int n;
		caddr_t page;

		page = (caddr_t)((u_long)ph & pp->pr_pagemask);
		if (page != ph->ph_page &&
		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " at page head addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    ph, page);
			r++;
			goto out;
		}

		for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
		     pi != NULL;
		     pi = TAILQ_NEXT(pi, pi_list), n++) {

#ifdef DIAGNOSTIC
			if (pi->pi_magic != PI_MAGIC) {
				if (label != NULL)
					printf("%s: ", label);
				printf("pool(%s): free list modified: magic=%x;"
				    " page %p; item ordinal %d;"
				    " addr %p (p %p)\n",
				    pp->pr_wchan, pi->pi_magic, ph->ph_page,
				    n, pi, page);
				panic("pool");
			}
#endif
			page = (caddr_t)((u_long)pi & pp->pr_pagemask);
			if (page == ph->ph_page)
				continue;

			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    n, pi, page);
			r++;
			goto out;
		}
	}
out:
	simple_unlock(&pp->pr_slock);
	return (r);
}

/*
 * pool_cache_init:
 *
 *	Initialize a pool cache.
 *
 *	NOTE: If the pool must be protected from interrupts, we expect
 *	to be called at the appropriate interrupt priority level.
 */
void
pool_cache_init(struct pool_cache *pc, struct pool *pp,
    int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *),
    void *arg)
{

	TAILQ_INIT(&pc->pc_grouplist);
	simple_lock_init(&pc->pc_slock);

	pc->pc_allocfrom = NULL;
	pc->pc_freeto = NULL;
	pc->pc_pool = pp;

	pc->pc_ctor = ctor;
	pc->pc_dtor = dtor;
	pc->pc_arg = arg;

	pc->pc_hits = 0;
	pc->pc_misses = 0;

	pc->pc_ngroups = 0;

	pc->pc_nitems = 0;

	simple_lock(&pp->pr_slock);
	TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}
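/*
 * Example constructor/destructor pair for a pool cache (an
 * illustrative sketch added for this edit; `struct foo' and its
 * members are assumed).  The constructor does the expensive one-time
 * setup and must return 0 on success; a non-zero return makes
 * pool_cache_get() release the raw item back to the pool and fail the
 * allocation:
 *
 *	int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		simple_lock_init(&f->f_slock);
 *		TAILQ_INIT(&f->f_waiters);
 *		return (0);
 *	}
 *
 *	void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		struct foo *f = obj;
 *
 *		KASSERT(TAILQ_FIRST(&f->f_waiters) == NULL);
 *	}
 */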
/*
 * pool_cache_destroy:
 *
 *	Destroy a pool cache.
 */
void
pool_cache_destroy(struct pool_cache *pc)
{
	struct pool *pp = pc->pc_pool;

	/* First, invalidate the entire cache. */
	pool_cache_invalidate(pc);

	/* ...and remove it from the pool's cache list. */
	simple_lock(&pp->pr_slock);
	TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

static __inline void *
pcg_get(struct pool_cache_group *pcg)
{
	void *object;
	u_int idx;

	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
	KASSERT(pcg->pcg_avail != 0);
	idx = --pcg->pcg_avail;

	KASSERT(pcg->pcg_objects[idx] != NULL);
	object = pcg->pcg_objects[idx];
	pcg->pcg_objects[idx] = NULL;

	return (object);
}

static __inline void
pcg_put(struct pool_cache_group *pcg, void *object)
{
	u_int idx;

	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
	idx = pcg->pcg_avail++;

	KASSERT(pcg->pcg_objects[idx] == NULL);
	pcg->pcg_objects[idx] = object;
}

/*
 * pool_cache_get:
 *
 *	Get an object from a pool cache.
 */
void *
pool_cache_get(struct pool_cache *pc, int flags)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_allocfrom) == NULL) {
		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
			if (pcg->pcg_avail != 0) {
				pc->pc_allocfrom = pcg;
				goto have_group;
			}
		}

		/*
		 * No groups with any available objects. Allocate
		 * a new object, construct it, and return it to
		 * the caller. We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		return (object);
	}

 have_group:
	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg);

	if (pcg->pcg_avail == 0)
		pc->pc_allocfrom = NULL;

	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put:
 *
 *	Put an object back to the pool cache.
 */
void
pool_cache_put(struct pool_cache *pc, void *object)
{
	struct pool_cache_group *pcg;

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_freeto) == NULL) {
		for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
		     pcg = TAILQ_NEXT(pcg, pcg_list)) {
			if (pcg->pcg_avail != PCG_NOBJECTS) {
				pc->pc_freeto = pcg;
				goto have_group;
			}
		}

		/*
		 * No empty groups to free the object to. Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		if (pcg != NULL) {
			memset(pcg, 0, sizeof(*pcg));
			simple_lock(&pc->pc_slock);
			pc->pc_ngroups++;
			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == NULL)
				pc->pc_freeto = pcg;
			goto have_group;
		}

		/*
		 * Unable to allocate a cache group; destruct the object
		 * and free it back to the pool.
		 */
		pool_cache_destruct_object(pc, object);
		return;
	}

 have_group:
	pc->pc_nitems++;
	pcg_put(pcg, object);

	if (pcg->pcg_avail == PCG_NOBJECTS)
		pc->pc_freeto = NULL;

	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}
/*
 * pool_cache_do_invalidate:
 *
 *	This internal function implements pool_cache_invalidate() and
 *	pool_cache_reclaim().
 */
static void
pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
    void (*putit)(struct pool *, void *))
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			(*putit)(pc->pc_pool, object);
		}
		if (free_groups) {
			pc->pc_ngroups--;
			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == pcg)
				pc->pc_freeto = NULL;
			pool_put(&pcgpool, pcg);
		}
	}
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 0, pool_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 1, pool_do_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
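/*
 * Example: reading these sysctls from userland (an illustrative sketch
 * added for this edit; it assumes the usual CTL_KERN/KERN_POOL mib
 * prefix and trims error handling).  The identifier in the last mib
 * component is a pool's pr_serial as assigned in pool_init(), so valid
 * values run from 1 upward and are not dense array indices:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS, 0 };
 *	int npools;
 *	char name[32];
 *	size_t len;
 *
 *	len = sizeof(npools);
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 *	mib[2] = KERN_POOL_NAME;
 *	mib[3] = serial;
 *	len = sizeof(name);
 *	if (sysctl(mib, 4, name, &len, NULL, 0) == 0)
 *		printf("pool %u is %s\n", serial, name);
 */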
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();
	simple_lock(&pool_head_slock);

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	simple_unlock(&pool_head_slock);
	splx(s);

	if (lookfor != 0 && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0); /* XXX - Stupid gcc */
}