1 /* $OpenBSD: subr_pool.c,v 1.14 2001/11/06 19:53:20 miod Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.59 2001/06/05 18:51:04 thorpej Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the NetBSD 23 * Foundation, Inc. and its contributors. 24 * 4. Neither the name of The NetBSD Foundation nor the names of its 25 * contributors may be used to endorse or promote products derived 26 * from this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 38 * POSSIBILITY OF SUCH DAMAGE. 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/proc.h> 44 #include <sys/errno.h> 45 #include <sys/kernel.h> 46 #include <sys/malloc.h> 47 #include <sys/lock.h> 48 #include <sys/pool.h> 49 #include <sys/syslog.h> 50 #include <sys/sysctl.h> 51 52 #include <uvm/uvm.h> 53 54 /* 55 * XXX - for now. 56 */ 57 #define SIMPLELOCK_INITIALIZER { SLOCK_UNLOCKED } 58 #ifdef LOCKDEBUG 59 #define simple_lock_freecheck(a, s) do { /* nothing */ } while (0) 60 #define simple_lock_only_held(lkp, str) do { /* nothing */ } while (0) 61 #endif 62 #define LOCK_ASSERT(x) /* nothing */ 63 64 /* 65 * Pool resource management utility. 66 * 67 * Memory is allocated in pages which are split into pieces according 68 * to the pool item size. Each page is kept on a list headed by `pr_pagelist' 69 * in the pool structure and the individual pool items are on a linked list 70 * headed by `ph_itemlist' in each page header. The memory for building 71 * the page list is either taken from the allocated pages themselves (for 72 * small pool items) or taken from an internal pool of page headers (`phpool'). 
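 *
 * A minimal usage sketch (illustrative only; `struct foo', `foo_pool' and
 * the M_DEVBUF malloc type are stand-ins, not part of this file):
 *
 *	static struct pool foo_pool;
 *	struct foo *fp;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    0, NULL, NULL, M_DEVBUF);
 *	fp = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, fp);
 *
 * Passing NULL for both the alloc and release hooks selects the default
 * page allocator below; pools that are never used from interrupt context
 * may instead pass pool_page_alloc_nointr/pool_page_free_nointr.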
 */

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
static struct pool phpool;

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)
			ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
			ph_hashlist;	/* Off-page page headers */
	int		ph_nmissing;	/* # of chunks in use */
	caddr_t		ph_page;	/* this page's address */
	struct timeval	ph_time;	/* last referenced */
};

struct pool_item {
#ifdef DIAGNOSTIC
	int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeef
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};


#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

/* The cache group pool. */
static struct pool pcgpool;

/*
 * The pool cache group.
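 *
 * A hedged sketch of typical pool cache usage (illustrative only; the
 * cache, pool, constructor and destructor names are hypothetical):
 *
 *	struct pool_cache foo_cache;
 *	struct foo *fp;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *	fp = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, fp);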
 */
#define	PCG_NOBJECTS	16
struct pool_cache_group {
	TAILQ_ENTRY(pool_cache_group)
		pcg_list;	/* link in the pool cache's group list */
	u_int	pcg_avail;	/* # available objects */
			/* pointers to the objects */
	void	*pcg_objects[PCG_NOBJECTS];
};

static void	pool_cache_reclaim(struct pool_cache *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	*pool_page_alloc(unsigned long, int, int);
static void	pool_page_free(void *, unsigned long, int);

static void pool_print1(struct pool *, const char *,
	int (*)(const char *, ...));

/*
 * Pool log entry. An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

#ifdef POOL_DIAGNOSTIC
static __inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    int (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ?
"get" : "put", 237 pl->pl_addr); 238 (*pr)("\t\tfile: %s at line %lu\n", 239 pl->pl_file, pl->pl_line); 240 } 241 } 242 if (++n >= pp->pr_logsize) 243 n = 0; 244 } 245 } 246 247 static __inline void 248 pr_enter(struct pool *pp, const char *file, long line) 249 { 250 251 if (__predict_false(pp->pr_entered_file != NULL)) { 252 printf("pool %s: reentrancy at file %s line %ld\n", 253 pp->pr_wchan, file, line); 254 printf(" previous entry at file %s line %ld\n", 255 pp->pr_entered_file, pp->pr_entered_line); 256 panic("pr_enter"); 257 } 258 259 pp->pr_entered_file = file; 260 pp->pr_entered_line = line; 261 } 262 263 static __inline void 264 pr_leave(struct pool *pp) 265 { 266 267 if (__predict_false(pp->pr_entered_file == NULL)) { 268 printf("pool %s not entered?\n", pp->pr_wchan); 269 panic("pr_leave"); 270 } 271 272 pp->pr_entered_file = NULL; 273 pp->pr_entered_line = 0; 274 } 275 276 static __inline__ void 277 pr_enter_check(struct pool *pp, int (*pr)(const char *, ...)) 278 { 279 280 if (pp->pr_entered_file != NULL) 281 (*pr)("\n\tcurrently entered from file %s line %ld\n", 282 pp->pr_entered_file, pp->pr_entered_line); 283 } 284 #else 285 #define pr_log(pp, v, action, file, line) 286 #define pr_printlog(pp, pi, pr) 287 #define pr_enter(pp, file, line) 288 #define pr_leave(pp) 289 #define pr_enter_check(pp, pr) 290 #endif /* POOL_DIAGNOSTIC */ 291 292 /* 293 * Return the pool page header based on page address. 294 */ 295 static __inline struct pool_item_header * 296 pr_find_pagehead(struct pool *pp, caddr_t page) 297 { 298 struct pool_item_header *ph; 299 300 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 301 return ((struct pool_item_header *)(page + pp->pr_phoffset)); 302 303 for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]); 304 ph != NULL; 305 ph = LIST_NEXT(ph, ph_hashlist)) { 306 if (ph->ph_page == page) 307 return (ph); 308 } 309 return (NULL); 310 } 311 312 /* 313 * Remove a page from the pool. 314 */ 315 static __inline void 316 pr_rmpage(struct pool *pp, struct pool_item_header *ph) 317 { 318 319 /* 320 * If the page was idle, decrement the idle page count. 321 */ 322 if (ph->ph_nmissing == 0) { 323 #ifdef DIAGNOSTIC 324 if (pp->pr_nidle == 0) 325 panic("pr_rmpage: nidle inconsistent"); 326 if (pp->pr_nitems < pp->pr_itemsperpage) 327 panic("pr_rmpage: nitems inconsistent"); 328 #endif 329 pp->pr_nidle--; 330 } 331 332 pp->pr_nitems -= pp->pr_itemsperpage; 333 334 /* 335 * Unlink a page from the pool and release it. 336 */ 337 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist); 338 (*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype); 339 pp->pr_npages--; 340 pp->pr_npagefree++; 341 342 if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 343 int s; 344 LIST_REMOVE(ph, ph_hashlist); 345 s = splhigh(); 346 pool_put(&phpool, ph); 347 splx(s); 348 } 349 350 if (pp->pr_curpage == ph) { 351 /* 352 * Find a new non-empty page header, if any. 353 * Start search from the page head, to increase the 354 * chance for "high water" pages to be freed. 355 */ 356 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; 357 ph = TAILQ_NEXT(ph, ph_pagelist)) 358 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL) 359 break; 360 361 pp->pr_curpage = ph; 362 } 363 } 364 365 /* 366 * Initialize the given pool resource structure. 367 * 368 * We export this routine to allow other kernel parts to declare 369 * static pools that must be initialized before malloc() is available. 
370 */ 371 void 372 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 373 const char *wchan, size_t pagesz, 374 void *(*alloc)(unsigned long, int, int), 375 void (*release)(void *, unsigned long, int), 376 int mtype) 377 { 378 int off, slack, i; 379 380 #ifdef POOL_DIAGNOSTIC 381 /* 382 * Always log if POOL_DIAGNOSTIC is defined. 383 */ 384 if (pool_logsize != 0) 385 flags |= PR_LOGGING; 386 #endif 387 388 /* 389 * Check arguments and construct default values. 390 */ 391 if (!powerof2(pagesz)) 392 panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz); 393 394 if (alloc == NULL && release == NULL) { 395 alloc = pool_page_alloc; 396 release = pool_page_free; 397 pagesz = PAGE_SIZE; /* Rounds to PAGE_SIZE anyhow. */ 398 } else if ((alloc != NULL && release != NULL) == 0) { 399 /* If you specifiy one, must specify both. */ 400 panic("pool_init: must specify alloc and release together"); 401 } 402 403 if (pagesz == 0) 404 pagesz = PAGE_SIZE; 405 406 if (align == 0) 407 align = ALIGN(1); 408 409 if (size < sizeof(struct pool_item)) 410 size = sizeof(struct pool_item); 411 412 size = ALIGN(size); 413 if (size > pagesz) 414 panic("pool_init: pool item size (%lu) too large", 415 (u_long)size); 416 417 /* 418 * Initialize the pool structure. 419 */ 420 TAILQ_INIT(&pp->pr_pagelist); 421 TAILQ_INIT(&pp->pr_cachelist); 422 pp->pr_curpage = NULL; 423 pp->pr_npages = 0; 424 pp->pr_minitems = 0; 425 pp->pr_minpages = 0; 426 pp->pr_maxpages = UINT_MAX; 427 pp->pr_roflags = flags; 428 pp->pr_flags = 0; 429 pp->pr_size = size; 430 pp->pr_align = align; 431 pp->pr_wchan = wchan; 432 pp->pr_mtype = mtype; 433 pp->pr_alloc = alloc; 434 pp->pr_free = release; 435 pp->pr_pagesz = pagesz; 436 pp->pr_pagemask = ~(pagesz - 1); 437 pp->pr_pageshift = ffs(pagesz) - 1; 438 pp->pr_nitems = 0; 439 pp->pr_nout = 0; 440 pp->pr_hardlimit = UINT_MAX; 441 pp->pr_hardlimit_warning = NULL; 442 pp->pr_hardlimit_ratecap.tv_sec = 0; 443 pp->pr_hardlimit_ratecap.tv_usec = 0; 444 pp->pr_hardlimit_warning_last.tv_sec = 0; 445 pp->pr_hardlimit_warning_last.tv_usec = 0; 446 pp->pr_serial = ++pool_serial; 447 if (pool_serial == 0) 448 panic("pool_init: too much uptime"); 449 450 /* 451 * Decide whether to put the page header off page to avoid 452 * wasting too large a part of the page. Off-page page headers 453 * go on a hash table, so we can match a returned item 454 * with its header based on the page address. 455 * We use 1/16 of the page size as the threshold (XXX: tune) 456 */ 457 if (pp->pr_size < pagesz/16) { 458 /* Use the end of the page for the page header */ 459 pp->pr_roflags |= PR_PHINPAGE; 460 pp->pr_phoffset = off = 461 pagesz - ALIGN(sizeof(struct pool_item_header)); 462 } else { 463 /* The page header will be taken from our page header pool */ 464 pp->pr_phoffset = 0; 465 off = pagesz; 466 for (i = 0; i < PR_HASHTABSIZE; i++) { 467 LIST_INIT(&pp->pr_hashtab[i]); 468 } 469 } 470 471 /* 472 * Alignment is to take place at `ioff' within the item. This means 473 * we must reserve up to `align - 1' bytes on the page to allow 474 * appropriate positioning of each item. 475 * 476 * Silently enforce `0 <= ioff < align'. 477 */ 478 pp->pr_itemoffset = ioff = ioff % align; 479 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 480 KASSERT(pp->pr_itemsperpage != 0); 481 482 /* 483 * Use the slack between the chunks and the page header 484 * for "cache coloring". 
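	 *
	 * For example, if the slack works out to 96 bytes and align is 32,
	 * pr_maxcolor becomes 96 and successive pages place their first item
	 * at offsets 0, 32, 64, 96, 0, ... so that items on different pages
	 * do not all start on the same cache lines.  (Illustrative numbers
	 * only.)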
485 */ 486 slack = off - pp->pr_itemsperpage * pp->pr_size; 487 pp->pr_maxcolor = (slack / align) * align; 488 pp->pr_curcolor = 0; 489 490 pp->pr_nget = 0; 491 pp->pr_nfail = 0; 492 pp->pr_nput = 0; 493 pp->pr_npagealloc = 0; 494 pp->pr_npagefree = 0; 495 pp->pr_hiwat = 0; 496 pp->pr_nidle = 0; 497 498 #ifdef POOL_DIAGNOSTIC 499 if (flags & PR_LOGGING) { 500 if (kmem_map == NULL || 501 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 502 M_TEMP, M_NOWAIT)) == NULL) 503 pp->pr_roflags &= ~PR_LOGGING; 504 pp->pr_curlogentry = 0; 505 pp->pr_logsize = pool_logsize; 506 } 507 #endif 508 509 pp->pr_entered_file = NULL; 510 pp->pr_entered_line = 0; 511 512 simple_lock_init(&pp->pr_slock); 513 514 /* 515 * Initialize private page header pool and cache magazine pool if we 516 * haven't done so yet. 517 * XXX LOCKING. 518 */ 519 if (phpool.pr_size == 0) { 520 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 521 0, "phpool", 0, 0, 0, 0); 522 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 523 0, "pcgpool", 0, 0, 0, 0); 524 } 525 526 /* Insert into the list of all pools. */ 527 simple_lock(&pool_head_slock); 528 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 529 simple_unlock(&pool_head_slock); 530 } 531 532 /* 533 * De-commision a pool resource. 534 */ 535 void 536 pool_destroy(struct pool *pp) 537 { 538 struct pool_item_header *ph; 539 struct pool_cache *pc; 540 541 /* Destroy all caches for this pool. */ 542 while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL) 543 pool_cache_destroy(pc); 544 545 #ifdef DIAGNOSTIC 546 if (pp->pr_nout != 0) { 547 pr_printlog(pp, NULL, printf); 548 panic("pool_destroy: pool busy: still out: %u\n", 549 pp->pr_nout); 550 } 551 #endif 552 553 /* Remove all pages */ 554 if ((pp->pr_roflags & PR_STATIC) == 0) 555 while ((ph = pp->pr_pagelist.tqh_first) != NULL) 556 pr_rmpage(pp, ph); 557 558 /* Remove from global pool list */ 559 simple_lock(&pool_head_slock); 560 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 561 /* XXX Only clear this if we were drainpp? 
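	 * Clearing it unconditionally is harmless: pool_drain() simply
	 * restarts its scan from the head of pool_head on its next call.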
*/ 562 drainpp = NULL; 563 simple_unlock(&pool_head_slock); 564 565 #ifdef POOL_DIAGNOSTIC 566 if ((pp->pr_roflags & PR_LOGGING) != 0) 567 free(pp->pr_log, M_TEMP); 568 #endif 569 570 if (pp->pr_roflags & PR_FREEHEADER) 571 free(pp, M_POOL); 572 } 573 574 static __inline struct pool_item_header * 575 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) 576 { 577 struct pool_item_header *ph; 578 int s; 579 580 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0); 581 582 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 583 ph = (struct pool_item_header *) (storage + pp->pr_phoffset); 584 else { 585 s = splhigh(); 586 ph = pool_get(&phpool, flags); 587 splx(s); 588 } 589 590 return (ph); 591 } 592 593 /* 594 * Grab an item from the pool; must be called at appropriate spl level 595 */ 596 void * 597 #ifdef POOL_DIAGNOSTIC 598 _pool_get(struct pool *pp, int flags, const char *file, long line) 599 #else 600 pool_get(struct pool *pp, int flags) 601 #endif 602 { 603 struct pool_item *pi; 604 struct pool_item_header *ph; 605 void *v; 606 607 #ifdef DIAGNOSTIC 608 if (__predict_false((pp->pr_roflags & PR_STATIC) && 609 (flags & PR_MALLOCOK))) { 610 pr_printlog(pp, NULL, printf); 611 panic("pool_get: static"); 612 } 613 614 if (__predict_false(curproc == NULL && /* doing_shutdown == 0 && XXX*/ 615 (flags & PR_WAITOK) != 0)) 616 panic("pool_get: must have NOWAIT"); 617 618 #endif 619 simple_lock(&pp->pr_slock); 620 pr_enter(pp, file, line); 621 622 startover: 623 /* 624 * Check to see if we've reached the hard limit. If we have, 625 * and we can wait, then wait until an item has been returned to 626 * the pool. 627 */ 628 #ifdef DIAGNOSTIC 629 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 630 pr_leave(pp); 631 simple_unlock(&pp->pr_slock); 632 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 633 } 634 #endif 635 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 636 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 637 /* 638 * XXX: A warning isn't logged in this case. Should 639 * it be? 640 */ 641 pp->pr_flags |= PR_WANTED; 642 pr_leave(pp); 643 simple_unlock(&pp->pr_slock); 644 tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0); 645 simple_lock(&pp->pr_slock); 646 pr_enter(pp, file, line); 647 goto startover; 648 } 649 650 /* 651 * Log a message that the hard limit has been hit. 652 */ 653 if (pp->pr_hardlimit_warning != NULL && 654 ratecheck(&pp->pr_hardlimit_warning_last, 655 &pp->pr_hardlimit_ratecap)) 656 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 657 658 if (flags & PR_URGENT) 659 panic("pool_get: urgent"); 660 661 pp->pr_nfail++; 662 663 pr_leave(pp); 664 simple_unlock(&pp->pr_slock); 665 return (NULL); 666 } 667 668 /* 669 * The convention we use is that if `curpage' is not NULL, then 670 * it points at a non-empty bucket. In particular, `curpage' 671 * never points at a page header which has PR_PHINPAGE set and 672 * has no items in its bucket. 673 */ 674 if ((ph = pp->pr_curpage) == NULL) { 675 #ifdef DIAGNOSTIC 676 if (pp->pr_nitems != 0) { 677 simple_unlock(&pp->pr_slock); 678 printf("pool_get: %s: curpage NULL, nitems %u\n", 679 pp->pr_wchan, pp->pr_nitems); 680 panic("pool_get: nitems inconsistent\n"); 681 } 682 #endif 683 684 /* 685 * Call the back-end page allocator for more memory. 686 * Release the pool lock, as the back-end page allocator 687 * may block. 
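		 * (With the default allocator this ends up in
		 * uvm_km_alloc_poolpage(), which may sleep when PR_WAITOK
		 * is set.)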
688 */ 689 pr_leave(pp); 690 simple_unlock(&pp->pr_slock); 691 v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype); 692 if (__predict_true(v != NULL)) 693 ph = pool_alloc_item_header(pp, v, flags); 694 simple_lock(&pp->pr_slock); 695 pr_enter(pp, file, line); 696 697 if (__predict_false(v == NULL || ph == NULL)) { 698 if (v != NULL) 699 (*pp->pr_free)(v, pp->pr_pagesz, pp->pr_mtype); 700 701 /* 702 * We were unable to allocate a page or item 703 * header, but we released the lock during 704 * allocation, so perhaps items were freed 705 * back to the pool. Check for this case. 706 */ 707 if (pp->pr_curpage != NULL) 708 goto startover; 709 710 if (flags & PR_URGENT) 711 panic("pool_get: urgent"); 712 713 if ((flags & PR_WAITOK) == 0) { 714 pp->pr_nfail++; 715 pr_leave(pp); 716 simple_unlock(&pp->pr_slock); 717 return (NULL); 718 } 719 720 /* 721 * Wait for items to be returned to this pool. 722 * 723 * XXX: we actually want to wait just until 724 * the page allocator has memory again. Depending 725 * on this pool's usage, we might get stuck here 726 * for a long time. 727 * 728 * XXX: maybe we should wake up once a second and 729 * try again? 730 */ 731 pp->pr_flags |= PR_WANTED; 732 pr_leave(pp); 733 simple_unlock(&pp->pr_slock); 734 tsleep((caddr_t)pp, PSWP, (char *)pp->pr_wchan, 0); 735 simple_lock(&pp->pr_slock); 736 pr_enter(pp, file, line); 737 goto startover; 738 } 739 740 /* We have more memory; add it to the pool */ 741 pp->pr_npagealloc++; 742 pool_prime_page(pp, v, ph); 743 744 /* Start the allocation process over. */ 745 goto startover; 746 } 747 748 if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) { 749 pr_leave(pp); 750 simple_unlock(&pp->pr_slock); 751 panic("pool_get: %s: page empty", pp->pr_wchan); 752 } 753 #ifdef DIAGNOSTIC 754 if (__predict_false(pp->pr_nitems == 0)) { 755 pr_leave(pp); 756 simple_unlock(&pp->pr_slock); 757 printf("pool_get: %s: items on itemlist, nitems %u\n", 758 pp->pr_wchan, pp->pr_nitems); 759 panic("pool_get: nitems inconsistent\n"); 760 } 761 762 pr_log(pp, v, PRLOG_GET, file, line); 763 764 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 765 pr_printlog(pp, pi, printf); 766 panic("pool_get(%s): free list modified: magic=%x; page %p;" 767 " item addr %p\n", 768 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 769 } 770 #endif 771 772 /* 773 * Remove from item list. 774 */ 775 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list); 776 pp->pr_nitems--; 777 pp->pr_nout++; 778 if (ph->ph_nmissing == 0) { 779 #ifdef DIAGNOSTIC 780 if (__predict_false(pp->pr_nidle == 0)) 781 panic("pool_get: nidle inconsistent"); 782 #endif 783 pp->pr_nidle--; 784 } 785 ph->ph_nmissing++; 786 if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) { 787 #ifdef DIAGNOSTIC 788 if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) { 789 pr_leave(pp); 790 simple_unlock(&pp->pr_slock); 791 panic("pool_get: %s: nmissing inconsistent", 792 pp->pr_wchan); 793 } 794 #endif 795 /* 796 * Find a new non-empty page header, if any. 797 * Start search from the page head, to increase 798 * the chance for "high water" pages to be freed. 799 * 800 * Migrate empty pages to the end of the list. This 801 * will speed the update of curpage as pages become 802 * idle. Empty pages intermingled with idle pages 803 * is no big deal. As soon as a page becomes un-empty, 804 * it will move back to the head of the list. 
805 */ 806 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist); 807 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist); 808 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; 809 ph = TAILQ_NEXT(ph, ph_pagelist)) 810 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL) 811 break; 812 813 pp->pr_curpage = ph; 814 } 815 816 pp->pr_nget++; 817 818 /* 819 * If we have a low water mark and we are now below that low 820 * water mark, add more items to the pool. 821 */ 822 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 823 /* 824 * XXX: Should we log a warning? Should we set up a timeout 825 * to try again in a second or so? The latter could break 826 * a caller's assumptions about interrupt protection, etc. 827 */ 828 } 829 830 pr_leave(pp); 831 simple_unlock(&pp->pr_slock); 832 return (v); 833 } 834 835 /* 836 * Internal version of pool_put(). Pool is already locked/entered. 837 */ 838 static void 839 pool_do_put(struct pool *pp, void *v) 840 { 841 struct pool_item *pi = v; 842 struct pool_item_header *ph; 843 caddr_t page; 844 int s; 845 846 page = (caddr_t)((u_long)v & pp->pr_pagemask); 847 848 #ifdef DIAGNOSTIC 849 if (__predict_false(pp->pr_nout == 0)) { 850 printf("pool %s: putting with none out\n", 851 pp->pr_wchan); 852 panic("pool_put"); 853 } 854 #endif 855 856 if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) { 857 pr_printlog(pp, NULL, printf); 858 panic("pool_put: %s: page header missing", pp->pr_wchan); 859 } 860 861 #ifdef LOCKDEBUG 862 /* 863 * Check if we're freeing a locked simple lock. 864 */ 865 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size); 866 #endif 867 868 /* 869 * Return to item list. 870 */ 871 #ifdef DIAGNOSTIC 872 pi->pi_magic = PI_MAGIC; 873 #endif 874 #ifdef DEBUG 875 { 876 int i, *ip = v; 877 878 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 879 *ip++ = PI_MAGIC; 880 } 881 } 882 #endif 883 884 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 885 ph->ph_nmissing--; 886 pp->pr_nput++; 887 pp->pr_nitems++; 888 pp->pr_nout--; 889 890 /* Cancel "pool empty" condition if it exists */ 891 if (pp->pr_curpage == NULL) 892 pp->pr_curpage = ph; 893 894 if (pp->pr_flags & PR_WANTED) { 895 pp->pr_flags &= ~PR_WANTED; 896 if (ph->ph_nmissing == 0) 897 pp->pr_nidle++; 898 wakeup((caddr_t)pp); 899 return; 900 } 901 902 /* 903 * If this page is now complete, do one of two things: 904 * 905 * (1) If we have more pages than the page high water 906 * mark, free the page back to the system. 907 * 908 * (2) Move it to the end of the page list, so that 909 * we minimize our chances of fragmenting the 910 * pool. Idle pages migrate to the end (along with 911 * completely empty pages, so that we find un-empty 912 * pages more quickly when we update curpage) of the 913 * list so they can be more easily swept up by 914 * the pagedaemon when pages are scarce. 915 */ 916 if (ph->ph_nmissing == 0) { 917 pp->pr_nidle++; 918 if (pp->pr_npages > pp->pr_maxpages) { 919 pr_rmpage(pp, ph); 920 } else { 921 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist); 922 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist); 923 924 /* 925 * Update the timestamp on the page. A page must 926 * be idle for some period of time before it can 927 * be reclaimed by the pagedaemon. This minimizes 928 * ping-pong'ing for memory. 929 */ 930 s = splclock(); 931 ph->ph_time = mono_time; 932 splx(s); 933 934 /* 935 * Update the current page pointer. Just look for 936 * the first page with any free items. 
937 * 938 * XXX: Maybe we want an option to look for the 939 * page with the fewest available items, to minimize 940 * fragmentation? 941 */ 942 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; 943 ph = TAILQ_NEXT(ph, ph_pagelist)) 944 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL) 945 break; 946 947 pp->pr_curpage = ph; 948 } 949 } 950 /* 951 * If the page has just become un-empty, move it to the head of 952 * the list, and make it the current page. The next allocation 953 * will get the item from this page, instead of further fragmenting 954 * the pool. 955 */ 956 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 957 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist); 958 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist); 959 pp->pr_curpage = ph; 960 } 961 } 962 963 /* 964 * Return resource to the pool; must be called at appropriate spl level 965 */ 966 #ifdef POOL_DIAGNOSTIC 967 void 968 _pool_put(struct pool *pp, void *v, const char *file, long line) 969 { 970 971 simple_lock(&pp->pr_slock); 972 pr_enter(pp, file, line); 973 974 pr_log(pp, v, PRLOG_PUT, file, line); 975 976 pool_do_put(pp, v); 977 978 pr_leave(pp); 979 simple_unlock(&pp->pr_slock); 980 } 981 #undef pool_put 982 #endif /* POOL_DIAGNOSTIC */ 983 984 void 985 pool_put(struct pool *pp, void *v) 986 { 987 988 simple_lock(&pp->pr_slock); 989 990 pool_do_put(pp, v); 991 992 simple_unlock(&pp->pr_slock); 993 } 994 995 #ifdef POOL_DIAGNOSTIC 996 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 997 #endif 998 999 /* 1000 * Add N items to the pool. 1001 */ 1002 int 1003 pool_prime(struct pool *pp, int n) 1004 { 1005 struct pool_item_header *ph; 1006 caddr_t cp; 1007 int newpages, error = 0; 1008 1009 simple_lock(&pp->pr_slock); 1010 1011 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1012 1013 while (newpages-- > 0) { 1014 simple_unlock(&pp->pr_slock); 1015 cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype); 1016 if (__predict_true(cp != NULL)) 1017 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1018 simple_lock(&pp->pr_slock); 1019 1020 if (__predict_false(cp == NULL || ph == NULL)) { 1021 error = ENOMEM; 1022 if (cp != NULL) 1023 (*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype); 1024 break; 1025 } 1026 1027 pool_prime_page(pp, cp, ph); 1028 pp->pr_npagealloc++; 1029 pp->pr_minpages++; 1030 } 1031 1032 if (pp->pr_minpages >= pp->pr_maxpages) 1033 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1034 1035 simple_unlock(&pp->pr_slock); 1036 return (0); 1037 } 1038 1039 /* 1040 * Add a page worth of items to the pool. 1041 * 1042 * Note, we must be called with the pool descriptor LOCKED. 1043 */ 1044 static void 1045 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 1046 { 1047 struct pool_item *pi; 1048 caddr_t cp = storage; 1049 unsigned int align = pp->pr_align; 1050 unsigned int ioff = pp->pr_itemoffset; 1051 int n; 1052 1053 if (((u_long)cp & (pp->pr_pagesz - 1)) != 0) 1054 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1055 1056 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1057 LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)], 1058 ph, ph_hashlist); 1059 1060 /* 1061 * Insert page header. 1062 */ 1063 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist); 1064 TAILQ_INIT(&ph->ph_itemlist); 1065 ph->ph_page = storage; 1066 ph->ph_nmissing = 0; 1067 memset(&ph->ph_time, 0, sizeof(ph->ph_time)); 1068 1069 pp->pr_nidle++; 1070 1071 /* 1072 * Color this page. 
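	 * Offset the first item by the current color so that items on
	 * successive pages start on different cache lines; see the
	 * coloring note in pool_init() above.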
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, this doesn't work with static pools.
 *
 * Note 3, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;

	if (pp->pr_roflags & PR_STATIC) {
		/*
		 * We dropped below the low water mark, and this is not a
		 * good thing.  Log a warning.
		 *
		 * XXX: rate-limit this?
		 */
		printf("WARNING: static pool `%s' dropped below low water "
		    "mark\n", pp->pr_wchan);
		return (0);
	}

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
		 */
		simple_unlock(&pp->pr_slock);
		cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				(*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_setlowat(struct pool *pp, int n)
{
	int error;

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && (error = pool_catchup(pp)) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		?
0 1198 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1199 1200 simple_unlock(&pp->pr_slock); 1201 } 1202 1203 void 1204 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1205 { 1206 1207 simple_lock(&pp->pr_slock); 1208 1209 pp->pr_hardlimit = n; 1210 pp->pr_hardlimit_warning = warnmess; 1211 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1212 pp->pr_hardlimit_warning_last.tv_sec = 0; 1213 pp->pr_hardlimit_warning_last.tv_usec = 0; 1214 1215 /* 1216 * In-line version of pool_sethiwat(), because we don't want to 1217 * release the lock. 1218 */ 1219 pp->pr_maxpages = (n == 0) 1220 ? 0 1221 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1222 1223 simple_unlock(&pp->pr_slock); 1224 } 1225 1226 /* 1227 * Default page allocator. 1228 */ 1229 static void * 1230 pool_page_alloc(unsigned long sz, int flags, int mtype) 1231 { 1232 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 1233 1234 return ((void *)uvm_km_alloc_poolpage(waitok)); 1235 } 1236 1237 static void 1238 pool_page_free(void *v, unsigned long sz, int mtype) 1239 { 1240 uvm_km_free_poolpage((vaddr_t)v); 1241 } 1242 1243 /* 1244 * Alternate pool page allocator for pools that know they will 1245 * never be accessed in interrupt context. 1246 */ 1247 void * 1248 pool_page_alloc_nointr(unsigned long sz, int flags, int mtype) 1249 { 1250 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 1251 1252 return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object, 1253 waitok)); 1254 } 1255 1256 void 1257 pool_page_free_nointr(void *v, unsigned long sz, int mtype) 1258 { 1259 1260 uvm_km_free_poolpage1(kernel_map, (vaddr_t)v); 1261 } 1262 1263 1264 /* 1265 * Release all complete pages that have not been used recently. 1266 */ 1267 void 1268 #ifdef POOL_DIAGNOSTIC 1269 _pool_reclaim(struct pool *pp, const char *file, long line) 1270 #else 1271 pool_reclaim(struct pool *pp) 1272 #endif 1273 { 1274 struct pool_item_header *ph, *phnext; 1275 struct pool_cache *pc; 1276 struct timeval curtime; 1277 int s; 1278 1279 if (pp->pr_roflags & PR_STATIC) 1280 return; 1281 1282 if (simple_lock_try(&pp->pr_slock) == 0) 1283 return; 1284 pr_enter(pp, file, line); 1285 1286 /* 1287 * Reclaim items from the pool's caches. 1288 */ 1289 for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL; 1290 pc = TAILQ_NEXT(pc, pc_poollist)) 1291 pool_cache_reclaim(pc); 1292 1293 s = splclock(); 1294 curtime = mono_time; 1295 splx(s); 1296 1297 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) { 1298 phnext = TAILQ_NEXT(ph, ph_pagelist); 1299 1300 /* Check our minimum page claim */ 1301 if (pp->pr_npages <= pp->pr_minpages) 1302 break; 1303 1304 if (ph->ph_nmissing == 0) { 1305 struct timeval diff; 1306 timersub(&curtime, &ph->ph_time, &diff); 1307 if (diff.tv_sec < pool_inactive_time) 1308 continue; 1309 1310 /* 1311 * If freeing this page would put us below 1312 * the low water mark, stop now. 1313 */ 1314 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1315 pp->pr_minitems) 1316 break; 1317 1318 pr_rmpage(pp, ph); 1319 } 1320 } 1321 1322 pr_leave(pp); 1323 simple_unlock(&pp->pr_slock); 1324 } 1325 1326 1327 /* 1328 * Drain pools, one at a time. 1329 * 1330 * Note, we must never be called from an interrupt context. 
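 *
 * This is typically called by the VM page daemon when memory is scarce;
 * each call reclaims idle pages from one pool and advances drainpp so
 * that successive calls cycle through all pools.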
1331 */ 1332 void 1333 pool_drain(void *arg) 1334 { 1335 struct pool *pp; 1336 int s; 1337 1338 s = splvm(); 1339 simple_lock(&pool_head_slock); 1340 1341 if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL) 1342 goto out; 1343 1344 pp = drainpp; 1345 drainpp = TAILQ_NEXT(pp, pr_poollist); 1346 1347 pool_reclaim(pp); 1348 1349 out: 1350 simple_unlock(&pool_head_slock); 1351 splx(s); 1352 } 1353 1354 1355 /* 1356 * Diagnostic helpers. 1357 */ 1358 void 1359 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...)) 1360 { 1361 int s; 1362 1363 s = splvm(); 1364 if (simple_lock_try(&pp->pr_slock) == 0) { 1365 printf("pool %s is locked; try again later\n", 1366 pp->pr_wchan); 1367 splx(s); 1368 return; 1369 } 1370 pool_print1(pp, modif, printf); 1371 simple_unlock(&pp->pr_slock); 1372 splx(s); 1373 } 1374 1375 static void 1376 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...)) 1377 { 1378 struct pool_item_header *ph; 1379 struct pool_cache *pc; 1380 struct pool_cache_group *pcg; 1381 #ifdef DIAGNOSTIC 1382 struct pool_item *pi; 1383 #endif 1384 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1385 char c; 1386 1387 while ((c = *modif++) != '\0') { 1388 if (c == 'l') 1389 print_log = 1; 1390 if (c == 'p') 1391 print_pagelist = 1; 1392 if (c == 'c') 1393 print_cache = 1; 1394 modif++; 1395 } 1396 1397 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1398 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1399 pp->pr_roflags); 1400 (*pr)("\tpagesz %u, mtype %d\n", pp->pr_pagesz, pp->pr_mtype); 1401 (*pr)("\talloc %p, release %p\n", pp->pr_alloc, pp->pr_free); 1402 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1403 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1404 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1405 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1406 1407 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1408 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1409 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1410 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1411 1412 if (print_pagelist == 0) 1413 goto skip_pagelist; 1414 1415 if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL) 1416 (*pr)("\n\tpage list:\n"); 1417 for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) { 1418 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1419 ph->ph_page, ph->ph_nmissing, 1420 (u_long)ph->ph_time.tv_sec, 1421 (u_long)ph->ph_time.tv_usec); 1422 #ifdef DIAGNOSTIC 1423 for (pi = TAILQ_FIRST(&ph->ph_itemlist); pi != NULL; 1424 pi = TAILQ_NEXT(pi, pi_list)) { 1425 if (pi->pi_magic != PI_MAGIC) { 1426 (*pr)("\t\t\titem %p, magic 0x%x\n", 1427 pi, pi->pi_magic); 1428 } 1429 } 1430 #endif 1431 } 1432 if (pp->pr_curpage == NULL) 1433 (*pr)("\tno current page\n"); 1434 else 1435 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1436 1437 skip_pagelist: 1438 1439 if (print_log == 0) 1440 goto skip_log; 1441 1442 (*pr)("\n"); 1443 if ((pp->pr_roflags & PR_LOGGING) == 0) 1444 (*pr)("\tno log\n"); 1445 else 1446 pr_printlog(pp, NULL, pr); 1447 1448 skip_log: 1449 1450 if (print_cache == 0) 1451 goto skip_cache; 1452 1453 for (pc = TAILQ_FIRST(&pp->pr_cachelist); pc != NULL; 1454 pc = TAILQ_NEXT(pc, pc_poollist)) { 1455 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc, 1456 pc->pc_allocfrom, pc->pc_freeto); 1457 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1458 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, 
pc->pc_nitems); 1459 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL; 1460 pcg = TAILQ_NEXT(pcg, pcg_list)) { 1461 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); 1462 for (i = 0; i < PCG_NOBJECTS; i++) 1463 (*pr)("\t\t\t%p\n", pcg->pcg_objects[i]); 1464 } 1465 } 1466 1467 skip_cache: 1468 1469 pr_enter_check(pp, pr); 1470 } 1471 1472 int 1473 pool_chk(struct pool *pp, const char *label) 1474 { 1475 struct pool_item_header *ph; 1476 int r = 0; 1477 1478 simple_lock(&pp->pr_slock); 1479 1480 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; 1481 ph = TAILQ_NEXT(ph, ph_pagelist)) { 1482 1483 struct pool_item *pi; 1484 int n; 1485 caddr_t page; 1486 1487 page = (caddr_t)((u_long)ph & pp->pr_pagemask); 1488 if (page != ph->ph_page && 1489 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1490 if (label != NULL) 1491 printf("%s: ", label); 1492 printf("pool(%p:%s): page inconsistency: page %p;" 1493 " at page head addr %p (p %p)\n", pp, 1494 pp->pr_wchan, ph->ph_page, 1495 ph, page); 1496 r++; 1497 goto out; 1498 } 1499 1500 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0; 1501 pi != NULL; 1502 pi = TAILQ_NEXT(pi,pi_list), n++) { 1503 1504 #ifdef DIAGNOSTIC 1505 if (pi->pi_magic != PI_MAGIC) { 1506 if (label != NULL) 1507 printf("%s: ", label); 1508 printf("pool(%s): free list modified: magic=%x;" 1509 " page %p; item ordinal %d;" 1510 " addr %p (p %p)\n", 1511 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1512 n, pi, page); 1513 panic("pool"); 1514 } 1515 #endif 1516 page = (caddr_t)((u_long)pi & pp->pr_pagemask); 1517 if (page == ph->ph_page) 1518 continue; 1519 1520 if (label != NULL) 1521 printf("%s: ", label); 1522 printf("pool(%p:%s): page inconsistency: page %p;" 1523 " item ordinal %d; addr %p (p %p)\n", pp, 1524 pp->pr_wchan, ph->ph_page, 1525 n, pi, page); 1526 r++; 1527 goto out; 1528 } 1529 } 1530 out: 1531 simple_unlock(&pp->pr_slock); 1532 return (r); 1533 } 1534 1535 /* 1536 * pool_cache_init: 1537 * 1538 * Initialize a pool cache. 1539 * 1540 * NOTE: If the pool must be protected from interrupts, we expect 1541 * to be called at the appropriate interrupt priority level. 1542 */ 1543 void 1544 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1545 int (*ctor)(void *, void *, int), 1546 void (*dtor)(void *, void *), 1547 void *arg) 1548 { 1549 1550 TAILQ_INIT(&pc->pc_grouplist); 1551 simple_lock_init(&pc->pc_slock); 1552 1553 pc->pc_allocfrom = NULL; 1554 pc->pc_freeto = NULL; 1555 pc->pc_pool = pp; 1556 1557 pc->pc_ctor = ctor; 1558 pc->pc_dtor = dtor; 1559 pc->pc_arg = arg; 1560 1561 pc->pc_hits = 0; 1562 pc->pc_misses = 0; 1563 1564 pc->pc_ngroups = 0; 1565 1566 pc->pc_nitems = 0; 1567 1568 simple_lock(&pp->pr_slock); 1569 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist); 1570 simple_unlock(&pp->pr_slock); 1571 } 1572 1573 /* 1574 * pool_cache_destroy: 1575 * 1576 * Destroy a pool cache. 1577 */ 1578 void 1579 pool_cache_destroy(struct pool_cache *pc) 1580 { 1581 struct pool *pp = pc->pc_pool; 1582 1583 /* First, invalidate the entire cache. */ 1584 pool_cache_invalidate(pc); 1585 1586 /* ...and remove it from the pool's cache list. 
*/ 1587 simple_lock(&pp->pr_slock); 1588 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist); 1589 simple_unlock(&pp->pr_slock); 1590 } 1591 1592 static __inline void * 1593 pcg_get(struct pool_cache_group *pcg) 1594 { 1595 void *object; 1596 u_int idx; 1597 1598 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1599 KASSERT(pcg->pcg_avail != 0); 1600 idx = --pcg->pcg_avail; 1601 1602 KASSERT(pcg->pcg_objects[idx] != NULL); 1603 object = pcg->pcg_objects[idx]; 1604 pcg->pcg_objects[idx] = NULL; 1605 1606 return (object); 1607 } 1608 1609 static __inline void 1610 pcg_put(struct pool_cache_group *pcg, void *object) 1611 { 1612 u_int idx; 1613 1614 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 1615 idx = pcg->pcg_avail++; 1616 1617 KASSERT(pcg->pcg_objects[idx] == NULL); 1618 pcg->pcg_objects[idx] = object; 1619 } 1620 1621 /* 1622 * pool_cache_get: 1623 * 1624 * Get an object from a pool cache. 1625 */ 1626 void * 1627 pool_cache_get(struct pool_cache *pc, int flags) 1628 { 1629 struct pool_cache_group *pcg; 1630 void *object; 1631 1632 #ifdef LOCKDEBUG 1633 if (flags & PR_WAITOK) 1634 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)"); 1635 #endif 1636 1637 simple_lock(&pc->pc_slock); 1638 1639 if ((pcg = pc->pc_allocfrom) == NULL) { 1640 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL; 1641 pcg = TAILQ_NEXT(pcg, pcg_list)) { 1642 if (pcg->pcg_avail != 0) { 1643 pc->pc_allocfrom = pcg; 1644 goto have_group; 1645 } 1646 } 1647 1648 /* 1649 * No groups with any available objects. Allocate 1650 * a new object, construct it, and return it to 1651 * the caller. We will allocate a group, if necessary, 1652 * when the object is freed back to the cache. 1653 */ 1654 pc->pc_misses++; 1655 simple_unlock(&pc->pc_slock); 1656 object = pool_get(pc->pc_pool, flags); 1657 if (object != NULL && pc->pc_ctor != NULL) { 1658 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) { 1659 pool_put(pc->pc_pool, object); 1660 return (NULL); 1661 } 1662 } 1663 return (object); 1664 } 1665 1666 have_group: 1667 pc->pc_hits++; 1668 pc->pc_nitems--; 1669 object = pcg_get(pcg); 1670 1671 if (pcg->pcg_avail == 0) 1672 pc->pc_allocfrom = NULL; 1673 1674 simple_unlock(&pc->pc_slock); 1675 1676 return (object); 1677 } 1678 1679 /* 1680 * pool_cache_put: 1681 * 1682 * Put an object back to the pool cache. 1683 */ 1684 void 1685 pool_cache_put(struct pool_cache *pc, void *object) 1686 { 1687 struct pool_cache_group *pcg; 1688 1689 simple_lock(&pc->pc_slock); 1690 1691 if ((pcg = pc->pc_freeto) == NULL) { 1692 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL; 1693 pcg = TAILQ_NEXT(pcg, pcg_list)) { 1694 if (pcg->pcg_avail != PCG_NOBJECTS) { 1695 pc->pc_freeto = pcg; 1696 goto have_group; 1697 } 1698 } 1699 1700 /* 1701 * No empty groups to free the object to. Attempt to 1702 * allocate one. 1703 */ 1704 simple_unlock(&pc->pc_slock); 1705 pcg = pool_get(&pcgpool, PR_NOWAIT); 1706 if (pcg != NULL) { 1707 memset(pcg, 0, sizeof(*pcg)); 1708 simple_lock(&pc->pc_slock); 1709 pc->pc_ngroups++; 1710 TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list); 1711 if (pc->pc_freeto == NULL) 1712 pc->pc_freeto = pcg; 1713 goto have_group; 1714 } 1715 1716 /* 1717 * Unable to allocate a cache group; destruct the object 1718 * and free it back to the pool. 
		 */
		pool_cache_destruct_object(pc, object);
		return;
	}

 have_group:
	pc->pc_nitems++;
	pcg_put(pcg, object);

	if (pcg->pcg_avail == PCG_NOBJECTS)
		pc->pc_freeto = NULL;

	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

/*
 * pool_cache_do_invalidate:
 *
 *	This internal function implements pool_cache_invalidate() and
 *	pool_cache_reclaim().
 */
static void
pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
    void (*putit)(struct pool *, void *))
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			(*putit)(pc->pc_pool, object);
		}
		if (free_groups) {
			pc->pc_ngroups--;
			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == pcg)
				pc->pc_freeto = NULL;
			pool_put(&pcgpool, pcg);
		}
	}
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 0, pool_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 1, pool_do_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ?
*sizep : 0; 1824 int npools = 0, s; 1825 unsigned int lookfor; 1826 size_t len; 1827 1828 switch (*name) { 1829 case KERN_POOL_NPOOLS: 1830 if (namelen != 1 || buflen != sizeof(int)) 1831 return (EINVAL); 1832 lookfor = 0; 1833 break; 1834 case KERN_POOL_NAME: 1835 if (namelen != 2 || buflen < 1) 1836 return (EINVAL); 1837 lookfor = name[1]; 1838 break; 1839 case KERN_POOL_POOL: 1840 if (namelen != 2 || buflen != sizeof(struct pool)) 1841 return (EINVAL); 1842 lookfor = name[1]; 1843 break; 1844 default: 1845 return (EINVAL); 1846 } 1847 1848 s = splvm(); 1849 simple_lock(&pool_head_slock); 1850 1851 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1852 npools++; 1853 if (lookfor == pp->pr_serial) { 1854 foundpool = pp; 1855 break; 1856 } 1857 } 1858 1859 simple_unlock(&pool_head_slock); 1860 splx(s); 1861 1862 if (lookfor != 0 && foundpool == NULL) 1863 return (ENOENT); 1864 1865 switch (*name) { 1866 case KERN_POOL_NPOOLS: 1867 return copyout(&npools, where, buflen); 1868 case KERN_POOL_NAME: 1869 len = strlen(foundpool->pr_wchan) + 1; 1870 if (*sizep < len) 1871 return (ENOMEM); 1872 *sizep = len; 1873 return copyout(foundpool->pr_wchan, where, len); 1874 case KERN_POOL_POOL: 1875 return copyout(foundpool, where, buflen); 1876 } 1877 /* NOTREACHED */ 1878 return (0); /* XXX - Stupid gcc */ 1879 } 1880
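
/*
 * Illustrative userland use of the sysctl interface above (a sketch,
 * assuming the usual CTL_KERN/KERN_POOL MIB names; error handling
 * abbreviated):
 *
 *	int mib[3] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *
 *	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */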