/*	$NetBSD: subr_pool.c,v 1.116 2006/04/15 14:23:11 simonb Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.116 2006/04/15 14:23:11 simonb Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
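/*
 * Illustrative sketch (not part of this file): typical use of the pool
 * API declared in <sys/pool.h>.  The item type `struct foo' and the
 * names foo_pool/"foopl" are hypothetical.
 *
 *	static struct pool foo_pool;
 *
 *	void
 *	foo_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *		    "foopl", NULL);	(NULL selects pool_allocator_kmem)
 *	}
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 *	...
 *	pool_destroy(&foo_pool);	(all items must be back in the pool)
 */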
/* List of all pools */
LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx)	(((idx) == 0) ? 0 : (1 << (idx)))

#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
static struct pool_allocator pool_allocator_meta = {
	pool_page_alloc_meta, pool_page_free_meta
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

typedef uint8_t pool_item_freelist_t;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			uint16_t
				phu_off;	/* start offset in page */
			pool_item_freelist_t
				phu_firstfree;	/* first free item */
			/*
			 * XXX it might be better to use
			 * a simple bitmap and ffs(3)
			 */
		} phu_notouch;
	} ph_u;
	uint16_t		ph_nmissing;	/* # of chunks in use */
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_off		ph_u.phu_notouch.phu_off
#define	ph_firstfree	ph_u.phu_notouch.phu_firstfree

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeefU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; destruction is deferred until
 * absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */
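/*
 * Illustrative sketch (not part of this file): layering a pool_cache
 * over a pool so objects stay constructed across free/alloc cycles.
 * The names `struct foo', foo_ctor() and foo_dtor() are hypothetical;
 * pool_cache_get()/pool_cache_put() are the non-paddr wrappers from
 * <sys/pool.h>.
 *
 *	static struct pool foo_pool;
 *	static struct pool_cache foo_cache;
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		simple_lock_init(&f->f_slock);
 *		return 0;		(nonzero fails the allocation)
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		(tear down whatever foo_ctor() set up)
 *	}
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *
 *	f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, f);
 */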
/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *,
				   struct pool_cache_grouplist *);
static void	pcg_grouplist_free(struct pool_cache_grouplist *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	pool_update_curpage(struct pool *);

static int	pool_grow(struct pool *, int);
void		*pool_allocator_alloc(struct pool *, int);
void		pool_allocator_free(struct pool *, void *);

static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
	void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

static int pool_chk_page(struct pool *, const char *,
			 struct pool_item_header *);

/*
 * Pool log entry. An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

#ifdef POOL_DIAGNOSTIC
/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}
"get" : "put", 261 pl->pl_addr); 262 (*pr)("\t\tfile: %s at line %lu\n", 263 pl->pl_file, pl->pl_line); 264 } 265 } 266 if (++n >= pp->pr_logsize) 267 n = 0; 268 } 269 } 270 271 static inline void 272 pr_enter(struct pool *pp, const char *file, long line) 273 { 274 275 if (__predict_false(pp->pr_entered_file != NULL)) { 276 printf("pool %s: reentrancy at file %s line %ld\n", 277 pp->pr_wchan, file, line); 278 printf(" previous entry at file %s line %ld\n", 279 pp->pr_entered_file, pp->pr_entered_line); 280 panic("pr_enter"); 281 } 282 283 pp->pr_entered_file = file; 284 pp->pr_entered_line = line; 285 } 286 287 static inline void 288 pr_leave(struct pool *pp) 289 { 290 291 if (__predict_false(pp->pr_entered_file == NULL)) { 292 printf("pool %s not entered?\n", pp->pr_wchan); 293 panic("pr_leave"); 294 } 295 296 pp->pr_entered_file = NULL; 297 pp->pr_entered_line = 0; 298 } 299 300 static inline void 301 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 302 { 303 304 if (pp->pr_entered_file != NULL) 305 (*pr)("\n\tcurrently entered from file %s line %ld\n", 306 pp->pr_entered_file, pp->pr_entered_line); 307 } 308 #else 309 #define pr_log(pp, v, action, file, line) 310 #define pr_printlog(pp, pi, pr) 311 #define pr_enter(pp, file, line) 312 #define pr_leave(pp) 313 #define pr_enter_check(pp, pr) 314 #endif /* POOL_DIAGNOSTIC */ 315 316 static inline int 317 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 318 const void *v) 319 { 320 const char *cp = v; 321 int idx; 322 323 KASSERT(pp->pr_roflags & PR_NOTOUCH); 324 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 325 KASSERT(idx < pp->pr_itemsperpage); 326 return idx; 327 } 328 329 #define PR_FREELIST_ALIGN(p) \ 330 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 331 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 332 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 333 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 334 335 static inline void 336 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 337 void *obj) 338 { 339 int idx = pr_item_notouch_index(pp, ph, obj); 340 pool_item_freelist_t *freelist = PR_FREELIST(ph); 341 342 KASSERT(freelist[idx] == PR_INDEX_USED); 343 freelist[idx] = ph->ph_firstfree; 344 ph->ph_firstfree = idx; 345 } 346 347 static inline void * 348 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 349 { 350 int idx = ph->ph_firstfree; 351 pool_item_freelist_t *freelist = PR_FREELIST(ph); 352 353 KASSERT(freelist[idx] != PR_INDEX_USED); 354 ph->ph_firstfree = freelist[idx]; 355 freelist[idx] = PR_INDEX_USED; 356 357 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 358 } 359 360 static inline int 361 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 362 { 363 if (a->ph_page < b->ph_page) 364 return (-1); 365 else if (a->ph_page > b->ph_page) 366 return (1); 367 else 368 return (0); 369 } 370 371 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 372 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 373 374 /* 375 * Return the pool page header based on page address. 
static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	if (a->ph_page < b->ph_page)
		return (-1);
	else if (a->ph_page > b->ph_page)
		return (1);
	else
		return (0);
}

SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	tmp.ph_page = page;
	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
	return ph;
}

static void
pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
{
	struct pool_item_header *ph;
	int s;

	while ((ph = LIST_FIRST(pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			s = splvm();
			pool_put(pp->pr_phpool, ph);
			splx(s);
		}
	}
}

/*
 * Remove a page from the pool.
 */
static inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
     struct pool_pagelist *pq)
{

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and queue it for release.
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
	LIST_INSERT_HEAD(pq, ph, ph_pagelist);

	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize all the pools listed in the "pools" link set.
 */
void
link_pool_init(void)
{
	__link_set_decl(pools, struct link_pool_init);
	struct link_pool_init * const *pi;

	__link_set_foreach(pi, pools)
		pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
		    (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
		    (*pi)->palloc);
}
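/*
 * Illustrative sketch (not part of this file): the "pools" link set is
 * populated by the POOL_INIT() macro in <sys/pool.h>, which lets a
 * subsystem declare a static pool at file scope; link_pool_init() then
 * initializes every such pool before malloc() is available.  The pool
 * name below is hypothetical:
 *
 *	POOL_INIT(foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *
 * which is roughly equivalent to declaring `struct pool foo_pool' and
 * arranging for pool_init(&foo_pool, ...) to run at boot.
 */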
/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
#ifdef DEBUG
	struct pool *pp1;
#endif
	size_t trysize, phsize;
	int off, slack, s;

	KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
	    PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));

#ifdef DEBUG
	/*
	 * Check that the pool hasn't already been initialised and
	 * added to the list of all pools.
	 */
	LIST_FOREACH(pp1, &pool_head, pr_poollist) {
		if (pp == pp1)
			panic("pool_init: pool %s already initialised",
			    wchan);
	}
#endif

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

	if (palloc == NULL)
		palloc = &pool_allocator_kmem;
#ifdef POOL_SUBPAGE
	if (size > palloc->pa_pagesz) {
		if (palloc == &pool_allocator_kmem)
			palloc = &pool_allocator_kmem_fullpage;
		else if (palloc == &pool_allocator_nointr)
			palloc = &pool_allocator_nointr_fullpage;
	}
#endif /* POOL_SUBPAGE */
	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
		if (palloc->pa_pagesz == 0)
			palloc->pa_pagesz = PAGE_SIZE;

		TAILQ_INIT(&palloc->pa_list);

		simple_lock_init(&palloc->pa_slock);
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
		palloc->pa_flags |= PA_INITIALIZED;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	LIST_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;

	/*
	 * Decide whether to put the page header off-page, to avoid
	 * wasting too large a part of the page or too big an item.
	 * Off-page page headers go on a splay tree, so we can match
	 * a returned item with its header based on the page address.
	 * We use 1/16 of the page size and about 8 times the item
	 * size as the threshold (XXX: tune)
	 *
	 * However, we'll put the header into the page if we can put
	 * it without wasting any items.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff %= align;
	/* See the comment below about reserved bytes. */
	trysize = palloc->pa_pagesz - ((align - ioff) % align);
	phsize = ALIGN(sizeof(struct pool_item_header));
	if ((pp->pr_roflags & PR_NOTOUCH) == 0 &&
	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}
	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 */
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);
	if ((pp->pr_roflags & PR_NOTOUCH)) {
		int idx;

		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
		    idx++) {
			/* nothing */
		}
		if (idx >= PHPOOL_MAX) {
			/*
			 * if you see this panic, consider tweaking
			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
			 */
			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
			    pp->pr_wchan, pp->pr_itemsperpage);
		}
		pp->pr_phpool = &phpool[idx];
	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		pp->pr_phpool = &phpool[0];
	}
#if defined(DIAGNOSTIC)
	else {
		pp->pr_phpool = NULL;
	}
#endif

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool[0].pr_size == 0) {
		int idx;
		for (idx = 0; idx < PHPOOL_MAX; idx++) {
			static char phpool_names[PHPOOL_MAX][6+1+6+1];
			int nelem;
			size_t sz;

			nelem = PHPOOL_FREELIST_NELEM(idx);
			snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
			    "phpool-%d", nelem);
			sz = sizeof(struct pool_item_header);
			if (nelem) {
				sz = PR_FREELIST_ALIGN(sz)
				    + nelem * sizeof(pool_item_freelist_t);
			}
			pool_init(&phpool[idx], sz, 0, 0, 0,
			    phpool_names[idx], &pool_allocator_meta);
		}
#ifdef POOL_SUBPAGE
		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
		    PR_RECURSIVE, "psppool", &pool_allocator_meta);
#endif
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", &pool_allocator_meta);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	LIST_INSERT_HEAD(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);

	/* Insert this into the list of pools using this allocator. */
	s = splvm();
	simple_lock(&palloc->pa_slock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&palloc->pa_slock);
	splx(s);
}
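/*
 * Worked example (not part of this file) of the layout math above,
 * assuming a 4096-byte page, align = 8, ioff = 0, a 104-byte item, and
 * phsize = ALIGN(sizeof(struct pool_item_header)), say 48 bytes:
 *
 *	104 < MIN(4096 / 16, phsize << 3) = MIN(256, 384)  => PR_PHINPAGE,
 *	    so off = 4096 - 48 = 4048
 *	pr_itemsperpage = 4048 / 104 = 38
 *	slack = 4048 - 38 * 104 = 96, so pr_maxcolor = 96
 *
 * Successive pages then start their items at 0, 8, 16, ... 96 bytes
 * into the page ("cache coloring"), so equal-index items in different
 * pages do not all compete for the same cache lines.
 */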
/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;
	int s;

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	LIST_REMOVE(pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	simple_unlock(&pool_head_slock);

	/* Remove this pool from its allocator's list of pools. */
	s = splvm();
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);
	splx(s);

	s = splvm();
	simple_lock(&pp->pr_slock);

	KASSERT(LIST_EMPTY(&pp->pr_cachelist));

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	simple_unlock(&pp->pr_slock);
	splx(s);

	pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splvm();
		ph = pool_get(pp->pr_phpool, flags);
		splx(s);
	}

	return (ph);
}
/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_itemsperpage == 0))
		panic("pool_get: pool %p: pr_itemsperpage is zero, "
		    "pool not initialized?", pp);
	if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
			    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#endif /* DIAGNOSTIC */
#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
	SCHED_ASSERT_UNLOCKED();
#endif

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
			      &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
		int error;

#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		error = pool_grow(pp, flags);
		pr_enter(pp, file, line);
		if (error != 0) {
			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * wake up once a second and try again,
			 * as the check in pool_cache_put_paddr() is racy.
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, hz, &pp->pr_slock);
			pr_enter(pp, file, line);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if (pp->pr_roflags & PR_NOTOUCH) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#endif
		v = pr_item_notouch_get(pp, ph);
#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif
	} else {
		v = pi = LIST_FIRST(&ph->ph_itemlist);
		if (__predict_false(v == NULL)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nitems == 0)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: items on itemlist, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
		if (__predict_false(pi->pi_magic != PI_MAGIC)) {
			pr_printlog(pp, pi, printf);
			panic("pool_get(%s): free list modified: "
			    "magic=%x; page %p; item addr %p\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
		}
#endif

		/*
		 * Remove from item list.
		 */
		LIST_REMOVE(pi, pi_list);
	}
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (ph->ph_nmissing == pp->pr_itemsperpage) {
#ifdef DIAGNOSTIC
		if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
		    !LIST_EMPTY(&ph->ph_itemlist))) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;
	pr_leave(pp);

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
	return (v);
}
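/*
 * Illustrative sketch (not part of this file): pool_get() returns NULL
 * only to PR_NOWAIT callers (or PR_WAITOK | PR_LIMITFAIL callers at the
 * hard limit); plain PR_WAITOK callers sleep until an item is available.
 * `foo_pool' is hypothetical, and the caller is assumed to already be
 * at the pool's spl level:
 *
 *	struct foo *f;
 *
 *	if ((f = pool_get(&foo_pool, PR_NOWAIT)) == NULL)
 *		return (ENOMEM);	(cannot sleep here, so fail)
 */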
/*
 * Internal version of pool_put().  Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
	SCHED_ASSERT_UNLOCKED();

	page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
	if (pp->pr_roflags & PR_NOTOUCH) {
		pr_item_notouch_put(pp, ph, v);
	} else {
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
		{
			int i, *ip = v;

			for (i = 0; i < pp->pr_size / sizeof(int); i++) {
				*ip++ = PI_MAGIC;
			}
		}
#endif

		LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	}
	KDASSERT(ph->ph_nmissing != 0);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water mark,
	 *	    free the page back to the system.  ONLY CONSIDER
	 *	    FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
	 *	    CLAIM.
	 *
	 *	(2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_minpages &&
		    (pp->pr_npages > pp->pr_maxpages ||
		     (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
			pr_rmpage(pp, ph, pq);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page.  A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon.  This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);
		}
		pool_update_curpage(pp);
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page.  The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}
/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v, &pq);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pool_do_put(pp, v, &pq);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}

#ifdef POOL_DIAGNOSTIC
#define		pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * pool_grow: grow a pool by a page.
 *
 * => called with pool locked.
 * => unlock and relock the pool.
 * => return with pool locked.
 */

static int
pool_grow(struct pool *pp, int flags)
{
	struct pool_item_header *ph = NULL;
	char *cp;

	simple_unlock(&pp->pr_slock);
	cp = pool_allocator_alloc(pp, flags);
	if (__predict_true(cp != NULL)) {
		ph = pool_alloc_item_header(pp, cp, flags);
	}
	if (__predict_false(cp == NULL || ph == NULL)) {
		if (cp != NULL) {
			pool_allocator_free(pp, cp);
		}
		simple_lock(&pp->pr_slock);
		return ENOMEM;
	}

	simple_lock(&pp->pr_slock);
	pool_prime_page(pp, cp, ph);
	pp->pr_npagealloc++;
	return 0;
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	int newpages;
	int error = 0;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return error;
}
/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

#ifdef DIAGNOSTIC
	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	LIST_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	s = splclock();
	ph->ph_time = mono_time;
	splx(s);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	if (pp->pr_roflags & PR_NOTOUCH) {
		pool_item_freelist_t *freelist = PR_FREELIST(ph);
		int i;

		ph->ph_off = cp - storage;
		ph->ph_firstfree = 0;
		for (i = 0; i < n - 1; i++)
			freelist[i] = i + 1;
		freelist[n - 1] = PR_INDEX_EOL;
	} else {
		while (n--) {
			pi = (struct pool_item *)cp;

			KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

			/* Insert on page list */
			LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
			pi->pi_magic = PI_MAGIC;
#endif
			cp = (caddr_t)(cp + pp->pr_size);
		}
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
	}
	return error;
}

static void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}
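/*
 * Illustrative sketch (not part of this file): tuning a pool's water
 * marks and hard limit.  `foo_pool' and the numbers are hypothetical.
 *
 *	pool_setlowat(&foo_pool, 16);    (keep >= 16 items on hand,
 *	                                  useful for interrupt-time gets)
 *	pool_sethiwat(&foo_pool, 1024);  (free idle pages beyond ~1024 items)
 *	pool_sethardlimit(&foo_pool, 2048,
 *	    "WARNING: foo_pool limit reached", 60);
 *	                                 (fail gets beyond 2048 outstanding
 *	                                  items; warn at most once a minute)
 */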
void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

/*
 * Release all complete pages that have not been used recently.
 */
int
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;
	struct timeval curtime, diff;
	int s;

	if (pp->pr_drain_hook != NULL) {
		/*
		 * The drain hook must be called with the pool unlocked.
		 */
		(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
	}

	if (simple_lock_try(&pp->pr_slock) == 0)
		return (0);
	pr_enter(pp, file, line);

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	/*
	 * Reclaim items from the pool's caches.
	 */
	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
		pool_cache_reclaim(pc, &pq, &pcgl);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);
		timersub(&curtime, &ph->ph_time, &diff);
		if (diff.tv_sec < pool_inactive_time)
			continue;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl))
		return 0;

	pr_pagelist_free(pp, &pq);
	pcg_grouplist_free(&pcgl);
	return (1);
}

/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	pp = NULL;
	s = splvm();
	simple_lock(&pool_head_slock);
	if (drainpp == NULL) {
		drainpp = LIST_FIRST(&pool_head);
	}
	if (drainpp) {
		pp = drainpp;
		drainpp = LIST_NEXT(pp, pr_poollist);
	}
	simple_unlock(&pool_head_slock);
	if (pp)
		pool_reclaim(pp);
	splx(s);
}
/*
 * Diagnostic helpers.
 */
void
pool_print(struct pool *pp, const char *modif)
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

void
pool_printall(const char *modif, void (*pr)(const char *, ...))
{
	struct pool *pp;

	if (simple_lock_try(&pool_head_slock) == 0) {
		(*pr)("WARNING: pool_head_slock is locked\n");
	} else {
		simple_unlock(&pool_head_slock);
	}

	LIST_FOREACH(pp, &pool_head, pr_poollist) {
		pool_printit(pp, modif, pr);
	}
}

void
pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{

	if (pp == NULL) {
		(*pr)("Must specify a pool to print.\n");
		return;
	}

	/*
	 * Called from DDB; interrupts should be blocked, and all
	 * other processors should be paused.  We can skip locking
	 * the pool in this case.
	 *
	 * We do a simple_lock_try() just to print the lock
	 * status, however.
	 */

	if (simple_lock_try(&pp->pr_slock) == 0)
		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
	else
		simple_unlock(&pp->pr_slock);

	pool_print1(pp, modif, pr);
}

static void
pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
    void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		if (!(pp->pr_roflags & PR_NOTOUCH)) {
			LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (pi->pi_magic != PI_MAGIC) {
					(*pr)("\t\t\titem %p, magic 0x%x\n",
					    pi, pi->pi_magic);
				}
			}
		}
#endif
	}
}

static void
pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(pp, &pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(pp, &pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(pp, &pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:
	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:
	if (print_cache == 0)
		goto skip_cache;

#define PR_GROUPLIST(pcg)						\
	(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);		\
	for (i = 0; i < PCG_NOBJECTS; i++) {				\
		if (pcg->pcg_objects[i].pcgo_pa !=			\
		    POOL_PADDR_INVALID) {				\
			(*pr)("\t\t\t%p, 0x%llx\n",			\
			    pcg->pcg_objects[i].pcgo_va,		\
			    (unsigned long long)			\
			    pcg->pcg_objects[i].pcgo_pa);		\
		} else {						\
			(*pr)("\t\t\t%p\n",				\
			    pcg->pcg_objects[i].pcgo_va);		\
		}							\
	}

	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
		(*pr)("\tcache %p\n", pc);
		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		(*pr)("\t    full groups:\n");
		LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t    partial groups:\n");
		LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t    empty groups:\n");
		LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
	}
#undef PR_GROUPLIST

 skip_cache:
	pr_enter_check(pp, pr);
}

static int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		       " at page head addr %p (p %p)\n", pp,
			pp->pr_wchan, ph->ph_page,
			ph, page);
		return 1;
	}

	if ((pp->pr_roflags & PR_NOTOUCH) != 0)
		return 0;

	for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = LIST_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: magic=%x;"
			       " page %p; item ordinal %d;"
			       " addr %p (p %p)\n",
				pp->pr_wchan, pi->pi_magic, ph->ph_page,
				n, pi, page);
			panic("pool");
		}
#endif
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		       " item ordinal %d; addr %p (p %p)\n", pp,
			pp->pr_wchan, ph->ph_page,
			n, pi, page);
		return 1;
	}
	return 0;
}


int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);
	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}

out:
	simple_unlock(&pp->pr_slock);
	return (r);
}

/*
 * pool_cache_init:
 *
 *	Initialize a pool cache.
 *
 *	NOTE: If the pool must be protected from interrupts, we expect
 *	to be called at the appropriate interrupt priority level.
 */
void
pool_cache_init(struct pool_cache *pc, struct pool *pp,
    int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *),
    void *arg)
{

	LIST_INIT(&pc->pc_emptygroups);
	LIST_INIT(&pc->pc_fullgroups);
	LIST_INIT(&pc->pc_partgroups);
	simple_lock_init(&pc->pc_slock);

	pc->pc_pool = pp;

	pc->pc_ctor = ctor;
	pc->pc_dtor = dtor;
	pc->pc_arg  = arg;

	pc->pc_hits   = 0;
	pc->pc_misses = 0;

	pc->pc_ngroups = 0;

	pc->pc_nitems = 0;

	simple_lock(&pp->pr_slock);
	LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

/*
 * pool_cache_destroy:
 *
 *	Destroy a pool cache.
 */
void
pool_cache_destroy(struct pool_cache *pc)
{
	struct pool *pp = pc->pc_pool;

	/* First, invalidate the entire cache. */
	pool_cache_invalidate(pc);

	/* ...and remove it from the pool's cache list. */
	simple_lock(&pp->pr_slock);
	LIST_REMOVE(pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

static inline void *
pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
{
	void *object;
	u_int idx;

	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
	KASSERT(pcg->pcg_avail != 0);
	idx = --pcg->pcg_avail;

	KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
	object = pcg->pcg_objects[idx].pcgo_va;
	if (pap != NULL)
		*pap = pcg->pcg_objects[idx].pcgo_pa;
	pcg->pcg_objects[idx].pcgo_va = NULL;

	return (object);
}

static inline void
pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
{
	u_int idx;

	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
	idx = pcg->pcg_avail++;

	KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
	pcg->pcg_objects[idx].pcgo_va = object;
	pcg->pcg_objects[idx].pcgo_pa = pa;
}

static void
pcg_grouplist_free(struct pool_cache_grouplist *pcgl)
{
	struct pool_cache_group *pcg;
	int s;

	s = splvm();
	while ((pcg = LIST_FIRST(pcgl)) != NULL) {
		LIST_REMOVE(pcg, pcg_list);
		pool_put(&pcgpool, pcg);
	}
	splx(s);
}
/*
 * pool_cache_get{,_paddr}:
 *
 *	Get an object from a pool cache (optionally returning
 *	the physical address of the object).
 */
void *
pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_fullgroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		if (object != NULL && pap != NULL) {
#ifdef POOL_VTOPHYS
			*pap = POOL_VTOPHYS(object);
#else
			*pap = POOL_PADDR_INVALID;
#endif
		}
		return (object);
	}

	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg, pap);

	if (pcg->pcg_avail == 0) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put{,_paddr}:
 *
 *	Put an object back to the pool cache (optionally caching the
 *	physical address of the object).
 */
void
pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
{
	struct pool_cache_group *pcg;
	int s;

	if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
		goto destruct;
	}

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_emptygroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No empty groups to free the object to.  Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		s = splvm();
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		splx(s);
		if (pcg == NULL) {
 destruct:

			/*
			 * Unable to allocate a cache group; destruct the object
			 * and free it back to the pool.
			 */
			pool_cache_destruct_object(pc, object);
			return;
		}
		memset(pcg, 0, sizeof(*pcg));
		simple_lock(&pc->pc_slock);
		pc->pc_ngroups++;
		LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
	}

	pc->pc_nitems++;
	pcg_put(pcg, object, pa);

	if (pcg->pcg_avail == PCG_NOBJECTS) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);
}
/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

static void
pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
    struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgdl)
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) {
		npcg = LIST_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, pq);
		}
		pc->pc_ngroups--;
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
	}
}

static void
pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
	LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

	pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
	pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);

	KASSERT(LIST_EMPTY(&pc->pc_partgroups));
	KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
	KASSERT(pc->pc_nitems == 0);
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	simple_lock(&pc->pc_slock);
	simple_lock(&pc->pc_pool->pr_slock);

	pool_do_cache_invalidate(pc, &pq, &pcgl);

	simple_unlock(&pc->pc_pool->pr_slock);
	simple_unlock(&pc->pc_slock);

	pr_pagelist_free(pc->pc_pool, &pq);
	pcg_grouplist_free(&pcgl);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	/*
	 * We're locking in the wrong order (normally pool_cache -> pool,
	 * but the pool is already locked when we get here), so we have
	 * to use trylock.  If we can't lock the pool_cache, it's not really
	 * a big deal here.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;

	pool_do_cache_invalidate(pc, pq, pcgl);

	simple_unlock(&pc->pc_slock);
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
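/*
 * Illustrative sketch (not part of this file): a pool may supply its
 * own backend allocator by passing a struct pool_allocator to
 * pool_init().  The functions and the submap `foo_map' below are
 * hypothetical; pa_pagesz = 0 defaults to PAGE_SIZE.
 *
 *	static void *
 *	foo_page_alloc(struct pool *pp, int flags)
 *	{
 *		boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 *
 *		return ((void *)uvm_km_alloc_poolpage(foo_map, waitok));
 *	}
 *
 *	static void
 *	foo_page_free(struct pool *pp, void *v)
 *	{
 *		uvm_km_free_poolpage(foo_map, (vaddr_t)v);
 *	}
 *
 *	static struct pool_allocator foo_allocator = {
 *		foo_page_alloc, foo_page_free, 0,
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &foo_allocator);
 */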

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_kmem_fullpage = {
	pool_page_alloc, pool_page_free, 0,
};
#else
struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
};
#endif

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_nointr_fullpage = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};
#else
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};
#endif

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
	pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
};

void	*pool_subpage_alloc_nointr(struct pool *, int);
void	pool_subpage_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
	pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
};
#endif /* POOL_SUBPAGE */
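
/*
 * Example (illustrative sketch only): choosing a backend allocator at
 * pool_init() time.  Passing NULL selects the default kmem allocator;
 * a pool that is never touched from interrupt context can name
 * pool_allocator_nointr explicitly.  The "myobj" names below are
 * hypothetical.
 */
#if 0
static struct pool myobj_pool;

static void
myobj_init_example(void)
{

	pool_init(&myobj_pool, sizeof(struct myobj), 0, 0, 0,
	    "myobjpl", &pool_allocator_nointr);
}
#endif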

/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in
 * the pool.  Then we have the resource (typically a vm_map) for this
 * allocator.  Finally, we have physical memory.  Waiting for any of
 * these can be unnecessary when any other is freed, but the kernel
 * doesn't support sleeping on multiple wait channels, so we have to
 * employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an
 * item is returned to the pool), but we set PA_WANT on the allocator.
 * When a page is returned to the allocator and PA_WANT is set,
 * pool_allocator_free will wake up all sleeping pools belonging to
 * this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	LOCK_ASSERT(!simple_lock_held(&org->pr_slock));

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools that use this allocator.
		 * We do this to reclaim VA space.
		 * pa_alloc is responsible for waiting for
		 * physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy() on "start" while we are draining.
		 * But there is no other way to have potentially
		 * sleeping pool_reclaim, non-sleeping locks on
		 * pool_allocator, and some stirring of drained
		 * pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
		    freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most likely
			 * sleep waiting for pages (if not, this won't hurt
			 * that much), and there is no way to set this in
			 * the caller without violating locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}

static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
	void *v;
	int s;

	s = splvm();
	v = pool_get(&psppool, flags);
	splx(s);
	return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	pool_put(&psppool, v);
	splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_subpage_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_subpage_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#endif /* POOL_SUBPAGE */

void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
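
/*
 * Example (illustrative sketch only): a subsystem that caches items
 * privately can register a drain hook so that pool_allocator_alloc()
 * (in the PR_NOWAIT case above) and pool_reclaim() can ask it to give
 * memory back under pressure.  The "myobj_drain" callback is
 * hypothetical, continuing the "myobj" example above.
 */
#if 0
static void
myobj_drain(void *arg, int flags)
{

	/* Return privately cached myobj items to myobj_pool here. */
}

static void
myobj_attach_example(void)
{

	pool_set_drain_hook(&myobj_pool, myobj_drain, NULL);
}
#endif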