/*	$NetBSD: subr_pool.c,v 1.113 2006/03/17 10:09:25 yamt Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.113 2006/03/17 10:09:25 yamt Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size.  Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively.  The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header.  The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/* List of all pools */
LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx)	(((idx) == 0) ? 0 : (1 << (idx)))
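
/*
 * Illustrative usage sketch (not part of this file): a subsystem with a
 * fixed-size object would typically declare a pool, initialize it once,
 * and then allocate and release items with pool_get()/pool_put().  The
 * names "foo", foo_pool and "foopl" below are hypothetical.
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	...
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 */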

#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
static struct pool_allocator pool_allocator_meta = {
	pool_page_alloc_meta, pool_page_free_meta
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

typedef uint8_t pool_item_freelist_t;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
			ph_node;	/* Off-page page headers */
	caddr_t		ph_page;	/* this page's address */
	struct timeval	ph_time;	/* last referenced */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			uint16_t
				phu_off;	/* start offset in page */
			pool_item_freelist_t
				phu_firstfree;	/* first free item */
			/*
			 * XXX it might be better to use
			 * a simple bitmap and ffs(3)
			 */
		} phu_notouch;
	} ph_u;
	uint16_t	ph_nmissing;	/* # of chunks in use */
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_off		ph_u.phu_notouch.phu_off
#define	ph_firstfree	ph_u.phu_notouch.phu_firstfree

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeefU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; both are deferred until
 * absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */
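
/*
 * Illustrative pool cache usage sketch (not part of this file): a cache is
 * layered on top of an existing pool and given optional constructor and
 * destructor callbacks.  The identifiers foo_pool, foo_cache, foo_ctor and
 * foo_dtor below are hypothetical.
 *
 *	static struct pool_cache foo_cache;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *	...
 *	struct foo *f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, f);
 */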

/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *,
		    struct pool_cache_grouplist *);
static void	pcg_grouplist_free(struct pool_cache_grouplist *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	pool_update_curpage(struct pool *);

static int	pool_grow(struct pool *, int);
void		*pool_allocator_alloc(struct pool *, int);
void		pool_allocator_free(struct pool *, void *);

static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
	void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

static int pool_chk_page(struct pool *, const char *,
			 struct pool_item_header *);

/*
 * Pool log entry.  An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

#ifdef POOL_DIAGNOSTIC
/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry.  Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    void (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
"get" : "put", 261 pl->pl_addr); 262 (*pr)("\t\tfile: %s at line %lu\n", 263 pl->pl_file, pl->pl_line); 264 } 265 } 266 if (++n >= pp->pr_logsize) 267 n = 0; 268 } 269 } 270 271 static inline void 272 pr_enter(struct pool *pp, const char *file, long line) 273 { 274 275 if (__predict_false(pp->pr_entered_file != NULL)) { 276 printf("pool %s: reentrancy at file %s line %ld\n", 277 pp->pr_wchan, file, line); 278 printf(" previous entry at file %s line %ld\n", 279 pp->pr_entered_file, pp->pr_entered_line); 280 panic("pr_enter"); 281 } 282 283 pp->pr_entered_file = file; 284 pp->pr_entered_line = line; 285 } 286 287 static inline void 288 pr_leave(struct pool *pp) 289 { 290 291 if (__predict_false(pp->pr_entered_file == NULL)) { 292 printf("pool %s not entered?\n", pp->pr_wchan); 293 panic("pr_leave"); 294 } 295 296 pp->pr_entered_file = NULL; 297 pp->pr_entered_line = 0; 298 } 299 300 static inline void 301 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 302 { 303 304 if (pp->pr_entered_file != NULL) 305 (*pr)("\n\tcurrently entered from file %s line %ld\n", 306 pp->pr_entered_file, pp->pr_entered_line); 307 } 308 #else 309 #define pr_log(pp, v, action, file, line) 310 #define pr_printlog(pp, pi, pr) 311 #define pr_enter(pp, file, line) 312 #define pr_leave(pp) 313 #define pr_enter_check(pp, pr) 314 #endif /* POOL_DIAGNOSTIC */ 315 316 static inline int 317 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 318 const void *v) 319 { 320 const char *cp = v; 321 int idx; 322 323 KASSERT(pp->pr_roflags & PR_NOTOUCH); 324 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 325 KASSERT(idx < pp->pr_itemsperpage); 326 return idx; 327 } 328 329 #define PR_FREELIST_ALIGN(p) \ 330 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 331 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 332 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 333 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 334 335 static inline void 336 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 337 void *obj) 338 { 339 int idx = pr_item_notouch_index(pp, ph, obj); 340 pool_item_freelist_t *freelist = PR_FREELIST(ph); 341 342 KASSERT(freelist[idx] == PR_INDEX_USED); 343 freelist[idx] = ph->ph_firstfree; 344 ph->ph_firstfree = idx; 345 } 346 347 static inline void * 348 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 349 { 350 int idx = ph->ph_firstfree; 351 pool_item_freelist_t *freelist = PR_FREELIST(ph); 352 353 KASSERT(freelist[idx] != PR_INDEX_USED); 354 ph->ph_firstfree = freelist[idx]; 355 freelist[idx] = PR_INDEX_USED; 356 357 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 358 } 359 360 static inline int 361 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 362 { 363 if (a->ph_page < b->ph_page) 364 return (-1); 365 else if (a->ph_page > b->ph_page) 366 return (1); 367 else 368 return (0); 369 } 370 371 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 372 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 373 374 /* 375 * Return the pool page header based on page address. 
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	tmp.ph_page = page;
	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
	return ph;
}

static void
pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
{
	struct pool_item_header *ph;
	int s;

	while ((ph = LIST_FIRST(pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			s = splvm();
			pool_put(pp->pr_phpool, ph);
			splx(s);
		}
	}
}

/*
 * Remove a page from the pool.
 */
static inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and queue it for release.
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
	LIST_INSERT_HEAD(pq, ph, ph_pagelist);

	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize all the pools listed in the "pools" link set.
 */
void
link_pool_init(void)
{
	__link_set_decl(pools, struct link_pool_init);
	struct link_pool_init * const *pi;

	__link_set_foreach(pi, pools)
		pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
		    (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
		    (*pi)->palloc);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;
	size_t trysize, phsize;
	int s;

	KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
	    PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

	if (palloc == NULL)
		palloc = &pool_allocator_kmem;
#ifdef POOL_SUBPAGE
	if (size > palloc->pa_pagesz) {
		if (palloc == &pool_allocator_kmem)
			palloc = &pool_allocator_kmem_fullpage;
		else if (palloc == &pool_allocator_nointr)
			palloc = &pool_allocator_nointr_fullpage;
	}
#endif /* POOL_SUBPAGE */
	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
		if (palloc->pa_pagesz == 0)
			palloc->pa_pagesz = PAGE_SIZE;

		TAILQ_INIT(&palloc->pa_list);

		simple_lock_init(&palloc->pa_slock);
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
		palloc->pa_flags |= PA_INITIALIZED;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	LIST_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;

	/*
	 * Decide whether to put the page header off-page, to avoid wasting
	 * too large a part of the page or too large an item.  Off-page page
	 * headers are kept in a splay tree, so we can match a returned item
	 * with its header based on the page address.  We use 1/16 of the
	 * page size and about 8 times the item size as the threshold.
	 * (XXX: tune)
	 *
	 * However, we'll put the header into the page if we can put
	 * it without wasting any items.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff %= align;
	/* See the comment below about reserved bytes. */
	trysize = palloc->pa_pagesz - ((align - ioff) % align);
	phsize = ALIGN(sizeof(struct pool_item_header));
	if ((pp->pr_roflags & PR_NOTOUCH) == 0 &&
	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item.  This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 */
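	/*
	 * Illustrative arithmetic with hypothetical numbers (not taken from
	 * any particular machine): for pa_pagesz = 4096, item size 128,
	 * align = 8, ioff = 0 and an assumed in-page header size of 48,
	 * the item size is below MIN(4096 / 16, 48 << 3) = 256, so the
	 * header lives in the page: off = 4096 - 48 = 4048 and
	 * pr_itemsperpage = 4048 / 128 = 31, leaving 4048 - 31 * 128 = 80
	 * bytes of slack for the cache coloring set up below.
	 */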
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);
	if ((pp->pr_roflags & PR_NOTOUCH)) {
		int idx;

		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
		    idx++) {
			/* nothing */
		}
		if (idx >= PHPOOL_MAX) {
			/*
			 * if you see this panic, consider tweaking
			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
			 */
			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
			    pp->pr_wchan, pp->pr_itemsperpage);
		}
		pp->pr_phpool = &phpool[idx];
	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		pp->pr_phpool = &phpool[0];
	}
#if defined(DIAGNOSTIC)
	else {
		pp->pr_phpool = NULL;
	}
#endif

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool[0].pr_size == 0) {
		int idx;
		for (idx = 0; idx < PHPOOL_MAX; idx++) {
			static char phpool_names[PHPOOL_MAX][6+1+6+1];
			int nelem;
			size_t sz;

			nelem = PHPOOL_FREELIST_NELEM(idx);
			snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
			    "phpool-%d", nelem);
			sz = sizeof(struct pool_item_header);
			if (nelem) {
				sz = PR_FREELIST_ALIGN(sz)
				    + nelem * sizeof(pool_item_freelist_t);
			}
			pool_init(&phpool[idx], sz, 0, 0, 0,
			    phpool_names[idx], &pool_allocator_meta);
		}
#ifdef POOL_SUBPAGE
		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
		    PR_RECURSIVE, "psppool", &pool_allocator_meta);
#endif
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", &pool_allocator_meta);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	LIST_INSERT_HEAD(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);

	/* Insert this into the list of pools using this allocator. */
	s = splvm();
	simple_lock(&palloc->pa_slock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&palloc->pa_slock);
	splx(s);
}

/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;
	int s;

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	LIST_REMOVE(pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	simple_unlock(&pool_head_slock);

	/* Remove this pool from its allocator's list of pools. */
	s = splvm();
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);
	splx(s);

	s = splvm();
	simple_lock(&pp->pr_slock);

	KASSERT(LIST_EMPTY(&pp->pr_cachelist));

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	simple_unlock(&pp->pr_slock);
	splx(s);

	pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splvm();
		ph = pool_get(pp->pr_phpool, flags);
		splx(s);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_itemsperpage == 0))
		panic("pool_get: pool %p: pr_itemsperpage is zero, "
		    "pool not initialized?", pp);
	if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
	    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#endif /* DIAGNOSTIC */
#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
	SCHED_ASSERT_UNLOCKED();
#endif

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket.  In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
		int error;

#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		error = pool_grow(pp, flags);
		pr_enter(pp, file, line);
		if (error != 0) {
			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * wake up once a second and try again,
			 * as the check in pool_cache_put_paddr() is racy.
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, hz, &pp->pr_slock);
			pr_enter(pp, file, line);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if (pp->pr_roflags & PR_NOTOUCH) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#endif
		v = pr_item_notouch_get(pp, ph);
#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif
	} else {
		v = pi = LIST_FIRST(&ph->ph_itemlist);
		if (__predict_false(v == NULL)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nitems == 0)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: items on itemlist, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
		if (__predict_false(pi->pi_magic != PI_MAGIC)) {
			pr_printlog(pp, pi, printf);
			panic("pool_get(%s): free list modified: "
			    "magic=%x; page %p; item addr %p\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
		}
#endif

		/*
		 * Remove from item list.
		 */
		LIST_REMOVE(pi, pi_list);
	}
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (ph->ph_nmissing == pp->pr_itemsperpage) {
#ifdef DIAGNOSTIC
		if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
		    !LIST_EMPTY(&ph->ph_itemlist))) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;
	pr_leave(pp);

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
	return (v);
}

/*
 * Internal version of pool_put().  Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
	SCHED_ASSERT_UNLOCKED();

	page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
	if (pp->pr_roflags & PR_NOTOUCH) {
		pr_item_notouch_put(pp, ph, v);
	} else {
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
		{
			int i, *ip = v;

			for (i = 0; i < pp->pr_size / sizeof(int); i++) {
				*ip++ = PI_MAGIC;
			}
		}
#endif

		LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	}
	KDASSERT(ph->ph_nmissing != 0);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water mark,
	 *	    free the page back to the system.  ONLY CONSIDER
	 *	    FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
	 *	    CLAIM.
	 *
	 *	(2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_minpages &&
		    (pp->pr_npages > pp->pr_maxpages ||
		    (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
			pr_rmpage(pp, ph, pq);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page.  A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon.  This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);
		}
		pool_update_curpage(pp);
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page.  The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v, &pq);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pool_do_put(pp, v, &pq);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}

#ifdef POOL_DIAGNOSTIC
#define	pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * pool_grow: grow a pool by a page.
 *
 * => called with pool locked.
 * => unlock and relock the pool.
 * => return with pool locked.
 */

static int
pool_grow(struct pool *pp, int flags)
{
	struct pool_item_header *ph = NULL;
	char *cp;

	simple_unlock(&pp->pr_slock);
	cp = pool_allocator_alloc(pp, flags);
	if (__predict_true(cp != NULL)) {
		ph = pool_alloc_item_header(pp, cp, flags);
	}
	if (__predict_false(cp == NULL || ph == NULL)) {
		if (cp != NULL) {
			pool_allocator_free(pp, cp);
		}
		simple_lock(&pp->pr_slock);
		return ENOMEM;
	}

	simple_lock(&pp->pr_slock);
	pool_prime_page(pp, cp, ph);
	pp->pr_npagealloc++;
	return 0;
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	int newpages;
	int error = 0;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return error;
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

#ifdef DIAGNOSTIC
	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	LIST_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	s = splclock();
	ph->ph_time = mono_time;
	splx(s);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	if (pp->pr_roflags & PR_NOTOUCH) {
		pool_item_freelist_t *freelist = PR_FREELIST(ph);
		int i;

		ph->ph_off = cp - storage;
		ph->ph_firstfree = 0;
		for (i = 0; i < n - 1; i++)
			freelist[i] = i + 1;
		freelist[n - 1] = PR_INDEX_EOL;
	} else {
		while (n--) {
			pi = (struct pool_item *)cp;

			KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

			/* Insert on page list */
			LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
			pi->pi_magic = PI_MAGIC;
#endif
			cp = (caddr_t)(cp + pp->pr_size);
		}
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
	}
	return error;
}

static void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}
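
/*
 * Illustrative sketch (not part of this file): callers usually size a pool
 * right after pool_init(), e.g. keeping at least 16 items ready, capping
 * page growth, and setting a rate-limited hard-limit warning.  The pool
 * and the numbers below are hypothetical.
 *
 *	pool_setlowat(&foo_pool, 16);
 *	pool_sethiwat(&foo_pool, 256);
 *	pool_sethardlimit(&foo_pool, 1024, "foo_pool: hard limit reached", 60);
 */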

void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

/*
 * Release all complete pages that have not been used recently.
 */
int
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;
	struct timeval curtime, diff;
	int s;

	if (pp->pr_drain_hook != NULL) {
		/*
		 * The drain hook must be called with the pool unlocked.
		 */
		(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
	}

	if (simple_lock_try(&pp->pr_slock) == 0)
		return (0);
	pr_enter(pp, file, line);

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	/*
	 * Reclaim items from the pool's caches.
	 */
	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
		pool_cache_reclaim(pc, &pq, &pcgl);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);
		timersub(&curtime, &ph->ph_time, &diff);
		if (diff.tv_sec < pool_inactive_time)
			continue;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl))
		return 0;

	pr_pagelist_free(pp, &pq);
	pcg_grouplist_free(&pcgl);
	return (1);
}

/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	pp = NULL;
	s = splvm();
	simple_lock(&pool_head_slock);
	if (drainpp == NULL) {
		drainpp = LIST_FIRST(&pool_head);
	}
	if (drainpp) {
		pp = drainpp;
		drainpp = LIST_NEXT(pp, pr_poollist);
	}
	simple_unlock(&pool_head_slock);
	pool_reclaim(pp);
	splx(s);
}

/*
 * Diagnostic helpers.
 */
void
pool_print(struct pool *pp, const char *modif)
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

void
pool_printall(const char *modif, void (*pr)(const char *, ...))
{
	struct pool *pp;

	if (simple_lock_try(&pool_head_slock) == 0) {
		(*pr)("WARNING: pool_head_slock is locked\n");
	} else {
		simple_unlock(&pool_head_slock);
	}

	LIST_FOREACH(pp, &pool_head, pr_poollist) {
		pool_printit(pp, modif, pr);
	}
}

void
pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{

	if (pp == NULL) {
		(*pr)("Must specify a pool to print.\n");
		return;
	}

	/*
	 * Called from DDB; interrupts should be blocked, and all
	 * other processors should be paused.  We can skip locking
	 * the pool in this case.
	 *
	 * We do a simple_lock_try() just to print the lock
	 * status, however.
	 */

	if (simple_lock_try(&pp->pr_slock) == 0)
		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
	else
		simple_unlock(&pp->pr_slock);

	pool_print1(pp, modif, pr);
}

static void
pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
    void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		if (!(pp->pr_roflags & PR_NOTOUCH)) {
			LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (pi->pi_magic != PI_MAGIC) {
					(*pr)("\t\t\titem %p, magic 0x%x\n",
					    pi, pi->pi_magic);
				}
			}
		}
#endif
	}
}

static void
pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(pp, &pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(pp, &pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(pp, &pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:
	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:
	if (print_cache == 0)
		goto skip_cache;

#define PR_GROUPLIST(pcg)						\
	(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);		\
	for (i = 0; i < PCG_NOBJECTS; i++) {				\
		if (pcg->pcg_objects[i].pcgo_pa !=			\
		    POOL_PADDR_INVALID) {				\
			(*pr)("\t\t\t%p, 0x%llx\n",			\
			    pcg->pcg_objects[i].pcgo_va,		\
			    (unsigned long long)			\
			    pcg->pcg_objects[i].pcgo_pa);		\
		} else {						\
			(*pr)("\t\t\t%p\n",				\
			    pcg->pcg_objects[i].pcgo_va);		\
		}							\
	}

	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
		(*pr)("\tcache %p\n", pc);
		(*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		(*pr)("\t full groups:\n");
		LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t partial groups:\n");
		LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t empty groups:\n");
		LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
	}
#undef PR_GROUPLIST

 skip_cache:
	pr_enter_check(pp, pr);
}

static int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " at page head addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    ph, page);
		return 1;
	}

	if ((pp->pr_roflags & PR_NOTOUCH) != 0)
		return 0;

	for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = LIST_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: magic=%x;"
			    " page %p; item ordinal %d;"
			    " addr %p (p %p)\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page,
			    n, pi, page);
			panic("pool");
		}
#endif
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    n, pi, page);
		return 1;
	}
	return 0;
}


int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);
	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}

 out:
	simple_unlock(&pp->pr_slock);
	return (r);
}

/*
 * pool_cache_init:
 *
 *	Initialize a pool cache.
 *
 *	NOTE: If the pool must be protected from interrupts, we expect
 *	to be called at the appropriate interrupt priority level.
 */
void
pool_cache_init(struct pool_cache *pc, struct pool *pp,
    int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *),
    void *arg)
{

	LIST_INIT(&pc->pc_emptygroups);
	LIST_INIT(&pc->pc_fullgroups);
	LIST_INIT(&pc->pc_partgroups);
	simple_lock_init(&pc->pc_slock);

	pc->pc_pool = pp;

	pc->pc_ctor = ctor;
	pc->pc_dtor = dtor;
	pc->pc_arg = arg;

	pc->pc_hits = 0;
	pc->pc_misses = 0;

	pc->pc_ngroups = 0;

	pc->pc_nitems = 0;

	simple_lock(&pp->pr_slock);
	LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

/*
 * pool_cache_destroy:
 *
 *	Destroy a pool cache.
 */
void
pool_cache_destroy(struct pool_cache *pc)
{
	struct pool *pp = pc->pc_pool;

	/* First, invalidate the entire cache. */
	pool_cache_invalidate(pc);

	/* ...and remove it from the pool's cache list. */
	simple_lock(&pp->pr_slock);
	LIST_REMOVE(pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

static inline void *
pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
{
	void *object;
	u_int idx;

	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
	KASSERT(pcg->pcg_avail != 0);
	idx = --pcg->pcg_avail;

	KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
	object = pcg->pcg_objects[idx].pcgo_va;
	if (pap != NULL)
		*pap = pcg->pcg_objects[idx].pcgo_pa;
	pcg->pcg_objects[idx].pcgo_va = NULL;

	return (object);
}

static inline void
pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
{
	u_int idx;

	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
	idx = pcg->pcg_avail++;

	KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
	pcg->pcg_objects[idx].pcgo_va = object;
	pcg->pcg_objects[idx].pcgo_pa = pa;
}

static void
pcg_grouplist_free(struct pool_cache_grouplist *pcgl)
{
	struct pool_cache_group *pcg;
	int s;

	s = splvm();
	while ((pcg = LIST_FIRST(pcgl)) != NULL) {
		LIST_REMOVE(pcg, pcg_list);
		pool_put(&pcgpool, pcg);
	}
	splx(s);
}

/*
 * pool_cache_get{,_paddr}:
 *
 *	Get an object from a pool cache (optionally returning
 *	the physical address of the object).
 */
void *
pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_fullgroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		if (object != NULL && pap != NULL) {
#ifdef POOL_VTOPHYS
			*pap = POOL_VTOPHYS(object);
#else
			*pap = POOL_PADDR_INVALID;
#endif
		}
		return (object);
	}

	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg, pap);

	if (pcg->pcg_avail == 0) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put{,_paddr}:
 *
 *	Put an object back to the pool cache (optionally caching the
 *	physical address of the object).
 */
void
pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
{
	struct pool_cache_group *pcg;
	int s;

	if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
		goto destruct;
	}

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_emptygroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No empty groups to free the object to.  Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		s = splvm();
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		splx(s);
		if (pcg == NULL) {
 destruct:

			/*
			 * Unable to allocate a cache group; destruct the object
			 * and free it back to the pool.
			 */
			pool_cache_destruct_object(pc, object);
			return;
		}
		memset(pcg, 0, sizeof(*pcg));
		simple_lock(&pc->pc_slock);
		pc->pc_ngroups++;
		LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
	}

	pc->pc_nitems++;
	pcg_put(pcg, object, pa);

	if (pcg->pcg_avail == PCG_NOBJECTS) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

static void
pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
    struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgdl)
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) {
		npcg = LIST_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, pq);
		}
		pc->pc_ngroups--;
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
	}
}

static void
pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
	LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

	pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
	pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);

	KASSERT(LIST_EMPTY(&pc->pc_partgroups));
	KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
	KASSERT(pc->pc_nitems == 0);
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	simple_lock(&pc->pc_slock);
	simple_lock(&pc->pc_pool->pr_slock);

	pool_do_cache_invalidate(pc, &pq, &pcgl);

	simple_unlock(&pc->pc_pool->pr_slock);
	simple_unlock(&pc->pc_slock);

	pr_pagelist_free(pc->pc_pool, &pq);
	pcg_grouplist_free(&pcgl);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	/*
	 * We're locking in the wrong order (normally pool_cache -> pool,
	 * but the pool is already locked when we get here), so we have
	 * to use trylock.  If we can't lock the pool_cache, it's not really
	 * a big deal here.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;

	pool_do_cache_invalidate(pc, pq, pcgl);

	simple_unlock(&pc->pc_slock);
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
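
/*
 * Illustrative sketch of a custom back-end allocator (not part of this
 * file): a pool can be given its own page allocator by filling in a
 * struct pool_allocator and passing it to pool_init().  The names
 * foo_page_alloc, foo_page_free and foo_allocator are hypothetical.
 *
 *	void *foo_page_alloc(struct pool *, int);
 *	void foo_page_free(struct pool *, void *);
 *
 *	struct pool_allocator foo_allocator = {
 *		foo_page_alloc, foo_page_free, 0,
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &foo_allocator);
 */
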

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *      pool_allocator_kmem - the default when no allocator is specified
 *
 *      pool_allocator_nointr - used for pools that will not be accessed
 *      in interrupt context.
 */
void *pool_page_alloc(struct pool *, int);
void pool_page_free(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_kmem_fullpage = {
        pool_page_alloc, pool_page_free, 0,
};
#else
struct pool_allocator pool_allocator_kmem = {
        pool_page_alloc, pool_page_free, 0,
};
#endif

void *pool_page_alloc_nointr(struct pool *, int);
void pool_page_free_nointr(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_nointr_fullpage = {
        pool_page_alloc_nointr, pool_page_free_nointr, 0,
};
#else
struct pool_allocator pool_allocator_nointr = {
        pool_page_alloc_nointr, pool_page_free_nointr, 0,
};
#endif

#ifdef POOL_SUBPAGE
void *pool_subpage_alloc(struct pool *, int);
void pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
        pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
};

void *pool_subpage_alloc_nointr(struct pool *, int);
void pool_subpage_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
        pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
};
#endif /* POOL_SUBPAGE */
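
/*
 * Example: selecting a backend allocator at pool_init() time.  A minimal,
 * illustrative sketch only: the "bar" names are hypothetical, and the
 * pool_init() signature of this revision is assumed.  Passing a NULL
 * allocator selects the default, pool_allocator_kmem, as described above.
 * Kept under "#if 0" so it is never compiled.
 */
#if 0
struct bar {                            /* hypothetical item type */
        int     b_val;
};

static struct pool bar_intr_pool;       /* may be used from interrupt context */
static struct pool bar_proc_pool;       /* used from process context only */

static void
bar_init(void)
{

        /* NULL allocator: the default, pool_allocator_kmem. */
        pool_init(&bar_intr_pool, sizeof(struct bar), 0, 0, 0,
            "barintr", NULL);

        /* Never touched from interrupt context, so the nointr allocator is fine. */
        pool_init(&bar_proc_pool, sizeof(struct bar), 0, 0, 0,
            "barproc", &pool_allocator_nointr);
}
#endif  /* example */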

/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in the
 * pool.  Then we have the resource (typically a vm_map) for this allocator.
 * Finally, we have physical memory.  Waiting for any of these can be
 * unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple wait channels, so we have to employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an item
 * is returned to the pool), but we set PA_WANT on the allocator.  When a
 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 * will wake up all sleeping pools belonging to this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
        struct pool_allocator *pa = org->pr_alloc;
        struct pool *pp, *start;
        int s, freed;
        void *res;

        LOCK_ASSERT(!simple_lock_held(&org->pr_slock));

        do {
                if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
                        return (res);
                if ((flags & PR_WAITOK) == 0) {
                        /*
                         * We only run the drain hook here if PR_NOWAIT.
                         * In other cases, the hook will be run in
                         * pool_reclaim().
                         */
                        if (org->pr_drain_hook != NULL) {
                                (*org->pr_drain_hook)(org->pr_drain_hook_arg,
                                    flags);
                                if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
                                        return (res);
                        }
                        break;
                }

                /*
                 * Drain all pools that use this allocator.  We do this
                 * to reclaim VA space; pa_alloc is responsible for
                 * waiting for physical memory.
                 *
                 * XXX We risk looping forever if someone calls
                 * pool_destroy() on "start" while we are draining.  But
                 * there is no other way to combine a potentially sleeping
                 * pool_reclaim(), non-sleeping locks on the pool_allocator,
                 * and some stirring of drained pools in the allocator.
                 *
                 * XXX Maybe we should use pool_head_slock for locking
                 * the allocators?
                 */
                freed = 0;

                s = splvm();
                simple_lock(&pa->pa_slock);
                pp = start = TAILQ_FIRST(&pa->pa_list);
                do {
                        TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
                        TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
                        simple_unlock(&pa->pa_slock);
                        freed = pool_reclaim(pp);
                        simple_lock(&pa->pa_slock);
                } while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
                    freed == 0);

                if (freed == 0) {
                        /*
                         * We set PA_WANT here; the caller will most likely
                         * sleep waiting for pages (if not, this won't hurt
                         * much), and there is no way to set this in the
                         * caller without violating the locking order.
                         */
                        pa->pa_flags |= PA_WANT;
                }
                simple_unlock(&pa->pa_slock);
                splx(s);
        } while (freed);
        return (NULL);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
        struct pool_allocator *pa = pp->pr_alloc;
        int s;

        LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

        (*pa->pa_free)(pp, v);

        s = splvm();
        simple_lock(&pa->pa_slock);
        if ((pa->pa_flags & PA_WANT) == 0) {
                simple_unlock(&pa->pa_slock);
                splx(s);
                return;
        }

        TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
                simple_lock(&pp->pr_slock);
                if ((pp->pr_flags & PR_WANTED) != 0) {
                        pp->pr_flags &= ~PR_WANTED;
                        wakeup(pp);
                }
                simple_unlock(&pp->pr_slock);
        }
        pa->pa_flags &= ~PA_WANT;
        simple_unlock(&pa->pa_slock);
        splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
        boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

        return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

        uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}

static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
        boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

        return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

        uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
        void *v;
        int s;

        s = splvm();
        v = pool_get(&psppool, flags);
        splx(s);
        return (v);
}

void
pool_subpage_free(struct pool *pp, void *v)
{
        int s;

        s = splvm();
        pool_put(&psppool, v);
        splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_subpage_alloc_nointr(struct pool *pp, int flags)
{

        return (pool_subpage_alloc(pp, flags));
}

void
pool_subpage_free_nointr(struct pool *pp, void *v)
{

        pool_subpage_free(pp, v);
}
#endif /* POOL_SUBPAGE */

void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
        boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

        return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

        uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
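
/*
 * Example: a caller-supplied backend allocator.  A minimal, illustrative
 * sketch only: the "baz" names and the private baz_map submap are
 * hypothetical.  As with the allocators above, only pa_alloc, pa_free and
 * the page size (0 selects the default) are filled in, and the structure
 * is handed to pool_init().  Kept under "#if 0" so it is never compiled.
 */
#if 0
extern struct vm_map *baz_map;          /* hypothetical private submap */

struct baz {                            /* hypothetical item type */
        int     z_val;
};

static void *
baz_page_alloc(struct pool *pp, int flags)
{
        boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

        /* Back this pool with its own submap instead of kmem_map. */
        return ((void *) uvm_km_alloc_poolpage(baz_map, waitok));
}

static void
baz_page_free(struct pool *pp, void *v)
{

        uvm_km_free_poolpage(baz_map, (vaddr_t) v);
}

static struct pool_allocator baz_allocator = {
        baz_page_alloc, baz_page_free, 0,
};

static struct pool baz_pool;

static void
baz_init(void)
{

        pool_init(&baz_pool, sizeof(struct baz), 0, 0, 0,
            "bazpl", &baz_allocator);
}
#endif  /* example */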