/*	$NetBSD: subr_pool.c,v 1.110 2005/12/24 19:12:23 perry Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.110 2005/12/24 19:12:23 perry Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size.  Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively.  The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header.  The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
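/*
 * Illustrative usage sketch (hypothetical pool and item type, not part
 * of this file): a subsystem declares a pool, initializes it once, and
 * then gets and puts fixed-size items.
 *
 *	static struct pool foo_pool;
 *	struct foo { int f_state; };
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	f->f_state = 0;
 *	pool_put(&foo_pool, f);
 */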
/* List of all pools */
LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx)	(((idx) == 0) ? 0 : (1 << (idx)))

#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
static struct pool_allocator pool_allocator_meta = {
	pool_page_alloc_meta, pool_page_free_meta
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

typedef uint8_t pool_item_freelist_t;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			uint16_t
				phu_off;	/* start offset in page */
			pool_item_freelist_t
				phu_firstfree;	/* first free item */
			/*
			 * XXX it might be better to use
			 * a simple bitmap and ffs(3)
			 */
		} phu_notouch;
	} ph_u;
	uint16_t		ph_nmissing;	/* # of chunks in use */
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_off		ph_u.phu_notouch.phu_off
#define	ph_firstfree	ph_u.phu_notouch.phu_firstfree

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeefU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */
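/*
 * Illustrative cache usage (hypothetical names; a sketch, not part of
 * this file): a constructor/destructor pair is registered with
 * pool_cache_init() so objects can be kept in constructed form.
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		f->f_state = 0;
 *		return 0;
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *	}
 *
 *	static struct pool_cache foo_cache;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *	struct foo *f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	pool_cache_put(&foo_cache, f);
 */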
/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *,
		    struct pool_cache_grouplist *);
static void	pcg_grouplist_free(struct pool_cache_grouplist *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	pool_update_curpage(struct pool *);

void		*pool_allocator_alloc(struct pool *, int);
void		pool_allocator_free(struct pool *, void *);

static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
	void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

static int pool_chk_page(struct pool *, const char *,
			 struct pool_item_header *);

/*
 * Pool log entry.  An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

#ifdef POOL_DIAGNOSTIC
/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry.  Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}
"get" : "put", 260 pl->pl_addr); 261 (*pr)("\t\tfile: %s at line %lu\n", 262 pl->pl_file, pl->pl_line); 263 } 264 } 265 if (++n >= pp->pr_logsize) 266 n = 0; 267 } 268 } 269 270 static inline void 271 pr_enter(struct pool *pp, const char *file, long line) 272 { 273 274 if (__predict_false(pp->pr_entered_file != NULL)) { 275 printf("pool %s: reentrancy at file %s line %ld\n", 276 pp->pr_wchan, file, line); 277 printf(" previous entry at file %s line %ld\n", 278 pp->pr_entered_file, pp->pr_entered_line); 279 panic("pr_enter"); 280 } 281 282 pp->pr_entered_file = file; 283 pp->pr_entered_line = line; 284 } 285 286 static inline void 287 pr_leave(struct pool *pp) 288 { 289 290 if (__predict_false(pp->pr_entered_file == NULL)) { 291 printf("pool %s not entered?\n", pp->pr_wchan); 292 panic("pr_leave"); 293 } 294 295 pp->pr_entered_file = NULL; 296 pp->pr_entered_line = 0; 297 } 298 299 static inline void 300 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 301 { 302 303 if (pp->pr_entered_file != NULL) 304 (*pr)("\n\tcurrently entered from file %s line %ld\n", 305 pp->pr_entered_file, pp->pr_entered_line); 306 } 307 #else 308 #define pr_log(pp, v, action, file, line) 309 #define pr_printlog(pp, pi, pr) 310 #define pr_enter(pp, file, line) 311 #define pr_leave(pp) 312 #define pr_enter_check(pp, pr) 313 #endif /* POOL_DIAGNOSTIC */ 314 315 static inline int 316 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 317 const void *v) 318 { 319 const char *cp = v; 320 int idx; 321 322 KASSERT(pp->pr_roflags & PR_NOTOUCH); 323 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 324 KASSERT(idx < pp->pr_itemsperpage); 325 return idx; 326 } 327 328 #define PR_FREELIST_ALIGN(p) \ 329 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 330 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 331 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 332 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 333 334 static inline void 335 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 336 void *obj) 337 { 338 int idx = pr_item_notouch_index(pp, ph, obj); 339 pool_item_freelist_t *freelist = PR_FREELIST(ph); 340 341 KASSERT(freelist[idx] == PR_INDEX_USED); 342 freelist[idx] = ph->ph_firstfree; 343 ph->ph_firstfree = idx; 344 } 345 346 static inline void * 347 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 348 { 349 int idx = ph->ph_firstfree; 350 pool_item_freelist_t *freelist = PR_FREELIST(ph); 351 352 KASSERT(freelist[idx] != PR_INDEX_USED); 353 ph->ph_firstfree = freelist[idx]; 354 freelist[idx] = PR_INDEX_USED; 355 356 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 357 } 358 359 static inline int 360 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 361 { 362 if (a->ph_page < b->ph_page) 363 return (-1); 364 else if (a->ph_page > b->ph_page) 365 return (1); 366 else 367 return (0); 368 } 369 370 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 371 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 372 373 /* 374 * Return the pool page header based on page address. 
static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	if (a->ph_page < b->ph_page)
		return (-1);
	else if (a->ph_page > b->ph_page)
		return (1);
	else
		return (0);
}

SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	tmp.ph_page = page;
	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
	return ph;
}

static void
pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
{
	struct pool_item_header *ph;
	int s;

	while ((ph = LIST_FIRST(pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			s = splvm();
			pool_put(pp->pr_phpool, ph);
			splx(s);
		}
	}
}

/*
 * Remove a page from the pool.
 */
static inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and queue it for release.
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
	LIST_INSERT_HEAD(pq, ph, ph_pagelist);

	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize all the pools listed in the "pools" link set.
 */
void
link_pool_init(void)
{
	__link_set_decl(pools, struct link_pool_init);
	struct link_pool_init * const *pi;

	__link_set_foreach(pi, pools)
		pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
		    (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
		    (*pi)->palloc);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;
	size_t trysize, phsize;
	int s;

	KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
	    PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

#ifdef POOL_SUBPAGE
	/*
	 * XXX We don't provide a real `nointr' back-end
	 * yet; all sub-pages come from a kmem back-end.
	 * maybe some day...
	 */
	if (palloc == NULL) {
		extern struct pool_allocator pool_allocator_kmem_subpage;
		palloc = &pool_allocator_kmem_subpage;
	}
	/*
	 * We'll assume any user-specified back-end allocator
	 * will deal with sub-pages, or simply doesn't care.
	 */
#else
	if (palloc == NULL)
		palloc = &pool_allocator_kmem;
#endif /* POOL_SUBPAGE */
	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
		if (palloc->pa_pagesz == 0) {
#ifdef POOL_SUBPAGE
			if (palloc == &pool_allocator_kmem)
				palloc->pa_pagesz = PAGE_SIZE;
			else
				palloc->pa_pagesz = POOL_SUBPAGE;
#else
			palloc->pa_pagesz = PAGE_SIZE;
#endif /* POOL_SUBPAGE */
		}

		TAILQ_INIT(&palloc->pa_list);

		simple_lock_init(&palloc->pa_slock);
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
		palloc->pa_flags |= PA_INITIALIZED;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	LIST_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;

	/*
	 * Decide whether to put the page header off-page, to avoid wasting
	 * too large a part of the page or too large an item.  Off-page page
	 * headers go into a splay tree, so we can match a returned item with
	 * its header based on the page address.  We use 1/16 of the page
	 * size and about 8 times the item size as the threshold (XXX: tune).
	 *
	 * However, we'll put the header into the page if we can put
	 * it without wasting any items.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff %= align;
	/* See the comment below about reserved bytes. */
	trysize = palloc->pa_pagesz - ((align - ioff) % align);
	phsize = ALIGN(sizeof(struct pool_item_header));
	if ((pp->pr_roflags & PR_NOTOUCH) == 0 &&
	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}
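	/*
	 * Worked example of the heuristic above (illustrative numbers
	 * only, assuming pa_pagesz = 4096 and phsize = 32): the size
	 * thresholds are pa_pagesz / 16 = 256 and phsize << 3 = 256,
	 * so items smaller than 256 bytes keep their header in-page.
	 * A 512-byte item (8 per page, no slack) gets an off-page
	 * header from phpool, while a 480-byte item keeps it in-page
	 * because trysize / pr_size == (trysize - phsize) / pr_size:
	 * the header fits in slack the items leave unused anyway.
	 */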
	/*
	 * Alignment is to take place at `ioff' within the item.  This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 */
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);
	if ((pp->pr_roflags & PR_NOTOUCH)) {
		int idx;

		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
		    idx++) {
			/* nothing */
		}
		if (idx >= PHPOOL_MAX) {
			/*
			 * if you see this panic, consider tweaking
			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
			 */
			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
			    pp->pr_wchan, pp->pr_itemsperpage);
		}
		pp->pr_phpool = &phpool[idx];
	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		pp->pr_phpool = &phpool[0];
	}
#if defined(DIAGNOSTIC)
	else {
		pp->pr_phpool = NULL;
	}
#endif

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool[0].pr_size == 0) {
		int idx;
		for (idx = 0; idx < PHPOOL_MAX; idx++) {
			static char phpool_names[PHPOOL_MAX][6+1+6+1];
			int nelem;
			size_t sz;

			nelem = PHPOOL_FREELIST_NELEM(idx);
			snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
			    "phpool-%d", nelem);
			sz = sizeof(struct pool_item_header);
			if (nelem) {
				sz = PR_FREELIST_ALIGN(sz)
				    + nelem * sizeof(pool_item_freelist_t);
			}
			pool_init(&phpool[idx], sz, 0, 0, 0,
			    phpool_names[idx], &pool_allocator_meta);
		}
#ifdef POOL_SUBPAGE
		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
		    PR_RECURSIVE, "psppool", &pool_allocator_meta);
#endif
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", &pool_allocator_meta);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	LIST_INSERT_HEAD(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);

	/* Insert this into the list of pools using this allocator. */
	s = splvm();
	simple_lock(&palloc->pa_slock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&palloc->pa_slock);
	splx(s);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;
	int s;

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	LIST_REMOVE(pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	simple_unlock(&pool_head_slock);

	/* Remove this pool from its allocator's list of pools. */
	s = splvm();
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);
	splx(s);

	s = splvm();
	simple_lock(&pp->pr_slock);

	KASSERT(LIST_EMPTY(&pp->pr_cachelist));

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	simple_unlock(&pp->pr_slock);
	splx(s);

	pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}
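/*
 * Illustrative drain hook (hypothetical names; a sketch only): the hook
 * is invoked with the pool's drain-hook argument and the flags of the
 * allocation that is under pressure, and is expected to return idle
 * items to the pool.
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		struct foo_softc *sc = arg;
 *
 *		foo_free_idle_items(sc);
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, sc);
 */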
static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splvm();
		ph = pool_get(pp->pr_phpool, flags);
		splx(s);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_itemsperpage == 0))
		panic("pool_get: pool %p: pr_itemsperpage is zero, "
		    "pool not initialized?", pp);
	if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
	    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#endif /* DIAGNOSTIC */
#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
	SCHED_ASSERT_UNLOCKED();
#endif

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
			      &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket.  In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		v = pool_allocator_alloc(pp, flags);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);

			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * wake up once a second and try again,
			 * as the check in pool_cache_put_paddr() is racy.
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, hz, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		simple_lock(&pp->pr_slock);
		pr_enter(pp, file, line);
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;
		/* Start the allocation process over. */
		goto startover;
	}
	if (pp->pr_roflags & PR_NOTOUCH) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#endif
		v = pr_item_notouch_get(pp, ph);
#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif
	} else {
		v = pi = LIST_FIRST(&ph->ph_itemlist);
		if (__predict_false(v == NULL)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nitems == 0)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: items on itemlist, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
		if (__predict_false(pi->pi_magic != PI_MAGIC)) {
			pr_printlog(pp, pi, printf);
			panic("pool_get(%s): free list modified: "
			    "magic=%x; page %p; item addr %p\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
		}
#endif

		/*
		 * Remove from item list.
		 */
		LIST_REMOVE(pi, pi_list);
	}
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (ph->ph_nmissing == pp->pr_itemsperpage) {
#ifdef DIAGNOSTIC
		if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
		    !LIST_EMPTY(&ph->ph_itemlist))) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}
/*
 * Internal version of pool_put().  Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
	SCHED_ASSERT_UNLOCKED();

	page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
	if (pp->pr_roflags & PR_NOTOUCH) {
		pr_item_notouch_put(pp, ph, v);
	} else {
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
		{
			int i, *ip = v;

			for (i = 0; i < pp->pr_size / sizeof(int); i++) {
				*ip++ = PI_MAGIC;
			}
		}
#endif

		LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	}
	KDASSERT(ph->ph_nmissing != 0);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water mark,
	 *	    free the page back to the system.  ONLY CONSIDER
	 *	    FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
	 *	    CLAIM.
	 *
	 *	(2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_minpages &&
		    (pp->pr_npages > pp->pr_maxpages ||
		     (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
			pr_rmpage(pp, ph, pq);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page.  A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon.  This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);
		}
		pool_update_curpage(pp);
	}
	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page.  The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v, &pq);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pool_do_put(pp, v, &pq);
	simple_unlock(&pp->pr_slock);

	pr_pagelist_free(pp, &pq);
}

#ifdef POOL_DIAGNOSTIC
#define	pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph = NULL;
	caddr_t cp;
	int newpages;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		simple_unlock(&pp->pr_slock);
		cp = pool_allocator_alloc(pp, PR_NOWAIT);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);

		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			simple_lock(&pp->pr_slock);
			break;
		}

		simple_lock(&pp->pr_slock);
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

#ifdef DIAGNOSTIC
	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	LIST_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	s = splclock();
	ph->ph_time = mono_time;
	splx(s);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	if (pp->pr_roflags & PR_NOTOUCH) {
		pool_item_freelist_t *freelist = PR_FREELIST(ph);
		int i;

		ph->ph_off = cp - storage;
		ph->ph_firstfree = 0;
		for (i = 0; i < n - 1; i++)
			freelist[i] = i + 1;
		freelist[n - 1] = PR_INDEX_EOL;
	} else {
		while (n--) {
			pi = (struct pool_item *)cp;

			KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

			/* Insert on page list */
			LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
			pi->pi_magic = PI_MAGIC;
#endif
			cp = (caddr_t)(cp + pp->pr_size);
		}
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph = NULL;
	caddr_t cp;
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
		 */
		simple_unlock(&pp->pr_slock);
		cp = pool_allocator_alloc(pp, PR_NOWAIT);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			simple_lock(&pp->pr_slock);
			break;
		}
		simple_lock(&pp->pr_slock);
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

static void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}
void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}
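/*
 * Illustrative watermark setup (hypothetical pool; a sketch only):
 * keep at least 16 items primed, start releasing idle pages beyond
 * 64 items' worth, and fail allocations past 128 outstanding items,
 * warning at most once per minute.
 *
 *	pool_setlowat(&foo_pool, 16);
 *	pool_sethiwat(&foo_pool, 64);
 *	pool_sethardlimit(&foo_pool, 128, "WARNING: foo_pool limit", 60);
 */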
/*
 * Release all complete pages that have not been used recently.
 */
int
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;
	struct timeval curtime, diff;
	int s;

	if (pp->pr_drain_hook != NULL) {
		/*
		 * The drain hook must be called with the pool unlocked.
		 */
		(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
	}

	if (simple_lock_try(&pp->pr_slock) == 0)
		return (0);
	pr_enter(pp, file, line);

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	/*
	 * Reclaim items from the pool's caches.
	 */
	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
		pool_cache_reclaim(pc, &pq, &pcgl);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);
		timersub(&curtime, &ph->ph_time, &diff);
		if (diff.tv_sec < pool_inactive_time)
			continue;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl))
		return 0;

	pr_pagelist_free(pp, &pq);
	pcg_grouplist_free(&pcgl);
	return (1);
}

/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	pp = NULL;
	s = splvm();
	simple_lock(&pool_head_slock);
	if (drainpp == NULL) {
		drainpp = LIST_FIRST(&pool_head);
	}
	if (drainpp) {
		pp = drainpp;
		drainpp = LIST_NEXT(pp, pr_poollist);
	}
	simple_unlock(&pool_head_slock);
	pool_reclaim(pp);
	splx(s);
}

/*
 * Diagnostic helpers.
 */
void
pool_print(struct pool *pp, const char *modif)
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

void
pool_printall(const char *modif, void (*pr)(const char *, ...))
{
	struct pool *pp;

	if (simple_lock_try(&pool_head_slock) == 0) {
		(*pr)("WARNING: pool_head_slock is locked\n");
	} else {
		simple_unlock(&pool_head_slock);
	}

	LIST_FOREACH(pp, &pool_head, pr_poollist) {
		pool_printit(pp, modif, pr);
	}
}

void
pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{

	if (pp == NULL) {
		(*pr)("Must specify a pool to print.\n");
		return;
	}

	/*
	 * Called from DDB; interrupts should be blocked, and all
	 * other processors should be paused.  We can skip locking
	 * the pool in this case.
	 *
	 * We do a simple_lock_try() just to print the lock
	 * status, however.
	 */

	if (simple_lock_try(&pp->pr_slock) == 0)
		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
	else
		simple_unlock(&pp->pr_slock);

	pool_print1(pp, modif, pr);
}
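/*
 * These printers are normally reached from DDB (e.g. a "show pool"
 * style command); the modifier characters parsed by pool_print1()
 * below are 'l' (log), 'p' (page lists) and 'c' (caches).
 */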
static void
pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
    void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		if (!(pp->pr_roflags & PR_NOTOUCH)) {
			LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (pi->pi_magic != PI_MAGIC) {
					(*pr)("\t\t\titem %p, magic 0x%x\n",
					    pi, pi->pi_magic);
				}
			}
		}
#endif
	}
}

static void
pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(pp, &pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(pp, &pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(pp, &pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:
	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:
	if (print_cache == 0)
		goto skip_cache;

#define PR_GROUPLIST(pcg)						\
	(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);		\
	for (i = 0; i < PCG_NOBJECTS; i++) {				\
		if (pcg->pcg_objects[i].pcgo_pa !=			\
		    POOL_PADDR_INVALID) {				\
			(*pr)("\t\t\t%p, 0x%llx\n",			\
			    pcg->pcg_objects[i].pcgo_va,		\
			    (unsigned long long)			\
			    pcg->pcg_objects[i].pcgo_pa);		\
		} else {						\
			(*pr)("\t\t\t%p\n",				\
			    pcg->pcg_objects[i].pcgo_va);		\
		}							\
	}

	LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
		(*pr)("\tcache %p\n", pc);
		(*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		(*pr)("\t full groups:\n");
		LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t partial groups:\n");
		LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
		(*pr)("\t empty groups:\n");
		LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) {
			PR_GROUPLIST(pcg);
		}
	}
#undef PR_GROUPLIST

 skip_cache:
	pr_enter_check(pp, pr);
}

static int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " at page head addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    ph, page);
		return 1;
	}

	if ((pp->pr_roflags & PR_NOTOUCH) != 0)
		return 0;

	for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = LIST_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: magic=%x;"
			    " page %p; item ordinal %d;"
			    " addr %p (p %p)\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page,
			    n, pi, page);
			panic("pool");
		}
#endif
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    n, pi, page);
		return 1;
	}
	return 0;
}


int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);
	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}

out:
	simple_unlock(&pp->pr_slock);
	return (r);
}

/*
 * pool_cache_init:
 *
 *	Initialize a pool cache.
 *
 *	NOTE: If the pool must be protected from interrupts, we expect
 *	to be called at the appropriate interrupt priority level.
 */
void
pool_cache_init(struct pool_cache *pc, struct pool *pp,
    int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *),
    void *arg)
{

	LIST_INIT(&pc->pc_emptygroups);
	LIST_INIT(&pc->pc_fullgroups);
	LIST_INIT(&pc->pc_partgroups);
	simple_lock_init(&pc->pc_slock);

	pc->pc_pool = pp;

	pc->pc_ctor = ctor;
	pc->pc_dtor = dtor;
	pc->pc_arg  = arg;

	pc->pc_hits   = 0;
	pc->pc_misses = 0;

	pc->pc_ngroups = 0;

	pc->pc_nitems = 0;

	simple_lock(&pp->pr_slock);
	LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}
/*
 * pool_cache_destroy:
 *
 *	Destroy a pool cache.
 */
void
pool_cache_destroy(struct pool_cache *pc)
{
	struct pool *pp = pc->pc_pool;

	/* First, invalidate the entire cache. */
	pool_cache_invalidate(pc);

	/* ...and remove it from the pool's cache list. */
	simple_lock(&pp->pr_slock);
	LIST_REMOVE(pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

static inline void *
pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
{
	void *object;
	u_int idx;

	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
	KASSERT(pcg->pcg_avail != 0);
	idx = --pcg->pcg_avail;

	KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
	object = pcg->pcg_objects[idx].pcgo_va;
	if (pap != NULL)
		*pap = pcg->pcg_objects[idx].pcgo_pa;
	pcg->pcg_objects[idx].pcgo_va = NULL;

	return (object);
}

static inline void
pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
{
	u_int idx;

	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
	idx = pcg->pcg_avail++;

	KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
	pcg->pcg_objects[idx].pcgo_va = object;
	pcg->pcg_objects[idx].pcgo_pa = pa;
}

static void
pcg_grouplist_free(struct pool_cache_grouplist *pcgl)
{
	struct pool_cache_group *pcg;
	int s;

	s = splvm();
	while ((pcg = LIST_FIRST(pcgl)) != NULL) {
		LIST_REMOVE(pcg, pcg_list);
		pool_put(&pcgpool, pcg);
	}
	splx(s);
}

/*
 * pool_cache_get{,_paddr}:
 *
 *	Get an object from a pool cache (optionally returning
 *	the physical address of the object).
 */
void *
pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_fullgroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		if (object != NULL && pap != NULL) {
#ifdef POOL_VTOPHYS
			*pap = POOL_VTOPHYS(object);
#else
			*pap = POOL_PADDR_INVALID;
#endif
		}
		return (object);
	}

	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg, pap);

	if (pcg->pcg_avail == 0) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);

	return (object);
}
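/*
 * Illustrative use of the paddr variant (hypothetical names; a sketch
 * only): callers that need the physical address, e.g. to program DMA
 * descriptors, can obtain it together with the object.
 *
 *	paddr_t pa;
 *	struct foo *f = pool_cache_get_paddr(&foo_cache, PR_NOWAIT, &pa);
 *
 *	if (f != NULL && pa != POOL_PADDR_INVALID)
 *		foo_dma_descriptor_set(sc, pa);
 */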
/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

static void
pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
    struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgdl)
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) {
		npcg = LIST_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, pq);
		}
		pc->pc_ngroups--;
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
	}
}

static void
pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
	LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

	pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
	pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);

	KASSERT(LIST_EMPTY(&pc->pc_partgroups));
	KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
	KASSERT(pc->pc_nitems == 0);
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	simple_lock(&pc->pc_slock);
	simple_lock(&pc->pc_pool->pr_slock);

	pool_do_cache_invalidate(pc, &pq, &pcgl);

	simple_unlock(&pc->pc_pool->pr_slock);
	simple_unlock(&pc->pc_slock);

	pr_pagelist_free(pc->pc_pool, &pq);
	pcg_grouplist_free(&pcgl);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	/*
	 * We're locking in the wrong order (normally pool_cache -> pool,
	 * but the pool is already locked when we get here), so we have
	 * to use trylock.  If we can't lock the pool_cache, it's not
	 * really a big deal here.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;

	pool_do_cache_invalidate(pc, pq, pcgl);

	simple_unlock(&pc->pc_slock);
}
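/*
 * Example (illustrative sketch only): tearing down the hypothetical
 * foo_cache and foo_pool from the earlier sketches.  Since
 * pool_cache_destroy() invalidates the cache itself before removing
 * it, a separate pool_cache_invalidate() call is only needed to
 * flush cached objects while keeping the cache in service.
 *
 *	pool_cache_invalidate(&foo_cache);	// flush, keep using
 *	// ...
 *	pool_cache_destroy(&foo_cache);		// final teardown
 *	pool_destroy(&foo_pool);
 */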
/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
};

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem_subpage = {
	pool_subpage_alloc, pool_subpage_free, 0,
};
#endif /* POOL_SUBPAGE */

/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in
 * the pool.  Then we have the resource (typically a vm_map) for this
 * allocator.  Finally, we have physical memory.  Waiting for any of
 * these can be unnecessary when any other is freed, but the kernel
 * doesn't support sleeping on multiple wait channels, so we have to
 * employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an
 * item is returned to the pool), but we set PA_WANT on the allocator.
 * When a page is returned to the allocator and PA_WANT is set,
 * pool_allocator_free will wake up all sleeping pools belonging to
 * this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	LOCK_ASSERT(!simple_lock_held(&org->pr_slock));

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools that use this allocator.  We do this
		 * to reclaim VA space.  pa_alloc is responsible for
		 * waiting for physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy on "start".  But there is no other way
		 * to have potentially sleeping pool_reclaim,
		 * non-sleeping locks on pool_allocator, and some
		 * stirring of drained pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
		    freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most
			 * likely sleep waiting for pages (if not, this
			 * won't hurt that much), and there is no way
			 * to set this in the caller without violating
			 * locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}
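/*
 * Example (illustrative sketch only): supplying a private backend
 * allocator to a pool.  my_alloc, my_free, my_allocator, bar_pool and
 * the wait channel "barpl" are hypothetical names; the body simply
 * mirrors the standard page allocators below, but a real backend
 * would typically hand out pages from its own vm_map or a reserved
 * region.
 *
 *	static void *
 *	my_alloc(struct pool *pp, int flags)
 *	{
 *		boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 *
 *		return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
 *	}
 *
 *	static void
 *	my_free(struct pool *pp, void *v)
 *	{
 *		uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
 *	}
 *
 *	static struct pool_allocator my_allocator = {
 *		my_alloc, my_free, 0,
 *	};
 *
 *	pool_init(&bar_pool, sizeof(struct bar), 0, 0, 0, "barpl",
 *	    &my_allocator);
 */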
void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}

static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
	void *v;
	int s;

	s = splvm();
	v = pool_get(&psppool, flags);
	splx(s);
	return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	pool_put(&psppool, v);
	splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#else
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
#endif /* POOL_SUBPAGE */
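/*
 * Example (illustrative sketch only): selecting a standard backend
 * allocator at pool_init() time.  Passing NULL selects
 * pool_allocator_kmem, the default; pool_allocator_nointr suits
 * pools that are never touched from interrupt context.  baz_pool and
 * the wait channel "bazpl" are hypothetical names.
 *
 *	pool_init(&baz_pool, sizeof(struct baz), 0, 0, 0, "bazpl",
 *	    &pool_allocator_nointr);
 */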