/*	$NetBSD: subr_pool.c,v 1.101 2005/06/18 01:34:03 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.101 2005/06/18 01:34:03 thorpej Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size.  Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively.  The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header.  The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx)	(((idx) == 0) ? 0 : (1 << (idx)))
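/*
 * Example (illustrative sketch only, not referenced by this file): basic
 * use of the pool API described above.  The `struct foo' type, the pool
 * name "foopl" and the variable names are hypothetical.  Passing NULL
 * selects the default back-end allocator, and callers must be at the
 * appropriate spl level for pool_get()/pool_put().
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 */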
#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
static struct pool_allocator pool_allocator_meta = {
	pool_page_alloc_meta, pool_page_free_meta
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

typedef uint8_t pool_item_freelist_t;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
	union {
		/* !PR_NOTOUCH */
		struct {
			TAILQ_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			uint16_t
				phu_off;	/* start offset in page */
			pool_item_freelist_t
				phu_firstfree;	/* first free item */
			/*
			 * XXX it might be better to use
			 * a simple bitmap and ffs(3)
			 */
		} phu_notouch;
	} ph_u;
	uint16_t		ph_nmissing;	/* # of chunks in use */
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_off		ph_u.phu_notouch.phu_off
#define	ph_firstfree	ph_u.phu_notouch.phu_firstfree

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeefU
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; destruction is deferred until
 * absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are layered on top of pools.  By layering them, we can
 * avoid the complexity of cache management for pools which would not
 * benefit from it.
 */
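/*
 * Example (illustrative sketch only): a cache layered on a pool, per the
 * scheme described above.  `foo_pool', `foo_ctor' and `foo_dtor' are
 * hypothetical; pool_cache_get()/pool_cache_put() are assumed to be the
 * usual <sys/pool.h> wrappers around the _paddr variants defined later
 * in this file.
 *
 *	static struct pool_cache foo_cache;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *
 *	struct foo *f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, f);
 */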
/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);
static void	pool_update_curpage(struct pool *);

void		*pool_allocator_alloc(struct pool *, int);
void		pool_allocator_free(struct pool *, void *);

static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
	void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

static int pool_chk_page(struct pool *, const char *,
			 struct pool_item_header *);

/*
 * Pool log entry.  An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

#ifdef POOL_DIAGNOSTIC
/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static __inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry.  Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    void (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ?
"get" : "put", 258 pl->pl_addr); 259 (*pr)("\t\tfile: %s at line %lu\n", 260 pl->pl_file, pl->pl_line); 261 } 262 } 263 if (++n >= pp->pr_logsize) 264 n = 0; 265 } 266 } 267 268 static __inline void 269 pr_enter(struct pool *pp, const char *file, long line) 270 { 271 272 if (__predict_false(pp->pr_entered_file != NULL)) { 273 printf("pool %s: reentrancy at file %s line %ld\n", 274 pp->pr_wchan, file, line); 275 printf(" previous entry at file %s line %ld\n", 276 pp->pr_entered_file, pp->pr_entered_line); 277 panic("pr_enter"); 278 } 279 280 pp->pr_entered_file = file; 281 pp->pr_entered_line = line; 282 } 283 284 static __inline void 285 pr_leave(struct pool *pp) 286 { 287 288 if (__predict_false(pp->pr_entered_file == NULL)) { 289 printf("pool %s not entered?\n", pp->pr_wchan); 290 panic("pr_leave"); 291 } 292 293 pp->pr_entered_file = NULL; 294 pp->pr_entered_line = 0; 295 } 296 297 static __inline void 298 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 299 { 300 301 if (pp->pr_entered_file != NULL) 302 (*pr)("\n\tcurrently entered from file %s line %ld\n", 303 pp->pr_entered_file, pp->pr_entered_line); 304 } 305 #else 306 #define pr_log(pp, v, action, file, line) 307 #define pr_printlog(pp, pi, pr) 308 #define pr_enter(pp, file, line) 309 #define pr_leave(pp) 310 #define pr_enter_check(pp, pr) 311 #endif /* POOL_DIAGNOSTIC */ 312 313 static __inline int 314 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 315 const void *v) 316 { 317 const char *cp = v; 318 int idx; 319 320 KASSERT(pp->pr_roflags & PR_NOTOUCH); 321 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 322 KASSERT(idx < pp->pr_itemsperpage); 323 return idx; 324 } 325 326 #define PR_FREELIST_ALIGN(p) \ 327 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 328 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 329 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 330 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 331 332 static __inline void 333 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 334 void *obj) 335 { 336 int idx = pr_item_notouch_index(pp, ph, obj); 337 pool_item_freelist_t *freelist = PR_FREELIST(ph); 338 339 KASSERT(freelist[idx] == PR_INDEX_USED); 340 freelist[idx] = ph->ph_firstfree; 341 ph->ph_firstfree = idx; 342 } 343 344 static __inline void * 345 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 346 { 347 int idx = ph->ph_firstfree; 348 pool_item_freelist_t *freelist = PR_FREELIST(ph); 349 350 KASSERT(freelist[idx] != PR_INDEX_USED); 351 ph->ph_firstfree = freelist[idx]; 352 freelist[idx] = PR_INDEX_USED; 353 354 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 355 } 356 357 static __inline int 358 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 359 { 360 if (a->ph_page < b->ph_page) 361 return (-1); 362 else if (a->ph_page > b->ph_page) 363 return (1); 364 else 365 return (0); 366 } 367 368 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 369 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 370 371 /* 372 * Return the pool page header based on page address. 
static void
pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
{
	struct pool_item_header *ph;
	int s;

	while ((ph = LIST_FIRST(pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			s = splvm();
			pool_put(pp->pr_phpool, ph);
			splx(s);
		}
	}
}

/*
 * Remove a page from the pool.
 */
static __inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
     struct pool_pagelist *pq)
{

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and queue it for release.
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
	LIST_INSERT_HEAD(pq, ph, ph_pagelist);

	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize all the pools listed in the "pools" link set.
 */
void
link_pool_init(void)
{
	__link_set_decl(pools, struct link_pool_init);
	struct link_pool_init * const *pi;

	__link_set_foreach(pi, pools)
		pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
		    (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
		    (*pi)->palloc);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;
	size_t trysize, phsize;
	int s;

	KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
	    PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

#ifdef POOL_SUBPAGE
	/*
	 * XXX We don't provide a real `nointr' back-end
	 * yet; all sub-pages come from a kmem back-end.
	 * maybe some day...
	 */
	if (palloc == NULL) {
		extern struct pool_allocator pool_allocator_kmem_subpage;
		palloc = &pool_allocator_kmem_subpage;
	}
	/*
	 * We'll assume any user-specified back-end allocator
	 * will deal with sub-pages, or simply don't care.
496 */ 497 #else 498 if (palloc == NULL) 499 palloc = &pool_allocator_kmem; 500 #endif /* POOL_SUBPAGE */ 501 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 502 if (palloc->pa_pagesz == 0) { 503 #ifdef POOL_SUBPAGE 504 if (palloc == &pool_allocator_kmem) 505 palloc->pa_pagesz = PAGE_SIZE; 506 else 507 palloc->pa_pagesz = POOL_SUBPAGE; 508 #else 509 palloc->pa_pagesz = PAGE_SIZE; 510 #endif /* POOL_SUBPAGE */ 511 } 512 513 TAILQ_INIT(&palloc->pa_list); 514 515 simple_lock_init(&palloc->pa_slock); 516 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 517 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 518 palloc->pa_flags |= PA_INITIALIZED; 519 } 520 521 if (align == 0) 522 align = ALIGN(1); 523 524 if (size < sizeof(struct pool_item)) 525 size = sizeof(struct pool_item); 526 527 size = roundup(size, align); 528 #ifdef DIAGNOSTIC 529 if (size > palloc->pa_pagesz) 530 panic("pool_init: pool item size (%lu) too large", 531 (u_long)size); 532 #endif 533 534 /* 535 * Initialize the pool structure. 536 */ 537 LIST_INIT(&pp->pr_emptypages); 538 LIST_INIT(&pp->pr_fullpages); 539 LIST_INIT(&pp->pr_partpages); 540 TAILQ_INIT(&pp->pr_cachelist); 541 pp->pr_curpage = NULL; 542 pp->pr_npages = 0; 543 pp->pr_minitems = 0; 544 pp->pr_minpages = 0; 545 pp->pr_maxpages = UINT_MAX; 546 pp->pr_roflags = flags; 547 pp->pr_flags = 0; 548 pp->pr_size = size; 549 pp->pr_align = align; 550 pp->pr_wchan = wchan; 551 pp->pr_alloc = palloc; 552 pp->pr_nitems = 0; 553 pp->pr_nout = 0; 554 pp->pr_hardlimit = UINT_MAX; 555 pp->pr_hardlimit_warning = NULL; 556 pp->pr_hardlimit_ratecap.tv_sec = 0; 557 pp->pr_hardlimit_ratecap.tv_usec = 0; 558 pp->pr_hardlimit_warning_last.tv_sec = 0; 559 pp->pr_hardlimit_warning_last.tv_usec = 0; 560 pp->pr_drain_hook = NULL; 561 pp->pr_drain_hook_arg = NULL; 562 563 /* 564 * Decide whether to put the page header off page to avoid 565 * wasting too large a part of the page or too big item. 566 * Off-page page headers go on a hash table, so we can match 567 * a returned item with its header based on the page address. 568 * We use 1/16 of the page size and about 8 times of the item 569 * size as the threshold (XXX: tune) 570 * 571 * However, we'll put the header into the page if we can put 572 * it without wasting any items. 573 * 574 * Silently enforce `0 <= ioff < align'. 575 */ 576 pp->pr_itemoffset = ioff %= align; 577 /* See the comment below about reserved bytes. */ 578 trysize = palloc->pa_pagesz - ((align - ioff) % align); 579 phsize = ALIGN(sizeof(struct pool_item_header)); 580 if ((pp->pr_roflags & PR_NOTOUCH) == 0 && 581 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 582 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 583 /* Use the end of the page for the page header */ 584 pp->pr_roflags |= PR_PHINPAGE; 585 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 586 } else { 587 /* The page header will be taken from our page header pool */ 588 pp->pr_phoffset = 0; 589 off = palloc->pa_pagesz; 590 SPLAY_INIT(&pp->pr_phtree); 591 } 592 593 /* 594 * Alignment is to take place at `ioff' within the item. This means 595 * we must reserve up to `align - 1' bytes on the page to allow 596 * appropriate positioning of each item. 
597 */ 598 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 599 KASSERT(pp->pr_itemsperpage != 0); 600 if ((pp->pr_roflags & PR_NOTOUCH)) { 601 int idx; 602 603 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 604 idx++) { 605 /* nothing */ 606 } 607 if (idx >= PHPOOL_MAX) { 608 /* 609 * if you see this panic, consider to tweak 610 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 611 */ 612 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 613 pp->pr_wchan, pp->pr_itemsperpage); 614 } 615 pp->pr_phpool = &phpool[idx]; 616 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 617 pp->pr_phpool = &phpool[0]; 618 } 619 #if defined(DIAGNOSTIC) 620 else { 621 pp->pr_phpool = NULL; 622 } 623 #endif 624 625 /* 626 * Use the slack between the chunks and the page header 627 * for "cache coloring". 628 */ 629 slack = off - pp->pr_itemsperpage * pp->pr_size; 630 pp->pr_maxcolor = (slack / align) * align; 631 pp->pr_curcolor = 0; 632 633 pp->pr_nget = 0; 634 pp->pr_nfail = 0; 635 pp->pr_nput = 0; 636 pp->pr_npagealloc = 0; 637 pp->pr_npagefree = 0; 638 pp->pr_hiwat = 0; 639 pp->pr_nidle = 0; 640 641 #ifdef POOL_DIAGNOSTIC 642 if (flags & PR_LOGGING) { 643 if (kmem_map == NULL || 644 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 645 M_TEMP, M_NOWAIT)) == NULL) 646 pp->pr_roflags &= ~PR_LOGGING; 647 pp->pr_curlogentry = 0; 648 pp->pr_logsize = pool_logsize; 649 } 650 #endif 651 652 pp->pr_entered_file = NULL; 653 pp->pr_entered_line = 0; 654 655 simple_lock_init(&pp->pr_slock); 656 657 /* 658 * Initialize private page header pool and cache magazine pool if we 659 * haven't done so yet. 660 * XXX LOCKING. 661 */ 662 if (phpool[0].pr_size == 0) { 663 int idx; 664 for (idx = 0; idx < PHPOOL_MAX; idx++) { 665 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 666 int nelem; 667 size_t sz; 668 669 nelem = PHPOOL_FREELIST_NELEM(idx); 670 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 671 "phpool-%d", nelem); 672 sz = sizeof(struct pool_item_header); 673 if (nelem) { 674 sz = PR_FREELIST_ALIGN(sz) 675 + nelem * sizeof(pool_item_freelist_t); 676 } 677 pool_init(&phpool[idx], sz, 0, 0, 0, 678 phpool_names[idx], &pool_allocator_meta); 679 } 680 #ifdef POOL_SUBPAGE 681 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 682 PR_RECURSIVE, "psppool", &pool_allocator_meta); 683 #endif 684 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 685 0, "pcgpool", &pool_allocator_meta); 686 } 687 688 /* Insert into the list of all pools. */ 689 simple_lock(&pool_head_slock); 690 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 691 simple_unlock(&pool_head_slock); 692 693 /* Insert this into the list of pools using this allocator. */ 694 s = splvm(); 695 simple_lock(&palloc->pa_slock); 696 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 697 simple_unlock(&palloc->pa_slock); 698 splx(s); 699 } 700 701 /* 702 * De-commision a pool resource. 703 */ 704 void 705 pool_destroy(struct pool *pp) 706 { 707 struct pool_pagelist pq; 708 struct pool_item_header *ph; 709 int s; 710 711 /* Remove from global pool list */ 712 simple_lock(&pool_head_slock); 713 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 714 if (drainpp == pp) 715 drainpp = NULL; 716 simple_unlock(&pool_head_slock); 717 718 /* Remove this pool from its allocator's list of pools. 
/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;
	int s;

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	simple_unlock(&pool_head_slock);

	/* Remove this pool from its allocator's list of pools. */
	s = splvm();
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);
	splx(s);

	s = splvm();
	simple_lock(&pp->pr_slock);

	KASSERT(TAILQ_EMPTY(&pp->pr_cachelist));

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	simple_unlock(&pp->pr_slock);
	splx(s);

	pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splvm();
		ph = pool_get(pp->pr_phpool, flags);
		splx(s);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_itemsperpage == 0))
		panic("pool_get: pool %p: pr_itemsperpage is zero, "
		    "pool not initialized?", pp);
	if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
	    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
#endif
#endif /* DIAGNOSTIC */

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
839 */ 840 pr_leave(pp); 841 simple_unlock(&pp->pr_slock); 842 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 843 simple_lock(&pp->pr_slock); 844 pr_enter(pp, file, line); 845 if (pp->pr_nout < pp->pr_hardlimit) 846 goto startover; 847 } 848 849 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 850 /* 851 * XXX: A warning isn't logged in this case. Should 852 * it be? 853 */ 854 pp->pr_flags |= PR_WANTED; 855 pr_leave(pp); 856 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 857 pr_enter(pp, file, line); 858 goto startover; 859 } 860 861 /* 862 * Log a message that the hard limit has been hit. 863 */ 864 if (pp->pr_hardlimit_warning != NULL && 865 ratecheck(&pp->pr_hardlimit_warning_last, 866 &pp->pr_hardlimit_ratecap)) 867 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 868 869 pp->pr_nfail++; 870 871 pr_leave(pp); 872 simple_unlock(&pp->pr_slock); 873 return (NULL); 874 } 875 876 /* 877 * The convention we use is that if `curpage' is not NULL, then 878 * it points at a non-empty bucket. In particular, `curpage' 879 * never points at a page header which has PR_PHINPAGE set and 880 * has no items in its bucket. 881 */ 882 if ((ph = pp->pr_curpage) == NULL) { 883 #ifdef DIAGNOSTIC 884 if (pp->pr_nitems != 0) { 885 simple_unlock(&pp->pr_slock); 886 printf("pool_get: %s: curpage NULL, nitems %u\n", 887 pp->pr_wchan, pp->pr_nitems); 888 panic("pool_get: nitems inconsistent"); 889 } 890 #endif 891 892 /* 893 * Call the back-end page allocator for more memory. 894 * Release the pool lock, as the back-end page allocator 895 * may block. 896 */ 897 pr_leave(pp); 898 simple_unlock(&pp->pr_slock); 899 v = pool_allocator_alloc(pp, flags); 900 if (__predict_true(v != NULL)) 901 ph = pool_alloc_item_header(pp, v, flags); 902 903 if (__predict_false(v == NULL || ph == NULL)) { 904 if (v != NULL) 905 pool_allocator_free(pp, v); 906 907 simple_lock(&pp->pr_slock); 908 pr_enter(pp, file, line); 909 910 /* 911 * We were unable to allocate a page or item 912 * header, but we released the lock during 913 * allocation, so perhaps items were freed 914 * back to the pool. Check for this case. 915 */ 916 if (pp->pr_curpage != NULL) 917 goto startover; 918 919 if ((flags & PR_WAITOK) == 0) { 920 pp->pr_nfail++; 921 pr_leave(pp); 922 simple_unlock(&pp->pr_slock); 923 return (NULL); 924 } 925 926 /* 927 * Wait for items to be returned to this pool. 928 * 929 * XXX: maybe we should wake up once a second and 930 * try again? 931 */ 932 pp->pr_flags |= PR_WANTED; 933 /* PA_WANTED is already set on the allocator. */ 934 pr_leave(pp); 935 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 936 pr_enter(pp, file, line); 937 goto startover; 938 } 939 940 /* We have more memory; add it to the pool */ 941 simple_lock(&pp->pr_slock); 942 pr_enter(pp, file, line); 943 pool_prime_page(pp, v, ph); 944 pp->pr_npagealloc++; 945 946 /* Start the allocation process over. 
		goto startover;
	}
	if (pp->pr_roflags & PR_NOTOUCH) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#endif
		v = pr_item_notouch_get(pp, ph);
#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif
	} else {
		v = pi = TAILQ_FIRST(&ph->ph_itemlist);
		if (__predict_false(v == NULL)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nitems == 0)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: items on itemlist, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
		if (__predict_false(pi->pi_magic != PI_MAGIC)) {
			pr_printlog(pp, pi, printf);
			panic("pool_get(%s): free list modified: "
			    "magic=%x; page %p; item addr %p\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
		}
#endif

		/*
		 * Remove from item list.
		 */
		TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	}
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (ph->ph_nmissing == pp->pr_itemsperpage) {
#ifdef DIAGNOSTIC
		if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
		    !TAILQ_EMPTY(&ph->ph_itemlist))) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}
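/*
 * Example (illustrative sketch only): a drain hook, as consulted by
 * pool_get() above when the hard limit is hit.  `foo_pool' and
 * `foo_drain' are hypothetical; the hook is called with the pool
 * unlocked and should release whatever externally-cached items it can
 * back to the pool.
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		... pool_put() cached foo objects ...
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
 */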
1053 */ 1054 static void 1055 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1056 { 1057 struct pool_item *pi = v; 1058 struct pool_item_header *ph; 1059 caddr_t page; 1060 int s; 1061 1062 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1063 1064 page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask); 1065 1066 #ifdef DIAGNOSTIC 1067 if (__predict_false(pp->pr_nout == 0)) { 1068 printf("pool %s: putting with none out\n", 1069 pp->pr_wchan); 1070 panic("pool_put"); 1071 } 1072 #endif 1073 1074 if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) { 1075 pr_printlog(pp, NULL, printf); 1076 panic("pool_put: %s: page header missing", pp->pr_wchan); 1077 } 1078 1079 #ifdef LOCKDEBUG 1080 /* 1081 * Check if we're freeing a locked simple lock. 1082 */ 1083 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size); 1084 #endif 1085 1086 /* 1087 * Return to item list. 1088 */ 1089 if (pp->pr_roflags & PR_NOTOUCH) { 1090 pr_item_notouch_put(pp, ph, v); 1091 } else { 1092 #ifdef DIAGNOSTIC 1093 pi->pi_magic = PI_MAGIC; 1094 #endif 1095 #ifdef DEBUG 1096 { 1097 int i, *ip = v; 1098 1099 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1100 *ip++ = PI_MAGIC; 1101 } 1102 } 1103 #endif 1104 1105 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1106 } 1107 KDASSERT(ph->ph_nmissing != 0); 1108 ph->ph_nmissing--; 1109 pp->pr_nput++; 1110 pp->pr_nitems++; 1111 pp->pr_nout--; 1112 1113 /* Cancel "pool empty" condition if it exists */ 1114 if (pp->pr_curpage == NULL) 1115 pp->pr_curpage = ph; 1116 1117 if (pp->pr_flags & PR_WANTED) { 1118 pp->pr_flags &= ~PR_WANTED; 1119 if (ph->ph_nmissing == 0) 1120 pp->pr_nidle++; 1121 wakeup((caddr_t)pp); 1122 return; 1123 } 1124 1125 /* 1126 * If this page is now empty, do one of two things: 1127 * 1128 * (1) If we have more pages than the page high water mark, 1129 * free the page back to the system. ONLY CONSIDER 1130 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1131 * CLAIM. 1132 * 1133 * (2) Otherwise, move the page to the empty page list. 1134 * 1135 * Either way, select a new current page (so we use a partially-full 1136 * page if one is available). 1137 */ 1138 if (ph->ph_nmissing == 0) { 1139 pp->pr_nidle++; 1140 if (pp->pr_npages > pp->pr_minpages && 1141 (pp->pr_npages > pp->pr_maxpages || 1142 (pp->pr_alloc->pa_flags & PA_WANT) != 0)) { 1143 pr_rmpage(pp, ph, pq); 1144 } else { 1145 LIST_REMOVE(ph, ph_pagelist); 1146 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1147 1148 /* 1149 * Update the timestamp on the page. A page must 1150 * be idle for some period of time before it can 1151 * be reclaimed by the pagedaemon. This minimizes 1152 * ping-pong'ing for memory. 1153 */ 1154 s = splclock(); 1155 ph->ph_time = mono_time; 1156 splx(s); 1157 } 1158 pool_update_curpage(pp); 1159 } 1160 1161 /* 1162 * If the page was previously completely full, move it to the 1163 * partially-full list and make it the current page. The next 1164 * allocation will get the item from this page, instead of 1165 * further fragmenting the pool. 
1166 */ 1167 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1168 LIST_REMOVE(ph, ph_pagelist); 1169 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1170 pp->pr_curpage = ph; 1171 } 1172 } 1173 1174 /* 1175 * Return resource to the pool; must be called at appropriate spl level 1176 */ 1177 #ifdef POOL_DIAGNOSTIC 1178 void 1179 _pool_put(struct pool *pp, void *v, const char *file, long line) 1180 { 1181 struct pool_pagelist pq; 1182 1183 LIST_INIT(&pq); 1184 1185 simple_lock(&pp->pr_slock); 1186 pr_enter(pp, file, line); 1187 1188 pr_log(pp, v, PRLOG_PUT, file, line); 1189 1190 pool_do_put(pp, v, &pq); 1191 1192 pr_leave(pp); 1193 simple_unlock(&pp->pr_slock); 1194 1195 if (! LIST_EMPTY(&pq)) 1196 pr_pagelist_free(pp, &pq); 1197 } 1198 #undef pool_put 1199 #endif /* POOL_DIAGNOSTIC */ 1200 1201 void 1202 pool_put(struct pool *pp, void *v) 1203 { 1204 struct pool_pagelist pq; 1205 1206 LIST_INIT(&pq); 1207 1208 simple_lock(&pp->pr_slock); 1209 pool_do_put(pp, v, &pq); 1210 simple_unlock(&pp->pr_slock); 1211 1212 if (! LIST_EMPTY(&pq)) 1213 pr_pagelist_free(pp, &pq); 1214 } 1215 1216 #ifdef POOL_DIAGNOSTIC 1217 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1218 #endif 1219 1220 /* 1221 * Add N items to the pool. 1222 */ 1223 int 1224 pool_prime(struct pool *pp, int n) 1225 { 1226 struct pool_item_header *ph = NULL; 1227 caddr_t cp; 1228 int newpages; 1229 1230 simple_lock(&pp->pr_slock); 1231 1232 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1233 1234 while (newpages-- > 0) { 1235 simple_unlock(&pp->pr_slock); 1236 cp = pool_allocator_alloc(pp, PR_NOWAIT); 1237 if (__predict_true(cp != NULL)) 1238 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1239 1240 if (__predict_false(cp == NULL || ph == NULL)) { 1241 if (cp != NULL) 1242 pool_allocator_free(pp, cp); 1243 simple_lock(&pp->pr_slock); 1244 break; 1245 } 1246 1247 simple_lock(&pp->pr_slock); 1248 pool_prime_page(pp, cp, ph); 1249 pp->pr_npagealloc++; 1250 pp->pr_minpages++; 1251 } 1252 1253 if (pp->pr_minpages >= pp->pr_maxpages) 1254 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1255 1256 simple_unlock(&pp->pr_slock); 1257 return (0); 1258 } 1259 1260 /* 1261 * Add a page worth of items to the pool. 1262 * 1263 * Note, we must be called with the pool descriptor LOCKED. 1264 */ 1265 static void 1266 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 1267 { 1268 struct pool_item *pi; 1269 caddr_t cp = storage; 1270 unsigned int align = pp->pr_align; 1271 unsigned int ioff = pp->pr_itemoffset; 1272 int n; 1273 int s; 1274 1275 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1276 1277 #ifdef DIAGNOSTIC 1278 if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1279 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1280 #endif 1281 1282 /* 1283 * Insert page header. 1284 */ 1285 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1286 TAILQ_INIT(&ph->ph_itemlist); 1287 ph->ph_page = storage; 1288 ph->ph_nmissing = 0; 1289 s = splclock(); 1290 ph->ph_time = mono_time; 1291 splx(s); 1292 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1293 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1294 1295 pp->pr_nidle++; 1296 1297 /* 1298 * Color this page. 1299 */ 1300 cp = (caddr_t)(cp + pp->pr_curcolor); 1301 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1302 pp->pr_curcolor = 0; 1303 1304 /* 1305 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 
1306 */ 1307 if (ioff != 0) 1308 cp = (caddr_t)(cp + (align - ioff)); 1309 1310 /* 1311 * Insert remaining chunks on the bucket list. 1312 */ 1313 n = pp->pr_itemsperpage; 1314 pp->pr_nitems += n; 1315 1316 if (pp->pr_roflags & PR_NOTOUCH) { 1317 pool_item_freelist_t *freelist = PR_FREELIST(ph); 1318 int i; 1319 1320 ph->ph_off = cp - storage; 1321 ph->ph_firstfree = 0; 1322 for (i = 0; i < n - 1; i++) 1323 freelist[i] = i + 1; 1324 freelist[n - 1] = PR_INDEX_EOL; 1325 } else { 1326 while (n--) { 1327 pi = (struct pool_item *)cp; 1328 1329 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1330 1331 /* Insert on page list */ 1332 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); 1333 #ifdef DIAGNOSTIC 1334 pi->pi_magic = PI_MAGIC; 1335 #endif 1336 cp = (caddr_t)(cp + pp->pr_size); 1337 } 1338 } 1339 1340 /* 1341 * If the pool was depleted, point at the new page. 1342 */ 1343 if (pp->pr_curpage == NULL) 1344 pp->pr_curpage = ph; 1345 1346 if (++pp->pr_npages > pp->pr_hiwat) 1347 pp->pr_hiwat = pp->pr_npages; 1348 } 1349 1350 /* 1351 * Used by pool_get() when nitems drops below the low water mark. This 1352 * is used to catch up pr_nitems with the low water mark. 1353 * 1354 * Note 1, we never wait for memory here, we let the caller decide what to do. 1355 * 1356 * Note 2, we must be called with the pool already locked, and we return 1357 * with it locked. 1358 */ 1359 static int 1360 pool_catchup(struct pool *pp) 1361 { 1362 struct pool_item_header *ph = NULL; 1363 caddr_t cp; 1364 int error = 0; 1365 1366 while (POOL_NEEDS_CATCHUP(pp)) { 1367 /* 1368 * Call the page back-end allocator for more memory. 1369 * 1370 * XXX: We never wait, so should we bother unlocking 1371 * the pool descriptor? 1372 */ 1373 simple_unlock(&pp->pr_slock); 1374 cp = pool_allocator_alloc(pp, PR_NOWAIT); 1375 if (__predict_true(cp != NULL)) 1376 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1377 if (__predict_false(cp == NULL || ph == NULL)) { 1378 if (cp != NULL) 1379 pool_allocator_free(pp, cp); 1380 error = ENOMEM; 1381 simple_lock(&pp->pr_slock); 1382 break; 1383 } 1384 simple_lock(&pp->pr_slock); 1385 pool_prime_page(pp, cp, ph); 1386 pp->pr_npagealloc++; 1387 } 1388 1389 return (error); 1390 } 1391 1392 static void 1393 pool_update_curpage(struct pool *pp) 1394 { 1395 1396 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1397 if (pp->pr_curpage == NULL) { 1398 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1399 } 1400 } 1401 1402 void 1403 pool_setlowat(struct pool *pp, int n) 1404 { 1405 1406 simple_lock(&pp->pr_slock); 1407 1408 pp->pr_minitems = n; 1409 pp->pr_minpages = (n == 0) 1410 ? 0 1411 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1412 1413 /* Make sure we're caught up with the newly-set low water mark. */ 1414 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1415 /* 1416 * XXX: Should we log a warning? Should we set up a timeout 1417 * to try again in a second or so? The latter could break 1418 * a caller's assumptions about interrupt protection, etc. 1419 */ 1420 } 1421 1422 simple_unlock(&pp->pr_slock); 1423 } 1424 1425 void 1426 pool_sethiwat(struct pool *pp, int n) 1427 { 1428 1429 simple_lock(&pp->pr_slock); 1430 1431 pp->pr_maxpages = (n == 0) 1432 ? 
void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

/*
 * Release all complete pages that have not been used recently.
 */
int
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct timeval curtime;
	struct pool_pagelist pq;
	struct timeval diff;
	int s;

	if (pp->pr_drain_hook != NULL) {
		/*
		 * The drain hook must be called with the pool unlocked.
		 */
		(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
	}

	if (simple_lock_try(&pp->pr_slock) == 0)
		return (0);
	pr_enter(pp, file, line);

	LIST_INIT(&pq);

	/*
	 * Reclaim items from the pool's caches.
	 */
	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
		pool_cache_reclaim(pc, &pq);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);
		timersub(&curtime, &ph->ph_time, &diff);
		if (diff.tv_sec < pool_inactive_time)
			continue;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	if (LIST_EMPTY(&pq))
		return (0);

	pr_pagelist_free(pp, &pq);
	return (1);
}

/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	pp = NULL;
	s = splvm();
	simple_lock(&pool_head_slock);
	if (drainpp == NULL) {
		drainpp = TAILQ_FIRST(&pool_head);
	}
	if (drainpp) {
		pp = drainpp;
		drainpp = TAILQ_NEXT(pp, pr_poollist);
	}
	simple_unlock(&pool_head_slock);
	pool_reclaim(pp);
	splx(s);
}
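/*
 * Example (illustrative sketch only): putting a hard limit on a pool,
 * as enforced by pool_get() and warned about via the rate-capped log
 * message set up in pool_sethardlimit() above.  The pool, message and
 * numbers are hypothetical: at most 1000 items outstanding, with the
 * warning logged at most once every 60 seconds.
 *
 *	pool_sethardlimit(&foo_pool, 1000, "WARNING: out of foos", 60);
 */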
/*
 * Diagnostic helpers.
 */
void
pool_print(struct pool *pp, const char *modif)
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

void
pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	int didlock = 0;

	if (pp == NULL) {
		(*pr)("Must specify a pool to print.\n");
		return;
	}

	/*
	 * Called from DDB; interrupts should be blocked, and all
	 * other processors should be paused.  We can skip locking
	 * the pool in this case.
	 *
	 * We do a simple_lock_try() just to print the lock
	 * status, however.
	 */

	if (simple_lock_try(&pp->pr_slock) == 0)
		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
	else
		didlock = 1;

	pool_print1(pp, modif, pr);

	if (didlock)
		simple_unlock(&pp->pr_slock);
}

static void
pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
    void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		if (!(pp->pr_roflags & PR_NOTOUCH)) {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (pi->pi_magic != PI_MAGIC) {
					(*pr)("\t\t\titem %p, magic 0x%x\n",
					    pi, pi->pi_magic);
				}
			}
		}
#endif
	}
}

static void
pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(pp, &pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(pp, &pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(pp, &pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:
	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:
	if (print_cache == 0)
		goto skip_cache;

	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
		    pc->pc_allocfrom, pc->pc_freeto);
		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
			for (i = 0; i < PCG_NOBJECTS; i++) {
				if (pcg->pcg_objects[i].pcgo_pa !=
				    POOL_PADDR_INVALID) {
					(*pr)("\t\t\t%p, 0x%llx\n",
					    pcg->pcg_objects[i].pcgo_va,
					    (unsigned long long)
					    pcg->pcg_objects[i].pcgo_pa);
				} else {
					(*pr)("\t\t\t%p\n",
					    pcg->pcg_objects[i].pcgo_va);
				}
			}
		}
	}

 skip_cache:
	pr_enter_check(pp, pr);
}

static int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " at page head addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    ph, page);
		return 1;
	}

	if ((pp->pr_roflags & PR_NOTOUCH) != 0)
		return 0;

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: magic=%x;"
			    " page %p; item ordinal %d;"
			    " addr %p (p %p)\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page,
			    n, pi, page);
			panic("pool");
		}
#endif
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page,
		    n, pi, page);
		return 1;
	}
	return 0;
}


int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);
	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		r = pool_chk_page(pp, label, ph);
		if (r) {
			goto out;
		}
	}

out:
	simple_unlock(&pp->pr_slock);
	return (r);
}
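/*
 * Example (illustrative sketch only): a constructor/destructor pair of
 * the shape expected by pool_cache_init() below.  `struct foo' and the
 * function names are hypothetical; the constructor returns 0 on success
 * or an errno-style value on failure, and may sleep only if `flags'
 * contains PR_WAITOK.
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		... expensive one-time setup of `f' ...
 *		return (0);
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		... tear down what foo_ctor() set up ...
 *	}
 */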
1819 */ 1820 void 1821 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1822 int (*ctor)(void *, void *, int), 1823 void (*dtor)(void *, void *), 1824 void *arg) 1825 { 1826 1827 TAILQ_INIT(&pc->pc_grouplist); 1828 simple_lock_init(&pc->pc_slock); 1829 1830 pc->pc_allocfrom = NULL; 1831 pc->pc_freeto = NULL; 1832 pc->pc_pool = pp; 1833 1834 pc->pc_ctor = ctor; 1835 pc->pc_dtor = dtor; 1836 pc->pc_arg = arg; 1837 1838 pc->pc_hits = 0; 1839 pc->pc_misses = 0; 1840 1841 pc->pc_ngroups = 0; 1842 1843 pc->pc_nitems = 0; 1844 1845 simple_lock(&pp->pr_slock); 1846 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist); 1847 simple_unlock(&pp->pr_slock); 1848 } 1849 1850 /* 1851 * pool_cache_destroy: 1852 * 1853 * Destroy a pool cache. 1854 */ 1855 void 1856 pool_cache_destroy(struct pool_cache *pc) 1857 { 1858 struct pool *pp = pc->pc_pool; 1859 1860 /* First, invalidate the entire cache. */ 1861 pool_cache_invalidate(pc); 1862 1863 /* ...and remove it from the pool's cache list. */ 1864 simple_lock(&pp->pr_slock); 1865 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist); 1866 simple_unlock(&pp->pr_slock); 1867 } 1868 1869 static __inline void * 1870 pcg_get(struct pool_cache_group *pcg, paddr_t *pap) 1871 { 1872 void *object; 1873 u_int idx; 1874 1875 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1876 KASSERT(pcg->pcg_avail != 0); 1877 idx = --pcg->pcg_avail; 1878 1879 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL); 1880 object = pcg->pcg_objects[idx].pcgo_va; 1881 if (pap != NULL) 1882 *pap = pcg->pcg_objects[idx].pcgo_pa; 1883 pcg->pcg_objects[idx].pcgo_va = NULL; 1884 1885 return (object); 1886 } 1887 1888 static __inline void 1889 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa) 1890 { 1891 u_int idx; 1892 1893 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 1894 idx = pcg->pcg_avail++; 1895 1896 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL); 1897 pcg->pcg_objects[idx].pcgo_va = object; 1898 pcg->pcg_objects[idx].pcgo_pa = pa; 1899 } 1900 1901 /* 1902 * pool_cache_get{,_paddr}: 1903 * 1904 * Get an object from a pool cache (optionally returning 1905 * the physical address of the object). 1906 */ 1907 void * 1908 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap) 1909 { 1910 struct pool_cache_group *pcg; 1911 void *object; 1912 1913 #ifdef LOCKDEBUG 1914 if (flags & PR_WAITOK) 1915 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)"); 1916 #endif 1917 1918 simple_lock(&pc->pc_slock); 1919 1920 if ((pcg = pc->pc_allocfrom) == NULL) { 1921 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1922 if (pcg->pcg_avail != 0) { 1923 pc->pc_allocfrom = pcg; 1924 goto have_group; 1925 } 1926 } 1927 1928 /* 1929 * No groups with any available objects. Allocate 1930 * a new object, construct it, and return it to 1931 * the caller. We will allocate a group, if necessary, 1932 * when the object is freed back to the cache. 
1933 */ 1934 pc->pc_misses++; 1935 simple_unlock(&pc->pc_slock); 1936 object = pool_get(pc->pc_pool, flags); 1937 if (object != NULL && pc->pc_ctor != NULL) { 1938 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) { 1939 pool_put(pc->pc_pool, object); 1940 return (NULL); 1941 } 1942 } 1943 if (object != NULL && pap != NULL) { 1944 #ifdef POOL_VTOPHYS 1945 *pap = POOL_VTOPHYS(object); 1946 #else 1947 *pap = POOL_PADDR_INVALID; 1948 #endif 1949 } 1950 return (object); 1951 } 1952 1953 have_group: 1954 pc->pc_hits++; 1955 pc->pc_nitems--; 1956 object = pcg_get(pcg, pap); 1957 1958 if (pcg->pcg_avail == 0) 1959 pc->pc_allocfrom = NULL; 1960 1961 simple_unlock(&pc->pc_slock); 1962 1963 return (object); 1964 } 1965 1966 /* 1967 * pool_cache_put{,_paddr}: 1968 * 1969 * Put an object back to the pool cache (optionally caching the 1970 * physical address of the object). 1971 */ 1972 void 1973 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa) 1974 { 1975 struct pool_cache_group *pcg; 1976 int s; 1977 1978 simple_lock(&pc->pc_slock); 1979 1980 if ((pcg = pc->pc_freeto) == NULL) { 1981 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1982 if (pcg->pcg_avail != PCG_NOBJECTS) { 1983 pc->pc_freeto = pcg; 1984 goto have_group; 1985 } 1986 } 1987 1988 /* 1989 * No empty groups to free the object to. Attempt to 1990 * allocate one. 1991 */ 1992 simple_unlock(&pc->pc_slock); 1993 s = splvm(); 1994 pcg = pool_get(&pcgpool, PR_NOWAIT); 1995 splx(s); 1996 if (pcg != NULL) { 1997 memset(pcg, 0, sizeof(*pcg)); 1998 simple_lock(&pc->pc_slock); 1999 pc->pc_ngroups++; 2000 TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list); 2001 if (pc->pc_freeto == NULL) 2002 pc->pc_freeto = pcg; 2003 goto have_group; 2004 } 2005 2006 /* 2007 * Unable to allocate a cache group; destruct the object 2008 * and free it back to the pool. 2009 */ 2010 pool_cache_destruct_object(pc, object); 2011 return; 2012 } 2013 2014 have_group: 2015 pc->pc_nitems++; 2016 pcg_put(pcg, object, pa); 2017 2018 if (pcg->pcg_avail == PCG_NOBJECTS) 2019 pc->pc_freeto = NULL; 2020 2021 simple_unlock(&pc->pc_slock); 2022 } 2023 2024 /* 2025 * pool_cache_destruct_object: 2026 * 2027 * Force destruction of an object and its release back into 2028 * the pool. 2029 */ 2030 void 2031 pool_cache_destruct_object(struct pool_cache *pc, void *object) 2032 { 2033 2034 if (pc->pc_dtor != NULL) 2035 (*pc->pc_dtor)(pc->pc_arg, object); 2036 pool_put(pc->pc_pool, object); 2037 } 2038 2039 /* 2040 * pool_cache_invalidate: 2041 * 2042 * Invalidate a pool cache (destruct and release all of the 2043 * cached objects). 2044 */ 2045 void 2046 pool_cache_invalidate(struct pool_cache *pc) 2047 { 2048 struct pool_pagelist pq; 2049 struct pool_cache_group *pcg, *npcg; 2050 void *object; 2051 2052 LIST_INIT(&pq); 2053 2054 simple_lock(&pc->pc_slock); 2055 simple_lock(&pc->pc_pool->pr_slock); 2056 2057 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL; 2058 pcg = npcg) { 2059 npcg = TAILQ_NEXT(pcg, pcg_list); 2060 while (pcg->pcg_avail != 0) { 2061 pc->pc_nitems--; 2062 object = pcg_get(pcg, NULL); 2063 if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg) 2064 pc->pc_allocfrom = NULL; 2065 if (pc->pc_dtor != NULL) 2066 (*pc->pc_dtor)(pc->pc_arg, object); 2067 pool_do_put(pc->pc_pool, object, &pq); 2068 } 2069 } 2070 2071 simple_unlock(&pc->pc_pool->pr_slock); 2072 simple_unlock(&pc->pc_slock); 2073 2074 if (! 

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
	struct pool_pagelist pq;
	struct pool_cache_group *pcg, *npcg;
	void *object;

	LIST_INIT(&pq);

	simple_lock(&pc->pc_slock);
	simple_lock(&pc->pc_pool->pr_slock);

	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, &pq);
		}
	}

	simple_unlock(&pc->pc_pool->pr_slock);
	simple_unlock(&pc->pc_slock);

	if (!LIST_EMPTY(&pq))
		pr_pagelist_free(pc->pc_pool, &pq);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq)
{
	struct pool_cache_group *pcg, *npcg;
	void *object;
	int s;

	/*
	 * We're locking in the wrong order (normally pool_cache -> pool,
	 * but the pool is already locked when we get here), so we have
	 * to use trylock.  If we can't lock the pool_cache, it's not really
	 * a big deal here.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;

	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, pq);
		}
		pc->pc_ngroups--;
		TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
		if (pc->pc_freeto == pcg)
			pc->pc_freeto = NULL;
		s = splvm();
		pool_put(&pcgpool, pcg);
		splx(s);
	}

	simple_unlock(&pc->pc_slock);
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified.
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
};

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem_subpage = {
	pool_subpage_alloc, pool_subpage_free, 0,
};
#endif /* POOL_SUBPAGE */
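
/*
 * Example (illustrative only): a pool selects its backend allocator via
 * the last argument to pool_init().  Passing NULL selects the default,
 * pool_allocator_kmem; pools that are never touched from interrupt
 * context can pass &pool_allocator_nointr instead.  A subsystem with
 * special VA requirements may also supply its own allocator; the names
 * mypl_alloc, mypl_free, my_map and my_pool below are hypothetical.
 *
 *	static void *
 *	mypl_alloc(struct pool *pp, int flags)
 *	{
 *		boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 *
 *		return ((void *) uvm_km_alloc_poolpage(my_map, waitok));
 *	}
 *
 *	static void
 *	mypl_free(struct pool *pp, void *v)
 *	{
 *
 *		uvm_km_free_poolpage(my_map, (vaddr_t) v);
 *	}
 *
 *	static struct pool_allocator my_allocator = {
 *		mypl_alloc, mypl_free, 0,
 *	};
 *
 *	pool_init(&my_pool, sizeof(struct my_item), 0, 0, 0,
 *	    "mypl", &my_allocator);
 */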

/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in the
 * pool.  Then we have the resource (typically a vm_map) for this allocator.
 * Finally, we have physical memory.  Waiting for any of these can be
 * unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple wait channels, so we have to employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an item
 * is returned to the pool), but we set PA_WANT on the allocator.  When a
 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 * will wake up all sleeping pools belonging to this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	LOCK_ASSERT(!simple_lock_held(&org->pr_slock));

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools, except "org", that use this
		 * allocator.  We do this to reclaim VA space.
		 * pa_alloc is responsible for waiting for
		 * physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy on "start".  But there is no other
		 * way to have potentially sleeping pool_reclaim,
		 * non-sleeping locks on pool_allocator, and some
		 * stirring of drained pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			if (pp == org)
				continue;
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
			 freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most likely
			 * sleep waiting for pages (if not, this won't hurt
			 * that much), and there is no way to set this in
			 * the caller without violating locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}

static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
	void *v;
	int s;

	s = splvm();
	v = pool_get(&psppool, flags);
	splx(s);
	return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	pool_put(&psppool, v);
	splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#else
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
#endif /* POOL_SUBPAGE */