1 /* $NetBSD: subr_pool.c,v 1.100 2005/04/01 11:59:37 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.100 2005/04/01 11:59:37 yamt Exp $"); 42 43 #include "opt_pool.h" 44 #include "opt_poollog.h" 45 #include "opt_lockdebug.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/proc.h> 50 #include <sys/errno.h> 51 #include <sys/kernel.h> 52 #include <sys/malloc.h> 53 #include <sys/lock.h> 54 #include <sys/pool.h> 55 #include <sys/syslog.h> 56 57 #include <uvm/uvm.h> 58 59 /* 60 * Pool resource management utility. 61 * 62 * Memory is allocated in pages which are split into pieces according to 63 * the pool item size. Each page is kept on one of three lists in the 64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 65 * for empty, full and partially-full pages respectively. The individual 66 * pool items are on a linked list headed by `ph_itemlist' in each page 67 * header. The memory for building the page list is either taken from 68 * the allocated pages themselves (for small pool items) or taken from 69 * an internal pool of page headers (`phpool'). 70 */ 71 72 /* List of all pools */ 73 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 74 75 /* Private pool for page header structures */ 76 #define PHPOOL_MAX 8 77 static struct pool phpool[PHPOOL_MAX]; 78 #define PHPOOL_FREELIST_NELEM(idx) (((idx) == 0) ? 
0 : (1 << (idx))) 79 80 #ifdef POOL_SUBPAGE 81 /* Pool of subpages for use by normal pools. */ 82 static struct pool psppool; 83 #endif 84 85 static void *pool_page_alloc_meta(struct pool *, int); 86 static void pool_page_free_meta(struct pool *, void *); 87 88 /* allocator for pool metadata */ 89 static struct pool_allocator pool_allocator_meta = { 90 pool_page_alloc_meta, pool_page_free_meta 91 }; 92 93 /* # of seconds to retain page after last use */ 94 int pool_inactive_time = 10; 95 96 /* Next candidate for drainage (see pool_drain()) */ 97 static struct pool *drainpp; 98 99 /* This spin lock protects both pool_head and drainpp. */ 100 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER; 101 102 typedef uint8_t pool_item_freelist_t; 103 104 struct pool_item_header { 105 /* Page headers */ 106 LIST_ENTRY(pool_item_header) 107 ph_pagelist; /* pool page list */ 108 SPLAY_ENTRY(pool_item_header) 109 ph_node; /* Off-page page headers */ 110 caddr_t ph_page; /* this page's address */ 111 struct timeval ph_time; /* last referenced */ 112 union { 113 /* !PR_NOTOUCH */ 114 struct { 115 TAILQ_HEAD(, pool_item) 116 phu_itemlist; /* chunk list for this page */ 117 } phu_normal; 118 /* PR_NOTOUCH */ 119 struct { 120 uint16_t 121 phu_off; /* start offset in page */ 122 pool_item_freelist_t 123 phu_firstfree; /* first free item */ 124 /* 125 * XXX it might be better to use 126 * a simple bitmap and ffs(3) 127 */ 128 } phu_notouch; 129 } ph_u; 130 uint16_t ph_nmissing; /* # of chunks in use */ 131 }; 132 #define ph_itemlist ph_u.phu_normal.phu_itemlist 133 #define ph_off ph_u.phu_notouch.phu_off 134 #define ph_firstfree ph_u.phu_notouch.phu_firstfree 135 136 struct pool_item { 137 #ifdef DIAGNOSTIC 138 u_int pi_magic; 139 #endif 140 #define PI_MAGIC 0xdeadbeefU 141 /* Other entries use only this list entry */ 142 TAILQ_ENTRY(pool_item) pi_list; 143 }; 144 145 #define POOL_NEEDS_CATCHUP(pp) \ 146 ((pp)->pr_nitems < (pp)->pr_minitems) 147 148 /* 149 * Pool cache management. 150 * 151 * Pool caches provide a way for constructed objects to be cached by the 152 * pool subsystem. This can lead to performance improvements by avoiding 153 * needless object construction/destruction; it is deferred until absolutely 154 * necessary. 155 * 156 * Caches are grouped into cache groups. Each cache group references 157 * up to 16 constructed objects. When a cache allocates an object 158 * from the pool, it calls the object's constructor and places it into 159 * a cache group. When a cache group frees an object back to the pool, 160 * it first calls the object's destructor. This allows the object to 161 * persist in constructed form while freed to the cache. 162 * 163 * Multiple caches may exist for each pool. This allows a single 164 * object type to have multiple constructed forms. The pool references 165 * each cache, so that when a pool is drained by the pagedaemon, it can 166 * drain each individual cache as well. Each time a cache is drained, 167 * the most idle cache group is freed to the pool in its entirety. 168 * 169 * Pool caches are layed on top of pools. By layering them, we can avoid 170 * the complexity of cache management for pools which would not benefit 171 * from it. 172 */ 173 174 /* The cache group pool. 
*/ 175 static struct pool pcgpool; 176 177 static void pool_cache_reclaim(struct pool_cache *); 178 179 static int pool_catchup(struct pool *); 180 static void pool_prime_page(struct pool *, caddr_t, 181 struct pool_item_header *); 182 static void pool_update_curpage(struct pool *); 183 184 void *pool_allocator_alloc(struct pool *, int); 185 void pool_allocator_free(struct pool *, void *); 186 187 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 188 void (*)(const char *, ...)); 189 static void pool_print1(struct pool *, const char *, 190 void (*)(const char *, ...)); 191 192 static int pool_chk_page(struct pool *, const char *, 193 struct pool_item_header *); 194 195 /* 196 * Pool log entry. An array of these is allocated in pool_init(). 197 */ 198 struct pool_log { 199 const char *pl_file; 200 long pl_line; 201 int pl_action; 202 #define PRLOG_GET 1 203 #define PRLOG_PUT 2 204 void *pl_addr; 205 }; 206 207 #ifdef POOL_DIAGNOSTIC 208 /* Number of entries in pool log buffers */ 209 #ifndef POOL_LOGSIZE 210 #define POOL_LOGSIZE 10 211 #endif 212 213 int pool_logsize = POOL_LOGSIZE; 214 215 static __inline void 216 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 217 { 218 int n = pp->pr_curlogentry; 219 struct pool_log *pl; 220 221 if ((pp->pr_roflags & PR_LOGGING) == 0) 222 return; 223 224 /* 225 * Fill in the current entry. Wrap around and overwrite 226 * the oldest entry if necessary. 227 */ 228 pl = &pp->pr_log[n]; 229 pl->pl_file = file; 230 pl->pl_line = line; 231 pl->pl_action = action; 232 pl->pl_addr = v; 233 if (++n >= pp->pr_logsize) 234 n = 0; 235 pp->pr_curlogentry = n; 236 } 237 238 static void 239 pr_printlog(struct pool *pp, struct pool_item *pi, 240 void (*pr)(const char *, ...)) 241 { 242 int i = pp->pr_logsize; 243 int n = pp->pr_curlogentry; 244 245 if ((pp->pr_roflags & PR_LOGGING) == 0) 246 return; 247 248 /* 249 * Print all entries in this pool's log. 250 */ 251 while (i-- > 0) { 252 struct pool_log *pl = &pp->pr_log[n]; 253 if (pl->pl_action != 0) { 254 if (pi == NULL || pi == pl->pl_addr) { 255 (*pr)("\tlog entry %d:\n", i); 256 (*pr)("\t\taction = %s, addr = %p\n", 257 pl->pl_action == PRLOG_GET ? 
"get" : "put", 258 pl->pl_addr); 259 (*pr)("\t\tfile: %s at line %lu\n", 260 pl->pl_file, pl->pl_line); 261 } 262 } 263 if (++n >= pp->pr_logsize) 264 n = 0; 265 } 266 } 267 268 static __inline void 269 pr_enter(struct pool *pp, const char *file, long line) 270 { 271 272 if (__predict_false(pp->pr_entered_file != NULL)) { 273 printf("pool %s: reentrancy at file %s line %ld\n", 274 pp->pr_wchan, file, line); 275 printf(" previous entry at file %s line %ld\n", 276 pp->pr_entered_file, pp->pr_entered_line); 277 panic("pr_enter"); 278 } 279 280 pp->pr_entered_file = file; 281 pp->pr_entered_line = line; 282 } 283 284 static __inline void 285 pr_leave(struct pool *pp) 286 { 287 288 if (__predict_false(pp->pr_entered_file == NULL)) { 289 printf("pool %s not entered?\n", pp->pr_wchan); 290 panic("pr_leave"); 291 } 292 293 pp->pr_entered_file = NULL; 294 pp->pr_entered_line = 0; 295 } 296 297 static __inline void 298 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 299 { 300 301 if (pp->pr_entered_file != NULL) 302 (*pr)("\n\tcurrently entered from file %s line %ld\n", 303 pp->pr_entered_file, pp->pr_entered_line); 304 } 305 #else 306 #define pr_log(pp, v, action, file, line) 307 #define pr_printlog(pp, pi, pr) 308 #define pr_enter(pp, file, line) 309 #define pr_leave(pp) 310 #define pr_enter_check(pp, pr) 311 #endif /* POOL_DIAGNOSTIC */ 312 313 static __inline int 314 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 315 const void *v) 316 { 317 const char *cp = v; 318 int idx; 319 320 KASSERT(pp->pr_roflags & PR_NOTOUCH); 321 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 322 KASSERT(idx < pp->pr_itemsperpage); 323 return idx; 324 } 325 326 #define PR_FREELIST_ALIGN(p) \ 327 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 328 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 329 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 330 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 331 332 static __inline void 333 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 334 void *obj) 335 { 336 int idx = pr_item_notouch_index(pp, ph, obj); 337 pool_item_freelist_t *freelist = PR_FREELIST(ph); 338 339 KASSERT(freelist[idx] == PR_INDEX_USED); 340 freelist[idx] = ph->ph_firstfree; 341 ph->ph_firstfree = idx; 342 } 343 344 static __inline void * 345 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 346 { 347 int idx = ph->ph_firstfree; 348 pool_item_freelist_t *freelist = PR_FREELIST(ph); 349 350 KASSERT(freelist[idx] != PR_INDEX_USED); 351 ph->ph_firstfree = freelist[idx]; 352 freelist[idx] = PR_INDEX_USED; 353 354 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 355 } 356 357 static __inline int 358 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 359 { 360 if (a->ph_page < b->ph_page) 361 return (-1); 362 else if (a->ph_page > b->ph_page) 363 return (1); 364 else 365 return (0); 366 } 367 368 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 369 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 370 371 /* 372 * Return the pool page header based on page address. 
373 */ 374 static __inline struct pool_item_header * 375 pr_find_pagehead(struct pool *pp, caddr_t page) 376 { 377 struct pool_item_header *ph, tmp; 378 379 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 380 return ((struct pool_item_header *)(page + pp->pr_phoffset)); 381 382 tmp.ph_page = page; 383 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 384 return ph; 385 } 386 387 /* 388 * Remove a page from the pool. 389 */ 390 static __inline void 391 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 392 struct pool_pagelist *pq) 393 { 394 int s; 395 396 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock) || pq != NULL); 397 398 /* 399 * If the page was idle, decrement the idle page count. 400 */ 401 if (ph->ph_nmissing == 0) { 402 #ifdef DIAGNOSTIC 403 if (pp->pr_nidle == 0) 404 panic("pr_rmpage: nidle inconsistent"); 405 if (pp->pr_nitems < pp->pr_itemsperpage) 406 panic("pr_rmpage: nitems inconsistent"); 407 #endif 408 pp->pr_nidle--; 409 } 410 411 pp->pr_nitems -= pp->pr_itemsperpage; 412 413 /* 414 * Unlink a page from the pool and release it (or queue it for release). 415 */ 416 LIST_REMOVE(ph, ph_pagelist); 417 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 418 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 419 if (pq) { 420 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 421 } else { 422 pool_allocator_free(pp, ph->ph_page); 423 if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 424 s = splvm(); 425 pool_put(pp->pr_phpool, ph); 426 splx(s); 427 } 428 } 429 pp->pr_npages--; 430 pp->pr_npagefree++; 431 432 pool_update_curpage(pp); 433 } 434 435 /* 436 * Initialize all the pools listed in the "pools" link set. 437 */ 438 void 439 link_pool_init(void) 440 { 441 __link_set_decl(pools, struct link_pool_init); 442 struct link_pool_init * const *pi; 443 444 __link_set_foreach(pi, pools) 445 pool_init((*pi)->pp, (*pi)->size, (*pi)->align, 446 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan, 447 (*pi)->palloc); 448 } 449 450 /* 451 * Initialize the given pool resource structure. 452 * 453 * We export this routine to allow other kernel parts to declare 454 * static pools that must be initialized before malloc() is available. 455 */ 456 void 457 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 458 const char *wchan, struct pool_allocator *palloc) 459 { 460 int off, slack; 461 size_t trysize, phsize; 462 int s; 463 464 KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >= 465 PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1)); 466 467 #ifdef POOL_DIAGNOSTIC 468 /* 469 * Always log if POOL_DIAGNOSTIC is defined. 470 */ 471 if (pool_logsize != 0) 472 flags |= PR_LOGGING; 473 #endif 474 475 #ifdef POOL_SUBPAGE 476 /* 477 * XXX We don't provide a real `nointr' back-end 478 * yet; all sub-pages come from a kmem back-end. 479 * maybe some day... 480 */ 481 if (palloc == NULL) { 482 extern struct pool_allocator pool_allocator_kmem_subpage; 483 palloc = &pool_allocator_kmem_subpage; 484 } 485 /* 486 * We'll assume any user-specified back-end allocator 487 * will deal with sub-pages, or simply don't care. 
488 */ 489 #else 490 if (palloc == NULL) 491 palloc = &pool_allocator_kmem; 492 #endif /* POOL_SUBPAGE */ 493 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 494 if (palloc->pa_pagesz == 0) { 495 #ifdef POOL_SUBPAGE 496 if (palloc == &pool_allocator_kmem) 497 palloc->pa_pagesz = PAGE_SIZE; 498 else 499 palloc->pa_pagesz = POOL_SUBPAGE; 500 #else 501 palloc->pa_pagesz = PAGE_SIZE; 502 #endif /* POOL_SUBPAGE */ 503 } 504 505 TAILQ_INIT(&palloc->pa_list); 506 507 simple_lock_init(&palloc->pa_slock); 508 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 509 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 510 palloc->pa_flags |= PA_INITIALIZED; 511 } 512 513 if (align == 0) 514 align = ALIGN(1); 515 516 if (size < sizeof(struct pool_item)) 517 size = sizeof(struct pool_item); 518 519 size = roundup(size, align); 520 #ifdef DIAGNOSTIC 521 if (size > palloc->pa_pagesz) 522 panic("pool_init: pool item size (%lu) too large", 523 (u_long)size); 524 #endif 525 526 /* 527 * Initialize the pool structure. 528 */ 529 LIST_INIT(&pp->pr_emptypages); 530 LIST_INIT(&pp->pr_fullpages); 531 LIST_INIT(&pp->pr_partpages); 532 TAILQ_INIT(&pp->pr_cachelist); 533 pp->pr_curpage = NULL; 534 pp->pr_npages = 0; 535 pp->pr_minitems = 0; 536 pp->pr_minpages = 0; 537 pp->pr_maxpages = UINT_MAX; 538 pp->pr_roflags = flags; 539 pp->pr_flags = 0; 540 pp->pr_size = size; 541 pp->pr_align = align; 542 pp->pr_wchan = wchan; 543 pp->pr_alloc = palloc; 544 pp->pr_nitems = 0; 545 pp->pr_nout = 0; 546 pp->pr_hardlimit = UINT_MAX; 547 pp->pr_hardlimit_warning = NULL; 548 pp->pr_hardlimit_ratecap.tv_sec = 0; 549 pp->pr_hardlimit_ratecap.tv_usec = 0; 550 pp->pr_hardlimit_warning_last.tv_sec = 0; 551 pp->pr_hardlimit_warning_last.tv_usec = 0; 552 pp->pr_drain_hook = NULL; 553 pp->pr_drain_hook_arg = NULL; 554 555 /* 556 * Decide whether to put the page header off page to avoid 557 * wasting too large a part of the page or too big item. 558 * Off-page page headers go on a hash table, so we can match 559 * a returned item with its header based on the page address. 560 * We use 1/16 of the page size and about 8 times of the item 561 * size as the threshold (XXX: tune) 562 * 563 * However, we'll put the header into the page if we can put 564 * it without wasting any items. 565 * 566 * Silently enforce `0 <= ioff < align'. 567 */ 568 pp->pr_itemoffset = ioff %= align; 569 /* See the comment below about reserved bytes. */ 570 trysize = palloc->pa_pagesz - ((align - ioff) % align); 571 phsize = ALIGN(sizeof(struct pool_item_header)); 572 if ((pp->pr_roflags & PR_NOTOUCH) == 0 && 573 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 574 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 575 /* Use the end of the page for the page header */ 576 pp->pr_roflags |= PR_PHINPAGE; 577 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 578 } else { 579 /* The page header will be taken from our page header pool */ 580 pp->pr_phoffset = 0; 581 off = palloc->pa_pagesz; 582 SPLAY_INIT(&pp->pr_phtree); 583 } 584 585 /* 586 * Alignment is to take place at `ioff' within the item. This means 587 * we must reserve up to `align - 1' bytes on the page to allow 588 * appropriate positioning of each item. 
589 */ 590 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 591 KASSERT(pp->pr_itemsperpage != 0); 592 if ((pp->pr_roflags & PR_NOTOUCH)) { 593 int idx; 594 595 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 596 idx++) { 597 /* nothing */ 598 } 599 if (idx >= PHPOOL_MAX) { 600 /* 601 * if you see this panic, consider to tweak 602 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 603 */ 604 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 605 pp->pr_wchan, pp->pr_itemsperpage); 606 } 607 pp->pr_phpool = &phpool[idx]; 608 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 609 pp->pr_phpool = &phpool[0]; 610 } 611 #if defined(DIAGNOSTIC) 612 else { 613 pp->pr_phpool = NULL; 614 } 615 #endif 616 617 /* 618 * Use the slack between the chunks and the page header 619 * for "cache coloring". 620 */ 621 slack = off - pp->pr_itemsperpage * pp->pr_size; 622 pp->pr_maxcolor = (slack / align) * align; 623 pp->pr_curcolor = 0; 624 625 pp->pr_nget = 0; 626 pp->pr_nfail = 0; 627 pp->pr_nput = 0; 628 pp->pr_npagealloc = 0; 629 pp->pr_npagefree = 0; 630 pp->pr_hiwat = 0; 631 pp->pr_nidle = 0; 632 633 #ifdef POOL_DIAGNOSTIC 634 if (flags & PR_LOGGING) { 635 if (kmem_map == NULL || 636 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 637 M_TEMP, M_NOWAIT)) == NULL) 638 pp->pr_roflags &= ~PR_LOGGING; 639 pp->pr_curlogentry = 0; 640 pp->pr_logsize = pool_logsize; 641 } 642 #endif 643 644 pp->pr_entered_file = NULL; 645 pp->pr_entered_line = 0; 646 647 simple_lock_init(&pp->pr_slock); 648 649 /* 650 * Initialize private page header pool and cache magazine pool if we 651 * haven't done so yet. 652 * XXX LOCKING. 653 */ 654 if (phpool[0].pr_size == 0) { 655 int idx; 656 for (idx = 0; idx < PHPOOL_MAX; idx++) { 657 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 658 int nelem; 659 size_t sz; 660 661 nelem = PHPOOL_FREELIST_NELEM(idx); 662 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 663 "phpool-%d", nelem); 664 sz = sizeof(struct pool_item_header); 665 if (nelem) { 666 sz = PR_FREELIST_ALIGN(sz) 667 + nelem * sizeof(pool_item_freelist_t); 668 } 669 pool_init(&phpool[idx], sz, 0, 0, 0, 670 phpool_names[idx], &pool_allocator_meta); 671 } 672 #ifdef POOL_SUBPAGE 673 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 674 PR_RECURSIVE, "psppool", &pool_allocator_meta); 675 #endif 676 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 677 0, "pcgpool", &pool_allocator_meta); 678 } 679 680 /* Insert into the list of all pools. */ 681 simple_lock(&pool_head_slock); 682 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 683 simple_unlock(&pool_head_slock); 684 685 /* Insert this into the list of pools using this allocator. */ 686 s = splvm(); 687 simple_lock(&palloc->pa_slock); 688 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 689 simple_unlock(&palloc->pa_slock); 690 splx(s); 691 } 692 693 /* 694 * De-commision a pool resource. 695 */ 696 void 697 pool_destroy(struct pool *pp) 698 { 699 struct pool_item_header *ph; 700 struct pool_cache *pc; 701 int s; 702 703 /* Locking order: pool_allocator -> pool */ 704 s = splvm(); 705 simple_lock(&pp->pr_alloc->pa_slock); 706 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 707 simple_unlock(&pp->pr_alloc->pa_slock); 708 splx(s); 709 710 /* Destroy all caches for this pool. 
*/ 711 while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL) 712 pool_cache_destroy(pc); 713 714 #ifdef DIAGNOSTIC 715 if (pp->pr_nout != 0) { 716 pr_printlog(pp, NULL, printf); 717 panic("pool_destroy: pool busy: still out: %u", 718 pp->pr_nout); 719 } 720 #endif 721 722 /* Remove all pages */ 723 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 724 pr_rmpage(pp, ph, NULL); 725 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 726 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 727 728 /* Remove from global pool list */ 729 simple_lock(&pool_head_slock); 730 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 731 if (drainpp == pp) { 732 drainpp = NULL; 733 } 734 simple_unlock(&pool_head_slock); 735 736 #ifdef POOL_DIAGNOSTIC 737 if ((pp->pr_roflags & PR_LOGGING) != 0) 738 free(pp->pr_log, M_TEMP); 739 #endif 740 } 741 742 void 743 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 744 { 745 746 /* XXX no locking -- must be used just after pool_init() */ 747 #ifdef DIAGNOSTIC 748 if (pp->pr_drain_hook != NULL) 749 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 750 #endif 751 pp->pr_drain_hook = fn; 752 pp->pr_drain_hook_arg = arg; 753 } 754 755 static struct pool_item_header * 756 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) 757 { 758 struct pool_item_header *ph; 759 int s; 760 761 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0); 762 763 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 764 ph = (struct pool_item_header *) (storage + pp->pr_phoffset); 765 else { 766 s = splvm(); 767 ph = pool_get(pp->pr_phpool, flags); 768 splx(s); 769 } 770 771 return (ph); 772 } 773 774 /* 775 * Grab an item from the pool; must be called at appropriate spl level 776 */ 777 void * 778 #ifdef POOL_DIAGNOSTIC 779 _pool_get(struct pool *pp, int flags, const char *file, long line) 780 #else 781 pool_get(struct pool *pp, int flags) 782 #endif 783 { 784 struct pool_item *pi; 785 struct pool_item_header *ph; 786 void *v; 787 788 #ifdef DIAGNOSTIC 789 if (__predict_false(pp->pr_itemsperpage == 0)) 790 panic("pool_get: pool %p: pr_itemsperpage is zero, " 791 "pool not initialized?", pp); 792 if (__predict_false(curlwp == NULL && doing_shutdown == 0 && 793 (flags & PR_WAITOK) != 0)) 794 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan); 795 796 #ifdef LOCKDEBUG 797 if (flags & PR_WAITOK) 798 simple_lock_only_held(NULL, "pool_get(PR_WAITOK)"); 799 #endif 800 #endif /* DIAGNOSTIC */ 801 802 simple_lock(&pp->pr_slock); 803 pr_enter(pp, file, line); 804 805 startover: 806 /* 807 * Check to see if we've reached the hard limit. If we have, 808 * and we can wait, then wait until an item has been returned to 809 * the pool. 810 */ 811 #ifdef DIAGNOSTIC 812 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 813 pr_leave(pp); 814 simple_unlock(&pp->pr_slock); 815 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 816 } 817 #endif 818 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 819 if (pp->pr_drain_hook != NULL) { 820 /* 821 * Since the drain hook is going to free things 822 * back to the pool, unlock, call the hook, re-lock, 823 * and check the hardlimit condition again. 824 */ 825 pr_leave(pp); 826 simple_unlock(&pp->pr_slock); 827 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 828 simple_lock(&pp->pr_slock); 829 pr_enter(pp, file, line); 830 if (pp->pr_nout < pp->pr_hardlimit) 831 goto startover; 832 } 833 834 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 835 /* 836 * XXX: A warning isn't logged in this case. Should 837 * it be? 
838 */ 839 pp->pr_flags |= PR_WANTED; 840 pr_leave(pp); 841 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 842 pr_enter(pp, file, line); 843 goto startover; 844 } 845 846 /* 847 * Log a message that the hard limit has been hit. 848 */ 849 if (pp->pr_hardlimit_warning != NULL && 850 ratecheck(&pp->pr_hardlimit_warning_last, 851 &pp->pr_hardlimit_ratecap)) 852 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 853 854 pp->pr_nfail++; 855 856 pr_leave(pp); 857 simple_unlock(&pp->pr_slock); 858 return (NULL); 859 } 860 861 /* 862 * The convention we use is that if `curpage' is not NULL, then 863 * it points at a non-empty bucket. In particular, `curpage' 864 * never points at a page header which has PR_PHINPAGE set and 865 * has no items in its bucket. 866 */ 867 if ((ph = pp->pr_curpage) == NULL) { 868 #ifdef DIAGNOSTIC 869 if (pp->pr_nitems != 0) { 870 simple_unlock(&pp->pr_slock); 871 printf("pool_get: %s: curpage NULL, nitems %u\n", 872 pp->pr_wchan, pp->pr_nitems); 873 panic("pool_get: nitems inconsistent"); 874 } 875 #endif 876 877 /* 878 * Call the back-end page allocator for more memory. 879 * Release the pool lock, as the back-end page allocator 880 * may block. 881 */ 882 pr_leave(pp); 883 simple_unlock(&pp->pr_slock); 884 v = pool_allocator_alloc(pp, flags); 885 if (__predict_true(v != NULL)) 886 ph = pool_alloc_item_header(pp, v, flags); 887 888 if (__predict_false(v == NULL || ph == NULL)) { 889 if (v != NULL) 890 pool_allocator_free(pp, v); 891 892 simple_lock(&pp->pr_slock); 893 pr_enter(pp, file, line); 894 895 /* 896 * We were unable to allocate a page or item 897 * header, but we released the lock during 898 * allocation, so perhaps items were freed 899 * back to the pool. Check for this case. 900 */ 901 if (pp->pr_curpage != NULL) 902 goto startover; 903 904 if ((flags & PR_WAITOK) == 0) { 905 pp->pr_nfail++; 906 pr_leave(pp); 907 simple_unlock(&pp->pr_slock); 908 return (NULL); 909 } 910 911 /* 912 * Wait for items to be returned to this pool. 913 * 914 * XXX: maybe we should wake up once a second and 915 * try again? 916 */ 917 pp->pr_flags |= PR_WANTED; 918 /* PA_WANTED is already set on the allocator. */ 919 pr_leave(pp); 920 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 921 pr_enter(pp, file, line); 922 goto startover; 923 } 924 925 /* We have more memory; add it to the pool */ 926 simple_lock(&pp->pr_slock); 927 pr_enter(pp, file, line); 928 pool_prime_page(pp, v, ph); 929 pp->pr_npagealloc++; 930 931 /* Start the allocation process over. 
*/ 932 goto startover; 933 } 934 if (pp->pr_roflags & PR_NOTOUCH) { 935 #ifdef DIAGNOSTIC 936 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 937 pr_leave(pp); 938 simple_unlock(&pp->pr_slock); 939 panic("pool_get: %s: page empty", pp->pr_wchan); 940 } 941 #endif 942 v = pr_item_notouch_get(pp, ph); 943 #ifdef POOL_DIAGNOSTIC 944 pr_log(pp, v, PRLOG_GET, file, line); 945 #endif 946 } else { 947 v = pi = TAILQ_FIRST(&ph->ph_itemlist); 948 if (__predict_false(v == NULL)) { 949 pr_leave(pp); 950 simple_unlock(&pp->pr_slock); 951 panic("pool_get: %s: page empty", pp->pr_wchan); 952 } 953 #ifdef DIAGNOSTIC 954 if (__predict_false(pp->pr_nitems == 0)) { 955 pr_leave(pp); 956 simple_unlock(&pp->pr_slock); 957 printf("pool_get: %s: items on itemlist, nitems %u\n", 958 pp->pr_wchan, pp->pr_nitems); 959 panic("pool_get: nitems inconsistent"); 960 } 961 #endif 962 963 #ifdef POOL_DIAGNOSTIC 964 pr_log(pp, v, PRLOG_GET, file, line); 965 #endif 966 967 #ifdef DIAGNOSTIC 968 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 969 pr_printlog(pp, pi, printf); 970 panic("pool_get(%s): free list modified: " 971 "magic=%x; page %p; item addr %p\n", 972 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 973 } 974 #endif 975 976 /* 977 * Remove from item list. 978 */ 979 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list); 980 } 981 pp->pr_nitems--; 982 pp->pr_nout++; 983 if (ph->ph_nmissing == 0) { 984 #ifdef DIAGNOSTIC 985 if (__predict_false(pp->pr_nidle == 0)) 986 panic("pool_get: nidle inconsistent"); 987 #endif 988 pp->pr_nidle--; 989 990 /* 991 * This page was previously empty. Move it to the list of 992 * partially-full pages. This page is already curpage. 993 */ 994 LIST_REMOVE(ph, ph_pagelist); 995 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 996 } 997 ph->ph_nmissing++; 998 if (ph->ph_nmissing == pp->pr_itemsperpage) { 999 #ifdef DIAGNOSTIC 1000 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1001 !TAILQ_EMPTY(&ph->ph_itemlist))) { 1002 pr_leave(pp); 1003 simple_unlock(&pp->pr_slock); 1004 panic("pool_get: %s: nmissing inconsistent", 1005 pp->pr_wchan); 1006 } 1007 #endif 1008 /* 1009 * This page is now full. Move it to the full list 1010 * and select a new current page. 1011 */ 1012 LIST_REMOVE(ph, ph_pagelist); 1013 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1014 pool_update_curpage(pp); 1015 } 1016 1017 pp->pr_nget++; 1018 1019 /* 1020 * If we have a low water mark and we are now below that low 1021 * water mark, add more items to the pool. 1022 */ 1023 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1024 /* 1025 * XXX: Should we log a warning? Should we set up a timeout 1026 * to try again in a second or so? The latter could break 1027 * a caller's assumptions about interrupt protection, etc. 1028 */ 1029 } 1030 1031 pr_leave(pp); 1032 simple_unlock(&pp->pr_slock); 1033 return (v); 1034 } 1035 1036 /* 1037 * Internal version of pool_put(). Pool is already locked/entered. 
1038 */ 1039 static void 1040 pool_do_put(struct pool *pp, void *v) 1041 { 1042 struct pool_item *pi = v; 1043 struct pool_item_header *ph; 1044 caddr_t page; 1045 int s; 1046 1047 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1048 1049 page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask); 1050 1051 #ifdef DIAGNOSTIC 1052 if (__predict_false(pp->pr_nout == 0)) { 1053 printf("pool %s: putting with none out\n", 1054 pp->pr_wchan); 1055 panic("pool_put"); 1056 } 1057 #endif 1058 1059 if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) { 1060 pr_printlog(pp, NULL, printf); 1061 panic("pool_put: %s: page header missing", pp->pr_wchan); 1062 } 1063 1064 #ifdef LOCKDEBUG 1065 /* 1066 * Check if we're freeing a locked simple lock. 1067 */ 1068 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size); 1069 #endif 1070 1071 /* 1072 * Return to item list. 1073 */ 1074 if (pp->pr_roflags & PR_NOTOUCH) { 1075 pr_item_notouch_put(pp, ph, v); 1076 } else { 1077 #ifdef DIAGNOSTIC 1078 pi->pi_magic = PI_MAGIC; 1079 #endif 1080 #ifdef DEBUG 1081 { 1082 int i, *ip = v; 1083 1084 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1085 *ip++ = PI_MAGIC; 1086 } 1087 } 1088 #endif 1089 1090 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1091 } 1092 KDASSERT(ph->ph_nmissing != 0); 1093 ph->ph_nmissing--; 1094 pp->pr_nput++; 1095 pp->pr_nitems++; 1096 pp->pr_nout--; 1097 1098 /* Cancel "pool empty" condition if it exists */ 1099 if (pp->pr_curpage == NULL) 1100 pp->pr_curpage = ph; 1101 1102 if (pp->pr_flags & PR_WANTED) { 1103 pp->pr_flags &= ~PR_WANTED; 1104 if (ph->ph_nmissing == 0) 1105 pp->pr_nidle++; 1106 wakeup((caddr_t)pp); 1107 return; 1108 } 1109 1110 /* 1111 * If this page is now empty, do one of two things: 1112 * 1113 * (1) If we have more pages than the page high water mark, 1114 * free the page back to the system. ONLY CONSIDER 1115 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1116 * CLAIM. 1117 * 1118 * (2) Otherwise, move the page to the empty page list. 1119 * 1120 * Either way, select a new current page (so we use a partially-full 1121 * page if one is available). 1122 */ 1123 if (ph->ph_nmissing == 0) { 1124 pp->pr_nidle++; 1125 if (pp->pr_npages > pp->pr_minpages && 1126 (pp->pr_npages > pp->pr_maxpages || 1127 (pp->pr_alloc->pa_flags & PA_WANT) != 0)) { 1128 simple_unlock(&pp->pr_slock); 1129 pr_rmpage(pp, ph, NULL); 1130 simple_lock(&pp->pr_slock); 1131 } else { 1132 LIST_REMOVE(ph, ph_pagelist); 1133 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1134 1135 /* 1136 * Update the timestamp on the page. A page must 1137 * be idle for some period of time before it can 1138 * be reclaimed by the pagedaemon. This minimizes 1139 * ping-pong'ing for memory. 1140 */ 1141 s = splclock(); 1142 ph->ph_time = mono_time; 1143 splx(s); 1144 } 1145 pool_update_curpage(pp); 1146 } 1147 1148 /* 1149 * If the page was previously completely full, move it to the 1150 * partially-full list and make it the current page. The next 1151 * allocation will get the item from this page, instead of 1152 * further fragmenting the pool. 
1153 */ 1154 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1155 LIST_REMOVE(ph, ph_pagelist); 1156 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1157 pp->pr_curpage = ph; 1158 } 1159 } 1160 1161 /* 1162 * Return resource to the pool; must be called at appropriate spl level 1163 */ 1164 #ifdef POOL_DIAGNOSTIC 1165 void 1166 _pool_put(struct pool *pp, void *v, const char *file, long line) 1167 { 1168 1169 simple_lock(&pp->pr_slock); 1170 pr_enter(pp, file, line); 1171 1172 pr_log(pp, v, PRLOG_PUT, file, line); 1173 1174 pool_do_put(pp, v); 1175 1176 pr_leave(pp); 1177 simple_unlock(&pp->pr_slock); 1178 } 1179 #undef pool_put 1180 #endif /* POOL_DIAGNOSTIC */ 1181 1182 void 1183 pool_put(struct pool *pp, void *v) 1184 { 1185 1186 simple_lock(&pp->pr_slock); 1187 1188 pool_do_put(pp, v); 1189 1190 simple_unlock(&pp->pr_slock); 1191 } 1192 1193 #ifdef POOL_DIAGNOSTIC 1194 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1195 #endif 1196 1197 /* 1198 * Add N items to the pool. 1199 */ 1200 int 1201 pool_prime(struct pool *pp, int n) 1202 { 1203 struct pool_item_header *ph = NULL; 1204 caddr_t cp; 1205 int newpages; 1206 1207 simple_lock(&pp->pr_slock); 1208 1209 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1210 1211 while (newpages-- > 0) { 1212 simple_unlock(&pp->pr_slock); 1213 cp = pool_allocator_alloc(pp, PR_NOWAIT); 1214 if (__predict_true(cp != NULL)) 1215 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1216 1217 if (__predict_false(cp == NULL || ph == NULL)) { 1218 if (cp != NULL) 1219 pool_allocator_free(pp, cp); 1220 simple_lock(&pp->pr_slock); 1221 break; 1222 } 1223 1224 simple_lock(&pp->pr_slock); 1225 pool_prime_page(pp, cp, ph); 1226 pp->pr_npagealloc++; 1227 pp->pr_minpages++; 1228 } 1229 1230 if (pp->pr_minpages >= pp->pr_maxpages) 1231 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1232 1233 simple_unlock(&pp->pr_slock); 1234 return (0); 1235 } 1236 1237 /* 1238 * Add a page worth of items to the pool. 1239 * 1240 * Note, we must be called with the pool descriptor LOCKED. 1241 */ 1242 static void 1243 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 1244 { 1245 struct pool_item *pi; 1246 caddr_t cp = storage; 1247 unsigned int align = pp->pr_align; 1248 unsigned int ioff = pp->pr_itemoffset; 1249 int n; 1250 int s; 1251 1252 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1253 1254 #ifdef DIAGNOSTIC 1255 if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1256 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1257 #endif 1258 1259 /* 1260 * Insert page header. 1261 */ 1262 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1263 TAILQ_INIT(&ph->ph_itemlist); 1264 ph->ph_page = storage; 1265 ph->ph_nmissing = 0; 1266 s = splclock(); 1267 ph->ph_time = mono_time; 1268 splx(s); 1269 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1270 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1271 1272 pp->pr_nidle++; 1273 1274 /* 1275 * Color this page. 1276 */ 1277 cp = (caddr_t)(cp + pp->pr_curcolor); 1278 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1279 pp->pr_curcolor = 0; 1280 1281 /* 1282 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 1283 */ 1284 if (ioff != 0) 1285 cp = (caddr_t)(cp + (align - ioff)); 1286 1287 /* 1288 * Insert remaining chunks on the bucket list. 
1289 */ 1290 n = pp->pr_itemsperpage; 1291 pp->pr_nitems += n; 1292 1293 if (pp->pr_roflags & PR_NOTOUCH) { 1294 pool_item_freelist_t *freelist = PR_FREELIST(ph); 1295 int i; 1296 1297 ph->ph_off = cp - storage; 1298 ph->ph_firstfree = 0; 1299 for (i = 0; i < n - 1; i++) 1300 freelist[i] = i + 1; 1301 freelist[n - 1] = PR_INDEX_EOL; 1302 } else { 1303 while (n--) { 1304 pi = (struct pool_item *)cp; 1305 1306 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1307 1308 /* Insert on page list */ 1309 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); 1310 #ifdef DIAGNOSTIC 1311 pi->pi_magic = PI_MAGIC; 1312 #endif 1313 cp = (caddr_t)(cp + pp->pr_size); 1314 } 1315 } 1316 1317 /* 1318 * If the pool was depleted, point at the new page. 1319 */ 1320 if (pp->pr_curpage == NULL) 1321 pp->pr_curpage = ph; 1322 1323 if (++pp->pr_npages > pp->pr_hiwat) 1324 pp->pr_hiwat = pp->pr_npages; 1325 } 1326 1327 /* 1328 * Used by pool_get() when nitems drops below the low water mark. This 1329 * is used to catch up pr_nitems with the low water mark. 1330 * 1331 * Note 1, we never wait for memory here, we let the caller decide what to do. 1332 * 1333 * Note 2, we must be called with the pool already locked, and we return 1334 * with it locked. 1335 */ 1336 static int 1337 pool_catchup(struct pool *pp) 1338 { 1339 struct pool_item_header *ph = NULL; 1340 caddr_t cp; 1341 int error = 0; 1342 1343 while (POOL_NEEDS_CATCHUP(pp)) { 1344 /* 1345 * Call the page back-end allocator for more memory. 1346 * 1347 * XXX: We never wait, so should we bother unlocking 1348 * the pool descriptor? 1349 */ 1350 simple_unlock(&pp->pr_slock); 1351 cp = pool_allocator_alloc(pp, PR_NOWAIT); 1352 if (__predict_true(cp != NULL)) 1353 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1354 if (__predict_false(cp == NULL || ph == NULL)) { 1355 if (cp != NULL) 1356 pool_allocator_free(pp, cp); 1357 error = ENOMEM; 1358 simple_lock(&pp->pr_slock); 1359 break; 1360 } 1361 simple_lock(&pp->pr_slock); 1362 pool_prime_page(pp, cp, ph); 1363 pp->pr_npagealloc++; 1364 } 1365 1366 return (error); 1367 } 1368 1369 static void 1370 pool_update_curpage(struct pool *pp) 1371 { 1372 1373 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1374 if (pp->pr_curpage == NULL) { 1375 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1376 } 1377 } 1378 1379 void 1380 pool_setlowat(struct pool *pp, int n) 1381 { 1382 1383 simple_lock(&pp->pr_slock); 1384 1385 pp->pr_minitems = n; 1386 pp->pr_minpages = (n == 0) 1387 ? 0 1388 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1389 1390 /* Make sure we're caught up with the newly-set low water mark. */ 1391 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1392 /* 1393 * XXX: Should we log a warning? Should we set up a timeout 1394 * to try again in a second or so? The latter could break 1395 * a caller's assumptions about interrupt protection, etc. 1396 */ 1397 } 1398 1399 simple_unlock(&pp->pr_slock); 1400 } 1401 1402 void 1403 pool_sethiwat(struct pool *pp, int n) 1404 { 1405 1406 simple_lock(&pp->pr_slock); 1407 1408 pp->pr_maxpages = (n == 0) 1409 ? 
0 1410 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1411 1412 simple_unlock(&pp->pr_slock); 1413 } 1414 1415 void 1416 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1417 { 1418 1419 simple_lock(&pp->pr_slock); 1420 1421 pp->pr_hardlimit = n; 1422 pp->pr_hardlimit_warning = warnmess; 1423 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1424 pp->pr_hardlimit_warning_last.tv_sec = 0; 1425 pp->pr_hardlimit_warning_last.tv_usec = 0; 1426 1427 /* 1428 * In-line version of pool_sethiwat(), because we don't want to 1429 * release the lock. 1430 */ 1431 pp->pr_maxpages = (n == 0) 1432 ? 0 1433 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1434 1435 simple_unlock(&pp->pr_slock); 1436 } 1437 1438 /* 1439 * Release all complete pages that have not been used recently. 1440 */ 1441 int 1442 #ifdef POOL_DIAGNOSTIC 1443 _pool_reclaim(struct pool *pp, const char *file, long line) 1444 #else 1445 pool_reclaim(struct pool *pp) 1446 #endif 1447 { 1448 struct pool_item_header *ph, *phnext; 1449 struct pool_cache *pc; 1450 struct timeval curtime; 1451 struct pool_pagelist pq; 1452 struct timeval diff; 1453 int s; 1454 1455 if (pp->pr_drain_hook != NULL) { 1456 /* 1457 * The drain hook must be called with the pool unlocked. 1458 */ 1459 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1460 } 1461 1462 if (simple_lock_try(&pp->pr_slock) == 0) 1463 return (0); 1464 pr_enter(pp, file, line); 1465 1466 LIST_INIT(&pq); 1467 1468 /* 1469 * Reclaim items from the pool's caches. 1470 */ 1471 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) 1472 pool_cache_reclaim(pc); 1473 1474 s = splclock(); 1475 curtime = mono_time; 1476 splx(s); 1477 1478 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1479 phnext = LIST_NEXT(ph, ph_pagelist); 1480 1481 /* Check our minimum page claim */ 1482 if (pp->pr_npages <= pp->pr_minpages) 1483 break; 1484 1485 KASSERT(ph->ph_nmissing == 0); 1486 timersub(&curtime, &ph->ph_time, &diff); 1487 if (diff.tv_sec < pool_inactive_time) 1488 continue; 1489 1490 /* 1491 * If freeing this page would put us below 1492 * the low water mark, stop now. 1493 */ 1494 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1495 pp->pr_minitems) 1496 break; 1497 1498 pr_rmpage(pp, ph, &pq); 1499 } 1500 1501 pr_leave(pp); 1502 simple_unlock(&pp->pr_slock); 1503 if (LIST_EMPTY(&pq)) 1504 return (0); 1505 1506 while ((ph = LIST_FIRST(&pq)) != NULL) { 1507 LIST_REMOVE(ph, ph_pagelist); 1508 pool_allocator_free(pp, ph->ph_page); 1509 if (pp->pr_roflags & PR_PHINPAGE) { 1510 continue; 1511 } 1512 s = splvm(); 1513 pool_put(pp->pr_phpool, ph); 1514 splx(s); 1515 } 1516 1517 return (1); 1518 } 1519 1520 /* 1521 * Drain pools, one at a time. 1522 * 1523 * Note, we must never be called from an interrupt context. 1524 */ 1525 void 1526 pool_drain(void *arg) 1527 { 1528 struct pool *pp; 1529 int s; 1530 1531 pp = NULL; 1532 s = splvm(); 1533 simple_lock(&pool_head_slock); 1534 if (drainpp == NULL) { 1535 drainpp = TAILQ_FIRST(&pool_head); 1536 } 1537 if (drainpp) { 1538 pp = drainpp; 1539 drainpp = TAILQ_NEXT(pp, pr_poollist); 1540 } 1541 simple_unlock(&pool_head_slock); 1542 pool_reclaim(pp); 1543 splx(s); 1544 } 1545 1546 /* 1547 * Diagnostic helpers. 
1548 */ 1549 void 1550 pool_print(struct pool *pp, const char *modif) 1551 { 1552 int s; 1553 1554 s = splvm(); 1555 if (simple_lock_try(&pp->pr_slock) == 0) { 1556 printf("pool %s is locked; try again later\n", 1557 pp->pr_wchan); 1558 splx(s); 1559 return; 1560 } 1561 pool_print1(pp, modif, printf); 1562 simple_unlock(&pp->pr_slock); 1563 splx(s); 1564 } 1565 1566 void 1567 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1568 { 1569 int didlock = 0; 1570 1571 if (pp == NULL) { 1572 (*pr)("Must specify a pool to print.\n"); 1573 return; 1574 } 1575 1576 /* 1577 * Called from DDB; interrupts should be blocked, and all 1578 * other processors should be paused. We can skip locking 1579 * the pool in this case. 1580 * 1581 * We do a simple_lock_try() just to print the lock 1582 * status, however. 1583 */ 1584 1585 if (simple_lock_try(&pp->pr_slock) == 0) 1586 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan); 1587 else 1588 didlock = 1; 1589 1590 pool_print1(pp, modif, pr); 1591 1592 if (didlock) 1593 simple_unlock(&pp->pr_slock); 1594 } 1595 1596 static void 1597 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1598 void (*pr)(const char *, ...)) 1599 { 1600 struct pool_item_header *ph; 1601 #ifdef DIAGNOSTIC 1602 struct pool_item *pi; 1603 #endif 1604 1605 LIST_FOREACH(ph, pl, ph_pagelist) { 1606 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1607 ph->ph_page, ph->ph_nmissing, 1608 (u_long)ph->ph_time.tv_sec, 1609 (u_long)ph->ph_time.tv_usec); 1610 #ifdef DIAGNOSTIC 1611 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1612 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1613 if (pi->pi_magic != PI_MAGIC) { 1614 (*pr)("\t\t\titem %p, magic 0x%x\n", 1615 pi, pi->pi_magic); 1616 } 1617 } 1618 } 1619 #endif 1620 } 1621 } 1622 1623 static void 1624 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1625 { 1626 struct pool_item_header *ph; 1627 struct pool_cache *pc; 1628 struct pool_cache_group *pcg; 1629 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1630 char c; 1631 1632 while ((c = *modif++) != '\0') { 1633 if (c == 'l') 1634 print_log = 1; 1635 if (c == 'p') 1636 print_pagelist = 1; 1637 if (c == 'c') 1638 print_cache = 1; 1639 } 1640 1641 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1642 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1643 pp->pr_roflags); 1644 (*pr)("\talloc %p\n", pp->pr_alloc); 1645 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1646 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1647 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1648 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1649 1650 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1651 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1652 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1653 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1654 1655 if (print_pagelist == 0) 1656 goto skip_pagelist; 1657 1658 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1659 (*pr)("\n\tempty page list:\n"); 1660 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1661 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1662 (*pr)("\n\tfull page list:\n"); 1663 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1664 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1665 (*pr)("\n\tpartial-page list:\n"); 1666 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1667 1668 if (pp->pr_curpage == NULL) 1669 (*pr)("\tno current page\n"); 1670 
else 1671 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1672 1673 skip_pagelist: 1674 if (print_log == 0) 1675 goto skip_log; 1676 1677 (*pr)("\n"); 1678 if ((pp->pr_roflags & PR_LOGGING) == 0) 1679 (*pr)("\tno log\n"); 1680 else 1681 pr_printlog(pp, NULL, pr); 1682 1683 skip_log: 1684 if (print_cache == 0) 1685 goto skip_cache; 1686 1687 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) { 1688 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc, 1689 pc->pc_allocfrom, pc->pc_freeto); 1690 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1691 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems); 1692 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1693 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); 1694 for (i = 0; i < PCG_NOBJECTS; i++) { 1695 if (pcg->pcg_objects[i].pcgo_pa != 1696 POOL_PADDR_INVALID) { 1697 (*pr)("\t\t\t%p, 0x%llx\n", 1698 pcg->pcg_objects[i].pcgo_va, 1699 (unsigned long long) 1700 pcg->pcg_objects[i].pcgo_pa); 1701 } else { 1702 (*pr)("\t\t\t%p\n", 1703 pcg->pcg_objects[i].pcgo_va); 1704 } 1705 } 1706 } 1707 } 1708 1709 skip_cache: 1710 pr_enter_check(pp, pr); 1711 } 1712 1713 static int 1714 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1715 { 1716 struct pool_item *pi; 1717 caddr_t page; 1718 int n; 1719 1720 page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask); 1721 if (page != ph->ph_page && 1722 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1723 if (label != NULL) 1724 printf("%s: ", label); 1725 printf("pool(%p:%s): page inconsistency: page %p;" 1726 " at page head addr %p (p %p)\n", pp, 1727 pp->pr_wchan, ph->ph_page, 1728 ph, page); 1729 return 1; 1730 } 1731 1732 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1733 return 0; 1734 1735 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0; 1736 pi != NULL; 1737 pi = TAILQ_NEXT(pi,pi_list), n++) { 1738 1739 #ifdef DIAGNOSTIC 1740 if (pi->pi_magic != PI_MAGIC) { 1741 if (label != NULL) 1742 printf("%s: ", label); 1743 printf("pool(%s): free list modified: magic=%x;" 1744 " page %p; item ordinal %d;" 1745 " addr %p (p %p)\n", 1746 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1747 n, pi, page); 1748 panic("pool"); 1749 } 1750 #endif 1751 page = 1752 (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); 1753 if (page == ph->ph_page) 1754 continue; 1755 1756 if (label != NULL) 1757 printf("%s: ", label); 1758 printf("pool(%p:%s): page inconsistency: page %p;" 1759 " item ordinal %d; addr %p (p %p)\n", pp, 1760 pp->pr_wchan, ph->ph_page, 1761 n, pi, page); 1762 return 1; 1763 } 1764 return 0; 1765 } 1766 1767 1768 int 1769 pool_chk(struct pool *pp, const char *label) 1770 { 1771 struct pool_item_header *ph; 1772 int r = 0; 1773 1774 simple_lock(&pp->pr_slock); 1775 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1776 r = pool_chk_page(pp, label, ph); 1777 if (r) { 1778 goto out; 1779 } 1780 } 1781 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1782 r = pool_chk_page(pp, label, ph); 1783 if (r) { 1784 goto out; 1785 } 1786 } 1787 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1788 r = pool_chk_page(pp, label, ph); 1789 if (r) { 1790 goto out; 1791 } 1792 } 1793 1794 out: 1795 simple_unlock(&pp->pr_slock); 1796 return (r); 1797 } 1798 1799 /* 1800 * pool_cache_init: 1801 * 1802 * Initialize a pool cache. 1803 * 1804 * NOTE: If the pool must be protected from interrupts, we expect 1805 * to be called at the appropriate interrupt priority level. 
1806 */ 1807 void 1808 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1809 int (*ctor)(void *, void *, int), 1810 void (*dtor)(void *, void *), 1811 void *arg) 1812 { 1813 1814 TAILQ_INIT(&pc->pc_grouplist); 1815 simple_lock_init(&pc->pc_slock); 1816 1817 pc->pc_allocfrom = NULL; 1818 pc->pc_freeto = NULL; 1819 pc->pc_pool = pp; 1820 1821 pc->pc_ctor = ctor; 1822 pc->pc_dtor = dtor; 1823 pc->pc_arg = arg; 1824 1825 pc->pc_hits = 0; 1826 pc->pc_misses = 0; 1827 1828 pc->pc_ngroups = 0; 1829 1830 pc->pc_nitems = 0; 1831 1832 simple_lock(&pp->pr_slock); 1833 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist); 1834 simple_unlock(&pp->pr_slock); 1835 } 1836 1837 /* 1838 * pool_cache_destroy: 1839 * 1840 * Destroy a pool cache. 1841 */ 1842 void 1843 pool_cache_destroy(struct pool_cache *pc) 1844 { 1845 struct pool *pp = pc->pc_pool; 1846 1847 /* First, invalidate the entire cache. */ 1848 pool_cache_invalidate(pc); 1849 1850 /* ...and remove it from the pool's cache list. */ 1851 simple_lock(&pp->pr_slock); 1852 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist); 1853 simple_unlock(&pp->pr_slock); 1854 } 1855 1856 static __inline void * 1857 pcg_get(struct pool_cache_group *pcg, paddr_t *pap) 1858 { 1859 void *object; 1860 u_int idx; 1861 1862 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1863 KASSERT(pcg->pcg_avail != 0); 1864 idx = --pcg->pcg_avail; 1865 1866 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL); 1867 object = pcg->pcg_objects[idx].pcgo_va; 1868 if (pap != NULL) 1869 *pap = pcg->pcg_objects[idx].pcgo_pa; 1870 pcg->pcg_objects[idx].pcgo_va = NULL; 1871 1872 return (object); 1873 } 1874 1875 static __inline void 1876 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa) 1877 { 1878 u_int idx; 1879 1880 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 1881 idx = pcg->pcg_avail++; 1882 1883 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL); 1884 pcg->pcg_objects[idx].pcgo_va = object; 1885 pcg->pcg_objects[idx].pcgo_pa = pa; 1886 } 1887 1888 /* 1889 * pool_cache_get{,_paddr}: 1890 * 1891 * Get an object from a pool cache (optionally returning 1892 * the physical address of the object). 1893 */ 1894 void * 1895 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap) 1896 { 1897 struct pool_cache_group *pcg; 1898 void *object; 1899 1900 #ifdef LOCKDEBUG 1901 if (flags & PR_WAITOK) 1902 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)"); 1903 #endif 1904 1905 simple_lock(&pc->pc_slock); 1906 1907 if ((pcg = pc->pc_allocfrom) == NULL) { 1908 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1909 if (pcg->pcg_avail != 0) { 1910 pc->pc_allocfrom = pcg; 1911 goto have_group; 1912 } 1913 } 1914 1915 /* 1916 * No groups with any available objects. Allocate 1917 * a new object, construct it, and return it to 1918 * the caller. We will allocate a group, if necessary, 1919 * when the object is freed back to the cache. 
1920 */ 1921 pc->pc_misses++; 1922 simple_unlock(&pc->pc_slock); 1923 object = pool_get(pc->pc_pool, flags); 1924 if (object != NULL && pc->pc_ctor != NULL) { 1925 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) { 1926 pool_put(pc->pc_pool, object); 1927 return (NULL); 1928 } 1929 } 1930 if (object != NULL && pap != NULL) { 1931 #ifdef POOL_VTOPHYS 1932 *pap = POOL_VTOPHYS(object); 1933 #else 1934 *pap = POOL_PADDR_INVALID; 1935 #endif 1936 } 1937 return (object); 1938 } 1939 1940 have_group: 1941 pc->pc_hits++; 1942 pc->pc_nitems--; 1943 object = pcg_get(pcg, pap); 1944 1945 if (pcg->pcg_avail == 0) 1946 pc->pc_allocfrom = NULL; 1947 1948 simple_unlock(&pc->pc_slock); 1949 1950 return (object); 1951 } 1952 1953 /* 1954 * pool_cache_put{,_paddr}: 1955 * 1956 * Put an object back to the pool cache (optionally caching the 1957 * physical address of the object). 1958 */ 1959 void 1960 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa) 1961 { 1962 struct pool_cache_group *pcg; 1963 int s; 1964 1965 simple_lock(&pc->pc_slock); 1966 1967 if ((pcg = pc->pc_freeto) == NULL) { 1968 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1969 if (pcg->pcg_avail != PCG_NOBJECTS) { 1970 pc->pc_freeto = pcg; 1971 goto have_group; 1972 } 1973 } 1974 1975 /* 1976 * No empty groups to free the object to. Attempt to 1977 * allocate one. 1978 */ 1979 simple_unlock(&pc->pc_slock); 1980 s = splvm(); 1981 pcg = pool_get(&pcgpool, PR_NOWAIT); 1982 splx(s); 1983 if (pcg != NULL) { 1984 memset(pcg, 0, sizeof(*pcg)); 1985 simple_lock(&pc->pc_slock); 1986 pc->pc_ngroups++; 1987 TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list); 1988 if (pc->pc_freeto == NULL) 1989 pc->pc_freeto = pcg; 1990 goto have_group; 1991 } 1992 1993 /* 1994 * Unable to allocate a cache group; destruct the object 1995 * and free it back to the pool. 1996 */ 1997 pool_cache_destruct_object(pc, object); 1998 return; 1999 } 2000 2001 have_group: 2002 pc->pc_nitems++; 2003 pcg_put(pcg, object, pa); 2004 2005 if (pcg->pcg_avail == PCG_NOBJECTS) 2006 pc->pc_freeto = NULL; 2007 2008 simple_unlock(&pc->pc_slock); 2009 } 2010 2011 /* 2012 * pool_cache_destruct_object: 2013 * 2014 * Force destruction of an object and its release back into 2015 * the pool. 2016 */ 2017 void 2018 pool_cache_destruct_object(struct pool_cache *pc, void *object) 2019 { 2020 2021 if (pc->pc_dtor != NULL) 2022 (*pc->pc_dtor)(pc->pc_arg, object); 2023 pool_put(pc->pc_pool, object); 2024 } 2025 2026 /* 2027 * pool_cache_do_invalidate: 2028 * 2029 * This internal function implements pool_cache_invalidate() and 2030 * pool_cache_reclaim(). 
2031 */ 2032 static void 2033 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups, 2034 void (*putit)(struct pool *, void *)) 2035 { 2036 struct pool_cache_group *pcg, *npcg; 2037 void *object; 2038 int s; 2039 2040 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL; 2041 pcg = npcg) { 2042 npcg = TAILQ_NEXT(pcg, pcg_list); 2043 while (pcg->pcg_avail != 0) { 2044 pc->pc_nitems--; 2045 object = pcg_get(pcg, NULL); 2046 if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg) 2047 pc->pc_allocfrom = NULL; 2048 if (pc->pc_dtor != NULL) 2049 (*pc->pc_dtor)(pc->pc_arg, object); 2050 (*putit)(pc->pc_pool, object); 2051 } 2052 if (free_groups) { 2053 pc->pc_ngroups--; 2054 TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list); 2055 if (pc->pc_freeto == pcg) 2056 pc->pc_freeto = NULL; 2057 s = splvm(); 2058 pool_put(&pcgpool, pcg); 2059 splx(s); 2060 } 2061 } 2062 } 2063 2064 /* 2065 * pool_cache_invalidate: 2066 * 2067 * Invalidate a pool cache (destruct and release all of the 2068 * cached objects). 2069 */ 2070 void 2071 pool_cache_invalidate(struct pool_cache *pc) 2072 { 2073 2074 simple_lock(&pc->pc_slock); 2075 pool_cache_do_invalidate(pc, 0, pool_put); 2076 simple_unlock(&pc->pc_slock); 2077 } 2078 2079 /* 2080 * pool_cache_reclaim: 2081 * 2082 * Reclaim a pool cache for pool_reclaim(). 2083 */ 2084 static void 2085 pool_cache_reclaim(struct pool_cache *pc) 2086 { 2087 2088 simple_lock(&pc->pc_slock); 2089 pool_cache_do_invalidate(pc, 1, pool_do_put); 2090 simple_unlock(&pc->pc_slock); 2091 } 2092 2093 /* 2094 * Pool backend allocators. 2095 * 2096 * Each pool has a backend allocator that handles allocation, deallocation, 2097 * and any additional draining that might be needed. 2098 * 2099 * We provide two standard allocators: 2100 * 2101 * pool_allocator_kmem - the default when no allocator is specified 2102 * 2103 * pool_allocator_nointr - used for pools that will not be accessed 2104 * in interrupt context. 2105 */ 2106 void *pool_page_alloc(struct pool *, int); 2107 void pool_page_free(struct pool *, void *); 2108 2109 struct pool_allocator pool_allocator_kmem = { 2110 pool_page_alloc, pool_page_free, 0, 2111 }; 2112 2113 void *pool_page_alloc_nointr(struct pool *, int); 2114 void pool_page_free_nointr(struct pool *, void *); 2115 2116 struct pool_allocator pool_allocator_nointr = { 2117 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2118 }; 2119 2120 #ifdef POOL_SUBPAGE 2121 void *pool_subpage_alloc(struct pool *, int); 2122 void pool_subpage_free(struct pool *, void *); 2123 2124 struct pool_allocator pool_allocator_kmem_subpage = { 2125 pool_subpage_alloc, pool_subpage_free, 0, 2126 }; 2127 #endif /* POOL_SUBPAGE */ 2128 2129 /* 2130 * We have at least three different resources for the same allocation and 2131 * each resource can be depleted. First, we have the ready elements in the 2132 * pool. Then we have the resource (typically a vm_map) for this allocator. 2133 * Finally, we have physical memory. Waiting for any of these can be 2134 * unnecessary when any other is freed, but the kernel doesn't support 2135 * sleeping on multiple wait channels, so we have to employ another strategy. 2136 * 2137 * The caller sleeps on the pool (so that it can be awakened when an item 2138 * is returned to the pool), but we set PA_WANT on the allocator. When a 2139 * page is returned to the allocator and PA_WANT is set, pool_allocator_free 2140 * will wake up all sleeping pools belonging to this allocator. 2141 * 2142 * XXX Thundering herd. 
/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in the
 * pool.  Then we have the resource (typically a vm_map) for this allocator.
 * Finally, we have physical memory.  Waiting for any of these can be
 * unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple wait channels, so we have to employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an item
 * is returned to the pool), but we set PA_WANT on the allocator.  When a
 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 * will wake up all sleeping pools belonging to this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	LOCK_ASSERT(!simple_lock_held(&org->pr_slock));

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools, except "org", that use this
		 * allocator.  We do this to reclaim VA space.
		 * pa_alloc is responsible for waiting for
		 * physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy() on "start".  But there is no
		 * other way to have potentially sleeping pool_reclaim,
		 * non-sleeping locks on pool_allocator, and some
		 * stirring of drained pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			if (pp == org)
				continue;
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
		    freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most likely
			 * sleep waiting for pages (if not, this won't hurt
			 * that much), and there is no way to set this in
			 * the caller without violating locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}
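
/*
 * Illustrative sketch (not part of the original pool code): how a
 * subsystem might register the drain hook that pool_allocator_alloc()
 * runs above in the PR_NOWAIT case.  pool_set_drain_hook() is assumed to
 * be the registration routine defined earlier in this file; the "mycache"
 * pool, entry type and mycache_trim() helper are invented for the example.
 */
#if 0
static struct pool mycache_pool;

/* Called with the flags that were passed to the failing pool_get(). */
static void
mycache_drain(void *arg, int flags)
{

	/* Hypothetical: release idle entries so the backend can recycle pages. */
	mycache_trim(arg);
}

void
mycache_init(void)
{

	pool_init(&mycache_pool, sizeof(struct mycache_entry), 0, 0, 0,
	    "mycachepl", NULL);
	pool_set_drain_hook(&mycache_pool, mycache_drain, NULL);
}
#endif
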
static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
	void *v;
	int s;

	s = splvm();
	v = pool_get(&psppool, flags);
	splx(s);
	return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	pool_put(&psppool, v);
	splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#else
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
#endif /* POOL_SUBPAGE */
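
/*
 * Illustrative sketch (not part of the original pool code): creating a
 * pool that is only touched from process context and therefore backed by
 * pool_allocator_nointr, as described in the backend-allocator comment
 * above.  The "bar" structure and pool name are invented for the example;
 * the wchan string doubles as the pool's name in diagnostic output.
 */
#if 0
static struct pool bar_pool;

void
bar_pool_setup(void)
{

	/* Passing &pool_allocator_nointr selects the kernel_map-backed pages. */
	pool_init(&bar_pool, sizeof(struct bar), 0, 0, 0, "barpl",
	    &pool_allocator_nointr);
}
#endif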