1 /* $NetBSD: subr_pool.c,v 1.123 2006/10/12 01:32:18 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.123 2006/10/12 01:32:18 christos Exp $"); 42 43 #include "opt_pool.h" 44 #include "opt_poollog.h" 45 #include "opt_lockdebug.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/proc.h> 50 #include <sys/errno.h> 51 #include <sys/kernel.h> 52 #include <sys/malloc.h> 53 #include <sys/lock.h> 54 #include <sys/pool.h> 55 #include <sys/syslog.h> 56 57 #include <uvm/uvm.h> 58 59 /* 60 * Pool resource management utility. 61 * 62 * Memory is allocated in pages which are split into pieces according to 63 * the pool item size. Each page is kept on one of three lists in the 64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 65 * for empty, full and partially-full pages respectively. The individual 66 * pool items are on a linked list headed by `ph_itemlist' in each page 67 * header. The memory for building the page list is either taken from 68 * the allocated pages themselves (for small pool items) or taken from 69 * an internal pool of page headers (`phpool'). 70 */ 71 72 /* List of all pools */ 73 LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head); 74 75 /* Private pool for page header structures */ 76 #define PHPOOL_MAX 8 77 static struct pool phpool[PHPOOL_MAX]; 78 #define PHPOOL_FREELIST_NELEM(idx) (((idx) == 0) ? 0 : (1 << (idx))) 79 80 #ifdef POOL_SUBPAGE 81 /* Pool of subpages for use by normal pools. */ 82 static struct pool psppool; 83 #endif 84 85 static SLIST_HEAD(, pool_allocator) pa_deferinitq = 86 SLIST_HEAD_INITIALIZER(pa_deferinitq); 87 88 static void *pool_page_alloc_meta(struct pool *, int); 89 static void pool_page_free_meta(struct pool *, void *); 90 91 /* allocator for pool metadata */ 92 static struct pool_allocator pool_allocator_meta = { 93 pool_page_alloc_meta, pool_page_free_meta, 94 .pa_backingmapptr = &kmem_map, 95 }; 96 97 /* # of seconds to retain page after last use */ 98 int pool_inactive_time = 10; 99 100 /* Next candidate for drainage (see pool_drain()) */ 101 static struct pool *drainpp; 102 103 /* This spin lock protects both pool_head and drainpp. */ 104 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER; 105 106 typedef uint8_t pool_item_freelist_t; 107 108 struct pool_item_header { 109 /* Page headers */ 110 LIST_ENTRY(pool_item_header) 111 ph_pagelist; /* pool page list */ 112 SPLAY_ENTRY(pool_item_header) 113 ph_node; /* Off-page page headers */ 114 caddr_t ph_page; /* this page's address */ 115 struct timeval ph_time; /* last referenced */ 116 union { 117 /* !PR_NOTOUCH */ 118 struct { 119 LIST_HEAD(, pool_item) 120 phu_itemlist; /* chunk list for this page */ 121 } phu_normal; 122 /* PR_NOTOUCH */ 123 struct { 124 uint16_t 125 phu_off; /* start offset in page */ 126 pool_item_freelist_t 127 phu_firstfree; /* first free item */ 128 /* 129 * XXX it might be better to use 130 * a simple bitmap and ffs(3) 131 */ 132 } phu_notouch; 133 } ph_u; 134 uint16_t ph_nmissing; /* # of chunks in use */ 135 }; 136 #define ph_itemlist ph_u.phu_normal.phu_itemlist 137 #define ph_off ph_u.phu_notouch.phu_off 138 #define ph_firstfree ph_u.phu_notouch.phu_firstfree 139 140 struct pool_item { 141 #ifdef DIAGNOSTIC 142 u_int pi_magic; 143 #endif 144 #define PI_MAGIC 0xdeadbeefU 145 /* Other entries use only this list entry */ 146 LIST_ENTRY(pool_item) pi_list; 147 }; 148 149 #define POOL_NEEDS_CATCHUP(pp) \ 150 ((pp)->pr_nitems < (pp)->pr_minitems) 151 152 /* 153 * Pool cache management. 154 * 155 * Pool caches provide a way for constructed objects to be cached by the 156 * pool subsystem. This can lead to performance improvements by avoiding 157 * needless object construction/destruction; it is deferred until absolutely 158 * necessary. 159 * 160 * Caches are grouped into cache groups. Each cache group references 161 * up to 16 constructed objects. When a cache allocates an object 162 * from the pool, it calls the object's constructor and places it into 163 * a cache group. When a cache group frees an object back to the pool, 164 * it first calls the object's destructor. This allows the object to 165 * persist in constructed form while freed to the cache. 166 * 167 * Multiple caches may exist for each pool. This allows a single 168 * object type to have multiple constructed forms. The pool references 169 * each cache, so that when a pool is drained by the pagedaemon, it can 170 * drain each individual cache as well. Each time a cache is drained, 171 * the most idle cache group is freed to the pool in its entirety. 172 * 173 * Pool caches are layed on top of pools. By layering them, we can avoid 174 * the complexity of cache management for pools which would not benefit 175 * from it. 176 */ 177 178 /* The cache group pool. */ 179 static struct pool pcgpool; 180 181 static void pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *, 182 struct pool_cache_grouplist *); 183 static void pcg_grouplist_free(struct pool_cache_grouplist *); 184 185 static int pool_catchup(struct pool *); 186 static void pool_prime_page(struct pool *, caddr_t, 187 struct pool_item_header *); 188 static void pool_update_curpage(struct pool *); 189 190 static int pool_grow(struct pool *, int); 191 static void *pool_allocator_alloc(struct pool *, int); 192 static void pool_allocator_free(struct pool *, void *); 193 194 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 195 void (*)(const char *, ...)); 196 static void pool_print1(struct pool *, const char *, 197 void (*)(const char *, ...)); 198 199 static int pool_chk_page(struct pool *, const char *, 200 struct pool_item_header *); 201 202 /* 203 * Pool log entry. An array of these is allocated in pool_init(). 204 */ 205 struct pool_log { 206 const char *pl_file; 207 long pl_line; 208 int pl_action; 209 #define PRLOG_GET 1 210 #define PRLOG_PUT 2 211 void *pl_addr; 212 }; 213 214 #ifdef POOL_DIAGNOSTIC 215 /* Number of entries in pool log buffers */ 216 #ifndef POOL_LOGSIZE 217 #define POOL_LOGSIZE 10 218 #endif 219 220 int pool_logsize = POOL_LOGSIZE; 221 222 static inline void 223 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 224 { 225 int n = pp->pr_curlogentry; 226 struct pool_log *pl; 227 228 if ((pp->pr_roflags & PR_LOGGING) == 0) 229 return; 230 231 /* 232 * Fill in the current entry. Wrap around and overwrite 233 * the oldest entry if necessary. 234 */ 235 pl = &pp->pr_log[n]; 236 pl->pl_file = file; 237 pl->pl_line = line; 238 pl->pl_action = action; 239 pl->pl_addr = v; 240 if (++n >= pp->pr_logsize) 241 n = 0; 242 pp->pr_curlogentry = n; 243 } 244 245 static void 246 pr_printlog(struct pool *pp, struct pool_item *pi, 247 void (*pr)(const char *, ...)) 248 { 249 int i = pp->pr_logsize; 250 int n = pp->pr_curlogentry; 251 252 if ((pp->pr_roflags & PR_LOGGING) == 0) 253 return; 254 255 /* 256 * Print all entries in this pool's log. 257 */ 258 while (i-- > 0) { 259 struct pool_log *pl = &pp->pr_log[n]; 260 if (pl->pl_action != 0) { 261 if (pi == NULL || pi == pl->pl_addr) { 262 (*pr)("\tlog entry %d:\n", i); 263 (*pr)("\t\taction = %s, addr = %p\n", 264 pl->pl_action == PRLOG_GET ? "get" : "put", 265 pl->pl_addr); 266 (*pr)("\t\tfile: %s at line %lu\n", 267 pl->pl_file, pl->pl_line); 268 } 269 } 270 if (++n >= pp->pr_logsize) 271 n = 0; 272 } 273 } 274 275 static inline void 276 pr_enter(struct pool *pp, const char *file, long line) 277 { 278 279 if (__predict_false(pp->pr_entered_file != NULL)) { 280 printf("pool %s: reentrancy at file %s line %ld\n", 281 pp->pr_wchan, file, line); 282 printf(" previous entry at file %s line %ld\n", 283 pp->pr_entered_file, pp->pr_entered_line); 284 panic("pr_enter"); 285 } 286 287 pp->pr_entered_file = file; 288 pp->pr_entered_line = line; 289 } 290 291 static inline void 292 pr_leave(struct pool *pp) 293 { 294 295 if (__predict_false(pp->pr_entered_file == NULL)) { 296 printf("pool %s not entered?\n", pp->pr_wchan); 297 panic("pr_leave"); 298 } 299 300 pp->pr_entered_file = NULL; 301 pp->pr_entered_line = 0; 302 } 303 304 static inline void 305 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 306 { 307 308 if (pp->pr_entered_file != NULL) 309 (*pr)("\n\tcurrently entered from file %s line %ld\n", 310 pp->pr_entered_file, pp->pr_entered_line); 311 } 312 #else 313 #define pr_log(pp, v, action, file, line) 314 #define pr_printlog(pp, pi, pr) 315 #define pr_enter(pp, file, line) 316 #define pr_leave(pp) 317 #define pr_enter_check(pp, pr) 318 #endif /* POOL_DIAGNOSTIC */ 319 320 static inline int 321 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 322 const void *v) 323 { 324 const char *cp = v; 325 int idx; 326 327 KASSERT(pp->pr_roflags & PR_NOTOUCH); 328 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 329 KASSERT(idx < pp->pr_itemsperpage); 330 return idx; 331 } 332 333 #define PR_FREELIST_ALIGN(p) \ 334 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 335 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 336 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 337 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 338 339 static inline void 340 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 341 void *obj) 342 { 343 int idx = pr_item_notouch_index(pp, ph, obj); 344 pool_item_freelist_t *freelist = PR_FREELIST(ph); 345 346 KASSERT(freelist[idx] == PR_INDEX_USED); 347 freelist[idx] = ph->ph_firstfree; 348 ph->ph_firstfree = idx; 349 } 350 351 static inline void * 352 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 353 { 354 int idx = ph->ph_firstfree; 355 pool_item_freelist_t *freelist = PR_FREELIST(ph); 356 357 KASSERT(freelist[idx] != PR_INDEX_USED); 358 ph->ph_firstfree = freelist[idx]; 359 freelist[idx] = PR_INDEX_USED; 360 361 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 362 } 363 364 static inline int 365 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 366 { 367 368 /* 369 * we consider pool_item_header with smaller ph_page bigger. 370 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 371 */ 372 373 if (a->ph_page < b->ph_page) 374 return (1); 375 else if (a->ph_page > b->ph_page) 376 return (-1); 377 else 378 return (0); 379 } 380 381 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 382 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 383 384 /* 385 * Return the pool page header based on item address. 386 */ 387 static inline struct pool_item_header * 388 pr_find_pagehead(struct pool *pp, void *v) 389 { 390 struct pool_item_header *ph, tmp; 391 392 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 393 tmp.ph_page = (caddr_t)(uintptr_t)v; 394 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 395 if (ph == NULL) { 396 ph = SPLAY_ROOT(&pp->pr_phtree); 397 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 398 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 399 } 400 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 401 } 402 } else { 403 caddr_t page = 404 (caddr_t)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 405 406 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 407 ph = (void *)(page + pp->pr_phoffset); 408 } else { 409 tmp.ph_page = page; 410 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 411 } 412 } 413 414 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 415 (ph->ph_page <= (char *)v && 416 (char *)v < ph->ph_page + pp->pr_alloc->pa_pagesz)); 417 return ph; 418 } 419 420 static void 421 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 422 { 423 struct pool_item_header *ph; 424 int s; 425 426 while ((ph = LIST_FIRST(pq)) != NULL) { 427 LIST_REMOVE(ph, ph_pagelist); 428 pool_allocator_free(pp, ph->ph_page); 429 if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 430 s = splvm(); 431 pool_put(pp->pr_phpool, ph); 432 splx(s); 433 } 434 } 435 } 436 437 /* 438 * Remove a page from the pool. 439 */ 440 static inline void 441 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 442 struct pool_pagelist *pq) 443 { 444 445 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 446 447 /* 448 * If the page was idle, decrement the idle page count. 449 */ 450 if (ph->ph_nmissing == 0) { 451 #ifdef DIAGNOSTIC 452 if (pp->pr_nidle == 0) 453 panic("pr_rmpage: nidle inconsistent"); 454 if (pp->pr_nitems < pp->pr_itemsperpage) 455 panic("pr_rmpage: nitems inconsistent"); 456 #endif 457 pp->pr_nidle--; 458 } 459 460 pp->pr_nitems -= pp->pr_itemsperpage; 461 462 /* 463 * Unlink the page from the pool and queue it for release. 464 */ 465 LIST_REMOVE(ph, ph_pagelist); 466 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 467 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 468 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 469 470 pp->pr_npages--; 471 pp->pr_npagefree++; 472 473 pool_update_curpage(pp); 474 } 475 476 static boolean_t 477 pa_starved_p(struct pool_allocator *pa) 478 { 479 480 if (pa->pa_backingmap != NULL) { 481 return vm_map_starved_p(pa->pa_backingmap); 482 } 483 return FALSE; 484 } 485 486 static int 487 pool_reclaim_callback(struct callback_entry *ce __unused, void *obj, 488 void *arg __unused) 489 { 490 struct pool *pp = obj; 491 struct pool_allocator *pa = pp->pr_alloc; 492 493 KASSERT(&pp->pr_reclaimerentry == ce); 494 pool_reclaim(pp); 495 if (!pa_starved_p(pa)) { 496 return CALLBACK_CHAIN_ABORT; 497 } 498 return CALLBACK_CHAIN_CONTINUE; 499 } 500 501 static void 502 pool_reclaim_register(struct pool *pp) 503 { 504 struct vm_map *map = pp->pr_alloc->pa_backingmap; 505 int s; 506 507 if (map == NULL) { 508 return; 509 } 510 511 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 512 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 513 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 514 splx(s); 515 } 516 517 static void 518 pool_reclaim_unregister(struct pool *pp) 519 { 520 struct vm_map *map = pp->pr_alloc->pa_backingmap; 521 int s; 522 523 if (map == NULL) { 524 return; 525 } 526 527 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 528 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 529 &pp->pr_reclaimerentry); 530 splx(s); 531 } 532 533 static void 534 pa_reclaim_register(struct pool_allocator *pa) 535 { 536 struct vm_map *map = *pa->pa_backingmapptr; 537 struct pool *pp; 538 539 KASSERT(pa->pa_backingmap == NULL); 540 if (map == NULL) { 541 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 542 return; 543 } 544 pa->pa_backingmap = map; 545 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 546 pool_reclaim_register(pp); 547 } 548 } 549 550 /* 551 * Initialize all the pools listed in the "pools" link set. 552 */ 553 void 554 pool_subsystem_init(void) 555 { 556 struct pool_allocator *pa; 557 __link_set_decl(pools, struct link_pool_init); 558 struct link_pool_init * const *pi; 559 560 __link_set_foreach(pi, pools) 561 pool_init((*pi)->pp, (*pi)->size, (*pi)->align, 562 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan, 563 (*pi)->palloc); 564 565 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 566 KASSERT(pa->pa_backingmapptr != NULL); 567 KASSERT(*pa->pa_backingmapptr != NULL); 568 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 569 pa_reclaim_register(pa); 570 } 571 } 572 573 /* 574 * Initialize the given pool resource structure. 575 * 576 * We export this routine to allow other kernel parts to declare 577 * static pools that must be initialized before malloc() is available. 578 */ 579 void 580 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 581 const char *wchan, struct pool_allocator *palloc) 582 { 583 #ifdef DEBUG 584 struct pool *pp1; 585 #endif 586 size_t trysize, phsize; 587 int off, slack, s; 588 589 KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >= 590 PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1)); 591 592 #ifdef DEBUG 593 /* 594 * Check that the pool hasn't already been initialised and 595 * added to the list of all pools. 596 */ 597 LIST_FOREACH(pp1, &pool_head, pr_poollist) { 598 if (pp == pp1) 599 panic("pool_init: pool %s already initialised", 600 wchan); 601 } 602 #endif 603 604 #ifdef POOL_DIAGNOSTIC 605 /* 606 * Always log if POOL_DIAGNOSTIC is defined. 607 */ 608 if (pool_logsize != 0) 609 flags |= PR_LOGGING; 610 #endif 611 612 if (palloc == NULL) 613 palloc = &pool_allocator_kmem; 614 #ifdef POOL_SUBPAGE 615 if (size > palloc->pa_pagesz) { 616 if (palloc == &pool_allocator_kmem) 617 palloc = &pool_allocator_kmem_fullpage; 618 else if (palloc == &pool_allocator_nointr) 619 palloc = &pool_allocator_nointr_fullpage; 620 } 621 #endif /* POOL_SUBPAGE */ 622 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 623 if (palloc->pa_pagesz == 0) 624 palloc->pa_pagesz = PAGE_SIZE; 625 626 TAILQ_INIT(&palloc->pa_list); 627 628 simple_lock_init(&palloc->pa_slock); 629 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 630 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 631 632 if (palloc->pa_backingmapptr != NULL) { 633 pa_reclaim_register(palloc); 634 } 635 palloc->pa_flags |= PA_INITIALIZED; 636 } 637 638 if (align == 0) 639 align = ALIGN(1); 640 641 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 642 size = sizeof(struct pool_item); 643 644 size = roundup(size, align); 645 #ifdef DIAGNOSTIC 646 if (size > palloc->pa_pagesz) 647 panic("pool_init: pool item size (%zu) too large", size); 648 #endif 649 650 /* 651 * Initialize the pool structure. 652 */ 653 LIST_INIT(&pp->pr_emptypages); 654 LIST_INIT(&pp->pr_fullpages); 655 LIST_INIT(&pp->pr_partpages); 656 LIST_INIT(&pp->pr_cachelist); 657 pp->pr_curpage = NULL; 658 pp->pr_npages = 0; 659 pp->pr_minitems = 0; 660 pp->pr_minpages = 0; 661 pp->pr_maxpages = UINT_MAX; 662 pp->pr_roflags = flags; 663 pp->pr_flags = 0; 664 pp->pr_size = size; 665 pp->pr_align = align; 666 pp->pr_wchan = wchan; 667 pp->pr_alloc = palloc; 668 pp->pr_nitems = 0; 669 pp->pr_nout = 0; 670 pp->pr_hardlimit = UINT_MAX; 671 pp->pr_hardlimit_warning = NULL; 672 pp->pr_hardlimit_ratecap.tv_sec = 0; 673 pp->pr_hardlimit_ratecap.tv_usec = 0; 674 pp->pr_hardlimit_warning_last.tv_sec = 0; 675 pp->pr_hardlimit_warning_last.tv_usec = 0; 676 pp->pr_drain_hook = NULL; 677 pp->pr_drain_hook_arg = NULL; 678 679 /* 680 * Decide whether to put the page header off page to avoid 681 * wasting too large a part of the page or too big item. 682 * Off-page page headers go on a hash table, so we can match 683 * a returned item with its header based on the page address. 684 * We use 1/16 of the page size and about 8 times of the item 685 * size as the threshold (XXX: tune) 686 * 687 * However, we'll put the header into the page if we can put 688 * it without wasting any items. 689 * 690 * Silently enforce `0 <= ioff < align'. 691 */ 692 pp->pr_itemoffset = ioff %= align; 693 /* See the comment below about reserved bytes. */ 694 trysize = palloc->pa_pagesz - ((align - ioff) % align); 695 phsize = ALIGN(sizeof(struct pool_item_header)); 696 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 && 697 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 698 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 699 /* Use the end of the page for the page header */ 700 pp->pr_roflags |= PR_PHINPAGE; 701 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 702 } else { 703 /* The page header will be taken from our page header pool */ 704 pp->pr_phoffset = 0; 705 off = palloc->pa_pagesz; 706 SPLAY_INIT(&pp->pr_phtree); 707 } 708 709 /* 710 * Alignment is to take place at `ioff' within the item. This means 711 * we must reserve up to `align - 1' bytes on the page to allow 712 * appropriate positioning of each item. 713 */ 714 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 715 KASSERT(pp->pr_itemsperpage != 0); 716 if ((pp->pr_roflags & PR_NOTOUCH)) { 717 int idx; 718 719 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 720 idx++) { 721 /* nothing */ 722 } 723 if (idx >= PHPOOL_MAX) { 724 /* 725 * if you see this panic, consider to tweak 726 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 727 */ 728 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 729 pp->pr_wchan, pp->pr_itemsperpage); 730 } 731 pp->pr_phpool = &phpool[idx]; 732 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 733 pp->pr_phpool = &phpool[0]; 734 } 735 #if defined(DIAGNOSTIC) 736 else { 737 pp->pr_phpool = NULL; 738 } 739 #endif 740 741 /* 742 * Use the slack between the chunks and the page header 743 * for "cache coloring". 744 */ 745 slack = off - pp->pr_itemsperpage * pp->pr_size; 746 pp->pr_maxcolor = (slack / align) * align; 747 pp->pr_curcolor = 0; 748 749 pp->pr_nget = 0; 750 pp->pr_nfail = 0; 751 pp->pr_nput = 0; 752 pp->pr_npagealloc = 0; 753 pp->pr_npagefree = 0; 754 pp->pr_hiwat = 0; 755 pp->pr_nidle = 0; 756 757 #ifdef POOL_DIAGNOSTIC 758 if (flags & PR_LOGGING) { 759 if (kmem_map == NULL || 760 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 761 M_TEMP, M_NOWAIT)) == NULL) 762 pp->pr_roflags &= ~PR_LOGGING; 763 pp->pr_curlogentry = 0; 764 pp->pr_logsize = pool_logsize; 765 } 766 #endif 767 768 pp->pr_entered_file = NULL; 769 pp->pr_entered_line = 0; 770 771 simple_lock_init(&pp->pr_slock); 772 773 /* 774 * Initialize private page header pool and cache magazine pool if we 775 * haven't done so yet. 776 * XXX LOCKING. 777 */ 778 if (phpool[0].pr_size == 0) { 779 int idx; 780 for (idx = 0; idx < PHPOOL_MAX; idx++) { 781 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 782 int nelem; 783 size_t sz; 784 785 nelem = PHPOOL_FREELIST_NELEM(idx); 786 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 787 "phpool-%d", nelem); 788 sz = sizeof(struct pool_item_header); 789 if (nelem) { 790 sz = PR_FREELIST_ALIGN(sz) 791 + nelem * sizeof(pool_item_freelist_t); 792 } 793 pool_init(&phpool[idx], sz, 0, 0, 0, 794 phpool_names[idx], &pool_allocator_meta); 795 } 796 #ifdef POOL_SUBPAGE 797 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 798 PR_RECURSIVE, "psppool", &pool_allocator_meta); 799 #endif 800 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 801 0, "pcgpool", &pool_allocator_meta); 802 } 803 804 /* Insert into the list of all pools. */ 805 simple_lock(&pool_head_slock); 806 LIST_INSERT_HEAD(&pool_head, pp, pr_poollist); 807 simple_unlock(&pool_head_slock); 808 809 /* Insert this into the list of pools using this allocator. */ 810 s = splvm(); 811 simple_lock(&palloc->pa_slock); 812 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 813 simple_unlock(&palloc->pa_slock); 814 splx(s); 815 pool_reclaim_register(pp); 816 } 817 818 /* 819 * De-commision a pool resource. 820 */ 821 void 822 pool_destroy(struct pool *pp) 823 { 824 struct pool_pagelist pq; 825 struct pool_item_header *ph; 826 int s; 827 828 /* Remove from global pool list */ 829 simple_lock(&pool_head_slock); 830 LIST_REMOVE(pp, pr_poollist); 831 if (drainpp == pp) 832 drainpp = NULL; 833 simple_unlock(&pool_head_slock); 834 835 /* Remove this pool from its allocator's list of pools. */ 836 pool_reclaim_unregister(pp); 837 s = splvm(); 838 simple_lock(&pp->pr_alloc->pa_slock); 839 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 840 simple_unlock(&pp->pr_alloc->pa_slock); 841 splx(s); 842 843 s = splvm(); 844 simple_lock(&pp->pr_slock); 845 846 KASSERT(LIST_EMPTY(&pp->pr_cachelist)); 847 848 #ifdef DIAGNOSTIC 849 if (pp->pr_nout != 0) { 850 pr_printlog(pp, NULL, printf); 851 panic("pool_destroy: pool busy: still out: %u", 852 pp->pr_nout); 853 } 854 #endif 855 856 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 857 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 858 859 /* Remove all pages */ 860 LIST_INIT(&pq); 861 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 862 pr_rmpage(pp, ph, &pq); 863 864 simple_unlock(&pp->pr_slock); 865 splx(s); 866 867 pr_pagelist_free(pp, &pq); 868 869 #ifdef POOL_DIAGNOSTIC 870 if ((pp->pr_roflags & PR_LOGGING) != 0) 871 free(pp->pr_log, M_TEMP); 872 #endif 873 } 874 875 void 876 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 877 { 878 879 /* XXX no locking -- must be used just after pool_init() */ 880 #ifdef DIAGNOSTIC 881 if (pp->pr_drain_hook != NULL) 882 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 883 #endif 884 pp->pr_drain_hook = fn; 885 pp->pr_drain_hook_arg = arg; 886 } 887 888 static struct pool_item_header * 889 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) 890 { 891 struct pool_item_header *ph; 892 int s; 893 894 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0); 895 896 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 897 ph = (struct pool_item_header *) (storage + pp->pr_phoffset); 898 else { 899 s = splvm(); 900 ph = pool_get(pp->pr_phpool, flags); 901 splx(s); 902 } 903 904 return (ph); 905 } 906 907 /* 908 * Grab an item from the pool; must be called at appropriate spl level 909 */ 910 void * 911 #ifdef POOL_DIAGNOSTIC 912 _pool_get(struct pool *pp, int flags, const char *file, long line) 913 #else 914 pool_get(struct pool *pp, int flags) 915 #endif 916 { 917 struct pool_item *pi; 918 struct pool_item_header *ph; 919 void *v; 920 921 #ifdef DIAGNOSTIC 922 if (__predict_false(pp->pr_itemsperpage == 0)) 923 panic("pool_get: pool %p: pr_itemsperpage is zero, " 924 "pool not initialized?", pp); 925 if (__predict_false(curlwp == NULL && doing_shutdown == 0 && 926 (flags & PR_WAITOK) != 0)) 927 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan); 928 929 #endif /* DIAGNOSTIC */ 930 #ifdef LOCKDEBUG 931 if (flags & PR_WAITOK) 932 ASSERT_SLEEPABLE(NULL, "pool_get(PR_WAITOK)"); 933 SCHED_ASSERT_UNLOCKED(); 934 #endif 935 936 simple_lock(&pp->pr_slock); 937 pr_enter(pp, file, line); 938 939 startover: 940 /* 941 * Check to see if we've reached the hard limit. If we have, 942 * and we can wait, then wait until an item has been returned to 943 * the pool. 944 */ 945 #ifdef DIAGNOSTIC 946 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 947 pr_leave(pp); 948 simple_unlock(&pp->pr_slock); 949 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 950 } 951 #endif 952 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 953 if (pp->pr_drain_hook != NULL) { 954 /* 955 * Since the drain hook is going to free things 956 * back to the pool, unlock, call the hook, re-lock, 957 * and check the hardlimit condition again. 958 */ 959 pr_leave(pp); 960 simple_unlock(&pp->pr_slock); 961 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 962 simple_lock(&pp->pr_slock); 963 pr_enter(pp, file, line); 964 if (pp->pr_nout < pp->pr_hardlimit) 965 goto startover; 966 } 967 968 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 969 /* 970 * XXX: A warning isn't logged in this case. Should 971 * it be? 972 */ 973 pp->pr_flags |= PR_WANTED; 974 pr_leave(pp); 975 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 976 pr_enter(pp, file, line); 977 goto startover; 978 } 979 980 /* 981 * Log a message that the hard limit has been hit. 982 */ 983 if (pp->pr_hardlimit_warning != NULL && 984 ratecheck(&pp->pr_hardlimit_warning_last, 985 &pp->pr_hardlimit_ratecap)) 986 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 987 988 pp->pr_nfail++; 989 990 pr_leave(pp); 991 simple_unlock(&pp->pr_slock); 992 return (NULL); 993 } 994 995 /* 996 * The convention we use is that if `curpage' is not NULL, then 997 * it points at a non-empty bucket. In particular, `curpage' 998 * never points at a page header which has PR_PHINPAGE set and 999 * has no items in its bucket. 1000 */ 1001 if ((ph = pp->pr_curpage) == NULL) { 1002 int error; 1003 1004 #ifdef DIAGNOSTIC 1005 if (pp->pr_nitems != 0) { 1006 simple_unlock(&pp->pr_slock); 1007 printf("pool_get: %s: curpage NULL, nitems %u\n", 1008 pp->pr_wchan, pp->pr_nitems); 1009 panic("pool_get: nitems inconsistent"); 1010 } 1011 #endif 1012 1013 /* 1014 * Call the back-end page allocator for more memory. 1015 * Release the pool lock, as the back-end page allocator 1016 * may block. 1017 */ 1018 pr_leave(pp); 1019 error = pool_grow(pp, flags); 1020 pr_enter(pp, file, line); 1021 if (error != 0) { 1022 /* 1023 * We were unable to allocate a page or item 1024 * header, but we released the lock during 1025 * allocation, so perhaps items were freed 1026 * back to the pool. Check for this case. 1027 */ 1028 if (pp->pr_curpage != NULL) 1029 goto startover; 1030 1031 pp->pr_nfail++; 1032 pr_leave(pp); 1033 simple_unlock(&pp->pr_slock); 1034 return (NULL); 1035 } 1036 1037 /* Start the allocation process over. */ 1038 goto startover; 1039 } 1040 if (pp->pr_roflags & PR_NOTOUCH) { 1041 #ifdef DIAGNOSTIC 1042 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1043 pr_leave(pp); 1044 simple_unlock(&pp->pr_slock); 1045 panic("pool_get: %s: page empty", pp->pr_wchan); 1046 } 1047 #endif 1048 v = pr_item_notouch_get(pp, ph); 1049 #ifdef POOL_DIAGNOSTIC 1050 pr_log(pp, v, PRLOG_GET, file, line); 1051 #endif 1052 } else { 1053 v = pi = LIST_FIRST(&ph->ph_itemlist); 1054 if (__predict_false(v == NULL)) { 1055 pr_leave(pp); 1056 simple_unlock(&pp->pr_slock); 1057 panic("pool_get: %s: page empty", pp->pr_wchan); 1058 } 1059 #ifdef DIAGNOSTIC 1060 if (__predict_false(pp->pr_nitems == 0)) { 1061 pr_leave(pp); 1062 simple_unlock(&pp->pr_slock); 1063 printf("pool_get: %s: items on itemlist, nitems %u\n", 1064 pp->pr_wchan, pp->pr_nitems); 1065 panic("pool_get: nitems inconsistent"); 1066 } 1067 #endif 1068 1069 #ifdef POOL_DIAGNOSTIC 1070 pr_log(pp, v, PRLOG_GET, file, line); 1071 #endif 1072 1073 #ifdef DIAGNOSTIC 1074 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1075 pr_printlog(pp, pi, printf); 1076 panic("pool_get(%s): free list modified: " 1077 "magic=%x; page %p; item addr %p\n", 1078 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1079 } 1080 #endif 1081 1082 /* 1083 * Remove from item list. 1084 */ 1085 LIST_REMOVE(pi, pi_list); 1086 } 1087 pp->pr_nitems--; 1088 pp->pr_nout++; 1089 if (ph->ph_nmissing == 0) { 1090 #ifdef DIAGNOSTIC 1091 if (__predict_false(pp->pr_nidle == 0)) 1092 panic("pool_get: nidle inconsistent"); 1093 #endif 1094 pp->pr_nidle--; 1095 1096 /* 1097 * This page was previously empty. Move it to the list of 1098 * partially-full pages. This page is already curpage. 1099 */ 1100 LIST_REMOVE(ph, ph_pagelist); 1101 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1102 } 1103 ph->ph_nmissing++; 1104 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1105 #ifdef DIAGNOSTIC 1106 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1107 !LIST_EMPTY(&ph->ph_itemlist))) { 1108 pr_leave(pp); 1109 simple_unlock(&pp->pr_slock); 1110 panic("pool_get: %s: nmissing inconsistent", 1111 pp->pr_wchan); 1112 } 1113 #endif 1114 /* 1115 * This page is now full. Move it to the full list 1116 * and select a new current page. 1117 */ 1118 LIST_REMOVE(ph, ph_pagelist); 1119 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1120 pool_update_curpage(pp); 1121 } 1122 1123 pp->pr_nget++; 1124 pr_leave(pp); 1125 1126 /* 1127 * If we have a low water mark and we are now below that low 1128 * water mark, add more items to the pool. 1129 */ 1130 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1131 /* 1132 * XXX: Should we log a warning? Should we set up a timeout 1133 * to try again in a second or so? The latter could break 1134 * a caller's assumptions about interrupt protection, etc. 1135 */ 1136 } 1137 1138 simple_unlock(&pp->pr_slock); 1139 return (v); 1140 } 1141 1142 /* 1143 * Internal version of pool_put(). Pool is already locked/entered. 1144 */ 1145 static void 1146 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1147 { 1148 struct pool_item *pi = v; 1149 struct pool_item_header *ph; 1150 1151 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1152 SCHED_ASSERT_UNLOCKED(); 1153 1154 #ifdef DIAGNOSTIC 1155 if (__predict_false(pp->pr_nout == 0)) { 1156 printf("pool %s: putting with none out\n", 1157 pp->pr_wchan); 1158 panic("pool_put"); 1159 } 1160 #endif 1161 1162 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1163 pr_printlog(pp, NULL, printf); 1164 panic("pool_put: %s: page header missing", pp->pr_wchan); 1165 } 1166 1167 #ifdef LOCKDEBUG 1168 /* 1169 * Check if we're freeing a locked simple lock. 1170 */ 1171 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size); 1172 #endif 1173 1174 /* 1175 * Return to item list. 1176 */ 1177 if (pp->pr_roflags & PR_NOTOUCH) { 1178 pr_item_notouch_put(pp, ph, v); 1179 } else { 1180 #ifdef DIAGNOSTIC 1181 pi->pi_magic = PI_MAGIC; 1182 #endif 1183 #ifdef DEBUG 1184 { 1185 int i, *ip = v; 1186 1187 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1188 *ip++ = PI_MAGIC; 1189 } 1190 } 1191 #endif 1192 1193 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1194 } 1195 KDASSERT(ph->ph_nmissing != 0); 1196 ph->ph_nmissing--; 1197 pp->pr_nput++; 1198 pp->pr_nitems++; 1199 pp->pr_nout--; 1200 1201 /* Cancel "pool empty" condition if it exists */ 1202 if (pp->pr_curpage == NULL) 1203 pp->pr_curpage = ph; 1204 1205 if (pp->pr_flags & PR_WANTED) { 1206 pp->pr_flags &= ~PR_WANTED; 1207 if (ph->ph_nmissing == 0) 1208 pp->pr_nidle++; 1209 wakeup((caddr_t)pp); 1210 return; 1211 } 1212 1213 /* 1214 * If this page is now empty, do one of two things: 1215 * 1216 * (1) If we have more pages than the page high water mark, 1217 * free the page back to the system. ONLY CONSIDER 1218 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1219 * CLAIM. 1220 * 1221 * (2) Otherwise, move the page to the empty page list. 1222 * 1223 * Either way, select a new current page (so we use a partially-full 1224 * page if one is available). 1225 */ 1226 if (ph->ph_nmissing == 0) { 1227 pp->pr_nidle++; 1228 if (pp->pr_npages > pp->pr_minpages && 1229 (pp->pr_npages > pp->pr_maxpages || 1230 pa_starved_p(pp->pr_alloc))) { 1231 pr_rmpage(pp, ph, pq); 1232 } else { 1233 LIST_REMOVE(ph, ph_pagelist); 1234 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1235 1236 /* 1237 * Update the timestamp on the page. A page must 1238 * be idle for some period of time before it can 1239 * be reclaimed by the pagedaemon. This minimizes 1240 * ping-pong'ing for memory. 1241 */ 1242 getmicrotime(&ph->ph_time); 1243 } 1244 pool_update_curpage(pp); 1245 } 1246 1247 /* 1248 * If the page was previously completely full, move it to the 1249 * partially-full list and make it the current page. The next 1250 * allocation will get the item from this page, instead of 1251 * further fragmenting the pool. 1252 */ 1253 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1254 LIST_REMOVE(ph, ph_pagelist); 1255 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1256 pp->pr_curpage = ph; 1257 } 1258 } 1259 1260 /* 1261 * Return resource to the pool; must be called at appropriate spl level 1262 */ 1263 #ifdef POOL_DIAGNOSTIC 1264 void 1265 _pool_put(struct pool *pp, void *v, const char *file, long line) 1266 { 1267 struct pool_pagelist pq; 1268 1269 LIST_INIT(&pq); 1270 1271 simple_lock(&pp->pr_slock); 1272 pr_enter(pp, file, line); 1273 1274 pr_log(pp, v, PRLOG_PUT, file, line); 1275 1276 pool_do_put(pp, v, &pq); 1277 1278 pr_leave(pp); 1279 simple_unlock(&pp->pr_slock); 1280 1281 pr_pagelist_free(pp, &pq); 1282 } 1283 #undef pool_put 1284 #endif /* POOL_DIAGNOSTIC */ 1285 1286 void 1287 pool_put(struct pool *pp, void *v) 1288 { 1289 struct pool_pagelist pq; 1290 1291 LIST_INIT(&pq); 1292 1293 simple_lock(&pp->pr_slock); 1294 pool_do_put(pp, v, &pq); 1295 simple_unlock(&pp->pr_slock); 1296 1297 pr_pagelist_free(pp, &pq); 1298 } 1299 1300 #ifdef POOL_DIAGNOSTIC 1301 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1302 #endif 1303 1304 /* 1305 * pool_grow: grow a pool by a page. 1306 * 1307 * => called with pool locked. 1308 * => unlock and relock the pool. 1309 * => return with pool locked. 1310 */ 1311 1312 static int 1313 pool_grow(struct pool *pp, int flags) 1314 { 1315 struct pool_item_header *ph = NULL; 1316 char *cp; 1317 1318 simple_unlock(&pp->pr_slock); 1319 cp = pool_allocator_alloc(pp, flags); 1320 if (__predict_true(cp != NULL)) { 1321 ph = pool_alloc_item_header(pp, cp, flags); 1322 } 1323 if (__predict_false(cp == NULL || ph == NULL)) { 1324 if (cp != NULL) { 1325 pool_allocator_free(pp, cp); 1326 } 1327 simple_lock(&pp->pr_slock); 1328 return ENOMEM; 1329 } 1330 1331 simple_lock(&pp->pr_slock); 1332 pool_prime_page(pp, cp, ph); 1333 pp->pr_npagealloc++; 1334 return 0; 1335 } 1336 1337 /* 1338 * Add N items to the pool. 1339 */ 1340 int 1341 pool_prime(struct pool *pp, int n) 1342 { 1343 int newpages; 1344 int error = 0; 1345 1346 simple_lock(&pp->pr_slock); 1347 1348 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1349 1350 while (newpages-- > 0) { 1351 error = pool_grow(pp, PR_NOWAIT); 1352 if (error) { 1353 break; 1354 } 1355 pp->pr_minpages++; 1356 } 1357 1358 if (pp->pr_minpages >= pp->pr_maxpages) 1359 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1360 1361 simple_unlock(&pp->pr_slock); 1362 return error; 1363 } 1364 1365 /* 1366 * Add a page worth of items to the pool. 1367 * 1368 * Note, we must be called with the pool descriptor LOCKED. 1369 */ 1370 static void 1371 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 1372 { 1373 struct pool_item *pi; 1374 caddr_t cp = storage; 1375 unsigned int align = pp->pr_align; 1376 unsigned int ioff = pp->pr_itemoffset; 1377 int n; 1378 1379 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1380 1381 #ifdef DIAGNOSTIC 1382 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1383 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1384 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1385 #endif 1386 1387 /* 1388 * Insert page header. 1389 */ 1390 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1391 LIST_INIT(&ph->ph_itemlist); 1392 ph->ph_page = storage; 1393 ph->ph_nmissing = 0; 1394 getmicrotime(&ph->ph_time); 1395 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1396 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1397 1398 pp->pr_nidle++; 1399 1400 /* 1401 * Color this page. 1402 */ 1403 cp = (caddr_t)(cp + pp->pr_curcolor); 1404 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1405 pp->pr_curcolor = 0; 1406 1407 /* 1408 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 1409 */ 1410 if (ioff != 0) 1411 cp = (caddr_t)(cp + (align - ioff)); 1412 1413 /* 1414 * Insert remaining chunks on the bucket list. 1415 */ 1416 n = pp->pr_itemsperpage; 1417 pp->pr_nitems += n; 1418 1419 if (pp->pr_roflags & PR_NOTOUCH) { 1420 pool_item_freelist_t *freelist = PR_FREELIST(ph); 1421 int i; 1422 1423 ph->ph_off = cp - storage; 1424 ph->ph_firstfree = 0; 1425 for (i = 0; i < n - 1; i++) 1426 freelist[i] = i + 1; 1427 freelist[n - 1] = PR_INDEX_EOL; 1428 } else { 1429 while (n--) { 1430 pi = (struct pool_item *)cp; 1431 1432 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1433 1434 /* Insert on page list */ 1435 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1436 #ifdef DIAGNOSTIC 1437 pi->pi_magic = PI_MAGIC; 1438 #endif 1439 cp = (caddr_t)(cp + pp->pr_size); 1440 } 1441 } 1442 1443 /* 1444 * If the pool was depleted, point at the new page. 1445 */ 1446 if (pp->pr_curpage == NULL) 1447 pp->pr_curpage = ph; 1448 1449 if (++pp->pr_npages > pp->pr_hiwat) 1450 pp->pr_hiwat = pp->pr_npages; 1451 } 1452 1453 /* 1454 * Used by pool_get() when nitems drops below the low water mark. This 1455 * is used to catch up pr_nitems with the low water mark. 1456 * 1457 * Note 1, we never wait for memory here, we let the caller decide what to do. 1458 * 1459 * Note 2, we must be called with the pool already locked, and we return 1460 * with it locked. 1461 */ 1462 static int 1463 pool_catchup(struct pool *pp) 1464 { 1465 int error = 0; 1466 1467 while (POOL_NEEDS_CATCHUP(pp)) { 1468 error = pool_grow(pp, PR_NOWAIT); 1469 if (error) { 1470 break; 1471 } 1472 } 1473 return error; 1474 } 1475 1476 static void 1477 pool_update_curpage(struct pool *pp) 1478 { 1479 1480 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1481 if (pp->pr_curpage == NULL) { 1482 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1483 } 1484 } 1485 1486 void 1487 pool_setlowat(struct pool *pp, int n) 1488 { 1489 1490 simple_lock(&pp->pr_slock); 1491 1492 pp->pr_minitems = n; 1493 pp->pr_minpages = (n == 0) 1494 ? 0 1495 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1496 1497 /* Make sure we're caught up with the newly-set low water mark. */ 1498 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1499 /* 1500 * XXX: Should we log a warning? Should we set up a timeout 1501 * to try again in a second or so? The latter could break 1502 * a caller's assumptions about interrupt protection, etc. 1503 */ 1504 } 1505 1506 simple_unlock(&pp->pr_slock); 1507 } 1508 1509 void 1510 pool_sethiwat(struct pool *pp, int n) 1511 { 1512 1513 simple_lock(&pp->pr_slock); 1514 1515 pp->pr_maxpages = (n == 0) 1516 ? 0 1517 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1518 1519 simple_unlock(&pp->pr_slock); 1520 } 1521 1522 void 1523 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1524 { 1525 1526 simple_lock(&pp->pr_slock); 1527 1528 pp->pr_hardlimit = n; 1529 pp->pr_hardlimit_warning = warnmess; 1530 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1531 pp->pr_hardlimit_warning_last.tv_sec = 0; 1532 pp->pr_hardlimit_warning_last.tv_usec = 0; 1533 1534 /* 1535 * In-line version of pool_sethiwat(), because we don't want to 1536 * release the lock. 1537 */ 1538 pp->pr_maxpages = (n == 0) 1539 ? 0 1540 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1541 1542 simple_unlock(&pp->pr_slock); 1543 } 1544 1545 /* 1546 * Release all complete pages that have not been used recently. 1547 */ 1548 int 1549 #ifdef POOL_DIAGNOSTIC 1550 _pool_reclaim(struct pool *pp, const char *file, long line) 1551 #else 1552 pool_reclaim(struct pool *pp) 1553 #endif 1554 { 1555 struct pool_item_header *ph, *phnext; 1556 struct pool_cache *pc; 1557 struct pool_pagelist pq; 1558 struct pool_cache_grouplist pcgl; 1559 struct timeval curtime, diff; 1560 1561 if (pp->pr_drain_hook != NULL) { 1562 /* 1563 * The drain hook must be called with the pool unlocked. 1564 */ 1565 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1566 } 1567 1568 if (simple_lock_try(&pp->pr_slock) == 0) 1569 return (0); 1570 pr_enter(pp, file, line); 1571 1572 LIST_INIT(&pq); 1573 LIST_INIT(&pcgl); 1574 1575 /* 1576 * Reclaim items from the pool's caches. 1577 */ 1578 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) 1579 pool_cache_reclaim(pc, &pq, &pcgl); 1580 1581 getmicrotime(&curtime); 1582 1583 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1584 phnext = LIST_NEXT(ph, ph_pagelist); 1585 1586 /* Check our minimum page claim */ 1587 if (pp->pr_npages <= pp->pr_minpages) 1588 break; 1589 1590 KASSERT(ph->ph_nmissing == 0); 1591 timersub(&curtime, &ph->ph_time, &diff); 1592 if (diff.tv_sec < pool_inactive_time 1593 && !pa_starved_p(pp->pr_alloc)) 1594 continue; 1595 1596 /* 1597 * If freeing this page would put us below 1598 * the low water mark, stop now. 1599 */ 1600 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1601 pp->pr_minitems) 1602 break; 1603 1604 pr_rmpage(pp, ph, &pq); 1605 } 1606 1607 pr_leave(pp); 1608 simple_unlock(&pp->pr_slock); 1609 if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl)) 1610 return 0; 1611 1612 pr_pagelist_free(pp, &pq); 1613 pcg_grouplist_free(&pcgl); 1614 return (1); 1615 } 1616 1617 /* 1618 * Drain pools, one at a time. 1619 * 1620 * Note, we must never be called from an interrupt context. 1621 */ 1622 void 1623 pool_drain(void *arg __unused) 1624 { 1625 struct pool *pp; 1626 int s; 1627 1628 pp = NULL; 1629 s = splvm(); 1630 simple_lock(&pool_head_slock); 1631 if (drainpp == NULL) { 1632 drainpp = LIST_FIRST(&pool_head); 1633 } 1634 if (drainpp) { 1635 pp = drainpp; 1636 drainpp = LIST_NEXT(pp, pr_poollist); 1637 } 1638 simple_unlock(&pool_head_slock); 1639 if (pp) 1640 pool_reclaim(pp); 1641 splx(s); 1642 } 1643 1644 /* 1645 * Diagnostic helpers. 1646 */ 1647 void 1648 pool_print(struct pool *pp, const char *modif) 1649 { 1650 int s; 1651 1652 s = splvm(); 1653 if (simple_lock_try(&pp->pr_slock) == 0) { 1654 printf("pool %s is locked; try again later\n", 1655 pp->pr_wchan); 1656 splx(s); 1657 return; 1658 } 1659 pool_print1(pp, modif, printf); 1660 simple_unlock(&pp->pr_slock); 1661 splx(s); 1662 } 1663 1664 void 1665 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1666 { 1667 struct pool *pp; 1668 1669 if (simple_lock_try(&pool_head_slock) == 0) { 1670 (*pr)("WARNING: pool_head_slock is locked\n"); 1671 } else { 1672 simple_unlock(&pool_head_slock); 1673 } 1674 1675 LIST_FOREACH(pp, &pool_head, pr_poollist) { 1676 pool_printit(pp, modif, pr); 1677 } 1678 } 1679 1680 void 1681 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1682 { 1683 1684 if (pp == NULL) { 1685 (*pr)("Must specify a pool to print.\n"); 1686 return; 1687 } 1688 1689 /* 1690 * Called from DDB; interrupts should be blocked, and all 1691 * other processors should be paused. We can skip locking 1692 * the pool in this case. 1693 * 1694 * We do a simple_lock_try() just to print the lock 1695 * status, however. 1696 */ 1697 1698 if (simple_lock_try(&pp->pr_slock) == 0) 1699 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan); 1700 else 1701 simple_unlock(&pp->pr_slock); 1702 1703 pool_print1(pp, modif, pr); 1704 } 1705 1706 static void 1707 pool_print_pagelist(struct pool *pp __unused, struct pool_pagelist *pl, 1708 void (*pr)(const char *, ...)) 1709 { 1710 struct pool_item_header *ph; 1711 #ifdef DIAGNOSTIC 1712 struct pool_item *pi; 1713 #endif 1714 1715 LIST_FOREACH(ph, pl, ph_pagelist) { 1716 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1717 ph->ph_page, ph->ph_nmissing, 1718 (u_long)ph->ph_time.tv_sec, 1719 (u_long)ph->ph_time.tv_usec); 1720 #ifdef DIAGNOSTIC 1721 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1722 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1723 if (pi->pi_magic != PI_MAGIC) { 1724 (*pr)("\t\t\titem %p, magic 0x%x\n", 1725 pi, pi->pi_magic); 1726 } 1727 } 1728 } 1729 #endif 1730 } 1731 } 1732 1733 static void 1734 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1735 { 1736 struct pool_item_header *ph; 1737 struct pool_cache *pc; 1738 struct pool_cache_group *pcg; 1739 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1740 char c; 1741 1742 while ((c = *modif++) != '\0') { 1743 if (c == 'l') 1744 print_log = 1; 1745 if (c == 'p') 1746 print_pagelist = 1; 1747 if (c == 'c') 1748 print_cache = 1; 1749 } 1750 1751 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1752 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1753 pp->pr_roflags); 1754 (*pr)("\talloc %p\n", pp->pr_alloc); 1755 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1756 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1757 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1758 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1759 1760 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1761 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1762 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1763 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1764 1765 if (print_pagelist == 0) 1766 goto skip_pagelist; 1767 1768 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1769 (*pr)("\n\tempty page list:\n"); 1770 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1771 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1772 (*pr)("\n\tfull page list:\n"); 1773 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1774 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1775 (*pr)("\n\tpartial-page list:\n"); 1776 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1777 1778 if (pp->pr_curpage == NULL) 1779 (*pr)("\tno current page\n"); 1780 else 1781 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1782 1783 skip_pagelist: 1784 if (print_log == 0) 1785 goto skip_log; 1786 1787 (*pr)("\n"); 1788 if ((pp->pr_roflags & PR_LOGGING) == 0) 1789 (*pr)("\tno log\n"); 1790 else { 1791 pr_printlog(pp, NULL, pr); 1792 } 1793 1794 skip_log: 1795 if (print_cache == 0) 1796 goto skip_cache; 1797 1798 #define PR_GROUPLIST(pcg) \ 1799 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1800 for (i = 0; i < PCG_NOBJECTS; i++) { \ 1801 if (pcg->pcg_objects[i].pcgo_pa != \ 1802 POOL_PADDR_INVALID) { \ 1803 (*pr)("\t\t\t%p, 0x%llx\n", \ 1804 pcg->pcg_objects[i].pcgo_va, \ 1805 (unsigned long long) \ 1806 pcg->pcg_objects[i].pcgo_pa); \ 1807 } else { \ 1808 (*pr)("\t\t\t%p\n", \ 1809 pcg->pcg_objects[i].pcgo_va); \ 1810 } \ 1811 } 1812 1813 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) { 1814 (*pr)("\tcache %p\n", pc); 1815 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1816 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems); 1817 (*pr)("\t full groups:\n"); 1818 LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) { 1819 PR_GROUPLIST(pcg); 1820 } 1821 (*pr)("\t partial groups:\n"); 1822 LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) { 1823 PR_GROUPLIST(pcg); 1824 } 1825 (*pr)("\t empty groups:\n"); 1826 LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) { 1827 PR_GROUPLIST(pcg); 1828 } 1829 } 1830 #undef PR_GROUPLIST 1831 1832 skip_cache: 1833 pr_enter_check(pp, pr); 1834 } 1835 1836 static int 1837 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1838 { 1839 struct pool_item *pi; 1840 caddr_t page; 1841 int n; 1842 1843 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1844 page = (caddr_t)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1845 if (page != ph->ph_page && 1846 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1847 if (label != NULL) 1848 printf("%s: ", label); 1849 printf("pool(%p:%s): page inconsistency: page %p;" 1850 " at page head addr %p (p %p)\n", pp, 1851 pp->pr_wchan, ph->ph_page, 1852 ph, page); 1853 return 1; 1854 } 1855 } 1856 1857 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1858 return 0; 1859 1860 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1861 pi != NULL; 1862 pi = LIST_NEXT(pi,pi_list), n++) { 1863 1864 #ifdef DIAGNOSTIC 1865 if (pi->pi_magic != PI_MAGIC) { 1866 if (label != NULL) 1867 printf("%s: ", label); 1868 printf("pool(%s): free list modified: magic=%x;" 1869 " page %p; item ordinal %d; addr %p\n", 1870 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1871 n, pi); 1872 panic("pool"); 1873 } 1874 #endif 1875 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1876 continue; 1877 } 1878 page = (caddr_t)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1879 if (page == ph->ph_page) 1880 continue; 1881 1882 if (label != NULL) 1883 printf("%s: ", label); 1884 printf("pool(%p:%s): page inconsistency: page %p;" 1885 " item ordinal %d; addr %p (p %p)\n", pp, 1886 pp->pr_wchan, ph->ph_page, 1887 n, pi, page); 1888 return 1; 1889 } 1890 return 0; 1891 } 1892 1893 1894 int 1895 pool_chk(struct pool *pp, const char *label) 1896 { 1897 struct pool_item_header *ph; 1898 int r = 0; 1899 1900 simple_lock(&pp->pr_slock); 1901 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1902 r = pool_chk_page(pp, label, ph); 1903 if (r) { 1904 goto out; 1905 } 1906 } 1907 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1908 r = pool_chk_page(pp, label, ph); 1909 if (r) { 1910 goto out; 1911 } 1912 } 1913 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1914 r = pool_chk_page(pp, label, ph); 1915 if (r) { 1916 goto out; 1917 } 1918 } 1919 1920 out: 1921 simple_unlock(&pp->pr_slock); 1922 return (r); 1923 } 1924 1925 /* 1926 * pool_cache_init: 1927 * 1928 * Initialize a pool cache. 1929 * 1930 * NOTE: If the pool must be protected from interrupts, we expect 1931 * to be called at the appropriate interrupt priority level. 1932 */ 1933 void 1934 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1935 int (*ctor)(void *, void *, int), 1936 void (*dtor)(void *, void *), 1937 void *arg) 1938 { 1939 1940 LIST_INIT(&pc->pc_emptygroups); 1941 LIST_INIT(&pc->pc_fullgroups); 1942 LIST_INIT(&pc->pc_partgroups); 1943 simple_lock_init(&pc->pc_slock); 1944 1945 pc->pc_pool = pp; 1946 1947 pc->pc_ctor = ctor; 1948 pc->pc_dtor = dtor; 1949 pc->pc_arg = arg; 1950 1951 pc->pc_hits = 0; 1952 pc->pc_misses = 0; 1953 1954 pc->pc_ngroups = 0; 1955 1956 pc->pc_nitems = 0; 1957 1958 simple_lock(&pp->pr_slock); 1959 LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist); 1960 simple_unlock(&pp->pr_slock); 1961 } 1962 1963 /* 1964 * pool_cache_destroy: 1965 * 1966 * Destroy a pool cache. 1967 */ 1968 void 1969 pool_cache_destroy(struct pool_cache *pc) 1970 { 1971 struct pool *pp = pc->pc_pool; 1972 1973 /* First, invalidate the entire cache. */ 1974 pool_cache_invalidate(pc); 1975 1976 /* ...and remove it from the pool's cache list. */ 1977 simple_lock(&pp->pr_slock); 1978 LIST_REMOVE(pc, pc_poollist); 1979 simple_unlock(&pp->pr_slock); 1980 } 1981 1982 static inline void * 1983 pcg_get(struct pool_cache_group *pcg, paddr_t *pap) 1984 { 1985 void *object; 1986 u_int idx; 1987 1988 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1989 KASSERT(pcg->pcg_avail != 0); 1990 idx = --pcg->pcg_avail; 1991 1992 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL); 1993 object = pcg->pcg_objects[idx].pcgo_va; 1994 if (pap != NULL) 1995 *pap = pcg->pcg_objects[idx].pcgo_pa; 1996 pcg->pcg_objects[idx].pcgo_va = NULL; 1997 1998 return (object); 1999 } 2000 2001 static inline void 2002 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa) 2003 { 2004 u_int idx; 2005 2006 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 2007 idx = pcg->pcg_avail++; 2008 2009 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL); 2010 pcg->pcg_objects[idx].pcgo_va = object; 2011 pcg->pcg_objects[idx].pcgo_pa = pa; 2012 } 2013 2014 static void 2015 pcg_grouplist_free(struct pool_cache_grouplist *pcgl) 2016 { 2017 struct pool_cache_group *pcg; 2018 int s; 2019 2020 s = splvm(); 2021 while ((pcg = LIST_FIRST(pcgl)) != NULL) { 2022 LIST_REMOVE(pcg, pcg_list); 2023 pool_put(&pcgpool, pcg); 2024 } 2025 splx(s); 2026 } 2027 2028 /* 2029 * pool_cache_get{,_paddr}: 2030 * 2031 * Get an object from a pool cache (optionally returning 2032 * the physical address of the object). 2033 */ 2034 void * 2035 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap) 2036 { 2037 struct pool_cache_group *pcg; 2038 void *object; 2039 2040 #ifdef LOCKDEBUG 2041 if (flags & PR_WAITOK) 2042 ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)"); 2043 #endif 2044 2045 simple_lock(&pc->pc_slock); 2046 2047 pcg = LIST_FIRST(&pc->pc_partgroups); 2048 if (pcg == NULL) { 2049 pcg = LIST_FIRST(&pc->pc_fullgroups); 2050 if (pcg != NULL) { 2051 LIST_REMOVE(pcg, pcg_list); 2052 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list); 2053 } 2054 } 2055 if (pcg == NULL) { 2056 2057 /* 2058 * No groups with any available objects. Allocate 2059 * a new object, construct it, and return it to 2060 * the caller. We will allocate a group, if necessary, 2061 * when the object is freed back to the cache. 2062 */ 2063 pc->pc_misses++; 2064 simple_unlock(&pc->pc_slock); 2065 object = pool_get(pc->pc_pool, flags); 2066 if (object != NULL && pc->pc_ctor != NULL) { 2067 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) { 2068 pool_put(pc->pc_pool, object); 2069 return (NULL); 2070 } 2071 } 2072 if (object != NULL && pap != NULL) { 2073 #ifdef POOL_VTOPHYS 2074 *pap = POOL_VTOPHYS(object); 2075 #else 2076 *pap = POOL_PADDR_INVALID; 2077 #endif 2078 } 2079 return (object); 2080 } 2081 2082 pc->pc_hits++; 2083 pc->pc_nitems--; 2084 object = pcg_get(pcg, pap); 2085 2086 if (pcg->pcg_avail == 0) { 2087 LIST_REMOVE(pcg, pcg_list); 2088 LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list); 2089 } 2090 simple_unlock(&pc->pc_slock); 2091 2092 return (object); 2093 } 2094 2095 /* 2096 * pool_cache_put{,_paddr}: 2097 * 2098 * Put an object back to the pool cache (optionally caching the 2099 * physical address of the object). 2100 */ 2101 void 2102 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa) 2103 { 2104 struct pool_cache_group *pcg; 2105 int s; 2106 2107 if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) { 2108 goto destruct; 2109 } 2110 2111 simple_lock(&pc->pc_slock); 2112 2113 pcg = LIST_FIRST(&pc->pc_partgroups); 2114 if (pcg == NULL) { 2115 pcg = LIST_FIRST(&pc->pc_emptygroups); 2116 if (pcg != NULL) { 2117 LIST_REMOVE(pcg, pcg_list); 2118 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list); 2119 } 2120 } 2121 if (pcg == NULL) { 2122 2123 /* 2124 * No empty groups to free the object to. Attempt to 2125 * allocate one. 2126 */ 2127 simple_unlock(&pc->pc_slock); 2128 s = splvm(); 2129 pcg = pool_get(&pcgpool, PR_NOWAIT); 2130 splx(s); 2131 if (pcg == NULL) { 2132 destruct: 2133 2134 /* 2135 * Unable to allocate a cache group; destruct the object 2136 * and free it back to the pool. 2137 */ 2138 pool_cache_destruct_object(pc, object); 2139 return; 2140 } 2141 memset(pcg, 0, sizeof(*pcg)); 2142 simple_lock(&pc->pc_slock); 2143 pc->pc_ngroups++; 2144 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list); 2145 } 2146 2147 pc->pc_nitems++; 2148 pcg_put(pcg, object, pa); 2149 2150 if (pcg->pcg_avail == PCG_NOBJECTS) { 2151 LIST_REMOVE(pcg, pcg_list); 2152 LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list); 2153 } 2154 simple_unlock(&pc->pc_slock); 2155 } 2156 2157 /* 2158 * pool_cache_destruct_object: 2159 * 2160 * Force destruction of an object and its release back into 2161 * the pool. 2162 */ 2163 void 2164 pool_cache_destruct_object(struct pool_cache *pc, void *object) 2165 { 2166 2167 if (pc->pc_dtor != NULL) 2168 (*pc->pc_dtor)(pc->pc_arg, object); 2169 pool_put(pc->pc_pool, object); 2170 } 2171 2172 static void 2173 pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl, 2174 struct pool_cache *pc, struct pool_pagelist *pq, 2175 struct pool_cache_grouplist *pcgdl) 2176 { 2177 struct pool_cache_group *pcg, *npcg; 2178 void *object; 2179 2180 for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) { 2181 npcg = LIST_NEXT(pcg, pcg_list); 2182 while (pcg->pcg_avail != 0) { 2183 pc->pc_nitems--; 2184 object = pcg_get(pcg, NULL); 2185 if (pc->pc_dtor != NULL) 2186 (*pc->pc_dtor)(pc->pc_arg, object); 2187 pool_do_put(pc->pc_pool, object, pq); 2188 } 2189 pc->pc_ngroups--; 2190 LIST_REMOVE(pcg, pcg_list); 2191 LIST_INSERT_HEAD(pcgdl, pcg, pcg_list); 2192 } 2193 } 2194 2195 static void 2196 pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq, 2197 struct pool_cache_grouplist *pcgl) 2198 { 2199 2200 LOCK_ASSERT(simple_lock_held(&pc->pc_slock)); 2201 LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock)); 2202 2203 pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl); 2204 pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl); 2205 2206 KASSERT(LIST_EMPTY(&pc->pc_partgroups)); 2207 KASSERT(LIST_EMPTY(&pc->pc_fullgroups)); 2208 KASSERT(pc->pc_nitems == 0); 2209 } 2210 2211 /* 2212 * pool_cache_invalidate: 2213 * 2214 * Invalidate a pool cache (destruct and release all of the 2215 * cached objects). 2216 */ 2217 void 2218 pool_cache_invalidate(struct pool_cache *pc) 2219 { 2220 struct pool_pagelist pq; 2221 struct pool_cache_grouplist pcgl; 2222 2223 LIST_INIT(&pq); 2224 LIST_INIT(&pcgl); 2225 2226 simple_lock(&pc->pc_slock); 2227 simple_lock(&pc->pc_pool->pr_slock); 2228 2229 pool_do_cache_invalidate(pc, &pq, &pcgl); 2230 2231 simple_unlock(&pc->pc_pool->pr_slock); 2232 simple_unlock(&pc->pc_slock); 2233 2234 pr_pagelist_free(pc->pc_pool, &pq); 2235 pcg_grouplist_free(&pcgl); 2236 } 2237 2238 /* 2239 * pool_cache_reclaim: 2240 * 2241 * Reclaim a pool cache for pool_reclaim(). 2242 */ 2243 static void 2244 pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq, 2245 struct pool_cache_grouplist *pcgl) 2246 { 2247 2248 /* 2249 * We're locking in the wrong order (normally pool_cache -> pool, 2250 * but the pool is already locked when we get here), so we have 2251 * to use trylock. If we can't lock the pool_cache, it's not really 2252 * a big deal here. 2253 */ 2254 if (simple_lock_try(&pc->pc_slock) == 0) 2255 return; 2256 2257 pool_do_cache_invalidate(pc, pq, pcgl); 2258 2259 simple_unlock(&pc->pc_slock); 2260 } 2261 2262 /* 2263 * Pool backend allocators. 2264 * 2265 * Each pool has a backend allocator that handles allocation, deallocation, 2266 * and any additional draining that might be needed. 2267 * 2268 * We provide two standard allocators: 2269 * 2270 * pool_allocator_kmem - the default when no allocator is specified 2271 * 2272 * pool_allocator_nointr - used for pools that will not be accessed 2273 * in interrupt context. 2274 */ 2275 void *pool_page_alloc(struct pool *, int); 2276 void pool_page_free(struct pool *, void *); 2277 2278 #ifdef POOL_SUBPAGE 2279 struct pool_allocator pool_allocator_kmem_fullpage = { 2280 pool_page_alloc, pool_page_free, 0, 2281 .pa_backingmapptr = &kmem_map, 2282 }; 2283 #else 2284 struct pool_allocator pool_allocator_kmem = { 2285 pool_page_alloc, pool_page_free, 0, 2286 .pa_backingmapptr = &kmem_map, 2287 }; 2288 #endif 2289 2290 void *pool_page_alloc_nointr(struct pool *, int); 2291 void pool_page_free_nointr(struct pool *, void *); 2292 2293 #ifdef POOL_SUBPAGE 2294 struct pool_allocator pool_allocator_nointr_fullpage = { 2295 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2296 .pa_backingmapptr = &kernel_map, 2297 }; 2298 #else 2299 struct pool_allocator pool_allocator_nointr = { 2300 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2301 .pa_backingmapptr = &kernel_map, 2302 }; 2303 #endif 2304 2305 #ifdef POOL_SUBPAGE 2306 void *pool_subpage_alloc(struct pool *, int); 2307 void pool_subpage_free(struct pool *, void *); 2308 2309 struct pool_allocator pool_allocator_kmem = { 2310 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2311 .pa_backingmapptr = &kmem_map, 2312 }; 2313 2314 void *pool_subpage_alloc_nointr(struct pool *, int); 2315 void pool_subpage_free_nointr(struct pool *, void *); 2316 2317 struct pool_allocator pool_allocator_nointr = { 2318 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2319 .pa_backingmapptr = &kmem_map, 2320 }; 2321 #endif /* POOL_SUBPAGE */ 2322 2323 static void * 2324 pool_allocator_alloc(struct pool *pp, int flags) 2325 { 2326 struct pool_allocator *pa = pp->pr_alloc; 2327 void *res; 2328 2329 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock)); 2330 2331 res = (*pa->pa_alloc)(pp, flags); 2332 if (res == NULL && (flags & PR_WAITOK) == 0) { 2333 /* 2334 * We only run the drain hook here if PR_NOWAIT. 2335 * In other cases, the hook will be run in 2336 * pool_reclaim(). 2337 */ 2338 if (pp->pr_drain_hook != NULL) { 2339 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2340 res = (*pa->pa_alloc)(pp, flags); 2341 } 2342 } 2343 return res; 2344 } 2345 2346 static void 2347 pool_allocator_free(struct pool *pp, void *v) 2348 { 2349 struct pool_allocator *pa = pp->pr_alloc; 2350 2351 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock)); 2352 2353 (*pa->pa_free)(pp, v); 2354 } 2355 2356 void * 2357 pool_page_alloc(struct pool *pp __unused, int flags) 2358 { 2359 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 2360 2361 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok)); 2362 } 2363 2364 void 2365 pool_page_free(struct pool *pp __unused, void *v) 2366 { 2367 2368 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v); 2369 } 2370 2371 static void * 2372 pool_page_alloc_meta(struct pool *pp __unused, int flags) 2373 { 2374 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 2375 2376 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok)); 2377 } 2378 2379 static void 2380 pool_page_free_meta(struct pool *pp __unused, void *v) 2381 { 2382 2383 uvm_km_free_poolpage(kmem_map, (vaddr_t) v); 2384 } 2385 2386 #ifdef POOL_SUBPAGE 2387 /* Sub-page allocator, for machines with large hardware pages. */ 2388 void * 2389 pool_subpage_alloc(struct pool *pp, int flags) 2390 { 2391 void *v; 2392 int s; 2393 s = splvm(); 2394 v = pool_get(&psppool, flags); 2395 splx(s); 2396 return v; 2397 } 2398 2399 void 2400 pool_subpage_free(struct pool *pp, void *v) 2401 { 2402 int s; 2403 s = splvm(); 2404 pool_put(&psppool, v); 2405 splx(s); 2406 } 2407 2408 /* We don't provide a real nointr allocator. Maybe later. */ 2409 void * 2410 pool_subpage_alloc_nointr(struct pool *pp, int flags) 2411 { 2412 2413 return (pool_subpage_alloc(pp, flags)); 2414 } 2415 2416 void 2417 pool_subpage_free_nointr(struct pool *pp, void *v) 2418 { 2419 2420 pool_subpage_free(pp, v); 2421 } 2422 #endif /* POOL_SUBPAGE */ 2423 void * 2424 pool_page_alloc_nointr(struct pool *pp __unused, int flags) 2425 { 2426 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 2427 2428 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok)); 2429 } 2430 2431 void 2432 pool_page_free_nointr(struct pool *pp __unused, void *v) 2433 { 2434 2435 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v); 2436 } 2437