1 /* $NetBSD: subr_pool.c,v 1.122 2006/09/03 06:25:19 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.122 2006/09/03 06:25:19 christos Exp $"); 42 43 #include "opt_pool.h" 44 #include "opt_poollog.h" 45 #include "opt_lockdebug.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/proc.h> 50 #include <sys/errno.h> 51 #include <sys/kernel.h> 52 #include <sys/malloc.h> 53 #include <sys/lock.h> 54 #include <sys/pool.h> 55 #include <sys/syslog.h> 56 57 #include <uvm/uvm.h> 58 59 /* 60 * Pool resource management utility. 61 * 62 * Memory is allocated in pages which are split into pieces according to 63 * the pool item size. Each page is kept on one of three lists in the 64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 65 * for empty, full and partially-full pages respectively. The individual 66 * pool items are on a linked list headed by `ph_itemlist' in each page 67 * header. The memory for building the page list is either taken from 68 * the allocated pages themselves (for small pool items) or taken from 69 * an internal pool of page headers (`phpool'). 70 */ 71 72 /* List of all pools */ 73 LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head); 74 75 /* Private pool for page header structures */ 76 #define PHPOOL_MAX 8 77 static struct pool phpool[PHPOOL_MAX]; 78 #define PHPOOL_FREELIST_NELEM(idx) (((idx) == 0) ? 
0 : (1 << (idx))) 79 80 #ifdef POOL_SUBPAGE 81 /* Pool of subpages for use by normal pools. */ 82 static struct pool psppool; 83 #endif 84 85 static SLIST_HEAD(, pool_allocator) pa_deferinitq = 86 SLIST_HEAD_INITIALIZER(pa_deferinitq); 87 88 static void *pool_page_alloc_meta(struct pool *, int); 89 static void pool_page_free_meta(struct pool *, void *); 90 91 /* allocator for pool metadata */ 92 static struct pool_allocator pool_allocator_meta = { 93 pool_page_alloc_meta, pool_page_free_meta, 94 .pa_backingmapptr = &kmem_map, 95 }; 96 97 /* # of seconds to retain page after last use */ 98 int pool_inactive_time = 10; 99 100 /* Next candidate for drainage (see pool_drain()) */ 101 static struct pool *drainpp; 102 103 /* This spin lock protects both pool_head and drainpp. */ 104 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER; 105 106 typedef uint8_t pool_item_freelist_t; 107 108 struct pool_item_header { 109 /* Page headers */ 110 LIST_ENTRY(pool_item_header) 111 ph_pagelist; /* pool page list */ 112 SPLAY_ENTRY(pool_item_header) 113 ph_node; /* Off-page page headers */ 114 caddr_t ph_page; /* this page's address */ 115 struct timeval ph_time; /* last referenced */ 116 union { 117 /* !PR_NOTOUCH */ 118 struct { 119 LIST_HEAD(, pool_item) 120 phu_itemlist; /* chunk list for this page */ 121 } phu_normal; 122 /* PR_NOTOUCH */ 123 struct { 124 uint16_t 125 phu_off; /* start offset in page */ 126 pool_item_freelist_t 127 phu_firstfree; /* first free item */ 128 /* 129 * XXX it might be better to use 130 * a simple bitmap and ffs(3) 131 */ 132 } phu_notouch; 133 } ph_u; 134 uint16_t ph_nmissing; /* # of chunks in use */ 135 }; 136 #define ph_itemlist ph_u.phu_normal.phu_itemlist 137 #define ph_off ph_u.phu_notouch.phu_off 138 #define ph_firstfree ph_u.phu_notouch.phu_firstfree 139 140 struct pool_item { 141 #ifdef DIAGNOSTIC 142 u_int pi_magic; 143 #endif 144 #define PI_MAGIC 0xdeadbeefU 145 /* Other entries use only this list entry */ 146 LIST_ENTRY(pool_item) pi_list; 147 }; 148 149 #define POOL_NEEDS_CATCHUP(pp) \ 150 ((pp)->pr_nitems < (pp)->pr_minitems) 151 152 /* 153 * Pool cache management. 154 * 155 * Pool caches provide a way for constructed objects to be cached by the 156 * pool subsystem. This can lead to performance improvements by avoiding 157 * needless object construction/destruction; it is deferred until absolutely 158 * necessary. 159 * 160 * Caches are grouped into cache groups. Each cache group references 161 * up to 16 constructed objects. When a cache allocates an object 162 * from the pool, it calls the object's constructor and places it into 163 * a cache group. When a cache group frees an object back to the pool, 164 * it first calls the object's destructor. This allows the object to 165 * persist in constructed form while freed to the cache. 166 * 167 * Multiple caches may exist for each pool. This allows a single 168 * object type to have multiple constructed forms. The pool references 169 * each cache, so that when a pool is drained by the pagedaemon, it can 170 * drain each individual cache as well. Each time a cache is drained, 171 * the most idle cache group is freed to the pool in its entirety. 172 * 173 * Pool caches are layed on top of pools. By layering them, we can avoid 174 * the complexity of cache management for pools which would not benefit 175 * from it. 176 */ 177 178 /* The cache group pool. 
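 *
 * (Note: each pool_cache_group is itself allocated from this pool.  As
 * described above, a group holds up to 16 (PCG_NOBJECTS) constructed
 * objects and moves between a cache's empty, partial and full group
 * lists as pcg_put() fills it and pcg_get() empties it; see those
 * helpers below.)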
*/ 179 static struct pool pcgpool; 180 181 static void pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *, 182 struct pool_cache_grouplist *); 183 static void pcg_grouplist_free(struct pool_cache_grouplist *); 184 185 static int pool_catchup(struct pool *); 186 static void pool_prime_page(struct pool *, caddr_t, 187 struct pool_item_header *); 188 static void pool_update_curpage(struct pool *); 189 190 static int pool_grow(struct pool *, int); 191 static void *pool_allocator_alloc(struct pool *, int); 192 static void pool_allocator_free(struct pool *, void *); 193 194 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 195 void (*)(const char *, ...)); 196 static void pool_print1(struct pool *, const char *, 197 void (*)(const char *, ...)); 198 199 static int pool_chk_page(struct pool *, const char *, 200 struct pool_item_header *); 201 202 /* 203 * Pool log entry. An array of these is allocated in pool_init(). 204 */ 205 struct pool_log { 206 const char *pl_file; 207 long pl_line; 208 int pl_action; 209 #define PRLOG_GET 1 210 #define PRLOG_PUT 2 211 void *pl_addr; 212 }; 213 214 #ifdef POOL_DIAGNOSTIC 215 /* Number of entries in pool log buffers */ 216 #ifndef POOL_LOGSIZE 217 #define POOL_LOGSIZE 10 218 #endif 219 220 int pool_logsize = POOL_LOGSIZE; 221 222 static inline void 223 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 224 { 225 int n = pp->pr_curlogentry; 226 struct pool_log *pl; 227 228 if ((pp->pr_roflags & PR_LOGGING) == 0) 229 return; 230 231 /* 232 * Fill in the current entry. Wrap around and overwrite 233 * the oldest entry if necessary. 234 */ 235 pl = &pp->pr_log[n]; 236 pl->pl_file = file; 237 pl->pl_line = line; 238 pl->pl_action = action; 239 pl->pl_addr = v; 240 if (++n >= pp->pr_logsize) 241 n = 0; 242 pp->pr_curlogentry = n; 243 } 244 245 static void 246 pr_printlog(struct pool *pp, struct pool_item *pi, 247 void (*pr)(const char *, ...)) 248 { 249 int i = pp->pr_logsize; 250 int n = pp->pr_curlogentry; 251 252 if ((pp->pr_roflags & PR_LOGGING) == 0) 253 return; 254 255 /* 256 * Print all entries in this pool's log. 257 */ 258 while (i-- > 0) { 259 struct pool_log *pl = &pp->pr_log[n]; 260 if (pl->pl_action != 0) { 261 if (pi == NULL || pi == pl->pl_addr) { 262 (*pr)("\tlog entry %d:\n", i); 263 (*pr)("\t\taction = %s, addr = %p\n", 264 pl->pl_action == PRLOG_GET ? 
"get" : "put", 265 pl->pl_addr); 266 (*pr)("\t\tfile: %s at line %lu\n", 267 pl->pl_file, pl->pl_line); 268 } 269 } 270 if (++n >= pp->pr_logsize) 271 n = 0; 272 } 273 } 274 275 static inline void 276 pr_enter(struct pool *pp, const char *file, long line) 277 { 278 279 if (__predict_false(pp->pr_entered_file != NULL)) { 280 printf("pool %s: reentrancy at file %s line %ld\n", 281 pp->pr_wchan, file, line); 282 printf(" previous entry at file %s line %ld\n", 283 pp->pr_entered_file, pp->pr_entered_line); 284 panic("pr_enter"); 285 } 286 287 pp->pr_entered_file = file; 288 pp->pr_entered_line = line; 289 } 290 291 static inline void 292 pr_leave(struct pool *pp) 293 { 294 295 if (__predict_false(pp->pr_entered_file == NULL)) { 296 printf("pool %s not entered?\n", pp->pr_wchan); 297 panic("pr_leave"); 298 } 299 300 pp->pr_entered_file = NULL; 301 pp->pr_entered_line = 0; 302 } 303 304 static inline void 305 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 306 { 307 308 if (pp->pr_entered_file != NULL) 309 (*pr)("\n\tcurrently entered from file %s line %ld\n", 310 pp->pr_entered_file, pp->pr_entered_line); 311 } 312 #else 313 #define pr_log(pp, v, action, file, line) 314 #define pr_printlog(pp, pi, pr) 315 #define pr_enter(pp, file, line) 316 #define pr_leave(pp) 317 #define pr_enter_check(pp, pr) 318 #endif /* POOL_DIAGNOSTIC */ 319 320 static inline int 321 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 322 const void *v) 323 { 324 const char *cp = v; 325 int idx; 326 327 KASSERT(pp->pr_roflags & PR_NOTOUCH); 328 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size; 329 KASSERT(idx < pp->pr_itemsperpage); 330 return idx; 331 } 332 333 #define PR_FREELIST_ALIGN(p) \ 334 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 335 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 336 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 337 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 338 339 static inline void 340 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 341 void *obj) 342 { 343 int idx = pr_item_notouch_index(pp, ph, obj); 344 pool_item_freelist_t *freelist = PR_FREELIST(ph); 345 346 KASSERT(freelist[idx] == PR_INDEX_USED); 347 freelist[idx] = ph->ph_firstfree; 348 ph->ph_firstfree = idx; 349 } 350 351 static inline void * 352 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 353 { 354 int idx = ph->ph_firstfree; 355 pool_item_freelist_t *freelist = PR_FREELIST(ph); 356 357 KASSERT(freelist[idx] != PR_INDEX_USED); 358 ph->ph_firstfree = freelist[idx]; 359 freelist[idx] = PR_INDEX_USED; 360 361 return ph->ph_page + ph->ph_off + idx * pp->pr_size; 362 } 363 364 static inline int 365 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 366 { 367 368 /* 369 * we consider pool_item_header with smaller ph_page bigger. 370 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 371 */ 372 373 if (a->ph_page < b->ph_page) 374 return (1); 375 else if (a->ph_page > b->ph_page) 376 return (-1); 377 else 378 return (0); 379 } 380 381 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 382 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 383 384 /* 385 * Return the pool page header based on item address. 
386 */ 387 static inline struct pool_item_header * 388 pr_find_pagehead(struct pool *pp, void *v) 389 { 390 struct pool_item_header *ph, tmp; 391 392 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 393 tmp.ph_page = (caddr_t)(uintptr_t)v; 394 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 395 if (ph == NULL) { 396 ph = SPLAY_ROOT(&pp->pr_phtree); 397 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 398 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 399 } 400 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 401 } 402 } else { 403 caddr_t page = 404 (caddr_t)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 405 406 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 407 ph = (void *)(page + pp->pr_phoffset); 408 } else { 409 tmp.ph_page = page; 410 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 411 } 412 } 413 414 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 415 (ph->ph_page <= (char *)v && 416 (char *)v < ph->ph_page + pp->pr_alloc->pa_pagesz)); 417 return ph; 418 } 419 420 static void 421 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 422 { 423 struct pool_item_header *ph; 424 int s; 425 426 while ((ph = LIST_FIRST(pq)) != NULL) { 427 LIST_REMOVE(ph, ph_pagelist); 428 pool_allocator_free(pp, ph->ph_page); 429 if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 430 s = splvm(); 431 pool_put(pp->pr_phpool, ph); 432 splx(s); 433 } 434 } 435 } 436 437 /* 438 * Remove a page from the pool. 439 */ 440 static inline void 441 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 442 struct pool_pagelist *pq) 443 { 444 445 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 446 447 /* 448 * If the page was idle, decrement the idle page count. 449 */ 450 if (ph->ph_nmissing == 0) { 451 #ifdef DIAGNOSTIC 452 if (pp->pr_nidle == 0) 453 panic("pr_rmpage: nidle inconsistent"); 454 if (pp->pr_nitems < pp->pr_itemsperpage) 455 panic("pr_rmpage: nitems inconsistent"); 456 #endif 457 pp->pr_nidle--; 458 } 459 460 pp->pr_nitems -= pp->pr_itemsperpage; 461 462 /* 463 * Unlink the page from the pool and queue it for release. 464 */ 465 LIST_REMOVE(ph, ph_pagelist); 466 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 467 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 468 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 469 470 pp->pr_npages--; 471 pp->pr_npagefree++; 472 473 pool_update_curpage(pp); 474 } 475 476 static boolean_t 477 pa_starved_p(struct pool_allocator *pa) 478 { 479 480 if (pa->pa_backingmap != NULL) { 481 return vm_map_starved_p(pa->pa_backingmap); 482 } 483 return FALSE; 484 } 485 486 static int 487 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) 488 { 489 struct pool *pp = obj; 490 struct pool_allocator *pa = pp->pr_alloc; 491 492 KASSERT(&pp->pr_reclaimerentry == ce); 493 pool_reclaim(pp); 494 if (!pa_starved_p(pa)) { 495 return CALLBACK_CHAIN_ABORT; 496 } 497 return CALLBACK_CHAIN_CONTINUE; 498 } 499 500 static void 501 pool_reclaim_register(struct pool *pp) 502 { 503 struct vm_map *map = pp->pr_alloc->pa_backingmap; 504 int s; 505 506 if (map == NULL) { 507 return; 508 } 509 510 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 511 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 512 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 513 splx(s); 514 } 515 516 static void 517 pool_reclaim_unregister(struct pool *pp) 518 { 519 struct vm_map *map = pp->pr_alloc->pa_backingmap; 520 int s; 521 522 if (map == NULL) { 523 return; 524 } 525 526 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. 
*/ 527 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 528 &pp->pr_reclaimerentry); 529 splx(s); 530 } 531 532 static void 533 pa_reclaim_register(struct pool_allocator *pa) 534 { 535 struct vm_map *map = *pa->pa_backingmapptr; 536 struct pool *pp; 537 538 KASSERT(pa->pa_backingmap == NULL); 539 if (map == NULL) { 540 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 541 return; 542 } 543 pa->pa_backingmap = map; 544 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 545 pool_reclaim_register(pp); 546 } 547 } 548 549 /* 550 * Initialize all the pools listed in the "pools" link set. 551 */ 552 void 553 pool_subsystem_init(void) 554 { 555 struct pool_allocator *pa; 556 __link_set_decl(pools, struct link_pool_init); 557 struct link_pool_init * const *pi; 558 559 __link_set_foreach(pi, pools) 560 pool_init((*pi)->pp, (*pi)->size, (*pi)->align, 561 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan, 562 (*pi)->palloc); 563 564 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 565 KASSERT(pa->pa_backingmapptr != NULL); 566 KASSERT(*pa->pa_backingmapptr != NULL); 567 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 568 pa_reclaim_register(pa); 569 } 570 } 571 572 /* 573 * Initialize the given pool resource structure. 574 * 575 * We export this routine to allow other kernel parts to declare 576 * static pools that must be initialized before malloc() is available. 577 */ 578 void 579 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 580 const char *wchan, struct pool_allocator *palloc) 581 { 582 #ifdef DEBUG 583 struct pool *pp1; 584 #endif 585 size_t trysize, phsize; 586 int off, slack, s; 587 588 KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >= 589 PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1)); 590 591 #ifdef DEBUG 592 /* 593 * Check that the pool hasn't already been initialised and 594 * added to the list of all pools. 595 */ 596 LIST_FOREACH(pp1, &pool_head, pr_poollist) { 597 if (pp == pp1) 598 panic("pool_init: pool %s already initialised", 599 wchan); 600 } 601 #endif 602 603 #ifdef POOL_DIAGNOSTIC 604 /* 605 * Always log if POOL_DIAGNOSTIC is defined. 606 */ 607 if (pool_logsize != 0) 608 flags |= PR_LOGGING; 609 #endif 610 611 if (palloc == NULL) 612 palloc = &pool_allocator_kmem; 613 #ifdef POOL_SUBPAGE 614 if (size > palloc->pa_pagesz) { 615 if (palloc == &pool_allocator_kmem) 616 palloc = &pool_allocator_kmem_fullpage; 617 else if (palloc == &pool_allocator_nointr) 618 palloc = &pool_allocator_nointr_fullpage; 619 } 620 #endif /* POOL_SUBPAGE */ 621 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 622 if (palloc->pa_pagesz == 0) 623 palloc->pa_pagesz = PAGE_SIZE; 624 625 TAILQ_INIT(&palloc->pa_list); 626 627 simple_lock_init(&palloc->pa_slock); 628 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 629 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 630 631 if (palloc->pa_backingmapptr != NULL) { 632 pa_reclaim_register(palloc); 633 } 634 palloc->pa_flags |= PA_INITIALIZED; 635 } 636 637 if (align == 0) 638 align = ALIGN(1); 639 640 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 641 size = sizeof(struct pool_item); 642 643 size = roundup(size, align); 644 #ifdef DIAGNOSTIC 645 if (size > palloc->pa_pagesz) 646 panic("pool_init: pool item size (%zu) too large", size); 647 #endif 648 649 /* 650 * Initialize the pool structure. 
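 *
 * (Illustrative sketch of a typical caller of this routine; the
 * "struct foo", foo_pool and "foopl" names are hypothetical:
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &pool_allocator_nointr);
 *	...
 *	f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 *
 * Passing a NULL allocator selects pool_allocator_kmem, as done at the
 * top of this function.)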
651	 */
652	LIST_INIT(&pp->pr_emptypages);
653	LIST_INIT(&pp->pr_fullpages);
654	LIST_INIT(&pp->pr_partpages);
655	LIST_INIT(&pp->pr_cachelist);
656	pp->pr_curpage = NULL;
657	pp->pr_npages = 0;
658	pp->pr_minitems = 0;
659	pp->pr_minpages = 0;
660	pp->pr_maxpages = UINT_MAX;
661	pp->pr_roflags = flags;
662	pp->pr_flags = 0;
663	pp->pr_size = size;
664	pp->pr_align = align;
665	pp->pr_wchan = wchan;
666	pp->pr_alloc = palloc;
667	pp->pr_nitems = 0;
668	pp->pr_nout = 0;
669	pp->pr_hardlimit = UINT_MAX;
670	pp->pr_hardlimit_warning = NULL;
671	pp->pr_hardlimit_ratecap.tv_sec = 0;
672	pp->pr_hardlimit_ratecap.tv_usec = 0;
673	pp->pr_hardlimit_warning_last.tv_sec = 0;
674	pp->pr_hardlimit_warning_last.tv_usec = 0;
675	pp->pr_drain_hook = NULL;
676	pp->pr_drain_hook_arg = NULL;
677
678	/*
679	 * Decide whether to put the page header off-page, to avoid
680	 * wasting too large a part of the page or too large an item.
681	 * Off-page page headers go into a splay tree, so we can match
682	 * a returned item with its header based on the page address.
683	 * The threshold is the smaller of 1/16 of the page size and
684	 * about 8 times the page header size (XXX: tune).
685	 *
686	 * However, we'll put the header into the page if we can put
687	 * it without wasting any items.
688	 *
689	 * Silently enforce `0 <= ioff < align'.
690	 */
691	pp->pr_itemoffset = ioff %= align;
692	/* See the comment below about reserved bytes. */
693	trysize = palloc->pa_pagesz - ((align - ioff) % align);
694	phsize = ALIGN(sizeof(struct pool_item_header));
695	if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
696	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
697	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
698		/* Use the end of the page for the page header */
699		pp->pr_roflags |= PR_PHINPAGE;
700		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
701	} else {
702		/* The page header will be taken from our page header pool */
703		pp->pr_phoffset = 0;
704		off = palloc->pa_pagesz;
705		SPLAY_INIT(&pp->pr_phtree);
706	}
707
708	/*
709	 * Alignment is to take place at `ioff' within the item. This means
710	 * we must reserve up to `align - 1' bytes on the page to allow
711	 * appropriate positioning of each item.
712	 */
713	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
714	KASSERT(pp->pr_itemsperpage != 0);
715	if ((pp->pr_roflags & PR_NOTOUCH)) {
716		int idx;
717
718		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
719		    idx++) {
720			/* nothing */
721		}
722		if (idx >= PHPOOL_MAX) {
723			/*
724			 * If you see this panic, consider tweaking
725			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
726			 */
727			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
728			    pp->pr_wchan, pp->pr_itemsperpage);
729		}
730		pp->pr_phpool = &phpool[idx];
731	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
732		pp->pr_phpool = &phpool[0];
733	}
734	#if defined(DIAGNOSTIC)
735	else {
736		pp->pr_phpool = NULL;
737	}
738	#endif
739
740	/*
741	 * Use the slack between the chunks and the page header
742	 * for "cache coloring".
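 *
 * For example (made-up numbers): with align == 32 and 96 bytes of
 * slack, pr_maxcolor is 96, so successive pages start their items at
 * offsets 0, 32, 64, 96, 0, ... into the page (before the usual ioff
 * adjustment), spreading equal-index items on different pages across
 * different cache lines.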
743 */ 744 slack = off - pp->pr_itemsperpage * pp->pr_size; 745 pp->pr_maxcolor = (slack / align) * align; 746 pp->pr_curcolor = 0; 747 748 pp->pr_nget = 0; 749 pp->pr_nfail = 0; 750 pp->pr_nput = 0; 751 pp->pr_npagealloc = 0; 752 pp->pr_npagefree = 0; 753 pp->pr_hiwat = 0; 754 pp->pr_nidle = 0; 755 756 #ifdef POOL_DIAGNOSTIC 757 if (flags & PR_LOGGING) { 758 if (kmem_map == NULL || 759 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 760 M_TEMP, M_NOWAIT)) == NULL) 761 pp->pr_roflags &= ~PR_LOGGING; 762 pp->pr_curlogentry = 0; 763 pp->pr_logsize = pool_logsize; 764 } 765 #endif 766 767 pp->pr_entered_file = NULL; 768 pp->pr_entered_line = 0; 769 770 simple_lock_init(&pp->pr_slock); 771 772 /* 773 * Initialize private page header pool and cache magazine pool if we 774 * haven't done so yet. 775 * XXX LOCKING. 776 */ 777 if (phpool[0].pr_size == 0) { 778 int idx; 779 for (idx = 0; idx < PHPOOL_MAX; idx++) { 780 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 781 int nelem; 782 size_t sz; 783 784 nelem = PHPOOL_FREELIST_NELEM(idx); 785 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 786 "phpool-%d", nelem); 787 sz = sizeof(struct pool_item_header); 788 if (nelem) { 789 sz = PR_FREELIST_ALIGN(sz) 790 + nelem * sizeof(pool_item_freelist_t); 791 } 792 pool_init(&phpool[idx], sz, 0, 0, 0, 793 phpool_names[idx], &pool_allocator_meta); 794 } 795 #ifdef POOL_SUBPAGE 796 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 797 PR_RECURSIVE, "psppool", &pool_allocator_meta); 798 #endif 799 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 800 0, "pcgpool", &pool_allocator_meta); 801 } 802 803 /* Insert into the list of all pools. */ 804 simple_lock(&pool_head_slock); 805 LIST_INSERT_HEAD(&pool_head, pp, pr_poollist); 806 simple_unlock(&pool_head_slock); 807 808 /* Insert this into the list of pools using this allocator. */ 809 s = splvm(); 810 simple_lock(&palloc->pa_slock); 811 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 812 simple_unlock(&palloc->pa_slock); 813 splx(s); 814 pool_reclaim_register(pp); 815 } 816 817 /* 818 * De-commision a pool resource. 819 */ 820 void 821 pool_destroy(struct pool *pp) 822 { 823 struct pool_pagelist pq; 824 struct pool_item_header *ph; 825 int s; 826 827 /* Remove from global pool list */ 828 simple_lock(&pool_head_slock); 829 LIST_REMOVE(pp, pr_poollist); 830 if (drainpp == pp) 831 drainpp = NULL; 832 simple_unlock(&pool_head_slock); 833 834 /* Remove this pool from its allocator's list of pools. 
*/ 835 pool_reclaim_unregister(pp); 836 s = splvm(); 837 simple_lock(&pp->pr_alloc->pa_slock); 838 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 839 simple_unlock(&pp->pr_alloc->pa_slock); 840 splx(s); 841 842 s = splvm(); 843 simple_lock(&pp->pr_slock); 844 845 KASSERT(LIST_EMPTY(&pp->pr_cachelist)); 846 847 #ifdef DIAGNOSTIC 848 if (pp->pr_nout != 0) { 849 pr_printlog(pp, NULL, printf); 850 panic("pool_destroy: pool busy: still out: %u", 851 pp->pr_nout); 852 } 853 #endif 854 855 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 856 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 857 858 /* Remove all pages */ 859 LIST_INIT(&pq); 860 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 861 pr_rmpage(pp, ph, &pq); 862 863 simple_unlock(&pp->pr_slock); 864 splx(s); 865 866 pr_pagelist_free(pp, &pq); 867 868 #ifdef POOL_DIAGNOSTIC 869 if ((pp->pr_roflags & PR_LOGGING) != 0) 870 free(pp->pr_log, M_TEMP); 871 #endif 872 } 873 874 void 875 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 876 { 877 878 /* XXX no locking -- must be used just after pool_init() */ 879 #ifdef DIAGNOSTIC 880 if (pp->pr_drain_hook != NULL) 881 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 882 #endif 883 pp->pr_drain_hook = fn; 884 pp->pr_drain_hook_arg = arg; 885 } 886 887 static struct pool_item_header * 888 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) 889 { 890 struct pool_item_header *ph; 891 int s; 892 893 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0); 894 895 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 896 ph = (struct pool_item_header *) (storage + pp->pr_phoffset); 897 else { 898 s = splvm(); 899 ph = pool_get(pp->pr_phpool, flags); 900 splx(s); 901 } 902 903 return (ph); 904 } 905 906 /* 907 * Grab an item from the pool; must be called at appropriate spl level 908 */ 909 void * 910 #ifdef POOL_DIAGNOSTIC 911 _pool_get(struct pool *pp, int flags, const char *file, long line) 912 #else 913 pool_get(struct pool *pp, int flags) 914 #endif 915 { 916 struct pool_item *pi; 917 struct pool_item_header *ph; 918 void *v; 919 920 #ifdef DIAGNOSTIC 921 if (__predict_false(pp->pr_itemsperpage == 0)) 922 panic("pool_get: pool %p: pr_itemsperpage is zero, " 923 "pool not initialized?", pp); 924 if (__predict_false(curlwp == NULL && doing_shutdown == 0 && 925 (flags & PR_WAITOK) != 0)) 926 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan); 927 928 #endif /* DIAGNOSTIC */ 929 #ifdef LOCKDEBUG 930 if (flags & PR_WAITOK) 931 ASSERT_SLEEPABLE(NULL, "pool_get(PR_WAITOK)"); 932 SCHED_ASSERT_UNLOCKED(); 933 #endif 934 935 simple_lock(&pp->pr_slock); 936 pr_enter(pp, file, line); 937 938 startover: 939 /* 940 * Check to see if we've reached the hard limit. If we have, 941 * and we can wait, then wait until an item has been returned to 942 * the pool. 943 */ 944 #ifdef DIAGNOSTIC 945 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 946 pr_leave(pp); 947 simple_unlock(&pp->pr_slock); 948 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 949 } 950 #endif 951 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 952 if (pp->pr_drain_hook != NULL) { 953 /* 954 * Since the drain hook is going to free things 955 * back to the pool, unlock, call the hook, re-lock, 956 * and check the hardlimit condition again. 
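 *
 * An illustrative drain hook, registered with pool_set_drain_hook()
 * above (the "foo" names are hypothetical):
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		... pool_put() items held in foo's private caches
 *		    back into foo_pool; "flags" carries the caller's
 *		    PR_WAITOK/PR_NOWAIT intent ...
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);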
957 */ 958 pr_leave(pp); 959 simple_unlock(&pp->pr_slock); 960 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 961 simple_lock(&pp->pr_slock); 962 pr_enter(pp, file, line); 963 if (pp->pr_nout < pp->pr_hardlimit) 964 goto startover; 965 } 966 967 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 968 /* 969 * XXX: A warning isn't logged in this case. Should 970 * it be? 971 */ 972 pp->pr_flags |= PR_WANTED; 973 pr_leave(pp); 974 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 975 pr_enter(pp, file, line); 976 goto startover; 977 } 978 979 /* 980 * Log a message that the hard limit has been hit. 981 */ 982 if (pp->pr_hardlimit_warning != NULL && 983 ratecheck(&pp->pr_hardlimit_warning_last, 984 &pp->pr_hardlimit_ratecap)) 985 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 986 987 pp->pr_nfail++; 988 989 pr_leave(pp); 990 simple_unlock(&pp->pr_slock); 991 return (NULL); 992 } 993 994 /* 995 * The convention we use is that if `curpage' is not NULL, then 996 * it points at a non-empty bucket. In particular, `curpage' 997 * never points at a page header which has PR_PHINPAGE set and 998 * has no items in its bucket. 999 */ 1000 if ((ph = pp->pr_curpage) == NULL) { 1001 int error; 1002 1003 #ifdef DIAGNOSTIC 1004 if (pp->pr_nitems != 0) { 1005 simple_unlock(&pp->pr_slock); 1006 printf("pool_get: %s: curpage NULL, nitems %u\n", 1007 pp->pr_wchan, pp->pr_nitems); 1008 panic("pool_get: nitems inconsistent"); 1009 } 1010 #endif 1011 1012 /* 1013 * Call the back-end page allocator for more memory. 1014 * Release the pool lock, as the back-end page allocator 1015 * may block. 1016 */ 1017 pr_leave(pp); 1018 error = pool_grow(pp, flags); 1019 pr_enter(pp, file, line); 1020 if (error != 0) { 1021 /* 1022 * We were unable to allocate a page or item 1023 * header, but we released the lock during 1024 * allocation, so perhaps items were freed 1025 * back to the pool. Check for this case. 1026 */ 1027 if (pp->pr_curpage != NULL) 1028 goto startover; 1029 1030 pp->pr_nfail++; 1031 pr_leave(pp); 1032 simple_unlock(&pp->pr_slock); 1033 return (NULL); 1034 } 1035 1036 /* Start the allocation process over. */ 1037 goto startover; 1038 } 1039 if (pp->pr_roflags & PR_NOTOUCH) { 1040 #ifdef DIAGNOSTIC 1041 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1042 pr_leave(pp); 1043 simple_unlock(&pp->pr_slock); 1044 panic("pool_get: %s: page empty", pp->pr_wchan); 1045 } 1046 #endif 1047 v = pr_item_notouch_get(pp, ph); 1048 #ifdef POOL_DIAGNOSTIC 1049 pr_log(pp, v, PRLOG_GET, file, line); 1050 #endif 1051 } else { 1052 v = pi = LIST_FIRST(&ph->ph_itemlist); 1053 if (__predict_false(v == NULL)) { 1054 pr_leave(pp); 1055 simple_unlock(&pp->pr_slock); 1056 panic("pool_get: %s: page empty", pp->pr_wchan); 1057 } 1058 #ifdef DIAGNOSTIC 1059 if (__predict_false(pp->pr_nitems == 0)) { 1060 pr_leave(pp); 1061 simple_unlock(&pp->pr_slock); 1062 printf("pool_get: %s: items on itemlist, nitems %u\n", 1063 pp->pr_wchan, pp->pr_nitems); 1064 panic("pool_get: nitems inconsistent"); 1065 } 1066 #endif 1067 1068 #ifdef POOL_DIAGNOSTIC 1069 pr_log(pp, v, PRLOG_GET, file, line); 1070 #endif 1071 1072 #ifdef DIAGNOSTIC 1073 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1074 pr_printlog(pp, pi, printf); 1075 panic("pool_get(%s): free list modified: " 1076 "magic=%x; page %p; item addr %p\n", 1077 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1078 } 1079 #endif 1080 1081 /* 1082 * Remove from item list. 
1083 */ 1084 LIST_REMOVE(pi, pi_list); 1085 } 1086 pp->pr_nitems--; 1087 pp->pr_nout++; 1088 if (ph->ph_nmissing == 0) { 1089 #ifdef DIAGNOSTIC 1090 if (__predict_false(pp->pr_nidle == 0)) 1091 panic("pool_get: nidle inconsistent"); 1092 #endif 1093 pp->pr_nidle--; 1094 1095 /* 1096 * This page was previously empty. Move it to the list of 1097 * partially-full pages. This page is already curpage. 1098 */ 1099 LIST_REMOVE(ph, ph_pagelist); 1100 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1101 } 1102 ph->ph_nmissing++; 1103 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1104 #ifdef DIAGNOSTIC 1105 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1106 !LIST_EMPTY(&ph->ph_itemlist))) { 1107 pr_leave(pp); 1108 simple_unlock(&pp->pr_slock); 1109 panic("pool_get: %s: nmissing inconsistent", 1110 pp->pr_wchan); 1111 } 1112 #endif 1113 /* 1114 * This page is now full. Move it to the full list 1115 * and select a new current page. 1116 */ 1117 LIST_REMOVE(ph, ph_pagelist); 1118 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1119 pool_update_curpage(pp); 1120 } 1121 1122 pp->pr_nget++; 1123 pr_leave(pp); 1124 1125 /* 1126 * If we have a low water mark and we are now below that low 1127 * water mark, add more items to the pool. 1128 */ 1129 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1130 /* 1131 * XXX: Should we log a warning? Should we set up a timeout 1132 * to try again in a second or so? The latter could break 1133 * a caller's assumptions about interrupt protection, etc. 1134 */ 1135 } 1136 1137 simple_unlock(&pp->pr_slock); 1138 return (v); 1139 } 1140 1141 /* 1142 * Internal version of pool_put(). Pool is already locked/entered. 1143 */ 1144 static void 1145 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1146 { 1147 struct pool_item *pi = v; 1148 struct pool_item_header *ph; 1149 1150 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1151 SCHED_ASSERT_UNLOCKED(); 1152 1153 #ifdef DIAGNOSTIC 1154 if (__predict_false(pp->pr_nout == 0)) { 1155 printf("pool %s: putting with none out\n", 1156 pp->pr_wchan); 1157 panic("pool_put"); 1158 } 1159 #endif 1160 1161 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1162 pr_printlog(pp, NULL, printf); 1163 panic("pool_put: %s: page header missing", pp->pr_wchan); 1164 } 1165 1166 #ifdef LOCKDEBUG 1167 /* 1168 * Check if we're freeing a locked simple lock. 1169 */ 1170 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size); 1171 #endif 1172 1173 /* 1174 * Return to item list. 1175 */ 1176 if (pp->pr_roflags & PR_NOTOUCH) { 1177 pr_item_notouch_put(pp, ph, v); 1178 } else { 1179 #ifdef DIAGNOSTIC 1180 pi->pi_magic = PI_MAGIC; 1181 #endif 1182 #ifdef DEBUG 1183 { 1184 int i, *ip = v; 1185 1186 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1187 *ip++ = PI_MAGIC; 1188 } 1189 } 1190 #endif 1191 1192 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1193 } 1194 KDASSERT(ph->ph_nmissing != 0); 1195 ph->ph_nmissing--; 1196 pp->pr_nput++; 1197 pp->pr_nitems++; 1198 pp->pr_nout--; 1199 1200 /* Cancel "pool empty" condition if it exists */ 1201 if (pp->pr_curpage == NULL) 1202 pp->pr_curpage = ph; 1203 1204 if (pp->pr_flags & PR_WANTED) { 1205 pp->pr_flags &= ~PR_WANTED; 1206 if (ph->ph_nmissing == 0) 1207 pp->pr_nidle++; 1208 wakeup((caddr_t)pp); 1209 return; 1210 } 1211 1212 /* 1213 * If this page is now empty, do one of two things: 1214 * 1215 * (1) If we have more pages than the page high water mark, 1216 * free the page back to the system. 
ONLY CONSIDER 1217 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1218 * CLAIM. 1219 * 1220 * (2) Otherwise, move the page to the empty page list. 1221 * 1222 * Either way, select a new current page (so we use a partially-full 1223 * page if one is available). 1224 */ 1225 if (ph->ph_nmissing == 0) { 1226 pp->pr_nidle++; 1227 if (pp->pr_npages > pp->pr_minpages && 1228 (pp->pr_npages > pp->pr_maxpages || 1229 pa_starved_p(pp->pr_alloc))) { 1230 pr_rmpage(pp, ph, pq); 1231 } else { 1232 LIST_REMOVE(ph, ph_pagelist); 1233 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1234 1235 /* 1236 * Update the timestamp on the page. A page must 1237 * be idle for some period of time before it can 1238 * be reclaimed by the pagedaemon. This minimizes 1239 * ping-pong'ing for memory. 1240 */ 1241 getmicrotime(&ph->ph_time); 1242 } 1243 pool_update_curpage(pp); 1244 } 1245 1246 /* 1247 * If the page was previously completely full, move it to the 1248 * partially-full list and make it the current page. The next 1249 * allocation will get the item from this page, instead of 1250 * further fragmenting the pool. 1251 */ 1252 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1253 LIST_REMOVE(ph, ph_pagelist); 1254 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1255 pp->pr_curpage = ph; 1256 } 1257 } 1258 1259 /* 1260 * Return resource to the pool; must be called at appropriate spl level 1261 */ 1262 #ifdef POOL_DIAGNOSTIC 1263 void 1264 _pool_put(struct pool *pp, void *v, const char *file, long line) 1265 { 1266 struct pool_pagelist pq; 1267 1268 LIST_INIT(&pq); 1269 1270 simple_lock(&pp->pr_slock); 1271 pr_enter(pp, file, line); 1272 1273 pr_log(pp, v, PRLOG_PUT, file, line); 1274 1275 pool_do_put(pp, v, &pq); 1276 1277 pr_leave(pp); 1278 simple_unlock(&pp->pr_slock); 1279 1280 pr_pagelist_free(pp, &pq); 1281 } 1282 #undef pool_put 1283 #endif /* POOL_DIAGNOSTIC */ 1284 1285 void 1286 pool_put(struct pool *pp, void *v) 1287 { 1288 struct pool_pagelist pq; 1289 1290 LIST_INIT(&pq); 1291 1292 simple_lock(&pp->pr_slock); 1293 pool_do_put(pp, v, &pq); 1294 simple_unlock(&pp->pr_slock); 1295 1296 pr_pagelist_free(pp, &pq); 1297 } 1298 1299 #ifdef POOL_DIAGNOSTIC 1300 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1301 #endif 1302 1303 /* 1304 * pool_grow: grow a pool by a page. 1305 * 1306 * => called with pool locked. 1307 * => unlock and relock the pool. 1308 * => return with pool locked. 1309 */ 1310 1311 static int 1312 pool_grow(struct pool *pp, int flags) 1313 { 1314 struct pool_item_header *ph = NULL; 1315 char *cp; 1316 1317 simple_unlock(&pp->pr_slock); 1318 cp = pool_allocator_alloc(pp, flags); 1319 if (__predict_true(cp != NULL)) { 1320 ph = pool_alloc_item_header(pp, cp, flags); 1321 } 1322 if (__predict_false(cp == NULL || ph == NULL)) { 1323 if (cp != NULL) { 1324 pool_allocator_free(pp, cp); 1325 } 1326 simple_lock(&pp->pr_slock); 1327 return ENOMEM; 1328 } 1329 1330 simple_lock(&pp->pr_slock); 1331 pool_prime_page(pp, cp, ph); 1332 pp->pr_npagealloc++; 1333 return 0; 1334 } 1335 1336 /* 1337 * Add N items to the pool. 
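 *
 * (Illustrative use, with a hypothetical foo_pool: a subsystem that
 * cannot afford allocation failures at interrupt time might pre-prime
 * the pool and set a low water mark right after pool_init():
 *
 *	pool_prime(&foo_pool, 64);
 *	pool_setlowat(&foo_pool, 32);
 *
 * The request is rounded up to whole pages using pr_itemsperpage, and
 * pr_minpages is raised so those pages are kept from being reclaimed.)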
1338 */ 1339 int 1340 pool_prime(struct pool *pp, int n) 1341 { 1342 int newpages; 1343 int error = 0; 1344 1345 simple_lock(&pp->pr_slock); 1346 1347 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1348 1349 while (newpages-- > 0) { 1350 error = pool_grow(pp, PR_NOWAIT); 1351 if (error) { 1352 break; 1353 } 1354 pp->pr_minpages++; 1355 } 1356 1357 if (pp->pr_minpages >= pp->pr_maxpages) 1358 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1359 1360 simple_unlock(&pp->pr_slock); 1361 return error; 1362 } 1363 1364 /* 1365 * Add a page worth of items to the pool. 1366 * 1367 * Note, we must be called with the pool descriptor LOCKED. 1368 */ 1369 static void 1370 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 1371 { 1372 struct pool_item *pi; 1373 caddr_t cp = storage; 1374 unsigned int align = pp->pr_align; 1375 unsigned int ioff = pp->pr_itemoffset; 1376 int n; 1377 1378 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1379 1380 #ifdef DIAGNOSTIC 1381 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1382 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1383 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1384 #endif 1385 1386 /* 1387 * Insert page header. 1388 */ 1389 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1390 LIST_INIT(&ph->ph_itemlist); 1391 ph->ph_page = storage; 1392 ph->ph_nmissing = 0; 1393 getmicrotime(&ph->ph_time); 1394 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1395 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1396 1397 pp->pr_nidle++; 1398 1399 /* 1400 * Color this page. 1401 */ 1402 cp = (caddr_t)(cp + pp->pr_curcolor); 1403 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1404 pp->pr_curcolor = 0; 1405 1406 /* 1407 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 1408 */ 1409 if (ioff != 0) 1410 cp = (caddr_t)(cp + (align - ioff)); 1411 1412 /* 1413 * Insert remaining chunks on the bucket list. 1414 */ 1415 n = pp->pr_itemsperpage; 1416 pp->pr_nitems += n; 1417 1418 if (pp->pr_roflags & PR_NOTOUCH) { 1419 pool_item_freelist_t *freelist = PR_FREELIST(ph); 1420 int i; 1421 1422 ph->ph_off = cp - storage; 1423 ph->ph_firstfree = 0; 1424 for (i = 0; i < n - 1; i++) 1425 freelist[i] = i + 1; 1426 freelist[n - 1] = PR_INDEX_EOL; 1427 } else { 1428 while (n--) { 1429 pi = (struct pool_item *)cp; 1430 1431 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1432 1433 /* Insert on page list */ 1434 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1435 #ifdef DIAGNOSTIC 1436 pi->pi_magic = PI_MAGIC; 1437 #endif 1438 cp = (caddr_t)(cp + pp->pr_size); 1439 } 1440 } 1441 1442 /* 1443 * If the pool was depleted, point at the new page. 1444 */ 1445 if (pp->pr_curpage == NULL) 1446 pp->pr_curpage = ph; 1447 1448 if (++pp->pr_npages > pp->pr_hiwat) 1449 pp->pr_hiwat = pp->pr_npages; 1450 } 1451 1452 /* 1453 * Used by pool_get() when nitems drops below the low water mark. This 1454 * is used to catch up pr_nitems with the low water mark. 1455 * 1456 * Note 1, we never wait for memory here, we let the caller decide what to do. 1457 * 1458 * Note 2, we must be called with the pool already locked, and we return 1459 * with it locked. 
1460 */ 1461 static int 1462 pool_catchup(struct pool *pp) 1463 { 1464 int error = 0; 1465 1466 while (POOL_NEEDS_CATCHUP(pp)) { 1467 error = pool_grow(pp, PR_NOWAIT); 1468 if (error) { 1469 break; 1470 } 1471 } 1472 return error; 1473 } 1474 1475 static void 1476 pool_update_curpage(struct pool *pp) 1477 { 1478 1479 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1480 if (pp->pr_curpage == NULL) { 1481 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1482 } 1483 } 1484 1485 void 1486 pool_setlowat(struct pool *pp, int n) 1487 { 1488 1489 simple_lock(&pp->pr_slock); 1490 1491 pp->pr_minitems = n; 1492 pp->pr_minpages = (n == 0) 1493 ? 0 1494 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1495 1496 /* Make sure we're caught up with the newly-set low water mark. */ 1497 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1498 /* 1499 * XXX: Should we log a warning? Should we set up a timeout 1500 * to try again in a second or so? The latter could break 1501 * a caller's assumptions about interrupt protection, etc. 1502 */ 1503 } 1504 1505 simple_unlock(&pp->pr_slock); 1506 } 1507 1508 void 1509 pool_sethiwat(struct pool *pp, int n) 1510 { 1511 1512 simple_lock(&pp->pr_slock); 1513 1514 pp->pr_maxpages = (n == 0) 1515 ? 0 1516 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1517 1518 simple_unlock(&pp->pr_slock); 1519 } 1520 1521 void 1522 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1523 { 1524 1525 simple_lock(&pp->pr_slock); 1526 1527 pp->pr_hardlimit = n; 1528 pp->pr_hardlimit_warning = warnmess; 1529 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1530 pp->pr_hardlimit_warning_last.tv_sec = 0; 1531 pp->pr_hardlimit_warning_last.tv_usec = 0; 1532 1533 /* 1534 * In-line version of pool_sethiwat(), because we don't want to 1535 * release the lock. 1536 */ 1537 pp->pr_maxpages = (n == 0) 1538 ? 0 1539 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1540 1541 simple_unlock(&pp->pr_slock); 1542 } 1543 1544 /* 1545 * Release all complete pages that have not been used recently. 1546 */ 1547 int 1548 #ifdef POOL_DIAGNOSTIC 1549 _pool_reclaim(struct pool *pp, const char *file, long line) 1550 #else 1551 pool_reclaim(struct pool *pp) 1552 #endif 1553 { 1554 struct pool_item_header *ph, *phnext; 1555 struct pool_cache *pc; 1556 struct pool_pagelist pq; 1557 struct pool_cache_grouplist pcgl; 1558 struct timeval curtime, diff; 1559 1560 if (pp->pr_drain_hook != NULL) { 1561 /* 1562 * The drain hook must be called with the pool unlocked. 1563 */ 1564 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1565 } 1566 1567 if (simple_lock_try(&pp->pr_slock) == 0) 1568 return (0); 1569 pr_enter(pp, file, line); 1570 1571 LIST_INIT(&pq); 1572 LIST_INIT(&pcgl); 1573 1574 /* 1575 * Reclaim items from the pool's caches. 1576 */ 1577 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) 1578 pool_cache_reclaim(pc, &pq, &pcgl); 1579 1580 getmicrotime(&curtime); 1581 1582 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1583 phnext = LIST_NEXT(ph, ph_pagelist); 1584 1585 /* Check our minimum page claim */ 1586 if (pp->pr_npages <= pp->pr_minpages) 1587 break; 1588 1589 KASSERT(ph->ph_nmissing == 0); 1590 timersub(&curtime, &ph->ph_time, &diff); 1591 if (diff.tv_sec < pool_inactive_time 1592 && !pa_starved_p(pp->pr_alloc)) 1593 continue; 1594 1595 /* 1596 * If freeing this page would put us below 1597 * the low water mark, stop now. 
1598 */ 1599 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1600 pp->pr_minitems) 1601 break; 1602 1603 pr_rmpage(pp, ph, &pq); 1604 } 1605 1606 pr_leave(pp); 1607 simple_unlock(&pp->pr_slock); 1608 if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl)) 1609 return 0; 1610 1611 pr_pagelist_free(pp, &pq); 1612 pcg_grouplist_free(&pcgl); 1613 return (1); 1614 } 1615 1616 /* 1617 * Drain pools, one at a time. 1618 * 1619 * Note, we must never be called from an interrupt context. 1620 */ 1621 void 1622 pool_drain(void *arg) 1623 { 1624 struct pool *pp; 1625 int s; 1626 1627 pp = NULL; 1628 s = splvm(); 1629 simple_lock(&pool_head_slock); 1630 if (drainpp == NULL) { 1631 drainpp = LIST_FIRST(&pool_head); 1632 } 1633 if (drainpp) { 1634 pp = drainpp; 1635 drainpp = LIST_NEXT(pp, pr_poollist); 1636 } 1637 simple_unlock(&pool_head_slock); 1638 if (pp) 1639 pool_reclaim(pp); 1640 splx(s); 1641 } 1642 1643 /* 1644 * Diagnostic helpers. 1645 */ 1646 void 1647 pool_print(struct pool *pp, const char *modif) 1648 { 1649 int s; 1650 1651 s = splvm(); 1652 if (simple_lock_try(&pp->pr_slock) == 0) { 1653 printf("pool %s is locked; try again later\n", 1654 pp->pr_wchan); 1655 splx(s); 1656 return; 1657 } 1658 pool_print1(pp, modif, printf); 1659 simple_unlock(&pp->pr_slock); 1660 splx(s); 1661 } 1662 1663 void 1664 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1665 { 1666 struct pool *pp; 1667 1668 if (simple_lock_try(&pool_head_slock) == 0) { 1669 (*pr)("WARNING: pool_head_slock is locked\n"); 1670 } else { 1671 simple_unlock(&pool_head_slock); 1672 } 1673 1674 LIST_FOREACH(pp, &pool_head, pr_poollist) { 1675 pool_printit(pp, modif, pr); 1676 } 1677 } 1678 1679 void 1680 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1681 { 1682 1683 if (pp == NULL) { 1684 (*pr)("Must specify a pool to print.\n"); 1685 return; 1686 } 1687 1688 /* 1689 * Called from DDB; interrupts should be blocked, and all 1690 * other processors should be paused. We can skip locking 1691 * the pool in this case. 1692 * 1693 * We do a simple_lock_try() just to print the lock 1694 * status, however. 
1695 */ 1696 1697 if (simple_lock_try(&pp->pr_slock) == 0) 1698 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan); 1699 else 1700 simple_unlock(&pp->pr_slock); 1701 1702 pool_print1(pp, modif, pr); 1703 } 1704 1705 static void 1706 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1707 void (*pr)(const char *, ...)) 1708 { 1709 struct pool_item_header *ph; 1710 #ifdef DIAGNOSTIC 1711 struct pool_item *pi; 1712 #endif 1713 1714 LIST_FOREACH(ph, pl, ph_pagelist) { 1715 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1716 ph->ph_page, ph->ph_nmissing, 1717 (u_long)ph->ph_time.tv_sec, 1718 (u_long)ph->ph_time.tv_usec); 1719 #ifdef DIAGNOSTIC 1720 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1721 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1722 if (pi->pi_magic != PI_MAGIC) { 1723 (*pr)("\t\t\titem %p, magic 0x%x\n", 1724 pi, pi->pi_magic); 1725 } 1726 } 1727 } 1728 #endif 1729 } 1730 } 1731 1732 static void 1733 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1734 { 1735 struct pool_item_header *ph; 1736 struct pool_cache *pc; 1737 struct pool_cache_group *pcg; 1738 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1739 char c; 1740 1741 while ((c = *modif++) != '\0') { 1742 if (c == 'l') 1743 print_log = 1; 1744 if (c == 'p') 1745 print_pagelist = 1; 1746 if (c == 'c') 1747 print_cache = 1; 1748 } 1749 1750 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1751 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1752 pp->pr_roflags); 1753 (*pr)("\talloc %p\n", pp->pr_alloc); 1754 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1755 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1756 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1757 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1758 1759 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1760 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1761 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1762 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1763 1764 if (print_pagelist == 0) 1765 goto skip_pagelist; 1766 1767 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1768 (*pr)("\n\tempty page list:\n"); 1769 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1770 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1771 (*pr)("\n\tfull page list:\n"); 1772 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1773 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1774 (*pr)("\n\tpartial-page list:\n"); 1775 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1776 1777 if (pp->pr_curpage == NULL) 1778 (*pr)("\tno current page\n"); 1779 else 1780 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1781 1782 skip_pagelist: 1783 if (print_log == 0) 1784 goto skip_log; 1785 1786 (*pr)("\n"); 1787 if ((pp->pr_roflags & PR_LOGGING) == 0) 1788 (*pr)("\tno log\n"); 1789 else { 1790 pr_printlog(pp, NULL, pr); 1791 } 1792 1793 skip_log: 1794 if (print_cache == 0) 1795 goto skip_cache; 1796 1797 #define PR_GROUPLIST(pcg) \ 1798 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1799 for (i = 0; i < PCG_NOBJECTS; i++) { \ 1800 if (pcg->pcg_objects[i].pcgo_pa != \ 1801 POOL_PADDR_INVALID) { \ 1802 (*pr)("\t\t\t%p, 0x%llx\n", \ 1803 pcg->pcg_objects[i].pcgo_va, \ 1804 (unsigned long long) \ 1805 pcg->pcg_objects[i].pcgo_pa); \ 1806 } else { \ 1807 (*pr)("\t\t\t%p\n", \ 1808 pcg->pcg_objects[i].pcgo_va); \ 1809 } \ 1810 } 1811 1812 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) { 1813 (*pr)("\tcache %p\n", 
pc); 1814 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1815 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems); 1816 (*pr)("\t full groups:\n"); 1817 LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) { 1818 PR_GROUPLIST(pcg); 1819 } 1820 (*pr)("\t partial groups:\n"); 1821 LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) { 1822 PR_GROUPLIST(pcg); 1823 } 1824 (*pr)("\t empty groups:\n"); 1825 LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) { 1826 PR_GROUPLIST(pcg); 1827 } 1828 } 1829 #undef PR_GROUPLIST 1830 1831 skip_cache: 1832 pr_enter_check(pp, pr); 1833 } 1834 1835 static int 1836 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1837 { 1838 struct pool_item *pi; 1839 caddr_t page; 1840 int n; 1841 1842 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1843 page = (caddr_t)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1844 if (page != ph->ph_page && 1845 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1846 if (label != NULL) 1847 printf("%s: ", label); 1848 printf("pool(%p:%s): page inconsistency: page %p;" 1849 " at page head addr %p (p %p)\n", pp, 1850 pp->pr_wchan, ph->ph_page, 1851 ph, page); 1852 return 1; 1853 } 1854 } 1855 1856 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1857 return 0; 1858 1859 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1860 pi != NULL; 1861 pi = LIST_NEXT(pi,pi_list), n++) { 1862 1863 #ifdef DIAGNOSTIC 1864 if (pi->pi_magic != PI_MAGIC) { 1865 if (label != NULL) 1866 printf("%s: ", label); 1867 printf("pool(%s): free list modified: magic=%x;" 1868 " page %p; item ordinal %d; addr %p\n", 1869 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1870 n, pi); 1871 panic("pool"); 1872 } 1873 #endif 1874 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1875 continue; 1876 } 1877 page = (caddr_t)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1878 if (page == ph->ph_page) 1879 continue; 1880 1881 if (label != NULL) 1882 printf("%s: ", label); 1883 printf("pool(%p:%s): page inconsistency: page %p;" 1884 " item ordinal %d; addr %p (p %p)\n", pp, 1885 pp->pr_wchan, ph->ph_page, 1886 n, pi, page); 1887 return 1; 1888 } 1889 return 0; 1890 } 1891 1892 1893 int 1894 pool_chk(struct pool *pp, const char *label) 1895 { 1896 struct pool_item_header *ph; 1897 int r = 0; 1898 1899 simple_lock(&pp->pr_slock); 1900 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1901 r = pool_chk_page(pp, label, ph); 1902 if (r) { 1903 goto out; 1904 } 1905 } 1906 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1907 r = pool_chk_page(pp, label, ph); 1908 if (r) { 1909 goto out; 1910 } 1911 } 1912 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1913 r = pool_chk_page(pp, label, ph); 1914 if (r) { 1915 goto out; 1916 } 1917 } 1918 1919 out: 1920 simple_unlock(&pp->pr_slock); 1921 return (r); 1922 } 1923 1924 /* 1925 * pool_cache_init: 1926 * 1927 * Initialize a pool cache. 1928 * 1929 * NOTE: If the pool must be protected from interrupts, we expect 1930 * to be called at the appropriate interrupt priority level. 
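 *
 * Illustrative setup (the "foo" names are hypothetical; the
 * constructor must return 0 on success, as pool_cache_get_paddr()
 * below expects):
 *
 *	static struct pool foo_pool;
 *	static struct pool_cache foo_cache;
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		... one-time construction of the object ...
 *		return 0;
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		... undo what foo_ctor() did ...
 *	}
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *	f = pool_cache_get_paddr(&foo_cache, PR_WAITOK, NULL);
 *	...
 *	pool_cache_put_paddr(&foo_cache, f, POOL_PADDR_INVALID);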
1931 */ 1932 void 1933 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1934 int (*ctor)(void *, void *, int), 1935 void (*dtor)(void *, void *), 1936 void *arg) 1937 { 1938 1939 LIST_INIT(&pc->pc_emptygroups); 1940 LIST_INIT(&pc->pc_fullgroups); 1941 LIST_INIT(&pc->pc_partgroups); 1942 simple_lock_init(&pc->pc_slock); 1943 1944 pc->pc_pool = pp; 1945 1946 pc->pc_ctor = ctor; 1947 pc->pc_dtor = dtor; 1948 pc->pc_arg = arg; 1949 1950 pc->pc_hits = 0; 1951 pc->pc_misses = 0; 1952 1953 pc->pc_ngroups = 0; 1954 1955 pc->pc_nitems = 0; 1956 1957 simple_lock(&pp->pr_slock); 1958 LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist); 1959 simple_unlock(&pp->pr_slock); 1960 } 1961 1962 /* 1963 * pool_cache_destroy: 1964 * 1965 * Destroy a pool cache. 1966 */ 1967 void 1968 pool_cache_destroy(struct pool_cache *pc) 1969 { 1970 struct pool *pp = pc->pc_pool; 1971 1972 /* First, invalidate the entire cache. */ 1973 pool_cache_invalidate(pc); 1974 1975 /* ...and remove it from the pool's cache list. */ 1976 simple_lock(&pp->pr_slock); 1977 LIST_REMOVE(pc, pc_poollist); 1978 simple_unlock(&pp->pr_slock); 1979 } 1980 1981 static inline void * 1982 pcg_get(struct pool_cache_group *pcg, paddr_t *pap) 1983 { 1984 void *object; 1985 u_int idx; 1986 1987 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1988 KASSERT(pcg->pcg_avail != 0); 1989 idx = --pcg->pcg_avail; 1990 1991 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL); 1992 object = pcg->pcg_objects[idx].pcgo_va; 1993 if (pap != NULL) 1994 *pap = pcg->pcg_objects[idx].pcgo_pa; 1995 pcg->pcg_objects[idx].pcgo_va = NULL; 1996 1997 return (object); 1998 } 1999 2000 static inline void 2001 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa) 2002 { 2003 u_int idx; 2004 2005 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 2006 idx = pcg->pcg_avail++; 2007 2008 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL); 2009 pcg->pcg_objects[idx].pcgo_va = object; 2010 pcg->pcg_objects[idx].pcgo_pa = pa; 2011 } 2012 2013 static void 2014 pcg_grouplist_free(struct pool_cache_grouplist *pcgl) 2015 { 2016 struct pool_cache_group *pcg; 2017 int s; 2018 2019 s = splvm(); 2020 while ((pcg = LIST_FIRST(pcgl)) != NULL) { 2021 LIST_REMOVE(pcg, pcg_list); 2022 pool_put(&pcgpool, pcg); 2023 } 2024 splx(s); 2025 } 2026 2027 /* 2028 * pool_cache_get{,_paddr}: 2029 * 2030 * Get an object from a pool cache (optionally returning 2031 * the physical address of the object). 2032 */ 2033 void * 2034 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap) 2035 { 2036 struct pool_cache_group *pcg; 2037 void *object; 2038 2039 #ifdef LOCKDEBUG 2040 if (flags & PR_WAITOK) 2041 ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)"); 2042 #endif 2043 2044 simple_lock(&pc->pc_slock); 2045 2046 pcg = LIST_FIRST(&pc->pc_partgroups); 2047 if (pcg == NULL) { 2048 pcg = LIST_FIRST(&pc->pc_fullgroups); 2049 if (pcg != NULL) { 2050 LIST_REMOVE(pcg, pcg_list); 2051 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list); 2052 } 2053 } 2054 if (pcg == NULL) { 2055 2056 /* 2057 * No groups with any available objects. Allocate 2058 * a new object, construct it, and return it to 2059 * the caller. We will allocate a group, if necessary, 2060 * when the object is freed back to the cache. 
/*
 * pool_cache_get{,_paddr}:
 *
 *	Get an object from a pool cache (optionally returning
 *	the physical address of the object).
 */
void *
pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_fullgroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		if (object != NULL && pap != NULL) {
#ifdef POOL_VTOPHYS
			*pap = POOL_VTOPHYS(object);
#else
			*pap = POOL_PADDR_INVALID;
#endif
		}
		return (object);
	}

	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg, pap);

	if (pcg->pcg_avail == 0) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put{,_paddr}:
 *
 *	Put an object back to the pool cache (optionally caching the
 *	physical address of the object).
 */
void
pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
{
	struct pool_cache_group *pcg;
	int s;

	if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
		goto destruct;
	}

	simple_lock(&pc->pc_slock);

	pcg = LIST_FIRST(&pc->pc_partgroups);
	if (pcg == NULL) {
		pcg = LIST_FIRST(&pc->pc_emptygroups);
		if (pcg != NULL) {
			LIST_REMOVE(pcg, pcg_list);
			LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
		}
	}
	if (pcg == NULL) {

		/*
		 * No empty groups to free the object to.  Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		s = splvm();
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		splx(s);
		if (pcg == NULL) {
 destruct:

			/*
			 * Unable to allocate a cache group; destruct
			 * the object and free it back to the pool.
			 */
			pool_cache_destruct_object(pc, object);
			return;
		}
		memset(pcg, 0, sizeof(*pcg));
		simple_lock(&pc->pc_slock);
		pc->pc_ngroups++;
		LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
	}

	pc->pc_nitems++;
	pcg_put(pcg, object, pa);

	if (pcg->pcg_avail == PCG_NOBJECTS) {
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list);
	}
	simple_unlock(&pc->pc_slock);
}
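
/*
 * Illustrative sketch (not part of the original file): how a subsystem
 * might layer a pool cache over its pool.  Everything named foo_* here is
 * hypothetical, and pool_init() is called with the seven-argument form
 * this version of the interface uses.
 */
#ifdef EXAMPLE_POOL_CACHE_CLIENT
struct foo {
	int	f_state;
};

static struct pool foo_pool;
static struct pool_cache foo_cache;

static int
foo_ctor(void *arg, void *object, int flags)
{
	struct foo *f = object;

	/* Expensive, one-time construction goes here. */
	f->f_state = 0;
	return (0);
}

static void
foo_dtor(void *arg, void *object)
{

	/* Undo whatever foo_ctor() set up. */
}

static void
foo_init(void)
{
	struct foo *f;

	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
	    &pool_allocator_nointr);
	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);

	/* Constructed objects come from and return to the cache. */
	f = pool_cache_get(&foo_cache, PR_WAITOK);
	if (f != NULL)
		pool_cache_put(&foo_cache, f);
}
#endif /* EXAMPLE_POOL_CACHE_CLIENT */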
/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

static void
pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
    struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgdl)
{
	struct pool_cache_group *pcg, *npcg;
	void *object;

	for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) {
		npcg = LIST_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg, NULL);
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			pool_do_put(pc->pc_pool, object, pq);
		}
		pc->pc_ngroups--;
		LIST_REMOVE(pcg, pcg_list);
		LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
	}
}

static void
pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
	LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

	pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
	pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);

	KASSERT(LIST_EMPTY(&pc->pc_partgroups));
	KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
	KASSERT(pc->pc_nitems == 0);
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
	struct pool_pagelist pq;
	struct pool_cache_grouplist pcgl;

	LIST_INIT(&pq);
	LIST_INIT(&pcgl);

	simple_lock(&pc->pc_slock);
	simple_lock(&pc->pc_pool->pr_slock);

	pool_do_cache_invalidate(pc, &pq, &pcgl);

	simple_unlock(&pc->pc_pool->pr_slock);
	simple_unlock(&pc->pc_slock);

	pr_pagelist_free(pc->pc_pool, &pq);
	pcg_grouplist_free(&pcgl);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

	/*
	 * We're locking in the wrong order (normally pool_cache -> pool,
	 * but the pool is already locked when we get here), so we have
	 * to use trylock.  If we can't lock the pool_cache, it's not really
	 * a big deal here.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;

	pool_do_cache_invalidate(pc, pq, pcgl);

	simple_unlock(&pc->pc_slock);
}
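
/*
 * Illustrative sketch (not part of the original file): a client use of
 * pool_cache_invalidate().  If the constructed form of the cached objects
 * depends on some external state (the hypothetical foo_keylen below), the
 * cache must be invalidated when that state changes so that stale
 * constructed objects are destructed instead of being handed out again.
 */
#ifdef EXAMPLE_CACHE_INVALIDATE
static int foo_keylen;

static void
foo_set_keylen(struct pool_cache *pc, int newlen)
{

	foo_keylen = newlen;
	pool_cache_invalidate(pc);
}
#endif /* EXAMPLE_CACHE_INVALIDATE */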
/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_kmem_fullpage = {
	pool_page_alloc, pool_page_free, 0,
	.pa_backingmapptr = &kmem_map,
};
#else
struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
	.pa_backingmapptr = &kmem_map,
};
#endif

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_nointr_fullpage = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
	.pa_backingmapptr = &kernel_map,
};
#else
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
	.pa_backingmapptr = &kernel_map,
};
#endif

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
	pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
	.pa_backingmapptr = &kmem_map,
};

void	*pool_subpage_alloc_nointr(struct pool *, int);
void	pool_subpage_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
	pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
	.pa_backingmapptr = &kmem_map,
};
#endif /* POOL_SUBPAGE */

static void *
pool_allocator_alloc(struct pool *pp, int flags)
{
	struct pool_allocator *pa = pp->pr_alloc;
	void *res;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	res = (*pa->pa_alloc)(pp, flags);
	if (res == NULL && (flags & PR_WAITOK) == 0) {
		/*
		 * We only run the drain hook here if PR_NOWAIT.
		 * In other cases, the hook will be run in
		 * pool_reclaim().
		 */
		if (pp->pr_drain_hook != NULL) {
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			res = (*pa->pa_alloc)(pp, flags);
		}
	}
	return res;
}

static void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
}

static void *
pool_page_alloc_meta(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
pool_page_free_meta(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}
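
/*
 * Illustrative sketch (not part of the original file): a custom backend
 * allocator in the same style as the standard ones above.  The foo_*
 * names are hypothetical; the alloc/free pair simply mirrors
 * pool_page_alloc_meta()/pool_page_free_meta() and hands back whole,
 * page-aligned pages from kmem_map.
 */
#ifdef EXAMPLE_CUSTOM_ALLOCATOR
static void *
foo_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
foo_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
}

struct pool_allocator foo_allocator = {
	foo_page_alloc, foo_page_free, 0,
	.pa_backingmapptr = &kmem_map,
};
#endif /* EXAMPLE_CUSTOM_ALLOCATOR */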
#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
	void *v;
	int s;
	s = splvm();
	v = pool_get(&psppool, flags);
	splx(s);
	return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
	int s;
	s = splvm();
	pool_put(&psppool, v);
	splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_subpage_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_subpage_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#endif /* POOL_SUBPAGE */

void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
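
/*
 * Illustrative sketch (not part of the original file): picking a standard
 * backend allocator at pool_init() time.  The bar_* names are
 * hypothetical.  Passing NULL selects pool_allocator_kmem, the default
 * noted in the comment above; pools never touched from interrupt context
 * can use pool_allocator_nointr instead.
 */
#ifdef EXAMPLE_ALLOCATOR_CHOICE
struct bar {
	int	b_busy;
};

static struct pool bar_intr_pool;	/* may be used from interrupt context */
static struct pool bar_proc_pool;	/* process context only */

static void
bar_pools_init(void)
{

	pool_init(&bar_intr_pool, sizeof(struct bar), 0, 0, 0, "barintr",
	    NULL);
	pool_init(&bar_proc_pool, sizeof(struct bar), 0, 0, 0, "barproc",
	    &pool_allocator_nointr);
}
#endif /* EXAMPLE_ALLOCATOR_CHOICE */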