1 /* $NetBSD: subr_pool.c,v 1.131 2007/08/18 00:37:14 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000, 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.131 2007/08/18 00:37:14 ad Exp $"); 42 43 #include "opt_pool.h" 44 #include "opt_poollog.h" 45 #include "opt_lockdebug.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/proc.h> 50 #include <sys/errno.h> 51 #include <sys/kernel.h> 52 #include <sys/malloc.h> 53 #include <sys/lock.h> 54 #include <sys/pool.h> 55 #include <sys/syslog.h> 56 #include <sys/debug.h> 57 58 #include <uvm/uvm.h> 59 60 /* 61 * Pool resource management utility. 62 * 63 * Memory is allocated in pages which are split into pieces according to 64 * the pool item size. Each page is kept on one of three lists in the 65 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 66 * for empty, full and partially-full pages respectively. The individual 67 * pool items are on a linked list headed by `ph_itemlist' in each page 68 * header. The memory for building the page list is either taken from 69 * the allocated pages themselves (for small pool items) or taken from 70 * an internal pool of page headers (`phpool'). 71 */ 72 73 /* List of all pools */ 74 LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head); 75 76 /* Private pool for page header structures */ 77 #define PHPOOL_MAX 8 78 static struct pool phpool[PHPOOL_MAX]; 79 #define PHPOOL_FREELIST_NELEM(idx) (((idx) == 0) ? 
0 : (1 << (idx))) 80 81 #ifdef POOL_SUBPAGE 82 /* Pool of subpages for use by normal pools. */ 83 static struct pool psppool; 84 #endif 85 86 static SLIST_HEAD(, pool_allocator) pa_deferinitq = 87 SLIST_HEAD_INITIALIZER(pa_deferinitq); 88 89 static void *pool_page_alloc_meta(struct pool *, int); 90 static void pool_page_free_meta(struct pool *, void *); 91 92 /* allocator for pool metadata */ 93 static struct pool_allocator pool_allocator_meta = { 94 pool_page_alloc_meta, pool_page_free_meta, 95 .pa_backingmapptr = &kmem_map, 96 }; 97 98 /* # of seconds to retain page after last use */ 99 int pool_inactive_time = 10; 100 101 /* Next candidate for drainage (see pool_drain()) */ 102 static struct pool *drainpp; 103 104 /* This spin lock protects both pool_head and drainpp. */ 105 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER; 106 107 typedef uint8_t pool_item_freelist_t; 108 109 struct pool_item_header { 110 /* Page headers */ 111 LIST_ENTRY(pool_item_header) 112 ph_pagelist; /* pool page list */ 113 SPLAY_ENTRY(pool_item_header) 114 ph_node; /* Off-page page headers */ 115 void * ph_page; /* this page's address */ 116 struct timeval ph_time; /* last referenced */ 117 union { 118 /* !PR_NOTOUCH */ 119 struct { 120 LIST_HEAD(, pool_item) 121 phu_itemlist; /* chunk list for this page */ 122 } phu_normal; 123 /* PR_NOTOUCH */ 124 struct { 125 uint16_t 126 phu_off; /* start offset in page */ 127 pool_item_freelist_t 128 phu_firstfree; /* first free item */ 129 /* 130 * XXX it might be better to use 131 * a simple bitmap and ffs(3) 132 */ 133 } phu_notouch; 134 } ph_u; 135 uint16_t ph_nmissing; /* # of chunks in use */ 136 }; 137 #define ph_itemlist ph_u.phu_normal.phu_itemlist 138 #define ph_off ph_u.phu_notouch.phu_off 139 #define ph_firstfree ph_u.phu_notouch.phu_firstfree 140 141 struct pool_item { 142 #ifdef DIAGNOSTIC 143 u_int pi_magic; 144 #endif 145 #define PI_MAGIC 0xdeadbeefU 146 /* Other entries use only this list entry */ 147 LIST_ENTRY(pool_item) pi_list; 148 }; 149 150 #define POOL_NEEDS_CATCHUP(pp) \ 151 ((pp)->pr_nitems < (pp)->pr_minitems) 152 153 /* 154 * Pool cache management. 155 * 156 * Pool caches provide a way for constructed objects to be cached by the 157 * pool subsystem. This can lead to performance improvements by avoiding 158 * needless object construction/destruction; it is deferred until absolutely 159 * necessary. 160 * 161 * Caches are grouped into cache groups. Each cache group references 162 * up to 16 constructed objects. When a cache allocates an object 163 * from the pool, it calls the object's constructor and places it into 164 * a cache group. When a cache group frees an object back to the pool, 165 * it first calls the object's destructor. This allows the object to 166 * persist in constructed form while freed to the cache. 167 * 168 * Multiple caches may exist for each pool. This allows a single 169 * object type to have multiple constructed forms. The pool references 170 * each cache, so that when a pool is drained by the pagedaemon, it can 171 * drain each individual cache as well. Each time a cache is drained, 172 * the most idle cache group is freed to the pool in its entirety. 173 * 174 * Pool caches are layed on top of pools. By layering them, we can avoid 175 * the complexity of cache management for pools which would not benefit 176 * from it. 177 */ 178 179 /* The cache group pool. 
*/ 180 static struct pool pcgpool; 181 182 static void pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *, 183 struct pool_cache_grouplist *); 184 static void pcg_grouplist_free(struct pool_cache_grouplist *); 185 186 static int pool_catchup(struct pool *); 187 static void pool_prime_page(struct pool *, void *, 188 struct pool_item_header *); 189 static void pool_update_curpage(struct pool *); 190 191 static int pool_grow(struct pool *, int); 192 static void *pool_allocator_alloc(struct pool *, int); 193 static void pool_allocator_free(struct pool *, void *); 194 195 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 196 void (*)(const char *, ...)); 197 static void pool_print1(struct pool *, const char *, 198 void (*)(const char *, ...)); 199 200 static int pool_chk_page(struct pool *, const char *, 201 struct pool_item_header *); 202 203 /* 204 * Pool log entry. An array of these is allocated in pool_init(). 205 */ 206 struct pool_log { 207 const char *pl_file; 208 long pl_line; 209 int pl_action; 210 #define PRLOG_GET 1 211 #define PRLOG_PUT 2 212 void *pl_addr; 213 }; 214 215 #ifdef POOL_DIAGNOSTIC 216 /* Number of entries in pool log buffers */ 217 #ifndef POOL_LOGSIZE 218 #define POOL_LOGSIZE 10 219 #endif 220 221 int pool_logsize = POOL_LOGSIZE; 222 223 static inline void 224 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 225 { 226 int n = pp->pr_curlogentry; 227 struct pool_log *pl; 228 229 if ((pp->pr_roflags & PR_LOGGING) == 0) 230 return; 231 232 /* 233 * Fill in the current entry. Wrap around and overwrite 234 * the oldest entry if necessary. 235 */ 236 pl = &pp->pr_log[n]; 237 pl->pl_file = file; 238 pl->pl_line = line; 239 pl->pl_action = action; 240 pl->pl_addr = v; 241 if (++n >= pp->pr_logsize) 242 n = 0; 243 pp->pr_curlogentry = n; 244 } 245 246 static void 247 pr_printlog(struct pool *pp, struct pool_item *pi, 248 void (*pr)(const char *, ...)) 249 { 250 int i = pp->pr_logsize; 251 int n = pp->pr_curlogentry; 252 253 if ((pp->pr_roflags & PR_LOGGING) == 0) 254 return; 255 256 /* 257 * Print all entries in this pool's log. 258 */ 259 while (i-- > 0) { 260 struct pool_log *pl = &pp->pr_log[n]; 261 if (pl->pl_action != 0) { 262 if (pi == NULL || pi == pl->pl_addr) { 263 (*pr)("\tlog entry %d:\n", i); 264 (*pr)("\t\taction = %s, addr = %p\n", 265 pl->pl_action == PRLOG_GET ? 
"get" : "put", 266 pl->pl_addr); 267 (*pr)("\t\tfile: %s at line %lu\n", 268 pl->pl_file, pl->pl_line); 269 } 270 } 271 if (++n >= pp->pr_logsize) 272 n = 0; 273 } 274 } 275 276 static inline void 277 pr_enter(struct pool *pp, const char *file, long line) 278 { 279 280 if (__predict_false(pp->pr_entered_file != NULL)) { 281 printf("pool %s: reentrancy at file %s line %ld\n", 282 pp->pr_wchan, file, line); 283 printf(" previous entry at file %s line %ld\n", 284 pp->pr_entered_file, pp->pr_entered_line); 285 panic("pr_enter"); 286 } 287 288 pp->pr_entered_file = file; 289 pp->pr_entered_line = line; 290 } 291 292 static inline void 293 pr_leave(struct pool *pp) 294 { 295 296 if (__predict_false(pp->pr_entered_file == NULL)) { 297 printf("pool %s not entered?\n", pp->pr_wchan); 298 panic("pr_leave"); 299 } 300 301 pp->pr_entered_file = NULL; 302 pp->pr_entered_line = 0; 303 } 304 305 static inline void 306 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 307 { 308 309 if (pp->pr_entered_file != NULL) 310 (*pr)("\n\tcurrently entered from file %s line %ld\n", 311 pp->pr_entered_file, pp->pr_entered_line); 312 } 313 #else 314 #define pr_log(pp, v, action, file, line) 315 #define pr_printlog(pp, pi, pr) 316 #define pr_enter(pp, file, line) 317 #define pr_leave(pp) 318 #define pr_enter_check(pp, pr) 319 #endif /* POOL_DIAGNOSTIC */ 320 321 static inline int 322 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 323 const void *v) 324 { 325 const char *cp = v; 326 int idx; 327 328 KASSERT(pp->pr_roflags & PR_NOTOUCH); 329 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 330 KASSERT(idx < pp->pr_itemsperpage); 331 return idx; 332 } 333 334 #define PR_FREELIST_ALIGN(p) \ 335 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t)) 336 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1)) 337 #define PR_INDEX_USED ((pool_item_freelist_t)-1) 338 #define PR_INDEX_EOL ((pool_item_freelist_t)-2) 339 340 static inline void 341 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 342 void *obj) 343 { 344 int idx = pr_item_notouch_index(pp, ph, obj); 345 pool_item_freelist_t *freelist = PR_FREELIST(ph); 346 347 KASSERT(freelist[idx] == PR_INDEX_USED); 348 freelist[idx] = ph->ph_firstfree; 349 ph->ph_firstfree = idx; 350 } 351 352 static inline void * 353 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 354 { 355 int idx = ph->ph_firstfree; 356 pool_item_freelist_t *freelist = PR_FREELIST(ph); 357 358 KASSERT(freelist[idx] != PR_INDEX_USED); 359 ph->ph_firstfree = freelist[idx]; 360 freelist[idx] = PR_INDEX_USED; 361 362 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 363 } 364 365 static inline int 366 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 367 { 368 369 /* 370 * we consider pool_item_header with smaller ph_page bigger. 371 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 372 */ 373 374 if (a->ph_page < b->ph_page) 375 return (1); 376 else if (a->ph_page > b->ph_page) 377 return (-1); 378 else 379 return (0); 380 } 381 382 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 383 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 384 385 /* 386 * Return the pool page header based on item address. 
387 */ 388 static inline struct pool_item_header * 389 pr_find_pagehead(struct pool *pp, void *v) 390 { 391 struct pool_item_header *ph, tmp; 392 393 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 394 tmp.ph_page = (void *)(uintptr_t)v; 395 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 396 if (ph == NULL) { 397 ph = SPLAY_ROOT(&pp->pr_phtree); 398 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 399 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 400 } 401 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 402 } 403 } else { 404 void *page = 405 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 406 407 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 408 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 409 } else { 410 tmp.ph_page = page; 411 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 412 } 413 } 414 415 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 416 ((char *)ph->ph_page <= (char *)v && 417 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 418 return ph; 419 } 420 421 static void 422 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 423 { 424 struct pool_item_header *ph; 425 int s; 426 427 while ((ph = LIST_FIRST(pq)) != NULL) { 428 LIST_REMOVE(ph, ph_pagelist); 429 pool_allocator_free(pp, ph->ph_page); 430 if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 431 s = splvm(); 432 pool_put(pp->pr_phpool, ph); 433 splx(s); 434 } 435 } 436 } 437 438 /* 439 * Remove a page from the pool. 440 */ 441 static inline void 442 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 443 struct pool_pagelist *pq) 444 { 445 446 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 447 448 /* 449 * If the page was idle, decrement the idle page count. 450 */ 451 if (ph->ph_nmissing == 0) { 452 #ifdef DIAGNOSTIC 453 if (pp->pr_nidle == 0) 454 panic("pr_rmpage: nidle inconsistent"); 455 if (pp->pr_nitems < pp->pr_itemsperpage) 456 panic("pr_rmpage: nitems inconsistent"); 457 #endif 458 pp->pr_nidle--; 459 } 460 461 pp->pr_nitems -= pp->pr_itemsperpage; 462 463 /* 464 * Unlink the page from the pool and queue it for release. 465 */ 466 LIST_REMOVE(ph, ph_pagelist); 467 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 468 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 469 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 470 471 pp->pr_npages--; 472 pp->pr_npagefree++; 473 474 pool_update_curpage(pp); 475 } 476 477 static bool 478 pa_starved_p(struct pool_allocator *pa) 479 { 480 481 if (pa->pa_backingmap != NULL) { 482 return vm_map_starved_p(pa->pa_backingmap); 483 } 484 return false; 485 } 486 487 static int 488 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) 489 { 490 struct pool *pp = obj; 491 struct pool_allocator *pa = pp->pr_alloc; 492 493 KASSERT(&pp->pr_reclaimerentry == ce); 494 pool_reclaim(pp); 495 if (!pa_starved_p(pa)) { 496 return CALLBACK_CHAIN_ABORT; 497 } 498 return CALLBACK_CHAIN_CONTINUE; 499 } 500 501 static void 502 pool_reclaim_register(struct pool *pp) 503 { 504 struct vm_map *map = pp->pr_alloc->pa_backingmap; 505 int s; 506 507 if (map == NULL) { 508 return; 509 } 510 511 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 512 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 513 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 514 splx(s); 515 } 516 517 static void 518 pool_reclaim_unregister(struct pool *pp) 519 { 520 struct vm_map *map = pp->pr_alloc->pa_backingmap; 521 int s; 522 523 if (map == NULL) { 524 return; 525 } 526 527 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. 
*/ 528 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 529 &pp->pr_reclaimerentry); 530 splx(s); 531 } 532 533 static void 534 pa_reclaim_register(struct pool_allocator *pa) 535 { 536 struct vm_map *map = *pa->pa_backingmapptr; 537 struct pool *pp; 538 539 KASSERT(pa->pa_backingmap == NULL); 540 if (map == NULL) { 541 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 542 return; 543 } 544 pa->pa_backingmap = map; 545 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 546 pool_reclaim_register(pp); 547 } 548 } 549 550 /* 551 * Initialize all the pools listed in the "pools" link set. 552 */ 553 void 554 pool_subsystem_init(void) 555 { 556 struct pool_allocator *pa; 557 __link_set_decl(pools, struct link_pool_init); 558 struct link_pool_init * const *pi; 559 560 __link_set_foreach(pi, pools) 561 pool_init((*pi)->pp, (*pi)->size, (*pi)->align, 562 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan, 563 (*pi)->palloc, (*pi)->ipl); 564 565 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 566 KASSERT(pa->pa_backingmapptr != NULL); 567 KASSERT(*pa->pa_backingmapptr != NULL); 568 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 569 pa_reclaim_register(pa); 570 } 571 } 572 573 /* 574 * Initialize the given pool resource structure. 575 * 576 * We export this routine to allow other kernel parts to declare 577 * static pools that must be initialized before malloc() is available. 578 */ 579 void 580 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 581 const char *wchan, struct pool_allocator *palloc, int ipl) 582 { 583 #ifdef DEBUG 584 struct pool *pp1; 585 #endif 586 size_t trysize, phsize; 587 int off, slack, s; 588 589 KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >= 590 PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1)); 591 592 #ifdef DEBUG 593 /* 594 * Check that the pool hasn't already been initialised and 595 * added to the list of all pools. 596 */ 597 LIST_FOREACH(pp1, &pool_head, pr_poollist) { 598 if (pp == pp1) 599 panic("pool_init: pool %s already initialised", 600 wchan); 601 } 602 #endif 603 604 #ifdef POOL_DIAGNOSTIC 605 /* 606 * Always log if POOL_DIAGNOSTIC is defined. 607 */ 608 if (pool_logsize != 0) 609 flags |= PR_LOGGING; 610 #endif 611 612 if (palloc == NULL) 613 palloc = &pool_allocator_kmem; 614 #ifdef POOL_SUBPAGE 615 if (size > palloc->pa_pagesz) { 616 if (palloc == &pool_allocator_kmem) 617 palloc = &pool_allocator_kmem_fullpage; 618 else if (palloc == &pool_allocator_nointr) 619 palloc = &pool_allocator_nointr_fullpage; 620 } 621 #endif /* POOL_SUBPAGE */ 622 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 623 if (palloc->pa_pagesz == 0) 624 palloc->pa_pagesz = PAGE_SIZE; 625 626 TAILQ_INIT(&palloc->pa_list); 627 628 simple_lock_init(&palloc->pa_slock); 629 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 630 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 631 632 if (palloc->pa_backingmapptr != NULL) { 633 pa_reclaim_register(palloc); 634 } 635 palloc->pa_flags |= PA_INITIALIZED; 636 } 637 638 if (align == 0) 639 align = ALIGN(1); 640 641 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 642 size = sizeof(struct pool_item); 643 644 size = roundup(size, align); 645 #ifdef DIAGNOSTIC 646 if (size > palloc->pa_pagesz) 647 panic("pool_init: pool item size (%zu) too large", size); 648 #endif 649 650 /* 651 * Initialize the pool structure. 
652 */ 653 LIST_INIT(&pp->pr_emptypages); 654 LIST_INIT(&pp->pr_fullpages); 655 LIST_INIT(&pp->pr_partpages); 656 LIST_INIT(&pp->pr_cachelist); 657 pp->pr_curpage = NULL; 658 pp->pr_npages = 0; 659 pp->pr_minitems = 0; 660 pp->pr_minpages = 0; 661 pp->pr_maxpages = UINT_MAX; 662 pp->pr_roflags = flags; 663 pp->pr_flags = 0; 664 pp->pr_size = size; 665 pp->pr_align = align; 666 pp->pr_wchan = wchan; 667 pp->pr_alloc = palloc; 668 pp->pr_nitems = 0; 669 pp->pr_nout = 0; 670 pp->pr_hardlimit = UINT_MAX; 671 pp->pr_hardlimit_warning = NULL; 672 pp->pr_hardlimit_ratecap.tv_sec = 0; 673 pp->pr_hardlimit_ratecap.tv_usec = 0; 674 pp->pr_hardlimit_warning_last.tv_sec = 0; 675 pp->pr_hardlimit_warning_last.tv_usec = 0; 676 pp->pr_drain_hook = NULL; 677 pp->pr_drain_hook_arg = NULL; 678 pp->pr_freecheck = NULL; 679 680 /* 681 * Decide whether to put the page header off page to avoid 682 * wasting too large a part of the page or too big item. 683 * Off-page page headers go on a hash table, so we can match 684 * a returned item with its header based on the page address. 685 * We use 1/16 of the page size and about 8 times of the item 686 * size as the threshold (XXX: tune) 687 * 688 * However, we'll put the header into the page if we can put 689 * it without wasting any items. 690 * 691 * Silently enforce `0 <= ioff < align'. 692 */ 693 pp->pr_itemoffset = ioff %= align; 694 /* See the comment below about reserved bytes. */ 695 trysize = palloc->pa_pagesz - ((align - ioff) % align); 696 phsize = ALIGN(sizeof(struct pool_item_header)); 697 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 && 698 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 699 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 700 /* Use the end of the page for the page header */ 701 pp->pr_roflags |= PR_PHINPAGE; 702 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 703 } else { 704 /* The page header will be taken from our page header pool */ 705 pp->pr_phoffset = 0; 706 off = palloc->pa_pagesz; 707 SPLAY_INIT(&pp->pr_phtree); 708 } 709 710 /* 711 * Alignment is to take place at `ioff' within the item. This means 712 * we must reserve up to `align - 1' bytes on the page to allow 713 * appropriate positioning of each item. 714 */ 715 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 716 KASSERT(pp->pr_itemsperpage != 0); 717 if ((pp->pr_roflags & PR_NOTOUCH)) { 718 int idx; 719 720 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 721 idx++) { 722 /* nothing */ 723 } 724 if (idx >= PHPOOL_MAX) { 725 /* 726 * if you see this panic, consider to tweak 727 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 728 */ 729 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 730 pp->pr_wchan, pp->pr_itemsperpage); 731 } 732 pp->pr_phpool = &phpool[idx]; 733 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 734 pp->pr_phpool = &phpool[0]; 735 } 736 #if defined(DIAGNOSTIC) 737 else { 738 pp->pr_phpool = NULL; 739 } 740 #endif 741 742 /* 743 * Use the slack between the chunks and the page header 744 * for "cache coloring". 
745 */ 746 slack = off - pp->pr_itemsperpage * pp->pr_size; 747 pp->pr_maxcolor = (slack / align) * align; 748 pp->pr_curcolor = 0; 749 750 pp->pr_nget = 0; 751 pp->pr_nfail = 0; 752 pp->pr_nput = 0; 753 pp->pr_npagealloc = 0; 754 pp->pr_npagefree = 0; 755 pp->pr_hiwat = 0; 756 pp->pr_nidle = 0; 757 758 #ifdef POOL_DIAGNOSTIC 759 if (flags & PR_LOGGING) { 760 if (kmem_map == NULL || 761 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 762 M_TEMP, M_NOWAIT)) == NULL) 763 pp->pr_roflags &= ~PR_LOGGING; 764 pp->pr_curlogentry = 0; 765 pp->pr_logsize = pool_logsize; 766 } 767 #endif 768 769 pp->pr_entered_file = NULL; 770 pp->pr_entered_line = 0; 771 772 simple_lock_init(&pp->pr_slock); 773 774 /* 775 * Initialize private page header pool and cache magazine pool if we 776 * haven't done so yet. 777 * XXX LOCKING. 778 */ 779 if (phpool[0].pr_size == 0) { 780 int idx; 781 for (idx = 0; idx < PHPOOL_MAX; idx++) { 782 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 783 int nelem; 784 size_t sz; 785 786 nelem = PHPOOL_FREELIST_NELEM(idx); 787 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 788 "phpool-%d", nelem); 789 sz = sizeof(struct pool_item_header); 790 if (nelem) { 791 sz = PR_FREELIST_ALIGN(sz) 792 + nelem * sizeof(pool_item_freelist_t); 793 } 794 pool_init(&phpool[idx], sz, 0, 0, 0, 795 phpool_names[idx], &pool_allocator_meta, IPL_VM); 796 } 797 #ifdef POOL_SUBPAGE 798 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 799 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 800 #endif 801 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0, 802 0, "pcgpool", &pool_allocator_meta, IPL_VM); 803 } 804 805 /* Insert into the list of all pools. */ 806 simple_lock(&pool_head_slock); 807 LIST_INSERT_HEAD(&pool_head, pp, pr_poollist); 808 simple_unlock(&pool_head_slock); 809 810 /* Insert this into the list of pools using this allocator. */ 811 s = splvm(); 812 simple_lock(&palloc->pa_slock); 813 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 814 simple_unlock(&palloc->pa_slock); 815 splx(s); 816 pool_reclaim_register(pp); 817 } 818 819 /* 820 * De-commision a pool resource. 821 */ 822 void 823 pool_destroy(struct pool *pp) 824 { 825 struct pool_pagelist pq; 826 struct pool_item_header *ph; 827 int s; 828 829 /* Remove from global pool list */ 830 simple_lock(&pool_head_slock); 831 LIST_REMOVE(pp, pr_poollist); 832 if (drainpp == pp) 833 drainpp = NULL; 834 simple_unlock(&pool_head_slock); 835 836 /* Remove this pool from its allocator's list of pools. 
*/ 837 pool_reclaim_unregister(pp); 838 s = splvm(); 839 simple_lock(&pp->pr_alloc->pa_slock); 840 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 841 simple_unlock(&pp->pr_alloc->pa_slock); 842 splx(s); 843 844 s = splvm(); 845 simple_lock(&pp->pr_slock); 846 847 KASSERT(LIST_EMPTY(&pp->pr_cachelist)); 848 849 #ifdef DIAGNOSTIC 850 if (pp->pr_nout != 0) { 851 pr_printlog(pp, NULL, printf); 852 panic("pool_destroy: pool busy: still out: %u", 853 pp->pr_nout); 854 } 855 #endif 856 857 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 858 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 859 860 /* Remove all pages */ 861 LIST_INIT(&pq); 862 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 863 pr_rmpage(pp, ph, &pq); 864 865 simple_unlock(&pp->pr_slock); 866 splx(s); 867 868 pr_pagelist_free(pp, &pq); 869 870 #ifdef POOL_DIAGNOSTIC 871 if ((pp->pr_roflags & PR_LOGGING) != 0) 872 free(pp->pr_log, M_TEMP); 873 #endif 874 } 875 876 void 877 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 878 { 879 880 /* XXX no locking -- must be used just after pool_init() */ 881 #ifdef DIAGNOSTIC 882 if (pp->pr_drain_hook != NULL) 883 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 884 #endif 885 pp->pr_drain_hook = fn; 886 pp->pr_drain_hook_arg = arg; 887 } 888 889 static struct pool_item_header * 890 pool_alloc_item_header(struct pool *pp, void *storage, int flags) 891 { 892 struct pool_item_header *ph; 893 int s; 894 895 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0); 896 897 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 898 ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset); 899 else { 900 s = splvm(); 901 ph = pool_get(pp->pr_phpool, flags); 902 splx(s); 903 } 904 905 return (ph); 906 } 907 908 /* 909 * Grab an item from the pool; must be called at appropriate spl level 910 */ 911 void * 912 #ifdef POOL_DIAGNOSTIC 913 _pool_get(struct pool *pp, int flags, const char *file, long line) 914 #else 915 pool_get(struct pool *pp, int flags) 916 #endif 917 { 918 struct pool_item *pi; 919 struct pool_item_header *ph; 920 void *v; 921 922 #ifdef DIAGNOSTIC 923 if (__predict_false(pp->pr_itemsperpage == 0)) 924 panic("pool_get: pool %p: pr_itemsperpage is zero, " 925 "pool not initialized?", pp); 926 if (__predict_false(curlwp == NULL && doing_shutdown == 0 && 927 (flags & PR_WAITOK) != 0)) 928 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan); 929 930 #endif /* DIAGNOSTIC */ 931 #ifdef LOCKDEBUG 932 if (flags & PR_WAITOK) 933 ASSERT_SLEEPABLE(NULL, "pool_get(PR_WAITOK)"); 934 #endif 935 936 simple_lock(&pp->pr_slock); 937 pr_enter(pp, file, line); 938 939 startover: 940 /* 941 * Check to see if we've reached the hard limit. If we have, 942 * and we can wait, then wait until an item has been returned to 943 * the pool. 944 */ 945 #ifdef DIAGNOSTIC 946 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 947 pr_leave(pp); 948 simple_unlock(&pp->pr_slock); 949 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 950 } 951 #endif 952 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 953 if (pp->pr_drain_hook != NULL) { 954 /* 955 * Since the drain hook is going to free things 956 * back to the pool, unlock, call the hook, re-lock, 957 * and check the hardlimit condition again. 
958 */ 959 pr_leave(pp); 960 simple_unlock(&pp->pr_slock); 961 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 962 simple_lock(&pp->pr_slock); 963 pr_enter(pp, file, line); 964 if (pp->pr_nout < pp->pr_hardlimit) 965 goto startover; 966 } 967 968 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 969 /* 970 * XXX: A warning isn't logged in this case. Should 971 * it be? 972 */ 973 pp->pr_flags |= PR_WANTED; 974 pr_leave(pp); 975 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock); 976 pr_enter(pp, file, line); 977 goto startover; 978 } 979 980 /* 981 * Log a message that the hard limit has been hit. 982 */ 983 if (pp->pr_hardlimit_warning != NULL && 984 ratecheck(&pp->pr_hardlimit_warning_last, 985 &pp->pr_hardlimit_ratecap)) 986 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 987 988 pp->pr_nfail++; 989 990 pr_leave(pp); 991 simple_unlock(&pp->pr_slock); 992 return (NULL); 993 } 994 995 /* 996 * The convention we use is that if `curpage' is not NULL, then 997 * it points at a non-empty bucket. In particular, `curpage' 998 * never points at a page header which has PR_PHINPAGE set and 999 * has no items in its bucket. 1000 */ 1001 if ((ph = pp->pr_curpage) == NULL) { 1002 int error; 1003 1004 #ifdef DIAGNOSTIC 1005 if (pp->pr_nitems != 0) { 1006 simple_unlock(&pp->pr_slock); 1007 printf("pool_get: %s: curpage NULL, nitems %u\n", 1008 pp->pr_wchan, pp->pr_nitems); 1009 panic("pool_get: nitems inconsistent"); 1010 } 1011 #endif 1012 1013 /* 1014 * Call the back-end page allocator for more memory. 1015 * Release the pool lock, as the back-end page allocator 1016 * may block. 1017 */ 1018 pr_leave(pp); 1019 error = pool_grow(pp, flags); 1020 pr_enter(pp, file, line); 1021 if (error != 0) { 1022 /* 1023 * We were unable to allocate a page or item 1024 * header, but we released the lock during 1025 * allocation, so perhaps items were freed 1026 * back to the pool. Check for this case. 1027 */ 1028 if (pp->pr_curpage != NULL) 1029 goto startover; 1030 1031 pp->pr_nfail++; 1032 pr_leave(pp); 1033 simple_unlock(&pp->pr_slock); 1034 return (NULL); 1035 } 1036 1037 /* Start the allocation process over. */ 1038 goto startover; 1039 } 1040 if (pp->pr_roflags & PR_NOTOUCH) { 1041 #ifdef DIAGNOSTIC 1042 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1043 pr_leave(pp); 1044 simple_unlock(&pp->pr_slock); 1045 panic("pool_get: %s: page empty", pp->pr_wchan); 1046 } 1047 #endif 1048 v = pr_item_notouch_get(pp, ph); 1049 #ifdef POOL_DIAGNOSTIC 1050 pr_log(pp, v, PRLOG_GET, file, line); 1051 #endif 1052 } else { 1053 v = pi = LIST_FIRST(&ph->ph_itemlist); 1054 if (__predict_false(v == NULL)) { 1055 pr_leave(pp); 1056 simple_unlock(&pp->pr_slock); 1057 panic("pool_get: %s: page empty", pp->pr_wchan); 1058 } 1059 #ifdef DIAGNOSTIC 1060 if (__predict_false(pp->pr_nitems == 0)) { 1061 pr_leave(pp); 1062 simple_unlock(&pp->pr_slock); 1063 printf("pool_get: %s: items on itemlist, nitems %u\n", 1064 pp->pr_wchan, pp->pr_nitems); 1065 panic("pool_get: nitems inconsistent"); 1066 } 1067 #endif 1068 1069 #ifdef POOL_DIAGNOSTIC 1070 pr_log(pp, v, PRLOG_GET, file, line); 1071 #endif 1072 1073 #ifdef DIAGNOSTIC 1074 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1075 pr_printlog(pp, pi, printf); 1076 panic("pool_get(%s): free list modified: " 1077 "magic=%x; page %p; item addr %p\n", 1078 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1079 } 1080 #endif 1081 1082 /* 1083 * Remove from item list. 
1084 */ 1085 LIST_REMOVE(pi, pi_list); 1086 } 1087 pp->pr_nitems--; 1088 pp->pr_nout++; 1089 if (ph->ph_nmissing == 0) { 1090 #ifdef DIAGNOSTIC 1091 if (__predict_false(pp->pr_nidle == 0)) 1092 panic("pool_get: nidle inconsistent"); 1093 #endif 1094 pp->pr_nidle--; 1095 1096 /* 1097 * This page was previously empty. Move it to the list of 1098 * partially-full pages. This page is already curpage. 1099 */ 1100 LIST_REMOVE(ph, ph_pagelist); 1101 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1102 } 1103 ph->ph_nmissing++; 1104 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1105 #ifdef DIAGNOSTIC 1106 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1107 !LIST_EMPTY(&ph->ph_itemlist))) { 1108 pr_leave(pp); 1109 simple_unlock(&pp->pr_slock); 1110 panic("pool_get: %s: nmissing inconsistent", 1111 pp->pr_wchan); 1112 } 1113 #endif 1114 /* 1115 * This page is now full. Move it to the full list 1116 * and select a new current page. 1117 */ 1118 LIST_REMOVE(ph, ph_pagelist); 1119 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1120 pool_update_curpage(pp); 1121 } 1122 1123 pp->pr_nget++; 1124 pr_leave(pp); 1125 1126 /* 1127 * If we have a low water mark and we are now below that low 1128 * water mark, add more items to the pool. 1129 */ 1130 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1131 /* 1132 * XXX: Should we log a warning? Should we set up a timeout 1133 * to try again in a second or so? The latter could break 1134 * a caller's assumptions about interrupt protection, etc. 1135 */ 1136 } 1137 1138 simple_unlock(&pp->pr_slock); 1139 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 1140 FREECHECK_OUT(&pp->pr_freecheck, v); 1141 return (v); 1142 } 1143 1144 /* 1145 * Internal version of pool_put(). Pool is already locked/entered. 1146 */ 1147 static void 1148 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1149 { 1150 struct pool_item *pi = v; 1151 struct pool_item_header *ph; 1152 1153 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1154 FREECHECK_IN(&pp->pr_freecheck, v); 1155 1156 #ifdef DIAGNOSTIC 1157 if (__predict_false(pp->pr_nout == 0)) { 1158 printf("pool %s: putting with none out\n", 1159 pp->pr_wchan); 1160 panic("pool_put"); 1161 } 1162 #endif 1163 1164 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1165 pr_printlog(pp, NULL, printf); 1166 panic("pool_put: %s: page header missing", pp->pr_wchan); 1167 } 1168 1169 #ifdef LOCKDEBUG 1170 /* 1171 * Check if we're freeing a locked simple lock. 1172 */ 1173 simple_lock_freecheck(pi, (char *)pi + pp->pr_size); 1174 #endif 1175 1176 /* 1177 * Return to item list. 
1178 */ 1179 if (pp->pr_roflags & PR_NOTOUCH) { 1180 pr_item_notouch_put(pp, ph, v); 1181 } else { 1182 #ifdef DIAGNOSTIC 1183 pi->pi_magic = PI_MAGIC; 1184 #endif 1185 #ifdef DEBUG 1186 { 1187 int i, *ip = v; 1188 1189 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1190 *ip++ = PI_MAGIC; 1191 } 1192 } 1193 #endif 1194 1195 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1196 } 1197 KDASSERT(ph->ph_nmissing != 0); 1198 ph->ph_nmissing--; 1199 pp->pr_nput++; 1200 pp->pr_nitems++; 1201 pp->pr_nout--; 1202 1203 /* Cancel "pool empty" condition if it exists */ 1204 if (pp->pr_curpage == NULL) 1205 pp->pr_curpage = ph; 1206 1207 if (pp->pr_flags & PR_WANTED) { 1208 pp->pr_flags &= ~PR_WANTED; 1209 if (ph->ph_nmissing == 0) 1210 pp->pr_nidle++; 1211 wakeup((void *)pp); 1212 return; 1213 } 1214 1215 /* 1216 * If this page is now empty, do one of two things: 1217 * 1218 * (1) If we have more pages than the page high water mark, 1219 * free the page back to the system. ONLY CONSIDER 1220 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1221 * CLAIM. 1222 * 1223 * (2) Otherwise, move the page to the empty page list. 1224 * 1225 * Either way, select a new current page (so we use a partially-full 1226 * page if one is available). 1227 */ 1228 if (ph->ph_nmissing == 0) { 1229 pp->pr_nidle++; 1230 if (pp->pr_npages > pp->pr_minpages && 1231 (pp->pr_npages > pp->pr_maxpages || 1232 pa_starved_p(pp->pr_alloc))) { 1233 pr_rmpage(pp, ph, pq); 1234 } else { 1235 LIST_REMOVE(ph, ph_pagelist); 1236 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1237 1238 /* 1239 * Update the timestamp on the page. A page must 1240 * be idle for some period of time before it can 1241 * be reclaimed by the pagedaemon. This minimizes 1242 * ping-pong'ing for memory. 1243 */ 1244 getmicrotime(&ph->ph_time); 1245 } 1246 pool_update_curpage(pp); 1247 } 1248 1249 /* 1250 * If the page was previously completely full, move it to the 1251 * partially-full list and make it the current page. The next 1252 * allocation will get the item from this page, instead of 1253 * further fragmenting the pool. 1254 */ 1255 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1256 LIST_REMOVE(ph, ph_pagelist); 1257 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1258 pp->pr_curpage = ph; 1259 } 1260 } 1261 1262 /* 1263 * Return resource to the pool; must be called at appropriate spl level 1264 */ 1265 #ifdef POOL_DIAGNOSTIC 1266 void 1267 _pool_put(struct pool *pp, void *v, const char *file, long line) 1268 { 1269 struct pool_pagelist pq; 1270 1271 LIST_INIT(&pq); 1272 1273 simple_lock(&pp->pr_slock); 1274 pr_enter(pp, file, line); 1275 1276 pr_log(pp, v, PRLOG_PUT, file, line); 1277 1278 pool_do_put(pp, v, &pq); 1279 1280 pr_leave(pp); 1281 simple_unlock(&pp->pr_slock); 1282 1283 pr_pagelist_free(pp, &pq); 1284 } 1285 #undef pool_put 1286 #endif /* POOL_DIAGNOSTIC */ 1287 1288 void 1289 pool_put(struct pool *pp, void *v) 1290 { 1291 struct pool_pagelist pq; 1292 1293 LIST_INIT(&pq); 1294 1295 simple_lock(&pp->pr_slock); 1296 pool_do_put(pp, v, &pq); 1297 simple_unlock(&pp->pr_slock); 1298 1299 pr_pagelist_free(pp, &pq); 1300 } 1301 1302 #ifdef POOL_DIAGNOSTIC 1303 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1304 #endif 1305 1306 /* 1307 * pool_grow: grow a pool by a page. 1308 * 1309 * => called with pool locked. 1310 * => unlock and relock the pool. 1311 * => return with pool locked. 
1312 */ 1313 1314 static int 1315 pool_grow(struct pool *pp, int flags) 1316 { 1317 struct pool_item_header *ph = NULL; 1318 char *cp; 1319 1320 simple_unlock(&pp->pr_slock); 1321 cp = pool_allocator_alloc(pp, flags); 1322 if (__predict_true(cp != NULL)) { 1323 ph = pool_alloc_item_header(pp, cp, flags); 1324 } 1325 if (__predict_false(cp == NULL || ph == NULL)) { 1326 if (cp != NULL) { 1327 pool_allocator_free(pp, cp); 1328 } 1329 simple_lock(&pp->pr_slock); 1330 return ENOMEM; 1331 } 1332 1333 simple_lock(&pp->pr_slock); 1334 pool_prime_page(pp, cp, ph); 1335 pp->pr_npagealloc++; 1336 return 0; 1337 } 1338 1339 /* 1340 * Add N items to the pool. 1341 */ 1342 int 1343 pool_prime(struct pool *pp, int n) 1344 { 1345 int newpages; 1346 int error = 0; 1347 1348 simple_lock(&pp->pr_slock); 1349 1350 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1351 1352 while (newpages-- > 0) { 1353 error = pool_grow(pp, PR_NOWAIT); 1354 if (error) { 1355 break; 1356 } 1357 pp->pr_minpages++; 1358 } 1359 1360 if (pp->pr_minpages >= pp->pr_maxpages) 1361 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1362 1363 simple_unlock(&pp->pr_slock); 1364 return error; 1365 } 1366 1367 /* 1368 * Add a page worth of items to the pool. 1369 * 1370 * Note, we must be called with the pool descriptor LOCKED. 1371 */ 1372 static void 1373 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1374 { 1375 struct pool_item *pi; 1376 void *cp = storage; 1377 const unsigned int align = pp->pr_align; 1378 const unsigned int ioff = pp->pr_itemoffset; 1379 int n; 1380 1381 LOCK_ASSERT(simple_lock_held(&pp->pr_slock)); 1382 1383 #ifdef DIAGNOSTIC 1384 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1385 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1386 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1387 #endif 1388 1389 /* 1390 * Insert page header. 1391 */ 1392 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1393 LIST_INIT(&ph->ph_itemlist); 1394 ph->ph_page = storage; 1395 ph->ph_nmissing = 0; 1396 getmicrotime(&ph->ph_time); 1397 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1398 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1399 1400 pp->pr_nidle++; 1401 1402 /* 1403 * Color this page. 1404 */ 1405 cp = (char *)cp + pp->pr_curcolor; 1406 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1407 pp->pr_curcolor = 0; 1408 1409 /* 1410 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 1411 */ 1412 if (ioff != 0) 1413 cp = (char *)cp + align - ioff; 1414 1415 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1416 1417 /* 1418 * Insert remaining chunks on the bucket list. 1419 */ 1420 n = pp->pr_itemsperpage; 1421 pp->pr_nitems += n; 1422 1423 if (pp->pr_roflags & PR_NOTOUCH) { 1424 pool_item_freelist_t *freelist = PR_FREELIST(ph); 1425 int i; 1426 1427 ph->ph_off = (char *)cp - (char *)storage; 1428 ph->ph_firstfree = 0; 1429 for (i = 0; i < n - 1; i++) 1430 freelist[i] = i + 1; 1431 freelist[n - 1] = PR_INDEX_EOL; 1432 } else { 1433 while (n--) { 1434 pi = (struct pool_item *)cp; 1435 1436 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1437 1438 /* Insert on page list */ 1439 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1440 #ifdef DIAGNOSTIC 1441 pi->pi_magic = PI_MAGIC; 1442 #endif 1443 cp = (char *)cp + pp->pr_size; 1444 1445 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1446 } 1447 } 1448 1449 /* 1450 * If the pool was depleted, point at the new page. 
1451 */ 1452 if (pp->pr_curpage == NULL) 1453 pp->pr_curpage = ph; 1454 1455 if (++pp->pr_npages > pp->pr_hiwat) 1456 pp->pr_hiwat = pp->pr_npages; 1457 } 1458 1459 /* 1460 * Used by pool_get() when nitems drops below the low water mark. This 1461 * is used to catch up pr_nitems with the low water mark. 1462 * 1463 * Note 1, we never wait for memory here, we let the caller decide what to do. 1464 * 1465 * Note 2, we must be called with the pool already locked, and we return 1466 * with it locked. 1467 */ 1468 static int 1469 pool_catchup(struct pool *pp) 1470 { 1471 int error = 0; 1472 1473 while (POOL_NEEDS_CATCHUP(pp)) { 1474 error = pool_grow(pp, PR_NOWAIT); 1475 if (error) { 1476 break; 1477 } 1478 } 1479 return error; 1480 } 1481 1482 static void 1483 pool_update_curpage(struct pool *pp) 1484 { 1485 1486 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1487 if (pp->pr_curpage == NULL) { 1488 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1489 } 1490 } 1491 1492 void 1493 pool_setlowat(struct pool *pp, int n) 1494 { 1495 1496 simple_lock(&pp->pr_slock); 1497 1498 pp->pr_minitems = n; 1499 pp->pr_minpages = (n == 0) 1500 ? 0 1501 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1502 1503 /* Make sure we're caught up with the newly-set low water mark. */ 1504 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1505 /* 1506 * XXX: Should we log a warning? Should we set up a timeout 1507 * to try again in a second or so? The latter could break 1508 * a caller's assumptions about interrupt protection, etc. 1509 */ 1510 } 1511 1512 simple_unlock(&pp->pr_slock); 1513 } 1514 1515 void 1516 pool_sethiwat(struct pool *pp, int n) 1517 { 1518 1519 simple_lock(&pp->pr_slock); 1520 1521 pp->pr_maxpages = (n == 0) 1522 ? 0 1523 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1524 1525 simple_unlock(&pp->pr_slock); 1526 } 1527 1528 void 1529 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1530 { 1531 1532 simple_lock(&pp->pr_slock); 1533 1534 pp->pr_hardlimit = n; 1535 pp->pr_hardlimit_warning = warnmess; 1536 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1537 pp->pr_hardlimit_warning_last.tv_sec = 0; 1538 pp->pr_hardlimit_warning_last.tv_usec = 0; 1539 1540 /* 1541 * In-line version of pool_sethiwat(), because we don't want to 1542 * release the lock. 1543 */ 1544 pp->pr_maxpages = (n == 0) 1545 ? 0 1546 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1547 1548 simple_unlock(&pp->pr_slock); 1549 } 1550 1551 /* 1552 * Release all complete pages that have not been used recently. 1553 */ 1554 int 1555 #ifdef POOL_DIAGNOSTIC 1556 _pool_reclaim(struct pool *pp, const char *file, long line) 1557 #else 1558 pool_reclaim(struct pool *pp) 1559 #endif 1560 { 1561 struct pool_item_header *ph, *phnext; 1562 struct pool_cache *pc; 1563 struct pool_pagelist pq; 1564 struct pool_cache_grouplist pcgl; 1565 struct timeval curtime, diff; 1566 1567 if (pp->pr_drain_hook != NULL) { 1568 /* 1569 * The drain hook must be called with the pool unlocked. 1570 */ 1571 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1572 } 1573 1574 if (simple_lock_try(&pp->pr_slock) == 0) 1575 return (0); 1576 pr_enter(pp, file, line); 1577 1578 LIST_INIT(&pq); 1579 LIST_INIT(&pcgl); 1580 1581 /* 1582 * Reclaim items from the pool's caches. 
1583 */ 1584 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) 1585 pool_cache_reclaim(pc, &pq, &pcgl); 1586 1587 getmicrotime(&curtime); 1588 1589 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1590 phnext = LIST_NEXT(ph, ph_pagelist); 1591 1592 /* Check our minimum page claim */ 1593 if (pp->pr_npages <= pp->pr_minpages) 1594 break; 1595 1596 KASSERT(ph->ph_nmissing == 0); 1597 timersub(&curtime, &ph->ph_time, &diff); 1598 if (diff.tv_sec < pool_inactive_time 1599 && !pa_starved_p(pp->pr_alloc)) 1600 continue; 1601 1602 /* 1603 * If freeing this page would put us below 1604 * the low water mark, stop now. 1605 */ 1606 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1607 pp->pr_minitems) 1608 break; 1609 1610 pr_rmpage(pp, ph, &pq); 1611 } 1612 1613 pr_leave(pp); 1614 simple_unlock(&pp->pr_slock); 1615 if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl)) 1616 return 0; 1617 1618 pr_pagelist_free(pp, &pq); 1619 pcg_grouplist_free(&pcgl); 1620 return (1); 1621 } 1622 1623 /* 1624 * Drain pools, one at a time. 1625 * 1626 * Note, we must never be called from an interrupt context. 1627 * 1628 * XXX Pool can disappear while draining. 1629 */ 1630 void 1631 pool_drain(void *arg) 1632 { 1633 struct pool *pp; 1634 int s; 1635 1636 pp = NULL; 1637 s = splvm(); 1638 simple_lock(&pool_head_slock); 1639 if (drainpp == NULL) { 1640 drainpp = LIST_FIRST(&pool_head); 1641 } 1642 if (drainpp) { 1643 pp = drainpp; 1644 drainpp = LIST_NEXT(pp, pr_poollist); 1645 } 1646 simple_unlock(&pool_head_slock); 1647 if (pp) 1648 pool_reclaim(pp); 1649 splx(s); 1650 } 1651 1652 /* 1653 * Diagnostic helpers. 1654 */ 1655 void 1656 pool_print(struct pool *pp, const char *modif) 1657 { 1658 int s; 1659 1660 s = splvm(); 1661 if (simple_lock_try(&pp->pr_slock) == 0) { 1662 printf("pool %s is locked; try again later\n", 1663 pp->pr_wchan); 1664 splx(s); 1665 return; 1666 } 1667 pool_print1(pp, modif, printf); 1668 simple_unlock(&pp->pr_slock); 1669 splx(s); 1670 } 1671 1672 void 1673 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1674 { 1675 struct pool *pp; 1676 1677 if (simple_lock_try(&pool_head_slock) == 0) { 1678 (*pr)("WARNING: pool_head_slock is locked\n"); 1679 } else { 1680 simple_unlock(&pool_head_slock); 1681 } 1682 1683 LIST_FOREACH(pp, &pool_head, pr_poollist) { 1684 pool_printit(pp, modif, pr); 1685 } 1686 } 1687 1688 void 1689 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1690 { 1691 1692 if (pp == NULL) { 1693 (*pr)("Must specify a pool to print.\n"); 1694 return; 1695 } 1696 1697 /* 1698 * Called from DDB; interrupts should be blocked, and all 1699 * other processors should be paused. We can skip locking 1700 * the pool in this case. 1701 * 1702 * We do a simple_lock_try() just to print the lock 1703 * status, however. 
1704 */ 1705 1706 if (simple_lock_try(&pp->pr_slock) == 0) 1707 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan); 1708 else 1709 simple_unlock(&pp->pr_slock); 1710 1711 pool_print1(pp, modif, pr); 1712 } 1713 1714 static void 1715 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1716 void (*pr)(const char *, ...)) 1717 { 1718 struct pool_item_header *ph; 1719 #ifdef DIAGNOSTIC 1720 struct pool_item *pi; 1721 #endif 1722 1723 LIST_FOREACH(ph, pl, ph_pagelist) { 1724 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1725 ph->ph_page, ph->ph_nmissing, 1726 (u_long)ph->ph_time.tv_sec, 1727 (u_long)ph->ph_time.tv_usec); 1728 #ifdef DIAGNOSTIC 1729 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1730 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1731 if (pi->pi_magic != PI_MAGIC) { 1732 (*pr)("\t\t\titem %p, magic 0x%x\n", 1733 pi, pi->pi_magic); 1734 } 1735 } 1736 } 1737 #endif 1738 } 1739 } 1740 1741 static void 1742 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1743 { 1744 struct pool_item_header *ph; 1745 struct pool_cache *pc; 1746 struct pool_cache_group *pcg; 1747 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1748 char c; 1749 1750 while ((c = *modif++) != '\0') { 1751 if (c == 'l') 1752 print_log = 1; 1753 if (c == 'p') 1754 print_pagelist = 1; 1755 if (c == 'c') 1756 print_cache = 1; 1757 } 1758 1759 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1760 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1761 pp->pr_roflags); 1762 (*pr)("\talloc %p\n", pp->pr_alloc); 1763 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1764 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1765 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1766 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1767 1768 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1769 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1770 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1771 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1772 1773 if (print_pagelist == 0) 1774 goto skip_pagelist; 1775 1776 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1777 (*pr)("\n\tempty page list:\n"); 1778 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1779 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1780 (*pr)("\n\tfull page list:\n"); 1781 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1782 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1783 (*pr)("\n\tpartial-page list:\n"); 1784 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1785 1786 if (pp->pr_curpage == NULL) 1787 (*pr)("\tno current page\n"); 1788 else 1789 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1790 1791 skip_pagelist: 1792 if (print_log == 0) 1793 goto skip_log; 1794 1795 (*pr)("\n"); 1796 if ((pp->pr_roflags & PR_LOGGING) == 0) 1797 (*pr)("\tno log\n"); 1798 else { 1799 pr_printlog(pp, NULL, pr); 1800 } 1801 1802 skip_log: 1803 if (print_cache == 0) 1804 goto skip_cache; 1805 1806 #define PR_GROUPLIST(pcg) \ 1807 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1808 for (i = 0; i < PCG_NOBJECTS; i++) { \ 1809 if (pcg->pcg_objects[i].pcgo_pa != \ 1810 POOL_PADDR_INVALID) { \ 1811 (*pr)("\t\t\t%p, 0x%llx\n", \ 1812 pcg->pcg_objects[i].pcgo_va, \ 1813 (unsigned long long) \ 1814 pcg->pcg_objects[i].pcgo_pa); \ 1815 } else { \ 1816 (*pr)("\t\t\t%p\n", \ 1817 pcg->pcg_objects[i].pcgo_va); \ 1818 } \ 1819 } 1820 1821 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) { 1822 (*pr)("\tcache %p\n", 
pc); 1823 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1824 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems); 1825 (*pr)("\t full groups:\n"); 1826 LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) { 1827 PR_GROUPLIST(pcg); 1828 } 1829 (*pr)("\t partial groups:\n"); 1830 LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) { 1831 PR_GROUPLIST(pcg); 1832 } 1833 (*pr)("\t empty groups:\n"); 1834 LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) { 1835 PR_GROUPLIST(pcg); 1836 } 1837 } 1838 #undef PR_GROUPLIST 1839 1840 skip_cache: 1841 pr_enter_check(pp, pr); 1842 } 1843 1844 static int 1845 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1846 { 1847 struct pool_item *pi; 1848 void *page; 1849 int n; 1850 1851 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1852 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1853 if (page != ph->ph_page && 1854 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1855 if (label != NULL) 1856 printf("%s: ", label); 1857 printf("pool(%p:%s): page inconsistency: page %p;" 1858 " at page head addr %p (p %p)\n", pp, 1859 pp->pr_wchan, ph->ph_page, 1860 ph, page); 1861 return 1; 1862 } 1863 } 1864 1865 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1866 return 0; 1867 1868 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1869 pi != NULL; 1870 pi = LIST_NEXT(pi,pi_list), n++) { 1871 1872 #ifdef DIAGNOSTIC 1873 if (pi->pi_magic != PI_MAGIC) { 1874 if (label != NULL) 1875 printf("%s: ", label); 1876 printf("pool(%s): free list modified: magic=%x;" 1877 " page %p; item ordinal %d; addr %p\n", 1878 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1879 n, pi); 1880 panic("pool"); 1881 } 1882 #endif 1883 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1884 continue; 1885 } 1886 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1887 if (page == ph->ph_page) 1888 continue; 1889 1890 if (label != NULL) 1891 printf("%s: ", label); 1892 printf("pool(%p:%s): page inconsistency: page %p;" 1893 " item ordinal %d; addr %p (p %p)\n", pp, 1894 pp->pr_wchan, ph->ph_page, 1895 n, pi, page); 1896 return 1; 1897 } 1898 return 0; 1899 } 1900 1901 1902 int 1903 pool_chk(struct pool *pp, const char *label) 1904 { 1905 struct pool_item_header *ph; 1906 int r = 0; 1907 1908 simple_lock(&pp->pr_slock); 1909 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1910 r = pool_chk_page(pp, label, ph); 1911 if (r) { 1912 goto out; 1913 } 1914 } 1915 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1916 r = pool_chk_page(pp, label, ph); 1917 if (r) { 1918 goto out; 1919 } 1920 } 1921 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1922 r = pool_chk_page(pp, label, ph); 1923 if (r) { 1924 goto out; 1925 } 1926 } 1927 1928 out: 1929 simple_unlock(&pp->pr_slock); 1930 return (r); 1931 } 1932 1933 /* 1934 * pool_cache_init: 1935 * 1936 * Initialize a pool cache. 1937 * 1938 * NOTE: If the pool must be protected from interrupts, we expect 1939 * to be called at the appropriate interrupt priority level. 
1940 */ 1941 void 1942 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1943 int (*ctor)(void *, void *, int), 1944 void (*dtor)(void *, void *), 1945 void *arg) 1946 { 1947 1948 LIST_INIT(&pc->pc_emptygroups); 1949 LIST_INIT(&pc->pc_fullgroups); 1950 LIST_INIT(&pc->pc_partgroups); 1951 simple_lock_init(&pc->pc_slock); 1952 1953 pc->pc_pool = pp; 1954 1955 pc->pc_ctor = ctor; 1956 pc->pc_dtor = dtor; 1957 pc->pc_arg = arg; 1958 1959 pc->pc_hits = 0; 1960 pc->pc_misses = 0; 1961 1962 pc->pc_ngroups = 0; 1963 1964 pc->pc_nitems = 0; 1965 1966 simple_lock(&pp->pr_slock); 1967 LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist); 1968 simple_unlock(&pp->pr_slock); 1969 } 1970 1971 /* 1972 * pool_cache_destroy: 1973 * 1974 * Destroy a pool cache. 1975 */ 1976 void 1977 pool_cache_destroy(struct pool_cache *pc) 1978 { 1979 struct pool *pp = pc->pc_pool; 1980 1981 /* First, invalidate the entire cache. */ 1982 pool_cache_invalidate(pc); 1983 1984 /* ...and remove it from the pool's cache list. */ 1985 simple_lock(&pp->pr_slock); 1986 LIST_REMOVE(pc, pc_poollist); 1987 simple_unlock(&pp->pr_slock); 1988 } 1989 1990 static inline void * 1991 pcg_get(struct pool_cache_group *pcg, paddr_t *pap) 1992 { 1993 void *object; 1994 u_int idx; 1995 1996 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1997 KASSERT(pcg->pcg_avail != 0); 1998 idx = --pcg->pcg_avail; 1999 2000 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL); 2001 object = pcg->pcg_objects[idx].pcgo_va; 2002 if (pap != NULL) 2003 *pap = pcg->pcg_objects[idx].pcgo_pa; 2004 pcg->pcg_objects[idx].pcgo_va = NULL; 2005 2006 return (object); 2007 } 2008 2009 static inline void 2010 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa) 2011 { 2012 u_int idx; 2013 2014 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 2015 idx = pcg->pcg_avail++; 2016 2017 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL); 2018 pcg->pcg_objects[idx].pcgo_va = object; 2019 pcg->pcg_objects[idx].pcgo_pa = pa; 2020 } 2021 2022 static void 2023 pcg_grouplist_free(struct pool_cache_grouplist *pcgl) 2024 { 2025 struct pool_cache_group *pcg; 2026 int s; 2027 2028 s = splvm(); 2029 while ((pcg = LIST_FIRST(pcgl)) != NULL) { 2030 LIST_REMOVE(pcg, pcg_list); 2031 pool_put(&pcgpool, pcg); 2032 } 2033 splx(s); 2034 } 2035 2036 /* 2037 * pool_cache_get{,_paddr}: 2038 * 2039 * Get an object from a pool cache (optionally returning 2040 * the physical address of the object). 2041 */ 2042 void * 2043 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap) 2044 { 2045 struct pool_cache_group *pcg; 2046 void *object; 2047 2048 #ifdef LOCKDEBUG 2049 if (flags & PR_WAITOK) 2050 ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)"); 2051 #endif 2052 2053 simple_lock(&pc->pc_slock); 2054 2055 pcg = LIST_FIRST(&pc->pc_partgroups); 2056 if (pcg == NULL) { 2057 pcg = LIST_FIRST(&pc->pc_fullgroups); 2058 if (pcg != NULL) { 2059 LIST_REMOVE(pcg, pcg_list); 2060 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list); 2061 } 2062 } 2063 if (pcg == NULL) { 2064 2065 /* 2066 * No groups with any available objects. Allocate 2067 * a new object, construct it, and return it to 2068 * the caller. We will allocate a group, if necessary, 2069 * when the object is freed back to the cache. 

static inline void *
pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
{
        void *object;
        u_int idx;

        KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
        KASSERT(pcg->pcg_avail != 0);
        idx = --pcg->pcg_avail;

        KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
        object = pcg->pcg_objects[idx].pcgo_va;
        if (pap != NULL)
                *pap = pcg->pcg_objects[idx].pcgo_pa;
        pcg->pcg_objects[idx].pcgo_va = NULL;

        return (object);
}

static inline void
pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
{
        u_int idx;

        KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
        idx = pcg->pcg_avail++;

        KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
        pcg->pcg_objects[idx].pcgo_va = object;
        pcg->pcg_objects[idx].pcgo_pa = pa;
}

static void
pcg_grouplist_free(struct pool_cache_grouplist *pcgl)
{
        struct pool_cache_group *pcg;
        int s;

        s = splvm();
        while ((pcg = LIST_FIRST(pcgl)) != NULL) {
                LIST_REMOVE(pcg, pcg_list);
                pool_put(&pcgpool, pcg);
        }
        splx(s);
}

/*
 * pool_cache_get{,_paddr}:
 *
 *      Get an object from a pool cache (optionally returning
 *      the physical address of the object).
 */
void *
pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
{
        struct pool_cache_group *pcg;
        void *object;

#ifdef LOCKDEBUG
        if (flags & PR_WAITOK)
                ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)");
#endif

        simple_lock(&pc->pc_slock);

        pcg = LIST_FIRST(&pc->pc_partgroups);
        if (pcg == NULL) {
                pcg = LIST_FIRST(&pc->pc_fullgroups);
                if (pcg != NULL) {
                        LIST_REMOVE(pcg, pcg_list);
                        LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
                }
        }
        if (pcg == NULL) {

                /*
                 * No groups with any available objects.  Allocate
                 * a new object, construct it, and return it to
                 * the caller.  We will allocate a group, if necessary,
                 * when the object is freed back to the cache.
                 */
                pc->pc_misses++;
                simple_unlock(&pc->pc_slock);
                object = pool_get(pc->pc_pool, flags);
                if (object != NULL && pc->pc_ctor != NULL) {
                        if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
                                pool_put(pc->pc_pool, object);
                                return (NULL);
                        }
                }
                KASSERT((((vaddr_t)object + pc->pc_pool->pr_itemoffset) &
                    (pc->pc_pool->pr_align - 1)) == 0);
                if (object != NULL && pap != NULL) {
#ifdef POOL_VTOPHYS
                        *pap = POOL_VTOPHYS(object);
#else
                        *pap = POOL_PADDR_INVALID;
#endif
                }

                FREECHECK_OUT(&pc->pc_freecheck, object);
                return (object);
        }

        pc->pc_hits++;
        pc->pc_nitems--;
        object = pcg_get(pcg, pap);

        if (pcg->pcg_avail == 0) {
                LIST_REMOVE(pcg, pcg_list);
                LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
        }
        simple_unlock(&pc->pc_slock);

        KASSERT((((vaddr_t)object + pc->pc_pool->pr_itemoffset) &
            (pc->pc_pool->pr_align - 1)) == 0);
        FREECHECK_OUT(&pc->pc_freecheck, object);
        return (object);
}

/*
 * pool_cache_put{,_paddr}:
 *
 *      Put an object back to the pool cache (optionally caching the
 *      physical address of the object).
 */
void
pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
{
        struct pool_cache_group *pcg;
        int s;

        FREECHECK_IN(&pc->pc_freecheck, object);

        if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
                goto destruct;
        }

        simple_lock(&pc->pc_slock);

        pcg = LIST_FIRST(&pc->pc_partgroups);
        if (pcg == NULL) {
                pcg = LIST_FIRST(&pc->pc_emptygroups);
                if (pcg != NULL) {
                        LIST_REMOVE(pcg, pcg_list);
                        LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
                }
        }
        if (pcg == NULL) {

                /*
                 * No empty groups to free the object to.  Attempt to
                 * allocate one.
                 */
                simple_unlock(&pc->pc_slock);
                s = splvm();
                pcg = pool_get(&pcgpool, PR_NOWAIT);
                splx(s);
                if (pcg == NULL) {
 destruct:

                        /*
                         * Unable to allocate a cache group; destruct the object
                         * and free it back to the pool.
                         */
                        pool_cache_destruct_object(pc, object);
                        return;
                }
                memset(pcg, 0, sizeof(*pcg));
                simple_lock(&pc->pc_slock);
                pc->pc_ngroups++;
                LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
        }

        pc->pc_nitems++;
        pcg_put(pcg, object, pa);

        if (pcg->pcg_avail == PCG_NOBJECTS) {
                LIST_REMOVE(pcg, pcg_list);
                LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list);
        }
        simple_unlock(&pc->pc_slock);
}
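
/*
 * Illustrative sketch (not compiled): allocating and releasing objects
 * through the hypothetical cache from the previous sketch.  PR_WAITOK may
 * only be used where it is safe to sleep; callers that cannot sleep pass
 * PR_NOWAIT and must cope with a NULL return.  pool_cache_get() and
 * pool_cache_put() are the variants of the interfaces above that do not
 * deal in physical addresses.
 */
#if 0
static struct example_obj *
example_obj_alloc(bool cansleep)
{

        /* Returns a constructed object, or NULL if allocation failed. */
        return pool_cache_get(&example_cache,
            cansleep ? PR_WAITOK : PR_NOWAIT);
}

static struct example_obj *
example_obj_alloc_paddr(paddr_t *pap)
{

        /*
         * As above, but also report the physical address; *pap is set
         * to POOL_PADDR_INVALID when it cannot be determined.
         */
        return pool_cache_get_paddr(&example_cache, PR_NOWAIT, pap);
}

static void
example_obj_free(struct example_obj *eo)
{

        /* The object returns to the cache still in constructed form. */
        pool_cache_put(&example_cache, eo);
}
#endif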

/*
 * pool_cache_destruct_object:
 *
 *      Force destruction of an object and its release back into
 *      the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

        if (pc->pc_dtor != NULL)
                (*pc->pc_dtor)(pc->pc_arg, object);
        pool_put(pc->pc_pool, object);
}

/*
 * pool_do_cache_invalidate_grouplist:
 *
 *      Invalidate a single grouplist and destruct all objects.
 *      XXX This is too expensive.  We should swap the list then
 *      unlock.
 */
static void
pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
    struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgdl)
{
        struct pool_cache_group *pcg;
        void *object;

        LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
        LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

        while ((pcg = LIST_FIRST(pcgsl)) != NULL) {
                pc->pc_ngroups--;
                LIST_REMOVE(pcg, pcg_list);
                LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
                pc->pc_nitems -= pcg->pcg_avail;
                simple_unlock(&pc->pc_pool->pr_slock);
                simple_unlock(&pc->pc_slock);

                while (pcg->pcg_avail != 0) {
                        object = pcg_get(pcg, NULL);
                        if (pc->pc_dtor != NULL)
                                (*pc->pc_dtor)(pc->pc_arg, object);
                        simple_lock(&pc->pc_pool->pr_slock);
                        pool_do_put(pc->pc_pool, object, pq);
                        simple_unlock(&pc->pc_pool->pr_slock);
                }

                simple_lock(&pc->pc_slock);
                simple_lock(&pc->pc_pool->pr_slock);
        }
}

static void
pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
    struct pool_cache_grouplist *pcgl)
{

        LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
        LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));

        pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
        pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);

        KASSERT(LIST_EMPTY(&pc->pc_partgroups));
        KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
        KASSERT(pc->pc_nitems == 0);
}

/*
 * pool_cache_invalidate:
 *
 *      Invalidate a pool cache (destruct and release all of the
 *      cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{
        struct pool_pagelist pq;
        struct pool_cache_grouplist pcgl;

        LIST_INIT(&pq);
        LIST_INIT(&pcgl);

        simple_lock(&pc->pc_slock);
        simple_lock(&pc->pc_pool->pr_slock);

        pool_do_cache_invalidate(pc, &pq, &pcgl);

        simple_unlock(&pc->pc_pool->pr_slock);
        simple_unlock(&pc->pc_slock);

        pr_pagelist_free(pc->pc_pool, &pq);
        pcg_grouplist_free(&pcgl);
}
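
/*
 * Illustrative sketch (not compiled): a user of the hypothetical cache
 * above flushing all constructed objects, for instance because global
 * state captured by the constructor has changed and the cached objects
 * would be stale.  The trigger function is hypothetical;
 * pool_cache_invalidate() is the interface defined above.
 */
#if 0
static void
example_param_changed(void)
{

        /* Destruct and release every object currently held in the cache. */
        pool_cache_invalidate(&example_cache);
}
#endif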
2308 */ 2309 void *pool_page_alloc(struct pool *, int); 2310 void pool_page_free(struct pool *, void *); 2311 2312 #ifdef POOL_SUBPAGE 2313 struct pool_allocator pool_allocator_kmem_fullpage = { 2314 pool_page_alloc, pool_page_free, 0, 2315 .pa_backingmapptr = &kmem_map, 2316 }; 2317 #else 2318 struct pool_allocator pool_allocator_kmem = { 2319 pool_page_alloc, pool_page_free, 0, 2320 .pa_backingmapptr = &kmem_map, 2321 }; 2322 #endif 2323 2324 void *pool_page_alloc_nointr(struct pool *, int); 2325 void pool_page_free_nointr(struct pool *, void *); 2326 2327 #ifdef POOL_SUBPAGE 2328 struct pool_allocator pool_allocator_nointr_fullpage = { 2329 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2330 .pa_backingmapptr = &kernel_map, 2331 }; 2332 #else 2333 struct pool_allocator pool_allocator_nointr = { 2334 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2335 .pa_backingmapptr = &kernel_map, 2336 }; 2337 #endif 2338 2339 #ifdef POOL_SUBPAGE 2340 void *pool_subpage_alloc(struct pool *, int); 2341 void pool_subpage_free(struct pool *, void *); 2342 2343 struct pool_allocator pool_allocator_kmem = { 2344 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2345 .pa_backingmapptr = &kmem_map, 2346 }; 2347 2348 void *pool_subpage_alloc_nointr(struct pool *, int); 2349 void pool_subpage_free_nointr(struct pool *, void *); 2350 2351 struct pool_allocator pool_allocator_nointr = { 2352 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2353 .pa_backingmapptr = &kmem_map, 2354 }; 2355 #endif /* POOL_SUBPAGE */ 2356 2357 static void * 2358 pool_allocator_alloc(struct pool *pp, int flags) 2359 { 2360 struct pool_allocator *pa = pp->pr_alloc; 2361 void *res; 2362 2363 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock)); 2364 2365 res = (*pa->pa_alloc)(pp, flags); 2366 if (res == NULL && (flags & PR_WAITOK) == 0) { 2367 /* 2368 * We only run the drain hook here if PR_NOWAIT. 2369 * In other cases, the hook will be run in 2370 * pool_reclaim(). 2371 */ 2372 if (pp->pr_drain_hook != NULL) { 2373 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2374 res = (*pa->pa_alloc)(pp, flags); 2375 } 2376 } 2377 return res; 2378 } 2379 2380 static void 2381 pool_allocator_free(struct pool *pp, void *v) 2382 { 2383 struct pool_allocator *pa = pp->pr_alloc; 2384 2385 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock)); 2386 2387 (*pa->pa_free)(pp, v); 2388 } 2389 2390 void * 2391 pool_page_alloc(struct pool *pp, int flags) 2392 { 2393 bool waitok = (flags & PR_WAITOK) ? true : false; 2394 2395 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok)); 2396 } 2397 2398 void 2399 pool_page_free(struct pool *pp, void *v) 2400 { 2401 2402 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v); 2403 } 2404 2405 static void * 2406 pool_page_alloc_meta(struct pool *pp, int flags) 2407 { 2408 bool waitok = (flags & PR_WAITOK) ? true : false; 2409 2410 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok)); 2411 } 2412 2413 static void 2414 pool_page_free_meta(struct pool *pp, void *v) 2415 { 2416 2417 uvm_km_free_poolpage(kmem_map, (vaddr_t) v); 2418 } 2419 2420 #ifdef POOL_SUBPAGE 2421 /* Sub-page allocator, for machines with large hardware pages. 

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{
        void *v;
        int s;
        s = splvm();
        v = pool_get(&psppool, flags);
        splx(s);
        return v;
}

void
pool_subpage_free(struct pool *pp, void *v)
{
        int s;
        s = splvm();
        pool_put(&psppool, v);
        splx(s);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_subpage_alloc_nointr(struct pool *pp, int flags)
{

        return (pool_subpage_alloc(pp, flags));
}

void
pool_subpage_free_nointr(struct pool *pp, void *v)
{

        pool_subpage_free(pp, v);
}
#endif /* POOL_SUBPAGE */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
        bool waitok = (flags & PR_WAITOK) ? true : false;

        return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

        uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
}
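
/*
 * Illustrative sketch (not compiled): creating a pool that is only ever
 * used from process context, so it takes its pages from kernel_map via
 * pool_allocator_nointr.  The object type and names are hypothetical, and
 * the pool_init() call assumes this revision's interface, whose final
 * argument is the IPL used when initializing the pool's lock.
 */
#if 0
struct example_softc {
        int sc_unit;
};

static struct pool example_softc_pool;

static void
example_softc_pool_setup(void)
{

        pool_init(&example_softc_pool, sizeof(struct example_softc),
            0, 0, 0, "examplpl", &pool_allocator_nointr, IPL_NONE);
}
#endif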