1 /* $NetBSD: subr_pool.c,v 1.183 2010/04/25 11:49:04 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.183 2010/04/25 11:49:04 ad Exp $"); 36 37 #include "opt_ddb.h" 38 #include "opt_pool.h" 39 #include "opt_poollog.h" 40 #include "opt_lockdebug.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/bitops.h> 45 #include <sys/proc.h> 46 #include <sys/errno.h> 47 #include <sys/kernel.h> 48 #include <sys/malloc.h> 49 #include <sys/pool.h> 50 #include <sys/syslog.h> 51 #include <sys/debug.h> 52 #include <sys/lockdebug.h> 53 #include <sys/xcall.h> 54 #include <sys/cpu.h> 55 #include <sys/atomic.h> 56 57 #include <uvm/uvm.h> 58 59 /* 60 * Pool resource management utility. 61 * 62 * Memory is allocated in pages which are split into pieces according to 63 * the pool item size. Each page is kept on one of three lists in the 64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 65 * for empty, full and partially-full pages respectively. The individual 66 * pool items are on a linked list headed by `ph_itemlist' in each page 67 * header. The memory for building the page list is either taken from 68 * the allocated pages themselves (for small pool items) or taken from 69 * an internal pool of page headers (`phpool'). 70 */ 71 72 /* List of all pools */ 73 static TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 74 75 /* Private pool for page header structures */ 76 #define PHPOOL_MAX 8 77 static struct pool phpool[PHPOOL_MAX]; 78 #define PHPOOL_FREELIST_NELEM(idx) \ 79 (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx))) 80 81 #ifdef POOL_SUBPAGE 82 /* Pool of subpages for use by normal pools. 
*/ 83 static struct pool psppool; 84 #endif 85 86 static SLIST_HEAD(, pool_allocator) pa_deferinitq = 87 SLIST_HEAD_INITIALIZER(pa_deferinitq); 88 89 static void *pool_page_alloc_meta(struct pool *, int); 90 static void pool_page_free_meta(struct pool *, void *); 91 92 /* allocator for pool metadata */ 93 struct pool_allocator pool_allocator_meta = { 94 pool_page_alloc_meta, pool_page_free_meta, 95 .pa_backingmapptr = &kmem_map, 96 }; 97 98 /* # of seconds to retain page after last use */ 99 int pool_inactive_time = 10; 100 101 /* Next candidate for drainage (see pool_drain()) */ 102 static struct pool *drainpp; 103 104 /* This lock protects both pool_head and drainpp. */ 105 static kmutex_t pool_head_lock; 106 static kcondvar_t pool_busy; 107 108 /* This lock protects initialization of a potentially shared pool allocator */ 109 static kmutex_t pool_allocator_lock; 110 111 typedef uint32_t pool_item_bitmap_t; 112 #define BITMAP_SIZE (CHAR_BIT * sizeof(pool_item_bitmap_t)) 113 #define BITMAP_MASK (BITMAP_SIZE - 1) 114 115 struct pool_item_header { 116 /* Page headers */ 117 LIST_ENTRY(pool_item_header) 118 ph_pagelist; /* pool page list */ 119 SPLAY_ENTRY(pool_item_header) 120 ph_node; /* Off-page page headers */ 121 void * ph_page; /* this page's address */ 122 uint32_t ph_time; /* last referenced */ 123 uint16_t ph_nmissing; /* # of chunks in use */ 124 uint16_t ph_off; /* start offset in page */ 125 union { 126 /* !PR_NOTOUCH */ 127 struct { 128 LIST_HEAD(, pool_item) 129 phu_itemlist; /* chunk list for this page */ 130 } phu_normal; 131 /* PR_NOTOUCH */ 132 struct { 133 pool_item_bitmap_t phu_bitmap[1]; 134 } phu_notouch; 135 } ph_u; 136 }; 137 #define ph_itemlist ph_u.phu_normal.phu_itemlist 138 #define ph_bitmap ph_u.phu_notouch.phu_bitmap 139 140 struct pool_item { 141 #ifdef DIAGNOSTIC 142 u_int pi_magic; 143 #endif 144 #define PI_MAGIC 0xdeaddeadU 145 /* Other entries use only this list entry */ 146 LIST_ENTRY(pool_item) pi_list; 147 }; 148 149 #define POOL_NEEDS_CATCHUP(pp) \ 150 ((pp)->pr_nitems < (pp)->pr_minitems) 151 152 /* 153 * Pool cache management. 154 * 155 * Pool caches provide a way for constructed objects to be cached by the 156 * pool subsystem. This can lead to performance improvements by avoiding 157 * needless object construction/destruction; it is deferred until absolutely 158 * necessary. 159 * 160 * Caches are grouped into cache groups. Each cache group references up 161 * to PCG_NUMOBJECTS constructed objects. When a cache allocates an 162 * object from the pool, it calls the object's constructor and places it 163 * into a cache group. When a cache group frees an object back to the 164 * pool, it first calls the object's destructor. This allows the object 165 * to persist in constructed form while freed to the cache. 166 * 167 * The pool references each cache, so that when a pool is drained by the 168 * pagedaemon, it can drain each individual cache as well. Each time a 169 * cache is drained, the most idle cache group is freed to the pool in 170 * its entirety. 171 * 172 * Pool caches are layed on top of pools. By layering them, we can avoid 173 * the complexity of cache management for pools which would not benefit 174 * from it. 175 */ 176 177 static struct pool pcg_normal_pool; 178 static struct pool pcg_large_pool; 179 static struct pool cache_pool; 180 static struct pool cache_cpu_pool; 181 182 /* List of all caches. 
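 *
 * An illustrative sketch of the cache layering described above, using a
 * hypothetical `foo' structure and constructor/destructor (not part of
 * this file):
 *
 *	static pool_cache_t foo_cache;
 *
 *	foo_cache = pool_cache_init(sizeof(struct foo), coherency_unit,
 *	    0, 0, "foocache", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *
 *	struct foo *f = pool_cache_get(foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(foo_cache, f);
 *
 * Objects returned with pool_cache_put() remain constructed in a cache
 * group; the destructor only runs when a group is released back to the
 * underlying pool.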
*/ 183 TAILQ_HEAD(,pool_cache) pool_cache_head = 184 TAILQ_HEAD_INITIALIZER(pool_cache_head); 185 186 int pool_cache_disable; /* global disable for caching */ 187 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 188 189 static bool pool_cache_put_slow(pool_cache_cpu_t *, int, 190 void *); 191 static bool pool_cache_get_slow(pool_cache_cpu_t *, int, 192 void **, paddr_t *, int); 193 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 194 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 195 static void pool_cache_invalidate_cpu(pool_cache_t, u_int); 196 static void pool_cache_xcall(pool_cache_t); 197 198 static int pool_catchup(struct pool *); 199 static void pool_prime_page(struct pool *, void *, 200 struct pool_item_header *); 201 static void pool_update_curpage(struct pool *); 202 203 static int pool_grow(struct pool *, int); 204 static void *pool_allocator_alloc(struct pool *, int); 205 static void pool_allocator_free(struct pool *, void *); 206 207 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 208 void (*)(const char *, ...)); 209 static void pool_print1(struct pool *, const char *, 210 void (*)(const char *, ...)); 211 212 static int pool_chk_page(struct pool *, const char *, 213 struct pool_item_header *); 214 215 /* 216 * Pool log entry. An array of these is allocated in pool_init(). 217 */ 218 struct pool_log { 219 const char *pl_file; 220 long pl_line; 221 int pl_action; 222 #define PRLOG_GET 1 223 #define PRLOG_PUT 2 224 void *pl_addr; 225 }; 226 227 #ifdef POOL_DIAGNOSTIC 228 /* Number of entries in pool log buffers */ 229 #ifndef POOL_LOGSIZE 230 #define POOL_LOGSIZE 10 231 #endif 232 233 int pool_logsize = POOL_LOGSIZE; 234 235 static inline void 236 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 237 { 238 int n; 239 struct pool_log *pl; 240 241 if ((pp->pr_roflags & PR_LOGGING) == 0) 242 return; 243 244 if (pp->pr_log == NULL) { 245 if (kmem_map != NULL) 246 pp->pr_log = malloc( 247 pool_logsize * sizeof(struct pool_log), 248 M_TEMP, M_NOWAIT | M_ZERO); 249 if (pp->pr_log == NULL) 250 return; 251 pp->pr_curlogentry = 0; 252 pp->pr_logsize = pool_logsize; 253 } 254 255 /* 256 * Fill in the current entry. Wrap around and overwrite 257 * the oldest entry if necessary. 258 */ 259 n = pp->pr_curlogentry; 260 pl = &pp->pr_log[n]; 261 pl->pl_file = file; 262 pl->pl_line = line; 263 pl->pl_action = action; 264 pl->pl_addr = v; 265 if (++n >= pp->pr_logsize) 266 n = 0; 267 pp->pr_curlogentry = n; 268 } 269 270 static void 271 pr_printlog(struct pool *pp, struct pool_item *pi, 272 void (*pr)(const char *, ...)) 273 { 274 int i = pp->pr_logsize; 275 int n = pp->pr_curlogentry; 276 277 if (pp->pr_log == NULL) 278 return; 279 280 /* 281 * Print all entries in this pool's log. 282 */ 283 while (i-- > 0) { 284 struct pool_log *pl = &pp->pr_log[n]; 285 if (pl->pl_action != 0) { 286 if (pi == NULL || pi == pl->pl_addr) { 287 (*pr)("\tlog entry %d:\n", i); 288 (*pr)("\t\taction = %s, addr = %p\n", 289 pl->pl_action == PRLOG_GET ? 
"get" : "put", 290 pl->pl_addr); 291 (*pr)("\t\tfile: %s at line %lu\n", 292 pl->pl_file, pl->pl_line); 293 } 294 } 295 if (++n >= pp->pr_logsize) 296 n = 0; 297 } 298 } 299 300 static inline void 301 pr_enter(struct pool *pp, const char *file, long line) 302 { 303 304 if (__predict_false(pp->pr_entered_file != NULL)) { 305 printf("pool %s: reentrancy at file %s line %ld\n", 306 pp->pr_wchan, file, line); 307 printf(" previous entry at file %s line %ld\n", 308 pp->pr_entered_file, pp->pr_entered_line); 309 panic("pr_enter"); 310 } 311 312 pp->pr_entered_file = file; 313 pp->pr_entered_line = line; 314 } 315 316 static inline void 317 pr_leave(struct pool *pp) 318 { 319 320 if (__predict_false(pp->pr_entered_file == NULL)) { 321 printf("pool %s not entered?\n", pp->pr_wchan); 322 panic("pr_leave"); 323 } 324 325 pp->pr_entered_file = NULL; 326 pp->pr_entered_line = 0; 327 } 328 329 static inline void 330 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 331 { 332 333 if (pp->pr_entered_file != NULL) 334 (*pr)("\n\tcurrently entered from file %s line %ld\n", 335 pp->pr_entered_file, pp->pr_entered_line); 336 } 337 #else 338 #define pr_log(pp, v, action, file, line) 339 #define pr_printlog(pp, pi, pr) 340 #define pr_enter(pp, file, line) 341 #define pr_leave(pp) 342 #define pr_enter_check(pp, pr) 343 #endif /* POOL_DIAGNOSTIC */ 344 345 static inline unsigned int 346 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 347 const void *v) 348 { 349 const char *cp = v; 350 unsigned int idx; 351 352 KASSERT(pp->pr_roflags & PR_NOTOUCH); 353 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 354 KASSERT(idx < pp->pr_itemsperpage); 355 return idx; 356 } 357 358 static inline void 359 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 360 void *obj) 361 { 362 unsigned int idx = pr_item_notouch_index(pp, ph, obj); 363 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 364 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 365 366 KASSERT((*bitmap & mask) == 0); 367 *bitmap |= mask; 368 } 369 370 static inline void * 371 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 372 { 373 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 374 unsigned int idx; 375 int i; 376 377 for (i = 0; ; i++) { 378 int bit; 379 380 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 381 bit = ffs32(bitmap[i]); 382 if (bit) { 383 pool_item_bitmap_t mask; 384 385 bit--; 386 idx = (i * BITMAP_SIZE) + bit; 387 mask = 1 << bit; 388 KASSERT((bitmap[i] & mask) != 0); 389 bitmap[i] &= ~mask; 390 break; 391 } 392 } 393 KASSERT(idx < pp->pr_itemsperpage); 394 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 395 } 396 397 static inline void 398 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph) 399 { 400 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 401 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 402 int i; 403 404 for (i = 0; i < n; i++) { 405 bitmap[i] = (pool_item_bitmap_t)-1; 406 } 407 } 408 409 static inline int 410 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 411 { 412 413 /* 414 * we consider pool_item_header with smaller ph_page bigger. 415 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 
416 */ 417 418 if (a->ph_page < b->ph_page) 419 return (1); 420 else if (a->ph_page > b->ph_page) 421 return (-1); 422 else 423 return (0); 424 } 425 426 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 427 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 428 429 static inline struct pool_item_header * 430 pr_find_pagehead_noalign(struct pool *pp, void *v) 431 { 432 struct pool_item_header *ph, tmp; 433 434 tmp.ph_page = (void *)(uintptr_t)v; 435 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 436 if (ph == NULL) { 437 ph = SPLAY_ROOT(&pp->pr_phtree); 438 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 439 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 440 } 441 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 442 } 443 444 return ph; 445 } 446 447 /* 448 * Return the pool page header based on item address. 449 */ 450 static inline struct pool_item_header * 451 pr_find_pagehead(struct pool *pp, void *v) 452 { 453 struct pool_item_header *ph, tmp; 454 455 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 456 ph = pr_find_pagehead_noalign(pp, v); 457 } else { 458 void *page = 459 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 460 461 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 462 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 463 } else { 464 tmp.ph_page = page; 465 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 466 } 467 } 468 469 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 470 ((char *)ph->ph_page <= (char *)v && 471 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 472 return ph; 473 } 474 475 static void 476 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 477 { 478 struct pool_item_header *ph; 479 480 while ((ph = LIST_FIRST(pq)) != NULL) { 481 LIST_REMOVE(ph, ph_pagelist); 482 pool_allocator_free(pp, ph->ph_page); 483 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 484 pool_put(pp->pr_phpool, ph); 485 } 486 } 487 488 /* 489 * Remove a page from the pool. 490 */ 491 static inline void 492 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 493 struct pool_pagelist *pq) 494 { 495 496 KASSERT(mutex_owned(&pp->pr_lock)); 497 498 /* 499 * If the page was idle, decrement the idle page count. 500 */ 501 if (ph->ph_nmissing == 0) { 502 #ifdef DIAGNOSTIC 503 if (pp->pr_nidle == 0) 504 panic("pr_rmpage: nidle inconsistent"); 505 if (pp->pr_nitems < pp->pr_itemsperpage) 506 panic("pr_rmpage: nitems inconsistent"); 507 #endif 508 pp->pr_nidle--; 509 } 510 511 pp->pr_nitems -= pp->pr_itemsperpage; 512 513 /* 514 * Unlink the page from the pool and queue it for release. 
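 * The caller is expected to hand `pq' to pr_pagelist_free() once
 * pr_lock has been released, so the page itself is freed to the
 * allocator outside of the pool lock.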
515 */ 516 LIST_REMOVE(ph, ph_pagelist); 517 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 518 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 519 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 520 521 pp->pr_npages--; 522 pp->pr_npagefree++; 523 524 pool_update_curpage(pp); 525 } 526 527 static bool 528 pa_starved_p(struct pool_allocator *pa) 529 { 530 531 if (pa->pa_backingmap != NULL) { 532 return vm_map_starved_p(pa->pa_backingmap); 533 } 534 return false; 535 } 536 537 static int 538 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) 539 { 540 struct pool *pp = obj; 541 struct pool_allocator *pa = pp->pr_alloc; 542 543 KASSERT(&pp->pr_reclaimerentry == ce); 544 pool_reclaim(pp); 545 if (!pa_starved_p(pa)) { 546 return CALLBACK_CHAIN_ABORT; 547 } 548 return CALLBACK_CHAIN_CONTINUE; 549 } 550 551 static void 552 pool_reclaim_register(struct pool *pp) 553 { 554 struct vm_map *map = pp->pr_alloc->pa_backingmap; 555 int s; 556 557 if (map == NULL) { 558 return; 559 } 560 561 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 562 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 563 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 564 splx(s); 565 } 566 567 static void 568 pool_reclaim_unregister(struct pool *pp) 569 { 570 struct vm_map *map = pp->pr_alloc->pa_backingmap; 571 int s; 572 573 if (map == NULL) { 574 return; 575 } 576 577 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 578 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 579 &pp->pr_reclaimerentry); 580 splx(s); 581 } 582 583 static void 584 pa_reclaim_register(struct pool_allocator *pa) 585 { 586 struct vm_map *map = *pa->pa_backingmapptr; 587 struct pool *pp; 588 589 KASSERT(pa->pa_backingmap == NULL); 590 if (map == NULL) { 591 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 592 return; 593 } 594 pa->pa_backingmap = map; 595 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 596 pool_reclaim_register(pp); 597 } 598 } 599 600 /* 601 * Initialize all the pools listed in the "pools" link set. 602 */ 603 void 604 pool_subsystem_init(void) 605 { 606 struct pool_allocator *pa; 607 608 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 609 mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 610 cv_init(&pool_busy, "poolbusy"); 611 612 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 613 KASSERT(pa->pa_backingmapptr != NULL); 614 KASSERT(*pa->pa_backingmapptr != NULL); 615 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 616 pa_reclaim_register(pa); 617 } 618 619 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 620 0, 0, "pcache", &pool_allocator_nointr, IPL_NONE); 621 622 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 623 0, 0, "pcachecpu", &pool_allocator_nointr, IPL_NONE); 624 } 625 626 /* 627 * Initialize the given pool resource structure. 628 * 629 * We export this routine to allow other kernel parts to declare 630 * static pools that must be initialized before malloc() is available. 631 */ 632 void 633 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 634 const char *wchan, struct pool_allocator *palloc, int ipl) 635 { 636 struct pool *pp1; 637 size_t trysize, phsize; 638 int off, slack; 639 640 #ifdef DEBUG 641 /* 642 * Check that the pool hasn't already been initialised and 643 * added to the list of all pools. 
644 */ 645 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 646 if (pp == pp1) 647 panic("pool_init: pool %s already initialised", 648 wchan); 649 } 650 #endif 651 652 #ifdef POOL_DIAGNOSTIC 653 /* 654 * Always log if POOL_DIAGNOSTIC is defined. 655 */ 656 if (pool_logsize != 0) 657 flags |= PR_LOGGING; 658 #endif 659 660 if (palloc == NULL) 661 palloc = &pool_allocator_kmem; 662 #ifdef POOL_SUBPAGE 663 if (size > palloc->pa_pagesz) { 664 if (palloc == &pool_allocator_kmem) 665 palloc = &pool_allocator_kmem_fullpage; 666 else if (palloc == &pool_allocator_nointr) 667 palloc = &pool_allocator_nointr_fullpage; 668 } 669 #endif /* POOL_SUBPAGE */ 670 if (!cold) 671 mutex_enter(&pool_allocator_lock); 672 if (palloc->pa_refcnt++ == 0) { 673 if (palloc->pa_pagesz == 0) 674 palloc->pa_pagesz = PAGE_SIZE; 675 676 TAILQ_INIT(&palloc->pa_list); 677 678 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 679 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 680 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 681 682 if (palloc->pa_backingmapptr != NULL) { 683 pa_reclaim_register(palloc); 684 } 685 } 686 if (!cold) 687 mutex_exit(&pool_allocator_lock); 688 689 if (align == 0) 690 align = ALIGN(1); 691 692 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 693 size = sizeof(struct pool_item); 694 695 size = roundup(size, align); 696 #ifdef DIAGNOSTIC 697 if (size > palloc->pa_pagesz) 698 panic("pool_init: pool item size (%zu) too large", size); 699 #endif 700 701 /* 702 * Initialize the pool structure. 703 */ 704 LIST_INIT(&pp->pr_emptypages); 705 LIST_INIT(&pp->pr_fullpages); 706 LIST_INIT(&pp->pr_partpages); 707 pp->pr_cache = NULL; 708 pp->pr_curpage = NULL; 709 pp->pr_npages = 0; 710 pp->pr_minitems = 0; 711 pp->pr_minpages = 0; 712 pp->pr_maxpages = UINT_MAX; 713 pp->pr_roflags = flags; 714 pp->pr_flags = 0; 715 pp->pr_size = size; 716 pp->pr_align = align; 717 pp->pr_wchan = wchan; 718 pp->pr_alloc = palloc; 719 pp->pr_nitems = 0; 720 pp->pr_nout = 0; 721 pp->pr_hardlimit = UINT_MAX; 722 pp->pr_hardlimit_warning = NULL; 723 pp->pr_hardlimit_ratecap.tv_sec = 0; 724 pp->pr_hardlimit_ratecap.tv_usec = 0; 725 pp->pr_hardlimit_warning_last.tv_sec = 0; 726 pp->pr_hardlimit_warning_last.tv_usec = 0; 727 pp->pr_drain_hook = NULL; 728 pp->pr_drain_hook_arg = NULL; 729 pp->pr_freecheck = NULL; 730 731 /* 732 * Decide whether to put the page header off page to avoid 733 * wasting too large a part of the page or too big item. 734 * Off-page page headers go on a hash table, so we can match 735 * a returned item with its header based on the page address. 736 * We use 1/16 of the page size and about 8 times of the item 737 * size as the threshold (XXX: tune) 738 * 739 * However, we'll put the header into the page if we can put 740 * it without wasting any items. 741 * 742 * Silently enforce `0 <= ioff < align'. 743 */ 744 pp->pr_itemoffset = ioff %= align; 745 /* See the comment below about reserved bytes. 
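 *
 * As an illustration (sizes vary by platform): with a 4096-byte page
 * and a page header of roughly 48 bytes, a pool without PR_NOTOUCH or
 * PR_NOALIGN keeps the header in the page for items smaller than
 * MIN(4096 / 16, 48 * 8) = 256 bytes, and also whenever doing so does
 * not reduce the number of items that fit (the trysize test below).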
*/ 746 trysize = palloc->pa_pagesz - ((align - ioff) % align); 747 phsize = ALIGN(sizeof(struct pool_item_header)); 748 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 && 749 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 750 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 751 /* Use the end of the page for the page header */ 752 pp->pr_roflags |= PR_PHINPAGE; 753 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 754 } else { 755 /* The page header will be taken from our page header pool */ 756 pp->pr_phoffset = 0; 757 off = palloc->pa_pagesz; 758 SPLAY_INIT(&pp->pr_phtree); 759 } 760 761 /* 762 * Alignment is to take place at `ioff' within the item. This means 763 * we must reserve up to `align - 1' bytes on the page to allow 764 * appropriate positioning of each item. 765 */ 766 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 767 KASSERT(pp->pr_itemsperpage != 0); 768 if ((pp->pr_roflags & PR_NOTOUCH)) { 769 int idx; 770 771 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 772 idx++) { 773 /* nothing */ 774 } 775 if (idx >= PHPOOL_MAX) { 776 /* 777 * if you see this panic, consider to tweak 778 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 779 */ 780 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 781 pp->pr_wchan, pp->pr_itemsperpage); 782 } 783 pp->pr_phpool = &phpool[idx]; 784 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 785 pp->pr_phpool = &phpool[0]; 786 } 787 #if defined(DIAGNOSTIC) 788 else { 789 pp->pr_phpool = NULL; 790 } 791 #endif 792 793 /* 794 * Use the slack between the chunks and the page header 795 * for "cache coloring". 796 */ 797 slack = off - pp->pr_itemsperpage * pp->pr_size; 798 pp->pr_maxcolor = (slack / align) * align; 799 pp->pr_curcolor = 0; 800 801 pp->pr_nget = 0; 802 pp->pr_nfail = 0; 803 pp->pr_nput = 0; 804 pp->pr_npagealloc = 0; 805 pp->pr_npagefree = 0; 806 pp->pr_hiwat = 0; 807 pp->pr_nidle = 0; 808 pp->pr_refcnt = 0; 809 810 pp->pr_log = NULL; 811 812 pp->pr_entered_file = NULL; 813 pp->pr_entered_line = 0; 814 815 mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl); 816 cv_init(&pp->pr_cv, wchan); 817 pp->pr_ipl = ipl; 818 819 /* 820 * Initialize private page header pool and cache magazine pool if we 821 * haven't done so yet. 822 * XXX LOCKING. 823 */ 824 if (phpool[0].pr_size == 0) { 825 int idx; 826 for (idx = 0; idx < PHPOOL_MAX; idx++) { 827 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 828 int nelem; 829 size_t sz; 830 831 nelem = PHPOOL_FREELIST_NELEM(idx); 832 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 833 "phpool-%d", nelem); 834 sz = sizeof(struct pool_item_header); 835 if (nelem) { 836 sz = offsetof(struct pool_item_header, 837 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 838 } 839 pool_init(&phpool[idx], sz, 0, 0, 0, 840 phpool_names[idx], &pool_allocator_meta, IPL_VM); 841 } 842 #ifdef POOL_SUBPAGE 843 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 844 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 845 #endif 846 847 size = sizeof(pcg_t) + 848 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 849 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 850 "pcgnormal", &pool_allocator_meta, IPL_VM); 851 852 size = sizeof(pcg_t) + 853 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 854 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 855 "pcglarge", &pool_allocator_meta, IPL_VM); 856 } 857 858 /* Insert into the list of all pools. 
*/ 859 if (!cold) 860 mutex_enter(&pool_head_lock); 861 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 862 if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0) 863 break; 864 } 865 if (pp1 == NULL) 866 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 867 else 868 TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist); 869 if (!cold) 870 mutex_exit(&pool_head_lock); 871 872 /* Insert this into the list of pools using this allocator. */ 873 if (!cold) 874 mutex_enter(&palloc->pa_lock); 875 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 876 if (!cold) 877 mutex_exit(&palloc->pa_lock); 878 879 pool_reclaim_register(pp); 880 } 881 882 /* 883 * De-commision a pool resource. 884 */ 885 void 886 pool_destroy(struct pool *pp) 887 { 888 struct pool_pagelist pq; 889 struct pool_item_header *ph; 890 891 /* Remove from global pool list */ 892 mutex_enter(&pool_head_lock); 893 while (pp->pr_refcnt != 0) 894 cv_wait(&pool_busy, &pool_head_lock); 895 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 896 if (drainpp == pp) 897 drainpp = NULL; 898 mutex_exit(&pool_head_lock); 899 900 /* Remove this pool from its allocator's list of pools. */ 901 pool_reclaim_unregister(pp); 902 mutex_enter(&pp->pr_alloc->pa_lock); 903 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 904 mutex_exit(&pp->pr_alloc->pa_lock); 905 906 mutex_enter(&pool_allocator_lock); 907 if (--pp->pr_alloc->pa_refcnt == 0) 908 mutex_destroy(&pp->pr_alloc->pa_lock); 909 mutex_exit(&pool_allocator_lock); 910 911 mutex_enter(&pp->pr_lock); 912 913 KASSERT(pp->pr_cache == NULL); 914 915 #ifdef DIAGNOSTIC 916 if (pp->pr_nout != 0) { 917 pr_printlog(pp, NULL, printf); 918 panic("pool_destroy: pool busy: still out: %u", 919 pp->pr_nout); 920 } 921 #endif 922 923 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 924 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 925 926 /* Remove all pages */ 927 LIST_INIT(&pq); 928 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 929 pr_rmpage(pp, ph, &pq); 930 931 mutex_exit(&pp->pr_lock); 932 933 pr_pagelist_free(pp, &pq); 934 935 #ifdef POOL_DIAGNOSTIC 936 if (pp->pr_log != NULL) { 937 free(pp->pr_log, M_TEMP); 938 pp->pr_log = NULL; 939 } 940 #endif 941 942 cv_destroy(&pp->pr_cv); 943 mutex_destroy(&pp->pr_lock); 944 } 945 946 void 947 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 948 { 949 950 /* XXX no locking -- must be used just after pool_init() */ 951 #ifdef DIAGNOSTIC 952 if (pp->pr_drain_hook != NULL) 953 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 954 #endif 955 pp->pr_drain_hook = fn; 956 pp->pr_drain_hook_arg = arg; 957 } 958 959 static struct pool_item_header * 960 pool_alloc_item_header(struct pool *pp, void *storage, int flags) 961 { 962 struct pool_item_header *ph; 963 964 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 965 ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset); 966 else 967 ph = pool_get(pp->pr_phpool, flags); 968 969 return (ph); 970 } 971 972 /* 973 * Grab an item from the pool. 
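 *
 * A minimal usage sketch; `foo_pool' is a hypothetical pool set up
 * elsewhere with pool_init():
 *
 *	struct foo *f = pool_get(&foo_pool, PR_NOWAIT);
 *	if (f == NULL)
 *		return ENOMEM;
 *	...
 *	pool_put(&foo_pool, f);
 *
 * PR_WAITOK callers must be able to sleep; PR_NOWAIT callers must be
 * prepared for a NULL return.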
974 */ 975 void * 976 #ifdef POOL_DIAGNOSTIC 977 _pool_get(struct pool *pp, int flags, const char *file, long line) 978 #else 979 pool_get(struct pool *pp, int flags) 980 #endif 981 { 982 struct pool_item *pi; 983 struct pool_item_header *ph; 984 void *v; 985 986 #ifdef DIAGNOSTIC 987 if (__predict_false(pp->pr_itemsperpage == 0)) 988 panic("pool_get: pool %p: pr_itemsperpage is zero, " 989 "pool not initialized?", pp); 990 if (__predict_false(curlwp == NULL && doing_shutdown == 0 && 991 (flags & PR_WAITOK) != 0)) 992 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan); 993 994 #endif /* DIAGNOSTIC */ 995 #ifdef LOCKDEBUG 996 if (flags & PR_WAITOK) { 997 ASSERT_SLEEPABLE(); 998 } 999 #endif 1000 1001 mutex_enter(&pp->pr_lock); 1002 pr_enter(pp, file, line); 1003 1004 startover: 1005 /* 1006 * Check to see if we've reached the hard limit. If we have, 1007 * and we can wait, then wait until an item has been returned to 1008 * the pool. 1009 */ 1010 #ifdef DIAGNOSTIC 1011 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 1012 pr_leave(pp); 1013 mutex_exit(&pp->pr_lock); 1014 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 1015 } 1016 #endif 1017 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 1018 if (pp->pr_drain_hook != NULL) { 1019 /* 1020 * Since the drain hook is going to free things 1021 * back to the pool, unlock, call the hook, re-lock, 1022 * and check the hardlimit condition again. 1023 */ 1024 pr_leave(pp); 1025 mutex_exit(&pp->pr_lock); 1026 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 1027 mutex_enter(&pp->pr_lock); 1028 pr_enter(pp, file, line); 1029 if (pp->pr_nout < pp->pr_hardlimit) 1030 goto startover; 1031 } 1032 1033 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 1034 /* 1035 * XXX: A warning isn't logged in this case. Should 1036 * it be? 1037 */ 1038 pp->pr_flags |= PR_WANTED; 1039 pr_leave(pp); 1040 cv_wait(&pp->pr_cv, &pp->pr_lock); 1041 pr_enter(pp, file, line); 1042 goto startover; 1043 } 1044 1045 /* 1046 * Log a message that the hard limit has been hit. 1047 */ 1048 if (pp->pr_hardlimit_warning != NULL && 1049 ratecheck(&pp->pr_hardlimit_warning_last, 1050 &pp->pr_hardlimit_ratecap)) 1051 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 1052 1053 pp->pr_nfail++; 1054 1055 pr_leave(pp); 1056 mutex_exit(&pp->pr_lock); 1057 return (NULL); 1058 } 1059 1060 /* 1061 * The convention we use is that if `curpage' is not NULL, then 1062 * it points at a non-empty bucket. In particular, `curpage' 1063 * never points at a page header which has PR_PHINPAGE set and 1064 * has no items in its bucket. 1065 */ 1066 if ((ph = pp->pr_curpage) == NULL) { 1067 int error; 1068 1069 #ifdef DIAGNOSTIC 1070 if (pp->pr_nitems != 0) { 1071 mutex_exit(&pp->pr_lock); 1072 printf("pool_get: %s: curpage NULL, nitems %u\n", 1073 pp->pr_wchan, pp->pr_nitems); 1074 panic("pool_get: nitems inconsistent"); 1075 } 1076 #endif 1077 1078 /* 1079 * Call the back-end page allocator for more memory. 1080 * Release the pool lock, as the back-end page allocator 1081 * may block. 1082 */ 1083 pr_leave(pp); 1084 error = pool_grow(pp, flags); 1085 pr_enter(pp, file, line); 1086 if (error != 0) { 1087 /* 1088 * We were unable to allocate a page or item 1089 * header, but we released the lock during 1090 * allocation, so perhaps items were freed 1091 * back to the pool. Check for this case. 
1092 */ 1093 if (pp->pr_curpage != NULL) 1094 goto startover; 1095 1096 pp->pr_nfail++; 1097 pr_leave(pp); 1098 mutex_exit(&pp->pr_lock); 1099 return (NULL); 1100 } 1101 1102 /* Start the allocation process over. */ 1103 goto startover; 1104 } 1105 if (pp->pr_roflags & PR_NOTOUCH) { 1106 #ifdef DIAGNOSTIC 1107 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1108 pr_leave(pp); 1109 mutex_exit(&pp->pr_lock); 1110 panic("pool_get: %s: page empty", pp->pr_wchan); 1111 } 1112 #endif 1113 v = pr_item_notouch_get(pp, ph); 1114 #ifdef POOL_DIAGNOSTIC 1115 pr_log(pp, v, PRLOG_GET, file, line); 1116 #endif 1117 } else { 1118 v = pi = LIST_FIRST(&ph->ph_itemlist); 1119 if (__predict_false(v == NULL)) { 1120 pr_leave(pp); 1121 mutex_exit(&pp->pr_lock); 1122 panic("pool_get: %s: page empty", pp->pr_wchan); 1123 } 1124 #ifdef DIAGNOSTIC 1125 if (__predict_false(pp->pr_nitems == 0)) { 1126 pr_leave(pp); 1127 mutex_exit(&pp->pr_lock); 1128 printf("pool_get: %s: items on itemlist, nitems %u\n", 1129 pp->pr_wchan, pp->pr_nitems); 1130 panic("pool_get: nitems inconsistent"); 1131 } 1132 #endif 1133 1134 #ifdef POOL_DIAGNOSTIC 1135 pr_log(pp, v, PRLOG_GET, file, line); 1136 #endif 1137 1138 #ifdef DIAGNOSTIC 1139 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1140 pr_printlog(pp, pi, printf); 1141 panic("pool_get(%s): free list modified: " 1142 "magic=%x; page %p; item addr %p\n", 1143 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1144 } 1145 #endif 1146 1147 /* 1148 * Remove from item list. 1149 */ 1150 LIST_REMOVE(pi, pi_list); 1151 } 1152 pp->pr_nitems--; 1153 pp->pr_nout++; 1154 if (ph->ph_nmissing == 0) { 1155 #ifdef DIAGNOSTIC 1156 if (__predict_false(pp->pr_nidle == 0)) 1157 panic("pool_get: nidle inconsistent"); 1158 #endif 1159 pp->pr_nidle--; 1160 1161 /* 1162 * This page was previously empty. Move it to the list of 1163 * partially-full pages. This page is already curpage. 1164 */ 1165 LIST_REMOVE(ph, ph_pagelist); 1166 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1167 } 1168 ph->ph_nmissing++; 1169 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1170 #ifdef DIAGNOSTIC 1171 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1172 !LIST_EMPTY(&ph->ph_itemlist))) { 1173 pr_leave(pp); 1174 mutex_exit(&pp->pr_lock); 1175 panic("pool_get: %s: nmissing inconsistent", 1176 pp->pr_wchan); 1177 } 1178 #endif 1179 /* 1180 * This page is now full. Move it to the full list 1181 * and select a new current page. 1182 */ 1183 LIST_REMOVE(ph, ph_pagelist); 1184 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1185 pool_update_curpage(pp); 1186 } 1187 1188 pp->pr_nget++; 1189 pr_leave(pp); 1190 1191 /* 1192 * If we have a low water mark and we are now below that low 1193 * water mark, add more items to the pool. 1194 */ 1195 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1196 /* 1197 * XXX: Should we log a warning? Should we set up a timeout 1198 * to try again in a second or so? The latter could break 1199 * a caller's assumptions about interrupt protection, etc. 1200 */ 1201 } 1202 1203 mutex_exit(&pp->pr_lock); 1204 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 1205 FREECHECK_OUT(&pp->pr_freecheck, v); 1206 return (v); 1207 } 1208 1209 /* 1210 * Internal version of pool_put(). Pool is already locked/entered. 
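 * Pages that become empty and exceed the page high water mark are only
 * unlinked here and collected on the caller-supplied `pq'; the caller
 * passes that list to pr_pagelist_free() after dropping pr_lock.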
1211 */ 1212 static void 1213 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1214 { 1215 struct pool_item *pi = v; 1216 struct pool_item_header *ph; 1217 1218 KASSERT(mutex_owned(&pp->pr_lock)); 1219 FREECHECK_IN(&pp->pr_freecheck, v); 1220 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1221 1222 #ifdef DIAGNOSTIC 1223 if (__predict_false(pp->pr_nout == 0)) { 1224 printf("pool %s: putting with none out\n", 1225 pp->pr_wchan); 1226 panic("pool_put"); 1227 } 1228 #endif 1229 1230 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1231 pr_printlog(pp, NULL, printf); 1232 panic("pool_put: %s: page header missing", pp->pr_wchan); 1233 } 1234 1235 /* 1236 * Return to item list. 1237 */ 1238 if (pp->pr_roflags & PR_NOTOUCH) { 1239 pr_item_notouch_put(pp, ph, v); 1240 } else { 1241 #ifdef DIAGNOSTIC 1242 pi->pi_magic = PI_MAGIC; 1243 #endif 1244 #ifdef DEBUG 1245 { 1246 int i, *ip = v; 1247 1248 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1249 *ip++ = PI_MAGIC; 1250 } 1251 } 1252 #endif 1253 1254 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1255 } 1256 KDASSERT(ph->ph_nmissing != 0); 1257 ph->ph_nmissing--; 1258 pp->pr_nput++; 1259 pp->pr_nitems++; 1260 pp->pr_nout--; 1261 1262 /* Cancel "pool empty" condition if it exists */ 1263 if (pp->pr_curpage == NULL) 1264 pp->pr_curpage = ph; 1265 1266 if (pp->pr_flags & PR_WANTED) { 1267 pp->pr_flags &= ~PR_WANTED; 1268 cv_broadcast(&pp->pr_cv); 1269 } 1270 1271 /* 1272 * If this page is now empty, do one of two things: 1273 * 1274 * (1) If we have more pages than the page high water mark, 1275 * free the page back to the system. ONLY CONSIDER 1276 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1277 * CLAIM. 1278 * 1279 * (2) Otherwise, move the page to the empty page list. 1280 * 1281 * Either way, select a new current page (so we use a partially-full 1282 * page if one is available). 1283 */ 1284 if (ph->ph_nmissing == 0) { 1285 pp->pr_nidle++; 1286 if (pp->pr_npages > pp->pr_minpages && 1287 pp->pr_npages > pp->pr_maxpages) { 1288 pr_rmpage(pp, ph, pq); 1289 } else { 1290 LIST_REMOVE(ph, ph_pagelist); 1291 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1292 1293 /* 1294 * Update the timestamp on the page. A page must 1295 * be idle for some period of time before it can 1296 * be reclaimed by the pagedaemon. This minimizes 1297 * ping-pong'ing for memory. 1298 * 1299 * note for 64-bit time_t: truncating to 32-bit is not 1300 * a problem for our usage. 1301 */ 1302 ph->ph_time = time_uptime; 1303 } 1304 pool_update_curpage(pp); 1305 } 1306 1307 /* 1308 * If the page was previously completely full, move it to the 1309 * partially-full list and make it the current page. The next 1310 * allocation will get the item from this page, instead of 1311 * further fragmenting the pool. 1312 */ 1313 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1314 LIST_REMOVE(ph, ph_pagelist); 1315 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1316 pp->pr_curpage = ph; 1317 } 1318 } 1319 1320 /* 1321 * Return resource to the pool. 
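 * The item must have been obtained from pool_get() on the same pool and
 * may not be referenced after it has been returned; DEBUG kernels
 * overwrite freed items with PI_MAGIC to help catch stale references.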
1322 */ 1323 #ifdef POOL_DIAGNOSTIC 1324 void 1325 _pool_put(struct pool *pp, void *v, const char *file, long line) 1326 { 1327 struct pool_pagelist pq; 1328 1329 LIST_INIT(&pq); 1330 1331 mutex_enter(&pp->pr_lock); 1332 pr_enter(pp, file, line); 1333 1334 pr_log(pp, v, PRLOG_PUT, file, line); 1335 1336 pool_do_put(pp, v, &pq); 1337 1338 pr_leave(pp); 1339 mutex_exit(&pp->pr_lock); 1340 1341 pr_pagelist_free(pp, &pq); 1342 } 1343 #undef pool_put 1344 #endif /* POOL_DIAGNOSTIC */ 1345 1346 void 1347 pool_put(struct pool *pp, void *v) 1348 { 1349 struct pool_pagelist pq; 1350 1351 LIST_INIT(&pq); 1352 1353 mutex_enter(&pp->pr_lock); 1354 pool_do_put(pp, v, &pq); 1355 mutex_exit(&pp->pr_lock); 1356 1357 pr_pagelist_free(pp, &pq); 1358 } 1359 1360 #ifdef POOL_DIAGNOSTIC 1361 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1362 #endif 1363 1364 /* 1365 * pool_grow: grow a pool by a page. 1366 * 1367 * => called with pool locked. 1368 * => unlock and relock the pool. 1369 * => return with pool locked. 1370 */ 1371 1372 static int 1373 pool_grow(struct pool *pp, int flags) 1374 { 1375 struct pool_item_header *ph = NULL; 1376 char *cp; 1377 1378 mutex_exit(&pp->pr_lock); 1379 cp = pool_allocator_alloc(pp, flags); 1380 if (__predict_true(cp != NULL)) { 1381 ph = pool_alloc_item_header(pp, cp, flags); 1382 } 1383 if (__predict_false(cp == NULL || ph == NULL)) { 1384 if (cp != NULL) { 1385 pool_allocator_free(pp, cp); 1386 } 1387 mutex_enter(&pp->pr_lock); 1388 return ENOMEM; 1389 } 1390 1391 mutex_enter(&pp->pr_lock); 1392 pool_prime_page(pp, cp, ph); 1393 pp->pr_npagealloc++; 1394 return 0; 1395 } 1396 1397 /* 1398 * Add N items to the pool. 1399 */ 1400 int 1401 pool_prime(struct pool *pp, int n) 1402 { 1403 int newpages; 1404 int error = 0; 1405 1406 mutex_enter(&pp->pr_lock); 1407 1408 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1409 1410 while (newpages-- > 0) { 1411 error = pool_grow(pp, PR_NOWAIT); 1412 if (error) { 1413 break; 1414 } 1415 pp->pr_minpages++; 1416 } 1417 1418 if (pp->pr_minpages >= pp->pr_maxpages) 1419 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1420 1421 mutex_exit(&pp->pr_lock); 1422 return error; 1423 } 1424 1425 /* 1426 * Add a page worth of items to the pool. 1427 * 1428 * Note, we must be called with the pool descriptor LOCKED. 1429 */ 1430 static void 1431 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1432 { 1433 struct pool_item *pi; 1434 void *cp = storage; 1435 const unsigned int align = pp->pr_align; 1436 const unsigned int ioff = pp->pr_itemoffset; 1437 int n; 1438 1439 KASSERT(mutex_owned(&pp->pr_lock)); 1440 1441 #ifdef DIAGNOSTIC 1442 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1443 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1444 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1445 #endif 1446 1447 /* 1448 * Insert page header. 1449 */ 1450 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1451 LIST_INIT(&ph->ph_itemlist); 1452 ph->ph_page = storage; 1453 ph->ph_nmissing = 0; 1454 ph->ph_time = time_uptime; 1455 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1456 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1457 1458 pp->pr_nidle++; 1459 1460 /* 1461 * Color this page. 1462 */ 1463 ph->ph_off = pp->pr_curcolor; 1464 cp = (char *)cp + ph->ph_off; 1465 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1466 pp->pr_curcolor = 0; 1467 1468 /* 1469 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 
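 * For example (hypothetical values), with align = 32 and ioff = 8 the
 * cursor advances by 32 - 8 = 24 bytes, so that each item address plus
 * pr_itemoffset falls on a 32-byte boundary, as the KASSERTs below
 * verify.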
1470 */ 1471 if (ioff != 0) 1472 cp = (char *)cp + align - ioff; 1473 1474 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1475 1476 /* 1477 * Insert remaining chunks on the bucket list. 1478 */ 1479 n = pp->pr_itemsperpage; 1480 pp->pr_nitems += n; 1481 1482 if (pp->pr_roflags & PR_NOTOUCH) { 1483 pr_item_notouch_init(pp, ph); 1484 } else { 1485 while (n--) { 1486 pi = (struct pool_item *)cp; 1487 1488 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1489 1490 /* Insert on page list */ 1491 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1492 #ifdef DIAGNOSTIC 1493 pi->pi_magic = PI_MAGIC; 1494 #endif 1495 cp = (char *)cp + pp->pr_size; 1496 1497 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1498 } 1499 } 1500 1501 /* 1502 * If the pool was depleted, point at the new page. 1503 */ 1504 if (pp->pr_curpage == NULL) 1505 pp->pr_curpage = ph; 1506 1507 if (++pp->pr_npages > pp->pr_hiwat) 1508 pp->pr_hiwat = pp->pr_npages; 1509 } 1510 1511 /* 1512 * Used by pool_get() when nitems drops below the low water mark. This 1513 * is used to catch up pr_nitems with the low water mark. 1514 * 1515 * Note 1, we never wait for memory here, we let the caller decide what to do. 1516 * 1517 * Note 2, we must be called with the pool already locked, and we return 1518 * with it locked. 1519 */ 1520 static int 1521 pool_catchup(struct pool *pp) 1522 { 1523 int error = 0; 1524 1525 while (POOL_NEEDS_CATCHUP(pp)) { 1526 error = pool_grow(pp, PR_NOWAIT); 1527 if (error) { 1528 break; 1529 } 1530 } 1531 return error; 1532 } 1533 1534 static void 1535 pool_update_curpage(struct pool *pp) 1536 { 1537 1538 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1539 if (pp->pr_curpage == NULL) { 1540 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1541 } 1542 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) || 1543 (pp->pr_curpage != NULL && pp->pr_nitems > 0)); 1544 } 1545 1546 void 1547 pool_setlowat(struct pool *pp, int n) 1548 { 1549 1550 mutex_enter(&pp->pr_lock); 1551 1552 pp->pr_minitems = n; 1553 pp->pr_minpages = (n == 0) 1554 ? 0 1555 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1556 1557 /* Make sure we're caught up with the newly-set low water mark. */ 1558 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1559 /* 1560 * XXX: Should we log a warning? Should we set up a timeout 1561 * to try again in a second or so? The latter could break 1562 * a caller's assumptions about interrupt protection, etc. 1563 */ 1564 } 1565 1566 mutex_exit(&pp->pr_lock); 1567 } 1568 1569 void 1570 pool_sethiwat(struct pool *pp, int n) 1571 { 1572 1573 mutex_enter(&pp->pr_lock); 1574 1575 pp->pr_maxpages = (n == 0) 1576 ? 0 1577 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1578 1579 mutex_exit(&pp->pr_lock); 1580 } 1581 1582 void 1583 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1584 { 1585 1586 mutex_enter(&pp->pr_lock); 1587 1588 pp->pr_hardlimit = n; 1589 pp->pr_hardlimit_warning = warnmess; 1590 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1591 pp->pr_hardlimit_warning_last.tv_sec = 0; 1592 pp->pr_hardlimit_warning_last.tv_usec = 0; 1593 1594 /* 1595 * In-line version of pool_sethiwat(), because we don't want to 1596 * release the lock. 1597 */ 1598 pp->pr_maxpages = (n == 0) 1599 ? 0 1600 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1601 1602 mutex_exit(&pp->pr_lock); 1603 } 1604 1605 /* 1606 * Release all complete pages that have not been used recently. 
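 * "Recently" means within the last pool_inactive_time seconds, unless
 * the backing map is starved for space, in which case idle pages are
 * released regardless of age. The pool's minimum page claim and low
 * water mark are always respected.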
1607 */ 1608 int 1609 #ifdef POOL_DIAGNOSTIC 1610 _pool_reclaim(struct pool *pp, const char *file, long line) 1611 #else 1612 pool_reclaim(struct pool *pp) 1613 #endif 1614 { 1615 struct pool_item_header *ph, *phnext; 1616 struct pool_pagelist pq; 1617 uint32_t curtime; 1618 bool klock; 1619 int rv; 1620 1621 if (pp->pr_drain_hook != NULL) { 1622 /* 1623 * The drain hook must be called with the pool unlocked. 1624 */ 1625 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1626 } 1627 1628 /* 1629 * XXXSMP Because we do not want to cause non-MPSAFE code 1630 * to block. 1631 */ 1632 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1633 pp->pr_ipl == IPL_SOFTSERIAL) { 1634 KERNEL_LOCK(1, NULL); 1635 klock = true; 1636 } else 1637 klock = false; 1638 1639 /* Reclaim items from the pool's cache (if any). */ 1640 if (pp->pr_cache != NULL) 1641 pool_cache_invalidate(pp->pr_cache); 1642 1643 if (mutex_tryenter(&pp->pr_lock) == 0) { 1644 if (klock) { 1645 KERNEL_UNLOCK_ONE(NULL); 1646 } 1647 return (0); 1648 } 1649 pr_enter(pp, file, line); 1650 1651 LIST_INIT(&pq); 1652 1653 curtime = time_uptime; 1654 1655 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1656 phnext = LIST_NEXT(ph, ph_pagelist); 1657 1658 /* Check our minimum page claim */ 1659 if (pp->pr_npages <= pp->pr_minpages) 1660 break; 1661 1662 KASSERT(ph->ph_nmissing == 0); 1663 if (curtime - ph->ph_time < pool_inactive_time 1664 && !pa_starved_p(pp->pr_alloc)) 1665 continue; 1666 1667 /* 1668 * If freeing this page would put us below 1669 * the low water mark, stop now. 1670 */ 1671 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1672 pp->pr_minitems) 1673 break; 1674 1675 pr_rmpage(pp, ph, &pq); 1676 } 1677 1678 pr_leave(pp); 1679 mutex_exit(&pp->pr_lock); 1680 1681 if (LIST_EMPTY(&pq)) 1682 rv = 0; 1683 else { 1684 pr_pagelist_free(pp, &pq); 1685 rv = 1; 1686 } 1687 1688 if (klock) { 1689 KERNEL_UNLOCK_ONE(NULL); 1690 } 1691 1692 return (rv); 1693 } 1694 1695 /* 1696 * Drain pools, one at a time. This is a two stage process; 1697 * drain_start kicks off a cross call to drain CPU-level caches 1698 * if the pool has an associated pool_cache. drain_end waits 1699 * for those cross calls to finish, and then drains the cache 1700 * (if any) and pool. 1701 * 1702 * Note, must never be called from interrupt context. 1703 */ 1704 void 1705 pool_drain_start(struct pool **ppp, uint64_t *wp) 1706 { 1707 struct pool *pp; 1708 1709 KASSERT(!TAILQ_EMPTY(&pool_head)); 1710 1711 pp = NULL; 1712 1713 /* Find next pool to drain, and add a reference. */ 1714 mutex_enter(&pool_head_lock); 1715 do { 1716 if (drainpp == NULL) { 1717 drainpp = TAILQ_FIRST(&pool_head); 1718 } 1719 if (drainpp != NULL) { 1720 pp = drainpp; 1721 drainpp = TAILQ_NEXT(pp, pr_poollist); 1722 } 1723 /* 1724 * Skip completely idle pools. We depend on at least 1725 * one pool in the system being active. 1726 */ 1727 } while (pp == NULL || pp->pr_npages == 0); 1728 pp->pr_refcnt++; 1729 mutex_exit(&pool_head_lock); 1730 1731 /* If there is a pool_cache, drain CPU level caches. */ 1732 *ppp = pp; 1733 if (pp->pr_cache != NULL) { 1734 *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, 1735 pp->pr_cache, NULL); 1736 } 1737 } 1738 1739 void 1740 pool_drain_end(struct pool *pp, uint64_t where) 1741 { 1742 1743 if (pp == NULL) 1744 return; 1745 1746 KASSERT(pp->pr_refcnt > 0); 1747 1748 /* Wait for remote draining to complete. */ 1749 if (pp->pr_cache != NULL) 1750 xc_wait(where); 1751 1752 /* Drain the cache (if any) and pool.. 
*/ 1753 pool_reclaim(pp); 1754 1755 /* Finally, unlock the pool. */ 1756 mutex_enter(&pool_head_lock); 1757 pp->pr_refcnt--; 1758 cv_broadcast(&pool_busy); 1759 mutex_exit(&pool_head_lock); 1760 } 1761 1762 /* 1763 * Diagnostic helpers. 1764 */ 1765 void 1766 pool_print(struct pool *pp, const char *modif) 1767 { 1768 1769 pool_print1(pp, modif, printf); 1770 } 1771 1772 void 1773 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1774 { 1775 struct pool *pp; 1776 1777 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1778 pool_printit(pp, modif, pr); 1779 } 1780 } 1781 1782 void 1783 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1784 { 1785 1786 if (pp == NULL) { 1787 (*pr)("Must specify a pool to print.\n"); 1788 return; 1789 } 1790 1791 pool_print1(pp, modif, pr); 1792 } 1793 1794 static void 1795 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1796 void (*pr)(const char *, ...)) 1797 { 1798 struct pool_item_header *ph; 1799 #ifdef DIAGNOSTIC 1800 struct pool_item *pi; 1801 #endif 1802 1803 LIST_FOREACH(ph, pl, ph_pagelist) { 1804 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1805 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1806 #ifdef DIAGNOSTIC 1807 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1808 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1809 if (pi->pi_magic != PI_MAGIC) { 1810 (*pr)("\t\t\titem %p, magic 0x%x\n", 1811 pi, pi->pi_magic); 1812 } 1813 } 1814 } 1815 #endif 1816 } 1817 } 1818 1819 static void 1820 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1821 { 1822 struct pool_item_header *ph; 1823 pool_cache_t pc; 1824 pcg_t *pcg; 1825 pool_cache_cpu_t *cc; 1826 uint64_t cpuhit, cpumiss; 1827 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1828 char c; 1829 1830 while ((c = *modif++) != '\0') { 1831 if (c == 'l') 1832 print_log = 1; 1833 if (c == 'p') 1834 print_pagelist = 1; 1835 if (c == 'c') 1836 print_cache = 1; 1837 } 1838 1839 if ((pc = pp->pr_cache) != NULL) { 1840 (*pr)("POOL CACHE"); 1841 } else { 1842 (*pr)("POOL"); 1843 } 1844 1845 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1846 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1847 pp->pr_roflags); 1848 (*pr)("\talloc %p\n", pp->pr_alloc); 1849 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1850 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1851 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1852 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1853 1854 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1855 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1856 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1857 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1858 1859 if (print_pagelist == 0) 1860 goto skip_pagelist; 1861 1862 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1863 (*pr)("\n\tempty page list:\n"); 1864 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1865 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1866 (*pr)("\n\tfull page list:\n"); 1867 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1868 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1869 (*pr)("\n\tpartial-page list:\n"); 1870 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1871 1872 if (pp->pr_curpage == NULL) 1873 (*pr)("\tno current page\n"); 1874 else 1875 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1876 1877 skip_pagelist: 1878 if (print_log == 0) 1879 goto skip_log; 1880 1881 (*pr)("\n"); 1882 if 
((pp->pr_roflags & PR_LOGGING) == 0) 1883 (*pr)("\tno log\n"); 1884 else { 1885 pr_printlog(pp, NULL, pr); 1886 } 1887 1888 skip_log: 1889 1890 #define PR_GROUPLIST(pcg) \ 1891 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1892 for (i = 0; i < pcg->pcg_size; i++) { \ 1893 if (pcg->pcg_objects[i].pcgo_pa != \ 1894 POOL_PADDR_INVALID) { \ 1895 (*pr)("\t\t\t%p, 0x%llx\n", \ 1896 pcg->pcg_objects[i].pcgo_va, \ 1897 (unsigned long long) \ 1898 pcg->pcg_objects[i].pcgo_pa); \ 1899 } else { \ 1900 (*pr)("\t\t\t%p\n", \ 1901 pcg->pcg_objects[i].pcgo_va); \ 1902 } \ 1903 } 1904 1905 if (pc != NULL) { 1906 cpuhit = 0; 1907 cpumiss = 0; 1908 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1909 if ((cc = pc->pc_cpus[i]) == NULL) 1910 continue; 1911 cpuhit += cc->cc_hits; 1912 cpumiss += cc->cc_misses; 1913 } 1914 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1915 (*pr)("\tcache layer hits %llu misses %llu\n", 1916 pc->pc_hits, pc->pc_misses); 1917 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1918 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1919 pc->pc_contended); 1920 (*pr)("\tcache layer empty groups %u full groups %u\n", 1921 pc->pc_nempty, pc->pc_nfull); 1922 if (print_cache) { 1923 (*pr)("\tfull cache groups:\n"); 1924 for (pcg = pc->pc_fullgroups; pcg != NULL; 1925 pcg = pcg->pcg_next) { 1926 PR_GROUPLIST(pcg); 1927 } 1928 (*pr)("\tempty cache groups:\n"); 1929 for (pcg = pc->pc_emptygroups; pcg != NULL; 1930 pcg = pcg->pcg_next) { 1931 PR_GROUPLIST(pcg); 1932 } 1933 } 1934 } 1935 #undef PR_GROUPLIST 1936 1937 pr_enter_check(pp, pr); 1938 } 1939 1940 static int 1941 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1942 { 1943 struct pool_item *pi; 1944 void *page; 1945 int n; 1946 1947 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1948 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1949 if (page != ph->ph_page && 1950 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1951 if (label != NULL) 1952 printf("%s: ", label); 1953 printf("pool(%p:%s): page inconsistency: page %p;" 1954 " at page head addr %p (p %p)\n", pp, 1955 pp->pr_wchan, ph->ph_page, 1956 ph, page); 1957 return 1; 1958 } 1959 } 1960 1961 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1962 return 0; 1963 1964 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1965 pi != NULL; 1966 pi = LIST_NEXT(pi,pi_list), n++) { 1967 1968 #ifdef DIAGNOSTIC 1969 if (pi->pi_magic != PI_MAGIC) { 1970 if (label != NULL) 1971 printf("%s: ", label); 1972 printf("pool(%s): free list modified: magic=%x;" 1973 " page %p; item ordinal %d; addr %p\n", 1974 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1975 n, pi); 1976 panic("pool"); 1977 } 1978 #endif 1979 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1980 continue; 1981 } 1982 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1983 if (page == ph->ph_page) 1984 continue; 1985 1986 if (label != NULL) 1987 printf("%s: ", label); 1988 printf("pool(%p:%s): page inconsistency: page %p;" 1989 " item ordinal %d; addr %p (p %p)\n", pp, 1990 pp->pr_wchan, ph->ph_page, 1991 n, pi, page); 1992 return 1; 1993 } 1994 return 0; 1995 } 1996 1997 1998 int 1999 pool_chk(struct pool *pp, const char *label) 2000 { 2001 struct pool_item_header *ph; 2002 int r = 0; 2003 2004 mutex_enter(&pp->pr_lock); 2005 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2006 r = pool_chk_page(pp, label, ph); 2007 if (r) { 2008 goto out; 2009 } 2010 } 2011 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2012 r = pool_chk_page(pp, label, ph); 2013 if (r) { 2014 goto out; 
2015 } 2016 } 2017 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2018 r = pool_chk_page(pp, label, ph); 2019 if (r) { 2020 goto out; 2021 } 2022 } 2023 2024 out: 2025 mutex_exit(&pp->pr_lock); 2026 return (r); 2027 } 2028 2029 /* 2030 * pool_cache_init: 2031 * 2032 * Initialize a pool cache. 2033 */ 2034 pool_cache_t 2035 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 2036 const char *wchan, struct pool_allocator *palloc, int ipl, 2037 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 2038 { 2039 pool_cache_t pc; 2040 2041 pc = pool_get(&cache_pool, PR_WAITOK); 2042 if (pc == NULL) 2043 return NULL; 2044 2045 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 2046 palloc, ipl, ctor, dtor, arg); 2047 2048 return pc; 2049 } 2050 2051 /* 2052 * pool_cache_bootstrap: 2053 * 2054 * Kernel-private version of pool_cache_init(). The caller 2055 * provides initial storage. 2056 */ 2057 void 2058 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 2059 u_int align_offset, u_int flags, const char *wchan, 2060 struct pool_allocator *palloc, int ipl, 2061 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 2062 void *arg) 2063 { 2064 CPU_INFO_ITERATOR cii; 2065 pool_cache_t pc1; 2066 struct cpu_info *ci; 2067 struct pool *pp; 2068 2069 pp = &pc->pc_pool; 2070 if (palloc == NULL && ipl == IPL_NONE) 2071 palloc = &pool_allocator_nointr; 2072 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 2073 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 2074 2075 if (ctor == NULL) { 2076 ctor = (int (*)(void *, void *, int))nullop; 2077 } 2078 if (dtor == NULL) { 2079 dtor = (void (*)(void *, void *))nullop; 2080 } 2081 2082 pc->pc_emptygroups = NULL; 2083 pc->pc_fullgroups = NULL; 2084 pc->pc_partgroups = NULL; 2085 pc->pc_ctor = ctor; 2086 pc->pc_dtor = dtor; 2087 pc->pc_arg = arg; 2088 pc->pc_hits = 0; 2089 pc->pc_misses = 0; 2090 pc->pc_nempty = 0; 2091 pc->pc_npart = 0; 2092 pc->pc_nfull = 0; 2093 pc->pc_contended = 0; 2094 pc->pc_refcnt = 0; 2095 pc->pc_freecheck = NULL; 2096 2097 if ((flags & PR_LARGECACHE) != 0) { 2098 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2099 pc->pc_pcgpool = &pcg_large_pool; 2100 } else { 2101 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2102 pc->pc_pcgpool = &pcg_normal_pool; 2103 } 2104 2105 /* Allocate per-CPU caches. */ 2106 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2107 pc->pc_ncpu = 0; 2108 if (ncpu < 2) { 2109 /* XXX For sparc: boot CPU is not attached yet. */ 2110 pool_cache_cpu_init1(curcpu(), pc); 2111 } else { 2112 for (CPU_INFO_FOREACH(cii, ci)) { 2113 pool_cache_cpu_init1(ci, pc); 2114 } 2115 } 2116 2117 /* Add to list of all pools. */ 2118 if (__predict_true(!cold)) 2119 mutex_enter(&pool_head_lock); 2120 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2121 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2122 break; 2123 } 2124 if (pc1 == NULL) 2125 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2126 else 2127 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2128 if (__predict_true(!cold)) 2129 mutex_exit(&pool_head_lock); 2130 2131 membar_sync(); 2132 pp->pr_cache = pc; 2133 } 2134 2135 /* 2136 * pool_cache_destroy: 2137 * 2138 * Destroy a pool cache. 2139 */ 2140 void 2141 pool_cache_destroy(pool_cache_t pc) 2142 { 2143 struct pool *pp = &pc->pc_pool; 2144 u_int i; 2145 2146 /* Remove it from the global list. 
*/ 2147 mutex_enter(&pool_head_lock); 2148 while (pc->pc_refcnt != 0) 2149 cv_wait(&pool_busy, &pool_head_lock); 2150 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2151 mutex_exit(&pool_head_lock); 2152 2153 /* First, invalidate the entire cache. */ 2154 pool_cache_invalidate(pc); 2155 2156 /* Disassociate it from the pool. */ 2157 mutex_enter(&pp->pr_lock); 2158 pp->pr_cache = NULL; 2159 mutex_exit(&pp->pr_lock); 2160 2161 /* Destroy per-CPU data */ 2162 for (i = 0; i < __arraycount(pc->pc_cpus); i++) 2163 pool_cache_invalidate_cpu(pc, i); 2164 2165 /* Finally, destroy it. */ 2166 mutex_destroy(&pc->pc_lock); 2167 pool_destroy(pp); 2168 pool_put(&cache_pool, pc); 2169 } 2170 2171 /* 2172 * pool_cache_cpu_init1: 2173 * 2174 * Called for each pool_cache whenever a new CPU is attached. 2175 */ 2176 static void 2177 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2178 { 2179 pool_cache_cpu_t *cc; 2180 int index; 2181 2182 index = ci->ci_index; 2183 2184 KASSERT(index < __arraycount(pc->pc_cpus)); 2185 2186 if ((cc = pc->pc_cpus[index]) != NULL) { 2187 KASSERT(cc->cc_cpuindex == index); 2188 return; 2189 } 2190 2191 /* 2192 * The first CPU is 'free'. This needs to be the case for 2193 * bootstrap - we may not be able to allocate yet. 2194 */ 2195 if (pc->pc_ncpu == 0) { 2196 cc = &pc->pc_cpu0; 2197 pc->pc_ncpu = 1; 2198 } else { 2199 mutex_enter(&pc->pc_lock); 2200 pc->pc_ncpu++; 2201 mutex_exit(&pc->pc_lock); 2202 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2203 } 2204 2205 cc->cc_ipl = pc->pc_pool.pr_ipl; 2206 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 2207 cc->cc_cache = pc; 2208 cc->cc_cpuindex = index; 2209 cc->cc_hits = 0; 2210 cc->cc_misses = 0; 2211 cc->cc_current = __UNCONST(&pcg_dummy); 2212 cc->cc_previous = __UNCONST(&pcg_dummy); 2213 2214 pc->pc_cpus[index] = cc; 2215 } 2216 2217 /* 2218 * pool_cache_cpu_init: 2219 * 2220 * Called whenever a new CPU is attached. 2221 */ 2222 void 2223 pool_cache_cpu_init(struct cpu_info *ci) 2224 { 2225 pool_cache_t pc; 2226 2227 mutex_enter(&pool_head_lock); 2228 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2229 pc->pc_refcnt++; 2230 mutex_exit(&pool_head_lock); 2231 2232 pool_cache_cpu_init1(ci, pc); 2233 2234 mutex_enter(&pool_head_lock); 2235 pc->pc_refcnt--; 2236 cv_broadcast(&pool_busy); 2237 } 2238 mutex_exit(&pool_head_lock); 2239 } 2240 2241 /* 2242 * pool_cache_reclaim: 2243 * 2244 * Reclaim memory from a pool cache. 2245 */ 2246 bool 2247 pool_cache_reclaim(pool_cache_t pc) 2248 { 2249 2250 return pool_reclaim(&pc->pc_pool); 2251 } 2252 2253 static void 2254 pool_cache_destruct_object1(pool_cache_t pc, void *object) 2255 { 2256 2257 (*pc->pc_dtor)(pc->pc_arg, object); 2258 pool_put(&pc->pc_pool, object); 2259 } 2260 2261 /* 2262 * pool_cache_destruct_object: 2263 * 2264 * Force destruction of an object and its release back into 2265 * the pool. 2266 */ 2267 void 2268 pool_cache_destruct_object(pool_cache_t pc, void *object) 2269 { 2270 2271 FREECHECK_IN(&pc->pc_freecheck, object); 2272 2273 pool_cache_destruct_object1(pc, object); 2274 } 2275 2276 /* 2277 * pool_cache_invalidate_groups: 2278 * 2279 * Invalidate a chain of groups and destruct all objects. 
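 *	Each cached object is passed to the destructor and returned to
 *	the underlying pool, and every group in the chain is then freed
 *	back to pcg_normal_pool or pcg_large_pool as appropriate.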
2280 */ 2281 static void 2282 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2283 { 2284 void *object; 2285 pcg_t *next; 2286 int i; 2287 2288 for (; pcg != NULL; pcg = next) { 2289 next = pcg->pcg_next; 2290 2291 for (i = 0; i < pcg->pcg_avail; i++) { 2292 object = pcg->pcg_objects[i].pcgo_va; 2293 pool_cache_destruct_object1(pc, object); 2294 } 2295 2296 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2297 pool_put(&pcg_large_pool, pcg); 2298 } else { 2299 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2300 pool_put(&pcg_normal_pool, pcg); 2301 } 2302 } 2303 } 2304 2305 /* 2306 * pool_cache_invalidate: 2307 * 2308 * Invalidate a pool cache (destruct and release all of the 2309 * cached objects). Does not reclaim objects from the pool. 2310 * 2311 * Note: For pool caches that provide constructed objects, there 2312 * is an assumption that another level of synchronization is occurring 2313 * between the input to the constructor and the cache invalidation. 2314 */ 2315 void 2316 pool_cache_invalidate(pool_cache_t pc) 2317 { 2318 pcg_t *full, *empty, *part; 2319 #if 0 2320 uint64_t where; 2321 2322 if (ncpu < 2 || !mp_online) { 2323 /* 2324 * We might be called early enough in the boot process 2325 * for the CPU data structures to not be fully initialized. 2326 * In this case, simply gather the local CPU's cache now 2327 * since it will be the only one running. 2328 */ 2329 pool_cache_xcall(pc); 2330 } else { 2331 /* 2332 * Gather all of the CPU-specific caches into the 2333 * global cache. 2334 */ 2335 where = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, pc, NULL); 2336 xc_wait(where); 2337 } 2338 #endif 2339 mutex_enter(&pc->pc_lock); 2340 full = pc->pc_fullgroups; 2341 empty = pc->pc_emptygroups; 2342 part = pc->pc_partgroups; 2343 pc->pc_fullgroups = NULL; 2344 pc->pc_emptygroups = NULL; 2345 pc->pc_partgroups = NULL; 2346 pc->pc_nfull = 0; 2347 pc->pc_nempty = 0; 2348 pc->pc_npart = 0; 2349 mutex_exit(&pc->pc_lock); 2350 2351 pool_cache_invalidate_groups(pc, full); 2352 pool_cache_invalidate_groups(pc, empty); 2353 pool_cache_invalidate_groups(pc, part); 2354 } 2355 2356 /* 2357 * pool_cache_invalidate_cpu: 2358 * 2359 * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2360 * identified by its associated index. 2361 * It is caller's responsibility to ensure that no operation is 2362 * taking place on this pool cache while doing this invalidation. 2363 * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2364 * pool cached objects from a CPU different from the one currently running 2365 * may result in an undefined behaviour. 
2366 */ 2367 static void 2368 pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2369 { 2370 2371 pool_cache_cpu_t *cc; 2372 pcg_t *pcg; 2373 2374 if ((cc = pc->pc_cpus[index]) == NULL) 2375 return; 2376 2377 if ((pcg = cc->cc_current) != &pcg_dummy) { 2378 pcg->pcg_next = NULL; 2379 pool_cache_invalidate_groups(pc, pcg); 2380 } 2381 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2382 pcg->pcg_next = NULL; 2383 pool_cache_invalidate_groups(pc, pcg); 2384 } 2385 if (cc != &pc->pc_cpu0) 2386 pool_put(&cache_cpu_pool, cc); 2387 2388 } 2389 2390 void 2391 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2392 { 2393 2394 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2395 } 2396 2397 void 2398 pool_cache_setlowat(pool_cache_t pc, int n) 2399 { 2400 2401 pool_setlowat(&pc->pc_pool, n); 2402 } 2403 2404 void 2405 pool_cache_sethiwat(pool_cache_t pc, int n) 2406 { 2407 2408 pool_sethiwat(&pc->pc_pool, n); 2409 } 2410 2411 void 2412 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2413 { 2414 2415 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2416 } 2417 2418 static bool __noinline 2419 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2420 paddr_t *pap, int flags) 2421 { 2422 pcg_t *pcg, *cur; 2423 uint64_t ncsw; 2424 pool_cache_t pc; 2425 void *object; 2426 2427 KASSERT(cc->cc_current->pcg_avail == 0); 2428 KASSERT(cc->cc_previous->pcg_avail == 0); 2429 2430 pc = cc->cc_cache; 2431 cc->cc_misses++; 2432 2433 /* 2434 * Nothing was available locally. Try and grab a group 2435 * from the cache. 2436 */ 2437 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2438 ncsw = curlwp->l_ncsw; 2439 mutex_enter(&pc->pc_lock); 2440 pc->pc_contended++; 2441 2442 /* 2443 * If we context switched while locking, then 2444 * our view of the per-CPU data is invalid: 2445 * retry. 2446 */ 2447 if (curlwp->l_ncsw != ncsw) { 2448 mutex_exit(&pc->pc_lock); 2449 return true; 2450 } 2451 } 2452 2453 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2454 /* 2455 * If there's a full group, release our empty 2456 * group back to the cache. Install the full 2457 * group as cc_current and return. 2458 */ 2459 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2460 KASSERT(cur->pcg_avail == 0); 2461 cur->pcg_next = pc->pc_emptygroups; 2462 pc->pc_emptygroups = cur; 2463 pc->pc_nempty++; 2464 } 2465 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2466 cc->cc_current = pcg; 2467 pc->pc_fullgroups = pcg->pcg_next; 2468 pc->pc_hits++; 2469 pc->pc_nfull--; 2470 mutex_exit(&pc->pc_lock); 2471 return true; 2472 } 2473 2474 /* 2475 * Nothing available locally or in cache. Take the slow 2476 * path: fetch a new object from the pool and construct 2477 * it. 
2478 */ 2479 pc->pc_misses++; 2480 mutex_exit(&pc->pc_lock); 2481 splx(s); 2482 2483 object = pool_get(&pc->pc_pool, flags); 2484 *objectp = object; 2485 if (__predict_false(object == NULL)) 2486 return false; 2487 2488 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2489 pool_put(&pc->pc_pool, object); 2490 *objectp = NULL; 2491 return false; 2492 } 2493 2494 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) & 2495 (pc->pc_pool.pr_align - 1)) == 0); 2496 2497 if (pap != NULL) { 2498 #ifdef POOL_VTOPHYS 2499 *pap = POOL_VTOPHYS(object); 2500 #else 2501 *pap = POOL_PADDR_INVALID; 2502 #endif 2503 } 2504 2505 FREECHECK_OUT(&pc->pc_freecheck, object); 2506 return false; 2507 } 2508 2509 /* 2510 * pool_cache_get{,_paddr}: 2511 * 2512 * Get an object from a pool cache (optionally returning 2513 * the physical address of the object). 2514 */ 2515 void * 2516 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2517 { 2518 pool_cache_cpu_t *cc; 2519 pcg_t *pcg; 2520 void *object; 2521 int s; 2522 2523 #ifdef LOCKDEBUG 2524 if (flags & PR_WAITOK) { 2525 ASSERT_SLEEPABLE(); 2526 } 2527 #endif 2528 2529 /* Lock out interrupts and disable preemption. */ 2530 s = splvm(); 2531 while (/* CONSTCOND */ true) { 2532 /* Try and allocate an object from the current group. */ 2533 cc = pc->pc_cpus[curcpu()->ci_index]; 2534 KASSERT(cc->cc_cache == pc); 2535 pcg = cc->cc_current; 2536 if (__predict_true(pcg->pcg_avail > 0)) { 2537 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2538 if (__predict_false(pap != NULL)) 2539 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2540 #if defined(DIAGNOSTIC) 2541 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2542 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2543 KASSERT(object != NULL); 2544 #endif 2545 cc->cc_hits++; 2546 splx(s); 2547 FREECHECK_OUT(&pc->pc_freecheck, object); 2548 return object; 2549 } 2550 2551 /* 2552 * That failed. If the previous group isn't empty, swap 2553 * it with the current group and allocate from there. 2554 */ 2555 pcg = cc->cc_previous; 2556 if (__predict_true(pcg->pcg_avail > 0)) { 2557 cc->cc_previous = cc->cc_current; 2558 cc->cc_current = pcg; 2559 continue; 2560 } 2561 2562 /* 2563 * Can't allocate from either group: try the slow path. 2564 * If get_slow() allocated an object for us, or if 2565 * no more objects are available, it will return false. 2566 * Otherwise, we need to retry. 2567 */ 2568 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2569 break; 2570 } 2571 2572 return object; 2573 } 2574 2575 static bool __noinline 2576 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2577 { 2578 pcg_t *pcg, *cur; 2579 uint64_t ncsw; 2580 pool_cache_t pc; 2581 2582 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2583 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2584 2585 pc = cc->cc_cache; 2586 pcg = NULL; 2587 cc->cc_misses++; 2588 2589 /* 2590 * If there are no empty groups in the cache then allocate one 2591 * while still unlocked. 2592 */ 2593 if (__predict_false(pc->pc_emptygroups == NULL)) { 2594 if (__predict_true(!pool_cache_disable)) { 2595 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2596 } 2597 if (__predict_true(pcg != NULL)) { 2598 pcg->pcg_avail = 0; 2599 pcg->pcg_size = pc->pc_pcgsize; 2600 } 2601 } 2602 2603 /* Lock the cache. 
*/ 2604 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2605 ncsw = curlwp->l_ncsw; 2606 mutex_enter(&pc->pc_lock); 2607 pc->pc_contended++; 2608 2609 /* 2610 * If we context switched while locking, then our view of 2611 * the per-CPU data is invalid: retry. 2612 */ 2613 if (__predict_false(curlwp->l_ncsw != ncsw)) { 2614 mutex_exit(&pc->pc_lock); 2615 if (pcg != NULL) { 2616 pool_put(pc->pc_pcgpool, pcg); 2617 } 2618 return true; 2619 } 2620 } 2621 2622 /* If there are no empty groups in the cache then allocate one. */ 2623 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2624 pcg = pc->pc_emptygroups; 2625 pc->pc_emptygroups = pcg->pcg_next; 2626 pc->pc_nempty--; 2627 } 2628 2629 /* 2630 * If there's a empty group, release our full group back 2631 * to the cache. Install the empty group to the local CPU 2632 * and return. 2633 */ 2634 if (pcg != NULL) { 2635 KASSERT(pcg->pcg_avail == 0); 2636 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2637 cc->cc_previous = pcg; 2638 } else { 2639 cur = cc->cc_current; 2640 if (__predict_true(cur != &pcg_dummy)) { 2641 KASSERT(cur->pcg_avail == cur->pcg_size); 2642 cur->pcg_next = pc->pc_fullgroups; 2643 pc->pc_fullgroups = cur; 2644 pc->pc_nfull++; 2645 } 2646 cc->cc_current = pcg; 2647 } 2648 pc->pc_hits++; 2649 mutex_exit(&pc->pc_lock); 2650 return true; 2651 } 2652 2653 /* 2654 * Nothing available locally or in cache, and we didn't 2655 * allocate an empty group. Take the slow path and destroy 2656 * the object here and now. 2657 */ 2658 pc->pc_misses++; 2659 mutex_exit(&pc->pc_lock); 2660 splx(s); 2661 pool_cache_destruct_object(pc, object); 2662 2663 return false; 2664 } 2665 2666 /* 2667 * pool_cache_put{,_paddr}: 2668 * 2669 * Put an object back to the pool cache (optionally caching the 2670 * physical address of the object). 2671 */ 2672 void 2673 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2674 { 2675 pool_cache_cpu_t *cc; 2676 pcg_t *pcg; 2677 int s; 2678 2679 KASSERT(object != NULL); 2680 FREECHECK_IN(&pc->pc_freecheck, object); 2681 2682 /* Lock out interrupts and disable preemption. */ 2683 s = splvm(); 2684 while (/* CONSTCOND */ true) { 2685 /* If the current group isn't full, release it there. */ 2686 cc = pc->pc_cpus[curcpu()->ci_index]; 2687 KASSERT(cc->cc_cache == pc); 2688 pcg = cc->cc_current; 2689 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2690 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2691 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2692 pcg->pcg_avail++; 2693 cc->cc_hits++; 2694 splx(s); 2695 return; 2696 } 2697 2698 /* 2699 * That failed. If the previous group isn't full, swap 2700 * it with the current group and try again. 2701 */ 2702 pcg = cc->cc_previous; 2703 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2704 cc->cc_previous = cc->cc_current; 2705 cc->cc_current = pcg; 2706 continue; 2707 } 2708 2709 /* 2710 * Can't free to either group: try the slow path. 2711 * If put_slow() releases the object for us, it 2712 * will return false. Otherwise we need to retry. 2713 */ 2714 if (!pool_cache_put_slow(cc, s, object)) 2715 break; 2716 } 2717 } 2718 2719 /* 2720 * pool_cache_xcall: 2721 * 2722 * Transfer objects from the per-CPU cache to the global cache. 2723 * Run within a cross-call thread. 
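 *	The CPU's current and previous slots are reset to the read-only
 *	pcg_dummy group, so subsequent get/put operations on this CPU
 *	take their slow paths until fresh groups are installed.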
2724 */ 2725 static void 2726 pool_cache_xcall(pool_cache_t pc) 2727 { 2728 pool_cache_cpu_t *cc; 2729 pcg_t *prev, *cur, **list; 2730 int s; 2731 2732 s = splvm(); 2733 mutex_enter(&pc->pc_lock); 2734 cc = pc->pc_cpus[curcpu()->ci_index]; 2735 cur = cc->cc_current; 2736 cc->cc_current = __UNCONST(&pcg_dummy); 2737 prev = cc->cc_previous; 2738 cc->cc_previous = __UNCONST(&pcg_dummy); 2739 if (cur != &pcg_dummy) { 2740 if (cur->pcg_avail == cur->pcg_size) { 2741 list = &pc->pc_fullgroups; 2742 pc->pc_nfull++; 2743 } else if (cur->pcg_avail == 0) { 2744 list = &pc->pc_emptygroups; 2745 pc->pc_nempty++; 2746 } else { 2747 list = &pc->pc_partgroups; 2748 pc->pc_npart++; 2749 } 2750 cur->pcg_next = *list; 2751 *list = cur; 2752 } 2753 if (prev != &pcg_dummy) { 2754 if (prev->pcg_avail == prev->pcg_size) { 2755 list = &pc->pc_fullgroups; 2756 pc->pc_nfull++; 2757 } else if (prev->pcg_avail == 0) { 2758 list = &pc->pc_emptygroups; 2759 pc->pc_nempty++; 2760 } else { 2761 list = &pc->pc_partgroups; 2762 pc->pc_npart++; 2763 } 2764 prev->pcg_next = *list; 2765 *list = prev; 2766 } 2767 mutex_exit(&pc->pc_lock); 2768 splx(s); 2769 } 2770 2771 /* 2772 * Pool backend allocators. 2773 * 2774 * Each pool has a backend allocator that handles allocation, deallocation, 2775 * and any additional draining that might be needed. 2776 * 2777 * We provide two standard allocators: 2778 * 2779 * pool_allocator_kmem - the default when no allocator is specified 2780 * 2781 * pool_allocator_nointr - used for pools that will not be accessed 2782 * in interrupt context. 2783 */ 2784 void *pool_page_alloc(struct pool *, int); 2785 void pool_page_free(struct pool *, void *); 2786 2787 #ifdef POOL_SUBPAGE 2788 struct pool_allocator pool_allocator_kmem_fullpage = { 2789 pool_page_alloc, pool_page_free, 0, 2790 .pa_backingmapptr = &kmem_map, 2791 }; 2792 #else 2793 struct pool_allocator pool_allocator_kmem = { 2794 pool_page_alloc, pool_page_free, 0, 2795 .pa_backingmapptr = &kmem_map, 2796 }; 2797 #endif 2798 2799 void *pool_page_alloc_nointr(struct pool *, int); 2800 void pool_page_free_nointr(struct pool *, void *); 2801 2802 #ifdef POOL_SUBPAGE 2803 struct pool_allocator pool_allocator_nointr_fullpage = { 2804 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2805 .pa_backingmapptr = &kernel_map, 2806 }; 2807 #else 2808 struct pool_allocator pool_allocator_nointr = { 2809 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2810 .pa_backingmapptr = &kernel_map, 2811 }; 2812 #endif 2813 2814 #ifdef POOL_SUBPAGE 2815 void *pool_subpage_alloc(struct pool *, int); 2816 void pool_subpage_free(struct pool *, void *); 2817 2818 struct pool_allocator pool_allocator_kmem = { 2819 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2820 .pa_backingmapptr = &kmem_map, 2821 }; 2822 2823 void *pool_subpage_alloc_nointr(struct pool *, int); 2824 void pool_subpage_free_nointr(struct pool *, void *); 2825 2826 struct pool_allocator pool_allocator_nointr = { 2827 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2828 .pa_backingmapptr = &kmem_map, 2829 }; 2830 #endif /* POOL_SUBPAGE */ 2831 2832 static void * 2833 pool_allocator_alloc(struct pool *pp, int flags) 2834 { 2835 struct pool_allocator *pa = pp->pr_alloc; 2836 void *res; 2837 2838 res = (*pa->pa_alloc)(pp, flags); 2839 if (res == NULL && (flags & PR_WAITOK) == 0) { 2840 /* 2841 * We only run the drain hook here if PR_NOWAIT. 2842 * In other cases, the hook will be run in 2843 * pool_reclaim(). 
2844 */ 2845 if (pp->pr_drain_hook != NULL) { 2846 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2847 res = (*pa->pa_alloc)(pp, flags); 2848 } 2849 } 2850 return res; 2851 } 2852 2853 static void 2854 pool_allocator_free(struct pool *pp, void *v) 2855 { 2856 struct pool_allocator *pa = pp->pr_alloc; 2857 2858 (*pa->pa_free)(pp, v); 2859 } 2860 2861 void * 2862 pool_page_alloc(struct pool *pp, int flags) 2863 { 2864 bool waitok = (flags & PR_WAITOK) ? true : false; 2865 2866 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok)); 2867 } 2868 2869 void 2870 pool_page_free(struct pool *pp, void *v) 2871 { 2872 2873 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v); 2874 } 2875 2876 static void * 2877 pool_page_alloc_meta(struct pool *pp, int flags) 2878 { 2879 bool waitok = (flags & PR_WAITOK) ? true : false; 2880 2881 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok)); 2882 } 2883 2884 static void 2885 pool_page_free_meta(struct pool *pp, void *v) 2886 { 2887 2888 uvm_km_free_poolpage(kmem_map, (vaddr_t) v); 2889 } 2890 2891 #ifdef POOL_SUBPAGE 2892 /* Sub-page allocator, for machines with large hardware pages. */ 2893 void * 2894 pool_subpage_alloc(struct pool *pp, int flags) 2895 { 2896 return pool_get(&psppool, flags); 2897 } 2898 2899 void 2900 pool_subpage_free(struct pool *pp, void *v) 2901 { 2902 pool_put(&psppool, v); 2903 } 2904 2905 /* We don't provide a real nointr allocator. Maybe later. */ 2906 void * 2907 pool_subpage_alloc_nointr(struct pool *pp, int flags) 2908 { 2909 2910 return (pool_subpage_alloc(pp, flags)); 2911 } 2912 2913 void 2914 pool_subpage_free_nointr(struct pool *pp, void *v) 2915 { 2916 2917 pool_subpage_free(pp, v); 2918 } 2919 #endif /* POOL_SUBPAGE */ 2920 void * 2921 pool_page_alloc_nointr(struct pool *pp, int flags) 2922 { 2923 bool waitok = (flags & PR_WAITOK) ? 
true : false; 2924 2925 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok)); 2926 } 2927 2928 void 2929 pool_page_free_nointr(struct pool *pp, void *v) 2930 { 2931 2932 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v); 2933 } 2934 2935 #if defined(DDB) 2936 static bool 2937 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2938 { 2939 2940 return (uintptr_t)ph->ph_page <= addr && 2941 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 2942 } 2943 2944 static bool 2945 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 2946 { 2947 2948 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 2949 } 2950 2951 static bool 2952 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 2953 { 2954 int i; 2955 2956 if (pcg == NULL) { 2957 return false; 2958 } 2959 for (i = 0; i < pcg->pcg_avail; i++) { 2960 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 2961 return true; 2962 } 2963 } 2964 return false; 2965 } 2966 2967 static bool 2968 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2969 { 2970 2971 if ((pp->pr_roflags & PR_NOTOUCH) != 0) { 2972 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr); 2973 pool_item_bitmap_t *bitmap = 2974 ph->ph_bitmap + (idx / BITMAP_SIZE); 2975 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 2976 2977 return (*bitmap & mask) == 0; 2978 } else { 2979 struct pool_item *pi; 2980 2981 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 2982 if (pool_in_item(pp, pi, addr)) { 2983 return false; 2984 } 2985 } 2986 return true; 2987 } 2988 } 2989 2990 void 2991 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2992 { 2993 struct pool *pp; 2994 2995 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 2996 struct pool_item_header *ph; 2997 uintptr_t item; 2998 bool allocated = true; 2999 bool incache = false; 3000 bool incpucache = false; 3001 char cpucachestr[32]; 3002 3003 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 3004 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 3005 if (pool_in_page(pp, ph, addr)) { 3006 goto found; 3007 } 3008 } 3009 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 3010 if (pool_in_page(pp, ph, addr)) { 3011 allocated = 3012 pool_allocated(pp, ph, addr); 3013 goto found; 3014 } 3015 } 3016 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 3017 if (pool_in_page(pp, ph, addr)) { 3018 allocated = false; 3019 goto found; 3020 } 3021 } 3022 continue; 3023 } else { 3024 ph = pr_find_pagehead_noalign(pp, (void *)addr); 3025 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 3026 continue; 3027 } 3028 allocated = pool_allocated(pp, ph, addr); 3029 } 3030 found: 3031 if (allocated && pp->pr_cache) { 3032 pool_cache_t pc = pp->pr_cache; 3033 struct pool_cache_group *pcg; 3034 int i; 3035 3036 for (pcg = pc->pc_fullgroups; pcg != NULL; 3037 pcg = pcg->pcg_next) { 3038 if (pool_in_cg(pp, pcg, addr)) { 3039 incache = true; 3040 goto print; 3041 } 3042 } 3043 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 3044 pool_cache_cpu_t *cc; 3045 3046 if ((cc = pc->pc_cpus[i]) == NULL) { 3047 continue; 3048 } 3049 if (pool_in_cg(pp, cc->cc_current, addr) || 3050 pool_in_cg(pp, cc->cc_previous, addr)) { 3051 struct cpu_info *ci = 3052 cpu_lookup(i); 3053 3054 incpucache = true; 3055 snprintf(cpucachestr, 3056 sizeof(cpucachestr), 3057 "cached by CPU %u", 3058 ci->ci_index); 3059 goto print; 3060 } 3061 } 3062 } 3063 print: 3064 item = (uintptr_t)ph->ph_page + ph->ph_off; 3065 item = item + rounddown(addr - item, pp->pr_size); 
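		/* item is now the base address of the pool item containing addr. */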
3066 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 3067 (void *)addr, (void *)item, (size_t)(addr - item), 3068 pp->pr_wchan, 3069 incpucache ? cpucachestr : 3070 incache ? "cached" : allocated ? "allocated" : "free"); 3071 } 3072 } 3073 #endif /* defined(DDB) */ 3074
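/*
 * Editorial usage sketch (not part of the original file): how a typical
 * subsystem might consume the pool_cache API defined above.  The names
 * "struct foo", foo_ctor() and foo_dtor() are hypothetical; the
 * pool_cache_*() calls follow the signatures in this file, and
 * pool_cache_get()/pool_cache_put() are assumed to be the usual
 * non-paddr wrappers from <sys/pool.h>.  With palloc == NULL and
 * IPL_NONE the cache uses pool_allocator_nointr, so it must not be
 * used from interrupt context.
 *
 *	static pool_cache_t foo_cache;
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		memset(f, 0, sizeof(*f));	// one-time, possibly expensive setup
 *		return 0;			// non-zero would fail the get
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		// undo whatever foo_ctor() did before the object goes
 *		// back to the underlying pool
 *	}
 *
 *	void
 *	foo_init(void)
 *	{
 *		foo_cache = pool_cache_init(sizeof(struct foo), 0, 0, 0,
 *		    "foopl", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return pool_cache_get(foo_cache, PR_WAITOK);
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_cache_put(foo_cache, f);
 *	}
 *
 *	void
 *	foo_fini(void)
 *	{
 *		pool_cache_destroy(foo_cache);
 *	}
 */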