/*	$NetBSD: subr_pool.c,v 1.186 2010/06/03 10:40:17 pooka Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.186 2010/06/03 10:40:17 pooka Exp $");

#include "opt_ddb.h"
#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitops.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/debug.h>
#include <sys/lockdebug.h>
#include <sys/xcall.h>
#include <sys/cpu.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/* List of all pools */
static TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx) \
	(((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx)))
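/*
 * Illustrative sketch (not part of the original source): how a client
 * subsystem typically uses the management scheme described above.  The
 * names "struct foo", foo_pool, foo_init(), foo_alloc() and foo_free()
 * are hypothetical and exist only for this example; the pool_init(),
 * pool_get() and pool_put() calls match the definitions in this file.
 */
#if 0
struct foo {
	LIST_ENTRY(foo)	f_list;
	int		f_value;
};

static struct pool foo_pool;

static void
foo_init(void)
{

	/* Item size, default alignment/offset, default (kmem) allocator. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
	    "foopl", NULL, IPL_NONE);
}

static struct foo *
foo_alloc(void)
{

	/* PR_WAITOK: may sleep until an item becomes available. */
	return pool_get(&foo_pool, PR_WAITOK);
}

static void
foo_free(struct foo *f)
{

	pool_put(&foo_pool, f);
}
#endif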
#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

static SLIST_HEAD(, pool_allocator) pa_deferinitq =
    SLIST_HEAD_INITIALIZER(pa_deferinitq);

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
struct pool_allocator pool_allocator_meta = {
	pool_page_alloc_meta, pool_page_free_meta,
	.pa_backingmapptr = &kmem_map,
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This lock protects both pool_head and drainpp. */
static kmutex_t pool_head_lock;
static kcondvar_t pool_busy;

/* This lock protects initialization of a potentially shared pool allocator */
static kmutex_t pool_allocator_lock;

typedef uint32_t pool_item_bitmap_t;
#define	BITMAP_SIZE	(CHAR_BIT * sizeof(pool_item_bitmap_t))
#define	BITMAP_MASK	(BITMAP_SIZE - 1)

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	void *			ph_page;	/* this page's address */
	uint32_t		ph_time;	/* last referenced */
	uint16_t		ph_nmissing;	/* # of chunks in use */
	uint16_t		ph_off;		/* start offset in page */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			pool_item_bitmap_t phu_bitmap[1];
		} phu_notouch;
	} ph_u;
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_bitmap	ph_u.phu_notouch.phu_bitmap

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeaddeadU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references up
 * to PCG_NUMOBJECTS constructed objects.  When a cache allocates an
 * object from the pool, it calls the object's constructor and places it
 * into a cache group.  When a cache group frees an object back to the
 * pool, it first calls the object's destructor.  This allows the object
 * to persist in constructed form while freed to the cache.
 *
 * The pool references each cache, so that when a pool is drained by the
 * pagedaemon, it can drain each individual cache as well.  Each time a
 * cache is drained, the most idle cache group is freed to the pool in
 * its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

static struct pool pcg_normal_pool;
static struct pool pcg_large_pool;
static struct pool cache_pool;
static struct pool cache_cpu_pool;
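/*
 * Illustrative sketch (not part of the original source): a constructed-
 * object cache as described in the comment above.  "struct bar" and the
 * bar_* names are hypothetical; pool_cache_init() is defined later in
 * this file, and pool_cache_get()/pool_cache_put() are the usual
 * <sys/pool.h> wrappers around the corresponding *_paddr functions.
 */
#if 0
struct bar {
	kmutex_t	b_lock;
	int		b_refs;
};

static pool_cache_t bar_cache;

static int
bar_ctor(void *arg, void *obj, int flags)
{
	struct bar *b = obj;

	/* Expensive setup, paid only when the cache misses. */
	mutex_init(&b->b_lock, MUTEX_DEFAULT, IPL_NONE);
	b->b_refs = 0;
	return 0;
}

static void
bar_dtor(void *arg, void *obj)
{
	struct bar *b = obj;

	/* Torn down only when the object finally leaves the cache. */
	mutex_destroy(&b->b_lock);
}

static void
bar_init(void)
{

	bar_cache = pool_cache_init(sizeof(struct bar), coherency_unit,
	    0, 0, "barcache", NULL, IPL_NONE, bar_ctor, bar_dtor, NULL);
}

static struct bar *
bar_get(void)
{

	/* Returns a constructed object, from a cache group if possible. */
	return pool_cache_get(bar_cache, PR_WAITOK);
}

static void
bar_put(struct bar *b)
{

	/* The object stays constructed while it sits in the cache. */
	pool_cache_put(bar_cache, b);
}
#endif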
/* List of all caches. */
TAILQ_HEAD(,pool_cache) pool_cache_head =
    TAILQ_HEAD_INITIALIZER(pool_cache_head);

int pool_cache_disable;		/* global disable for caching */
static const pcg_t pcg_dummy;	/* zero sized: always empty, yet always full */

static bool	pool_cache_put_slow(pool_cache_cpu_t *, int,
				    void *);
static bool	pool_cache_get_slow(pool_cache_cpu_t *, int,
				    void **, paddr_t *, int);
static void	pool_cache_cpu_init1(struct cpu_info *, pool_cache_t);
static void	pool_cache_invalidate_groups(pool_cache_t, pcg_t *);
static void	pool_cache_invalidate_cpu(pool_cache_t, u_int);
static void	pool_cache_xcall(pool_cache_t);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, void *,
		    struct pool_item_header *);
static void	pool_update_curpage(struct pool *);

static int	pool_grow(struct pool *, int);
static void	*pool_allocator_alloc(struct pool *, int);
static void	pool_allocator_free(struct pool *, void *);

static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
	void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

static int pool_chk_page(struct pool *, const char *,
			 struct pool_item_header *);

/*
 * Pool log entry. An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

#ifdef POOL_DIAGNOSTIC
/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	if (pp->pr_log == NULL) {
		if (kmem_map != NULL)
			pp->pr_log = malloc(
				pool_logsize * sizeof(struct pool_log),
				M_TEMP, M_NOWAIT | M_ZERO);
		if (pp->pr_log == NULL)
			return;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	n = pp->pr_curlogentry;
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    void (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if (pp->pr_log == NULL)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ?
"get" : "put", 290 pl->pl_addr); 291 (*pr)("\t\tfile: %s at line %lu\n", 292 pl->pl_file, pl->pl_line); 293 } 294 } 295 if (++n >= pp->pr_logsize) 296 n = 0; 297 } 298 } 299 300 static inline void 301 pr_enter(struct pool *pp, const char *file, long line) 302 { 303 304 if (__predict_false(pp->pr_entered_file != NULL)) { 305 printf("pool %s: reentrancy at file %s line %ld\n", 306 pp->pr_wchan, file, line); 307 printf(" previous entry at file %s line %ld\n", 308 pp->pr_entered_file, pp->pr_entered_line); 309 panic("pr_enter"); 310 } 311 312 pp->pr_entered_file = file; 313 pp->pr_entered_line = line; 314 } 315 316 static inline void 317 pr_leave(struct pool *pp) 318 { 319 320 if (__predict_false(pp->pr_entered_file == NULL)) { 321 printf("pool %s not entered?\n", pp->pr_wchan); 322 panic("pr_leave"); 323 } 324 325 pp->pr_entered_file = NULL; 326 pp->pr_entered_line = 0; 327 } 328 329 static inline void 330 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 331 { 332 333 if (pp->pr_entered_file != NULL) 334 (*pr)("\n\tcurrently entered from file %s line %ld\n", 335 pp->pr_entered_file, pp->pr_entered_line); 336 } 337 #else 338 #define pr_log(pp, v, action, file, line) 339 #define pr_printlog(pp, pi, pr) 340 #define pr_enter(pp, file, line) 341 #define pr_leave(pp) 342 #define pr_enter_check(pp, pr) 343 #endif /* POOL_DIAGNOSTIC */ 344 345 static inline unsigned int 346 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 347 const void *v) 348 { 349 const char *cp = v; 350 unsigned int idx; 351 352 KASSERT(pp->pr_roflags & PR_NOTOUCH); 353 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 354 KASSERT(idx < pp->pr_itemsperpage); 355 return idx; 356 } 357 358 static inline void 359 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 360 void *obj) 361 { 362 unsigned int idx = pr_item_notouch_index(pp, ph, obj); 363 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 364 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 365 366 KASSERT((*bitmap & mask) == 0); 367 *bitmap |= mask; 368 } 369 370 static inline void * 371 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 372 { 373 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 374 unsigned int idx; 375 int i; 376 377 for (i = 0; ; i++) { 378 int bit; 379 380 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 381 bit = ffs32(bitmap[i]); 382 if (bit) { 383 pool_item_bitmap_t mask; 384 385 bit--; 386 idx = (i * BITMAP_SIZE) + bit; 387 mask = 1 << bit; 388 KASSERT((bitmap[i] & mask) != 0); 389 bitmap[i] &= ~mask; 390 break; 391 } 392 } 393 KASSERT(idx < pp->pr_itemsperpage); 394 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 395 } 396 397 static inline void 398 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph) 399 { 400 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 401 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 402 int i; 403 404 for (i = 0; i < n; i++) { 405 bitmap[i] = (pool_item_bitmap_t)-1; 406 } 407 } 408 409 static inline int 410 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 411 { 412 413 /* 414 * we consider pool_item_header with smaller ph_page bigger. 415 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 
416 */ 417 418 if (a->ph_page < b->ph_page) 419 return (1); 420 else if (a->ph_page > b->ph_page) 421 return (-1); 422 else 423 return (0); 424 } 425 426 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 427 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 428 429 static inline struct pool_item_header * 430 pr_find_pagehead_noalign(struct pool *pp, void *v) 431 { 432 struct pool_item_header *ph, tmp; 433 434 tmp.ph_page = (void *)(uintptr_t)v; 435 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 436 if (ph == NULL) { 437 ph = SPLAY_ROOT(&pp->pr_phtree); 438 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 439 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 440 } 441 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 442 } 443 444 return ph; 445 } 446 447 /* 448 * Return the pool page header based on item address. 449 */ 450 static inline struct pool_item_header * 451 pr_find_pagehead(struct pool *pp, void *v) 452 { 453 struct pool_item_header *ph, tmp; 454 455 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 456 ph = pr_find_pagehead_noalign(pp, v); 457 } else { 458 void *page = 459 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 460 461 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 462 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 463 } else { 464 tmp.ph_page = page; 465 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 466 } 467 } 468 469 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 470 ((char *)ph->ph_page <= (char *)v && 471 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 472 return ph; 473 } 474 475 static void 476 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 477 { 478 struct pool_item_header *ph; 479 480 while ((ph = LIST_FIRST(pq)) != NULL) { 481 LIST_REMOVE(ph, ph_pagelist); 482 pool_allocator_free(pp, ph->ph_page); 483 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 484 pool_put(pp->pr_phpool, ph); 485 } 486 } 487 488 /* 489 * Remove a page from the pool. 490 */ 491 static inline void 492 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 493 struct pool_pagelist *pq) 494 { 495 496 KASSERT(mutex_owned(&pp->pr_lock)); 497 498 /* 499 * If the page was idle, decrement the idle page count. 500 */ 501 if (ph->ph_nmissing == 0) { 502 #ifdef DIAGNOSTIC 503 if (pp->pr_nidle == 0) 504 panic("pr_rmpage: nidle inconsistent"); 505 if (pp->pr_nitems < pp->pr_itemsperpage) 506 panic("pr_rmpage: nitems inconsistent"); 507 #endif 508 pp->pr_nidle--; 509 } 510 511 pp->pr_nitems -= pp->pr_itemsperpage; 512 513 /* 514 * Unlink the page from the pool and queue it for release. 
515 */ 516 LIST_REMOVE(ph, ph_pagelist); 517 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 518 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 519 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 520 521 pp->pr_npages--; 522 pp->pr_npagefree++; 523 524 pool_update_curpage(pp); 525 } 526 527 static bool 528 pa_starved_p(struct pool_allocator *pa) 529 { 530 531 if (pa->pa_backingmap != NULL) { 532 return vm_map_starved_p(pa->pa_backingmap); 533 } 534 return false; 535 } 536 537 static int 538 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) 539 { 540 struct pool *pp = obj; 541 struct pool_allocator *pa = pp->pr_alloc; 542 543 KASSERT(&pp->pr_reclaimerentry == ce); 544 pool_reclaim(pp); 545 if (!pa_starved_p(pa)) { 546 return CALLBACK_CHAIN_ABORT; 547 } 548 return CALLBACK_CHAIN_CONTINUE; 549 } 550 551 static void 552 pool_reclaim_register(struct pool *pp) 553 { 554 struct vm_map *map = pp->pr_alloc->pa_backingmap; 555 int s; 556 557 if (map == NULL) { 558 return; 559 } 560 561 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 562 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 563 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 564 splx(s); 565 566 #ifdef DIAGNOSTIC 567 /* Diagnostic drain attempt. */ 568 uvm_km_va_drain(map, 0); 569 #endif 570 } 571 572 static void 573 pool_reclaim_unregister(struct pool *pp) 574 { 575 struct vm_map *map = pp->pr_alloc->pa_backingmap; 576 int s; 577 578 if (map == NULL) { 579 return; 580 } 581 582 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 583 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 584 &pp->pr_reclaimerentry); 585 splx(s); 586 } 587 588 static void 589 pa_reclaim_register(struct pool_allocator *pa) 590 { 591 struct vm_map *map = *pa->pa_backingmapptr; 592 struct pool *pp; 593 594 KASSERT(pa->pa_backingmap == NULL); 595 if (map == NULL) { 596 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 597 return; 598 } 599 pa->pa_backingmap = map; 600 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 601 pool_reclaim_register(pp); 602 } 603 } 604 605 /* 606 * Initialize all the pools listed in the "pools" link set. 607 */ 608 void 609 pool_subsystem_init(void) 610 { 611 struct pool_allocator *pa; 612 613 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 614 mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 615 cv_init(&pool_busy, "poolbusy"); 616 617 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 618 KASSERT(pa->pa_backingmapptr != NULL); 619 KASSERT(*pa->pa_backingmapptr != NULL); 620 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 621 pa_reclaim_register(pa); 622 } 623 624 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 625 0, 0, "pcache", &pool_allocator_nointr, IPL_NONE); 626 627 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 628 0, 0, "pcachecpu", &pool_allocator_nointr, IPL_NONE); 629 } 630 631 /* 632 * Initialize the given pool resource structure. 633 * 634 * We export this routine to allow other kernel parts to declare 635 * static pools that must be initialized before malloc() is available. 636 */ 637 void 638 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 639 const char *wchan, struct pool_allocator *palloc, int ipl) 640 { 641 struct pool *pp1; 642 size_t trysize, phsize; 643 int off, slack; 644 645 #ifdef DEBUG 646 /* 647 * Check that the pool hasn't already been initialised and 648 * added to the list of all pools. 
649 */ 650 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 651 if (pp == pp1) 652 panic("pool_init: pool %s already initialised", 653 wchan); 654 } 655 #endif 656 657 #ifdef POOL_DIAGNOSTIC 658 /* 659 * Always log if POOL_DIAGNOSTIC is defined. 660 */ 661 if (pool_logsize != 0) 662 flags |= PR_LOGGING; 663 #endif 664 665 if (palloc == NULL) 666 palloc = &pool_allocator_kmem; 667 #ifdef POOL_SUBPAGE 668 if (size > palloc->pa_pagesz) { 669 if (palloc == &pool_allocator_kmem) 670 palloc = &pool_allocator_kmem_fullpage; 671 else if (palloc == &pool_allocator_nointr) 672 palloc = &pool_allocator_nointr_fullpage; 673 } 674 #endif /* POOL_SUBPAGE */ 675 if (!cold) 676 mutex_enter(&pool_allocator_lock); 677 if (palloc->pa_refcnt++ == 0) { 678 if (palloc->pa_pagesz == 0) 679 palloc->pa_pagesz = PAGE_SIZE; 680 681 TAILQ_INIT(&palloc->pa_list); 682 683 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 684 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 685 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 686 687 if (palloc->pa_backingmapptr != NULL) { 688 pa_reclaim_register(palloc); 689 } 690 } 691 if (!cold) 692 mutex_exit(&pool_allocator_lock); 693 694 if (align == 0) 695 align = ALIGN(1); 696 697 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 698 size = sizeof(struct pool_item); 699 700 size = roundup(size, align); 701 #ifdef DIAGNOSTIC 702 if (size > palloc->pa_pagesz) 703 panic("pool_init: pool item size (%zu) too large", size); 704 #endif 705 706 /* 707 * Initialize the pool structure. 708 */ 709 LIST_INIT(&pp->pr_emptypages); 710 LIST_INIT(&pp->pr_fullpages); 711 LIST_INIT(&pp->pr_partpages); 712 pp->pr_cache = NULL; 713 pp->pr_curpage = NULL; 714 pp->pr_npages = 0; 715 pp->pr_minitems = 0; 716 pp->pr_minpages = 0; 717 pp->pr_maxpages = UINT_MAX; 718 pp->pr_roflags = flags; 719 pp->pr_flags = 0; 720 pp->pr_size = size; 721 pp->pr_align = align; 722 pp->pr_wchan = wchan; 723 pp->pr_alloc = palloc; 724 pp->pr_nitems = 0; 725 pp->pr_nout = 0; 726 pp->pr_hardlimit = UINT_MAX; 727 pp->pr_hardlimit_warning = NULL; 728 pp->pr_hardlimit_ratecap.tv_sec = 0; 729 pp->pr_hardlimit_ratecap.tv_usec = 0; 730 pp->pr_hardlimit_warning_last.tv_sec = 0; 731 pp->pr_hardlimit_warning_last.tv_usec = 0; 732 pp->pr_drain_hook = NULL; 733 pp->pr_drain_hook_arg = NULL; 734 pp->pr_freecheck = NULL; 735 736 /* 737 * Decide whether to put the page header off page to avoid 738 * wasting too large a part of the page or too big item. 739 * Off-page page headers go on a hash table, so we can match 740 * a returned item with its header based on the page address. 741 * We use 1/16 of the page size and about 8 times of the item 742 * size as the threshold (XXX: tune) 743 * 744 * However, we'll put the header into the page if we can put 745 * it without wasting any items. 746 * 747 * Silently enforce `0 <= ioff < align'. 748 */ 749 pp->pr_itemoffset = ioff %= align; 750 /* See the comment below about reserved bytes. 
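 *
 * Illustrative aside (not part of the original source): the header-
 * placement heuristic described above can be restated as a small
 * predicate.  This ignores the PR_NOTOUCH/PR_NOALIGN cases and the
 * ioff adjustment, and the name phinpage_p() is hypothetical; it only
 * mirrors the condition used a few lines below.
 */
#if 0
static int
phinpage_p(size_t pagesz, size_t itemsz, size_t phsize)
{

	/*
	 * Keep the header inside the page either when items are small
	 * (less than 1/16 of the page and less than eight headers'
	 * worth), or when the header fits into the slack without
	 * costing an item.
	 */
	return itemsz < MIN(pagesz / 16, phsize << 3) ||
	    pagesz / itemsz == (pagesz - phsize) / itemsz;
}
#endif
/*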
*/ 751 trysize = palloc->pa_pagesz - ((align - ioff) % align); 752 phsize = ALIGN(sizeof(struct pool_item_header)); 753 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 && 754 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 755 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 756 /* Use the end of the page for the page header */ 757 pp->pr_roflags |= PR_PHINPAGE; 758 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 759 } else { 760 /* The page header will be taken from our page header pool */ 761 pp->pr_phoffset = 0; 762 off = palloc->pa_pagesz; 763 SPLAY_INIT(&pp->pr_phtree); 764 } 765 766 /* 767 * Alignment is to take place at `ioff' within the item. This means 768 * we must reserve up to `align - 1' bytes on the page to allow 769 * appropriate positioning of each item. 770 */ 771 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 772 KASSERT(pp->pr_itemsperpage != 0); 773 if ((pp->pr_roflags & PR_NOTOUCH)) { 774 int idx; 775 776 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 777 idx++) { 778 /* nothing */ 779 } 780 if (idx >= PHPOOL_MAX) { 781 /* 782 * if you see this panic, consider to tweak 783 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 784 */ 785 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 786 pp->pr_wchan, pp->pr_itemsperpage); 787 } 788 pp->pr_phpool = &phpool[idx]; 789 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 790 pp->pr_phpool = &phpool[0]; 791 } 792 #if defined(DIAGNOSTIC) 793 else { 794 pp->pr_phpool = NULL; 795 } 796 #endif 797 798 /* 799 * Use the slack between the chunks and the page header 800 * for "cache coloring". 801 */ 802 slack = off - pp->pr_itemsperpage * pp->pr_size; 803 pp->pr_maxcolor = (slack / align) * align; 804 pp->pr_curcolor = 0; 805 806 pp->pr_nget = 0; 807 pp->pr_nfail = 0; 808 pp->pr_nput = 0; 809 pp->pr_npagealloc = 0; 810 pp->pr_npagefree = 0; 811 pp->pr_hiwat = 0; 812 pp->pr_nidle = 0; 813 pp->pr_refcnt = 0; 814 815 pp->pr_log = NULL; 816 817 pp->pr_entered_file = NULL; 818 pp->pr_entered_line = 0; 819 820 mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl); 821 cv_init(&pp->pr_cv, wchan); 822 pp->pr_ipl = ipl; 823 824 /* 825 * Initialize private page header pool and cache magazine pool if we 826 * haven't done so yet. 827 * XXX LOCKING. 828 */ 829 if (phpool[0].pr_size == 0) { 830 int idx; 831 for (idx = 0; idx < PHPOOL_MAX; idx++) { 832 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 833 int nelem; 834 size_t sz; 835 836 nelem = PHPOOL_FREELIST_NELEM(idx); 837 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 838 "phpool-%d", nelem); 839 sz = sizeof(struct pool_item_header); 840 if (nelem) { 841 sz = offsetof(struct pool_item_header, 842 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 843 } 844 pool_init(&phpool[idx], sz, 0, 0, 0, 845 phpool_names[idx], &pool_allocator_meta, IPL_VM); 846 } 847 #ifdef POOL_SUBPAGE 848 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 849 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 850 #endif 851 852 size = sizeof(pcg_t) + 853 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 854 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 855 "pcgnormal", &pool_allocator_meta, IPL_VM); 856 857 size = sizeof(pcg_t) + 858 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 859 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 860 "pcglarge", &pool_allocator_meta, IPL_VM); 861 } 862 863 /* Insert into the list of all pools. 
*/ 864 if (!cold) 865 mutex_enter(&pool_head_lock); 866 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 867 if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0) 868 break; 869 } 870 if (pp1 == NULL) 871 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 872 else 873 TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist); 874 if (!cold) 875 mutex_exit(&pool_head_lock); 876 877 /* Insert this into the list of pools using this allocator. */ 878 if (!cold) 879 mutex_enter(&palloc->pa_lock); 880 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 881 if (!cold) 882 mutex_exit(&palloc->pa_lock); 883 884 pool_reclaim_register(pp); 885 } 886 887 /* 888 * De-commision a pool resource. 889 */ 890 void 891 pool_destroy(struct pool *pp) 892 { 893 struct pool_pagelist pq; 894 struct pool_item_header *ph; 895 896 /* Remove from global pool list */ 897 mutex_enter(&pool_head_lock); 898 while (pp->pr_refcnt != 0) 899 cv_wait(&pool_busy, &pool_head_lock); 900 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 901 if (drainpp == pp) 902 drainpp = NULL; 903 mutex_exit(&pool_head_lock); 904 905 /* Remove this pool from its allocator's list of pools. */ 906 pool_reclaim_unregister(pp); 907 mutex_enter(&pp->pr_alloc->pa_lock); 908 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 909 mutex_exit(&pp->pr_alloc->pa_lock); 910 911 mutex_enter(&pool_allocator_lock); 912 if (--pp->pr_alloc->pa_refcnt == 0) 913 mutex_destroy(&pp->pr_alloc->pa_lock); 914 mutex_exit(&pool_allocator_lock); 915 916 mutex_enter(&pp->pr_lock); 917 918 KASSERT(pp->pr_cache == NULL); 919 920 #ifdef DIAGNOSTIC 921 if (pp->pr_nout != 0) { 922 pr_printlog(pp, NULL, printf); 923 panic("pool_destroy: pool busy: still out: %u", 924 pp->pr_nout); 925 } 926 #endif 927 928 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 929 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 930 931 /* Remove all pages */ 932 LIST_INIT(&pq); 933 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 934 pr_rmpage(pp, ph, &pq); 935 936 mutex_exit(&pp->pr_lock); 937 938 pr_pagelist_free(pp, &pq); 939 940 #ifdef POOL_DIAGNOSTIC 941 if (pp->pr_log != NULL) { 942 free(pp->pr_log, M_TEMP); 943 pp->pr_log = NULL; 944 } 945 #endif 946 947 cv_destroy(&pp->pr_cv); 948 mutex_destroy(&pp->pr_lock); 949 } 950 951 void 952 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 953 { 954 955 /* XXX no locking -- must be used just after pool_init() */ 956 #ifdef DIAGNOSTIC 957 if (pp->pr_drain_hook != NULL) 958 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan); 959 #endif 960 pp->pr_drain_hook = fn; 961 pp->pr_drain_hook_arg = arg; 962 } 963 964 static struct pool_item_header * 965 pool_alloc_item_header(struct pool *pp, void *storage, int flags) 966 { 967 struct pool_item_header *ph; 968 969 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 970 ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset); 971 else 972 ph = pool_get(pp->pr_phpool, flags); 973 974 return (ph); 975 } 976 977 /* 978 * Grab an item from the pool. 
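 *
 * Illustrative caller-side note (not part of the original source):
 * PR_WAITOK callers may sleep and must be in a sleepable context,
 * while PR_NOWAIT callers must be prepared for a NULL return.  Using
 * the hypothetical foo_pool from the sketch near the top of the file:
 */
#if 0
static int
foo_try_alloc(struct foo **fp)
{

	*fp = pool_get(&foo_pool, PR_NOWAIT);
	if (*fp == NULL) {
		/* No item available and the allocator could not supply one. */
		return ENOMEM;
	}
	return 0;
}
#endif
/*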
979 */ 980 void * 981 #ifdef POOL_DIAGNOSTIC 982 _pool_get(struct pool *pp, int flags, const char *file, long line) 983 #else 984 pool_get(struct pool *pp, int flags) 985 #endif 986 { 987 struct pool_item *pi; 988 struct pool_item_header *ph; 989 void *v; 990 991 #ifdef DIAGNOSTIC 992 if (pp->pr_itemsperpage == 0) 993 panic("pool_get: pool '%s': pr_itemsperpage is zero, " 994 "pool not initialized?", pp->pr_wchan); 995 if ((cpu_intr_p() || cpu_softintr_p()) && pp->pr_ipl == IPL_NONE && 996 !cold && panicstr == NULL) 997 panic("pool '%s' is IPL_NONE, but called from " 998 "interrupt context\n", pp->pr_wchan); 999 #endif 1000 if (flags & PR_WAITOK) { 1001 ASSERT_SLEEPABLE(); 1002 } 1003 1004 mutex_enter(&pp->pr_lock); 1005 pr_enter(pp, file, line); 1006 1007 startover: 1008 /* 1009 * Check to see if we've reached the hard limit. If we have, 1010 * and we can wait, then wait until an item has been returned to 1011 * the pool. 1012 */ 1013 #ifdef DIAGNOSTIC 1014 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 1015 pr_leave(pp); 1016 mutex_exit(&pp->pr_lock); 1017 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 1018 } 1019 #endif 1020 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 1021 if (pp->pr_drain_hook != NULL) { 1022 /* 1023 * Since the drain hook is going to free things 1024 * back to the pool, unlock, call the hook, re-lock, 1025 * and check the hardlimit condition again. 1026 */ 1027 pr_leave(pp); 1028 mutex_exit(&pp->pr_lock); 1029 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 1030 mutex_enter(&pp->pr_lock); 1031 pr_enter(pp, file, line); 1032 if (pp->pr_nout < pp->pr_hardlimit) 1033 goto startover; 1034 } 1035 1036 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 1037 /* 1038 * XXX: A warning isn't logged in this case. Should 1039 * it be? 1040 */ 1041 pp->pr_flags |= PR_WANTED; 1042 pr_leave(pp); 1043 cv_wait(&pp->pr_cv, &pp->pr_lock); 1044 pr_enter(pp, file, line); 1045 goto startover; 1046 } 1047 1048 /* 1049 * Log a message that the hard limit has been hit. 1050 */ 1051 if (pp->pr_hardlimit_warning != NULL && 1052 ratecheck(&pp->pr_hardlimit_warning_last, 1053 &pp->pr_hardlimit_ratecap)) 1054 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 1055 1056 pp->pr_nfail++; 1057 1058 pr_leave(pp); 1059 mutex_exit(&pp->pr_lock); 1060 return (NULL); 1061 } 1062 1063 /* 1064 * The convention we use is that if `curpage' is not NULL, then 1065 * it points at a non-empty bucket. In particular, `curpage' 1066 * never points at a page header which has PR_PHINPAGE set and 1067 * has no items in its bucket. 1068 */ 1069 if ((ph = pp->pr_curpage) == NULL) { 1070 int error; 1071 1072 #ifdef DIAGNOSTIC 1073 if (pp->pr_nitems != 0) { 1074 mutex_exit(&pp->pr_lock); 1075 printf("pool_get: %s: curpage NULL, nitems %u\n", 1076 pp->pr_wchan, pp->pr_nitems); 1077 panic("pool_get: nitems inconsistent"); 1078 } 1079 #endif 1080 1081 /* 1082 * Call the back-end page allocator for more memory. 1083 * Release the pool lock, as the back-end page allocator 1084 * may block. 1085 */ 1086 pr_leave(pp); 1087 error = pool_grow(pp, flags); 1088 pr_enter(pp, file, line); 1089 if (error != 0) { 1090 /* 1091 * We were unable to allocate a page or item 1092 * header, but we released the lock during 1093 * allocation, so perhaps items were freed 1094 * back to the pool. Check for this case. 
1095 */ 1096 if (pp->pr_curpage != NULL) 1097 goto startover; 1098 1099 pp->pr_nfail++; 1100 pr_leave(pp); 1101 mutex_exit(&pp->pr_lock); 1102 return (NULL); 1103 } 1104 1105 /* Start the allocation process over. */ 1106 goto startover; 1107 } 1108 if (pp->pr_roflags & PR_NOTOUCH) { 1109 #ifdef DIAGNOSTIC 1110 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1111 pr_leave(pp); 1112 mutex_exit(&pp->pr_lock); 1113 panic("pool_get: %s: page empty", pp->pr_wchan); 1114 } 1115 #endif 1116 v = pr_item_notouch_get(pp, ph); 1117 #ifdef POOL_DIAGNOSTIC 1118 pr_log(pp, v, PRLOG_GET, file, line); 1119 #endif 1120 } else { 1121 v = pi = LIST_FIRST(&ph->ph_itemlist); 1122 if (__predict_false(v == NULL)) { 1123 pr_leave(pp); 1124 mutex_exit(&pp->pr_lock); 1125 panic("pool_get: %s: page empty", pp->pr_wchan); 1126 } 1127 #ifdef DIAGNOSTIC 1128 if (__predict_false(pp->pr_nitems == 0)) { 1129 pr_leave(pp); 1130 mutex_exit(&pp->pr_lock); 1131 printf("pool_get: %s: items on itemlist, nitems %u\n", 1132 pp->pr_wchan, pp->pr_nitems); 1133 panic("pool_get: nitems inconsistent"); 1134 } 1135 #endif 1136 1137 #ifdef POOL_DIAGNOSTIC 1138 pr_log(pp, v, PRLOG_GET, file, line); 1139 #endif 1140 1141 #ifdef DIAGNOSTIC 1142 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1143 pr_printlog(pp, pi, printf); 1144 panic("pool_get(%s): free list modified: " 1145 "magic=%x; page %p; item addr %p\n", 1146 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1147 } 1148 #endif 1149 1150 /* 1151 * Remove from item list. 1152 */ 1153 LIST_REMOVE(pi, pi_list); 1154 } 1155 pp->pr_nitems--; 1156 pp->pr_nout++; 1157 if (ph->ph_nmissing == 0) { 1158 #ifdef DIAGNOSTIC 1159 if (__predict_false(pp->pr_nidle == 0)) 1160 panic("pool_get: nidle inconsistent"); 1161 #endif 1162 pp->pr_nidle--; 1163 1164 /* 1165 * This page was previously empty. Move it to the list of 1166 * partially-full pages. This page is already curpage. 1167 */ 1168 LIST_REMOVE(ph, ph_pagelist); 1169 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1170 } 1171 ph->ph_nmissing++; 1172 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1173 #ifdef DIAGNOSTIC 1174 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1175 !LIST_EMPTY(&ph->ph_itemlist))) { 1176 pr_leave(pp); 1177 mutex_exit(&pp->pr_lock); 1178 panic("pool_get: %s: nmissing inconsistent", 1179 pp->pr_wchan); 1180 } 1181 #endif 1182 /* 1183 * This page is now full. Move it to the full list 1184 * and select a new current page. 1185 */ 1186 LIST_REMOVE(ph, ph_pagelist); 1187 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1188 pool_update_curpage(pp); 1189 } 1190 1191 pp->pr_nget++; 1192 pr_leave(pp); 1193 1194 /* 1195 * If we have a low water mark and we are now below that low 1196 * water mark, add more items to the pool. 1197 */ 1198 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1199 /* 1200 * XXX: Should we log a warning? Should we set up a timeout 1201 * to try again in a second or so? The latter could break 1202 * a caller's assumptions about interrupt protection, etc. 1203 */ 1204 } 1205 1206 mutex_exit(&pp->pr_lock); 1207 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 1208 FREECHECK_OUT(&pp->pr_freecheck, v); 1209 return (v); 1210 } 1211 1212 /* 1213 * Internal version of pool_put(). Pool is already locked/entered. 
1214 */ 1215 static void 1216 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1217 { 1218 struct pool_item *pi = v; 1219 struct pool_item_header *ph; 1220 1221 KASSERT(mutex_owned(&pp->pr_lock)); 1222 FREECHECK_IN(&pp->pr_freecheck, v); 1223 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1224 1225 #ifdef DIAGNOSTIC 1226 if (__predict_false(pp->pr_nout == 0)) { 1227 printf("pool %s: putting with none out\n", 1228 pp->pr_wchan); 1229 panic("pool_put"); 1230 } 1231 #endif 1232 1233 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1234 pr_printlog(pp, NULL, printf); 1235 panic("pool_put: %s: page header missing", pp->pr_wchan); 1236 } 1237 1238 /* 1239 * Return to item list. 1240 */ 1241 if (pp->pr_roflags & PR_NOTOUCH) { 1242 pr_item_notouch_put(pp, ph, v); 1243 } else { 1244 #ifdef DIAGNOSTIC 1245 pi->pi_magic = PI_MAGIC; 1246 #endif 1247 #ifdef DEBUG 1248 { 1249 int i, *ip = v; 1250 1251 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1252 *ip++ = PI_MAGIC; 1253 } 1254 } 1255 #endif 1256 1257 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1258 } 1259 KDASSERT(ph->ph_nmissing != 0); 1260 ph->ph_nmissing--; 1261 pp->pr_nput++; 1262 pp->pr_nitems++; 1263 pp->pr_nout--; 1264 1265 /* Cancel "pool empty" condition if it exists */ 1266 if (pp->pr_curpage == NULL) 1267 pp->pr_curpage = ph; 1268 1269 if (pp->pr_flags & PR_WANTED) { 1270 pp->pr_flags &= ~PR_WANTED; 1271 cv_broadcast(&pp->pr_cv); 1272 } 1273 1274 /* 1275 * If this page is now empty, do one of two things: 1276 * 1277 * (1) If we have more pages than the page high water mark, 1278 * free the page back to the system. ONLY CONSIDER 1279 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1280 * CLAIM. 1281 * 1282 * (2) Otherwise, move the page to the empty page list. 1283 * 1284 * Either way, select a new current page (so we use a partially-full 1285 * page if one is available). 1286 */ 1287 if (ph->ph_nmissing == 0) { 1288 pp->pr_nidle++; 1289 if (pp->pr_npages > pp->pr_minpages && 1290 pp->pr_npages > pp->pr_maxpages) { 1291 pr_rmpage(pp, ph, pq); 1292 } else { 1293 LIST_REMOVE(ph, ph_pagelist); 1294 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1295 1296 /* 1297 * Update the timestamp on the page. A page must 1298 * be idle for some period of time before it can 1299 * be reclaimed by the pagedaemon. This minimizes 1300 * ping-pong'ing for memory. 1301 * 1302 * note for 64-bit time_t: truncating to 32-bit is not 1303 * a problem for our usage. 1304 */ 1305 ph->ph_time = time_uptime; 1306 } 1307 pool_update_curpage(pp); 1308 } 1309 1310 /* 1311 * If the page was previously completely full, move it to the 1312 * partially-full list and make it the current page. The next 1313 * allocation will get the item from this page, instead of 1314 * further fragmenting the pool. 1315 */ 1316 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1317 LIST_REMOVE(ph, ph_pagelist); 1318 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1319 pp->pr_curpage = ph; 1320 } 1321 } 1322 1323 /* 1324 * Return resource to the pool. 
1325 */ 1326 #ifdef POOL_DIAGNOSTIC 1327 void 1328 _pool_put(struct pool *pp, void *v, const char *file, long line) 1329 { 1330 struct pool_pagelist pq; 1331 1332 LIST_INIT(&pq); 1333 1334 mutex_enter(&pp->pr_lock); 1335 pr_enter(pp, file, line); 1336 1337 pr_log(pp, v, PRLOG_PUT, file, line); 1338 1339 pool_do_put(pp, v, &pq); 1340 1341 pr_leave(pp); 1342 mutex_exit(&pp->pr_lock); 1343 1344 pr_pagelist_free(pp, &pq); 1345 } 1346 #undef pool_put 1347 #endif /* POOL_DIAGNOSTIC */ 1348 1349 void 1350 pool_put(struct pool *pp, void *v) 1351 { 1352 struct pool_pagelist pq; 1353 1354 LIST_INIT(&pq); 1355 1356 mutex_enter(&pp->pr_lock); 1357 pool_do_put(pp, v, &pq); 1358 mutex_exit(&pp->pr_lock); 1359 1360 pr_pagelist_free(pp, &pq); 1361 } 1362 1363 #ifdef POOL_DIAGNOSTIC 1364 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1365 #endif 1366 1367 /* 1368 * pool_grow: grow a pool by a page. 1369 * 1370 * => called with pool locked. 1371 * => unlock and relock the pool. 1372 * => return with pool locked. 1373 */ 1374 1375 static int 1376 pool_grow(struct pool *pp, int flags) 1377 { 1378 struct pool_item_header *ph = NULL; 1379 char *cp; 1380 1381 mutex_exit(&pp->pr_lock); 1382 cp = pool_allocator_alloc(pp, flags); 1383 if (__predict_true(cp != NULL)) { 1384 ph = pool_alloc_item_header(pp, cp, flags); 1385 } 1386 if (__predict_false(cp == NULL || ph == NULL)) { 1387 if (cp != NULL) { 1388 pool_allocator_free(pp, cp); 1389 } 1390 mutex_enter(&pp->pr_lock); 1391 return ENOMEM; 1392 } 1393 1394 mutex_enter(&pp->pr_lock); 1395 pool_prime_page(pp, cp, ph); 1396 pp->pr_npagealloc++; 1397 return 0; 1398 } 1399 1400 /* 1401 * Add N items to the pool. 1402 */ 1403 int 1404 pool_prime(struct pool *pp, int n) 1405 { 1406 int newpages; 1407 int error = 0; 1408 1409 mutex_enter(&pp->pr_lock); 1410 1411 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1412 1413 while (newpages-- > 0) { 1414 error = pool_grow(pp, PR_NOWAIT); 1415 if (error) { 1416 break; 1417 } 1418 pp->pr_minpages++; 1419 } 1420 1421 if (pp->pr_minpages >= pp->pr_maxpages) 1422 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1423 1424 mutex_exit(&pp->pr_lock); 1425 return error; 1426 } 1427 1428 /* 1429 * Add a page worth of items to the pool. 1430 * 1431 * Note, we must be called with the pool descriptor LOCKED. 1432 */ 1433 static void 1434 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1435 { 1436 struct pool_item *pi; 1437 void *cp = storage; 1438 const unsigned int align = pp->pr_align; 1439 const unsigned int ioff = pp->pr_itemoffset; 1440 int n; 1441 1442 KASSERT(mutex_owned(&pp->pr_lock)); 1443 1444 #ifdef DIAGNOSTIC 1445 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1446 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1447 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1448 #endif 1449 1450 /* 1451 * Insert page header. 1452 */ 1453 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1454 LIST_INIT(&ph->ph_itemlist); 1455 ph->ph_page = storage; 1456 ph->ph_nmissing = 0; 1457 ph->ph_time = time_uptime; 1458 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1459 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1460 1461 pp->pr_nidle++; 1462 1463 /* 1464 * Color this page. 1465 */ 1466 ph->ph_off = pp->pr_curcolor; 1467 cp = (char *)cp + ph->ph_off; 1468 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1469 pp->pr_curcolor = 0; 1470 1471 /* 1472 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 
1473 */ 1474 if (ioff != 0) 1475 cp = (char *)cp + align - ioff; 1476 1477 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1478 1479 /* 1480 * Insert remaining chunks on the bucket list. 1481 */ 1482 n = pp->pr_itemsperpage; 1483 pp->pr_nitems += n; 1484 1485 if (pp->pr_roflags & PR_NOTOUCH) { 1486 pr_item_notouch_init(pp, ph); 1487 } else { 1488 while (n--) { 1489 pi = (struct pool_item *)cp; 1490 1491 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1492 1493 /* Insert on page list */ 1494 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1495 #ifdef DIAGNOSTIC 1496 pi->pi_magic = PI_MAGIC; 1497 #endif 1498 cp = (char *)cp + pp->pr_size; 1499 1500 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1501 } 1502 } 1503 1504 /* 1505 * If the pool was depleted, point at the new page. 1506 */ 1507 if (pp->pr_curpage == NULL) 1508 pp->pr_curpage = ph; 1509 1510 if (++pp->pr_npages > pp->pr_hiwat) 1511 pp->pr_hiwat = pp->pr_npages; 1512 } 1513 1514 /* 1515 * Used by pool_get() when nitems drops below the low water mark. This 1516 * is used to catch up pr_nitems with the low water mark. 1517 * 1518 * Note 1, we never wait for memory here, we let the caller decide what to do. 1519 * 1520 * Note 2, we must be called with the pool already locked, and we return 1521 * with it locked. 1522 */ 1523 static int 1524 pool_catchup(struct pool *pp) 1525 { 1526 int error = 0; 1527 1528 while (POOL_NEEDS_CATCHUP(pp)) { 1529 error = pool_grow(pp, PR_NOWAIT); 1530 if (error) { 1531 break; 1532 } 1533 } 1534 return error; 1535 } 1536 1537 static void 1538 pool_update_curpage(struct pool *pp) 1539 { 1540 1541 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1542 if (pp->pr_curpage == NULL) { 1543 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1544 } 1545 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) || 1546 (pp->pr_curpage != NULL && pp->pr_nitems > 0)); 1547 } 1548 1549 void 1550 pool_setlowat(struct pool *pp, int n) 1551 { 1552 1553 mutex_enter(&pp->pr_lock); 1554 1555 pp->pr_minitems = n; 1556 pp->pr_minpages = (n == 0) 1557 ? 0 1558 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1559 1560 /* Make sure we're caught up with the newly-set low water mark. */ 1561 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1562 /* 1563 * XXX: Should we log a warning? Should we set up a timeout 1564 * to try again in a second or so? The latter could break 1565 * a caller's assumptions about interrupt protection, etc. 1566 */ 1567 } 1568 1569 mutex_exit(&pp->pr_lock); 1570 } 1571 1572 void 1573 pool_sethiwat(struct pool *pp, int n) 1574 { 1575 1576 mutex_enter(&pp->pr_lock); 1577 1578 pp->pr_maxpages = (n == 0) 1579 ? 0 1580 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1581 1582 mutex_exit(&pp->pr_lock); 1583 } 1584 1585 void 1586 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1587 { 1588 1589 mutex_enter(&pp->pr_lock); 1590 1591 pp->pr_hardlimit = n; 1592 pp->pr_hardlimit_warning = warnmess; 1593 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1594 pp->pr_hardlimit_warning_last.tv_sec = 0; 1595 pp->pr_hardlimit_warning_last.tv_usec = 0; 1596 1597 /* 1598 * In-line version of pool_sethiwat(), because we don't want to 1599 * release the lock. 1600 */ 1601 pp->pr_maxpages = (n == 0) 1602 ? 0 1603 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1604 1605 mutex_exit(&pp->pr_lock); 1606 } 1607 1608 /* 1609 * Release all complete pages that have not been used recently. 1610 * 1611 * Might be called from interrupt context. 
1612 */ 1613 int 1614 #ifdef POOL_DIAGNOSTIC 1615 _pool_reclaim(struct pool *pp, const char *file, long line) 1616 #else 1617 pool_reclaim(struct pool *pp) 1618 #endif 1619 { 1620 struct pool_item_header *ph, *phnext; 1621 struct pool_pagelist pq; 1622 uint32_t curtime; 1623 bool klock; 1624 int rv; 1625 1626 if (cpu_intr_p() || cpu_softintr_p()) { 1627 KASSERT(pp->pr_ipl != IPL_NONE); 1628 } 1629 1630 if (pp->pr_drain_hook != NULL) { 1631 /* 1632 * The drain hook must be called with the pool unlocked. 1633 */ 1634 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1635 } 1636 1637 /* 1638 * XXXSMP Because we do not want to cause non-MPSAFE code 1639 * to block. 1640 */ 1641 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1642 pp->pr_ipl == IPL_SOFTSERIAL) { 1643 KERNEL_LOCK(1, NULL); 1644 klock = true; 1645 } else 1646 klock = false; 1647 1648 /* Reclaim items from the pool's cache (if any). */ 1649 if (pp->pr_cache != NULL) 1650 pool_cache_invalidate(pp->pr_cache); 1651 1652 if (mutex_tryenter(&pp->pr_lock) == 0) { 1653 if (klock) { 1654 KERNEL_UNLOCK_ONE(NULL); 1655 } 1656 return (0); 1657 } 1658 pr_enter(pp, file, line); 1659 1660 LIST_INIT(&pq); 1661 1662 curtime = time_uptime; 1663 1664 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1665 phnext = LIST_NEXT(ph, ph_pagelist); 1666 1667 /* Check our minimum page claim */ 1668 if (pp->pr_npages <= pp->pr_minpages) 1669 break; 1670 1671 KASSERT(ph->ph_nmissing == 0); 1672 if (curtime - ph->ph_time < pool_inactive_time 1673 && !pa_starved_p(pp->pr_alloc)) 1674 continue; 1675 1676 /* 1677 * If freeing this page would put us below 1678 * the low water mark, stop now. 1679 */ 1680 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1681 pp->pr_minitems) 1682 break; 1683 1684 pr_rmpage(pp, ph, &pq); 1685 } 1686 1687 pr_leave(pp); 1688 mutex_exit(&pp->pr_lock); 1689 1690 if (LIST_EMPTY(&pq)) 1691 rv = 0; 1692 else { 1693 pr_pagelist_free(pp, &pq); 1694 rv = 1; 1695 } 1696 1697 if (klock) { 1698 KERNEL_UNLOCK_ONE(NULL); 1699 } 1700 1701 return (rv); 1702 } 1703 1704 /* 1705 * Drain pools, one at a time. This is a two stage process; 1706 * drain_start kicks off a cross call to drain CPU-level caches 1707 * if the pool has an associated pool_cache. drain_end waits 1708 * for those cross calls to finish, and then drains the cache 1709 * (if any) and pool. 1710 * 1711 * Note, must never be called from interrupt context. 1712 */ 1713 void 1714 pool_drain_start(struct pool **ppp, uint64_t *wp) 1715 { 1716 struct pool *pp; 1717 1718 KASSERT(!TAILQ_EMPTY(&pool_head)); 1719 1720 pp = NULL; 1721 1722 /* Find next pool to drain, and add a reference. */ 1723 mutex_enter(&pool_head_lock); 1724 do { 1725 if (drainpp == NULL) { 1726 drainpp = TAILQ_FIRST(&pool_head); 1727 } 1728 if (drainpp != NULL) { 1729 pp = drainpp; 1730 drainpp = TAILQ_NEXT(pp, pr_poollist); 1731 } 1732 /* 1733 * Skip completely idle pools. We depend on at least 1734 * one pool in the system being active. 1735 */ 1736 } while (pp == NULL || pp->pr_npages == 0); 1737 pp->pr_refcnt++; 1738 mutex_exit(&pool_head_lock); 1739 1740 /* If there is a pool_cache, drain CPU level caches. */ 1741 *ppp = pp; 1742 if (pp->pr_cache != NULL) { 1743 *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, 1744 pp->pr_cache, NULL); 1745 } 1746 } 1747 1748 bool 1749 pool_drain_end(struct pool *pp, uint64_t where) 1750 { 1751 bool reclaimed; 1752 1753 if (pp == NULL) 1754 return false; 1755 1756 KASSERT(pp->pr_refcnt > 0); 1757 1758 /* Wait for remote draining to complete. 
*/ 1759 if (pp->pr_cache != NULL) 1760 xc_wait(where); 1761 1762 /* Drain the cache (if any) and pool.. */ 1763 reclaimed = pool_reclaim(pp); 1764 1765 /* Finally, unlock the pool. */ 1766 mutex_enter(&pool_head_lock); 1767 pp->pr_refcnt--; 1768 cv_broadcast(&pool_busy); 1769 mutex_exit(&pool_head_lock); 1770 1771 return reclaimed; 1772 } 1773 1774 /* 1775 * Diagnostic helpers. 1776 */ 1777 void 1778 pool_print(struct pool *pp, const char *modif) 1779 { 1780 1781 pool_print1(pp, modif, printf); 1782 } 1783 1784 void 1785 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1786 { 1787 struct pool *pp; 1788 1789 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1790 pool_printit(pp, modif, pr); 1791 } 1792 } 1793 1794 void 1795 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1796 { 1797 1798 if (pp == NULL) { 1799 (*pr)("Must specify a pool to print.\n"); 1800 return; 1801 } 1802 1803 pool_print1(pp, modif, pr); 1804 } 1805 1806 static void 1807 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1808 void (*pr)(const char *, ...)) 1809 { 1810 struct pool_item_header *ph; 1811 #ifdef DIAGNOSTIC 1812 struct pool_item *pi; 1813 #endif 1814 1815 LIST_FOREACH(ph, pl, ph_pagelist) { 1816 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1817 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1818 #ifdef DIAGNOSTIC 1819 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1820 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1821 if (pi->pi_magic != PI_MAGIC) { 1822 (*pr)("\t\t\titem %p, magic 0x%x\n", 1823 pi, pi->pi_magic); 1824 } 1825 } 1826 } 1827 #endif 1828 } 1829 } 1830 1831 static void 1832 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1833 { 1834 struct pool_item_header *ph; 1835 pool_cache_t pc; 1836 pcg_t *pcg; 1837 pool_cache_cpu_t *cc; 1838 uint64_t cpuhit, cpumiss; 1839 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1840 char c; 1841 1842 while ((c = *modif++) != '\0') { 1843 if (c == 'l') 1844 print_log = 1; 1845 if (c == 'p') 1846 print_pagelist = 1; 1847 if (c == 'c') 1848 print_cache = 1; 1849 } 1850 1851 if ((pc = pp->pr_cache) != NULL) { 1852 (*pr)("POOL CACHE"); 1853 } else { 1854 (*pr)("POOL"); 1855 } 1856 1857 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1858 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1859 pp->pr_roflags); 1860 (*pr)("\talloc %p\n", pp->pr_alloc); 1861 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1862 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1863 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1864 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1865 1866 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1867 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1868 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1869 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1870 1871 if (print_pagelist == 0) 1872 goto skip_pagelist; 1873 1874 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1875 (*pr)("\n\tempty page list:\n"); 1876 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1877 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1878 (*pr)("\n\tfull page list:\n"); 1879 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1880 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1881 (*pr)("\n\tpartial-page list:\n"); 1882 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1883 1884 if (pp->pr_curpage == NULL) 1885 (*pr)("\tno current page\n"); 1886 else 1887 (*pr)("\tcurpage 
%p\n", pp->pr_curpage->ph_page); 1888 1889 skip_pagelist: 1890 if (print_log == 0) 1891 goto skip_log; 1892 1893 (*pr)("\n"); 1894 if ((pp->pr_roflags & PR_LOGGING) == 0) 1895 (*pr)("\tno log\n"); 1896 else { 1897 pr_printlog(pp, NULL, pr); 1898 } 1899 1900 skip_log: 1901 1902 #define PR_GROUPLIST(pcg) \ 1903 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1904 for (i = 0; i < pcg->pcg_size; i++) { \ 1905 if (pcg->pcg_objects[i].pcgo_pa != \ 1906 POOL_PADDR_INVALID) { \ 1907 (*pr)("\t\t\t%p, 0x%llx\n", \ 1908 pcg->pcg_objects[i].pcgo_va, \ 1909 (unsigned long long) \ 1910 pcg->pcg_objects[i].pcgo_pa); \ 1911 } else { \ 1912 (*pr)("\t\t\t%p\n", \ 1913 pcg->pcg_objects[i].pcgo_va); \ 1914 } \ 1915 } 1916 1917 if (pc != NULL) { 1918 cpuhit = 0; 1919 cpumiss = 0; 1920 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1921 if ((cc = pc->pc_cpus[i]) == NULL) 1922 continue; 1923 cpuhit += cc->cc_hits; 1924 cpumiss += cc->cc_misses; 1925 } 1926 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1927 (*pr)("\tcache layer hits %llu misses %llu\n", 1928 pc->pc_hits, pc->pc_misses); 1929 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1930 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1931 pc->pc_contended); 1932 (*pr)("\tcache layer empty groups %u full groups %u\n", 1933 pc->pc_nempty, pc->pc_nfull); 1934 if (print_cache) { 1935 (*pr)("\tfull cache groups:\n"); 1936 for (pcg = pc->pc_fullgroups; pcg != NULL; 1937 pcg = pcg->pcg_next) { 1938 PR_GROUPLIST(pcg); 1939 } 1940 (*pr)("\tempty cache groups:\n"); 1941 for (pcg = pc->pc_emptygroups; pcg != NULL; 1942 pcg = pcg->pcg_next) { 1943 PR_GROUPLIST(pcg); 1944 } 1945 } 1946 } 1947 #undef PR_GROUPLIST 1948 1949 pr_enter_check(pp, pr); 1950 } 1951 1952 static int 1953 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1954 { 1955 struct pool_item *pi; 1956 void *page; 1957 int n; 1958 1959 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1960 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1961 if (page != ph->ph_page && 1962 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1963 if (label != NULL) 1964 printf("%s: ", label); 1965 printf("pool(%p:%s): page inconsistency: page %p;" 1966 " at page head addr %p (p %p)\n", pp, 1967 pp->pr_wchan, ph->ph_page, 1968 ph, page); 1969 return 1; 1970 } 1971 } 1972 1973 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1974 return 0; 1975 1976 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1977 pi != NULL; 1978 pi = LIST_NEXT(pi,pi_list), n++) { 1979 1980 #ifdef DIAGNOSTIC 1981 if (pi->pi_magic != PI_MAGIC) { 1982 if (label != NULL) 1983 printf("%s: ", label); 1984 printf("pool(%s): free list modified: magic=%x;" 1985 " page %p; item ordinal %d; addr %p\n", 1986 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1987 n, pi); 1988 panic("pool"); 1989 } 1990 #endif 1991 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1992 continue; 1993 } 1994 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1995 if (page == ph->ph_page) 1996 continue; 1997 1998 if (label != NULL) 1999 printf("%s: ", label); 2000 printf("pool(%p:%s): page inconsistency: page %p;" 2001 " item ordinal %d; addr %p (p %p)\n", pp, 2002 pp->pr_wchan, ph->ph_page, 2003 n, pi, page); 2004 return 1; 2005 } 2006 return 0; 2007 } 2008 2009 2010 int 2011 pool_chk(struct pool *pp, const char *label) 2012 { 2013 struct pool_item_header *ph; 2014 int r = 0; 2015 2016 mutex_enter(&pp->pr_lock); 2017 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2018 r = pool_chk_page(pp, label, ph); 2019 if (r) { 2020 goto out; 
2021 } 2022 } 2023 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2024 r = pool_chk_page(pp, label, ph); 2025 if (r) { 2026 goto out; 2027 } 2028 } 2029 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2030 r = pool_chk_page(pp, label, ph); 2031 if (r) { 2032 goto out; 2033 } 2034 } 2035 2036 out: 2037 mutex_exit(&pp->pr_lock); 2038 return (r); 2039 } 2040 2041 /* 2042 * pool_cache_init: 2043 * 2044 * Initialize a pool cache. 2045 */ 2046 pool_cache_t 2047 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 2048 const char *wchan, struct pool_allocator *palloc, int ipl, 2049 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 2050 { 2051 pool_cache_t pc; 2052 2053 pc = pool_get(&cache_pool, PR_WAITOK); 2054 if (pc == NULL) 2055 return NULL; 2056 2057 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 2058 palloc, ipl, ctor, dtor, arg); 2059 2060 return pc; 2061 } 2062 2063 /* 2064 * pool_cache_bootstrap: 2065 * 2066 * Kernel-private version of pool_cache_init(). The caller 2067 * provides initial storage. 2068 */ 2069 void 2070 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 2071 u_int align_offset, u_int flags, const char *wchan, 2072 struct pool_allocator *palloc, int ipl, 2073 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 2074 void *arg) 2075 { 2076 CPU_INFO_ITERATOR cii; 2077 pool_cache_t pc1; 2078 struct cpu_info *ci; 2079 struct pool *pp; 2080 2081 pp = &pc->pc_pool; 2082 if (palloc == NULL && ipl == IPL_NONE) 2083 palloc = &pool_allocator_nointr; 2084 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 2085 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 2086 2087 if (ctor == NULL) { 2088 ctor = (int (*)(void *, void *, int))nullop; 2089 } 2090 if (dtor == NULL) { 2091 dtor = (void (*)(void *, void *))nullop; 2092 } 2093 2094 pc->pc_emptygroups = NULL; 2095 pc->pc_fullgroups = NULL; 2096 pc->pc_partgroups = NULL; 2097 pc->pc_ctor = ctor; 2098 pc->pc_dtor = dtor; 2099 pc->pc_arg = arg; 2100 pc->pc_hits = 0; 2101 pc->pc_misses = 0; 2102 pc->pc_nempty = 0; 2103 pc->pc_npart = 0; 2104 pc->pc_nfull = 0; 2105 pc->pc_contended = 0; 2106 pc->pc_refcnt = 0; 2107 pc->pc_freecheck = NULL; 2108 2109 if ((flags & PR_LARGECACHE) != 0) { 2110 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2111 pc->pc_pcgpool = &pcg_large_pool; 2112 } else { 2113 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2114 pc->pc_pcgpool = &pcg_normal_pool; 2115 } 2116 2117 /* Allocate per-CPU caches. */ 2118 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2119 pc->pc_ncpu = 0; 2120 if (ncpu < 2) { 2121 /* XXX For sparc: boot CPU is not attached yet. */ 2122 pool_cache_cpu_init1(curcpu(), pc); 2123 } else { 2124 for (CPU_INFO_FOREACH(cii, ci)) { 2125 pool_cache_cpu_init1(ci, pc); 2126 } 2127 } 2128 2129 /* Add to list of all pools. */ 2130 if (__predict_true(!cold)) 2131 mutex_enter(&pool_head_lock); 2132 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2133 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2134 break; 2135 } 2136 if (pc1 == NULL) 2137 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2138 else 2139 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2140 if (__predict_true(!cold)) 2141 mutex_exit(&pool_head_lock); 2142 2143 membar_sync(); 2144 pp->pr_cache = pc; 2145 } 2146 2147 /* 2148 * pool_cache_destroy: 2149 * 2150 * Destroy a pool cache. 2151 */ 2152 void 2153 pool_cache_destroy(pool_cache_t pc) 2154 { 2155 struct pool *pp = &pc->pc_pool; 2156 u_int i; 2157 2158 /* Remove it from the global list. 
*/ 2159 mutex_enter(&pool_head_lock); 2160 while (pc->pc_refcnt != 0) 2161 cv_wait(&pool_busy, &pool_head_lock); 2162 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2163 mutex_exit(&pool_head_lock); 2164 2165 /* First, invalidate the entire cache. */ 2166 pool_cache_invalidate(pc); 2167 2168 /* Disassociate it from the pool. */ 2169 mutex_enter(&pp->pr_lock); 2170 pp->pr_cache = NULL; 2171 mutex_exit(&pp->pr_lock); 2172 2173 /* Destroy per-CPU data */ 2174 for (i = 0; i < __arraycount(pc->pc_cpus); i++) 2175 pool_cache_invalidate_cpu(pc, i); 2176 2177 /* Finally, destroy it. */ 2178 mutex_destroy(&pc->pc_lock); 2179 pool_destroy(pp); 2180 pool_put(&cache_pool, pc); 2181 } 2182 2183 /* 2184 * pool_cache_cpu_init1: 2185 * 2186 * Called for each pool_cache whenever a new CPU is attached. 2187 */ 2188 static void 2189 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2190 { 2191 pool_cache_cpu_t *cc; 2192 int index; 2193 2194 index = ci->ci_index; 2195 2196 KASSERT(index < __arraycount(pc->pc_cpus)); 2197 2198 if ((cc = pc->pc_cpus[index]) != NULL) { 2199 KASSERT(cc->cc_cpuindex == index); 2200 return; 2201 } 2202 2203 /* 2204 * The first CPU is 'free'. This needs to be the case for 2205 * bootstrap - we may not be able to allocate yet. 2206 */ 2207 if (pc->pc_ncpu == 0) { 2208 cc = &pc->pc_cpu0; 2209 pc->pc_ncpu = 1; 2210 } else { 2211 mutex_enter(&pc->pc_lock); 2212 pc->pc_ncpu++; 2213 mutex_exit(&pc->pc_lock); 2214 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2215 } 2216 2217 cc->cc_ipl = pc->pc_pool.pr_ipl; 2218 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 2219 cc->cc_cache = pc; 2220 cc->cc_cpuindex = index; 2221 cc->cc_hits = 0; 2222 cc->cc_misses = 0; 2223 cc->cc_current = __UNCONST(&pcg_dummy); 2224 cc->cc_previous = __UNCONST(&pcg_dummy); 2225 2226 pc->pc_cpus[index] = cc; 2227 } 2228 2229 /* 2230 * pool_cache_cpu_init: 2231 * 2232 * Called whenever a new CPU is attached. 2233 */ 2234 void 2235 pool_cache_cpu_init(struct cpu_info *ci) 2236 { 2237 pool_cache_t pc; 2238 2239 mutex_enter(&pool_head_lock); 2240 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2241 pc->pc_refcnt++; 2242 mutex_exit(&pool_head_lock); 2243 2244 pool_cache_cpu_init1(ci, pc); 2245 2246 mutex_enter(&pool_head_lock); 2247 pc->pc_refcnt--; 2248 cv_broadcast(&pool_busy); 2249 } 2250 mutex_exit(&pool_head_lock); 2251 } 2252 2253 /* 2254 * pool_cache_reclaim: 2255 * 2256 * Reclaim memory from a pool cache. 2257 */ 2258 bool 2259 pool_cache_reclaim(pool_cache_t pc) 2260 { 2261 2262 return pool_reclaim(&pc->pc_pool); 2263 } 2264 2265 static void 2266 pool_cache_destruct_object1(pool_cache_t pc, void *object) 2267 { 2268 2269 (*pc->pc_dtor)(pc->pc_arg, object); 2270 pool_put(&pc->pc_pool, object); 2271 } 2272 2273 /* 2274 * pool_cache_destruct_object: 2275 * 2276 * Force destruction of an object and its release back into 2277 * the pool. 2278 */ 2279 void 2280 pool_cache_destruct_object(pool_cache_t pc, void *object) 2281 { 2282 2283 FREECHECK_IN(&pc->pc_freecheck, object); 2284 2285 pool_cache_destruct_object1(pc, object); 2286 } 2287 2288 /* 2289 * pool_cache_invalidate_groups: 2290 * 2291 * Invalidate a chain of groups and destruct all objects. 
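 * Each object in each group is run through the cache's dtor and
 * released back to the pool; the group itself is then returned to
 * pcg_large_pool or pcg_normal_pool according to its pcg_size.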
2292 */ 2293 static void 2294 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2295 { 2296 void *object; 2297 pcg_t *next; 2298 int i; 2299 2300 for (; pcg != NULL; pcg = next) { 2301 next = pcg->pcg_next; 2302 2303 for (i = 0; i < pcg->pcg_avail; i++) { 2304 object = pcg->pcg_objects[i].pcgo_va; 2305 pool_cache_destruct_object1(pc, object); 2306 } 2307 2308 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2309 pool_put(&pcg_large_pool, pcg); 2310 } else { 2311 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2312 pool_put(&pcg_normal_pool, pcg); 2313 } 2314 } 2315 } 2316 2317 /* 2318 * pool_cache_invalidate: 2319 * 2320 * Invalidate a pool cache (destruct and release all of the 2321 * cached objects). Does not reclaim objects from the pool. 2322 * 2323 * Note: For pool caches that provide constructed objects, there 2324 * is an assumption that another level of synchronization is occurring 2325 * between the input to the constructor and the cache invalidation. 2326 */ 2327 void 2328 pool_cache_invalidate(pool_cache_t pc) 2329 { 2330 pcg_t *full, *empty, *part; 2331 #if 0 2332 uint64_t where; 2333 2334 if (ncpu < 2 || !mp_online) { 2335 /* 2336 * We might be called early enough in the boot process 2337 * for the CPU data structures to not be fully initialized. 2338 * In this case, simply gather the local CPU's cache now 2339 * since it will be the only one running. 2340 */ 2341 pool_cache_xcall(pc); 2342 } else { 2343 /* 2344 * Gather all of the CPU-specific caches into the 2345 * global cache. 2346 */ 2347 where = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, pc, NULL); 2348 xc_wait(where); 2349 } 2350 #endif 2351 mutex_enter(&pc->pc_lock); 2352 full = pc->pc_fullgroups; 2353 empty = pc->pc_emptygroups; 2354 part = pc->pc_partgroups; 2355 pc->pc_fullgroups = NULL; 2356 pc->pc_emptygroups = NULL; 2357 pc->pc_partgroups = NULL; 2358 pc->pc_nfull = 0; 2359 pc->pc_nempty = 0; 2360 pc->pc_npart = 0; 2361 mutex_exit(&pc->pc_lock); 2362 2363 pool_cache_invalidate_groups(pc, full); 2364 pool_cache_invalidate_groups(pc, empty); 2365 pool_cache_invalidate_groups(pc, part); 2366 } 2367 2368 /* 2369 * pool_cache_invalidate_cpu: 2370 * 2371 * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2372 * identified by its associated index. 2373 * It is caller's responsibility to ensure that no operation is 2374 * taking place on this pool cache while doing this invalidation. 2375 * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2376 * pool cached objects from a CPU different from the one currently running 2377 * may result in an undefined behaviour. 
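 * pool_cache_destroy(), for example, only calls this once the cache
 * has been unlinked from the global list and invalidated.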
2378 */ 2379 static void 2380 pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2381 { 2382 2383 pool_cache_cpu_t *cc; 2384 pcg_t *pcg; 2385 2386 if ((cc = pc->pc_cpus[index]) == NULL) 2387 return; 2388 2389 if ((pcg = cc->cc_current) != &pcg_dummy) { 2390 pcg->pcg_next = NULL; 2391 pool_cache_invalidate_groups(pc, pcg); 2392 } 2393 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2394 pcg->pcg_next = NULL; 2395 pool_cache_invalidate_groups(pc, pcg); 2396 } 2397 if (cc != &pc->pc_cpu0) 2398 pool_put(&cache_cpu_pool, cc); 2399 2400 } 2401 2402 void 2403 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2404 { 2405 2406 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2407 } 2408 2409 void 2410 pool_cache_setlowat(pool_cache_t pc, int n) 2411 { 2412 2413 pool_setlowat(&pc->pc_pool, n); 2414 } 2415 2416 void 2417 pool_cache_sethiwat(pool_cache_t pc, int n) 2418 { 2419 2420 pool_sethiwat(&pc->pc_pool, n); 2421 } 2422 2423 void 2424 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2425 { 2426 2427 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2428 } 2429 2430 static bool __noinline 2431 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2432 paddr_t *pap, int flags) 2433 { 2434 pcg_t *pcg, *cur; 2435 uint64_t ncsw; 2436 pool_cache_t pc; 2437 void *object; 2438 2439 KASSERT(cc->cc_current->pcg_avail == 0); 2440 KASSERT(cc->cc_previous->pcg_avail == 0); 2441 2442 pc = cc->cc_cache; 2443 cc->cc_misses++; 2444 2445 /* 2446 * Nothing was available locally. Try and grab a group 2447 * from the cache. 2448 */ 2449 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2450 ncsw = curlwp->l_ncsw; 2451 mutex_enter(&pc->pc_lock); 2452 pc->pc_contended++; 2453 2454 /* 2455 * If we context switched while locking, then 2456 * our view of the per-CPU data is invalid: 2457 * retry. 2458 */ 2459 if (curlwp->l_ncsw != ncsw) { 2460 mutex_exit(&pc->pc_lock); 2461 return true; 2462 } 2463 } 2464 2465 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2466 /* 2467 * If there's a full group, release our empty 2468 * group back to the cache. Install the full 2469 * group as cc_current and return. 2470 */ 2471 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2472 KASSERT(cur->pcg_avail == 0); 2473 cur->pcg_next = pc->pc_emptygroups; 2474 pc->pc_emptygroups = cur; 2475 pc->pc_nempty++; 2476 } 2477 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2478 cc->cc_current = pcg; 2479 pc->pc_fullgroups = pcg->pcg_next; 2480 pc->pc_hits++; 2481 pc->pc_nfull--; 2482 mutex_exit(&pc->pc_lock); 2483 return true; 2484 } 2485 2486 /* 2487 * Nothing available locally or in cache. Take the slow 2488 * path: fetch a new object from the pool and construct 2489 * it. 
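 * Drop pc_lock and restore the SPL before calling into the pool:
 * pool_get() may sleep when PR_WAITOK is set, and the constructor
 * may sleep as well.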
2490 */ 2491 pc->pc_misses++; 2492 mutex_exit(&pc->pc_lock); 2493 splx(s); 2494 2495 object = pool_get(&pc->pc_pool, flags); 2496 *objectp = object; 2497 if (__predict_false(object == NULL)) 2498 return false; 2499 2500 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2501 pool_put(&pc->pc_pool, object); 2502 *objectp = NULL; 2503 return false; 2504 } 2505 2506 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) & 2507 (pc->pc_pool.pr_align - 1)) == 0); 2508 2509 if (pap != NULL) { 2510 #ifdef POOL_VTOPHYS 2511 *pap = POOL_VTOPHYS(object); 2512 #else 2513 *pap = POOL_PADDR_INVALID; 2514 #endif 2515 } 2516 2517 FREECHECK_OUT(&pc->pc_freecheck, object); 2518 return false; 2519 } 2520 2521 /* 2522 * pool_cache_get{,_paddr}: 2523 * 2524 * Get an object from a pool cache (optionally returning 2525 * the physical address of the object). 2526 */ 2527 void * 2528 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2529 { 2530 pool_cache_cpu_t *cc; 2531 pcg_t *pcg; 2532 void *object; 2533 int s; 2534 2535 KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()) || 2536 (pc->pc_pool.pr_ipl != IPL_NONE || cold || panicstr != NULL), 2537 ("pool '%s' is IPL_NONE, but called from interrupt context\n", 2538 pc->pc_pool.pr_wchan)); 2539 2540 if (flags & PR_WAITOK) { 2541 ASSERT_SLEEPABLE(); 2542 } 2543 2544 /* Lock out interrupts and disable preemption. */ 2545 s = splvm(); 2546 while (/* CONSTCOND */ true) { 2547 /* Try and allocate an object from the current group. */ 2548 cc = pc->pc_cpus[curcpu()->ci_index]; 2549 KASSERT(cc->cc_cache == pc); 2550 pcg = cc->cc_current; 2551 if (__predict_true(pcg->pcg_avail > 0)) { 2552 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2553 if (__predict_false(pap != NULL)) 2554 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2555 #if defined(DIAGNOSTIC) 2556 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2557 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2558 KASSERT(object != NULL); 2559 #endif 2560 cc->cc_hits++; 2561 splx(s); 2562 FREECHECK_OUT(&pc->pc_freecheck, object); 2563 return object; 2564 } 2565 2566 /* 2567 * That failed. If the previous group isn't empty, swap 2568 * it with the current group and allocate from there. 2569 */ 2570 pcg = cc->cc_previous; 2571 if (__predict_true(pcg->pcg_avail > 0)) { 2572 cc->cc_previous = cc->cc_current; 2573 cc->cc_current = pcg; 2574 continue; 2575 } 2576 2577 /* 2578 * Can't allocate from either group: try the slow path. 2579 * If get_slow() allocated an object for us, or if 2580 * no more objects are available, it will return false. 2581 * Otherwise, we need to retry. 2582 */ 2583 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2584 break; 2585 } 2586 2587 return object; 2588 } 2589 2590 static bool __noinline 2591 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2592 { 2593 pcg_t *pcg, *cur; 2594 uint64_t ncsw; 2595 pool_cache_t pc; 2596 2597 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2598 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2599 2600 pc = cc->cc_cache; 2601 pcg = NULL; 2602 cc->cc_misses++; 2603 2604 /* 2605 * If there are no empty groups in the cache then allocate one 2606 * while still unlocked. 2607 */ 2608 if (__predict_false(pc->pc_emptygroups == NULL)) { 2609 if (__predict_true(!pool_cache_disable)) { 2610 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2611 } 2612 if (__predict_true(pcg != NULL)) { 2613 pcg->pcg_avail = 0; 2614 pcg->pcg_size = pc->pc_pcgsize; 2615 } 2616 } 2617 2618 /* Lock the cache. 
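 * Try-lock first so that contention can be counted; if we had to
 * block and were context switched meanwhile, the per-CPU state may
 * be stale and the caller is asked to retry.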
*/ 2619 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2620 ncsw = curlwp->l_ncsw; 2621 mutex_enter(&pc->pc_lock); 2622 pc->pc_contended++; 2623 2624 /* 2625 * If we context switched while locking, then our view of 2626 * the per-CPU data is invalid: retry. 2627 */ 2628 if (__predict_false(curlwp->l_ncsw != ncsw)) { 2629 mutex_exit(&pc->pc_lock); 2630 if (pcg != NULL) { 2631 pool_put(pc->pc_pcgpool, pcg); 2632 } 2633 return true; 2634 } 2635 } 2636 2637 /* If there are no empty groups in the cache then allocate one. */ 2638 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2639 pcg = pc->pc_emptygroups; 2640 pc->pc_emptygroups = pcg->pcg_next; 2641 pc->pc_nempty--; 2642 } 2643 2644 /* 2645 * If there's a empty group, release our full group back 2646 * to the cache. Install the empty group to the local CPU 2647 * and return. 2648 */ 2649 if (pcg != NULL) { 2650 KASSERT(pcg->pcg_avail == 0); 2651 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2652 cc->cc_previous = pcg; 2653 } else { 2654 cur = cc->cc_current; 2655 if (__predict_true(cur != &pcg_dummy)) { 2656 KASSERT(cur->pcg_avail == cur->pcg_size); 2657 cur->pcg_next = pc->pc_fullgroups; 2658 pc->pc_fullgroups = cur; 2659 pc->pc_nfull++; 2660 } 2661 cc->cc_current = pcg; 2662 } 2663 pc->pc_hits++; 2664 mutex_exit(&pc->pc_lock); 2665 return true; 2666 } 2667 2668 /* 2669 * Nothing available locally or in cache, and we didn't 2670 * allocate an empty group. Take the slow path and destroy 2671 * the object here and now. 2672 */ 2673 pc->pc_misses++; 2674 mutex_exit(&pc->pc_lock); 2675 splx(s); 2676 pool_cache_destruct_object(pc, object); 2677 2678 return false; 2679 } 2680 2681 /* 2682 * pool_cache_put{,_paddr}: 2683 * 2684 * Put an object back to the pool cache (optionally caching the 2685 * physical address of the object). 2686 */ 2687 void 2688 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2689 { 2690 pool_cache_cpu_t *cc; 2691 pcg_t *pcg; 2692 int s; 2693 2694 KASSERT(object != NULL); 2695 FREECHECK_IN(&pc->pc_freecheck, object); 2696 2697 /* Lock out interrupts and disable preemption. */ 2698 s = splvm(); 2699 while (/* CONSTCOND */ true) { 2700 /* If the current group isn't full, release it there. */ 2701 cc = pc->pc_cpus[curcpu()->ci_index]; 2702 KASSERT(cc->cc_cache == pc); 2703 pcg = cc->cc_current; 2704 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2705 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2706 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2707 pcg->pcg_avail++; 2708 cc->cc_hits++; 2709 splx(s); 2710 return; 2711 } 2712 2713 /* 2714 * That failed. If the previous group isn't full, swap 2715 * it with the current group and try again. 2716 */ 2717 pcg = cc->cc_previous; 2718 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2719 cc->cc_previous = cc->cc_current; 2720 cc->cc_current = pcg; 2721 continue; 2722 } 2723 2724 /* 2725 * Can't free to either group: try the slow path. 2726 * If put_slow() releases the object for us, it 2727 * will return false. Otherwise we need to retry. 2728 */ 2729 if (!pool_cache_put_slow(cc, s, object)) 2730 break; 2731 } 2732 } 2733 2734 /* 2735 * pool_cache_xcall: 2736 * 2737 * Transfer objects from the per-CPU cache to the global cache. 2738 * Run within a cross-call thread. 
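 * (The currently disabled code in pool_cache_invalidate() shows how
 * this would be run on every CPU via xc_broadcast()/xc_wait().)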
2739 */ 2740 static void 2741 pool_cache_xcall(pool_cache_t pc) 2742 { 2743 pool_cache_cpu_t *cc; 2744 pcg_t *prev, *cur, **list; 2745 int s; 2746 2747 s = splvm(); 2748 mutex_enter(&pc->pc_lock); 2749 cc = pc->pc_cpus[curcpu()->ci_index]; 2750 cur = cc->cc_current; 2751 cc->cc_current = __UNCONST(&pcg_dummy); 2752 prev = cc->cc_previous; 2753 cc->cc_previous = __UNCONST(&pcg_dummy); 2754 if (cur != &pcg_dummy) { 2755 if (cur->pcg_avail == cur->pcg_size) { 2756 list = &pc->pc_fullgroups; 2757 pc->pc_nfull++; 2758 } else if (cur->pcg_avail == 0) { 2759 list = &pc->pc_emptygroups; 2760 pc->pc_nempty++; 2761 } else { 2762 list = &pc->pc_partgroups; 2763 pc->pc_npart++; 2764 } 2765 cur->pcg_next = *list; 2766 *list = cur; 2767 } 2768 if (prev != &pcg_dummy) { 2769 if (prev->pcg_avail == prev->pcg_size) { 2770 list = &pc->pc_fullgroups; 2771 pc->pc_nfull++; 2772 } else if (prev->pcg_avail == 0) { 2773 list = &pc->pc_emptygroups; 2774 pc->pc_nempty++; 2775 } else { 2776 list = &pc->pc_partgroups; 2777 pc->pc_npart++; 2778 } 2779 prev->pcg_next = *list; 2780 *list = prev; 2781 } 2782 mutex_exit(&pc->pc_lock); 2783 splx(s); 2784 } 2785 2786 /* 2787 * Pool backend allocators. 2788 * 2789 * Each pool has a backend allocator that handles allocation, deallocation, 2790 * and any additional draining that might be needed. 2791 * 2792 * We provide two standard allocators: 2793 * 2794 * pool_allocator_kmem - the default when no allocator is specified 2795 * 2796 * pool_allocator_nointr - used for pools that will not be accessed 2797 * in interrupt context. 2798 */ 2799 void *pool_page_alloc(struct pool *, int); 2800 void pool_page_free(struct pool *, void *); 2801 2802 #ifdef POOL_SUBPAGE 2803 struct pool_allocator pool_allocator_kmem_fullpage = { 2804 pool_page_alloc, pool_page_free, 0, 2805 .pa_backingmapptr = &kmem_map, 2806 }; 2807 #else 2808 struct pool_allocator pool_allocator_kmem = { 2809 pool_page_alloc, pool_page_free, 0, 2810 .pa_backingmapptr = &kmem_map, 2811 }; 2812 #endif 2813 2814 void *pool_page_alloc_nointr(struct pool *, int); 2815 void pool_page_free_nointr(struct pool *, void *); 2816 2817 #ifdef POOL_SUBPAGE 2818 struct pool_allocator pool_allocator_nointr_fullpage = { 2819 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2820 .pa_backingmapptr = &kernel_map, 2821 }; 2822 #else 2823 struct pool_allocator pool_allocator_nointr = { 2824 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2825 .pa_backingmapptr = &kernel_map, 2826 }; 2827 #endif 2828 2829 #ifdef POOL_SUBPAGE 2830 void *pool_subpage_alloc(struct pool *, int); 2831 void pool_subpage_free(struct pool *, void *); 2832 2833 struct pool_allocator pool_allocator_kmem = { 2834 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2835 .pa_backingmapptr = &kmem_map, 2836 }; 2837 2838 void *pool_subpage_alloc_nointr(struct pool *, int); 2839 void pool_subpage_free_nointr(struct pool *, void *); 2840 2841 struct pool_allocator pool_allocator_nointr = { 2842 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2843 .pa_backingmapptr = &kmem_map, 2844 }; 2845 #endif /* POOL_SUBPAGE */ 2846 2847 static void * 2848 pool_allocator_alloc(struct pool *pp, int flags) 2849 { 2850 struct pool_allocator *pa = pp->pr_alloc; 2851 void *res; 2852 2853 res = (*pa->pa_alloc)(pp, flags); 2854 if (res == NULL && (flags & PR_WAITOK) == 0) { 2855 /* 2856 * We only run the drain hook here if PR_NOWAIT. 2857 * In other cases, the hook will be run in 2858 * pool_reclaim(). 
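 * Once the hook has released whatever it can, the allocation is
 * retried a single time.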
2859 */ 2860 if (pp->pr_drain_hook != NULL) { 2861 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2862 res = (*pa->pa_alloc)(pp, flags); 2863 } 2864 } 2865 return res; 2866 } 2867 2868 static void 2869 pool_allocator_free(struct pool *pp, void *v) 2870 { 2871 struct pool_allocator *pa = pp->pr_alloc; 2872 2873 (*pa->pa_free)(pp, v); 2874 } 2875 2876 void * 2877 pool_page_alloc(struct pool *pp, int flags) 2878 { 2879 bool waitok = (flags & PR_WAITOK) ? true : false; 2880 2881 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok)); 2882 } 2883 2884 void 2885 pool_page_free(struct pool *pp, void *v) 2886 { 2887 2888 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v); 2889 } 2890 2891 static void * 2892 pool_page_alloc_meta(struct pool *pp, int flags) 2893 { 2894 bool waitok = (flags & PR_WAITOK) ? true : false; 2895 2896 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok)); 2897 } 2898 2899 static void 2900 pool_page_free_meta(struct pool *pp, void *v) 2901 { 2902 2903 uvm_km_free_poolpage(kmem_map, (vaddr_t) v); 2904 } 2905 2906 #ifdef POOL_SUBPAGE 2907 /* Sub-page allocator, for machines with large hardware pages. */ 2908 void * 2909 pool_subpage_alloc(struct pool *pp, int flags) 2910 { 2911 return pool_get(&psppool, flags); 2912 } 2913 2914 void 2915 pool_subpage_free(struct pool *pp, void *v) 2916 { 2917 pool_put(&psppool, v); 2918 } 2919 2920 /* We don't provide a real nointr allocator. Maybe later. */ 2921 void * 2922 pool_subpage_alloc_nointr(struct pool *pp, int flags) 2923 { 2924 2925 return (pool_subpage_alloc(pp, flags)); 2926 } 2927 2928 void 2929 pool_subpage_free_nointr(struct pool *pp, void *v) 2930 { 2931 2932 pool_subpage_free(pp, v); 2933 } 2934 #endif /* POOL_SUBPAGE */ 2935 void * 2936 pool_page_alloc_nointr(struct pool *pp, int flags) 2937 { 2938 bool waitok = (flags & PR_WAITOK) ? 
true : false; 2939 2940 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok)); 2941 } 2942 2943 void 2944 pool_page_free_nointr(struct pool *pp, void *v) 2945 { 2946 2947 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v); 2948 } 2949 2950 #if defined(DDB) 2951 static bool 2952 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2953 { 2954 2955 return (uintptr_t)ph->ph_page <= addr && 2956 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 2957 } 2958 2959 static bool 2960 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 2961 { 2962 2963 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 2964 } 2965 2966 static bool 2967 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 2968 { 2969 int i; 2970 2971 if (pcg == NULL) { 2972 return false; 2973 } 2974 for (i = 0; i < pcg->pcg_avail; i++) { 2975 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 2976 return true; 2977 } 2978 } 2979 return false; 2980 } 2981 2982 static bool 2983 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2984 { 2985 2986 if ((pp->pr_roflags & PR_NOTOUCH) != 0) { 2987 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr); 2988 pool_item_bitmap_t *bitmap = 2989 ph->ph_bitmap + (idx / BITMAP_SIZE); 2990 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 2991 2992 return (*bitmap & mask) == 0; 2993 } else { 2994 struct pool_item *pi; 2995 2996 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 2997 if (pool_in_item(pp, pi, addr)) { 2998 return false; 2999 } 3000 } 3001 return true; 3002 } 3003 } 3004 3005 void 3006 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 3007 { 3008 struct pool *pp; 3009 3010 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 3011 struct pool_item_header *ph; 3012 uintptr_t item; 3013 bool allocated = true; 3014 bool incache = false; 3015 bool incpucache = false; 3016 char cpucachestr[32]; 3017 3018 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 3019 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 3020 if (pool_in_page(pp, ph, addr)) { 3021 goto found; 3022 } 3023 } 3024 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 3025 if (pool_in_page(pp, ph, addr)) { 3026 allocated = 3027 pool_allocated(pp, ph, addr); 3028 goto found; 3029 } 3030 } 3031 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 3032 if (pool_in_page(pp, ph, addr)) { 3033 allocated = false; 3034 goto found; 3035 } 3036 } 3037 continue; 3038 } else { 3039 ph = pr_find_pagehead_noalign(pp, (void *)addr); 3040 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 3041 continue; 3042 } 3043 allocated = pool_allocated(pp, ph, addr); 3044 } 3045 found: 3046 if (allocated && pp->pr_cache) { 3047 pool_cache_t pc = pp->pr_cache; 3048 struct pool_cache_group *pcg; 3049 int i; 3050 3051 for (pcg = pc->pc_fullgroups; pcg != NULL; 3052 pcg = pcg->pcg_next) { 3053 if (pool_in_cg(pp, pcg, addr)) { 3054 incache = true; 3055 goto print; 3056 } 3057 } 3058 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 3059 pool_cache_cpu_t *cc; 3060 3061 if ((cc = pc->pc_cpus[i]) == NULL) { 3062 continue; 3063 } 3064 if (pool_in_cg(pp, cc->cc_current, addr) || 3065 pool_in_cg(pp, cc->cc_previous, addr)) { 3066 struct cpu_info *ci = 3067 cpu_lookup(i); 3068 3069 incpucache = true; 3070 snprintf(cpucachestr, 3071 sizeof(cpucachestr), 3072 "cached by CPU %u", 3073 ci->ci_index); 3074 goto print; 3075 } 3076 } 3077 } 3078 print: 3079 item = (uintptr_t)ph->ph_page + ph->ph_off; 3080 item = item + rounddown(addr - item, pp->pr_size); 
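		/* addr falls within the item starting at 'item'; report it. */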
3081 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 3082 (void *)addr, (void *)item, (size_t)(addr - item), 3083 pp->pr_wchan, 3084 incpucache ? cpucachestr : 3085 incache ? "cached" : allocated ? "allocated" : "free"); 3086 } 3087 } 3088 #endif /* defined(DDB) */ 3089
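/*
 * Example usage (an illustrative sketch only, not code built as part of this
 * file): a subsystem creates a cache for its objects, then gets and puts
 * constructed objects through it.  The names "struct foo", foo_ctor(),
 * foo_dtor(), foo_init() and the "foopl" wait channel are invented for the
 * example; pool_cache_get() and pool_cache_put() are the usual shorthands
 * for pool_cache_get_paddr() and pool_cache_put_paddr() above.
 *
 *    static pool_cache_t foo_cache;
 *
 *    static int
 *    foo_ctor(void *arg, void *obj, int flags)
 *    {
 *            struct foo *f = obj;
 *
 *            memset(f, 0, sizeof(*f));
 *            return 0;
 *    }
 *
 *    static void
 *    foo_dtor(void *arg, void *obj)
 *    {
 *
 *            ...release anything foo_ctor() attached, if applicable...
 *    }
 *
 *    void
 *    foo_init(void)
 *    {
 *            foo_cache = pool_cache_init(sizeof(struct foo), 0, 0, 0,
 *                "foopl", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *    }
 *
 *    struct foo *
 *    foo_alloc(void)
 *    {
 *            return pool_cache_get(foo_cache, PR_WAITOK);
 *    }
 *
 *    void
 *    foo_free(struct foo *f)
 *    {
 *            pool_cache_put(foo_cache, f);
 *    }
 *
 * When ctor and dtor are passed as NULL, pool_cache_bootstrap() substitutes
 * nullop for them, so the cache simply recycles raw objects.
 */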