1 /* $NetBSD: subr_pool.c,v 1.194 2012/02/04 22:11:42 para Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.194 2012/02/04 22:11:42 para Exp $"); 36 37 #include "opt_ddb.h" 38 #include "opt_pool.h" 39 #include "opt_poollog.h" 40 #include "opt_lockdebug.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/bitops.h> 45 #include <sys/proc.h> 46 #include <sys/errno.h> 47 #include <sys/kernel.h> 48 #include <sys/malloc.h> 49 #include <sys/vmem.h> 50 #include <sys/pool.h> 51 #include <sys/syslog.h> 52 #include <sys/debug.h> 53 #include <sys/lockdebug.h> 54 #include <sys/xcall.h> 55 #include <sys/cpu.h> 56 #include <sys/atomic.h> 57 58 #include <uvm/uvm_extern.h> 59 60 /* 61 * Pool resource management utility. 62 * 63 * Memory is allocated in pages which are split into pieces according to 64 * the pool item size. Each page is kept on one of three lists in the 65 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 66 * for empty, full and partially-full pages respectively. The individual 67 * pool items are on a linked list headed by `ph_itemlist' in each page 68 * header. The memory for building the page list is either taken from 69 * the allocated pages themselves (for small pool items) or taken from 70 * an internal pool of page headers (`phpool'). 71 */ 72 73 /* List of all pools */ 74 static TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 75 76 /* Private pool for page header structures */ 77 #define PHPOOL_MAX 8 78 static struct pool phpool[PHPOOL_MAX]; 79 #define PHPOOL_FREELIST_NELEM(idx) \ 80 (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx))) 81 82 #ifdef POOL_SUBPAGE 83 /* Pool of subpages for use by normal pools. 
 */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
struct pool_allocator pool_allocator_meta = {
	.pa_alloc = pool_page_alloc_meta,
	.pa_free = pool_page_free_meta,
	.pa_pagesz = 0
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This lock protects both pool_head and drainpp. */
static kmutex_t pool_head_lock;
static kcondvar_t pool_busy;

/* This lock protects initialization of a potentially shared pool allocator */
static kmutex_t pool_allocator_lock;

typedef uint32_t pool_item_bitmap_t;
#define	BITMAP_SIZE	(CHAR_BIT * sizeof(pool_item_bitmap_t))
#define	BITMAP_MASK	(BITMAP_SIZE - 1)

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
			ph_node;	/* Off-page page headers */
	void *		ph_page;	/* this page's address */
	uint32_t	ph_time;	/* last referenced */
	uint16_t	ph_nmissing;	/* # of chunks in use */
	uint16_t	ph_off;		/* start offset in page */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			pool_item_bitmap_t phu_bitmap[1];
		} phu_notouch;
	} ph_u;
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_bitmap	ph_u.phu_notouch.phu_bitmap

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeaddeadU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; destruction is deferred until
 * it is absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references up
 * to PCG_NOBJECTS_NORMAL (or PCG_NOBJECTS_LARGE) constructed objects.
 * When a cache allocates an object from the pool, it calls the object's
 * constructor and places it into a cache group.  When a cache group frees
 * an object back to the pool, it first calls the object's destructor.
 * This allows the object to persist in constructed form while freed to
 * the cache.
 *
 * The pool references each cache, so that when a pool is drained by the
 * pagedaemon, it can drain each individual cache as well.  Each time a
 * cache is drained, the most idle cache group is freed to the pool in
 * its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

static struct pool pcg_normal_pool;
static struct pool pcg_large_pool;
static struct pool cache_pool;
static struct pool cache_cpu_pool;

pool_cache_t pnbuf_cache;	/* pathname buffer cache */

/* List of all caches.
*/ 184 TAILQ_HEAD(,pool_cache) pool_cache_head = 185 TAILQ_HEAD_INITIALIZER(pool_cache_head); 186 187 int pool_cache_disable; /* global disable for caching */ 188 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 189 190 static bool pool_cache_put_slow(pool_cache_cpu_t *, int, 191 void *); 192 static bool pool_cache_get_slow(pool_cache_cpu_t *, int, 193 void **, paddr_t *, int); 194 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 195 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 196 static void pool_cache_invalidate_cpu(pool_cache_t, u_int); 197 static void pool_cache_xcall(pool_cache_t); 198 199 static int pool_catchup(struct pool *); 200 static void pool_prime_page(struct pool *, void *, 201 struct pool_item_header *); 202 static void pool_update_curpage(struct pool *); 203 204 static int pool_grow(struct pool *, int); 205 static void *pool_allocator_alloc(struct pool *, int); 206 static void pool_allocator_free(struct pool *, void *); 207 208 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 209 void (*)(const char *, ...)); 210 static void pool_print1(struct pool *, const char *, 211 void (*)(const char *, ...)); 212 213 static int pool_chk_page(struct pool *, const char *, 214 struct pool_item_header *); 215 216 /* 217 * Pool log entry. An array of these is allocated in pool_init(). 218 */ 219 struct pool_log { 220 const char *pl_file; 221 long pl_line; 222 int pl_action; 223 #define PRLOG_GET 1 224 #define PRLOG_PUT 2 225 void *pl_addr; 226 }; 227 228 #ifdef POOL_DIAGNOSTIC 229 /* Number of entries in pool log buffers */ 230 #ifndef POOL_LOGSIZE 231 #define POOL_LOGSIZE 10 232 #endif 233 234 int pool_logsize = POOL_LOGSIZE; 235 236 static inline void 237 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 238 { 239 int n; 240 struct pool_log *pl; 241 242 if ((pp->pr_roflags & PR_LOGGING) == 0) 243 return; 244 245 if (pp->pr_log == NULL) { 246 if (kmem_map != NULL) 247 pp->pr_log = malloc( 248 pool_logsize * sizeof(struct pool_log), 249 M_TEMP, M_NOWAIT | M_ZERO); 250 if (pp->pr_log == NULL) 251 return; 252 pp->pr_curlogentry = 0; 253 pp->pr_logsize = pool_logsize; 254 } 255 256 /* 257 * Fill in the current entry. Wrap around and overwrite 258 * the oldest entry if necessary. 259 */ 260 n = pp->pr_curlogentry; 261 pl = &pp->pr_log[n]; 262 pl->pl_file = file; 263 pl->pl_line = line; 264 pl->pl_action = action; 265 pl->pl_addr = v; 266 if (++n >= pp->pr_logsize) 267 n = 0; 268 pp->pr_curlogentry = n; 269 } 270 271 static void 272 pr_printlog(struct pool *pp, struct pool_item *pi, 273 void (*pr)(const char *, ...)) 274 { 275 int i = pp->pr_logsize; 276 int n = pp->pr_curlogentry; 277 278 if (pp->pr_log == NULL) 279 return; 280 281 /* 282 * Print all entries in this pool's log. 283 */ 284 while (i-- > 0) { 285 struct pool_log *pl = &pp->pr_log[n]; 286 if (pl->pl_action != 0) { 287 if (pi == NULL || pi == pl->pl_addr) { 288 (*pr)("\tlog entry %d:\n", i); 289 (*pr)("\t\taction = %s, addr = %p\n", 290 pl->pl_action == PRLOG_GET ? 
"get" : "put", 291 pl->pl_addr); 292 (*pr)("\t\tfile: %s at line %lu\n", 293 pl->pl_file, pl->pl_line); 294 } 295 } 296 if (++n >= pp->pr_logsize) 297 n = 0; 298 } 299 } 300 301 static inline void 302 pr_enter(struct pool *pp, const char *file, long line) 303 { 304 305 if (__predict_false(pp->pr_entered_file != NULL)) { 306 printf("pool %s: reentrancy at file %s line %ld\n", 307 pp->pr_wchan, file, line); 308 printf(" previous entry at file %s line %ld\n", 309 pp->pr_entered_file, pp->pr_entered_line); 310 panic("pr_enter"); 311 } 312 313 pp->pr_entered_file = file; 314 pp->pr_entered_line = line; 315 } 316 317 static inline void 318 pr_leave(struct pool *pp) 319 { 320 321 if (__predict_false(pp->pr_entered_file == NULL)) { 322 printf("pool %s not entered?\n", pp->pr_wchan); 323 panic("pr_leave"); 324 } 325 326 pp->pr_entered_file = NULL; 327 pp->pr_entered_line = 0; 328 } 329 330 static inline void 331 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 332 { 333 334 if (pp->pr_entered_file != NULL) 335 (*pr)("\n\tcurrently entered from file %s line %ld\n", 336 pp->pr_entered_file, pp->pr_entered_line); 337 } 338 #else 339 #define pr_log(pp, v, action, file, line) 340 #define pr_printlog(pp, pi, pr) 341 #define pr_enter(pp, file, line) 342 #define pr_leave(pp) 343 #define pr_enter_check(pp, pr) 344 #endif /* POOL_DIAGNOSTIC */ 345 346 static inline unsigned int 347 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 348 const void *v) 349 { 350 const char *cp = v; 351 unsigned int idx; 352 353 KASSERT(pp->pr_roflags & PR_NOTOUCH); 354 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 355 KASSERT(idx < pp->pr_itemsperpage); 356 return idx; 357 } 358 359 static inline void 360 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 361 void *obj) 362 { 363 unsigned int idx = pr_item_notouch_index(pp, ph, obj); 364 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 365 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 366 367 KASSERT((*bitmap & mask) == 0); 368 *bitmap |= mask; 369 } 370 371 static inline void * 372 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 373 { 374 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 375 unsigned int idx; 376 int i; 377 378 for (i = 0; ; i++) { 379 int bit; 380 381 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 382 bit = ffs32(bitmap[i]); 383 if (bit) { 384 pool_item_bitmap_t mask; 385 386 bit--; 387 idx = (i * BITMAP_SIZE) + bit; 388 mask = 1 << bit; 389 KASSERT((bitmap[i] & mask) != 0); 390 bitmap[i] &= ~mask; 391 break; 392 } 393 } 394 KASSERT(idx < pp->pr_itemsperpage); 395 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 396 } 397 398 static inline void 399 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph) 400 { 401 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 402 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 403 int i; 404 405 for (i = 0; i < n; i++) { 406 bitmap[i] = (pool_item_bitmap_t)-1; 407 } 408 } 409 410 static inline int 411 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 412 { 413 414 /* 415 * we consider pool_item_header with smaller ph_page bigger. 416 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 
417 */ 418 419 if (a->ph_page < b->ph_page) 420 return (1); 421 else if (a->ph_page > b->ph_page) 422 return (-1); 423 else 424 return (0); 425 } 426 427 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 428 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 429 430 static inline struct pool_item_header * 431 pr_find_pagehead_noalign(struct pool *pp, void *v) 432 { 433 struct pool_item_header *ph, tmp; 434 435 tmp.ph_page = (void *)(uintptr_t)v; 436 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 437 if (ph == NULL) { 438 ph = SPLAY_ROOT(&pp->pr_phtree); 439 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 440 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 441 } 442 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 443 } 444 445 return ph; 446 } 447 448 /* 449 * Return the pool page header based on item address. 450 */ 451 static inline struct pool_item_header * 452 pr_find_pagehead(struct pool *pp, void *v) 453 { 454 struct pool_item_header *ph, tmp; 455 456 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 457 ph = pr_find_pagehead_noalign(pp, v); 458 } else { 459 void *page = 460 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 461 462 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 463 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 464 } else { 465 tmp.ph_page = page; 466 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 467 } 468 } 469 470 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 471 ((char *)ph->ph_page <= (char *)v && 472 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 473 return ph; 474 } 475 476 static void 477 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 478 { 479 struct pool_item_header *ph; 480 481 while ((ph = LIST_FIRST(pq)) != NULL) { 482 LIST_REMOVE(ph, ph_pagelist); 483 pool_allocator_free(pp, ph->ph_page); 484 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 485 pool_put(pp->pr_phpool, ph); 486 } 487 } 488 489 /* 490 * Remove a page from the pool. 491 */ 492 static inline void 493 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 494 struct pool_pagelist *pq) 495 { 496 497 KASSERT(mutex_owned(&pp->pr_lock)); 498 499 /* 500 * If the page was idle, decrement the idle page count. 501 */ 502 if (ph->ph_nmissing == 0) { 503 #ifdef DIAGNOSTIC 504 if (pp->pr_nidle == 0) 505 panic("pr_rmpage: nidle inconsistent"); 506 if (pp->pr_nitems < pp->pr_itemsperpage) 507 panic("pr_rmpage: nitems inconsistent"); 508 #endif 509 pp->pr_nidle--; 510 } 511 512 pp->pr_nitems -= pp->pr_itemsperpage; 513 514 /* 515 * Unlink the page from the pool and queue it for release. 516 */ 517 LIST_REMOVE(ph, ph_pagelist); 518 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 519 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 520 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 521 522 pp->pr_npages--; 523 pp->pr_npagefree++; 524 525 pool_update_curpage(pp); 526 } 527 528 /* 529 * Initialize all the pools listed in the "pools" link set. 530 */ 531 void 532 pool_subsystem_init(void) 533 { 534 size_t size; 535 int idx; 536 537 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 538 mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 539 cv_init(&pool_busy, "poolbusy"); 540 541 /* 542 * Initialize private page header pool and cache magazine pool if we 543 * haven't done so yet. 
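	 *
	 * A worked example of the sizing below (added for clarity): with
	 * BITMAP_SIZE = 32 and PHPOOL_MAX = 8, phpool-0 serves headers whose
	 * items are linked through the page itself, while phpool-64,
	 * phpool-128, ..., phpool-4096 serve PR_NOTOUCH headers that carry a
	 * bitmap for up to BITMAP_SIZE * (1 << idx) items per page.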
544 */ 545 for (idx = 0; idx < PHPOOL_MAX; idx++) { 546 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 547 int nelem; 548 size_t sz; 549 550 nelem = PHPOOL_FREELIST_NELEM(idx); 551 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 552 "phpool-%d", nelem); 553 sz = sizeof(struct pool_item_header); 554 if (nelem) { 555 sz = offsetof(struct pool_item_header, 556 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 557 } 558 pool_init(&phpool[idx], sz, 0, 0, 0, 559 phpool_names[idx], &pool_allocator_meta, IPL_VM); 560 } 561 #ifdef POOL_SUBPAGE 562 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 563 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 564 #endif 565 566 size = sizeof(pcg_t) + 567 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 568 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 569 "pcgnormal", &pool_allocator_meta, IPL_VM); 570 571 size = sizeof(pcg_t) + 572 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 573 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 574 "pcglarge", &pool_allocator_meta, IPL_VM); 575 576 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 577 0, 0, "pcache", &pool_allocator_meta, IPL_NONE); 578 579 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 580 0, 0, "pcachecpu", &pool_allocator_meta, IPL_NONE); 581 } 582 583 /* 584 * Initialize the given pool resource structure. 585 * 586 * We export this routine to allow other kernel parts to declare 587 * static pools that must be initialized before malloc() is available. 588 */ 589 void 590 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 591 const char *wchan, struct pool_allocator *palloc, int ipl) 592 { 593 struct pool *pp1; 594 size_t trysize, phsize; 595 int off, slack; 596 597 #ifdef DEBUG 598 /* 599 * Check that the pool hasn't already been initialised and 600 * added to the list of all pools. 601 */ 602 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 603 if (pp == pp1) 604 panic("pool_init: pool %s already initialised", 605 wchan); 606 } 607 #endif 608 609 #ifdef POOL_DIAGNOSTIC 610 /* 611 * Always log if POOL_DIAGNOSTIC is defined. 612 */ 613 if (pool_logsize != 0) 614 flags |= PR_LOGGING; 615 #endif 616 617 if (palloc == NULL) 618 palloc = &pool_allocator_kmem; 619 #ifdef POOL_SUBPAGE 620 if (size > palloc->pa_pagesz) { 621 if (palloc == &pool_allocator_kmem) 622 palloc = &pool_allocator_kmem_fullpage; 623 else if (palloc == &pool_allocator_nointr) 624 palloc = &pool_allocator_nointr_fullpage; 625 } 626 #endif /* POOL_SUBPAGE */ 627 if (!cold) 628 mutex_enter(&pool_allocator_lock); 629 if (palloc->pa_refcnt++ == 0) { 630 if (palloc->pa_pagesz == 0) 631 palloc->pa_pagesz = PAGE_SIZE; 632 633 TAILQ_INIT(&palloc->pa_list); 634 635 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 636 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 637 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 638 } 639 if (!cold) 640 mutex_exit(&pool_allocator_lock); 641 642 if (align == 0) 643 align = ALIGN(1); 644 645 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 646 size = sizeof(struct pool_item); 647 648 size = roundup(size, align); 649 #ifdef DIAGNOSTIC 650 if (size > palloc->pa_pagesz) 651 panic("pool_init: pool item size (%zu) too large", size); 652 #endif 653 654 /* 655 * Initialize the pool structure. 
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_cache = NULL;
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;
	pp->pr_freecheck = NULL;

	/*
	 * Decide whether to put the page header off-page, to avoid wasting
	 * too large a part of the page or too big an item.  Off-page page
	 * headers go on a hash table, so we can match a returned item with
	 * its header based on the page address.  We use 1/16 of the page
	 * size and about 8 times the item size as the threshold (XXX: tune).
	 *
	 * However, we'll put the header into the page if we can put
	 * it without wasting any items.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff %= align;
	/* See the comment below about reserved bytes. */
	trysize = palloc->pa_pagesz - ((align - ioff) % align);
	phsize = ALIGN(sizeof(struct pool_item_header));
	if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item.  This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 */
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);
	if ((pp->pr_roflags & PR_NOTOUCH)) {
		int idx;

		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
		    idx++) {
			/* nothing */
		}
		if (idx >= PHPOOL_MAX) {
			/*
			 * if you see this panic, consider tweaking
			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
			 */
			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
			    pp->pr_wchan, pp->pr_itemsperpage);
		}
		pp->pr_phpool = &phpool[idx];
	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		pp->pr_phpool = &phpool[0];
	}
#if defined(DIAGNOSTIC)
	else {
		pp->pr_phpool = NULL;
	}
#endif

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
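	 *
	 * For example (figures added for illustration only): if a page ends
	 * up with 136 bytes of slack and align is 8, pr_maxcolor becomes 136
	 * and successive pages place their first item at offsets 0, 8, 16,
	 * ..., 136 before the cycle wraps back to 0, so items from different
	 * pages do not all compete for the same cache lines.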
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;
	pp->pr_refcnt = 0;

	pp->pr_log = NULL;

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl);
	cv_init(&pp->pr_cv, wchan);
	pp->pr_ipl = ipl;

	/* Insert into the list of all pools. */
	if (!cold)
		mutex_enter(&pool_head_lock);
	TAILQ_FOREACH(pp1, &pool_head, pr_poollist) {
		if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0)
			break;
	}
	if (pp1 == NULL)
		TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	else
		TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist);
	if (!cold)
		mutex_exit(&pool_head_lock);

	/* Insert this into the list of pools using this allocator. */
	if (!cold)
		mutex_enter(&palloc->pa_lock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	if (!cold)
		mutex_exit(&palloc->pa_lock);
}

/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;

	/* Remove from global pool list */
	mutex_enter(&pool_head_lock);
	while (pp->pr_refcnt != 0)
		cv_wait(&pool_busy, &pool_head_lock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	mutex_exit(&pool_head_lock);

	/* Remove this pool from its allocator's list of pools. */
	mutex_enter(&pp->pr_alloc->pa_lock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	mutex_exit(&pp->pr_alloc->pa_lock);

	mutex_enter(&pool_allocator_lock);
	if (--pp->pr_alloc->pa_refcnt == 0)
		mutex_destroy(&pp->pr_alloc->pa_lock);
	mutex_exit(&pool_allocator_lock);

	mutex_enter(&pp->pr_lock);

	KASSERT(pp->pr_cache == NULL);

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	mutex_exit(&pp->pr_lock);

	pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
	if (pp->pr_log != NULL) {
		free(pp->pr_log, M_TEMP);
		pp->pr_log = NULL;
	}
#endif

	cv_destroy(&pp->pr_cv);
	mutex_destroy(&pp->pr_lock);
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, void *storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset);
	else
		ph = pool_get(pp->pr_phpool, flags);

	return (ph);
}

/*
 * Grab an item from the pool.
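 *
 * Typical usage from a subsystem looks roughly like the sketch below
 * (added for illustration; "frobdesc", "frob_pool" and the IPL choice
 * are hypothetical, not part of this file):
 *
 *	static struct pool frob_pool;
 *
 *	pool_init(&frob_pool, sizeof(struct frobdesc), 0, 0, 0,
 *	    "frobpl", NULL, IPL_BIO);
 *
 *	struct frobdesc *fd = pool_get(&frob_pool, PR_WAITOK);
 *	...
 *	pool_put(&frob_pool, fd);
 *
 * PR_WAITOK may sleep until an item can be provided; PR_NOWAIT callers
 * must be prepared for a NULL return.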
885 */ 886 void * 887 #ifdef POOL_DIAGNOSTIC 888 _pool_get(struct pool *pp, int flags, const char *file, long line) 889 #else 890 pool_get(struct pool *pp, int flags) 891 #endif 892 { 893 struct pool_item *pi; 894 struct pool_item_header *ph; 895 void *v; 896 897 #ifdef DIAGNOSTIC 898 if (pp->pr_itemsperpage == 0) 899 panic("pool_get: pool '%s': pr_itemsperpage is zero, " 900 "pool not initialized?", pp->pr_wchan); 901 if ((cpu_intr_p() || cpu_softintr_p()) && pp->pr_ipl == IPL_NONE && 902 !cold && panicstr == NULL) 903 panic("pool '%s' is IPL_NONE, but called from " 904 "interrupt context\n", pp->pr_wchan); 905 #endif 906 if (flags & PR_WAITOK) { 907 ASSERT_SLEEPABLE(); 908 } 909 910 mutex_enter(&pp->pr_lock); 911 pr_enter(pp, file, line); 912 913 startover: 914 /* 915 * Check to see if we've reached the hard limit. If we have, 916 * and we can wait, then wait until an item has been returned to 917 * the pool. 918 */ 919 #ifdef DIAGNOSTIC 920 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 921 pr_leave(pp); 922 mutex_exit(&pp->pr_lock); 923 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 924 } 925 #endif 926 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 927 if (pp->pr_drain_hook != NULL) { 928 /* 929 * Since the drain hook is going to free things 930 * back to the pool, unlock, call the hook, re-lock, 931 * and check the hardlimit condition again. 932 */ 933 pr_leave(pp); 934 mutex_exit(&pp->pr_lock); 935 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 936 mutex_enter(&pp->pr_lock); 937 pr_enter(pp, file, line); 938 if (pp->pr_nout < pp->pr_hardlimit) 939 goto startover; 940 } 941 942 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 943 /* 944 * XXX: A warning isn't logged in this case. Should 945 * it be? 946 */ 947 pp->pr_flags |= PR_WANTED; 948 pr_leave(pp); 949 cv_wait(&pp->pr_cv, &pp->pr_lock); 950 pr_enter(pp, file, line); 951 goto startover; 952 } 953 954 /* 955 * Log a message that the hard limit has been hit. 956 */ 957 if (pp->pr_hardlimit_warning != NULL && 958 ratecheck(&pp->pr_hardlimit_warning_last, 959 &pp->pr_hardlimit_ratecap)) 960 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 961 962 pp->pr_nfail++; 963 964 pr_leave(pp); 965 mutex_exit(&pp->pr_lock); 966 return (NULL); 967 } 968 969 /* 970 * The convention we use is that if `curpage' is not NULL, then 971 * it points at a non-empty bucket. In particular, `curpage' 972 * never points at a page header which has PR_PHINPAGE set and 973 * has no items in its bucket. 974 */ 975 if ((ph = pp->pr_curpage) == NULL) { 976 int error; 977 978 #ifdef DIAGNOSTIC 979 if (pp->pr_nitems != 0) { 980 mutex_exit(&pp->pr_lock); 981 printf("pool_get: %s: curpage NULL, nitems %u\n", 982 pp->pr_wchan, pp->pr_nitems); 983 panic("pool_get: nitems inconsistent"); 984 } 985 #endif 986 987 /* 988 * Call the back-end page allocator for more memory. 989 * Release the pool lock, as the back-end page allocator 990 * may block. 991 */ 992 pr_leave(pp); 993 error = pool_grow(pp, flags); 994 pr_enter(pp, file, line); 995 if (error != 0) { 996 /* 997 * We were unable to allocate a page or item 998 * header, but we released the lock during 999 * allocation, so perhaps items were freed 1000 * back to the pool. Check for this case. 1001 */ 1002 if (pp->pr_curpage != NULL) 1003 goto startover; 1004 1005 pp->pr_nfail++; 1006 pr_leave(pp); 1007 mutex_exit(&pp->pr_lock); 1008 return (NULL); 1009 } 1010 1011 /* Start the allocation process over. 
*/ 1012 goto startover; 1013 } 1014 if (pp->pr_roflags & PR_NOTOUCH) { 1015 #ifdef DIAGNOSTIC 1016 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1017 pr_leave(pp); 1018 mutex_exit(&pp->pr_lock); 1019 panic("pool_get: %s: page empty", pp->pr_wchan); 1020 } 1021 #endif 1022 v = pr_item_notouch_get(pp, ph); 1023 #ifdef POOL_DIAGNOSTIC 1024 pr_log(pp, v, PRLOG_GET, file, line); 1025 #endif 1026 } else { 1027 v = pi = LIST_FIRST(&ph->ph_itemlist); 1028 if (__predict_false(v == NULL)) { 1029 pr_leave(pp); 1030 mutex_exit(&pp->pr_lock); 1031 panic("pool_get: %s: page empty", pp->pr_wchan); 1032 } 1033 #ifdef DIAGNOSTIC 1034 if (__predict_false(pp->pr_nitems == 0)) { 1035 pr_leave(pp); 1036 mutex_exit(&pp->pr_lock); 1037 printf("pool_get: %s: items on itemlist, nitems %u\n", 1038 pp->pr_wchan, pp->pr_nitems); 1039 panic("pool_get: nitems inconsistent"); 1040 } 1041 #endif 1042 1043 #ifdef POOL_DIAGNOSTIC 1044 pr_log(pp, v, PRLOG_GET, file, line); 1045 #endif 1046 1047 #ifdef DIAGNOSTIC 1048 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1049 pr_printlog(pp, pi, printf); 1050 panic("pool_get(%s): free list modified: " 1051 "magic=%x; page %p; item addr %p\n", 1052 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1053 } 1054 #endif 1055 1056 /* 1057 * Remove from item list. 1058 */ 1059 LIST_REMOVE(pi, pi_list); 1060 } 1061 pp->pr_nitems--; 1062 pp->pr_nout++; 1063 if (ph->ph_nmissing == 0) { 1064 #ifdef DIAGNOSTIC 1065 if (__predict_false(pp->pr_nidle == 0)) 1066 panic("pool_get: nidle inconsistent"); 1067 #endif 1068 pp->pr_nidle--; 1069 1070 /* 1071 * This page was previously empty. Move it to the list of 1072 * partially-full pages. This page is already curpage. 1073 */ 1074 LIST_REMOVE(ph, ph_pagelist); 1075 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1076 } 1077 ph->ph_nmissing++; 1078 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1079 #ifdef DIAGNOSTIC 1080 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1081 !LIST_EMPTY(&ph->ph_itemlist))) { 1082 pr_leave(pp); 1083 mutex_exit(&pp->pr_lock); 1084 panic("pool_get: %s: nmissing inconsistent", 1085 pp->pr_wchan); 1086 } 1087 #endif 1088 /* 1089 * This page is now full. Move it to the full list 1090 * and select a new current page. 1091 */ 1092 LIST_REMOVE(ph, ph_pagelist); 1093 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1094 pool_update_curpage(pp); 1095 } 1096 1097 pp->pr_nget++; 1098 pr_leave(pp); 1099 1100 /* 1101 * If we have a low water mark and we are now below that low 1102 * water mark, add more items to the pool. 1103 */ 1104 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1105 /* 1106 * XXX: Should we log a warning? Should we set up a timeout 1107 * to try again in a second or so? The latter could break 1108 * a caller's assumptions about interrupt protection, etc. 1109 */ 1110 } 1111 1112 mutex_exit(&pp->pr_lock); 1113 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 1114 FREECHECK_OUT(&pp->pr_freecheck, v); 1115 return (v); 1116 } 1117 1118 /* 1119 * Internal version of pool_put(). Pool is already locked/entered. 
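 *
 * The caller contract, sketched here for clarity: the caller holds
 * pr_lock, passes an initialized pool_pagelist that collects any pages
 * which become releasable, and calls pr_pagelist_free() on that list
 * only after dropping the lock, e.g.
 *
 *	LIST_INIT(&pq);
 *	mutex_enter(&pp->pr_lock);
 *	pool_do_put(pp, v, &pq);
 *	mutex_exit(&pp->pr_lock);
 *	pr_pagelist_free(pp, &pq);
 *
 * as done by pool_put() below.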
1120 */ 1121 static void 1122 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1123 { 1124 struct pool_item *pi = v; 1125 struct pool_item_header *ph; 1126 1127 KASSERT(mutex_owned(&pp->pr_lock)); 1128 FREECHECK_IN(&pp->pr_freecheck, v); 1129 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1130 1131 #ifdef DIAGNOSTIC 1132 if (__predict_false(pp->pr_nout == 0)) { 1133 printf("pool %s: putting with none out\n", 1134 pp->pr_wchan); 1135 panic("pool_put"); 1136 } 1137 #endif 1138 1139 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1140 pr_printlog(pp, NULL, printf); 1141 panic("pool_put: %s: page header missing", pp->pr_wchan); 1142 } 1143 1144 /* 1145 * Return to item list. 1146 */ 1147 if (pp->pr_roflags & PR_NOTOUCH) { 1148 pr_item_notouch_put(pp, ph, v); 1149 } else { 1150 #ifdef DIAGNOSTIC 1151 pi->pi_magic = PI_MAGIC; 1152 #endif 1153 #ifdef DEBUG 1154 { 1155 int i, *ip = v; 1156 1157 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1158 *ip++ = PI_MAGIC; 1159 } 1160 } 1161 #endif 1162 1163 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1164 } 1165 KDASSERT(ph->ph_nmissing != 0); 1166 ph->ph_nmissing--; 1167 pp->pr_nput++; 1168 pp->pr_nitems++; 1169 pp->pr_nout--; 1170 1171 /* Cancel "pool empty" condition if it exists */ 1172 if (pp->pr_curpage == NULL) 1173 pp->pr_curpage = ph; 1174 1175 if (pp->pr_flags & PR_WANTED) { 1176 pp->pr_flags &= ~PR_WANTED; 1177 cv_broadcast(&pp->pr_cv); 1178 } 1179 1180 /* 1181 * If this page is now empty, do one of two things: 1182 * 1183 * (1) If we have more pages than the page high water mark, 1184 * free the page back to the system. ONLY CONSIDER 1185 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1186 * CLAIM. 1187 * 1188 * (2) Otherwise, move the page to the empty page list. 1189 * 1190 * Either way, select a new current page (so we use a partially-full 1191 * page if one is available). 1192 */ 1193 if (ph->ph_nmissing == 0) { 1194 pp->pr_nidle++; 1195 if (pp->pr_npages > pp->pr_minpages && 1196 pp->pr_npages > pp->pr_maxpages) { 1197 pr_rmpage(pp, ph, pq); 1198 } else { 1199 LIST_REMOVE(ph, ph_pagelist); 1200 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1201 1202 /* 1203 * Update the timestamp on the page. A page must 1204 * be idle for some period of time before it can 1205 * be reclaimed by the pagedaemon. This minimizes 1206 * ping-pong'ing for memory. 1207 * 1208 * note for 64-bit time_t: truncating to 32-bit is not 1209 * a problem for our usage. 1210 */ 1211 ph->ph_time = time_uptime; 1212 } 1213 pool_update_curpage(pp); 1214 } 1215 1216 /* 1217 * If the page was previously completely full, move it to the 1218 * partially-full list and make it the current page. The next 1219 * allocation will get the item from this page, instead of 1220 * further fragmenting the pool. 1221 */ 1222 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1223 LIST_REMOVE(ph, ph_pagelist); 1224 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1225 pp->pr_curpage = ph; 1226 } 1227 } 1228 1229 /* 1230 * Return resource to the pool. 
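 *
 * Items must be returned to the pool they were taken from; a short
 * error-path sketch (the names are hypothetical, added for
 * illustration):
 *
 *	fd = pool_get(&frob_pool, PR_NOWAIT);
 *	if (fd == NULL)
 *		return ENOMEM;
 *	if (frob_setup(fd) != 0) {
 *		pool_put(&frob_pool, fd);
 *		return EIO;
 *	}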
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mutex_enter(&pp->pr_lock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v, &pq);

	pr_leave(pp);
	mutex_exit(&pp->pr_lock);

	pr_pagelist_free(pp, &pq);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mutex_enter(&pp->pr_lock);
	pool_do_put(pp, v, &pq);
	mutex_exit(&pp->pr_lock);

	pr_pagelist_free(pp, &pq);
}

#ifdef POOL_DIAGNOSTIC
#define	pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * pool_grow: grow a pool by a page.
 *
 * => called with pool locked.
 * => unlock and relock the pool.
 * => return with pool locked.
 */

static int
pool_grow(struct pool *pp, int flags)
{
	struct pool_item_header *ph = NULL;
	char *cp;

	mutex_exit(&pp->pr_lock);
	cp = pool_allocator_alloc(pp, flags);
	if (__predict_true(cp != NULL)) {
		ph = pool_alloc_item_header(pp, cp, flags);
	}
	if (__predict_false(cp == NULL || ph == NULL)) {
		if (cp != NULL) {
			pool_allocator_free(pp, cp);
		}
		mutex_enter(&pp->pr_lock);
		return ENOMEM;
	}

	mutex_enter(&pp->pr_lock);
	pool_prime_page(pp, cp, ph);
	pp->pr_npagealloc++;
	return 0;
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	int newpages;
	int error = 0;

	mutex_enter(&pp->pr_lock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mutex_exit(&pp->pr_lock);
	return error;
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	void *cp = storage;
	const unsigned int align = pp->pr_align;
	const unsigned int ioff = pp->pr_itemoffset;
	int n;

	KASSERT(mutex_owned(&pp->pr_lock));

#ifdef DIAGNOSTIC
	if ((pp->pr_roflags & PR_NOALIGN) == 0 &&
	    ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	LIST_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	ph->ph_time = time_uptime;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	ph->ph_off = pp->pr_curcolor;
	cp = (char *)cp + ph->ph_off;
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
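	 *
	 * Worked example (numbers added for illustration): with align = 64
	 * and ioff = 16, cp is advanced by align - ioff = 48 bytes so that
	 * (item address + 16) falls on a 64-byte boundary for every item;
	 * with ioff = 0 no adjustment is needed.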
1379 */ 1380 if (ioff != 0) 1381 cp = (char *)cp + align - ioff; 1382 1383 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1384 1385 /* 1386 * Insert remaining chunks on the bucket list. 1387 */ 1388 n = pp->pr_itemsperpage; 1389 pp->pr_nitems += n; 1390 1391 if (pp->pr_roflags & PR_NOTOUCH) { 1392 pr_item_notouch_init(pp, ph); 1393 } else { 1394 while (n--) { 1395 pi = (struct pool_item *)cp; 1396 1397 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1398 1399 /* Insert on page list */ 1400 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1401 #ifdef DIAGNOSTIC 1402 pi->pi_magic = PI_MAGIC; 1403 #endif 1404 cp = (char *)cp + pp->pr_size; 1405 1406 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1407 } 1408 } 1409 1410 /* 1411 * If the pool was depleted, point at the new page. 1412 */ 1413 if (pp->pr_curpage == NULL) 1414 pp->pr_curpage = ph; 1415 1416 if (++pp->pr_npages > pp->pr_hiwat) 1417 pp->pr_hiwat = pp->pr_npages; 1418 } 1419 1420 /* 1421 * Used by pool_get() when nitems drops below the low water mark. This 1422 * is used to catch up pr_nitems with the low water mark. 1423 * 1424 * Note 1, we never wait for memory here, we let the caller decide what to do. 1425 * 1426 * Note 2, we must be called with the pool already locked, and we return 1427 * with it locked. 1428 */ 1429 static int 1430 pool_catchup(struct pool *pp) 1431 { 1432 int error = 0; 1433 1434 while (POOL_NEEDS_CATCHUP(pp)) { 1435 error = pool_grow(pp, PR_NOWAIT); 1436 if (error) { 1437 break; 1438 } 1439 } 1440 return error; 1441 } 1442 1443 static void 1444 pool_update_curpage(struct pool *pp) 1445 { 1446 1447 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1448 if (pp->pr_curpage == NULL) { 1449 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1450 } 1451 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) || 1452 (pp->pr_curpage != NULL && pp->pr_nitems > 0)); 1453 } 1454 1455 void 1456 pool_setlowat(struct pool *pp, int n) 1457 { 1458 1459 mutex_enter(&pp->pr_lock); 1460 1461 pp->pr_minitems = n; 1462 pp->pr_minpages = (n == 0) 1463 ? 0 1464 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1465 1466 /* Make sure we're caught up with the newly-set low water mark. */ 1467 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1468 /* 1469 * XXX: Should we log a warning? Should we set up a timeout 1470 * to try again in a second or so? The latter could break 1471 * a caller's assumptions about interrupt protection, etc. 1472 */ 1473 } 1474 1475 mutex_exit(&pp->pr_lock); 1476 } 1477 1478 void 1479 pool_sethiwat(struct pool *pp, int n) 1480 { 1481 1482 mutex_enter(&pp->pr_lock); 1483 1484 pp->pr_maxpages = (n == 0) 1485 ? 0 1486 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1487 1488 mutex_exit(&pp->pr_lock); 1489 } 1490 1491 void 1492 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1493 { 1494 1495 mutex_enter(&pp->pr_lock); 1496 1497 pp->pr_hardlimit = n; 1498 pp->pr_hardlimit_warning = warnmess; 1499 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1500 pp->pr_hardlimit_warning_last.tv_sec = 0; 1501 pp->pr_hardlimit_warning_last.tv_usec = 0; 1502 1503 /* 1504 * In-line version of pool_sethiwat(), because we don't want to 1505 * release the lock. 1506 */ 1507 pp->pr_maxpages = (n == 0) 1508 ? 0 1509 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1510 1511 mutex_exit(&pp->pr_lock); 1512 } 1513 1514 /* 1515 * Release all complete pages that have not been used recently. 1516 * 1517 * Might be called from interrupt context. 
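 *
 * A subsystem that hoards idle objects of its own can cooperate with
 * reclaim by registering a drain hook; a hedged sketch (frob_drain and
 * frob_pool are hypothetical):
 *
 *	static void
 *	frob_drain(void *arg, int flags)
 *	{
 *		... release privately cached objects back to frob_pool ...
 *	}
 *
 *	pool_set_drain_hook(&frob_pool, frob_drain, NULL);
 *
 * pool_reclaim() calls the hook with the pool unlocked before freeing
 * idle pages, and pool_get() calls it when the hard limit is reached.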
1518 */ 1519 int 1520 #ifdef POOL_DIAGNOSTIC 1521 _pool_reclaim(struct pool *pp, const char *file, long line) 1522 #else 1523 pool_reclaim(struct pool *pp) 1524 #endif 1525 { 1526 struct pool_item_header *ph, *phnext; 1527 struct pool_pagelist pq; 1528 uint32_t curtime; 1529 bool klock; 1530 int rv; 1531 1532 if (cpu_intr_p() || cpu_softintr_p()) { 1533 KASSERT(pp->pr_ipl != IPL_NONE); 1534 } 1535 1536 if (pp->pr_drain_hook != NULL) { 1537 /* 1538 * The drain hook must be called with the pool unlocked. 1539 */ 1540 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1541 } 1542 1543 /* 1544 * XXXSMP Because we do not want to cause non-MPSAFE code 1545 * to block. 1546 */ 1547 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1548 pp->pr_ipl == IPL_SOFTSERIAL) { 1549 KERNEL_LOCK(1, NULL); 1550 klock = true; 1551 } else 1552 klock = false; 1553 1554 /* Reclaim items from the pool's cache (if any). */ 1555 if (pp->pr_cache != NULL) 1556 pool_cache_invalidate(pp->pr_cache); 1557 1558 if (mutex_tryenter(&pp->pr_lock) == 0) { 1559 if (klock) { 1560 KERNEL_UNLOCK_ONE(NULL); 1561 } 1562 return (0); 1563 } 1564 pr_enter(pp, file, line); 1565 1566 LIST_INIT(&pq); 1567 1568 curtime = time_uptime; 1569 1570 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1571 phnext = LIST_NEXT(ph, ph_pagelist); 1572 1573 /* Check our minimum page claim */ 1574 if (pp->pr_npages <= pp->pr_minpages) 1575 break; 1576 1577 KASSERT(ph->ph_nmissing == 0); 1578 if (curtime - ph->ph_time < pool_inactive_time) 1579 continue; 1580 1581 /* 1582 * If freeing this page would put us below 1583 * the low water mark, stop now. 1584 */ 1585 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1586 pp->pr_minitems) 1587 break; 1588 1589 pr_rmpage(pp, ph, &pq); 1590 } 1591 1592 pr_leave(pp); 1593 mutex_exit(&pp->pr_lock); 1594 1595 if (LIST_EMPTY(&pq)) 1596 rv = 0; 1597 else { 1598 pr_pagelist_free(pp, &pq); 1599 rv = 1; 1600 } 1601 1602 if (klock) { 1603 KERNEL_UNLOCK_ONE(NULL); 1604 } 1605 1606 return (rv); 1607 } 1608 1609 /* 1610 * Drain pools, one at a time. This is a two stage process; 1611 * drain_start kicks off a cross call to drain CPU-level caches 1612 * if the pool has an associated pool_cache. drain_end waits 1613 * for those cross calls to finish, and then drains the cache 1614 * (if any) and pool. 1615 * 1616 * Note, must never be called from interrupt context. 1617 */ 1618 void 1619 pool_drain_start(struct pool **ppp, uint64_t *wp) 1620 { 1621 struct pool *pp; 1622 1623 KASSERT(!TAILQ_EMPTY(&pool_head)); 1624 1625 pp = NULL; 1626 1627 /* Find next pool to drain, and add a reference. */ 1628 mutex_enter(&pool_head_lock); 1629 do { 1630 if (drainpp == NULL) { 1631 drainpp = TAILQ_FIRST(&pool_head); 1632 } 1633 if (drainpp != NULL) { 1634 pp = drainpp; 1635 drainpp = TAILQ_NEXT(pp, pr_poollist); 1636 } 1637 /* 1638 * Skip completely idle pools. We depend on at least 1639 * one pool in the system being active. 1640 */ 1641 } while (pp == NULL || pp->pr_npages == 0); 1642 pp->pr_refcnt++; 1643 mutex_exit(&pool_head_lock); 1644 1645 /* If there is a pool_cache, drain CPU level caches. */ 1646 *ppp = pp; 1647 if (pp->pr_cache != NULL) { 1648 *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, 1649 pp->pr_cache, NULL); 1650 } 1651 } 1652 1653 bool 1654 pool_drain_end(struct pool *pp, uint64_t where) 1655 { 1656 bool reclaimed; 1657 1658 if (pp == NULL) 1659 return false; 1660 1661 KASSERT(pp->pr_refcnt > 0); 1662 1663 /* Wait for remote draining to complete. 
*/ 1664 if (pp->pr_cache != NULL) 1665 xc_wait(where); 1666 1667 /* Drain the cache (if any) and pool.. */ 1668 reclaimed = pool_reclaim(pp); 1669 1670 /* Finally, unlock the pool. */ 1671 mutex_enter(&pool_head_lock); 1672 pp->pr_refcnt--; 1673 cv_broadcast(&pool_busy); 1674 mutex_exit(&pool_head_lock); 1675 1676 return reclaimed; 1677 } 1678 1679 /* 1680 * Diagnostic helpers. 1681 */ 1682 void 1683 pool_print(struct pool *pp, const char *modif) 1684 { 1685 1686 pool_print1(pp, modif, printf); 1687 } 1688 1689 void 1690 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1691 { 1692 struct pool *pp; 1693 1694 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1695 pool_printit(pp, modif, pr); 1696 } 1697 } 1698 1699 void 1700 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1701 { 1702 1703 if (pp == NULL) { 1704 (*pr)("Must specify a pool to print.\n"); 1705 return; 1706 } 1707 1708 pool_print1(pp, modif, pr); 1709 } 1710 1711 static void 1712 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1713 void (*pr)(const char *, ...)) 1714 { 1715 struct pool_item_header *ph; 1716 #ifdef DIAGNOSTIC 1717 struct pool_item *pi; 1718 #endif 1719 1720 LIST_FOREACH(ph, pl, ph_pagelist) { 1721 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1722 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1723 #ifdef DIAGNOSTIC 1724 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1725 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1726 if (pi->pi_magic != PI_MAGIC) { 1727 (*pr)("\t\t\titem %p, magic 0x%x\n", 1728 pi, pi->pi_magic); 1729 } 1730 } 1731 } 1732 #endif 1733 } 1734 } 1735 1736 static void 1737 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1738 { 1739 struct pool_item_header *ph; 1740 pool_cache_t pc; 1741 pcg_t *pcg; 1742 pool_cache_cpu_t *cc; 1743 uint64_t cpuhit, cpumiss; 1744 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1745 char c; 1746 1747 while ((c = *modif++) != '\0') { 1748 if (c == 'l') 1749 print_log = 1; 1750 if (c == 'p') 1751 print_pagelist = 1; 1752 if (c == 'c') 1753 print_cache = 1; 1754 } 1755 1756 if ((pc = pp->pr_cache) != NULL) { 1757 (*pr)("POOL CACHE"); 1758 } else { 1759 (*pr)("POOL"); 1760 } 1761 1762 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1763 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1764 pp->pr_roflags); 1765 (*pr)("\talloc %p\n", pp->pr_alloc); 1766 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1767 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1768 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1769 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1770 1771 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1772 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1773 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1774 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1775 1776 if (print_pagelist == 0) 1777 goto skip_pagelist; 1778 1779 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1780 (*pr)("\n\tempty page list:\n"); 1781 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1782 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1783 (*pr)("\n\tfull page list:\n"); 1784 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1785 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1786 (*pr)("\n\tpartial-page list:\n"); 1787 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1788 1789 if (pp->pr_curpage == NULL) 1790 (*pr)("\tno current page\n"); 1791 else 1792 (*pr)("\tcurpage 
%p\n", pp->pr_curpage->ph_page); 1793 1794 skip_pagelist: 1795 if (print_log == 0) 1796 goto skip_log; 1797 1798 (*pr)("\n"); 1799 if ((pp->pr_roflags & PR_LOGGING) == 0) 1800 (*pr)("\tno log\n"); 1801 else { 1802 pr_printlog(pp, NULL, pr); 1803 } 1804 1805 skip_log: 1806 1807 #define PR_GROUPLIST(pcg) \ 1808 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1809 for (i = 0; i < pcg->pcg_size; i++) { \ 1810 if (pcg->pcg_objects[i].pcgo_pa != \ 1811 POOL_PADDR_INVALID) { \ 1812 (*pr)("\t\t\t%p, 0x%llx\n", \ 1813 pcg->pcg_objects[i].pcgo_va, \ 1814 (unsigned long long) \ 1815 pcg->pcg_objects[i].pcgo_pa); \ 1816 } else { \ 1817 (*pr)("\t\t\t%p\n", \ 1818 pcg->pcg_objects[i].pcgo_va); \ 1819 } \ 1820 } 1821 1822 if (pc != NULL) { 1823 cpuhit = 0; 1824 cpumiss = 0; 1825 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1826 if ((cc = pc->pc_cpus[i]) == NULL) 1827 continue; 1828 cpuhit += cc->cc_hits; 1829 cpumiss += cc->cc_misses; 1830 } 1831 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1832 (*pr)("\tcache layer hits %llu misses %llu\n", 1833 pc->pc_hits, pc->pc_misses); 1834 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1835 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1836 pc->pc_contended); 1837 (*pr)("\tcache layer empty groups %u full groups %u\n", 1838 pc->pc_nempty, pc->pc_nfull); 1839 if (print_cache) { 1840 (*pr)("\tfull cache groups:\n"); 1841 for (pcg = pc->pc_fullgroups; pcg != NULL; 1842 pcg = pcg->pcg_next) { 1843 PR_GROUPLIST(pcg); 1844 } 1845 (*pr)("\tempty cache groups:\n"); 1846 for (pcg = pc->pc_emptygroups; pcg != NULL; 1847 pcg = pcg->pcg_next) { 1848 PR_GROUPLIST(pcg); 1849 } 1850 } 1851 } 1852 #undef PR_GROUPLIST 1853 1854 pr_enter_check(pp, pr); 1855 } 1856 1857 static int 1858 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1859 { 1860 struct pool_item *pi; 1861 void *page; 1862 int n; 1863 1864 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1865 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1866 if (page != ph->ph_page && 1867 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1868 if (label != NULL) 1869 printf("%s: ", label); 1870 printf("pool(%p:%s): page inconsistency: page %p;" 1871 " at page head addr %p (p %p)\n", pp, 1872 pp->pr_wchan, ph->ph_page, 1873 ph, page); 1874 return 1; 1875 } 1876 } 1877 1878 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1879 return 0; 1880 1881 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1882 pi != NULL; 1883 pi = LIST_NEXT(pi,pi_list), n++) { 1884 1885 #ifdef DIAGNOSTIC 1886 if (pi->pi_magic != PI_MAGIC) { 1887 if (label != NULL) 1888 printf("%s: ", label); 1889 printf("pool(%s): free list modified: magic=%x;" 1890 " page %p; item ordinal %d; addr %p\n", 1891 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1892 n, pi); 1893 panic("pool"); 1894 } 1895 #endif 1896 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1897 continue; 1898 } 1899 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1900 if (page == ph->ph_page) 1901 continue; 1902 1903 if (label != NULL) 1904 printf("%s: ", label); 1905 printf("pool(%p:%s): page inconsistency: page %p;" 1906 " item ordinal %d; addr %p (p %p)\n", pp, 1907 pp->pr_wchan, ph->ph_page, 1908 n, pi, page); 1909 return 1; 1910 } 1911 return 0; 1912 } 1913 1914 1915 int 1916 pool_chk(struct pool *pp, const char *label) 1917 { 1918 struct pool_item_header *ph; 1919 int r = 0; 1920 1921 mutex_enter(&pp->pr_lock); 1922 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1923 r = pool_chk_page(pp, label, ph); 1924 if (r) { 1925 goto out; 
1926 } 1927 } 1928 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1929 r = pool_chk_page(pp, label, ph); 1930 if (r) { 1931 goto out; 1932 } 1933 } 1934 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1935 r = pool_chk_page(pp, label, ph); 1936 if (r) { 1937 goto out; 1938 } 1939 } 1940 1941 out: 1942 mutex_exit(&pp->pr_lock); 1943 return (r); 1944 } 1945 1946 /* 1947 * pool_cache_init: 1948 * 1949 * Initialize a pool cache. 1950 */ 1951 pool_cache_t 1952 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 1953 const char *wchan, struct pool_allocator *palloc, int ipl, 1954 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 1955 { 1956 pool_cache_t pc; 1957 1958 pc = pool_get(&cache_pool, PR_WAITOK); 1959 if (pc == NULL) 1960 return NULL; 1961 1962 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 1963 palloc, ipl, ctor, dtor, arg); 1964 1965 return pc; 1966 } 1967 1968 /* 1969 * pool_cache_bootstrap: 1970 * 1971 * Kernel-private version of pool_cache_init(). The caller 1972 * provides initial storage. 1973 */ 1974 void 1975 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 1976 u_int align_offset, u_int flags, const char *wchan, 1977 struct pool_allocator *palloc, int ipl, 1978 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 1979 void *arg) 1980 { 1981 CPU_INFO_ITERATOR cii; 1982 pool_cache_t pc1; 1983 struct cpu_info *ci; 1984 struct pool *pp; 1985 1986 pp = &pc->pc_pool; 1987 if (palloc == NULL && ipl == IPL_NONE) 1988 palloc = &pool_allocator_nointr; 1989 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 1990 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 1991 1992 if (ctor == NULL) { 1993 ctor = (int (*)(void *, void *, int))nullop; 1994 } 1995 if (dtor == NULL) { 1996 dtor = (void (*)(void *, void *))nullop; 1997 } 1998 1999 pc->pc_emptygroups = NULL; 2000 pc->pc_fullgroups = NULL; 2001 pc->pc_partgroups = NULL; 2002 pc->pc_ctor = ctor; 2003 pc->pc_dtor = dtor; 2004 pc->pc_arg = arg; 2005 pc->pc_hits = 0; 2006 pc->pc_misses = 0; 2007 pc->pc_nempty = 0; 2008 pc->pc_npart = 0; 2009 pc->pc_nfull = 0; 2010 pc->pc_contended = 0; 2011 pc->pc_refcnt = 0; 2012 pc->pc_freecheck = NULL; 2013 2014 if ((flags & PR_LARGECACHE) != 0) { 2015 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2016 pc->pc_pcgpool = &pcg_large_pool; 2017 } else { 2018 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2019 pc->pc_pcgpool = &pcg_normal_pool; 2020 } 2021 2022 /* Allocate per-CPU caches. */ 2023 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2024 pc->pc_ncpu = 0; 2025 if (ncpu < 2) { 2026 /* XXX For sparc: boot CPU is not attached yet. */ 2027 pool_cache_cpu_init1(curcpu(), pc); 2028 } else { 2029 for (CPU_INFO_FOREACH(cii, ci)) { 2030 pool_cache_cpu_init1(ci, pc); 2031 } 2032 } 2033 2034 /* Add to list of all pools. */ 2035 if (__predict_true(!cold)) 2036 mutex_enter(&pool_head_lock); 2037 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2038 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2039 break; 2040 } 2041 if (pc1 == NULL) 2042 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2043 else 2044 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2045 if (__predict_true(!cold)) 2046 mutex_exit(&pool_head_lock); 2047 2048 membar_sync(); 2049 pp->pr_cache = pc; 2050 } 2051 2052 /* 2053 * pool_cache_destroy: 2054 * 2055 * Destroy a pool cache. 
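 *
 * Lifecycle sketch (added for illustration; the object type and the
 * constructor/destructor names are hypothetical):
 *
 *	pool_cache_t pc;
 *
 *	pc = pool_cache_init(sizeof(struct frobdesc), coherency_unit, 0, 0,
 *	    "frobcache", NULL, IPL_BIO, frob_ctor, frob_dtor, NULL);
 *
 *	struct frobdesc *fd = pool_cache_get(pc, PR_WAITOK);
 *	...
 *	pool_cache_put(pc, fd);
 *
 *	pool_cache_destroy(pc);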
2056 */ 2057 void 2058 pool_cache_destroy(pool_cache_t pc) 2059 { 2060 2061 pool_cache_bootstrap_destroy(pc); 2062 pool_put(&cache_pool, pc); 2063 } 2064 2065 /* 2066 * pool_cache_bootstrap_destroy: 2067 * 2068 * Destroy a pool cache. 2069 */ 2070 void 2071 pool_cache_bootstrap_destroy(pool_cache_t pc) 2072 { 2073 struct pool *pp = &pc->pc_pool; 2074 u_int i; 2075 2076 /* Remove it from the global list. */ 2077 mutex_enter(&pool_head_lock); 2078 while (pc->pc_refcnt != 0) 2079 cv_wait(&pool_busy, &pool_head_lock); 2080 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2081 mutex_exit(&pool_head_lock); 2082 2083 /* First, invalidate the entire cache. */ 2084 pool_cache_invalidate(pc); 2085 2086 /* Disassociate it from the pool. */ 2087 mutex_enter(&pp->pr_lock); 2088 pp->pr_cache = NULL; 2089 mutex_exit(&pp->pr_lock); 2090 2091 /* Destroy per-CPU data */ 2092 for (i = 0; i < __arraycount(pc->pc_cpus); i++) 2093 pool_cache_invalidate_cpu(pc, i); 2094 2095 /* Finally, destroy it. */ 2096 mutex_destroy(&pc->pc_lock); 2097 pool_destroy(pp); 2098 } 2099 2100 /* 2101 * pool_cache_cpu_init1: 2102 * 2103 * Called for each pool_cache whenever a new CPU is attached. 2104 */ 2105 static void 2106 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2107 { 2108 pool_cache_cpu_t *cc; 2109 int index; 2110 2111 index = ci->ci_index; 2112 2113 KASSERT(index < __arraycount(pc->pc_cpus)); 2114 2115 if ((cc = pc->pc_cpus[index]) != NULL) { 2116 KASSERT(cc->cc_cpuindex == index); 2117 return; 2118 } 2119 2120 /* 2121 * The first CPU is 'free'. This needs to be the case for 2122 * bootstrap - we may not be able to allocate yet. 2123 */ 2124 if (pc->pc_ncpu == 0) { 2125 cc = &pc->pc_cpu0; 2126 pc->pc_ncpu = 1; 2127 } else { 2128 mutex_enter(&pc->pc_lock); 2129 pc->pc_ncpu++; 2130 mutex_exit(&pc->pc_lock); 2131 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2132 } 2133 2134 cc->cc_ipl = pc->pc_pool.pr_ipl; 2135 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 2136 cc->cc_cache = pc; 2137 cc->cc_cpuindex = index; 2138 cc->cc_hits = 0; 2139 cc->cc_misses = 0; 2140 cc->cc_current = __UNCONST(&pcg_dummy); 2141 cc->cc_previous = __UNCONST(&pcg_dummy); 2142 2143 pc->pc_cpus[index] = cc; 2144 } 2145 2146 /* 2147 * pool_cache_cpu_init: 2148 * 2149 * Called whenever a new CPU is attached. 2150 */ 2151 void 2152 pool_cache_cpu_init(struct cpu_info *ci) 2153 { 2154 pool_cache_t pc; 2155 2156 mutex_enter(&pool_head_lock); 2157 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2158 pc->pc_refcnt++; 2159 mutex_exit(&pool_head_lock); 2160 2161 pool_cache_cpu_init1(ci, pc); 2162 2163 mutex_enter(&pool_head_lock); 2164 pc->pc_refcnt--; 2165 cv_broadcast(&pool_busy); 2166 } 2167 mutex_exit(&pool_head_lock); 2168 } 2169 2170 /* 2171 * pool_cache_reclaim: 2172 * 2173 * Reclaim memory from a pool cache. 2174 */ 2175 bool 2176 pool_cache_reclaim(pool_cache_t pc) 2177 { 2178 2179 return pool_reclaim(&pc->pc_pool); 2180 } 2181 2182 static void 2183 pool_cache_destruct_object1(pool_cache_t pc, void *object) 2184 { 2185 2186 (*pc->pc_dtor)(pc->pc_arg, object); 2187 pool_put(&pc->pc_pool, object); 2188 } 2189 2190 /* 2191 * pool_cache_destruct_object: 2192 * 2193 * Force destruction of an object and its release back into 2194 * the pool. 
2195 */ 2196 void 2197 pool_cache_destruct_object(pool_cache_t pc, void *object) 2198 { 2199 2200 FREECHECK_IN(&pc->pc_freecheck, object); 2201 2202 pool_cache_destruct_object1(pc, object); 2203 } 2204 2205 /* 2206 * pool_cache_invalidate_groups: 2207 * 2208 * Invalidate a chain of groups and destruct all objects. 2209 */ 2210 static void 2211 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2212 { 2213 void *object; 2214 pcg_t *next; 2215 int i; 2216 2217 for (; pcg != NULL; pcg = next) { 2218 next = pcg->pcg_next; 2219 2220 for (i = 0; i < pcg->pcg_avail; i++) { 2221 object = pcg->pcg_objects[i].pcgo_va; 2222 pool_cache_destruct_object1(pc, object); 2223 } 2224 2225 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2226 pool_put(&pcg_large_pool, pcg); 2227 } else { 2228 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2229 pool_put(&pcg_normal_pool, pcg); 2230 } 2231 } 2232 } 2233 2234 /* 2235 * pool_cache_invalidate: 2236 * 2237 * Invalidate a pool cache (destruct and release all of the 2238 * cached objects). Does not reclaim objects from the pool. 2239 * 2240 * Note: For pool caches that provide constructed objects, there 2241 * is an assumption that another level of synchronization is occurring 2242 * between the input to the constructor and the cache invalidation. 2243 */ 2244 void 2245 pool_cache_invalidate(pool_cache_t pc) 2246 { 2247 pcg_t *full, *empty, *part; 2248 #if 0 2249 uint64_t where; 2250 2251 if (ncpu < 2 || !mp_online) { 2252 /* 2253 * We might be called early enough in the boot process 2254 * for the CPU data structures to not be fully initialized. 2255 * In this case, simply gather the local CPU's cache now 2256 * since it will be the only one running. 2257 */ 2258 pool_cache_xcall(pc); 2259 } else { 2260 /* 2261 * Gather all of the CPU-specific caches into the 2262 * global cache. 2263 */ 2264 where = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, pc, NULL); 2265 xc_wait(where); 2266 } 2267 #endif 2268 mutex_enter(&pc->pc_lock); 2269 full = pc->pc_fullgroups; 2270 empty = pc->pc_emptygroups; 2271 part = pc->pc_partgroups; 2272 pc->pc_fullgroups = NULL; 2273 pc->pc_emptygroups = NULL; 2274 pc->pc_partgroups = NULL; 2275 pc->pc_nfull = 0; 2276 pc->pc_nempty = 0; 2277 pc->pc_npart = 0; 2278 mutex_exit(&pc->pc_lock); 2279 2280 pool_cache_invalidate_groups(pc, full); 2281 pool_cache_invalidate_groups(pc, empty); 2282 pool_cache_invalidate_groups(pc, part); 2283 } 2284 2285 /* 2286 * pool_cache_invalidate_cpu: 2287 * 2288 * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2289 * identified by its associated index. 2290 * It is caller's responsibility to ensure that no operation is 2291 * taking place on this pool cache while doing this invalidation. 2292 * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2293 * pool cached objects from a CPU different from the one currently running 2294 * may result in an undefined behaviour. 
2295 */ 2296 static void 2297 pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2298 { 2299 2300 pool_cache_cpu_t *cc; 2301 pcg_t *pcg; 2302 2303 if ((cc = pc->pc_cpus[index]) == NULL) 2304 return; 2305 2306 if ((pcg = cc->cc_current) != &pcg_dummy) { 2307 pcg->pcg_next = NULL; 2308 pool_cache_invalidate_groups(pc, pcg); 2309 } 2310 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2311 pcg->pcg_next = NULL; 2312 pool_cache_invalidate_groups(pc, pcg); 2313 } 2314 if (cc != &pc->pc_cpu0) 2315 pool_put(&cache_cpu_pool, cc); 2316 2317 } 2318 2319 void 2320 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2321 { 2322 2323 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2324 } 2325 2326 void 2327 pool_cache_setlowat(pool_cache_t pc, int n) 2328 { 2329 2330 pool_setlowat(&pc->pc_pool, n); 2331 } 2332 2333 void 2334 pool_cache_sethiwat(pool_cache_t pc, int n) 2335 { 2336 2337 pool_sethiwat(&pc->pc_pool, n); 2338 } 2339 2340 void 2341 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2342 { 2343 2344 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2345 } 2346 2347 static bool __noinline 2348 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2349 paddr_t *pap, int flags) 2350 { 2351 pcg_t *pcg, *cur; 2352 uint64_t ncsw; 2353 pool_cache_t pc; 2354 void *object; 2355 2356 KASSERT(cc->cc_current->pcg_avail == 0); 2357 KASSERT(cc->cc_previous->pcg_avail == 0); 2358 2359 pc = cc->cc_cache; 2360 cc->cc_misses++; 2361 2362 /* 2363 * Nothing was available locally. Try and grab a group 2364 * from the cache. 2365 */ 2366 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2367 ncsw = curlwp->l_ncsw; 2368 mutex_enter(&pc->pc_lock); 2369 pc->pc_contended++; 2370 2371 /* 2372 * If we context switched while locking, then 2373 * our view of the per-CPU data is invalid: 2374 * retry. 2375 */ 2376 if (curlwp->l_ncsw != ncsw) { 2377 mutex_exit(&pc->pc_lock); 2378 return true; 2379 } 2380 } 2381 2382 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2383 /* 2384 * If there's a full group, release our empty 2385 * group back to the cache. Install the full 2386 * group as cc_current and return. 2387 */ 2388 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2389 KASSERT(cur->pcg_avail == 0); 2390 cur->pcg_next = pc->pc_emptygroups; 2391 pc->pc_emptygroups = cur; 2392 pc->pc_nempty++; 2393 } 2394 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2395 cc->cc_current = pcg; 2396 pc->pc_fullgroups = pcg->pcg_next; 2397 pc->pc_hits++; 2398 pc->pc_nfull--; 2399 mutex_exit(&pc->pc_lock); 2400 return true; 2401 } 2402 2403 /* 2404 * Nothing available locally or in cache. Take the slow 2405 * path: fetch a new object from the pool and construct 2406 * it. 
2407 */ 2408 pc->pc_misses++; 2409 mutex_exit(&pc->pc_lock); 2410 splx(s); 2411 2412 object = pool_get(&pc->pc_pool, flags); 2413 *objectp = object; 2414 if (__predict_false(object == NULL)) 2415 return false; 2416 2417 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2418 pool_put(&pc->pc_pool, object); 2419 *objectp = NULL; 2420 return false; 2421 } 2422 2423 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) & 2424 (pc->pc_pool.pr_align - 1)) == 0); 2425 2426 if (pap != NULL) { 2427 #ifdef POOL_VTOPHYS 2428 *pap = POOL_VTOPHYS(object); 2429 #else 2430 *pap = POOL_PADDR_INVALID; 2431 #endif 2432 } 2433 2434 FREECHECK_OUT(&pc->pc_freecheck, object); 2435 return false; 2436 } 2437 2438 /* 2439 * pool_cache_get{,_paddr}: 2440 * 2441 * Get an object from a pool cache (optionally returning 2442 * the physical address of the object). 2443 */ 2444 void * 2445 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2446 { 2447 pool_cache_cpu_t *cc; 2448 pcg_t *pcg; 2449 void *object; 2450 int s; 2451 2452 KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()) || 2453 (pc->pc_pool.pr_ipl != IPL_NONE || cold || panicstr != NULL), 2454 "pool '%s' is IPL_NONE, but called from interrupt context\n", 2455 pc->pc_pool.pr_wchan); 2456 2457 if (flags & PR_WAITOK) { 2458 ASSERT_SLEEPABLE(); 2459 } 2460 2461 /* Lock out interrupts and disable preemption. */ 2462 s = splvm(); 2463 while (/* CONSTCOND */ true) { 2464 /* Try and allocate an object from the current group. */ 2465 cc = pc->pc_cpus[curcpu()->ci_index]; 2466 KASSERT(cc->cc_cache == pc); 2467 pcg = cc->cc_current; 2468 if (__predict_true(pcg->pcg_avail > 0)) { 2469 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2470 if (__predict_false(pap != NULL)) 2471 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2472 #if defined(DIAGNOSTIC) 2473 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2474 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2475 KASSERT(object != NULL); 2476 #endif 2477 cc->cc_hits++; 2478 splx(s); 2479 FREECHECK_OUT(&pc->pc_freecheck, object); 2480 return object; 2481 } 2482 2483 /* 2484 * That failed. If the previous group isn't empty, swap 2485 * it with the current group and allocate from there. 2486 */ 2487 pcg = cc->cc_previous; 2488 if (__predict_true(pcg->pcg_avail > 0)) { 2489 cc->cc_previous = cc->cc_current; 2490 cc->cc_current = pcg; 2491 continue; 2492 } 2493 2494 /* 2495 * Can't allocate from either group: try the slow path. 2496 * If get_slow() allocated an object for us, or if 2497 * no more objects are available, it will return false. 2498 * Otherwise, we need to retry. 2499 */ 2500 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2501 break; 2502 } 2503 2504 return object; 2505 } 2506 2507 static bool __noinline 2508 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2509 { 2510 pcg_t *pcg, *cur; 2511 uint64_t ncsw; 2512 pool_cache_t pc; 2513 2514 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2515 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2516 2517 pc = cc->cc_cache; 2518 pcg = NULL; 2519 cc->cc_misses++; 2520 2521 /* 2522 * If there are no empty groups in the cache then allocate one 2523 * while still unlocked. 2524 */ 2525 if (__predict_false(pc->pc_emptygroups == NULL)) { 2526 if (__predict_true(!pool_cache_disable)) { 2527 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2528 } 2529 if (__predict_true(pcg != NULL)) { 2530 pcg->pcg_avail = 0; 2531 pcg->pcg_size = pc->pc_pcgsize; 2532 } 2533 } 2534 2535 /* Lock the cache. 
*/ 2536 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2537 ncsw = curlwp->l_ncsw; 2538 mutex_enter(&pc->pc_lock); 2539 pc->pc_contended++; 2540 2541 /* 2542 * If we context switched while locking, then our view of 2543 * the per-CPU data is invalid: retry. 2544 */ 2545 if (__predict_false(curlwp->l_ncsw != ncsw)) { 2546 mutex_exit(&pc->pc_lock); 2547 if (pcg != NULL) { 2548 pool_put(pc->pc_pcgpool, pcg); 2549 } 2550 return true; 2551 } 2552 } 2553 2554 /* If there are no empty groups in the cache then allocate one. */ 2555 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2556 pcg = pc->pc_emptygroups; 2557 pc->pc_emptygroups = pcg->pcg_next; 2558 pc->pc_nempty--; 2559 } 2560 2561 /* 2562 * If there's a empty group, release our full group back 2563 * to the cache. Install the empty group to the local CPU 2564 * and return. 2565 */ 2566 if (pcg != NULL) { 2567 KASSERT(pcg->pcg_avail == 0); 2568 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2569 cc->cc_previous = pcg; 2570 } else { 2571 cur = cc->cc_current; 2572 if (__predict_true(cur != &pcg_dummy)) { 2573 KASSERT(cur->pcg_avail == cur->pcg_size); 2574 cur->pcg_next = pc->pc_fullgroups; 2575 pc->pc_fullgroups = cur; 2576 pc->pc_nfull++; 2577 } 2578 cc->cc_current = pcg; 2579 } 2580 pc->pc_hits++; 2581 mutex_exit(&pc->pc_lock); 2582 return true; 2583 } 2584 2585 /* 2586 * Nothing available locally or in cache, and we didn't 2587 * allocate an empty group. Take the slow path and destroy 2588 * the object here and now. 2589 */ 2590 pc->pc_misses++; 2591 mutex_exit(&pc->pc_lock); 2592 splx(s); 2593 pool_cache_destruct_object(pc, object); 2594 2595 return false; 2596 } 2597 2598 /* 2599 * pool_cache_put{,_paddr}: 2600 * 2601 * Put an object back to the pool cache (optionally caching the 2602 * physical address of the object). 2603 */ 2604 void 2605 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2606 { 2607 pool_cache_cpu_t *cc; 2608 pcg_t *pcg; 2609 int s; 2610 2611 KASSERT(object != NULL); 2612 FREECHECK_IN(&pc->pc_freecheck, object); 2613 2614 /* Lock out interrupts and disable preemption. */ 2615 s = splvm(); 2616 while (/* CONSTCOND */ true) { 2617 /* If the current group isn't full, release it there. */ 2618 cc = pc->pc_cpus[curcpu()->ci_index]; 2619 KASSERT(cc->cc_cache == pc); 2620 pcg = cc->cc_current; 2621 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2622 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2623 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2624 pcg->pcg_avail++; 2625 cc->cc_hits++; 2626 splx(s); 2627 return; 2628 } 2629 2630 /* 2631 * That failed. If the previous group isn't full, swap 2632 * it with the current group and try again. 2633 */ 2634 pcg = cc->cc_previous; 2635 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2636 cc->cc_previous = cc->cc_current; 2637 cc->cc_current = pcg; 2638 continue; 2639 } 2640 2641 /* 2642 * Can't free to either group: try the slow path. 2643 * If put_slow() releases the object for us, it 2644 * will return false. Otherwise we need to retry. 2645 */ 2646 if (!pool_cache_put_slow(cc, s, object)) 2647 break; 2648 } 2649 } 2650 2651 /* 2652 * pool_cache_xcall: 2653 * 2654 * Transfer objects from the per-CPU cache to the global cache. 2655 * Run within a cross-call thread. 
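 *
 *	As sketched in the #if 0 block of pool_cache_invalidate() above,
 *	the expected call sequence is
 *	xc_broadcast(0, (xcfunc_t)pool_cache_xcall, pc, NULL) followed by
 *	xc_wait(), so that each CPU moves its own groups onto the global
 *	lists.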
2656 */ 2657 static void 2658 pool_cache_xcall(pool_cache_t pc) 2659 { 2660 pool_cache_cpu_t *cc; 2661 pcg_t *prev, *cur, **list; 2662 int s; 2663 2664 s = splvm(); 2665 mutex_enter(&pc->pc_lock); 2666 cc = pc->pc_cpus[curcpu()->ci_index]; 2667 cur = cc->cc_current; 2668 cc->cc_current = __UNCONST(&pcg_dummy); 2669 prev = cc->cc_previous; 2670 cc->cc_previous = __UNCONST(&pcg_dummy); 2671 if (cur != &pcg_dummy) { 2672 if (cur->pcg_avail == cur->pcg_size) { 2673 list = &pc->pc_fullgroups; 2674 pc->pc_nfull++; 2675 } else if (cur->pcg_avail == 0) { 2676 list = &pc->pc_emptygroups; 2677 pc->pc_nempty++; 2678 } else { 2679 list = &pc->pc_partgroups; 2680 pc->pc_npart++; 2681 } 2682 cur->pcg_next = *list; 2683 *list = cur; 2684 } 2685 if (prev != &pcg_dummy) { 2686 if (prev->pcg_avail == prev->pcg_size) { 2687 list = &pc->pc_fullgroups; 2688 pc->pc_nfull++; 2689 } else if (prev->pcg_avail == 0) { 2690 list = &pc->pc_emptygroups; 2691 pc->pc_nempty++; 2692 } else { 2693 list = &pc->pc_partgroups; 2694 pc->pc_npart++; 2695 } 2696 prev->pcg_next = *list; 2697 *list = prev; 2698 } 2699 mutex_exit(&pc->pc_lock); 2700 splx(s); 2701 } 2702 2703 /* 2704 * Pool backend allocators. 2705 * 2706 * Each pool has a backend allocator that handles allocation, deallocation, 2707 * and any additional draining that might be needed. 2708 * 2709 * We provide two standard allocators: 2710 * 2711 * pool_allocator_kmem - the default when no allocator is specified 2712 * 2713 * pool_allocator_nointr - used for pools that will not be accessed 2714 * in interrupt context. 2715 */ 2716 void *pool_page_alloc(struct pool *, int); 2717 void pool_page_free(struct pool *, void *); 2718 2719 #ifdef POOL_SUBPAGE 2720 struct pool_allocator pool_allocator_kmem_fullpage = { 2721 .pa_alloc = pool_page_alloc, 2722 .pa_free = pool_page_free, 2723 .pa_pagesz = 0 2724 }; 2725 #else 2726 struct pool_allocator pool_allocator_kmem = { 2727 .pa_alloc = pool_page_alloc, 2728 .pa_free = pool_page_free, 2729 .pa_pagesz = 0 2730 }; 2731 #endif 2732 2733 #ifdef POOL_SUBPAGE 2734 struct pool_allocator pool_allocator_nointr_fullpage = { 2735 .pa_alloc = pool_page_alloc, 2736 .pa_free = pool_page_free, 2737 .pa_pagesz = 0 2738 }; 2739 #else 2740 struct pool_allocator pool_allocator_nointr = { 2741 .pa_alloc = pool_page_alloc, 2742 .pa_free = pool_page_free, 2743 .pa_pagesz = 0 2744 }; 2745 #endif 2746 2747 #ifdef POOL_SUBPAGE 2748 void *pool_subpage_alloc(struct pool *, int); 2749 void pool_subpage_free(struct pool *, void *); 2750 2751 struct pool_allocator pool_allocator_kmem = { 2752 .pa_alloc = pool_subpage_alloc, 2753 .pa_free = pool_subpage_free, 2754 .pa_pagesz = POOL_SUBPAGE 2755 }; 2756 2757 struct pool_allocator pool_allocator_nointr = { 2758 .pa_alloc = pool_subpage_alloc, 2759 .pa_free = pool_subpage_free, 2760 .pa_pagesz = POOL_SUBPAGE 2761 }; 2762 #endif /* POOL_SUBPAGE */ 2763 2764 static void * 2765 pool_allocator_alloc(struct pool *pp, int flags) 2766 { 2767 struct pool_allocator *pa = pp->pr_alloc; 2768 void *res; 2769 2770 res = (*pa->pa_alloc)(pp, flags); 2771 if (res == NULL && (flags & PR_WAITOK) == 0) { 2772 /* 2773 * We only run the drain hook here if PR_NOWAIT. 2774 * In other cases, the hook will be run in 2775 * pool_reclaim(). 
2776 */ 2777 if (pp->pr_drain_hook != NULL) { 2778 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2779 res = (*pa->pa_alloc)(pp, flags); 2780 } 2781 } 2782 return res; 2783 } 2784 2785 static void 2786 pool_allocator_free(struct pool *pp, void *v) 2787 { 2788 struct pool_allocator *pa = pp->pr_alloc; 2789 2790 (*pa->pa_free)(pp, v); 2791 } 2792 2793 void * 2794 pool_page_alloc(struct pool *pp, int flags) 2795 { 2796 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2797 vmem_addr_t va; 2798 int ret; 2799 2800 ret = uvm_km_kmem_alloc(kmem_va_arena, pp->pr_alloc->pa_pagesz, 2801 vflags | VM_INSTANTFIT, &va); 2802 2803 return ret ? NULL : (void *)va; 2804 } 2805 2806 void 2807 pool_page_free(struct pool *pp, void *v) 2808 { 2809 2810 uvm_km_kmem_free(kmem_va_arena, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 2811 } 2812 2813 static void * 2814 pool_page_alloc_meta(struct pool *pp, int flags) 2815 { 2816 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2817 vmem_addr_t va; 2818 int ret; 2819 2820 ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 2821 vflags | VM_INSTANTFIT, &va); 2822 2823 return ret ? NULL : (void *)va; 2824 } 2825 2826 static void 2827 pool_page_free_meta(struct pool *pp, void *v) 2828 { 2829 2830 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 2831 } 2832 2833 #ifdef POOL_SUBPAGE 2834 /* Sub-page allocator, for machines with large hardware pages. */ 2835 void * 2836 pool_subpage_alloc(struct pool *pp, int flags) 2837 { 2838 return pool_get(&psppool, flags); 2839 } 2840 2841 void 2842 pool_subpage_free(struct pool *pp, void *v) 2843 { 2844 pool_put(&psppool, v); 2845 } 2846 2847 #endif /* POOL_SUBPAGE */ 2848 2849 #if defined(DDB) 2850 static bool 2851 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2852 { 2853 2854 return (uintptr_t)ph->ph_page <= addr && 2855 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 2856 } 2857 2858 static bool 2859 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 2860 { 2861 2862 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 2863 } 2864 2865 static bool 2866 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 2867 { 2868 int i; 2869 2870 if (pcg == NULL) { 2871 return false; 2872 } 2873 for (i = 0; i < pcg->pcg_avail; i++) { 2874 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 2875 return true; 2876 } 2877 } 2878 return false; 2879 } 2880 2881 static bool 2882 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2883 { 2884 2885 if ((pp->pr_roflags & PR_NOTOUCH) != 0) { 2886 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr); 2887 pool_item_bitmap_t *bitmap = 2888 ph->ph_bitmap + (idx / BITMAP_SIZE); 2889 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 2890 2891 return (*bitmap & mask) == 0; 2892 } else { 2893 struct pool_item *pi; 2894 2895 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 2896 if (pool_in_item(pp, pi, addr)) { 2897 return false; 2898 } 2899 } 2900 return true; 2901 } 2902 } 2903 2904 void 2905 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2906 { 2907 struct pool *pp; 2908 2909 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 2910 struct pool_item_header *ph; 2911 uintptr_t item; 2912 bool allocated = true; 2913 bool incache = false; 2914 bool incpucache = false; 2915 char cpucachestr[32]; 2916 2917 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 2918 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2919 if 
(pool_in_page(pp, ph, addr)) { 2920 goto found; 2921 } 2922 } 2923 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2924 if (pool_in_page(pp, ph, addr)) { 2925 allocated = 2926 pool_allocated(pp, ph, addr); 2927 goto found; 2928 } 2929 } 2930 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2931 if (pool_in_page(pp, ph, addr)) { 2932 allocated = false; 2933 goto found; 2934 } 2935 } 2936 continue; 2937 } else { 2938 ph = pr_find_pagehead_noalign(pp, (void *)addr); 2939 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 2940 continue; 2941 } 2942 allocated = pool_allocated(pp, ph, addr); 2943 } 2944 found: 2945 if (allocated && pp->pr_cache) { 2946 pool_cache_t pc = pp->pr_cache; 2947 struct pool_cache_group *pcg; 2948 int i; 2949 2950 for (pcg = pc->pc_fullgroups; pcg != NULL; 2951 pcg = pcg->pcg_next) { 2952 if (pool_in_cg(pp, pcg, addr)) { 2953 incache = true; 2954 goto print; 2955 } 2956 } 2957 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 2958 pool_cache_cpu_t *cc; 2959 2960 if ((cc = pc->pc_cpus[i]) == NULL) { 2961 continue; 2962 } 2963 if (pool_in_cg(pp, cc->cc_current, addr) || 2964 pool_in_cg(pp, cc->cc_previous, addr)) { 2965 struct cpu_info *ci = 2966 cpu_lookup(i); 2967 2968 incpucache = true; 2969 snprintf(cpucachestr, 2970 sizeof(cpucachestr), 2971 "cached by CPU %u", 2972 ci->ci_index); 2973 goto print; 2974 } 2975 } 2976 } 2977 print: 2978 item = (uintptr_t)ph->ph_page + ph->ph_off; 2979 item = item + rounddown(addr - item, pp->pr_size); 2980 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 2981 (void *)addr, item, (size_t)(addr - item), 2982 pp->pr_wchan, 2983 incpucache ? cpucachestr : 2984 incache ? "cached" : allocated ? "allocated" : "free"); 2985 } 2986 } 2987 #endif /* defined(DDB) */ 2988
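/*
 * Illustrative sketch, not part of the kernel proper: how a subsystem
 * might layer a constructed-object cache on top of the pool_cache API
 * defined in this file.  "struct foo", f_lock, foo_ctor(), foo_dtor(),
 * foo_cache and the foo_*() wrappers are hypothetical names, not
 * existing kernel interfaces.
 *
 *	static pool_cache_t foo_cache;
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		mutex_init(&f->f_lock, MUTEX_DEFAULT, IPL_NONE);
 *		return 0;
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		struct foo *f = obj;
 *
 *		mutex_destroy(&f->f_lock);
 *	}
 *
 *	void
 *	foo_subsystem_init(void)
 *	{
 *
 *		foo_cache = pool_cache_init(sizeof(struct foo), 0, 0, 0,
 *		    "foopl", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *
 *		return pool_cache_get(foo_cache, PR_WAITOK);
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *
 *		pool_cache_put(foo_cache, f);
 *	}
 */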