1 /* $NetBSD: subr_pool.c,v 1.174 2009/09/13 18:45:11 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.174 2009/09/13 18:45:11 pooka Exp $"); 35 36 #include "opt_ddb.h" 37 #include "opt_pool.h" 38 #include "opt_poollog.h" 39 #include "opt_lockdebug.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/bitops.h> 44 #include <sys/proc.h> 45 #include <sys/errno.h> 46 #include <sys/kernel.h> 47 #include <sys/malloc.h> 48 #include <sys/pool.h> 49 #include <sys/syslog.h> 50 #include <sys/debug.h> 51 #include <sys/lockdebug.h> 52 #include <sys/xcall.h> 53 #include <sys/cpu.h> 54 #include <sys/atomic.h> 55 56 #include <uvm/uvm.h> 57 58 /* 59 * Pool resource management utility. 60 * 61 * Memory is allocated in pages which are split into pieces according to 62 * the pool item size. Each page is kept on one of three lists in the 63 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 64 * for empty, full and partially-full pages respectively. The individual 65 * pool items are on a linked list headed by `ph_itemlist' in each page 66 * header. The memory for building the page list is either taken from 67 * the allocated pages themselves (for small pool items) or taken from 68 * an internal pool of page headers (`phpool'). 69 */ 70 71 /* List of all pools */ 72 static TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 73 74 /* Private pool for page header structures */ 75 #define PHPOOL_MAX 8 76 static struct pool phpool[PHPOOL_MAX]; 77 #define PHPOOL_FREELIST_NELEM(idx) \ 78 (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx))) 79 80 #ifdef POOL_SUBPAGE 81 /* Pool of subpages for use by normal pools. 
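 *
 * For illustration of the sizing macro above: pool_item_bitmap_t (defined
 * below) is 32 bits wide, so BITMAP_SIZE is 32 and PHPOOL_FREELIST_NELEM(idx)
 * works out to 0, 64, 128, 256, 512, 1024, 2048 and 4096 for idx 0 through 7.
 * phpool[1..7] thus hold page headers whose PR_NOTOUCH bitmaps can track up
 * to that many items per page, while phpool[0] (no bitmap) serves pools that
 * keep a conventional off-page item free list.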
 */
static struct pool psppool;
#endif

static SLIST_HEAD(, pool_allocator) pa_deferinitq =
    SLIST_HEAD_INITIALIZER(pa_deferinitq);

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
struct pool_allocator pool_allocator_meta = {
        pool_page_alloc_meta, pool_page_free_meta,
        .pa_backingmapptr = &kmem_map,
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This lock protects both pool_head and drainpp. */
static kmutex_t pool_head_lock;
static kcondvar_t pool_busy;

typedef uint32_t pool_item_bitmap_t;
#define BITMAP_SIZE     (CHAR_BIT * sizeof(pool_item_bitmap_t))
#define BITMAP_MASK     (BITMAP_SIZE - 1)

struct pool_item_header {
        /* Page headers */
        LIST_ENTRY(pool_item_header)
                        ph_pagelist;    /* pool page list */
        SPLAY_ENTRY(pool_item_header)
                        ph_node;        /* Off-page page headers */
        void *          ph_page;        /* this page's address */
        uint32_t        ph_time;        /* last referenced */
        uint16_t        ph_nmissing;    /* # of chunks in use */
        uint16_t        ph_off;         /* start offset in page */
        union {
                /* !PR_NOTOUCH */
                struct {
                        LIST_HEAD(, pool_item)
                                phu_itemlist;   /* chunk list for this page */
                } phu_normal;
                /* PR_NOTOUCH */
                struct {
                        pool_item_bitmap_t phu_bitmap[1];
                } phu_notouch;
        } ph_u;
};
#define ph_itemlist     ph_u.phu_normal.phu_itemlist
#define ph_bitmap       ph_u.phu_notouch.phu_bitmap

struct pool_item {
#ifdef DIAGNOSTIC
        u_int pi_magic;
#endif
#define PI_MAGIC 0xdeaddeadU
        /* Other entries use only this list entry */
        LIST_ENTRY(pool_item)   pi_list;
};

#define POOL_NEEDS_CATCHUP(pp)                                          \
        ((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction and destruction: both are deferred until
 * absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references up
 * to PCG_NUMOBJECTS constructed objects.  When a cache allocates an
 * object from the pool, it calls the object's constructor and places it
 * into a cache group.  When a cache group frees an object back to the
 * pool, it first calls the object's destructor.  This allows the object
 * to persist in constructed form while freed to the cache.
 *
 * The pool references each cache, so that when a pool is drained by the
 * pagedaemon, it can drain each individual cache as well.  Each time a
 * cache is drained, the most idle cache group is freed to the pool in
 * its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

static struct pool pcg_normal_pool;
static struct pool pcg_large_pool;
static struct pool cache_pool;
static struct pool cache_cpu_pool;

/* List of all caches.
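 *
 * A rough usage sketch (struct foo, foo_ctor, foo_dtor and foo_cache are
 * hypothetical names, not part of this file): a subsystem that benefits from
 * keeping objects constructed layers a cache over an ordinary pool with
 *
 *      static pool_cache_t foo_cache;
 *
 *      foo_cache = pool_cache_init(sizeof(struct foo), coherency_unit, 0, 0,
 *          "foocache", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *
 *      struct foo *f = pool_cache_get(foo_cache, PR_WAITOK);
 *      ...
 *      pool_cache_put(foo_cache, f);
 *
 * pool_cache_get() and pool_cache_put() are the cached counterparts of
 * pool_get() and pool_put() below.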
*/ 179 TAILQ_HEAD(,pool_cache) pool_cache_head = 180 TAILQ_HEAD_INITIALIZER(pool_cache_head); 181 182 int pool_cache_disable; /* global disable for caching */ 183 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 184 185 static bool pool_cache_put_slow(pool_cache_cpu_t *, int, 186 void *); 187 static bool pool_cache_get_slow(pool_cache_cpu_t *, int, 188 void **, paddr_t *, int); 189 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 190 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 191 static void pool_cache_xcall(pool_cache_t); 192 193 static int pool_catchup(struct pool *); 194 static void pool_prime_page(struct pool *, void *, 195 struct pool_item_header *); 196 static void pool_update_curpage(struct pool *); 197 198 static int pool_grow(struct pool *, int); 199 static void *pool_allocator_alloc(struct pool *, int); 200 static void pool_allocator_free(struct pool *, void *); 201 202 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 203 void (*)(const char *, ...)); 204 static void pool_print1(struct pool *, const char *, 205 void (*)(const char *, ...)); 206 207 static int pool_chk_page(struct pool *, const char *, 208 struct pool_item_header *); 209 210 /* 211 * Pool log entry. An array of these is allocated in pool_init(). 212 */ 213 struct pool_log { 214 const char *pl_file; 215 long pl_line; 216 int pl_action; 217 #define PRLOG_GET 1 218 #define PRLOG_PUT 2 219 void *pl_addr; 220 }; 221 222 #ifdef POOL_DIAGNOSTIC 223 /* Number of entries in pool log buffers */ 224 #ifndef POOL_LOGSIZE 225 #define POOL_LOGSIZE 10 226 #endif 227 228 int pool_logsize = POOL_LOGSIZE; 229 230 static inline void 231 pr_log(struct pool *pp, void *v, int action, const char *file, long line) 232 { 233 int n = pp->pr_curlogentry; 234 struct pool_log *pl; 235 236 if ((pp->pr_roflags & PR_LOGGING) == 0) 237 return; 238 239 /* 240 * Fill in the current entry. Wrap around and overwrite 241 * the oldest entry if necessary. 242 */ 243 pl = &pp->pr_log[n]; 244 pl->pl_file = file; 245 pl->pl_line = line; 246 pl->pl_action = action; 247 pl->pl_addr = v; 248 if (++n >= pp->pr_logsize) 249 n = 0; 250 pp->pr_curlogentry = n; 251 } 252 253 static void 254 pr_printlog(struct pool *pp, struct pool_item *pi, 255 void (*pr)(const char *, ...)) 256 { 257 int i = pp->pr_logsize; 258 int n = pp->pr_curlogentry; 259 260 if ((pp->pr_roflags & PR_LOGGING) == 0) 261 return; 262 263 /* 264 * Print all entries in this pool's log. 265 */ 266 while (i-- > 0) { 267 struct pool_log *pl = &pp->pr_log[n]; 268 if (pl->pl_action != 0) { 269 if (pi == NULL || pi == pl->pl_addr) { 270 (*pr)("\tlog entry %d:\n", i); 271 (*pr)("\t\taction = %s, addr = %p\n", 272 pl->pl_action == PRLOG_GET ? 
"get" : "put", 273 pl->pl_addr); 274 (*pr)("\t\tfile: %s at line %lu\n", 275 pl->pl_file, pl->pl_line); 276 } 277 } 278 if (++n >= pp->pr_logsize) 279 n = 0; 280 } 281 } 282 283 static inline void 284 pr_enter(struct pool *pp, const char *file, long line) 285 { 286 287 if (__predict_false(pp->pr_entered_file != NULL)) { 288 printf("pool %s: reentrancy at file %s line %ld\n", 289 pp->pr_wchan, file, line); 290 printf(" previous entry at file %s line %ld\n", 291 pp->pr_entered_file, pp->pr_entered_line); 292 panic("pr_enter"); 293 } 294 295 pp->pr_entered_file = file; 296 pp->pr_entered_line = line; 297 } 298 299 static inline void 300 pr_leave(struct pool *pp) 301 { 302 303 if (__predict_false(pp->pr_entered_file == NULL)) { 304 printf("pool %s not entered?\n", pp->pr_wchan); 305 panic("pr_leave"); 306 } 307 308 pp->pr_entered_file = NULL; 309 pp->pr_entered_line = 0; 310 } 311 312 static inline void 313 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...)) 314 { 315 316 if (pp->pr_entered_file != NULL) 317 (*pr)("\n\tcurrently entered from file %s line %ld\n", 318 pp->pr_entered_file, pp->pr_entered_line); 319 } 320 #else 321 #define pr_log(pp, v, action, file, line) 322 #define pr_printlog(pp, pi, pr) 323 #define pr_enter(pp, file, line) 324 #define pr_leave(pp) 325 #define pr_enter_check(pp, pr) 326 #endif /* POOL_DIAGNOSTIC */ 327 328 static inline unsigned int 329 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 330 const void *v) 331 { 332 const char *cp = v; 333 unsigned int idx; 334 335 KASSERT(pp->pr_roflags & PR_NOTOUCH); 336 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 337 KASSERT(idx < pp->pr_itemsperpage); 338 return idx; 339 } 340 341 static inline void 342 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 343 void *obj) 344 { 345 unsigned int idx = pr_item_notouch_index(pp, ph, obj); 346 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 347 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 348 349 KASSERT((*bitmap & mask) == 0); 350 *bitmap |= mask; 351 } 352 353 static inline void * 354 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 355 { 356 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 357 unsigned int idx; 358 int i; 359 360 for (i = 0; ; i++) { 361 int bit; 362 363 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 364 bit = ffs32(bitmap[i]); 365 if (bit) { 366 pool_item_bitmap_t mask; 367 368 bit--; 369 idx = (i * BITMAP_SIZE) + bit; 370 mask = 1 << bit; 371 KASSERT((bitmap[i] & mask) != 0); 372 bitmap[i] &= ~mask; 373 break; 374 } 375 } 376 KASSERT(idx < pp->pr_itemsperpage); 377 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 378 } 379 380 static inline void 381 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph) 382 { 383 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 384 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 385 int i; 386 387 for (i = 0; i < n; i++) { 388 bitmap[i] = (pool_item_bitmap_t)-1; 389 } 390 } 391 392 static inline int 393 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 394 { 395 396 /* 397 * we consider pool_item_header with smaller ph_page bigger. 398 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 
399 */ 400 401 if (a->ph_page < b->ph_page) 402 return (1); 403 else if (a->ph_page > b->ph_page) 404 return (-1); 405 else 406 return (0); 407 } 408 409 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 410 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 411 412 static inline struct pool_item_header * 413 pr_find_pagehead_noalign(struct pool *pp, void *v) 414 { 415 struct pool_item_header *ph, tmp; 416 417 tmp.ph_page = (void *)(uintptr_t)v; 418 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 419 if (ph == NULL) { 420 ph = SPLAY_ROOT(&pp->pr_phtree); 421 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 422 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 423 } 424 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 425 } 426 427 return ph; 428 } 429 430 /* 431 * Return the pool page header based on item address. 432 */ 433 static inline struct pool_item_header * 434 pr_find_pagehead(struct pool *pp, void *v) 435 { 436 struct pool_item_header *ph, tmp; 437 438 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 439 ph = pr_find_pagehead_noalign(pp, v); 440 } else { 441 void *page = 442 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 443 444 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 445 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 446 } else { 447 tmp.ph_page = page; 448 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 449 } 450 } 451 452 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 453 ((char *)ph->ph_page <= (char *)v && 454 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 455 return ph; 456 } 457 458 static void 459 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 460 { 461 struct pool_item_header *ph; 462 463 while ((ph = LIST_FIRST(pq)) != NULL) { 464 LIST_REMOVE(ph, ph_pagelist); 465 pool_allocator_free(pp, ph->ph_page); 466 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 467 pool_put(pp->pr_phpool, ph); 468 } 469 } 470 471 /* 472 * Remove a page from the pool. 473 */ 474 static inline void 475 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 476 struct pool_pagelist *pq) 477 { 478 479 KASSERT(mutex_owned(&pp->pr_lock)); 480 481 /* 482 * If the page was idle, decrement the idle page count. 483 */ 484 if (ph->ph_nmissing == 0) { 485 #ifdef DIAGNOSTIC 486 if (pp->pr_nidle == 0) 487 panic("pr_rmpage: nidle inconsistent"); 488 if (pp->pr_nitems < pp->pr_itemsperpage) 489 panic("pr_rmpage: nitems inconsistent"); 490 #endif 491 pp->pr_nidle--; 492 } 493 494 pp->pr_nitems -= pp->pr_itemsperpage; 495 496 /* 497 * Unlink the page from the pool and queue it for release. 
498 */ 499 LIST_REMOVE(ph, ph_pagelist); 500 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 501 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 502 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 503 504 pp->pr_npages--; 505 pp->pr_npagefree++; 506 507 pool_update_curpage(pp); 508 } 509 510 static bool 511 pa_starved_p(struct pool_allocator *pa) 512 { 513 514 if (pa->pa_backingmap != NULL) { 515 return vm_map_starved_p(pa->pa_backingmap); 516 } 517 return false; 518 } 519 520 static int 521 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) 522 { 523 struct pool *pp = obj; 524 struct pool_allocator *pa = pp->pr_alloc; 525 526 KASSERT(&pp->pr_reclaimerentry == ce); 527 pool_reclaim(pp); 528 if (!pa_starved_p(pa)) { 529 return CALLBACK_CHAIN_ABORT; 530 } 531 return CALLBACK_CHAIN_CONTINUE; 532 } 533 534 static void 535 pool_reclaim_register(struct pool *pp) 536 { 537 struct vm_map *map = pp->pr_alloc->pa_backingmap; 538 int s; 539 540 if (map == NULL) { 541 return; 542 } 543 544 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 545 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback, 546 &pp->pr_reclaimerentry, pp, pool_reclaim_callback); 547 splx(s); 548 } 549 550 static void 551 pool_reclaim_unregister(struct pool *pp) 552 { 553 struct vm_map *map = pp->pr_alloc->pa_backingmap; 554 int s; 555 556 if (map == NULL) { 557 return; 558 } 559 560 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */ 561 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback, 562 &pp->pr_reclaimerentry); 563 splx(s); 564 } 565 566 static void 567 pa_reclaim_register(struct pool_allocator *pa) 568 { 569 struct vm_map *map = *pa->pa_backingmapptr; 570 struct pool *pp; 571 572 KASSERT(pa->pa_backingmap == NULL); 573 if (map == NULL) { 574 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q); 575 return; 576 } 577 pa->pa_backingmap = map; 578 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) { 579 pool_reclaim_register(pp); 580 } 581 } 582 583 /* 584 * Initialize all the pools listed in the "pools" link set. 585 */ 586 void 587 pool_subsystem_init(void) 588 { 589 struct pool_allocator *pa; 590 591 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 592 cv_init(&pool_busy, "poolbusy"); 593 594 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) { 595 KASSERT(pa->pa_backingmapptr != NULL); 596 KASSERT(*pa->pa_backingmapptr != NULL); 597 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q); 598 pa_reclaim_register(pa); 599 } 600 601 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 602 0, 0, "pcache", &pool_allocator_nointr, IPL_NONE); 603 604 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 605 0, 0, "pcachecpu", &pool_allocator_nointr, IPL_NONE); 606 } 607 608 /* 609 * Initialize the given pool resource structure. 610 * 611 * We export this routine to allow other kernel parts to declare 612 * static pools that must be initialized before malloc() is available. 613 */ 614 void 615 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 616 const char *wchan, struct pool_allocator *palloc, int ipl) 617 { 618 struct pool *pp1; 619 size_t trysize, phsize; 620 int off, slack; 621 622 #ifdef DEBUG 623 /* 624 * Check that the pool hasn't already been initialised and 625 * added to the list of all pools. 626 */ 627 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 628 if (pp == pp1) 629 panic("pool_init: pool %s already initialised", 630 wchan); 631 } 632 #endif 633 634 #ifdef POOL_DIAGNOSTIC 635 /* 636 * Always log if POOL_DIAGNOSTIC is defined. 
637 */ 638 if (pool_logsize != 0) 639 flags |= PR_LOGGING; 640 #endif 641 642 if (palloc == NULL) 643 palloc = &pool_allocator_kmem; 644 #ifdef POOL_SUBPAGE 645 if (size > palloc->pa_pagesz) { 646 if (palloc == &pool_allocator_kmem) 647 palloc = &pool_allocator_kmem_fullpage; 648 else if (palloc == &pool_allocator_nointr) 649 palloc = &pool_allocator_nointr_fullpage; 650 } 651 #endif /* POOL_SUBPAGE */ 652 if ((palloc->pa_flags & PA_INITIALIZED) == 0) { 653 if (palloc->pa_pagesz == 0) 654 palloc->pa_pagesz = PAGE_SIZE; 655 656 TAILQ_INIT(&palloc->pa_list); 657 658 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 659 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 660 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 661 662 if (palloc->pa_backingmapptr != NULL) { 663 pa_reclaim_register(palloc); 664 } 665 palloc->pa_flags |= PA_INITIALIZED; 666 } 667 668 if (align == 0) 669 align = ALIGN(1); 670 671 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 672 size = sizeof(struct pool_item); 673 674 size = roundup(size, align); 675 #ifdef DIAGNOSTIC 676 if (size > palloc->pa_pagesz) 677 panic("pool_init: pool item size (%zu) too large", size); 678 #endif 679 680 /* 681 * Initialize the pool structure. 682 */ 683 LIST_INIT(&pp->pr_emptypages); 684 LIST_INIT(&pp->pr_fullpages); 685 LIST_INIT(&pp->pr_partpages); 686 pp->pr_cache = NULL; 687 pp->pr_curpage = NULL; 688 pp->pr_npages = 0; 689 pp->pr_minitems = 0; 690 pp->pr_minpages = 0; 691 pp->pr_maxpages = UINT_MAX; 692 pp->pr_roflags = flags; 693 pp->pr_flags = 0; 694 pp->pr_size = size; 695 pp->pr_align = align; 696 pp->pr_wchan = wchan; 697 pp->pr_alloc = palloc; 698 pp->pr_nitems = 0; 699 pp->pr_nout = 0; 700 pp->pr_hardlimit = UINT_MAX; 701 pp->pr_hardlimit_warning = NULL; 702 pp->pr_hardlimit_ratecap.tv_sec = 0; 703 pp->pr_hardlimit_ratecap.tv_usec = 0; 704 pp->pr_hardlimit_warning_last.tv_sec = 0; 705 pp->pr_hardlimit_warning_last.tv_usec = 0; 706 pp->pr_drain_hook = NULL; 707 pp->pr_drain_hook_arg = NULL; 708 pp->pr_freecheck = NULL; 709 710 /* 711 * Decide whether to put the page header off page to avoid 712 * wasting too large a part of the page or too big item. 713 * Off-page page headers go on a hash table, so we can match 714 * a returned item with its header based on the page address. 715 * We use 1/16 of the page size and about 8 times of the item 716 * size as the threshold (XXX: tune) 717 * 718 * However, we'll put the header into the page if we can put 719 * it without wasting any items. 720 * 721 * Silently enforce `0 <= ioff < align'. 722 */ 723 pp->pr_itemoffset = ioff %= align; 724 /* See the comment below about reserved bytes. */ 725 trysize = palloc->pa_pagesz - ((align - ioff) % align); 726 phsize = ALIGN(sizeof(struct pool_item_header)); 727 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 && 728 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) || 729 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) { 730 /* Use the end of the page for the page header */ 731 pp->pr_roflags |= PR_PHINPAGE; 732 pp->pr_phoffset = off = palloc->pa_pagesz - phsize; 733 } else { 734 /* The page header will be taken from our page header pool */ 735 pp->pr_phoffset = 0; 736 off = palloc->pa_pagesz; 737 SPLAY_INIT(&pp->pr_phtree); 738 } 739 740 /* 741 * Alignment is to take place at `ioff' within the item. This means 742 * we must reserve up to `align - 1' bytes on the page to allow 743 * appropriate positioning of each item. 
744 */ 745 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 746 KASSERT(pp->pr_itemsperpage != 0); 747 if ((pp->pr_roflags & PR_NOTOUCH)) { 748 int idx; 749 750 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 751 idx++) { 752 /* nothing */ 753 } 754 if (idx >= PHPOOL_MAX) { 755 /* 756 * if you see this panic, consider to tweak 757 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 758 */ 759 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH", 760 pp->pr_wchan, pp->pr_itemsperpage); 761 } 762 pp->pr_phpool = &phpool[idx]; 763 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) { 764 pp->pr_phpool = &phpool[0]; 765 } 766 #if defined(DIAGNOSTIC) 767 else { 768 pp->pr_phpool = NULL; 769 } 770 #endif 771 772 /* 773 * Use the slack between the chunks and the page header 774 * for "cache coloring". 775 */ 776 slack = off - pp->pr_itemsperpage * pp->pr_size; 777 pp->pr_maxcolor = (slack / align) * align; 778 pp->pr_curcolor = 0; 779 780 pp->pr_nget = 0; 781 pp->pr_nfail = 0; 782 pp->pr_nput = 0; 783 pp->pr_npagealloc = 0; 784 pp->pr_npagefree = 0; 785 pp->pr_hiwat = 0; 786 pp->pr_nidle = 0; 787 pp->pr_refcnt = 0; 788 789 #ifdef POOL_DIAGNOSTIC 790 if (flags & PR_LOGGING) { 791 if (kmem_map == NULL || 792 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log), 793 M_TEMP, M_NOWAIT)) == NULL) 794 pp->pr_roflags &= ~PR_LOGGING; 795 pp->pr_curlogentry = 0; 796 pp->pr_logsize = pool_logsize; 797 } 798 #endif 799 800 pp->pr_entered_file = NULL; 801 pp->pr_entered_line = 0; 802 803 mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl); 804 cv_init(&pp->pr_cv, wchan); 805 pp->pr_ipl = ipl; 806 807 /* 808 * Initialize private page header pool and cache magazine pool if we 809 * haven't done so yet. 810 * XXX LOCKING. 811 */ 812 if (phpool[0].pr_size == 0) { 813 int idx; 814 for (idx = 0; idx < PHPOOL_MAX; idx++) { 815 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 816 int nelem; 817 size_t sz; 818 819 nelem = PHPOOL_FREELIST_NELEM(idx); 820 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 821 "phpool-%d", nelem); 822 sz = sizeof(struct pool_item_header); 823 if (nelem) { 824 sz = offsetof(struct pool_item_header, 825 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 826 } 827 pool_init(&phpool[idx], sz, 0, 0, 0, 828 phpool_names[idx], &pool_allocator_meta, IPL_VM); 829 } 830 #ifdef POOL_SUBPAGE 831 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 832 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 833 #endif 834 835 size = sizeof(pcg_t) + 836 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 837 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 838 "pcgnormal", &pool_allocator_meta, IPL_VM); 839 840 size = sizeof(pcg_t) + 841 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 842 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 843 "pcglarge", &pool_allocator_meta, IPL_VM); 844 } 845 846 /* Insert into the list of all pools. */ 847 if (__predict_true(!cold)) 848 mutex_enter(&pool_head_lock); 849 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 850 if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0) 851 break; 852 } 853 if (pp1 == NULL) 854 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 855 else 856 TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist); 857 if (__predict_true(!cold)) 858 mutex_exit(&pool_head_lock); 859 860 /* Insert this into the list of pools using this allocator. 
 */
        if (__predict_true(!cold))
                mutex_enter(&palloc->pa_lock);
        TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
        if (__predict_true(!cold))
                mutex_exit(&palloc->pa_lock);

        pool_reclaim_register(pp);
}

/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
        struct pool_pagelist pq;
        struct pool_item_header *ph;

        /* Remove from global pool list */
        mutex_enter(&pool_head_lock);
        while (pp->pr_refcnt != 0)
                cv_wait(&pool_busy, &pool_head_lock);
        TAILQ_REMOVE(&pool_head, pp, pr_poollist);
        if (drainpp == pp)
                drainpp = NULL;
        mutex_exit(&pool_head_lock);

        /* Remove this pool from its allocator's list of pools. */
        pool_reclaim_unregister(pp);
        mutex_enter(&pp->pr_alloc->pa_lock);
        TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
        mutex_exit(&pp->pr_alloc->pa_lock);

        mutex_enter(&pp->pr_lock);

        KASSERT(pp->pr_cache == NULL);

#ifdef DIAGNOSTIC
        if (pp->pr_nout != 0) {
                pr_printlog(pp, NULL, printf);
                panic("pool_destroy: pool busy: still out: %u",
                    pp->pr_nout);
        }
#endif

        KASSERT(LIST_EMPTY(&pp->pr_fullpages));
        KASSERT(LIST_EMPTY(&pp->pr_partpages));

        /* Remove all pages */
        LIST_INIT(&pq);
        while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
                pr_rmpage(pp, ph, &pq);

        mutex_exit(&pp->pr_lock);

        pr_pagelist_free(pp, &pq);

#ifdef POOL_DIAGNOSTIC
        if ((pp->pr_roflags & PR_LOGGING) != 0)
                free(pp->pr_log, M_TEMP);
#endif

        cv_destroy(&pp->pr_cv);
        mutex_destroy(&pp->pr_lock);
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

        /* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
        if (pp->pr_drain_hook != NULL)
                panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
        pp->pr_drain_hook = fn;
        pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, void *storage, int flags)
{
        struct pool_item_header *ph;

        if ((pp->pr_roflags & PR_PHINPAGE) != 0)
                ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset);
        else
                ph = pool_get(pp->pr_phpool, flags);

        return (ph);
}

/*
 * Grab an item from the pool.
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
        struct pool_item *pi;
        struct pool_item_header *ph;
        void *v;

#ifdef DIAGNOSTIC
        if (__predict_false(pp->pr_itemsperpage == 0))
                panic("pool_get: pool %p: pr_itemsperpage is zero, "
                    "pool not initialized?", pp);
        if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
            (flags & PR_WAITOK) != 0))
                panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#endif /* DIAGNOSTIC */
#ifdef LOCKDEBUG
        if (flags & PR_WAITOK) {
                ASSERT_SLEEPABLE();
        }
#endif

        mutex_enter(&pp->pr_lock);
        pr_enter(pp, file, line);

 startover:
        /*
         * Check to see if we've reached the hard limit.  If we have,
         * and we can wait, then wait until an item has been returned to
         * the pool.
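         *
         * A sketch of how a caller typically interacts with the hard limit
         * (foo_pool is a hypothetical pool, as in the earlier examples): the
         * limit is set once with
         *
         *      pool_sethardlimit(&foo_pool, 1000,
         *          "foo_pool: hard limit reached", 60);
         *
         * and a PR_WAITOK caller that prefers an error over sleeping at the
         * limit also passes PR_LIMITFAIL:
         *
         *      struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_LIMITFAIL);
         *      if (f == NULL)
         *              ...     back off instead of blocking at the limit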
990 */ 991 #ifdef DIAGNOSTIC 992 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 993 pr_leave(pp); 994 mutex_exit(&pp->pr_lock); 995 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 996 } 997 #endif 998 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 999 if (pp->pr_drain_hook != NULL) { 1000 /* 1001 * Since the drain hook is going to free things 1002 * back to the pool, unlock, call the hook, re-lock, 1003 * and check the hardlimit condition again. 1004 */ 1005 pr_leave(pp); 1006 mutex_exit(&pp->pr_lock); 1007 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 1008 mutex_enter(&pp->pr_lock); 1009 pr_enter(pp, file, line); 1010 if (pp->pr_nout < pp->pr_hardlimit) 1011 goto startover; 1012 } 1013 1014 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 1015 /* 1016 * XXX: A warning isn't logged in this case. Should 1017 * it be? 1018 */ 1019 pp->pr_flags |= PR_WANTED; 1020 pr_leave(pp); 1021 cv_wait(&pp->pr_cv, &pp->pr_lock); 1022 pr_enter(pp, file, line); 1023 goto startover; 1024 } 1025 1026 /* 1027 * Log a message that the hard limit has been hit. 1028 */ 1029 if (pp->pr_hardlimit_warning != NULL && 1030 ratecheck(&pp->pr_hardlimit_warning_last, 1031 &pp->pr_hardlimit_ratecap)) 1032 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 1033 1034 pp->pr_nfail++; 1035 1036 pr_leave(pp); 1037 mutex_exit(&pp->pr_lock); 1038 return (NULL); 1039 } 1040 1041 /* 1042 * The convention we use is that if `curpage' is not NULL, then 1043 * it points at a non-empty bucket. In particular, `curpage' 1044 * never points at a page header which has PR_PHINPAGE set and 1045 * has no items in its bucket. 1046 */ 1047 if ((ph = pp->pr_curpage) == NULL) { 1048 int error; 1049 1050 #ifdef DIAGNOSTIC 1051 if (pp->pr_nitems != 0) { 1052 mutex_exit(&pp->pr_lock); 1053 printf("pool_get: %s: curpage NULL, nitems %u\n", 1054 pp->pr_wchan, pp->pr_nitems); 1055 panic("pool_get: nitems inconsistent"); 1056 } 1057 #endif 1058 1059 /* 1060 * Call the back-end page allocator for more memory. 1061 * Release the pool lock, as the back-end page allocator 1062 * may block. 1063 */ 1064 pr_leave(pp); 1065 error = pool_grow(pp, flags); 1066 pr_enter(pp, file, line); 1067 if (error != 0) { 1068 /* 1069 * We were unable to allocate a page or item 1070 * header, but we released the lock during 1071 * allocation, so perhaps items were freed 1072 * back to the pool. Check for this case. 1073 */ 1074 if (pp->pr_curpage != NULL) 1075 goto startover; 1076 1077 pp->pr_nfail++; 1078 pr_leave(pp); 1079 mutex_exit(&pp->pr_lock); 1080 return (NULL); 1081 } 1082 1083 /* Start the allocation process over. 
*/ 1084 goto startover; 1085 } 1086 if (pp->pr_roflags & PR_NOTOUCH) { 1087 #ifdef DIAGNOSTIC 1088 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 1089 pr_leave(pp); 1090 mutex_exit(&pp->pr_lock); 1091 panic("pool_get: %s: page empty", pp->pr_wchan); 1092 } 1093 #endif 1094 v = pr_item_notouch_get(pp, ph); 1095 #ifdef POOL_DIAGNOSTIC 1096 pr_log(pp, v, PRLOG_GET, file, line); 1097 #endif 1098 } else { 1099 v = pi = LIST_FIRST(&ph->ph_itemlist); 1100 if (__predict_false(v == NULL)) { 1101 pr_leave(pp); 1102 mutex_exit(&pp->pr_lock); 1103 panic("pool_get: %s: page empty", pp->pr_wchan); 1104 } 1105 #ifdef DIAGNOSTIC 1106 if (__predict_false(pp->pr_nitems == 0)) { 1107 pr_leave(pp); 1108 mutex_exit(&pp->pr_lock); 1109 printf("pool_get: %s: items on itemlist, nitems %u\n", 1110 pp->pr_wchan, pp->pr_nitems); 1111 panic("pool_get: nitems inconsistent"); 1112 } 1113 #endif 1114 1115 #ifdef POOL_DIAGNOSTIC 1116 pr_log(pp, v, PRLOG_GET, file, line); 1117 #endif 1118 1119 #ifdef DIAGNOSTIC 1120 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 1121 pr_printlog(pp, pi, printf); 1122 panic("pool_get(%s): free list modified: " 1123 "magic=%x; page %p; item addr %p\n", 1124 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 1125 } 1126 #endif 1127 1128 /* 1129 * Remove from item list. 1130 */ 1131 LIST_REMOVE(pi, pi_list); 1132 } 1133 pp->pr_nitems--; 1134 pp->pr_nout++; 1135 if (ph->ph_nmissing == 0) { 1136 #ifdef DIAGNOSTIC 1137 if (__predict_false(pp->pr_nidle == 0)) 1138 panic("pool_get: nidle inconsistent"); 1139 #endif 1140 pp->pr_nidle--; 1141 1142 /* 1143 * This page was previously empty. Move it to the list of 1144 * partially-full pages. This page is already curpage. 1145 */ 1146 LIST_REMOVE(ph, ph_pagelist); 1147 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1148 } 1149 ph->ph_nmissing++; 1150 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1151 #ifdef DIAGNOSTIC 1152 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 1153 !LIST_EMPTY(&ph->ph_itemlist))) { 1154 pr_leave(pp); 1155 mutex_exit(&pp->pr_lock); 1156 panic("pool_get: %s: nmissing inconsistent", 1157 pp->pr_wchan); 1158 } 1159 #endif 1160 /* 1161 * This page is now full. Move it to the full list 1162 * and select a new current page. 1163 */ 1164 LIST_REMOVE(ph, ph_pagelist); 1165 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1166 pool_update_curpage(pp); 1167 } 1168 1169 pp->pr_nget++; 1170 pr_leave(pp); 1171 1172 /* 1173 * If we have a low water mark and we are now below that low 1174 * water mark, add more items to the pool. 1175 */ 1176 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1177 /* 1178 * XXX: Should we log a warning? Should we set up a timeout 1179 * to try again in a second or so? The latter could break 1180 * a caller's assumptions about interrupt protection, etc. 1181 */ 1182 } 1183 1184 mutex_exit(&pp->pr_lock); 1185 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 1186 FREECHECK_OUT(&pp->pr_freecheck, v); 1187 return (v); 1188 } 1189 1190 /* 1191 * Internal version of pool_put(). Pool is already locked/entered. 
1192 */ 1193 static void 1194 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1195 { 1196 struct pool_item *pi = v; 1197 struct pool_item_header *ph; 1198 1199 KASSERT(mutex_owned(&pp->pr_lock)); 1200 FREECHECK_IN(&pp->pr_freecheck, v); 1201 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1202 1203 #ifdef DIAGNOSTIC 1204 if (__predict_false(pp->pr_nout == 0)) { 1205 printf("pool %s: putting with none out\n", 1206 pp->pr_wchan); 1207 panic("pool_put"); 1208 } 1209 #endif 1210 1211 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1212 pr_printlog(pp, NULL, printf); 1213 panic("pool_put: %s: page header missing", pp->pr_wchan); 1214 } 1215 1216 /* 1217 * Return to item list. 1218 */ 1219 if (pp->pr_roflags & PR_NOTOUCH) { 1220 pr_item_notouch_put(pp, ph, v); 1221 } else { 1222 #ifdef DIAGNOSTIC 1223 pi->pi_magic = PI_MAGIC; 1224 #endif 1225 #ifdef DEBUG 1226 { 1227 int i, *ip = v; 1228 1229 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 1230 *ip++ = PI_MAGIC; 1231 } 1232 } 1233 #endif 1234 1235 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1236 } 1237 KDASSERT(ph->ph_nmissing != 0); 1238 ph->ph_nmissing--; 1239 pp->pr_nput++; 1240 pp->pr_nitems++; 1241 pp->pr_nout--; 1242 1243 /* Cancel "pool empty" condition if it exists */ 1244 if (pp->pr_curpage == NULL) 1245 pp->pr_curpage = ph; 1246 1247 if (pp->pr_flags & PR_WANTED) { 1248 pp->pr_flags &= ~PR_WANTED; 1249 cv_broadcast(&pp->pr_cv); 1250 } 1251 1252 /* 1253 * If this page is now empty, do one of two things: 1254 * 1255 * (1) If we have more pages than the page high water mark, 1256 * free the page back to the system. ONLY CONSIDER 1257 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1258 * CLAIM. 1259 * 1260 * (2) Otherwise, move the page to the empty page list. 1261 * 1262 * Either way, select a new current page (so we use a partially-full 1263 * page if one is available). 1264 */ 1265 if (ph->ph_nmissing == 0) { 1266 pp->pr_nidle++; 1267 if (pp->pr_npages > pp->pr_minpages && 1268 pp->pr_npages > pp->pr_maxpages) { 1269 pr_rmpage(pp, ph, pq); 1270 } else { 1271 LIST_REMOVE(ph, ph_pagelist); 1272 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1273 1274 /* 1275 * Update the timestamp on the page. A page must 1276 * be idle for some period of time before it can 1277 * be reclaimed by the pagedaemon. This minimizes 1278 * ping-pong'ing for memory. 1279 * 1280 * note for 64-bit time_t: truncating to 32-bit is not 1281 * a problem for our usage. 1282 */ 1283 ph->ph_time = time_uptime; 1284 } 1285 pool_update_curpage(pp); 1286 } 1287 1288 /* 1289 * If the page was previously completely full, move it to the 1290 * partially-full list and make it the current page. The next 1291 * allocation will get the item from this page, instead of 1292 * further fragmenting the pool. 1293 */ 1294 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1295 LIST_REMOVE(ph, ph_pagelist); 1296 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1297 pp->pr_curpage = ph; 1298 } 1299 } 1300 1301 /* 1302 * Return resource to the pool. 
1303 */ 1304 #ifdef POOL_DIAGNOSTIC 1305 void 1306 _pool_put(struct pool *pp, void *v, const char *file, long line) 1307 { 1308 struct pool_pagelist pq; 1309 1310 LIST_INIT(&pq); 1311 1312 mutex_enter(&pp->pr_lock); 1313 pr_enter(pp, file, line); 1314 1315 pr_log(pp, v, PRLOG_PUT, file, line); 1316 1317 pool_do_put(pp, v, &pq); 1318 1319 pr_leave(pp); 1320 mutex_exit(&pp->pr_lock); 1321 1322 pr_pagelist_free(pp, &pq); 1323 } 1324 #undef pool_put 1325 #endif /* POOL_DIAGNOSTIC */ 1326 1327 void 1328 pool_put(struct pool *pp, void *v) 1329 { 1330 struct pool_pagelist pq; 1331 1332 LIST_INIT(&pq); 1333 1334 mutex_enter(&pp->pr_lock); 1335 pool_do_put(pp, v, &pq); 1336 mutex_exit(&pp->pr_lock); 1337 1338 pr_pagelist_free(pp, &pq); 1339 } 1340 1341 #ifdef POOL_DIAGNOSTIC 1342 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__) 1343 #endif 1344 1345 /* 1346 * pool_grow: grow a pool by a page. 1347 * 1348 * => called with pool locked. 1349 * => unlock and relock the pool. 1350 * => return with pool locked. 1351 */ 1352 1353 static int 1354 pool_grow(struct pool *pp, int flags) 1355 { 1356 struct pool_item_header *ph = NULL; 1357 char *cp; 1358 1359 mutex_exit(&pp->pr_lock); 1360 cp = pool_allocator_alloc(pp, flags); 1361 if (__predict_true(cp != NULL)) { 1362 ph = pool_alloc_item_header(pp, cp, flags); 1363 } 1364 if (__predict_false(cp == NULL || ph == NULL)) { 1365 if (cp != NULL) { 1366 pool_allocator_free(pp, cp); 1367 } 1368 mutex_enter(&pp->pr_lock); 1369 return ENOMEM; 1370 } 1371 1372 mutex_enter(&pp->pr_lock); 1373 pool_prime_page(pp, cp, ph); 1374 pp->pr_npagealloc++; 1375 return 0; 1376 } 1377 1378 /* 1379 * Add N items to the pool. 1380 */ 1381 int 1382 pool_prime(struct pool *pp, int n) 1383 { 1384 int newpages; 1385 int error = 0; 1386 1387 mutex_enter(&pp->pr_lock); 1388 1389 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1390 1391 while (newpages-- > 0) { 1392 error = pool_grow(pp, PR_NOWAIT); 1393 if (error) { 1394 break; 1395 } 1396 pp->pr_minpages++; 1397 } 1398 1399 if (pp->pr_minpages >= pp->pr_maxpages) 1400 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1401 1402 mutex_exit(&pp->pr_lock); 1403 return error; 1404 } 1405 1406 /* 1407 * Add a page worth of items to the pool. 1408 * 1409 * Note, we must be called with the pool descriptor LOCKED. 1410 */ 1411 static void 1412 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1413 { 1414 struct pool_item *pi; 1415 void *cp = storage; 1416 const unsigned int align = pp->pr_align; 1417 const unsigned int ioff = pp->pr_itemoffset; 1418 int n; 1419 1420 KASSERT(mutex_owned(&pp->pr_lock)); 1421 1422 #ifdef DIAGNOSTIC 1423 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1424 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1425 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1426 #endif 1427 1428 /* 1429 * Insert page header. 1430 */ 1431 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1432 LIST_INIT(&ph->ph_itemlist); 1433 ph->ph_page = storage; 1434 ph->ph_nmissing = 0; 1435 ph->ph_time = time_uptime; 1436 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 1437 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1438 1439 pp->pr_nidle++; 1440 1441 /* 1442 * Color this page. 1443 */ 1444 ph->ph_off = pp->pr_curcolor; 1445 cp = (char *)cp + ph->ph_off; 1446 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1447 pp->pr_curcolor = 0; 1448 1449 /* 1450 * Adjust storage to apply aligment to `pr_itemoffset' in each item. 
1451 */ 1452 if (ioff != 0) 1453 cp = (char *)cp + align - ioff; 1454 1455 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1456 1457 /* 1458 * Insert remaining chunks on the bucket list. 1459 */ 1460 n = pp->pr_itemsperpage; 1461 pp->pr_nitems += n; 1462 1463 if (pp->pr_roflags & PR_NOTOUCH) { 1464 pr_item_notouch_init(pp, ph); 1465 } else { 1466 while (n--) { 1467 pi = (struct pool_item *)cp; 1468 1469 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 1470 1471 /* Insert on page list */ 1472 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1473 #ifdef DIAGNOSTIC 1474 pi->pi_magic = PI_MAGIC; 1475 #endif 1476 cp = (char *)cp + pp->pr_size; 1477 1478 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0); 1479 } 1480 } 1481 1482 /* 1483 * If the pool was depleted, point at the new page. 1484 */ 1485 if (pp->pr_curpage == NULL) 1486 pp->pr_curpage = ph; 1487 1488 if (++pp->pr_npages > pp->pr_hiwat) 1489 pp->pr_hiwat = pp->pr_npages; 1490 } 1491 1492 /* 1493 * Used by pool_get() when nitems drops below the low water mark. This 1494 * is used to catch up pr_nitems with the low water mark. 1495 * 1496 * Note 1, we never wait for memory here, we let the caller decide what to do. 1497 * 1498 * Note 2, we must be called with the pool already locked, and we return 1499 * with it locked. 1500 */ 1501 static int 1502 pool_catchup(struct pool *pp) 1503 { 1504 int error = 0; 1505 1506 while (POOL_NEEDS_CATCHUP(pp)) { 1507 error = pool_grow(pp, PR_NOWAIT); 1508 if (error) { 1509 break; 1510 } 1511 } 1512 return error; 1513 } 1514 1515 static void 1516 pool_update_curpage(struct pool *pp) 1517 { 1518 1519 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1520 if (pp->pr_curpage == NULL) { 1521 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1522 } 1523 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) || 1524 (pp->pr_curpage != NULL && pp->pr_nitems > 0)); 1525 } 1526 1527 void 1528 pool_setlowat(struct pool *pp, int n) 1529 { 1530 1531 mutex_enter(&pp->pr_lock); 1532 1533 pp->pr_minitems = n; 1534 pp->pr_minpages = (n == 0) 1535 ? 0 1536 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1537 1538 /* Make sure we're caught up with the newly-set low water mark. */ 1539 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1540 /* 1541 * XXX: Should we log a warning? Should we set up a timeout 1542 * to try again in a second or so? The latter could break 1543 * a caller's assumptions about interrupt protection, etc. 1544 */ 1545 } 1546 1547 mutex_exit(&pp->pr_lock); 1548 } 1549 1550 void 1551 pool_sethiwat(struct pool *pp, int n) 1552 { 1553 1554 mutex_enter(&pp->pr_lock); 1555 1556 pp->pr_maxpages = (n == 0) 1557 ? 0 1558 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1559 1560 mutex_exit(&pp->pr_lock); 1561 } 1562 1563 void 1564 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1565 { 1566 1567 mutex_enter(&pp->pr_lock); 1568 1569 pp->pr_hardlimit = n; 1570 pp->pr_hardlimit_warning = warnmess; 1571 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1572 pp->pr_hardlimit_warning_last.tv_sec = 0; 1573 pp->pr_hardlimit_warning_last.tv_usec = 0; 1574 1575 /* 1576 * In-line version of pool_sethiwat(), because we don't want to 1577 * release the lock. 1578 */ 1579 pp->pr_maxpages = (n == 0) 1580 ? 0 1581 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1582 1583 mutex_exit(&pp->pr_lock); 1584 } 1585 1586 /* 1587 * Release all complete pages that have not been used recently. 
1588 */ 1589 int 1590 #ifdef POOL_DIAGNOSTIC 1591 _pool_reclaim(struct pool *pp, const char *file, long line) 1592 #else 1593 pool_reclaim(struct pool *pp) 1594 #endif 1595 { 1596 struct pool_item_header *ph, *phnext; 1597 struct pool_pagelist pq; 1598 uint32_t curtime; 1599 bool klock; 1600 int rv; 1601 1602 if (pp->pr_drain_hook != NULL) { 1603 /* 1604 * The drain hook must be called with the pool unlocked. 1605 */ 1606 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1607 } 1608 1609 /* 1610 * XXXSMP Because we do not want to cause non-MPSAFE code 1611 * to block. 1612 */ 1613 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1614 pp->pr_ipl == IPL_SOFTSERIAL) { 1615 KERNEL_LOCK(1, NULL); 1616 klock = true; 1617 } else 1618 klock = false; 1619 1620 /* Reclaim items from the pool's cache (if any). */ 1621 if (pp->pr_cache != NULL) 1622 pool_cache_invalidate(pp->pr_cache); 1623 1624 if (mutex_tryenter(&pp->pr_lock) == 0) { 1625 if (klock) { 1626 KERNEL_UNLOCK_ONE(NULL); 1627 } 1628 return (0); 1629 } 1630 pr_enter(pp, file, line); 1631 1632 LIST_INIT(&pq); 1633 1634 curtime = time_uptime; 1635 1636 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1637 phnext = LIST_NEXT(ph, ph_pagelist); 1638 1639 /* Check our minimum page claim */ 1640 if (pp->pr_npages <= pp->pr_minpages) 1641 break; 1642 1643 KASSERT(ph->ph_nmissing == 0); 1644 if (curtime - ph->ph_time < pool_inactive_time 1645 && !pa_starved_p(pp->pr_alloc)) 1646 continue; 1647 1648 /* 1649 * If freeing this page would put us below 1650 * the low water mark, stop now. 1651 */ 1652 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1653 pp->pr_minitems) 1654 break; 1655 1656 pr_rmpage(pp, ph, &pq); 1657 } 1658 1659 pr_leave(pp); 1660 mutex_exit(&pp->pr_lock); 1661 1662 if (LIST_EMPTY(&pq)) 1663 rv = 0; 1664 else { 1665 pr_pagelist_free(pp, &pq); 1666 rv = 1; 1667 } 1668 1669 if (klock) { 1670 KERNEL_UNLOCK_ONE(NULL); 1671 } 1672 1673 return (rv); 1674 } 1675 1676 /* 1677 * Drain pools, one at a time. This is a two stage process; 1678 * drain_start kicks off a cross call to drain CPU-level caches 1679 * if the pool has an associated pool_cache. drain_end waits 1680 * for those cross calls to finish, and then drains the cache 1681 * (if any) and pool. 1682 * 1683 * Note, must never be called from interrupt context. 1684 */ 1685 void 1686 pool_drain_start(struct pool **ppp, uint64_t *wp) 1687 { 1688 struct pool *pp; 1689 1690 KASSERT(!TAILQ_EMPTY(&pool_head)); 1691 1692 pp = NULL; 1693 1694 /* Find next pool to drain, and add a reference. */ 1695 mutex_enter(&pool_head_lock); 1696 do { 1697 if (drainpp == NULL) { 1698 drainpp = TAILQ_FIRST(&pool_head); 1699 } 1700 if (drainpp != NULL) { 1701 pp = drainpp; 1702 drainpp = TAILQ_NEXT(pp, pr_poollist); 1703 } 1704 /* 1705 * Skip completely idle pools. We depend on at least 1706 * one pool in the system being active. 1707 */ 1708 } while (pp == NULL || pp->pr_npages == 0); 1709 pp->pr_refcnt++; 1710 mutex_exit(&pool_head_lock); 1711 1712 /* If there is a pool_cache, drain CPU level caches. */ 1713 *ppp = pp; 1714 if (pp->pr_cache != NULL) { 1715 *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall, 1716 pp->pr_cache, NULL); 1717 } 1718 } 1719 1720 void 1721 pool_drain_end(struct pool *pp, uint64_t where) 1722 { 1723 1724 if (pp == NULL) 1725 return; 1726 1727 KASSERT(pp->pr_refcnt > 0); 1728 1729 /* Wait for remote draining to complete. */ 1730 if (pp->pr_cache != NULL) 1731 xc_wait(where); 1732 1733 /* Drain the cache (if any) and pool.. 
*/ 1734 pool_reclaim(pp); 1735 1736 /* Finally, unlock the pool. */ 1737 mutex_enter(&pool_head_lock); 1738 pp->pr_refcnt--; 1739 cv_broadcast(&pool_busy); 1740 mutex_exit(&pool_head_lock); 1741 } 1742 1743 /* 1744 * Diagnostic helpers. 1745 */ 1746 void 1747 pool_print(struct pool *pp, const char *modif) 1748 { 1749 1750 pool_print1(pp, modif, printf); 1751 } 1752 1753 void 1754 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1755 { 1756 struct pool *pp; 1757 1758 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1759 pool_printit(pp, modif, pr); 1760 } 1761 } 1762 1763 void 1764 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1765 { 1766 1767 if (pp == NULL) { 1768 (*pr)("Must specify a pool to print.\n"); 1769 return; 1770 } 1771 1772 pool_print1(pp, modif, pr); 1773 } 1774 1775 static void 1776 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1777 void (*pr)(const char *, ...)) 1778 { 1779 struct pool_item_header *ph; 1780 #ifdef DIAGNOSTIC 1781 struct pool_item *pi; 1782 #endif 1783 1784 LIST_FOREACH(ph, pl, ph_pagelist) { 1785 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1786 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1787 #ifdef DIAGNOSTIC 1788 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1789 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1790 if (pi->pi_magic != PI_MAGIC) { 1791 (*pr)("\t\t\titem %p, magic 0x%x\n", 1792 pi, pi->pi_magic); 1793 } 1794 } 1795 } 1796 #endif 1797 } 1798 } 1799 1800 static void 1801 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1802 { 1803 struct pool_item_header *ph; 1804 pool_cache_t pc; 1805 pcg_t *pcg; 1806 pool_cache_cpu_t *cc; 1807 uint64_t cpuhit, cpumiss; 1808 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1809 char c; 1810 1811 while ((c = *modif++) != '\0') { 1812 if (c == 'l') 1813 print_log = 1; 1814 if (c == 'p') 1815 print_pagelist = 1; 1816 if (c == 'c') 1817 print_cache = 1; 1818 } 1819 1820 if ((pc = pp->pr_cache) != NULL) { 1821 (*pr)("POOL CACHE"); 1822 } else { 1823 (*pr)("POOL"); 1824 } 1825 1826 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1827 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1828 pp->pr_roflags); 1829 (*pr)("\talloc %p\n", pp->pr_alloc); 1830 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1831 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1832 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1833 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1834 1835 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1836 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1837 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1838 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1839 1840 if (print_pagelist == 0) 1841 goto skip_pagelist; 1842 1843 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1844 (*pr)("\n\tempty page list:\n"); 1845 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1846 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1847 (*pr)("\n\tfull page list:\n"); 1848 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1849 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1850 (*pr)("\n\tpartial-page list:\n"); 1851 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1852 1853 if (pp->pr_curpage == NULL) 1854 (*pr)("\tno current page\n"); 1855 else 1856 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1857 1858 skip_pagelist: 1859 if (print_log == 0) 1860 goto skip_log; 1861 1862 (*pr)("\n"); 1863 if 
((pp->pr_roflags & PR_LOGGING) == 0) 1864 (*pr)("\tno log\n"); 1865 else { 1866 pr_printlog(pp, NULL, pr); 1867 } 1868 1869 skip_log: 1870 1871 #define PR_GROUPLIST(pcg) \ 1872 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1873 for (i = 0; i < pcg->pcg_size; i++) { \ 1874 if (pcg->pcg_objects[i].pcgo_pa != \ 1875 POOL_PADDR_INVALID) { \ 1876 (*pr)("\t\t\t%p, 0x%llx\n", \ 1877 pcg->pcg_objects[i].pcgo_va, \ 1878 (unsigned long long) \ 1879 pcg->pcg_objects[i].pcgo_pa); \ 1880 } else { \ 1881 (*pr)("\t\t\t%p\n", \ 1882 pcg->pcg_objects[i].pcgo_va); \ 1883 } \ 1884 } 1885 1886 if (pc != NULL) { 1887 cpuhit = 0; 1888 cpumiss = 0; 1889 for (i = 0; i < MAXCPUS; i++) { 1890 if ((cc = pc->pc_cpus[i]) == NULL) 1891 continue; 1892 cpuhit += cc->cc_hits; 1893 cpumiss += cc->cc_misses; 1894 } 1895 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1896 (*pr)("\tcache layer hits %llu misses %llu\n", 1897 pc->pc_hits, pc->pc_misses); 1898 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1899 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1900 pc->pc_contended); 1901 (*pr)("\tcache layer empty groups %u full groups %u\n", 1902 pc->pc_nempty, pc->pc_nfull); 1903 if (print_cache) { 1904 (*pr)("\tfull cache groups:\n"); 1905 for (pcg = pc->pc_fullgroups; pcg != NULL; 1906 pcg = pcg->pcg_next) { 1907 PR_GROUPLIST(pcg); 1908 } 1909 (*pr)("\tempty cache groups:\n"); 1910 for (pcg = pc->pc_emptygroups; pcg != NULL; 1911 pcg = pcg->pcg_next) { 1912 PR_GROUPLIST(pcg); 1913 } 1914 } 1915 } 1916 #undef PR_GROUPLIST 1917 1918 pr_enter_check(pp, pr); 1919 } 1920 1921 static int 1922 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1923 { 1924 struct pool_item *pi; 1925 void *page; 1926 int n; 1927 1928 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1929 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1930 if (page != ph->ph_page && 1931 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1932 if (label != NULL) 1933 printf("%s: ", label); 1934 printf("pool(%p:%s): page inconsistency: page %p;" 1935 " at page head addr %p (p %p)\n", pp, 1936 pp->pr_wchan, ph->ph_page, 1937 ph, page); 1938 return 1; 1939 } 1940 } 1941 1942 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1943 return 0; 1944 1945 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1946 pi != NULL; 1947 pi = LIST_NEXT(pi,pi_list), n++) { 1948 1949 #ifdef DIAGNOSTIC 1950 if (pi->pi_magic != PI_MAGIC) { 1951 if (label != NULL) 1952 printf("%s: ", label); 1953 printf("pool(%s): free list modified: magic=%x;" 1954 " page %p; item ordinal %d; addr %p\n", 1955 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1956 n, pi); 1957 panic("pool"); 1958 } 1959 #endif 1960 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1961 continue; 1962 } 1963 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1964 if (page == ph->ph_page) 1965 continue; 1966 1967 if (label != NULL) 1968 printf("%s: ", label); 1969 printf("pool(%p:%s): page inconsistency: page %p;" 1970 " item ordinal %d; addr %p (p %p)\n", pp, 1971 pp->pr_wchan, ph->ph_page, 1972 n, pi, page); 1973 return 1; 1974 } 1975 return 0; 1976 } 1977 1978 1979 int 1980 pool_chk(struct pool *pp, const char *label) 1981 { 1982 struct pool_item_header *ph; 1983 int r = 0; 1984 1985 mutex_enter(&pp->pr_lock); 1986 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1987 r = pool_chk_page(pp, label, ph); 1988 if (r) { 1989 goto out; 1990 } 1991 } 1992 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1993 r = pool_chk_page(pp, label, ph); 1994 if (r) { 1995 goto out; 1996 } 1997 } 
1998 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1999 r = pool_chk_page(pp, label, ph); 2000 if (r) { 2001 goto out; 2002 } 2003 } 2004 2005 out: 2006 mutex_exit(&pp->pr_lock); 2007 return (r); 2008 } 2009 2010 /* 2011 * pool_cache_init: 2012 * 2013 * Initialize a pool cache. 2014 */ 2015 pool_cache_t 2016 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 2017 const char *wchan, struct pool_allocator *palloc, int ipl, 2018 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 2019 { 2020 pool_cache_t pc; 2021 2022 pc = pool_get(&cache_pool, PR_WAITOK); 2023 if (pc == NULL) 2024 return NULL; 2025 2026 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 2027 palloc, ipl, ctor, dtor, arg); 2028 2029 return pc; 2030 } 2031 2032 /* 2033 * pool_cache_bootstrap: 2034 * 2035 * Kernel-private version of pool_cache_init(). The caller 2036 * provides initial storage. 2037 */ 2038 void 2039 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 2040 u_int align_offset, u_int flags, const char *wchan, 2041 struct pool_allocator *palloc, int ipl, 2042 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 2043 void *arg) 2044 { 2045 CPU_INFO_ITERATOR cii; 2046 pool_cache_t pc1; 2047 struct cpu_info *ci; 2048 struct pool *pp; 2049 2050 pp = &pc->pc_pool; 2051 if (palloc == NULL && ipl == IPL_NONE) 2052 palloc = &pool_allocator_nointr; 2053 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 2054 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 2055 2056 if (ctor == NULL) { 2057 ctor = (int (*)(void *, void *, int))nullop; 2058 } 2059 if (dtor == NULL) { 2060 dtor = (void (*)(void *, void *))nullop; 2061 } 2062 2063 pc->pc_emptygroups = NULL; 2064 pc->pc_fullgroups = NULL; 2065 pc->pc_partgroups = NULL; 2066 pc->pc_ctor = ctor; 2067 pc->pc_dtor = dtor; 2068 pc->pc_arg = arg; 2069 pc->pc_hits = 0; 2070 pc->pc_misses = 0; 2071 pc->pc_nempty = 0; 2072 pc->pc_npart = 0; 2073 pc->pc_nfull = 0; 2074 pc->pc_contended = 0; 2075 pc->pc_refcnt = 0; 2076 pc->pc_freecheck = NULL; 2077 2078 if ((flags & PR_LARGECACHE) != 0) { 2079 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2080 pc->pc_pcgpool = &pcg_large_pool; 2081 } else { 2082 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2083 pc->pc_pcgpool = &pcg_normal_pool; 2084 } 2085 2086 /* Allocate per-CPU caches. */ 2087 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2088 pc->pc_ncpu = 0; 2089 if (ncpu < 2) { 2090 /* XXX For sparc: boot CPU is not attached yet. */ 2091 pool_cache_cpu_init1(curcpu(), pc); 2092 } else { 2093 for (CPU_INFO_FOREACH(cii, ci)) { 2094 pool_cache_cpu_init1(ci, pc); 2095 } 2096 } 2097 2098 /* Add to list of all pools. */ 2099 if (__predict_true(!cold)) 2100 mutex_enter(&pool_head_lock); 2101 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2102 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2103 break; 2104 } 2105 if (pc1 == NULL) 2106 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2107 else 2108 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2109 if (__predict_true(!cold)) 2110 mutex_exit(&pool_head_lock); 2111 2112 membar_sync(); 2113 pp->pr_cache = pc; 2114 } 2115 2116 /* 2117 * pool_cache_destroy: 2118 * 2119 * Destroy a pool cache. 2120 */ 2121 void 2122 pool_cache_destroy(pool_cache_t pc) 2123 { 2124 struct pool *pp = &pc->pc_pool; 2125 pool_cache_cpu_t *cc; 2126 pcg_t *pcg; 2127 int i; 2128 2129 /* Remove it from the global list. 
*/ 2130 mutex_enter(&pool_head_lock); 2131 while (pc->pc_refcnt != 0) 2132 cv_wait(&pool_busy, &pool_head_lock); 2133 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2134 mutex_exit(&pool_head_lock); 2135 2136 /* First, invalidate the entire cache. */ 2137 pool_cache_invalidate(pc); 2138 2139 /* Disassociate it from the pool. */ 2140 mutex_enter(&pp->pr_lock); 2141 pp->pr_cache = NULL; 2142 mutex_exit(&pp->pr_lock); 2143 2144 /* Destroy per-CPU data */ 2145 for (i = 0; i < MAXCPUS; i++) { 2146 if ((cc = pc->pc_cpus[i]) == NULL) 2147 continue; 2148 if ((pcg = cc->cc_current) != &pcg_dummy) { 2149 pcg->pcg_next = NULL; 2150 pool_cache_invalidate_groups(pc, pcg); 2151 } 2152 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2153 pcg->pcg_next = NULL; 2154 pool_cache_invalidate_groups(pc, pcg); 2155 } 2156 if (cc != &pc->pc_cpu0) 2157 pool_put(&cache_cpu_pool, cc); 2158 } 2159 2160 /* Finally, destroy it. */ 2161 mutex_destroy(&pc->pc_lock); 2162 pool_destroy(pp); 2163 pool_put(&cache_pool, pc); 2164 } 2165 2166 /* 2167 * pool_cache_cpu_init1: 2168 * 2169 * Called for each pool_cache whenever a new CPU is attached. 2170 */ 2171 static void 2172 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2173 { 2174 pool_cache_cpu_t *cc; 2175 int index; 2176 2177 index = ci->ci_index; 2178 2179 KASSERT(index < MAXCPUS); 2180 2181 if ((cc = pc->pc_cpus[index]) != NULL) { 2182 KASSERT(cc->cc_cpuindex == index); 2183 return; 2184 } 2185 2186 /* 2187 * The first CPU is 'free'. This needs to be the case for 2188 * bootstrap - we may not be able to allocate yet. 2189 */ 2190 if (pc->pc_ncpu == 0) { 2191 cc = &pc->pc_cpu0; 2192 pc->pc_ncpu = 1; 2193 } else { 2194 mutex_enter(&pc->pc_lock); 2195 pc->pc_ncpu++; 2196 mutex_exit(&pc->pc_lock); 2197 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2198 } 2199 2200 cc->cc_ipl = pc->pc_pool.pr_ipl; 2201 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 2202 cc->cc_cache = pc; 2203 cc->cc_cpuindex = index; 2204 cc->cc_hits = 0; 2205 cc->cc_misses = 0; 2206 cc->cc_current = __UNCONST(&pcg_dummy); 2207 cc->cc_previous = __UNCONST(&pcg_dummy); 2208 2209 pc->pc_cpus[index] = cc; 2210 } 2211 2212 /* 2213 * pool_cache_cpu_init: 2214 * 2215 * Called whenever a new CPU is attached. 2216 */ 2217 void 2218 pool_cache_cpu_init(struct cpu_info *ci) 2219 { 2220 pool_cache_t pc; 2221 2222 mutex_enter(&pool_head_lock); 2223 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2224 pc->pc_refcnt++; 2225 mutex_exit(&pool_head_lock); 2226 2227 pool_cache_cpu_init1(ci, pc); 2228 2229 mutex_enter(&pool_head_lock); 2230 pc->pc_refcnt--; 2231 cv_broadcast(&pool_busy); 2232 } 2233 mutex_exit(&pool_head_lock); 2234 } 2235 2236 /* 2237 * pool_cache_reclaim: 2238 * 2239 * Reclaim memory from a pool cache. 2240 */ 2241 bool 2242 pool_cache_reclaim(pool_cache_t pc) 2243 { 2244 2245 return pool_reclaim(&pc->pc_pool); 2246 } 2247 2248 static void 2249 pool_cache_destruct_object1(pool_cache_t pc, void *object) 2250 { 2251 2252 (*pc->pc_dtor)(pc->pc_arg, object); 2253 pool_put(&pc->pc_pool, object); 2254 } 2255 2256 /* 2257 * pool_cache_destruct_object: 2258 * 2259 * Force destruction of an object and its release back into 2260 * the pool. 2261 */ 2262 void 2263 pool_cache_destruct_object(pool_cache_t pc, void *object) 2264 { 2265 2266 FREECHECK_IN(&pc->pc_freecheck, object); 2267 2268 pool_cache_destruct_object1(pc, object); 2269 } 2270 2271 /* 2272 * pool_cache_invalidate_groups: 2273 * 2274 * Invalidate a chain of groups and destruct all objects. 
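 *	Each cached object is passed to the destructor and returned
 *	to the pool; the group itself is then freed back to either
 *	pcg_normal_pool or pcg_large_pool, depending on its size.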
2275 */ 2276 static void 2277 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2278 { 2279 void *object; 2280 pcg_t *next; 2281 int i; 2282 2283 for (; pcg != NULL; pcg = next) { 2284 next = pcg->pcg_next; 2285 2286 for (i = 0; i < pcg->pcg_avail; i++) { 2287 object = pcg->pcg_objects[i].pcgo_va; 2288 pool_cache_destruct_object1(pc, object); 2289 } 2290 2291 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2292 pool_put(&pcg_large_pool, pcg); 2293 } else { 2294 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2295 pool_put(&pcg_normal_pool, pcg); 2296 } 2297 } 2298 } 2299 2300 /* 2301 * pool_cache_invalidate: 2302 * 2303 * Invalidate a pool cache (destruct and release all of the 2304 * cached objects). Does not reclaim objects from the pool. 2305 */ 2306 void 2307 pool_cache_invalidate(pool_cache_t pc) 2308 { 2309 pcg_t *full, *empty, *part; 2310 2311 mutex_enter(&pc->pc_lock); 2312 full = pc->pc_fullgroups; 2313 empty = pc->pc_emptygroups; 2314 part = pc->pc_partgroups; 2315 pc->pc_fullgroups = NULL; 2316 pc->pc_emptygroups = NULL; 2317 pc->pc_partgroups = NULL; 2318 pc->pc_nfull = 0; 2319 pc->pc_nempty = 0; 2320 pc->pc_npart = 0; 2321 mutex_exit(&pc->pc_lock); 2322 2323 pool_cache_invalidate_groups(pc, full); 2324 pool_cache_invalidate_groups(pc, empty); 2325 pool_cache_invalidate_groups(pc, part); 2326 } 2327 2328 void 2329 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2330 { 2331 2332 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2333 } 2334 2335 void 2336 pool_cache_setlowat(pool_cache_t pc, int n) 2337 { 2338 2339 pool_setlowat(&pc->pc_pool, n); 2340 } 2341 2342 void 2343 pool_cache_sethiwat(pool_cache_t pc, int n) 2344 { 2345 2346 pool_sethiwat(&pc->pc_pool, n); 2347 } 2348 2349 void 2350 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2351 { 2352 2353 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2354 } 2355 2356 static bool __noinline 2357 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2358 paddr_t *pap, int flags) 2359 { 2360 pcg_t *pcg, *cur; 2361 uint64_t ncsw; 2362 pool_cache_t pc; 2363 void *object; 2364 2365 KASSERT(cc->cc_current->pcg_avail == 0); 2366 KASSERT(cc->cc_previous->pcg_avail == 0); 2367 2368 pc = cc->cc_cache; 2369 cc->cc_misses++; 2370 2371 /* 2372 * Nothing was available locally. Try and grab a group 2373 * from the cache. 2374 */ 2375 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2376 ncsw = curlwp->l_ncsw; 2377 mutex_enter(&pc->pc_lock); 2378 pc->pc_contended++; 2379 2380 /* 2381 * If we context switched while locking, then 2382 * our view of the per-CPU data is invalid: 2383 * retry. 2384 */ 2385 if (curlwp->l_ncsw != ncsw) { 2386 mutex_exit(&pc->pc_lock); 2387 return true; 2388 } 2389 } 2390 2391 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2392 /* 2393 * If there's a full group, release our empty 2394 * group back to the cache. Install the full 2395 * group as cc_current and return. 2396 */ 2397 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2398 KASSERT(cur->pcg_avail == 0); 2399 cur->pcg_next = pc->pc_emptygroups; 2400 pc->pc_emptygroups = cur; 2401 pc->pc_nempty++; 2402 } 2403 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2404 cc->cc_current = pcg; 2405 pc->pc_fullgroups = pcg->pcg_next; 2406 pc->pc_hits++; 2407 pc->pc_nfull--; 2408 mutex_exit(&pc->pc_lock); 2409 return true; 2410 } 2411 2412 /* 2413 * Nothing available locally or in cache. Take the slow 2414 * path: fetch a new object from the pool and construct 2415 * it. 
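	 * If pool_get() fails, or if the constructor rejects the new
	 * object, NULL is handed back through *objectp and false is
	 * returned so that pool_cache_get_paddr() stops retrying.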
2416 */ 2417 pc->pc_misses++; 2418 mutex_exit(&pc->pc_lock); 2419 splx(s); 2420 2421 object = pool_get(&pc->pc_pool, flags); 2422 *objectp = object; 2423 if (__predict_false(object == NULL)) 2424 return false; 2425 2426 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2427 pool_put(&pc->pc_pool, object); 2428 *objectp = NULL; 2429 return false; 2430 } 2431 2432 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) & 2433 (pc->pc_pool.pr_align - 1)) == 0); 2434 2435 if (pap != NULL) { 2436 #ifdef POOL_VTOPHYS 2437 *pap = POOL_VTOPHYS(object); 2438 #else 2439 *pap = POOL_PADDR_INVALID; 2440 #endif 2441 } 2442 2443 FREECHECK_OUT(&pc->pc_freecheck, object); 2444 return false; 2445 } 2446 2447 /* 2448 * pool_cache_get{,_paddr}: 2449 * 2450 * Get an object from a pool cache (optionally returning 2451 * the physical address of the object). 2452 */ 2453 void * 2454 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2455 { 2456 pool_cache_cpu_t *cc; 2457 pcg_t *pcg; 2458 void *object; 2459 int s; 2460 2461 #ifdef LOCKDEBUG 2462 if (flags & PR_WAITOK) { 2463 ASSERT_SLEEPABLE(); 2464 } 2465 #endif 2466 2467 /* Lock out interrupts and disable preemption. */ 2468 s = splvm(); 2469 while (/* CONSTCOND */ true) { 2470 /* Try and allocate an object from the current group. */ 2471 cc = pc->pc_cpus[curcpu()->ci_index]; 2472 KASSERT(cc->cc_cache == pc); 2473 pcg = cc->cc_current; 2474 if (__predict_true(pcg->pcg_avail > 0)) { 2475 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2476 if (__predict_false(pap != NULL)) 2477 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2478 #if defined(DIAGNOSTIC) 2479 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2480 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2481 KASSERT(object != NULL); 2482 #endif 2483 cc->cc_hits++; 2484 splx(s); 2485 FREECHECK_OUT(&pc->pc_freecheck, object); 2486 return object; 2487 } 2488 2489 /* 2490 * That failed. If the previous group isn't empty, swap 2491 * it with the current group and allocate from there. 2492 */ 2493 pcg = cc->cc_previous; 2494 if (__predict_true(pcg->pcg_avail > 0)) { 2495 cc->cc_previous = cc->cc_current; 2496 cc->cc_current = pcg; 2497 continue; 2498 } 2499 2500 /* 2501 * Can't allocate from either group: try the slow path. 2502 * If get_slow() allocated an object for us, or if 2503 * no more objects are available, it will return false. 2504 * Otherwise, we need to retry. 2505 */ 2506 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2507 break; 2508 } 2509 2510 return object; 2511 } 2512 2513 static bool __noinline 2514 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2515 { 2516 pcg_t *pcg, *cur; 2517 uint64_t ncsw; 2518 pool_cache_t pc; 2519 2520 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2521 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2522 2523 pc = cc->cc_cache; 2524 pcg = NULL; 2525 cc->cc_misses++; 2526 2527 /* 2528 * If there are no empty groups in the cache then allocate one 2529 * while still unlocked. 2530 */ 2531 if (__predict_false(pc->pc_emptygroups == NULL)) { 2532 if (__predict_true(!pool_cache_disable)) { 2533 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2534 } 2535 if (__predict_true(pcg != NULL)) { 2536 pcg->pcg_avail = 0; 2537 pcg->pcg_size = pc->pc_pcgsize; 2538 } 2539 } 2540 2541 /* Lock the cache. 
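	 * Prefer mutex_tryenter(); if that fails we block in
	 * mutex_enter() and may context switch, in which case the
	 * per-CPU state sampled above is stale and we must retry.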
*/ 2542 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2543 ncsw = curlwp->l_ncsw; 2544 mutex_enter(&pc->pc_lock); 2545 pc->pc_contended++; 2546 2547 /* 2548 * If we context switched while locking, then our view of 2549 * the per-CPU data is invalid: retry. 2550 */ 2551 if (__predict_false(curlwp->l_ncsw != ncsw)) { 2552 mutex_exit(&pc->pc_lock); 2553 if (pcg != NULL) { 2554 pool_put(pc->pc_pcgpool, pcg); 2555 } 2556 return true; 2557 } 2558 } 2559 2560 /* If we didn't allocate an empty group above, try to take one from the cache's list of empty groups. */ 2561 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2562 pcg = pc->pc_emptygroups; 2563 pc->pc_emptygroups = pcg->pcg_next; 2564 pc->pc_nempty--; 2565 } 2566 2567 /* 2568 * If there's an empty group, release our full group back 2569 * to the cache. Install the empty group on the local CPU 2570 * and return. 2571 */ 2572 if (pcg != NULL) { 2573 KASSERT(pcg->pcg_avail == 0); 2574 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2575 cc->cc_previous = pcg; 2576 } else { 2577 cur = cc->cc_current; 2578 if (__predict_true(cur != &pcg_dummy)) { 2579 KASSERT(cur->pcg_avail == cur->pcg_size); 2580 cur->pcg_next = pc->pc_fullgroups; 2581 pc->pc_fullgroups = cur; 2582 pc->pc_nfull++; 2583 } 2584 cc->cc_current = pcg; 2585 } 2586 pc->pc_hits++; 2587 mutex_exit(&pc->pc_lock); 2588 return true; 2589 } 2590 2591 /* 2592 * Nothing available locally or in cache, and we didn't 2593 * allocate an empty group. Take the slow path and destroy 2594 * the object here and now. 2595 */ 2596 pc->pc_misses++; 2597 mutex_exit(&pc->pc_lock); 2598 splx(s); 2599 pool_cache_destruct_object(pc, object); 2600 2601 return false; 2602 } 2603 2604 /* 2605 * pool_cache_put{,_paddr}: 2606 * 2607 * Put an object back to the pool cache (optionally caching the 2608 * physical address of the object). 2609 */ 2610 void 2611 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2612 { 2613 pool_cache_cpu_t *cc; 2614 pcg_t *pcg; 2615 int s; 2616 2617 KASSERT(object != NULL); 2618 FREECHECK_IN(&pc->pc_freecheck, object); 2619 2620 /* Lock out interrupts and disable preemption. */ 2621 s = splvm(); 2622 while (/* CONSTCOND */ true) { 2623 /* If the current group isn't full, release it there. */ 2624 cc = pc->pc_cpus[curcpu()->ci_index]; 2625 KASSERT(cc->cc_cache == pc); 2626 pcg = cc->cc_current; 2627 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2628 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2629 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2630 pcg->pcg_avail++; 2631 cc->cc_hits++; 2632 splx(s); 2633 return; 2634 } 2635 2636 /* 2637 * That failed. If the previous group isn't full, swap 2638 * it with the current group and try again. 2639 */ 2640 pcg = cc->cc_previous; 2641 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2642 cc->cc_previous = cc->cc_current; 2643 cc->cc_current = pcg; 2644 continue; 2645 } 2646 2647 /* 2648 * Can't free to either group: try the slow path. 2649 * If put_slow() releases the object for us, it 2650 * will return false. Otherwise we need to retry. 2651 */ 2652 if (!pool_cache_put_slow(cc, s, object)) 2653 break; 2654 } 2655 } 2656 2657 /* 2658 * pool_cache_xcall: 2659 * 2660 * Transfer objects from the per-CPU cache to the global cache. 2661 * Run within a cross-call thread.
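 *	Both the current and previous per-CPU groups are detached and
 *	moved onto the cache's full, partial or empty group list,
 *	according to how many objects each one holds.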
2662 */ 2663 static void 2664 pool_cache_xcall(pool_cache_t pc) 2665 { 2666 pool_cache_cpu_t *cc; 2667 pcg_t *prev, *cur, **list; 2668 int s; 2669 2670 s = splvm(); 2671 mutex_enter(&pc->pc_lock); 2672 cc = pc->pc_cpus[curcpu()->ci_index]; 2673 cur = cc->cc_current; 2674 cc->cc_current = __UNCONST(&pcg_dummy); 2675 prev = cc->cc_previous; 2676 cc->cc_previous = __UNCONST(&pcg_dummy); 2677 if (cur != &pcg_dummy) { 2678 if (cur->pcg_avail == cur->pcg_size) { 2679 list = &pc->pc_fullgroups; 2680 pc->pc_nfull++; 2681 } else if (cur->pcg_avail == 0) { 2682 list = &pc->pc_emptygroups; 2683 pc->pc_nempty++; 2684 } else { 2685 list = &pc->pc_partgroups; 2686 pc->pc_npart++; 2687 } 2688 cur->pcg_next = *list; 2689 *list = cur; 2690 } 2691 if (prev != &pcg_dummy) { 2692 if (prev->pcg_avail == prev->pcg_size) { 2693 list = &pc->pc_fullgroups; 2694 pc->pc_nfull++; 2695 } else if (prev->pcg_avail == 0) { 2696 list = &pc->pc_emptygroups; 2697 pc->pc_nempty++; 2698 } else { 2699 list = &pc->pc_partgroups; 2700 pc->pc_npart++; 2701 } 2702 prev->pcg_next = *list; 2703 *list = prev; 2704 } 2705 mutex_exit(&pc->pc_lock); 2706 splx(s); 2707 } 2708 2709 /* 2710 * Pool backend allocators. 2711 * 2712 * Each pool has a backend allocator that handles allocation, deallocation, 2713 * and any additional draining that might be needed. 2714 * 2715 * We provide two standard allocators: 2716 * 2717 * pool_allocator_kmem - the default when no allocator is specified 2718 * 2719 * pool_allocator_nointr - used for pools that will not be accessed 2720 * in interrupt context. 2721 */ 2722 void *pool_page_alloc(struct pool *, int); 2723 void pool_page_free(struct pool *, void *); 2724 2725 #ifdef POOL_SUBPAGE 2726 struct pool_allocator pool_allocator_kmem_fullpage = { 2727 pool_page_alloc, pool_page_free, 0, 2728 .pa_backingmapptr = &kmem_map, 2729 }; 2730 #else 2731 struct pool_allocator pool_allocator_kmem = { 2732 pool_page_alloc, pool_page_free, 0, 2733 .pa_backingmapptr = &kmem_map, 2734 }; 2735 #endif 2736 2737 void *pool_page_alloc_nointr(struct pool *, int); 2738 void pool_page_free_nointr(struct pool *, void *); 2739 2740 #ifdef POOL_SUBPAGE 2741 struct pool_allocator pool_allocator_nointr_fullpage = { 2742 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2743 .pa_backingmapptr = &kernel_map, 2744 }; 2745 #else 2746 struct pool_allocator pool_allocator_nointr = { 2747 pool_page_alloc_nointr, pool_page_free_nointr, 0, 2748 .pa_backingmapptr = &kernel_map, 2749 }; 2750 #endif 2751 2752 #ifdef POOL_SUBPAGE 2753 void *pool_subpage_alloc(struct pool *, int); 2754 void pool_subpage_free(struct pool *, void *); 2755 2756 struct pool_allocator pool_allocator_kmem = { 2757 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2758 .pa_backingmapptr = &kmem_map, 2759 }; 2760 2761 void *pool_subpage_alloc_nointr(struct pool *, int); 2762 void pool_subpage_free_nointr(struct pool *, void *); 2763 2764 struct pool_allocator pool_allocator_nointr = { 2765 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE, 2766 .pa_backingmapptr = &kmem_map, 2767 }; 2768 #endif /* POOL_SUBPAGE */ 2769 2770 static void * 2771 pool_allocator_alloc(struct pool *pp, int flags) 2772 { 2773 struct pool_allocator *pa = pp->pr_alloc; 2774 void *res; 2775 2776 res = (*pa->pa_alloc)(pp, flags); 2777 if (res == NULL && (flags & PR_WAITOK) == 0) { 2778 /* 2779 * We only run the drain hook here if PR_NOWAIT. 2780 * In other cases, the hook will be run in 2781 * pool_reclaim(). 
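		 * The hook may release idle items back to this pool,
		 * so retry the allocation once after running it.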
2782 */ 2783 if (pp->pr_drain_hook != NULL) { 2784 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2785 res = (*pa->pa_alloc)(pp, flags); 2786 } 2787 } 2788 return res; 2789 } 2790 2791 static void 2792 pool_allocator_free(struct pool *pp, void *v) 2793 { 2794 struct pool_allocator *pa = pp->pr_alloc; 2795 2796 (*pa->pa_free)(pp, v); 2797 } 2798 2799 void * 2800 pool_page_alloc(struct pool *pp, int flags) 2801 { 2802 bool waitok = (flags & PR_WAITOK) ? true : false; 2803 2804 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok)); 2805 } 2806 2807 void 2808 pool_page_free(struct pool *pp, void *v) 2809 { 2810 2811 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v); 2812 } 2813 2814 static void * 2815 pool_page_alloc_meta(struct pool *pp, int flags) 2816 { 2817 bool waitok = (flags & PR_WAITOK) ? true : false; 2818 2819 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok)); 2820 } 2821 2822 static void 2823 pool_page_free_meta(struct pool *pp, void *v) 2824 { 2825 2826 uvm_km_free_poolpage(kmem_map, (vaddr_t) v); 2827 } 2828 2829 #ifdef POOL_SUBPAGE 2830 /* Sub-page allocator, for machines with large hardware pages. */ 2831 void * 2832 pool_subpage_alloc(struct pool *pp, int flags) 2833 { 2834 return pool_get(&psppool, flags); 2835 } 2836 2837 void 2838 pool_subpage_free(struct pool *pp, void *v) 2839 { 2840 pool_put(&psppool, v); 2841 } 2842 2843 /* We don't provide a real nointr allocator. Maybe later. */ 2844 void * 2845 pool_subpage_alloc_nointr(struct pool *pp, int flags) 2846 { 2847 2848 return (pool_subpage_alloc(pp, flags)); 2849 } 2850 2851 void 2852 pool_subpage_free_nointr(struct pool *pp, void *v) 2853 { 2854 2855 pool_subpage_free(pp, v); 2856 } 2857 #endif /* POOL_SUBPAGE */ 2858 void * 2859 pool_page_alloc_nointr(struct pool *pp, int flags) 2860 { 2861 bool waitok = (flags & PR_WAITOK) ? 
true : false; 2862 2863 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok)); 2864 } 2865 2866 void 2867 pool_page_free_nointr(struct pool *pp, void *v) 2868 { 2869 2870 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v); 2871 } 2872 2873 #if defined(DDB) 2874 static bool 2875 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2876 { 2877 2878 return (uintptr_t)ph->ph_page <= addr && 2879 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 2880 } 2881 2882 static bool 2883 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 2884 { 2885 2886 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 2887 } 2888 2889 static bool 2890 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 2891 { 2892 int i; 2893 2894 if (pcg == NULL) { 2895 return false; 2896 } 2897 for (i = 0; i < pcg->pcg_avail; i++) { 2898 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 2899 return true; 2900 } 2901 } 2902 return false; 2903 } 2904 2905 static bool 2906 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2907 { 2908 2909 if ((pp->pr_roflags & PR_NOTOUCH) != 0) { 2910 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr); 2911 pool_item_bitmap_t *bitmap = 2912 ph->ph_bitmap + (idx / BITMAP_SIZE); 2913 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 2914 2915 return (*bitmap & mask) == 0; 2916 } else { 2917 struct pool_item *pi; 2918 2919 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 2920 if (pool_in_item(pp, pi, addr)) { 2921 return false; 2922 } 2923 } 2924 return true; 2925 } 2926 } 2927 2928 void 2929 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2930 { 2931 struct pool *pp; 2932 2933 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 2934 struct pool_item_header *ph; 2935 uintptr_t item; 2936 bool allocated = true; 2937 bool incache = false; 2938 bool incpucache = false; 2939 char cpucachestr[32]; 2940 2941 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 2942 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2943 if (pool_in_page(pp, ph, addr)) { 2944 goto found; 2945 } 2946 } 2947 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2948 if (pool_in_page(pp, ph, addr)) { 2949 allocated = 2950 pool_allocated(pp, ph, addr); 2951 goto found; 2952 } 2953 } 2954 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2955 if (pool_in_page(pp, ph, addr)) { 2956 allocated = false; 2957 goto found; 2958 } 2959 } 2960 continue; 2961 } else { 2962 ph = pr_find_pagehead_noalign(pp, (void *)addr); 2963 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 2964 continue; 2965 } 2966 allocated = pool_allocated(pp, ph, addr); 2967 } 2968 found: 2969 if (allocated && pp->pr_cache) { 2970 pool_cache_t pc = pp->pr_cache; 2971 struct pool_cache_group *pcg; 2972 int i; 2973 2974 for (pcg = pc->pc_fullgroups; pcg != NULL; 2975 pcg = pcg->pcg_next) { 2976 if (pool_in_cg(pp, pcg, addr)) { 2977 incache = true; 2978 goto print; 2979 } 2980 } 2981 for (i = 0; i < MAXCPUS; i++) { 2982 pool_cache_cpu_t *cc; 2983 2984 if ((cc = pc->pc_cpus[i]) == NULL) { 2985 continue; 2986 } 2987 if (pool_in_cg(pp, cc->cc_current, addr) || 2988 pool_in_cg(pp, cc->cc_previous, addr)) { 2989 struct cpu_info *ci = 2990 cpu_lookup(i); 2991 2992 incpucache = true; 2993 snprintf(cpucachestr, 2994 sizeof(cpucachestr), 2995 "cached by CPU %u", 2996 ci->ci_index); 2997 goto print; 2998 } 2999 } 3000 } 3001 print: 3002 item = (uintptr_t)ph->ph_page + ph->ph_off; 3003 item = item + rounddown(addr - item, pp->pr_size); 3004 (*pr)("%p is 
%p+%zu in POOL '%s' (%s)\n", 3005 (void *)addr, item, (size_t)(addr - item), 3006 pp->pr_wchan, 3007 incpucache ? cpucachestr : 3008 incache ? "cached" : allocated ? "allocated" : "free"); 3009 } 3010 } 3011 #endif /* defined(DDB) */ 3012
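/*
 * Illustrative usage sketch for the pool_cache interface implemented
 * above. This is a comment-only example: "struct frob", frob_ctor(),
 * frob_dtor(), frob_init() and frob_cache are hypothetical names, not
 * part of this file.
 *
 *	static pool_cache_t frob_cache;
 *
 *	static int
 *	frob_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct frob *f = obj;
 *
 *		memset(f, 0, sizeof(*f));
 *		return 0;	(a non-zero return fails the allocation)
 *	}
 *
 *	static void
 *	frob_dtor(void *arg, void *obj)
 *	{
 *		(undo whatever frob_ctor() set up)
 *	}
 *
 *	void
 *	frob_init(void)
 *	{
 *
 *		frob_cache = pool_cache_init(sizeof(struct frob), 0, 0, 0,
 *		    "frobpl", NULL, IPL_NONE, frob_ctor, frob_dtor, NULL);
 *	}
 *
 * Objects are then obtained and released with the functions defined in
 * this file:
 *
 *	struct frob *f = pool_cache_get_paddr(frob_cache, PR_WAITOK, NULL);
 *	...
 *	pool_cache_put_paddr(frob_cache, f, POOL_PADDR_INVALID);
 *
 * and the cache itself is torn down with pool_cache_destroy(frob_cache),
 * which destructs and frees all cached objects.
 */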