1 /* $NetBSD: subr_pool.c,v 1.197 2012/06/05 22:51:47 jym Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.197 2012/06/05 22:51:47 jym Exp $"); 36 37 #include "opt_ddb.h" 38 #include "opt_lockdebug.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/bitops.h> 43 #include <sys/proc.h> 44 #include <sys/errno.h> 45 #include <sys/kernel.h> 46 #include <sys/vmem.h> 47 #include <sys/pool.h> 48 #include <sys/syslog.h> 49 #include <sys/debug.h> 50 #include <sys/lockdebug.h> 51 #include <sys/xcall.h> 52 #include <sys/cpu.h> 53 #include <sys/atomic.h> 54 55 #include <uvm/uvm_extern.h> 56 57 /* 58 * Pool resource management utility. 59 * 60 * Memory is allocated in pages which are split into pieces according to 61 * the pool item size. Each page is kept on one of three lists in the 62 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 63 * for empty, full and partially-full pages respectively. The individual 64 * pool items are on a linked list headed by `ph_itemlist' in each page 65 * header. The memory for building the page list is either taken from 66 * the allocated pages themselves (for small pool items) or taken from 67 * an internal pool of page headers (`phpool'). 68 */ 69 70 /* List of all pools */ 71 static TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 72 73 /* Private pool for page header structures */ 74 #define PHPOOL_MAX 8 75 static struct pool phpool[PHPOOL_MAX]; 76 #define PHPOOL_FREELIST_NELEM(idx) \ 77 (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx))) 78 79 #ifdef POOL_SUBPAGE 80 /* Pool of subpages for use by normal pools. 
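 *
 * Aside, for readers of this file: the page/item machinery described
 * above is consumed through a small API.  A minimal, illustrative
 * client -- the "foo" names are hypothetical, not part of this file --
 * looks roughly like this:
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *	    "foopl", NULL, IPL_NONE);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 *	...
 *	pool_destroy(&foo_pool);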
 */
static struct pool psppool;
#endif

static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);

/* allocator for pool metadata */
struct pool_allocator pool_allocator_meta = {
	.pa_alloc = pool_page_alloc_meta,
	.pa_free = pool_page_free_meta,
	.pa_pagesz = 0
};

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool *drainpp;

/* This lock protects both pool_head and drainpp. */
static kmutex_t pool_head_lock;
static kcondvar_t pool_busy;

/* This lock protects initialization of a potentially shared pool allocator */
static kmutex_t pool_allocator_lock;

typedef uint32_t pool_item_bitmap_t;
#define	BITMAP_SIZE	(CHAR_BIT * sizeof(pool_item_bitmap_t))
#define	BITMAP_MASK	(BITMAP_SIZE - 1)

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	void *			ph_page;	/* this page's address */
	uint32_t		ph_time;	/* last referenced */
	uint16_t		ph_nmissing;	/* # of chunks in use */
	uint16_t		ph_off;		/* start offset in page */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
				phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			pool_item_bitmap_t phu_bitmap[1];
		} phu_notouch;
	} ph_u;
};
#define	ph_itemlist	ph_u.phu_normal.phu_itemlist
#define	ph_bitmap	ph_u.phu_notouch.phu_bitmap

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeaddeadU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; destruction is deferred
 * until absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references up
 * to PCG_NOBJECTS_NORMAL (or PCG_NOBJECTS_LARGE) constructed objects.
 * When a cache allocates an object from the pool, it calls the object's
 * constructor and places it into a cache group.  When a cache group
 * frees an object back to the pool, it first calls the object's
 * destructor.  This allows the object to persist in constructed form
 * while freed to the cache.
 *
 * The pool references each cache, so that when a pool is drained by the
 * pagedaemon, it can drain each individual cache as well.  Each time a
 * cache is drained, the most idle cache group is freed to the pool in
 * its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

static struct pool pcg_normal_pool;
static struct pool pcg_large_pool;
static struct pool cache_pool;
static struct pool cache_cpu_pool;

pool_cache_t pnbuf_cache;	/* pathname buffer cache */

/* List of all caches.
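 *
 * For reference, a typical pool_cache consumer -- sketched with
 * hypothetical "foo" names that are not part of this file -- looks
 * roughly like this:
 *
 *	static int  foo_ctor(void *arg, void *obj, int flags);
 *	static void foo_dtor(void *arg, void *obj);
 *	static pool_cache_t foo_cache;
 *
 *	foo_cache = pool_cache_init(sizeof(struct foo), coherency_unit,
 *	    0, 0, "foocache", NULL, IPL_NONE, foo_ctor, foo_dtor, NULL);
 *
 *	struct foo *f = pool_cache_get(foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(foo_cache, f);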
*/ 181 TAILQ_HEAD(,pool_cache) pool_cache_head = 182 TAILQ_HEAD_INITIALIZER(pool_cache_head); 183 184 int pool_cache_disable; /* global disable for caching */ 185 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 186 187 static bool pool_cache_put_slow(pool_cache_cpu_t *, int, 188 void *); 189 static bool pool_cache_get_slow(pool_cache_cpu_t *, int, 190 void **, paddr_t *, int); 191 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 192 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 193 static void pool_cache_invalidate_cpu(pool_cache_t, u_int); 194 static void pool_cache_transfer(pool_cache_t); 195 196 static int pool_catchup(struct pool *); 197 static void pool_prime_page(struct pool *, void *, 198 struct pool_item_header *); 199 static void pool_update_curpage(struct pool *); 200 201 static int pool_grow(struct pool *, int); 202 static void *pool_allocator_alloc(struct pool *, int); 203 static void pool_allocator_free(struct pool *, void *); 204 205 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 206 void (*)(const char *, ...)); 207 static void pool_print1(struct pool *, const char *, 208 void (*)(const char *, ...)); 209 210 static int pool_chk_page(struct pool *, const char *, 211 struct pool_item_header *); 212 213 static inline unsigned int 214 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph, 215 const void *v) 216 { 217 const char *cp = v; 218 unsigned int idx; 219 220 KASSERT(pp->pr_roflags & PR_NOTOUCH); 221 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 222 KASSERT(idx < pp->pr_itemsperpage); 223 return idx; 224 } 225 226 static inline void 227 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph, 228 void *obj) 229 { 230 unsigned int idx = pr_item_notouch_index(pp, ph, obj); 231 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 232 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 233 234 KASSERT((*bitmap & mask) == 0); 235 *bitmap |= mask; 236 } 237 238 static inline void * 239 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph) 240 { 241 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 242 unsigned int idx; 243 int i; 244 245 for (i = 0; ; i++) { 246 int bit; 247 248 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 249 bit = ffs32(bitmap[i]); 250 if (bit) { 251 pool_item_bitmap_t mask; 252 253 bit--; 254 idx = (i * BITMAP_SIZE) + bit; 255 mask = 1 << bit; 256 KASSERT((bitmap[i] & mask) != 0); 257 bitmap[i] &= ~mask; 258 break; 259 } 260 } 261 KASSERT(idx < pp->pr_itemsperpage); 262 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 263 } 264 265 static inline void 266 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph) 267 { 268 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 269 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 270 int i; 271 272 for (i = 0; i < n; i++) { 273 bitmap[i] = (pool_item_bitmap_t)-1; 274 } 275 } 276 277 static inline int 278 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 279 { 280 281 /* 282 * we consider pool_item_header with smaller ph_page bigger. 283 * (this unnatural ordering is for the benefit of pr_find_pagehead.) 
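	 *
	 * concretely (illustrative addresses): with PR_NOALIGN an item
	 * address v cannot simply be masked down to a page boundary, so
	 * pr_find_pagehead_noalign() does SPLAY_FIND with tmp.ph_page = v;
	 * because larger addresses compare as "smaller", a missed lookup
	 * followed by SPLAY_NEXT yields the header with the greatest
	 * ph_page that is still <= v, i.e. the page containing v.  e.g.
	 * with pages at 0x1000 and 0x2000, looking up v == 0x2468 lands
	 * on the header whose ph_page is 0x2000.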
284 */ 285 286 if (a->ph_page < b->ph_page) 287 return (1); 288 else if (a->ph_page > b->ph_page) 289 return (-1); 290 else 291 return (0); 292 } 293 294 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 295 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 296 297 static inline struct pool_item_header * 298 pr_find_pagehead_noalign(struct pool *pp, void *v) 299 { 300 struct pool_item_header *ph, tmp; 301 302 tmp.ph_page = (void *)(uintptr_t)v; 303 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 304 if (ph == NULL) { 305 ph = SPLAY_ROOT(&pp->pr_phtree); 306 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 307 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 308 } 309 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 310 } 311 312 return ph; 313 } 314 315 /* 316 * Return the pool page header based on item address. 317 */ 318 static inline struct pool_item_header * 319 pr_find_pagehead(struct pool *pp, void *v) 320 { 321 struct pool_item_header *ph, tmp; 322 323 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 324 ph = pr_find_pagehead_noalign(pp, v); 325 } else { 326 void *page = 327 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask); 328 329 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 330 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset); 331 } else { 332 tmp.ph_page = page; 333 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 334 } 335 } 336 337 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 338 ((char *)ph->ph_page <= (char *)v && 339 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 340 return ph; 341 } 342 343 static void 344 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 345 { 346 struct pool_item_header *ph; 347 348 while ((ph = LIST_FIRST(pq)) != NULL) { 349 LIST_REMOVE(ph, ph_pagelist); 350 pool_allocator_free(pp, ph->ph_page); 351 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 352 pool_put(pp->pr_phpool, ph); 353 } 354 } 355 356 /* 357 * Remove a page from the pool. 358 */ 359 static inline void 360 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 361 struct pool_pagelist *pq) 362 { 363 364 KASSERT(mutex_owned(&pp->pr_lock)); 365 366 /* 367 * If the page was idle, decrement the idle page count. 368 */ 369 if (ph->ph_nmissing == 0) { 370 #ifdef DIAGNOSTIC 371 if (pp->pr_nidle == 0) 372 panic("pr_rmpage: nidle inconsistent"); 373 if (pp->pr_nitems < pp->pr_itemsperpage) 374 panic("pr_rmpage: nitems inconsistent"); 375 #endif 376 pp->pr_nidle--; 377 } 378 379 pp->pr_nitems -= pp->pr_itemsperpage; 380 381 /* 382 * Unlink the page from the pool and queue it for release. 383 */ 384 LIST_REMOVE(ph, ph_pagelist); 385 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 386 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 387 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 388 389 pp->pr_npages--; 390 pp->pr_npagefree++; 391 392 pool_update_curpage(pp); 393 } 394 395 /* 396 * Initialize all the pools listed in the "pools" link set. 397 */ 398 void 399 pool_subsystem_init(void) 400 { 401 size_t size; 402 int idx; 403 404 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 405 mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 406 cv_init(&pool_busy, "poolbusy"); 407 408 /* 409 * Initialize private page header pool and cache magazine pool if we 410 * haven't done so yet. 
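	 *
	 * With pool_item_bitmap_t being uint32_t, BITMAP_SIZE is 32 and
	 * PHPOOL_FREELIST_NELEM(idx) evaluates to 0, 64, 128, 256, 512,
	 * 1024, 2048 and 4096 for idx 0..7, so the pools created below
	 * are named "phpool-0", "phpool-64", ..., "phpool-4096".
	 * phpool[0] serves ordinary (linked-list) off-page headers; the
	 * others serve PR_NOTOUCH pools and are sized via offsetof() so
	 * that each header carries just enough bitmap words for that
	 * many items.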
411 */ 412 for (idx = 0; idx < PHPOOL_MAX; idx++) { 413 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 414 int nelem; 415 size_t sz; 416 417 nelem = PHPOOL_FREELIST_NELEM(idx); 418 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 419 "phpool-%d", nelem); 420 sz = sizeof(struct pool_item_header); 421 if (nelem) { 422 sz = offsetof(struct pool_item_header, 423 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 424 } 425 pool_init(&phpool[idx], sz, 0, 0, 0, 426 phpool_names[idx], &pool_allocator_meta, IPL_VM); 427 } 428 #ifdef POOL_SUBPAGE 429 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0, 430 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM); 431 #endif 432 433 size = sizeof(pcg_t) + 434 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 435 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 436 "pcgnormal", &pool_allocator_meta, IPL_VM); 437 438 size = sizeof(pcg_t) + 439 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 440 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 441 "pcglarge", &pool_allocator_meta, IPL_VM); 442 443 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 444 0, 0, "pcache", &pool_allocator_meta, IPL_NONE); 445 446 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 447 0, 0, "pcachecpu", &pool_allocator_meta, IPL_NONE); 448 } 449 450 /* 451 * Initialize the given pool resource structure. 452 * 453 * We export this routine to allow other kernel parts to declare 454 * static pools that must be initialized before kmem(9) is available. 455 */ 456 void 457 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 458 const char *wchan, struct pool_allocator *palloc, int ipl) 459 { 460 struct pool *pp1; 461 size_t trysize, phsize; 462 int off, slack; 463 464 #ifdef DEBUG 465 /* 466 * Check that the pool hasn't already been initialised and 467 * added to the list of all pools. 468 */ 469 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 470 if (pp == pp1) 471 panic("pool_init: pool %s already initialised", 472 wchan); 473 } 474 #endif 475 476 if (palloc == NULL) 477 palloc = &pool_allocator_kmem; 478 #ifdef POOL_SUBPAGE 479 if (size > palloc->pa_pagesz) { 480 if (palloc == &pool_allocator_kmem) 481 palloc = &pool_allocator_kmem_fullpage; 482 else if (palloc == &pool_allocator_nointr) 483 palloc = &pool_allocator_nointr_fullpage; 484 } 485 #endif /* POOL_SUBPAGE */ 486 if (!cold) 487 mutex_enter(&pool_allocator_lock); 488 if (palloc->pa_refcnt++ == 0) { 489 if (palloc->pa_pagesz == 0) 490 palloc->pa_pagesz = PAGE_SIZE; 491 492 TAILQ_INIT(&palloc->pa_list); 493 494 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 495 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 496 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 497 } 498 if (!cold) 499 mutex_exit(&pool_allocator_lock); 500 501 if (align == 0) 502 align = ALIGN(1); 503 504 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item)) 505 size = sizeof(struct pool_item); 506 507 size = roundup(size, align); 508 #ifdef DIAGNOSTIC 509 if (size > palloc->pa_pagesz) 510 panic("pool_init: pool item size (%zu) too large", size); 511 #endif 512 513 /* 514 * Initialize the pool structure. 
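	 *
	 * (At this point the request has been normalized: align defaults
	 * to ALIGN(1), items smaller than a struct pool_item are bumped
	 * up (unless PR_NOTOUCH) so the free-list link fits, and size has
	 * been rounded to a multiple of align -- e.g. a 20-byte item with
	 * 16-byte alignment is carved out of the page as a 32-byte chunk.)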
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_cache = NULL;
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;
	pp->pr_freecheck = NULL;

	/*
	 * Decide whether to put the page header off-page, to avoid
	 * wasting too large a part of the page or too big an item.
	 * Off-page page headers go on a splay tree, so we can match
	 * a returned item with its header based on the page address.
	 * We use 1/16 of the page size and about 8 times the item
	 * size as the threshold (XXX: tune).
	 *
	 * However, we'll put the header into the page if we can put
	 * it there without wasting any items.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff %= align;
	/* See the comment below about reserved bytes. */
	trysize = palloc->pa_pagesz - ((align - ioff) % align);
	phsize = ALIGN(sizeof(struct pool_item_header));
	if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
	    (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
	    trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item.  This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 */
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);
	if ((pp->pr_roflags & PR_NOTOUCH)) {
		int idx;

		for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
		    idx++) {
			/* nothing */
		}
		if (idx >= PHPOOL_MAX) {
			/*
			 * if you see this panic, consider tweaking
			 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
			 */
			panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
			    pp->pr_wchan, pp->pr_itemsperpage);
		}
		pp->pr_phpool = &phpool[idx];
	} else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		pp->pr_phpool = &phpool[0];
	}
#if defined(DIAGNOSTIC)
	else {
		pp->pr_phpool = NULL;
	}
#endif

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;
	pp->pr_refcnt = 0;

	mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl);
	cv_init(&pp->pr_cv, wchan);
	pp->pr_ipl = ipl;

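	/*
	 * Worked example of the sizing above, assuming a 4096-byte page
	 * and, purely for illustration, phsize == 64: a 128-byte item
	 * satisfies 128 < MIN(4096 / 16, 64 << 3), so the header lives
	 * in-page and off = 4096 - 64 = 4032.  That gives
	 * pr_itemsperpage = 31 with 64 bytes of slack, so successive
	 * pages start their items at colors 0, align, 2 * align, ...
	 * up to pr_maxcolor before wrapping back to 0.
	 */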
	/* Insert into the list of all pools. */
	if (!cold)
		mutex_enter(&pool_head_lock);
	TAILQ_FOREACH(pp1, &pool_head, pr_poollist) {
		if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0)
			break;
	}
	if (pp1 == NULL)
		TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	else
		TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist);
	if (!cold)
		mutex_exit(&pool_head_lock);

	/* Insert this into the list of pools using this allocator. */
	if (!cold)
		mutex_enter(&palloc->pa_lock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	if (!cold)
		mutex_exit(&palloc->pa_lock);
}

/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_pagelist pq;
	struct pool_item_header *ph;

	/* Remove from global pool list */
	mutex_enter(&pool_head_lock);
	while (pp->pr_refcnt != 0)
		cv_wait(&pool_busy, &pool_head_lock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	if (drainpp == pp)
		drainpp = NULL;
	mutex_exit(&pool_head_lock);

	/* Remove this pool from its allocator's list of pools. */
	mutex_enter(&pp->pr_alloc->pa_lock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	mutex_exit(&pp->pr_alloc->pa_lock);

	mutex_enter(&pool_allocator_lock);
	if (--pp->pr_alloc->pa_refcnt == 0)
		mutex_destroy(&pp->pr_alloc->pa_lock);
	mutex_exit(&pool_allocator_lock);

	mutex_enter(&pp->pr_lock);

	KASSERT(pp->pr_cache == NULL);

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove all pages */
	LIST_INIT(&pq);
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, &pq);

	mutex_exit(&pp->pr_lock);

	pr_pagelist_free(pp, &pq);
	cv_destroy(&pp->pr_cv);
	mutex_destroy(&pp->pr_lock);
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, void *storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset);
	else
		ph = pool_get(pp->pr_phpool, flags);

	return (ph);
}

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (pp->pr_itemsperpage == 0)
		panic("pool_get: pool '%s': pr_itemsperpage is zero, "
		    "pool not initialized?", pp->pr_wchan);
	if ((cpu_intr_p() || cpu_softintr_p()) && pp->pr_ipl == IPL_NONE &&
	    !cold && panicstr == NULL)
		panic("pool '%s' is IPL_NONE, but called from "
		    "interrupt context\n", pp->pr_wchan);
#endif
	if (flags & PR_WAITOK) {
		ASSERT_SLEEPABLE();
	}

	mutex_enter(&pp->pr_lock);
 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
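	 *
	 * (The limit, the warning message and the drain hook consulted
	 * here are optional and are installed by the pool's owner, e.g.,
	 * hypothetically:
	 *
	 *	pool_sethardlimit(&foo_pool, 1024, "foo_pool limit", 60);
	 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
	 *
	 * where foo_drain(void *arg, int flags) releases objects the
	 * subsystem is merely caching back to the pool.)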
757 */ 758 #ifdef DIAGNOSTIC 759 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) { 760 mutex_exit(&pp->pr_lock); 761 panic("pool_get: %s: crossed hard limit", pp->pr_wchan); 762 } 763 #endif 764 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 765 if (pp->pr_drain_hook != NULL) { 766 /* 767 * Since the drain hook is going to free things 768 * back to the pool, unlock, call the hook, re-lock, 769 * and check the hardlimit condition again. 770 */ 771 mutex_exit(&pp->pr_lock); 772 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 773 mutex_enter(&pp->pr_lock); 774 if (pp->pr_nout < pp->pr_hardlimit) 775 goto startover; 776 } 777 778 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 779 /* 780 * XXX: A warning isn't logged in this case. Should 781 * it be? 782 */ 783 pp->pr_flags |= PR_WANTED; 784 cv_wait(&pp->pr_cv, &pp->pr_lock); 785 goto startover; 786 } 787 788 /* 789 * Log a message that the hard limit has been hit. 790 */ 791 if (pp->pr_hardlimit_warning != NULL && 792 ratecheck(&pp->pr_hardlimit_warning_last, 793 &pp->pr_hardlimit_ratecap)) 794 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 795 796 pp->pr_nfail++; 797 798 mutex_exit(&pp->pr_lock); 799 return (NULL); 800 } 801 802 /* 803 * The convention we use is that if `curpage' is not NULL, then 804 * it points at a non-empty bucket. In particular, `curpage' 805 * never points at a page header which has PR_PHINPAGE set and 806 * has no items in its bucket. 807 */ 808 if ((ph = pp->pr_curpage) == NULL) { 809 int error; 810 811 #ifdef DIAGNOSTIC 812 if (pp->pr_nitems != 0) { 813 mutex_exit(&pp->pr_lock); 814 printf("pool_get: %s: curpage NULL, nitems %u\n", 815 pp->pr_wchan, pp->pr_nitems); 816 panic("pool_get: nitems inconsistent"); 817 } 818 #endif 819 820 /* 821 * Call the back-end page allocator for more memory. 822 * Release the pool lock, as the back-end page allocator 823 * may block. 824 */ 825 error = pool_grow(pp, flags); 826 if (error != 0) { 827 /* 828 * We were unable to allocate a page or item 829 * header, but we released the lock during 830 * allocation, so perhaps items were freed 831 * back to the pool. Check for this case. 832 */ 833 if (pp->pr_curpage != NULL) 834 goto startover; 835 836 pp->pr_nfail++; 837 mutex_exit(&pp->pr_lock); 838 return (NULL); 839 } 840 841 /* Start the allocation process over. */ 842 goto startover; 843 } 844 if (pp->pr_roflags & PR_NOTOUCH) { 845 #ifdef DIAGNOSTIC 846 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) { 847 mutex_exit(&pp->pr_lock); 848 panic("pool_get: %s: page empty", pp->pr_wchan); 849 } 850 #endif 851 v = pr_item_notouch_get(pp, ph); 852 } else { 853 v = pi = LIST_FIRST(&ph->ph_itemlist); 854 if (__predict_false(v == NULL)) { 855 mutex_exit(&pp->pr_lock); 856 panic("pool_get: %s: page empty", pp->pr_wchan); 857 } 858 #ifdef DIAGNOSTIC 859 if (__predict_false(pp->pr_nitems == 0)) { 860 mutex_exit(&pp->pr_lock); 861 printf("pool_get: %s: items on itemlist, nitems %u\n", 862 pp->pr_wchan, pp->pr_nitems); 863 panic("pool_get: nitems inconsistent"); 864 } 865 #endif 866 867 #ifdef DIAGNOSTIC 868 if (__predict_false(pi->pi_magic != PI_MAGIC)) { 869 panic("pool_get(%s): free list modified: " 870 "magic=%x; page %p; item addr %p\n", 871 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 872 } 873 #endif 874 875 /* 876 * Remove from item list. 
877 */ 878 LIST_REMOVE(pi, pi_list); 879 } 880 pp->pr_nitems--; 881 pp->pr_nout++; 882 if (ph->ph_nmissing == 0) { 883 #ifdef DIAGNOSTIC 884 if (__predict_false(pp->pr_nidle == 0)) 885 panic("pool_get: nidle inconsistent"); 886 #endif 887 pp->pr_nidle--; 888 889 /* 890 * This page was previously empty. Move it to the list of 891 * partially-full pages. This page is already curpage. 892 */ 893 LIST_REMOVE(ph, ph_pagelist); 894 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 895 } 896 ph->ph_nmissing++; 897 if (ph->ph_nmissing == pp->pr_itemsperpage) { 898 #ifdef DIAGNOSTIC 899 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 && 900 !LIST_EMPTY(&ph->ph_itemlist))) { 901 mutex_exit(&pp->pr_lock); 902 panic("pool_get: %s: nmissing inconsistent", 903 pp->pr_wchan); 904 } 905 #endif 906 /* 907 * This page is now full. Move it to the full list 908 * and select a new current page. 909 */ 910 LIST_REMOVE(ph, ph_pagelist); 911 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 912 pool_update_curpage(pp); 913 } 914 915 pp->pr_nget++; 916 917 /* 918 * If we have a low water mark and we are now below that low 919 * water mark, add more items to the pool. 920 */ 921 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 922 /* 923 * XXX: Should we log a warning? Should we set up a timeout 924 * to try again in a second or so? The latter could break 925 * a caller's assumptions about interrupt protection, etc. 926 */ 927 } 928 929 mutex_exit(&pp->pr_lock); 930 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0); 931 FREECHECK_OUT(&pp->pr_freecheck, v); 932 return (v); 933 } 934 935 /* 936 * Internal version of pool_put(). Pool is already locked/entered. 937 */ 938 static void 939 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 940 { 941 struct pool_item *pi = v; 942 struct pool_item_header *ph; 943 944 KASSERT(mutex_owned(&pp->pr_lock)); 945 FREECHECK_IN(&pp->pr_freecheck, v); 946 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 947 948 #ifdef DIAGNOSTIC 949 if (__predict_false(pp->pr_nout == 0)) { 950 printf("pool %s: putting with none out\n", 951 pp->pr_wchan); 952 panic("pool_put"); 953 } 954 #endif 955 956 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 957 panic("pool_put: %s: page header missing", pp->pr_wchan); 958 } 959 960 /* 961 * Return to item list. 962 */ 963 if (pp->pr_roflags & PR_NOTOUCH) { 964 pr_item_notouch_put(pp, ph, v); 965 } else { 966 #ifdef DIAGNOSTIC 967 pi->pi_magic = PI_MAGIC; 968 #endif 969 #ifdef DEBUG 970 { 971 int i, *ip = v; 972 973 for (i = 0; i < pp->pr_size / sizeof(int); i++) { 974 *ip++ = PI_MAGIC; 975 } 976 } 977 #endif 978 979 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 980 } 981 KDASSERT(ph->ph_nmissing != 0); 982 ph->ph_nmissing--; 983 pp->pr_nput++; 984 pp->pr_nitems++; 985 pp->pr_nout--; 986 987 /* Cancel "pool empty" condition if it exists */ 988 if (pp->pr_curpage == NULL) 989 pp->pr_curpage = ph; 990 991 if (pp->pr_flags & PR_WANTED) { 992 pp->pr_flags &= ~PR_WANTED; 993 cv_broadcast(&pp->pr_cv); 994 } 995 996 /* 997 * If this page is now empty, do one of two things: 998 * 999 * (1) If we have more pages than the page high water mark, 1000 * free the page back to the system. ONLY CONSIDER 1001 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1002 * CLAIM. 1003 * 1004 * (2) Otherwise, move the page to the empty page list. 1005 * 1006 * Either way, select a new current page (so we use a partially-full 1007 * page if one is available). 
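	 *
	 * (Example: with pr_minpages at 2, from pool_setlowat(), and
	 * pr_maxpages at 4, from pool_sethiwat(), a pool currently
	 * holding 6 pages releases the newly idle page, while one
	 * holding 3 keeps it on pr_emptypages, timestamped, for
	 * pool_reclaim() to harvest later.)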
1008 */ 1009 if (ph->ph_nmissing == 0) { 1010 pp->pr_nidle++; 1011 if (pp->pr_npages > pp->pr_minpages && 1012 pp->pr_npages > pp->pr_maxpages) { 1013 pr_rmpage(pp, ph, pq); 1014 } else { 1015 LIST_REMOVE(ph, ph_pagelist); 1016 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1017 1018 /* 1019 * Update the timestamp on the page. A page must 1020 * be idle for some period of time before it can 1021 * be reclaimed by the pagedaemon. This minimizes 1022 * ping-pong'ing for memory. 1023 * 1024 * note for 64-bit time_t: truncating to 32-bit is not 1025 * a problem for our usage. 1026 */ 1027 ph->ph_time = time_uptime; 1028 } 1029 pool_update_curpage(pp); 1030 } 1031 1032 /* 1033 * If the page was previously completely full, move it to the 1034 * partially-full list and make it the current page. The next 1035 * allocation will get the item from this page, instead of 1036 * further fragmenting the pool. 1037 */ 1038 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1039 LIST_REMOVE(ph, ph_pagelist); 1040 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1041 pp->pr_curpage = ph; 1042 } 1043 } 1044 1045 void 1046 pool_put(struct pool *pp, void *v) 1047 { 1048 struct pool_pagelist pq; 1049 1050 LIST_INIT(&pq); 1051 1052 mutex_enter(&pp->pr_lock); 1053 pool_do_put(pp, v, &pq); 1054 mutex_exit(&pp->pr_lock); 1055 1056 pr_pagelist_free(pp, &pq); 1057 } 1058 1059 /* 1060 * pool_grow: grow a pool by a page. 1061 * 1062 * => called with pool locked. 1063 * => unlock and relock the pool. 1064 * => return with pool locked. 1065 */ 1066 1067 static int 1068 pool_grow(struct pool *pp, int flags) 1069 { 1070 struct pool_item_header *ph = NULL; 1071 char *cp; 1072 1073 mutex_exit(&pp->pr_lock); 1074 cp = pool_allocator_alloc(pp, flags); 1075 if (__predict_true(cp != NULL)) { 1076 ph = pool_alloc_item_header(pp, cp, flags); 1077 } 1078 if (__predict_false(cp == NULL || ph == NULL)) { 1079 if (cp != NULL) { 1080 pool_allocator_free(pp, cp); 1081 } 1082 mutex_enter(&pp->pr_lock); 1083 return ENOMEM; 1084 } 1085 1086 mutex_enter(&pp->pr_lock); 1087 pool_prime_page(pp, cp, ph); 1088 pp->pr_npagealloc++; 1089 return 0; 1090 } 1091 1092 /* 1093 * Add N items to the pool. 1094 */ 1095 int 1096 pool_prime(struct pool *pp, int n) 1097 { 1098 int newpages; 1099 int error = 0; 1100 1101 mutex_enter(&pp->pr_lock); 1102 1103 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1104 1105 while (newpages-- > 0) { 1106 error = pool_grow(pp, PR_NOWAIT); 1107 if (error) { 1108 break; 1109 } 1110 pp->pr_minpages++; 1111 } 1112 1113 if (pp->pr_minpages >= pp->pr_maxpages) 1114 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1115 1116 mutex_exit(&pp->pr_lock); 1117 return error; 1118 } 1119 1120 /* 1121 * Add a page worth of items to the pool. 1122 * 1123 * Note, we must be called with the pool descriptor LOCKED. 1124 */ 1125 static void 1126 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1127 { 1128 struct pool_item *pi; 1129 void *cp = storage; 1130 const unsigned int align = pp->pr_align; 1131 const unsigned int ioff = pp->pr_itemoffset; 1132 int n; 1133 1134 KASSERT(mutex_owned(&pp->pr_lock)); 1135 1136 #ifdef DIAGNOSTIC 1137 if ((pp->pr_roflags & PR_NOALIGN) == 0 && 1138 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0) 1139 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan); 1140 #endif 1141 1142 /* 1143 * Insert page header. 
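	 *
	 * (pool_grow() calls this once per new page, on behalf of
	 * pool_get(), pool_prime() and pool_catchup().  A subsystem that
	 * must not see early pool_get(..., PR_NOWAIT) failures typically
	 * pre-fills its pool right after pool_init(), e.g., hypothetically:
	 *
	 *	pool_prime(&foo_pool, 64);
	 *	pool_setlowat(&foo_pool, 64);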
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	LIST_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	ph->ph_time = time_uptime;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	ph->ph_off = pp->pr_curcolor;
	cp = (char *)cp + ph->ph_off;
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (char *)cp + align - ioff;

	KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0);

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	if (pp->pr_roflags & PR_NOTOUCH) {
		pr_item_notouch_init(pp, ph);
	} else {
		while (n--) {
			pi = (struct pool_item *)cp;

			KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

			/* Insert on page list */
			LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
			pi->pi_magic = PI_MAGIC;
#endif
			cp = (char *)cp + pp->pr_size;

			KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0);
		}
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note 1: we never wait for memory here; we let the caller decide what to do.
 *
 * Note 2: we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		error = pool_grow(pp, PR_NOWAIT);
		if (error) {
			break;
		}
	}
	return error;
}

static void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
	KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) ||
	    (pp->pr_curpage != NULL && pp->pr_nitems > 0));
}

void
pool_setlowat(struct pool *pp, int n)
{

	mutex_enter(&pp->pr_lock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	mutex_exit(&pp->pr_lock);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	mutex_enter(&pp->pr_lock);

	pp->pr_maxpages = (n == 0)
		?
0 1272 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1273 1274 mutex_exit(&pp->pr_lock); 1275 } 1276 1277 void 1278 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1279 { 1280 1281 mutex_enter(&pp->pr_lock); 1282 1283 pp->pr_hardlimit = n; 1284 pp->pr_hardlimit_warning = warnmess; 1285 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1286 pp->pr_hardlimit_warning_last.tv_sec = 0; 1287 pp->pr_hardlimit_warning_last.tv_usec = 0; 1288 1289 /* 1290 * In-line version of pool_sethiwat(), because we don't want to 1291 * release the lock. 1292 */ 1293 pp->pr_maxpages = (n == 0) 1294 ? 0 1295 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1296 1297 mutex_exit(&pp->pr_lock); 1298 } 1299 1300 /* 1301 * Release all complete pages that have not been used recently. 1302 * 1303 * Must not be called from interrupt context. 1304 */ 1305 int 1306 pool_reclaim(struct pool *pp) 1307 { 1308 struct pool_item_header *ph, *phnext; 1309 struct pool_pagelist pq; 1310 uint32_t curtime; 1311 bool klock; 1312 int rv; 1313 1314 KASSERT(!cpu_intr_p() && !cpu_softintr_p()); 1315 1316 if (pp->pr_drain_hook != NULL) { 1317 /* 1318 * The drain hook must be called with the pool unlocked. 1319 */ 1320 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1321 } 1322 1323 /* 1324 * XXXSMP Because we do not want to cause non-MPSAFE code 1325 * to block. 1326 */ 1327 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1328 pp->pr_ipl == IPL_SOFTSERIAL) { 1329 KERNEL_LOCK(1, NULL); 1330 klock = true; 1331 } else 1332 klock = false; 1333 1334 /* Reclaim items from the pool's cache (if any). */ 1335 if (pp->pr_cache != NULL) 1336 pool_cache_invalidate(pp->pr_cache); 1337 1338 if (mutex_tryenter(&pp->pr_lock) == 0) { 1339 if (klock) { 1340 KERNEL_UNLOCK_ONE(NULL); 1341 } 1342 return (0); 1343 } 1344 1345 LIST_INIT(&pq); 1346 1347 curtime = time_uptime; 1348 1349 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1350 phnext = LIST_NEXT(ph, ph_pagelist); 1351 1352 /* Check our minimum page claim */ 1353 if (pp->pr_npages <= pp->pr_minpages) 1354 break; 1355 1356 KASSERT(ph->ph_nmissing == 0); 1357 if (curtime - ph->ph_time < pool_inactive_time) 1358 continue; 1359 1360 /* 1361 * If freeing this page would put us below 1362 * the low water mark, stop now. 1363 */ 1364 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1365 pp->pr_minitems) 1366 break; 1367 1368 pr_rmpage(pp, ph, &pq); 1369 } 1370 1371 mutex_exit(&pp->pr_lock); 1372 1373 if (LIST_EMPTY(&pq)) 1374 rv = 0; 1375 else { 1376 pr_pagelist_free(pp, &pq); 1377 rv = 1; 1378 } 1379 1380 if (klock) { 1381 KERNEL_UNLOCK_ONE(NULL); 1382 } 1383 1384 return (rv); 1385 } 1386 1387 /* 1388 * Drain pools, one at a time. The drained pool is returned within ppp. 1389 * 1390 * Note, must never be called from interrupt context. 1391 */ 1392 bool 1393 pool_drain(struct pool **ppp) 1394 { 1395 bool reclaimed; 1396 struct pool *pp; 1397 1398 KASSERT(!TAILQ_EMPTY(&pool_head)); 1399 1400 pp = NULL; 1401 1402 /* Find next pool to drain, and add a reference. */ 1403 mutex_enter(&pool_head_lock); 1404 do { 1405 if (drainpp == NULL) { 1406 drainpp = TAILQ_FIRST(&pool_head); 1407 } 1408 if (drainpp != NULL) { 1409 pp = drainpp; 1410 drainpp = TAILQ_NEXT(pp, pr_poollist); 1411 } 1412 /* 1413 * Skip completely idle pools. We depend on at least 1414 * one pool in the system being active. 
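		 *
		 * (The expected caller is the pagedaemon; a sketch of that
		 * use, not part of this file:
		 *
		 *	struct pool *pp = NULL;
		 *	(void)pool_drain(&pp);
		 *
		 * called repeatedly under memory pressure, releasing one
		 * pool's idle pages per call.)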
1415 */ 1416 } while (pp == NULL || pp->pr_npages == 0); 1417 pp->pr_refcnt++; 1418 mutex_exit(&pool_head_lock); 1419 1420 /* Drain the cache (if any) and pool.. */ 1421 reclaimed = pool_reclaim(pp); 1422 1423 /* Finally, unlock the pool. */ 1424 mutex_enter(&pool_head_lock); 1425 pp->pr_refcnt--; 1426 cv_broadcast(&pool_busy); 1427 mutex_exit(&pool_head_lock); 1428 1429 if (ppp != NULL) 1430 *ppp = pp; 1431 1432 return reclaimed; 1433 } 1434 1435 /* 1436 * Diagnostic helpers. 1437 */ 1438 1439 void 1440 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1441 { 1442 struct pool *pp; 1443 1444 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1445 pool_printit(pp, modif, pr); 1446 } 1447 } 1448 1449 void 1450 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1451 { 1452 1453 if (pp == NULL) { 1454 (*pr)("Must specify a pool to print.\n"); 1455 return; 1456 } 1457 1458 pool_print1(pp, modif, pr); 1459 } 1460 1461 static void 1462 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1463 void (*pr)(const char *, ...)) 1464 { 1465 struct pool_item_header *ph; 1466 #ifdef DIAGNOSTIC 1467 struct pool_item *pi; 1468 #endif 1469 1470 LIST_FOREACH(ph, pl, ph_pagelist) { 1471 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1472 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1473 #ifdef DIAGNOSTIC 1474 if (!(pp->pr_roflags & PR_NOTOUCH)) { 1475 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1476 if (pi->pi_magic != PI_MAGIC) { 1477 (*pr)("\t\t\titem %p, magic 0x%x\n", 1478 pi, pi->pi_magic); 1479 } 1480 } 1481 } 1482 #endif 1483 } 1484 } 1485 1486 static void 1487 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1488 { 1489 struct pool_item_header *ph; 1490 pool_cache_t pc; 1491 pcg_t *pcg; 1492 pool_cache_cpu_t *cc; 1493 uint64_t cpuhit, cpumiss; 1494 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1495 char c; 1496 1497 while ((c = *modif++) != '\0') { 1498 if (c == 'l') 1499 print_log = 1; 1500 if (c == 'p') 1501 print_pagelist = 1; 1502 if (c == 'c') 1503 print_cache = 1; 1504 } 1505 1506 if ((pc = pp->pr_cache) != NULL) { 1507 (*pr)("POOL CACHE"); 1508 } else { 1509 (*pr)("POOL"); 1510 } 1511 1512 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1513 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1514 pp->pr_roflags); 1515 (*pr)("\talloc %p\n", pp->pr_alloc); 1516 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1517 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1518 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1519 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1520 1521 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1522 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1523 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1524 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1525 1526 if (print_pagelist == 0) 1527 goto skip_pagelist; 1528 1529 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1530 (*pr)("\n\tempty page list:\n"); 1531 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1532 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1533 (*pr)("\n\tfull page list:\n"); 1534 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1535 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1536 (*pr)("\n\tpartial-page list:\n"); 1537 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1538 1539 if (pp->pr_curpage == NULL) 1540 (*pr)("\tno current page\n"); 1541 else 1542 (*pr)("\tcurpage %p\n", 
pp->pr_curpage->ph_page); 1543 1544 skip_pagelist: 1545 if (print_log == 0) 1546 goto skip_log; 1547 1548 (*pr)("\n"); 1549 1550 skip_log: 1551 1552 #define PR_GROUPLIST(pcg) \ 1553 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1554 for (i = 0; i < pcg->pcg_size; i++) { \ 1555 if (pcg->pcg_objects[i].pcgo_pa != \ 1556 POOL_PADDR_INVALID) { \ 1557 (*pr)("\t\t\t%p, 0x%llx\n", \ 1558 pcg->pcg_objects[i].pcgo_va, \ 1559 (unsigned long long) \ 1560 pcg->pcg_objects[i].pcgo_pa); \ 1561 } else { \ 1562 (*pr)("\t\t\t%p\n", \ 1563 pcg->pcg_objects[i].pcgo_va); \ 1564 } \ 1565 } 1566 1567 if (pc != NULL) { 1568 cpuhit = 0; 1569 cpumiss = 0; 1570 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1571 if ((cc = pc->pc_cpus[i]) == NULL) 1572 continue; 1573 cpuhit += cc->cc_hits; 1574 cpumiss += cc->cc_misses; 1575 } 1576 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1577 (*pr)("\tcache layer hits %llu misses %llu\n", 1578 pc->pc_hits, pc->pc_misses); 1579 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1580 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1581 pc->pc_contended); 1582 (*pr)("\tcache layer empty groups %u full groups %u\n", 1583 pc->pc_nempty, pc->pc_nfull); 1584 if (print_cache) { 1585 (*pr)("\tfull cache groups:\n"); 1586 for (pcg = pc->pc_fullgroups; pcg != NULL; 1587 pcg = pcg->pcg_next) { 1588 PR_GROUPLIST(pcg); 1589 } 1590 (*pr)("\tempty cache groups:\n"); 1591 for (pcg = pc->pc_emptygroups; pcg != NULL; 1592 pcg = pcg->pcg_next) { 1593 PR_GROUPLIST(pcg); 1594 } 1595 } 1596 } 1597 #undef PR_GROUPLIST 1598 } 1599 1600 static int 1601 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1602 { 1603 struct pool_item *pi; 1604 void *page; 1605 int n; 1606 1607 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1608 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask); 1609 if (page != ph->ph_page && 1610 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1611 if (label != NULL) 1612 printf("%s: ", label); 1613 printf("pool(%p:%s): page inconsistency: page %p;" 1614 " at page head addr %p (p %p)\n", pp, 1615 pp->pr_wchan, ph->ph_page, 1616 ph, page); 1617 return 1; 1618 } 1619 } 1620 1621 if ((pp->pr_roflags & PR_NOTOUCH) != 0) 1622 return 0; 1623 1624 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1625 pi != NULL; 1626 pi = LIST_NEXT(pi,pi_list), n++) { 1627 1628 #ifdef DIAGNOSTIC 1629 if (pi->pi_magic != PI_MAGIC) { 1630 if (label != NULL) 1631 printf("%s: ", label); 1632 printf("pool(%s): free list modified: magic=%x;" 1633 " page %p; item ordinal %d; addr %p\n", 1634 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1635 n, pi); 1636 panic("pool"); 1637 } 1638 #endif 1639 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1640 continue; 1641 } 1642 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask); 1643 if (page == ph->ph_page) 1644 continue; 1645 1646 if (label != NULL) 1647 printf("%s: ", label); 1648 printf("pool(%p:%s): page inconsistency: page %p;" 1649 " item ordinal %d; addr %p (p %p)\n", pp, 1650 pp->pr_wchan, ph->ph_page, 1651 n, pi, page); 1652 return 1; 1653 } 1654 return 0; 1655 } 1656 1657 1658 int 1659 pool_chk(struct pool *pp, const char *label) 1660 { 1661 struct pool_item_header *ph; 1662 int r = 0; 1663 1664 mutex_enter(&pp->pr_lock); 1665 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1666 r = pool_chk_page(pp, label, ph); 1667 if (r) { 1668 goto out; 1669 } 1670 } 1671 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1672 r = pool_chk_page(pp, label, ph); 1673 if (r) { 1674 goto out; 1675 } 1676 } 1677 
LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1678 r = pool_chk_page(pp, label, ph); 1679 if (r) { 1680 goto out; 1681 } 1682 } 1683 1684 out: 1685 mutex_exit(&pp->pr_lock); 1686 return (r); 1687 } 1688 1689 /* 1690 * pool_cache_init: 1691 * 1692 * Initialize a pool cache. 1693 */ 1694 pool_cache_t 1695 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 1696 const char *wchan, struct pool_allocator *palloc, int ipl, 1697 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 1698 { 1699 pool_cache_t pc; 1700 1701 pc = pool_get(&cache_pool, PR_WAITOK); 1702 if (pc == NULL) 1703 return NULL; 1704 1705 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 1706 palloc, ipl, ctor, dtor, arg); 1707 1708 return pc; 1709 } 1710 1711 /* 1712 * pool_cache_bootstrap: 1713 * 1714 * Kernel-private version of pool_cache_init(). The caller 1715 * provides initial storage. 1716 */ 1717 void 1718 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 1719 u_int align_offset, u_int flags, const char *wchan, 1720 struct pool_allocator *palloc, int ipl, 1721 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 1722 void *arg) 1723 { 1724 CPU_INFO_ITERATOR cii; 1725 pool_cache_t pc1; 1726 struct cpu_info *ci; 1727 struct pool *pp; 1728 1729 pp = &pc->pc_pool; 1730 if (palloc == NULL && ipl == IPL_NONE) 1731 palloc = &pool_allocator_nointr; 1732 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 1733 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 1734 1735 if (ctor == NULL) { 1736 ctor = (int (*)(void *, void *, int))nullop; 1737 } 1738 if (dtor == NULL) { 1739 dtor = (void (*)(void *, void *))nullop; 1740 } 1741 1742 pc->pc_emptygroups = NULL; 1743 pc->pc_fullgroups = NULL; 1744 pc->pc_partgroups = NULL; 1745 pc->pc_ctor = ctor; 1746 pc->pc_dtor = dtor; 1747 pc->pc_arg = arg; 1748 pc->pc_hits = 0; 1749 pc->pc_misses = 0; 1750 pc->pc_nempty = 0; 1751 pc->pc_npart = 0; 1752 pc->pc_nfull = 0; 1753 pc->pc_contended = 0; 1754 pc->pc_refcnt = 0; 1755 pc->pc_freecheck = NULL; 1756 1757 if ((flags & PR_LARGECACHE) != 0) { 1758 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 1759 pc->pc_pcgpool = &pcg_large_pool; 1760 } else { 1761 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 1762 pc->pc_pcgpool = &pcg_normal_pool; 1763 } 1764 1765 /* Allocate per-CPU caches. */ 1766 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 1767 pc->pc_ncpu = 0; 1768 if (ncpu < 2) { 1769 /* XXX For sparc: boot CPU is not attached yet. */ 1770 pool_cache_cpu_init1(curcpu(), pc); 1771 } else { 1772 for (CPU_INFO_FOREACH(cii, ci)) { 1773 pool_cache_cpu_init1(ci, pc); 1774 } 1775 } 1776 1777 /* Add to list of all pools. */ 1778 if (__predict_true(!cold)) 1779 mutex_enter(&pool_head_lock); 1780 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 1781 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 1782 break; 1783 } 1784 if (pc1 == NULL) 1785 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 1786 else 1787 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 1788 if (__predict_true(!cold)) 1789 mutex_exit(&pool_head_lock); 1790 1791 membar_sync(); 1792 pp->pr_cache = pc; 1793 } 1794 1795 /* 1796 * pool_cache_destroy: 1797 * 1798 * Destroy a pool cache. 1799 */ 1800 void 1801 pool_cache_destroy(pool_cache_t pc) 1802 { 1803 1804 pool_cache_bootstrap_destroy(pc); 1805 pool_put(&cache_pool, pc); 1806 } 1807 1808 /* 1809 * pool_cache_bootstrap_destroy: 1810 * 1811 * Destroy a pool cache. 
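 *
 *	Most callers go through pool_cache_destroy() above, e.g.,
 *	hypothetically, pool_cache_destroy(foo_cache).  Either way every
 *	object must already have been returned, since the underlying
 *	pool_destroy() refuses to tear down a pool with items still out.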
1812 */ 1813 void 1814 pool_cache_bootstrap_destroy(pool_cache_t pc) 1815 { 1816 struct pool *pp = &pc->pc_pool; 1817 u_int i; 1818 1819 /* Remove it from the global list. */ 1820 mutex_enter(&pool_head_lock); 1821 while (pc->pc_refcnt != 0) 1822 cv_wait(&pool_busy, &pool_head_lock); 1823 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 1824 mutex_exit(&pool_head_lock); 1825 1826 /* First, invalidate the entire cache. */ 1827 pool_cache_invalidate(pc); 1828 1829 /* Disassociate it from the pool. */ 1830 mutex_enter(&pp->pr_lock); 1831 pp->pr_cache = NULL; 1832 mutex_exit(&pp->pr_lock); 1833 1834 /* Destroy per-CPU data */ 1835 for (i = 0; i < __arraycount(pc->pc_cpus); i++) 1836 pool_cache_invalidate_cpu(pc, i); 1837 1838 /* Finally, destroy it. */ 1839 mutex_destroy(&pc->pc_lock); 1840 pool_destroy(pp); 1841 } 1842 1843 /* 1844 * pool_cache_cpu_init1: 1845 * 1846 * Called for each pool_cache whenever a new CPU is attached. 1847 */ 1848 static void 1849 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 1850 { 1851 pool_cache_cpu_t *cc; 1852 int index; 1853 1854 index = ci->ci_index; 1855 1856 KASSERT(index < __arraycount(pc->pc_cpus)); 1857 1858 if ((cc = pc->pc_cpus[index]) != NULL) { 1859 KASSERT(cc->cc_cpuindex == index); 1860 return; 1861 } 1862 1863 /* 1864 * The first CPU is 'free'. This needs to be the case for 1865 * bootstrap - we may not be able to allocate yet. 1866 */ 1867 if (pc->pc_ncpu == 0) { 1868 cc = &pc->pc_cpu0; 1869 pc->pc_ncpu = 1; 1870 } else { 1871 mutex_enter(&pc->pc_lock); 1872 pc->pc_ncpu++; 1873 mutex_exit(&pc->pc_lock); 1874 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 1875 } 1876 1877 cc->cc_ipl = pc->pc_pool.pr_ipl; 1878 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 1879 cc->cc_cache = pc; 1880 cc->cc_cpuindex = index; 1881 cc->cc_hits = 0; 1882 cc->cc_misses = 0; 1883 cc->cc_current = __UNCONST(&pcg_dummy); 1884 cc->cc_previous = __UNCONST(&pcg_dummy); 1885 1886 pc->pc_cpus[index] = cc; 1887 } 1888 1889 /* 1890 * pool_cache_cpu_init: 1891 * 1892 * Called whenever a new CPU is attached. 1893 */ 1894 void 1895 pool_cache_cpu_init(struct cpu_info *ci) 1896 { 1897 pool_cache_t pc; 1898 1899 mutex_enter(&pool_head_lock); 1900 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 1901 pc->pc_refcnt++; 1902 mutex_exit(&pool_head_lock); 1903 1904 pool_cache_cpu_init1(ci, pc); 1905 1906 mutex_enter(&pool_head_lock); 1907 pc->pc_refcnt--; 1908 cv_broadcast(&pool_busy); 1909 } 1910 mutex_exit(&pool_head_lock); 1911 } 1912 1913 /* 1914 * pool_cache_reclaim: 1915 * 1916 * Reclaim memory from a pool cache. 1917 */ 1918 bool 1919 pool_cache_reclaim(pool_cache_t pc) 1920 { 1921 1922 return pool_reclaim(&pc->pc_pool); 1923 } 1924 1925 static void 1926 pool_cache_destruct_object1(pool_cache_t pc, void *object) 1927 { 1928 1929 (*pc->pc_dtor)(pc->pc_arg, object); 1930 pool_put(&pc->pc_pool, object); 1931 } 1932 1933 /* 1934 * pool_cache_destruct_object: 1935 * 1936 * Force destruction of an object and its release back into 1937 * the pool. 1938 */ 1939 void 1940 pool_cache_destruct_object(pool_cache_t pc, void *object) 1941 { 1942 1943 FREECHECK_IN(&pc->pc_freecheck, object); 1944 1945 pool_cache_destruct_object1(pc, object); 1946 } 1947 1948 /* 1949 * pool_cache_invalidate_groups: 1950 * 1951 * Invalidate a chain of groups and destruct all objects. 
1952 */ 1953 static void 1954 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 1955 { 1956 void *object; 1957 pcg_t *next; 1958 int i; 1959 1960 for (; pcg != NULL; pcg = next) { 1961 next = pcg->pcg_next; 1962 1963 for (i = 0; i < pcg->pcg_avail; i++) { 1964 object = pcg->pcg_objects[i].pcgo_va; 1965 pool_cache_destruct_object1(pc, object); 1966 } 1967 1968 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 1969 pool_put(&pcg_large_pool, pcg); 1970 } else { 1971 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 1972 pool_put(&pcg_normal_pool, pcg); 1973 } 1974 } 1975 } 1976 1977 /* 1978 * pool_cache_invalidate: 1979 * 1980 * Invalidate a pool cache (destruct and release all of the 1981 * cached objects). Does not reclaim objects from the pool. 1982 * 1983 * Note: For pool caches that provide constructed objects, there 1984 * is an assumption that another level of synchronization is occurring 1985 * between the input to the constructor and the cache invalidation. 1986 * 1987 * Invalidation is a costly process and should not be called from 1988 * interrupt context. 1989 */ 1990 void 1991 pool_cache_invalidate(pool_cache_t pc) 1992 { 1993 uint64_t where; 1994 pcg_t *full, *empty, *part; 1995 1996 KASSERT(!cpu_intr_p() && !cpu_softintr_p()); 1997 1998 if (ncpu < 2 || !mp_online) { 1999 /* 2000 * We might be called early enough in the boot process 2001 * for the CPU data structures to not be fully initialized. 2002 * In this case, transfer the content of the local CPU's 2003 * cache back into global cache as only this CPU is currently 2004 * running. 2005 */ 2006 pool_cache_transfer(pc); 2007 } else { 2008 /* 2009 * Signal all CPUs that they must transfer their local 2010 * cache back to the global pool then wait for the xcall to 2011 * complete. 2012 */ 2013 where = xc_broadcast(0, (xcfunc_t)pool_cache_transfer, 2014 pc, NULL); 2015 xc_wait(where); 2016 } 2017 2018 /* Empty pool caches, then invalidate objects */ 2019 mutex_enter(&pc->pc_lock); 2020 full = pc->pc_fullgroups; 2021 empty = pc->pc_emptygroups; 2022 part = pc->pc_partgroups; 2023 pc->pc_fullgroups = NULL; 2024 pc->pc_emptygroups = NULL; 2025 pc->pc_partgroups = NULL; 2026 pc->pc_nfull = 0; 2027 pc->pc_nempty = 0; 2028 pc->pc_npart = 0; 2029 mutex_exit(&pc->pc_lock); 2030 2031 pool_cache_invalidate_groups(pc, full); 2032 pool_cache_invalidate_groups(pc, empty); 2033 pool_cache_invalidate_groups(pc, part); 2034 } 2035 2036 /* 2037 * pool_cache_invalidate_cpu: 2038 * 2039 * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2040 * identified by its associated index. 2041 * It is caller's responsibility to ensure that no operation is 2042 * taking place on this pool cache while doing this invalidation. 2043 * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2044 * pool cached objects from a CPU different from the one currently running 2045 * may result in an undefined behaviour. 
2046 */ 2047 static void 2048 pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2049 { 2050 pool_cache_cpu_t *cc; 2051 pcg_t *pcg; 2052 2053 if ((cc = pc->pc_cpus[index]) == NULL) 2054 return; 2055 2056 if ((pcg = cc->cc_current) != &pcg_dummy) { 2057 pcg->pcg_next = NULL; 2058 pool_cache_invalidate_groups(pc, pcg); 2059 } 2060 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2061 pcg->pcg_next = NULL; 2062 pool_cache_invalidate_groups(pc, pcg); 2063 } 2064 if (cc != &pc->pc_cpu0) 2065 pool_put(&cache_cpu_pool, cc); 2066 2067 } 2068 2069 void 2070 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2071 { 2072 2073 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2074 } 2075 2076 void 2077 pool_cache_setlowat(pool_cache_t pc, int n) 2078 { 2079 2080 pool_setlowat(&pc->pc_pool, n); 2081 } 2082 2083 void 2084 pool_cache_sethiwat(pool_cache_t pc, int n) 2085 { 2086 2087 pool_sethiwat(&pc->pc_pool, n); 2088 } 2089 2090 void 2091 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2092 { 2093 2094 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2095 } 2096 2097 static bool __noinline 2098 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2099 paddr_t *pap, int flags) 2100 { 2101 pcg_t *pcg, *cur; 2102 uint64_t ncsw; 2103 pool_cache_t pc; 2104 void *object; 2105 2106 KASSERT(cc->cc_current->pcg_avail == 0); 2107 KASSERT(cc->cc_previous->pcg_avail == 0); 2108 2109 pc = cc->cc_cache; 2110 cc->cc_misses++; 2111 2112 /* 2113 * Nothing was available locally. Try and grab a group 2114 * from the cache. 2115 */ 2116 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2117 ncsw = curlwp->l_ncsw; 2118 mutex_enter(&pc->pc_lock); 2119 pc->pc_contended++; 2120 2121 /* 2122 * If we context switched while locking, then 2123 * our view of the per-CPU data is invalid: 2124 * retry. 2125 */ 2126 if (curlwp->l_ncsw != ncsw) { 2127 mutex_exit(&pc->pc_lock); 2128 return true; 2129 } 2130 } 2131 2132 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2133 /* 2134 * If there's a full group, release our empty 2135 * group back to the cache. Install the full 2136 * group as cc_current and return. 2137 */ 2138 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2139 KASSERT(cur->pcg_avail == 0); 2140 cur->pcg_next = pc->pc_emptygroups; 2141 pc->pc_emptygroups = cur; 2142 pc->pc_nempty++; 2143 } 2144 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2145 cc->cc_current = pcg; 2146 pc->pc_fullgroups = pcg->pcg_next; 2147 pc->pc_hits++; 2148 pc->pc_nfull--; 2149 mutex_exit(&pc->pc_lock); 2150 return true; 2151 } 2152 2153 /* 2154 * Nothing available locally or in cache. Take the slow 2155 * path: fetch a new object from the pool and construct 2156 * it. 
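	 *
	 * The constructor is invoked as (*pc->pc_ctor)(pc->pc_arg,
	 * object, flags) and must return 0 on success; any other value
	 * causes the raw item to be returned to the pool and the caller
	 * to see NULL.  A minimal sketch of a conforming constructor
	 * (the name, size and zeroing policy are illustrative
	 * assumptions, not part of this file):
	 *
	 *	static int
	 *	example_ctor(void *arg, void *obj, int flags)
	 *	{
	 *		memset(obj, 0, example_size);
	 *		return 0;
	 *	}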
2157 */ 2158 pc->pc_misses++; 2159 mutex_exit(&pc->pc_lock); 2160 splx(s); 2161 2162 object = pool_get(&pc->pc_pool, flags); 2163 *objectp = object; 2164 if (__predict_false(object == NULL)) 2165 return false; 2166 2167 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2168 pool_put(&pc->pc_pool, object); 2169 *objectp = NULL; 2170 return false; 2171 } 2172 2173 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) & 2174 (pc->pc_pool.pr_align - 1)) == 0); 2175 2176 if (pap != NULL) { 2177 #ifdef POOL_VTOPHYS 2178 *pap = POOL_VTOPHYS(object); 2179 #else 2180 *pap = POOL_PADDR_INVALID; 2181 #endif 2182 } 2183 2184 FREECHECK_OUT(&pc->pc_freecheck, object); 2185 return false; 2186 } 2187 2188 /* 2189 * pool_cache_get{,_paddr}: 2190 * 2191 * Get an object from a pool cache (optionally returning 2192 * the physical address of the object). 2193 */ 2194 void * 2195 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2196 { 2197 pool_cache_cpu_t *cc; 2198 pcg_t *pcg; 2199 void *object; 2200 int s; 2201 2202 KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()) || 2203 (pc->pc_pool.pr_ipl != IPL_NONE || cold || panicstr != NULL), 2204 "pool '%s' is IPL_NONE, but called from interrupt context\n", 2205 pc->pc_pool.pr_wchan); 2206 2207 if (flags & PR_WAITOK) { 2208 ASSERT_SLEEPABLE(); 2209 } 2210 2211 /* Lock out interrupts and disable preemption. */ 2212 s = splvm(); 2213 while (/* CONSTCOND */ true) { 2214 /* Try and allocate an object from the current group. */ 2215 cc = pc->pc_cpus[curcpu()->ci_index]; 2216 KASSERT(cc->cc_cache == pc); 2217 pcg = cc->cc_current; 2218 if (__predict_true(pcg->pcg_avail > 0)) { 2219 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2220 if (__predict_false(pap != NULL)) 2221 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2222 #if defined(DIAGNOSTIC) 2223 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2224 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2225 KASSERT(object != NULL); 2226 #endif 2227 cc->cc_hits++; 2228 splx(s); 2229 FREECHECK_OUT(&pc->pc_freecheck, object); 2230 return object; 2231 } 2232 2233 /* 2234 * That failed. If the previous group isn't empty, swap 2235 * it with the current group and allocate from there. 2236 */ 2237 pcg = cc->cc_previous; 2238 if (__predict_true(pcg->pcg_avail > 0)) { 2239 cc->cc_previous = cc->cc_current; 2240 cc->cc_current = pcg; 2241 continue; 2242 } 2243 2244 /* 2245 * Can't allocate from either group: try the slow path. 2246 * If get_slow() allocated an object for us, or if 2247 * no more objects are available, it will return false. 2248 * Otherwise, we need to retry. 2249 */ 2250 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2251 break; 2252 } 2253 2254 return object; 2255 } 2256 2257 static bool __noinline 2258 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2259 { 2260 pcg_t *pcg, *cur; 2261 uint64_t ncsw; 2262 pool_cache_t pc; 2263 2264 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2265 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2266 2267 pc = cc->cc_cache; 2268 pcg = NULL; 2269 cc->cc_misses++; 2270 2271 /* 2272 * If there are no empty groups in the cache then allocate one 2273 * while still unlocked. 2274 */ 2275 if (__predict_false(pc->pc_emptygroups == NULL)) { 2276 if (__predict_true(!pool_cache_disable)) { 2277 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2278 } 2279 if (__predict_true(pcg != NULL)) { 2280 pcg->pcg_avail = 0; 2281 pcg->pcg_size = pc->pc_pcgsize; 2282 } 2283 } 2284 2285 /* Lock the cache. 
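	 * If we cannot take it without blocking, remember curlwp's
	 * context switch count first: sleeping on the mutex may leave us
	 * running on a different CPU, in which case the per-CPU state
	 * reached through 'cc' is stale and the caller must retry.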
*/ 2286 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2287 ncsw = curlwp->l_ncsw; 2288 mutex_enter(&pc->pc_lock); 2289 pc->pc_contended++; 2290 2291 /* 2292 * If we context switched while locking, then our view of 2293 * the per-CPU data is invalid: retry. 2294 */ 2295 if (__predict_false(curlwp->l_ncsw != ncsw)) { 2296 mutex_exit(&pc->pc_lock); 2297 if (pcg != NULL) { 2298 pool_put(pc->pc_pcgpool, pcg); 2299 } 2300 return true; 2301 } 2302 } 2303 2304 /* If there are no empty groups in the cache then allocate one. */ 2305 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2306 pcg = pc->pc_emptygroups; 2307 pc->pc_emptygroups = pcg->pcg_next; 2308 pc->pc_nempty--; 2309 } 2310 2311 /* 2312 * If there's a empty group, release our full group back 2313 * to the cache. Install the empty group to the local CPU 2314 * and return. 2315 */ 2316 if (pcg != NULL) { 2317 KASSERT(pcg->pcg_avail == 0); 2318 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2319 cc->cc_previous = pcg; 2320 } else { 2321 cur = cc->cc_current; 2322 if (__predict_true(cur != &pcg_dummy)) { 2323 KASSERT(cur->pcg_avail == cur->pcg_size); 2324 cur->pcg_next = pc->pc_fullgroups; 2325 pc->pc_fullgroups = cur; 2326 pc->pc_nfull++; 2327 } 2328 cc->cc_current = pcg; 2329 } 2330 pc->pc_hits++; 2331 mutex_exit(&pc->pc_lock); 2332 return true; 2333 } 2334 2335 /* 2336 * Nothing available locally or in cache, and we didn't 2337 * allocate an empty group. Take the slow path and destroy 2338 * the object here and now. 2339 */ 2340 pc->pc_misses++; 2341 mutex_exit(&pc->pc_lock); 2342 splx(s); 2343 pool_cache_destruct_object(pc, object); 2344 2345 return false; 2346 } 2347 2348 /* 2349 * pool_cache_put{,_paddr}: 2350 * 2351 * Put an object back to the pool cache (optionally caching the 2352 * physical address of the object). 2353 */ 2354 void 2355 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2356 { 2357 pool_cache_cpu_t *cc; 2358 pcg_t *pcg; 2359 int s; 2360 2361 KASSERT(object != NULL); 2362 FREECHECK_IN(&pc->pc_freecheck, object); 2363 2364 /* Lock out interrupts and disable preemption. */ 2365 s = splvm(); 2366 while (/* CONSTCOND */ true) { 2367 /* If the current group isn't full, release it there. */ 2368 cc = pc->pc_cpus[curcpu()->ci_index]; 2369 KASSERT(cc->cc_cache == pc); 2370 pcg = cc->cc_current; 2371 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2372 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2373 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2374 pcg->pcg_avail++; 2375 cc->cc_hits++; 2376 splx(s); 2377 return; 2378 } 2379 2380 /* 2381 * That failed. If the previous group isn't full, swap 2382 * it with the current group and try again. 2383 */ 2384 pcg = cc->cc_previous; 2385 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2386 cc->cc_previous = cc->cc_current; 2387 cc->cc_current = pcg; 2388 continue; 2389 } 2390 2391 /* 2392 * Can't free to either group: try the slow path. 2393 * If put_slow() releases the object for us, it 2394 * will return false. Otherwise we need to retry. 2395 */ 2396 if (!pool_cache_put_slow(cc, s, object)) 2397 break; 2398 } 2399 } 2400 2401 /* 2402 * pool_cache_transfer: 2403 * 2404 * Transfer objects from the per-CPU cache to the global cache. 2405 * Run within a cross-call thread. 
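 *
 *	pool_cache_invalidate() above drives this routine either directly
 *	(early in boot, when only one CPU is running) or through a
 *	broadcast cross-call, roughly:
 *
 *		where = xc_broadcast(0, (xcfunc_t)pool_cache_transfer,
 *		    pc, NULL);
 *		xc_wait(where);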
2406 */ 2407 static void 2408 pool_cache_transfer(pool_cache_t pc) 2409 { 2410 pool_cache_cpu_t *cc; 2411 pcg_t *prev, *cur, **list; 2412 int s; 2413 2414 s = splvm(); 2415 mutex_enter(&pc->pc_lock); 2416 cc = pc->pc_cpus[curcpu()->ci_index]; 2417 cur = cc->cc_current; 2418 cc->cc_current = __UNCONST(&pcg_dummy); 2419 prev = cc->cc_previous; 2420 cc->cc_previous = __UNCONST(&pcg_dummy); 2421 if (cur != &pcg_dummy) { 2422 if (cur->pcg_avail == cur->pcg_size) { 2423 list = &pc->pc_fullgroups; 2424 pc->pc_nfull++; 2425 } else if (cur->pcg_avail == 0) { 2426 list = &pc->pc_emptygroups; 2427 pc->pc_nempty++; 2428 } else { 2429 list = &pc->pc_partgroups; 2430 pc->pc_npart++; 2431 } 2432 cur->pcg_next = *list; 2433 *list = cur; 2434 } 2435 if (prev != &pcg_dummy) { 2436 if (prev->pcg_avail == prev->pcg_size) { 2437 list = &pc->pc_fullgroups; 2438 pc->pc_nfull++; 2439 } else if (prev->pcg_avail == 0) { 2440 list = &pc->pc_emptygroups; 2441 pc->pc_nempty++; 2442 } else { 2443 list = &pc->pc_partgroups; 2444 pc->pc_npart++; 2445 } 2446 prev->pcg_next = *list; 2447 *list = prev; 2448 } 2449 mutex_exit(&pc->pc_lock); 2450 splx(s); 2451 } 2452 2453 /* 2454 * Pool backend allocators. 2455 * 2456 * Each pool has a backend allocator that handles allocation, deallocation, 2457 * and any additional draining that might be needed. 2458 * 2459 * We provide two standard allocators: 2460 * 2461 * pool_allocator_kmem - the default when no allocator is specified 2462 * 2463 * pool_allocator_nointr - used for pools that will not be accessed 2464 * in interrupt context. 2465 */ 2466 void *pool_page_alloc(struct pool *, int); 2467 void pool_page_free(struct pool *, void *); 2468 2469 #ifdef POOL_SUBPAGE 2470 struct pool_allocator pool_allocator_kmem_fullpage = { 2471 .pa_alloc = pool_page_alloc, 2472 .pa_free = pool_page_free, 2473 .pa_pagesz = 0 2474 }; 2475 #else 2476 struct pool_allocator pool_allocator_kmem = { 2477 .pa_alloc = pool_page_alloc, 2478 .pa_free = pool_page_free, 2479 .pa_pagesz = 0 2480 }; 2481 #endif 2482 2483 #ifdef POOL_SUBPAGE 2484 struct pool_allocator pool_allocator_nointr_fullpage = { 2485 .pa_alloc = pool_page_alloc, 2486 .pa_free = pool_page_free, 2487 .pa_pagesz = 0 2488 }; 2489 #else 2490 struct pool_allocator pool_allocator_nointr = { 2491 .pa_alloc = pool_page_alloc, 2492 .pa_free = pool_page_free, 2493 .pa_pagesz = 0 2494 }; 2495 #endif 2496 2497 #ifdef POOL_SUBPAGE 2498 void *pool_subpage_alloc(struct pool *, int); 2499 void pool_subpage_free(struct pool *, void *); 2500 2501 struct pool_allocator pool_allocator_kmem = { 2502 .pa_alloc = pool_subpage_alloc, 2503 .pa_free = pool_subpage_free, 2504 .pa_pagesz = POOL_SUBPAGE 2505 }; 2506 2507 struct pool_allocator pool_allocator_nointr = { 2508 .pa_alloc = pool_subpage_alloc, 2509 .pa_free = pool_subpage_free, 2510 .pa_pagesz = POOL_SUBPAGE 2511 }; 2512 #endif /* POOL_SUBPAGE */ 2513 2514 static void * 2515 pool_allocator_alloc(struct pool *pp, int flags) 2516 { 2517 struct pool_allocator *pa = pp->pr_alloc; 2518 void *res; 2519 2520 res = (*pa->pa_alloc)(pp, flags); 2521 if (res == NULL && (flags & PR_WAITOK) == 0) { 2522 /* 2523 * We only run the drain hook here if PR_NOWAIT. 2524 * In other cases, the hook will be run in 2525 * pool_reclaim(). 
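		 *
		 * A drain hook is registered with pool_set_drain_hook()
		 * (or, for a cache, pool_cache_set_drain_hook() above)
		 * and is called with the registered argument plus the
		 * allocation flags.  A hedged sketch of one, with
		 * made-up names; the hook is expected to release memory
		 * it has cached elsewhere so the retried allocation can
		 * succeed:
		 *
		 *	static void
		 *	example_drain(void *arg, int flags)
		 *	{
		 *		struct example_softc *sc = arg;
		 *
		 *		example_release_cached_memory(sc, flags);
		 *	}
		 *
		 *	pool_set_drain_hook(&example_pool, example_drain, sc);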
2526 */ 2527 if (pp->pr_drain_hook != NULL) { 2528 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2529 res = (*pa->pa_alloc)(pp, flags); 2530 } 2531 } 2532 return res; 2533 } 2534 2535 static void 2536 pool_allocator_free(struct pool *pp, void *v) 2537 { 2538 struct pool_allocator *pa = pp->pr_alloc; 2539 2540 (*pa->pa_free)(pp, v); 2541 } 2542 2543 void * 2544 pool_page_alloc(struct pool *pp, int flags) 2545 { 2546 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2547 vmem_addr_t va; 2548 int ret; 2549 2550 ret = uvm_km_kmem_alloc(kmem_va_arena, pp->pr_alloc->pa_pagesz, 2551 vflags | VM_INSTANTFIT, &va); 2552 2553 return ret ? NULL : (void *)va; 2554 } 2555 2556 void 2557 pool_page_free(struct pool *pp, void *v) 2558 { 2559 2560 uvm_km_kmem_free(kmem_va_arena, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 2561 } 2562 2563 static void * 2564 pool_page_alloc_meta(struct pool *pp, int flags) 2565 { 2566 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2567 vmem_addr_t va; 2568 int ret; 2569 2570 ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 2571 vflags | VM_INSTANTFIT, &va); 2572 2573 return ret ? NULL : (void *)va; 2574 } 2575 2576 static void 2577 pool_page_free_meta(struct pool *pp, void *v) 2578 { 2579 2580 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 2581 } 2582 2583 #ifdef POOL_SUBPAGE 2584 /* Sub-page allocator, for machines with large hardware pages. */ 2585 void * 2586 pool_subpage_alloc(struct pool *pp, int flags) 2587 { 2588 return pool_get(&psppool, flags); 2589 } 2590 2591 void 2592 pool_subpage_free(struct pool *pp, void *v) 2593 { 2594 pool_put(&psppool, v); 2595 } 2596 2597 #endif /* POOL_SUBPAGE */ 2598 2599 #if defined(DDB) 2600 static bool 2601 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2602 { 2603 2604 return (uintptr_t)ph->ph_page <= addr && 2605 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 2606 } 2607 2608 static bool 2609 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 2610 { 2611 2612 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 2613 } 2614 2615 static bool 2616 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 2617 { 2618 int i; 2619 2620 if (pcg == NULL) { 2621 return false; 2622 } 2623 for (i = 0; i < pcg->pcg_avail; i++) { 2624 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 2625 return true; 2626 } 2627 } 2628 return false; 2629 } 2630 2631 static bool 2632 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 2633 { 2634 2635 if ((pp->pr_roflags & PR_NOTOUCH) != 0) { 2636 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr); 2637 pool_item_bitmap_t *bitmap = 2638 ph->ph_bitmap + (idx / BITMAP_SIZE); 2639 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 2640 2641 return (*bitmap & mask) == 0; 2642 } else { 2643 struct pool_item *pi; 2644 2645 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 2646 if (pool_in_item(pp, pi, addr)) { 2647 return false; 2648 } 2649 } 2650 return true; 2651 } 2652 } 2653 2654 void 2655 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2656 { 2657 struct pool *pp; 2658 2659 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 2660 struct pool_item_header *ph; 2661 uintptr_t item; 2662 bool allocated = true; 2663 bool incache = false; 2664 bool incpucache = false; 2665 char cpucachestr[32]; 2666 2667 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 2668 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2669 if 
(pool_in_page(pp, ph, addr)) { 2670 goto found; 2671 } 2672 } 2673 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2674 if (pool_in_page(pp, ph, addr)) { 2675 allocated = 2676 pool_allocated(pp, ph, addr); 2677 goto found; 2678 } 2679 } 2680 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2681 if (pool_in_page(pp, ph, addr)) { 2682 allocated = false; 2683 goto found; 2684 } 2685 } 2686 continue; 2687 } else { 2688 ph = pr_find_pagehead_noalign(pp, (void *)addr); 2689 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 2690 continue; 2691 } 2692 allocated = pool_allocated(pp, ph, addr); 2693 } 2694 found: 2695 if (allocated && pp->pr_cache) { 2696 pool_cache_t pc = pp->pr_cache; 2697 struct pool_cache_group *pcg; 2698 int i; 2699 2700 for (pcg = pc->pc_fullgroups; pcg != NULL; 2701 pcg = pcg->pcg_next) { 2702 if (pool_in_cg(pp, pcg, addr)) { 2703 incache = true; 2704 goto print; 2705 } 2706 } 2707 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 2708 pool_cache_cpu_t *cc; 2709 2710 if ((cc = pc->pc_cpus[i]) == NULL) { 2711 continue; 2712 } 2713 if (pool_in_cg(pp, cc->cc_current, addr) || 2714 pool_in_cg(pp, cc->cc_previous, addr)) { 2715 struct cpu_info *ci = 2716 cpu_lookup(i); 2717 2718 incpucache = true; 2719 snprintf(cpucachestr, 2720 sizeof(cpucachestr), 2721 "cached by CPU %u", 2722 ci->ci_index); 2723 goto print; 2724 } 2725 } 2726 } 2727 print: 2728 item = (uintptr_t)ph->ph_page + ph->ph_off; 2729 item = item + rounddown(addr - item, pp->pr_size); 2730 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 2731 (void *)addr, item, (size_t)(addr - item), 2732 pp->pr_wchan, 2733 incpucache ? cpucachestr : 2734 incache ? "cached" : allocated ? "allocated" : "free"); 2735 } 2736 } 2737 #endif /* defined(DDB) */ 2738
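
/*
 * Example: a typical pool_cache consumer, shown only as a hedged sketch.
 * The structure, constructor, destructor and wait-channel names below are
 * made up; pool_cache_init(), pool_cache_get(), pool_cache_put() and
 * pool_cache_destroy() are the public entry points declared in
 * <sys/pool.h>.  The ctor must follow int (*)(void *, void *, int) and
 * the dtor void (*)(void *, void *), matching the pc_ctor and pc_dtor
 * calls above.
 *
 *	static pool_cache_t example_cache;
 *
 *	example_cache = pool_cache_init(sizeof(struct example), 0, 0, 0,
 *	    "examplecache", NULL, IPL_NONE, example_ctor, example_dtor,
 *	    NULL);
 *
 *	obj = pool_cache_get(example_cache, PR_WAITOK);
 *	... use the constructed object, then hand it back ...
 *	pool_cache_put(example_cache, obj);
 *
 *	pool_cache_destroy(example_cache);
 */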