1 /* $NetBSD: subr_pool.c,v 1.261 2019/10/16 18:29:49 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center; by Andrew Doran, and by 11 * Maxime Villard. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.261 2019/10/16 18:29:49 christos Exp $"); 37 38 #ifdef _KERNEL_OPT 39 #include "opt_ddb.h" 40 #include "opt_lockdebug.h" 41 #include "opt_pool.h" 42 #include "opt_kleak.h" 43 #endif 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/sysctl.h> 48 #include <sys/bitops.h> 49 #include <sys/proc.h> 50 #include <sys/errno.h> 51 #include <sys/kernel.h> 52 #include <sys/vmem.h> 53 #include <sys/pool.h> 54 #include <sys/syslog.h> 55 #include <sys/debug.h> 56 #include <sys/lockdebug.h> 57 #include <sys/xcall.h> 58 #include <sys/cpu.h> 59 #include <sys/atomic.h> 60 #include <sys/asan.h> 61 62 #include <uvm/uvm_extern.h> 63 64 /* 65 * Pool resource management utility. 66 * 67 * Memory is allocated in pages which are split into pieces according to 68 * the pool item size. Each page is kept on one of three lists in the 69 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 70 * for empty, full and partially-full pages respectively. The individual 71 * pool items are on a linked list headed by `ph_itemlist' in each page 72 * header. The memory for building the page list is either taken from 73 * the allocated pages themselves (for small pool items) or taken from 74 * an internal pool of page headers (`phpool'). 75 */ 76 77 /* List of all pools. Non static as needed by 'vmstat -m' */ 78 TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 79 80 /* Private pool for page header structures */ 81 #define PHPOOL_MAX 8 82 static struct pool phpool[PHPOOL_MAX]; 83 #define PHPOOL_FREELIST_NELEM(idx) \ 84 (((idx) == 0) ? 
BITMAP_MIN_SIZE : BITMAP_SIZE * (1 << (idx))) 85 86 #if defined(DIAGNOSTIC) || defined(KASAN) 87 #define POOL_REDZONE 88 #endif 89 90 #ifdef POOL_REDZONE 91 # ifdef KASAN 92 # define POOL_REDZONE_SIZE 8 93 # else 94 # define POOL_REDZONE_SIZE 2 95 # endif 96 static void pool_redzone_init(struct pool *, size_t); 97 static void pool_redzone_fill(struct pool *, void *); 98 static void pool_redzone_check(struct pool *, void *); 99 static void pool_cache_redzone_check(pool_cache_t, void *); 100 #else 101 # define pool_redzone_init(pp, sz) __nothing 102 # define pool_redzone_fill(pp, ptr) __nothing 103 # define pool_redzone_check(pp, ptr) __nothing 104 # define pool_cache_redzone_check(pc, ptr) __nothing 105 #endif 106 107 #ifdef KLEAK 108 static void pool_kleak_fill(struct pool *, void *); 109 static void pool_cache_kleak_fill(pool_cache_t, void *); 110 #else 111 #define pool_kleak_fill(pp, ptr) __nothing 112 #define pool_cache_kleak_fill(pc, ptr) __nothing 113 #endif 114 115 #ifdef POOL_QUARANTINE 116 static void pool_quarantine_init(struct pool *); 117 static void pool_quarantine_flush(struct pool *); 118 static bool pool_put_quarantine(struct pool *, void *, 119 struct pool_pagelist *); 120 static bool pool_cache_put_quarantine(pool_cache_t, void *, paddr_t); 121 #else 122 #define pool_quarantine_init(a) __nothing 123 #define pool_quarantine_flush(a) __nothing 124 #define pool_put_quarantine(a, b, c) false 125 #define pool_cache_put_quarantine(a, b, c) false 126 #endif 127 128 #define NO_CTOR __FPTRCAST(int (*)(void *, void *, int), nullop) 129 #define NO_DTOR __FPTRCAST(void (*)(void *, void *), nullop) 130 131 #if defined(KASAN) || defined(KLEAK) 132 #define pc_has_ctor(pc) ((pc)->pc_ctor != NO_CTOR) 133 #define pc_has_dtor(pc) ((pc)->pc_dtor != NO_DTOR) 134 #endif 135 136 /* 137 * Pool backend allocators. 138 * 139 * Each pool has a backend allocator that handles allocation, deallocation, 140 * and any additional draining that might be needed. 141 * 142 * We provide two standard allocators: 143 * 144 * pool_allocator_kmem - the default when no allocator is specified 145 * 146 * pool_allocator_nointr - used for pools that will not be accessed 147 * in interrupt context. 
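 *
 * As an illustrative sketch only (the "foo_pool" / "struct foo" / "foopl"
 * names are hypothetical, not part of this file), a pool that is never
 * touched from interrupt context could be created against the nointr
 * allocator like so:
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &pool_allocator_nointr, IPL_NONE);
 *
 * Passing a NULL allocator to pool_init() selects pool_allocator_kmem.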
148 */ 149 void *pool_page_alloc(struct pool *, int); 150 void pool_page_free(struct pool *, void *); 151 152 static void *pool_page_alloc_meta(struct pool *, int); 153 static void pool_page_free_meta(struct pool *, void *); 154 155 struct pool_allocator pool_allocator_kmem = { 156 .pa_alloc = pool_page_alloc, 157 .pa_free = pool_page_free, 158 .pa_pagesz = 0 159 }; 160 161 struct pool_allocator pool_allocator_nointr = { 162 .pa_alloc = pool_page_alloc, 163 .pa_free = pool_page_free, 164 .pa_pagesz = 0 165 }; 166 167 struct pool_allocator pool_allocator_meta = { 168 .pa_alloc = pool_page_alloc_meta, 169 .pa_free = pool_page_free_meta, 170 .pa_pagesz = 0 171 }; 172 173 #define POOL_ALLOCATOR_BIG_BASE 13 174 static struct pool_allocator pool_allocator_big[] = { 175 { 176 .pa_alloc = pool_page_alloc, 177 .pa_free = pool_page_free, 178 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 0), 179 }, 180 { 181 .pa_alloc = pool_page_alloc, 182 .pa_free = pool_page_free, 183 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 1), 184 }, 185 { 186 .pa_alloc = pool_page_alloc, 187 .pa_free = pool_page_free, 188 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 2), 189 }, 190 { 191 .pa_alloc = pool_page_alloc, 192 .pa_free = pool_page_free, 193 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 3), 194 }, 195 { 196 .pa_alloc = pool_page_alloc, 197 .pa_free = pool_page_free, 198 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 4), 199 }, 200 { 201 .pa_alloc = pool_page_alloc, 202 .pa_free = pool_page_free, 203 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 5), 204 }, 205 { 206 .pa_alloc = pool_page_alloc, 207 .pa_free = pool_page_free, 208 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 6), 209 }, 210 { 211 .pa_alloc = pool_page_alloc, 212 .pa_free = pool_page_free, 213 .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 7), 214 } 215 }; 216 217 static int pool_bigidx(size_t); 218 219 /* # of seconds to retain page after last use */ 220 int pool_inactive_time = 10; 221 222 /* Next candidate for drainage (see pool_drain()) */ 223 static struct pool *drainpp; 224 225 /* This lock protects both pool_head and drainpp. 
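 * pool_busy is broadcast whenever a pool's (or cache's) reference count
 * drops, letting pool_destroy() and pool_cache_bootstrap_destroy() wait
 * out transient references taken by pool_drain() and
 * pool_cache_cpu_init().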
*/ 226 static kmutex_t pool_head_lock; 227 static kcondvar_t pool_busy; 228 229 /* This lock protects initialization of a potentially shared pool allocator */ 230 static kmutex_t pool_allocator_lock; 231 232 static unsigned int poolid_counter = 0; 233 234 typedef uint32_t pool_item_bitmap_t; 235 #define BITMAP_SIZE (CHAR_BIT * sizeof(pool_item_bitmap_t)) 236 #define BITMAP_MASK (BITMAP_SIZE - 1) 237 #define BITMAP_MIN_SIZE (CHAR_BIT * sizeof(((struct pool_item_header *)NULL)->ph_u2)) 238 239 struct pool_item_header { 240 /* Page headers */ 241 LIST_ENTRY(pool_item_header) 242 ph_pagelist; /* pool page list */ 243 union { 244 /* !PR_PHINPAGE */ 245 struct { 246 SPLAY_ENTRY(pool_item_header) 247 phu_node; /* off-page page headers */ 248 } phu_offpage; 249 /* PR_PHINPAGE */ 250 struct { 251 unsigned int phu_poolid; 252 } phu_onpage; 253 } ph_u1; 254 void * ph_page; /* this page's address */ 255 uint32_t ph_time; /* last referenced */ 256 uint16_t ph_nmissing; /* # of chunks in use */ 257 uint16_t ph_off; /* start offset in page */ 258 union { 259 /* !PR_USEBMAP */ 260 struct { 261 LIST_HEAD(, pool_item) 262 phu_itemlist; /* chunk list for this page */ 263 } phu_normal; 264 /* PR_USEBMAP */ 265 struct { 266 pool_item_bitmap_t phu_bitmap[1]; 267 } phu_notouch; 268 } ph_u2; 269 }; 270 #define ph_node ph_u1.phu_offpage.phu_node 271 #define ph_poolid ph_u1.phu_onpage.phu_poolid 272 #define ph_itemlist ph_u2.phu_normal.phu_itemlist 273 #define ph_bitmap ph_u2.phu_notouch.phu_bitmap 274 275 #define PHSIZE ALIGN(sizeof(struct pool_item_header)) 276 277 CTASSERT(offsetof(struct pool_item_header, ph_u2) + 278 BITMAP_MIN_SIZE / CHAR_BIT == sizeof(struct pool_item_header)); 279 280 #if defined(DIAGNOSTIC) && !defined(KASAN) 281 #define POOL_CHECK_MAGIC 282 #endif 283 284 struct pool_item { 285 #ifdef POOL_CHECK_MAGIC 286 u_int pi_magic; 287 #endif 288 #define PI_MAGIC 0xdeaddeadU 289 /* Other entries use only this list entry */ 290 LIST_ENTRY(pool_item) pi_list; 291 }; 292 293 #define POOL_NEEDS_CATCHUP(pp) \ 294 ((pp)->pr_nitems < (pp)->pr_minitems) 295 #define POOL_OBJ_TO_PAGE(pp, v) \ 296 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask) 297 298 /* 299 * Pool cache management. 300 * 301 * Pool caches provide a way for constructed objects to be cached by the 302 * pool subsystem. This can lead to performance improvements by avoiding 303 * needless object construction/destruction; it is deferred until absolutely 304 * necessary. 305 * 306 * Caches are grouped into cache groups. Each cache group references up 307 * to PCG_NUMOBJECTS constructed objects. When a cache allocates an 308 * object from the pool, it calls the object's constructor and places it 309 * into a cache group. When a cache group frees an object back to the 310 * pool, it first calls the object's destructor. This allows the object 311 * to persist in constructed form while freed to the cache. 312 * 313 * The pool references each cache, so that when a pool is drained by the 314 * pagedaemon, it can drain each individual cache as well. Each time a 315 * cache is drained, the most idle cache group is freed to the pool in 316 * its entirety. 317 * 318 * Pool caches are layed on top of pools. By layering them, we can avoid 319 * the complexity of cache management for pools which would not benefit 320 * from it. 321 */ 322 323 static struct pool pcg_normal_pool; 324 static struct pool pcg_large_pool; 325 static struct pool cache_pool; 326 static struct pool cache_cpu_pool; 327 328 /* List of all caches. 
*/ 329 TAILQ_HEAD(,pool_cache) pool_cache_head = 330 TAILQ_HEAD_INITIALIZER(pool_cache_head); 331 332 int pool_cache_disable; /* global disable for caching */ 333 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 334 335 static bool pool_cache_put_slow(pool_cache_cpu_t *, int, 336 void *); 337 static bool pool_cache_get_slow(pool_cache_cpu_t *, int, 338 void **, paddr_t *, int); 339 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 340 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 341 static void pool_cache_invalidate_cpu(pool_cache_t, u_int); 342 static void pool_cache_transfer(pool_cache_t); 343 344 static int pool_catchup(struct pool *); 345 static void pool_prime_page(struct pool *, void *, 346 struct pool_item_header *); 347 static void pool_update_curpage(struct pool *); 348 349 static int pool_grow(struct pool *, int); 350 static void *pool_allocator_alloc(struct pool *, int); 351 static void pool_allocator_free(struct pool *, void *); 352 353 static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 354 void (*)(const char *, ...) __printflike(1, 2)); 355 static void pool_print1(struct pool *, const char *, 356 void (*)(const char *, ...) __printflike(1, 2)); 357 358 static int pool_chk_page(struct pool *, const char *, 359 struct pool_item_header *); 360 361 /* -------------------------------------------------------------------------- */ 362 363 static inline unsigned int 364 pr_item_bitmap_index(const struct pool *pp, const struct pool_item_header *ph, 365 const void *v) 366 { 367 const char *cp = v; 368 unsigned int idx; 369 370 KASSERT(pp->pr_roflags & PR_USEBMAP); 371 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 372 373 if (__predict_false(idx >= pp->pr_itemsperpage)) { 374 panic("%s: [%s] %u >= %u", __func__, pp->pr_wchan, idx, 375 pp->pr_itemsperpage); 376 } 377 378 return idx; 379 } 380 381 static inline void 382 pr_item_bitmap_put(const struct pool *pp, struct pool_item_header *ph, 383 void *obj) 384 { 385 unsigned int idx = pr_item_bitmap_index(pp, ph, obj); 386 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 387 pool_item_bitmap_t mask = 1U << (idx & BITMAP_MASK); 388 389 if (__predict_false((*bitmap & mask) != 0)) { 390 panic("%s: [%s] %p already freed", __func__, pp->pr_wchan, obj); 391 } 392 393 *bitmap |= mask; 394 } 395 396 static inline void * 397 pr_item_bitmap_get(const struct pool *pp, struct pool_item_header *ph) 398 { 399 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 400 unsigned int idx; 401 int i; 402 403 for (i = 0; ; i++) { 404 int bit; 405 406 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 407 bit = ffs32(bitmap[i]); 408 if (bit) { 409 pool_item_bitmap_t mask; 410 411 bit--; 412 idx = (i * BITMAP_SIZE) + bit; 413 mask = 1U << bit; 414 KASSERT((bitmap[i] & mask) != 0); 415 bitmap[i] &= ~mask; 416 break; 417 } 418 } 419 KASSERT(idx < pp->pr_itemsperpage); 420 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 421 } 422 423 static inline void 424 pr_item_bitmap_init(const struct pool *pp, struct pool_item_header *ph) 425 { 426 pool_item_bitmap_t *bitmap = ph->ph_bitmap; 427 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 428 int i; 429 430 for (i = 0; i < n; i++) { 431 bitmap[i] = (pool_item_bitmap_t)-1; 432 } 433 } 434 435 /* -------------------------------------------------------------------------- */ 436 437 static inline void 438 pr_item_linkedlist_put(const struct pool *pp, struct pool_item_header *ph, 439 void *obj) 440 { 
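	/*
	 * Link the freed object back onto the page's free-item list,
	 * reusing the object's own storage for the pool_item list entry.
	 */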
441 struct pool_item *pi = obj; 442 443 #ifdef POOL_CHECK_MAGIC 444 pi->pi_magic = PI_MAGIC; 445 #endif 446 447 if (pp->pr_redzone) { 448 /* 449 * Mark the pool_item as valid. The rest is already 450 * invalid. 451 */ 452 kasan_mark(pi, sizeof(*pi), sizeof(*pi), 0); 453 } 454 455 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 456 } 457 458 static inline void * 459 pr_item_linkedlist_get(struct pool *pp, struct pool_item_header *ph) 460 { 461 struct pool_item *pi; 462 void *v; 463 464 v = pi = LIST_FIRST(&ph->ph_itemlist); 465 if (__predict_false(v == NULL)) { 466 mutex_exit(&pp->pr_lock); 467 panic("%s: [%s] page empty", __func__, pp->pr_wchan); 468 } 469 KASSERTMSG((pp->pr_nitems > 0), 470 "%s: [%s] nitems %u inconsistent on itemlist", 471 __func__, pp->pr_wchan, pp->pr_nitems); 472 #ifdef POOL_CHECK_MAGIC 473 KASSERTMSG((pi->pi_magic == PI_MAGIC), 474 "%s: [%s] free list modified: " 475 "magic=%x; page %p; item addr %p", __func__, 476 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 477 #endif 478 479 /* 480 * Remove from item list. 481 */ 482 LIST_REMOVE(pi, pi_list); 483 484 return v; 485 } 486 487 /* -------------------------------------------------------------------------- */ 488 489 static inline void 490 pr_phinpage_check(struct pool *pp, struct pool_item_header *ph, void *page, 491 void *object) 492 { 493 if (__predict_false((void *)ph->ph_page != page)) { 494 panic("%s: [%s] item %p not part of pool", __func__, 495 pp->pr_wchan, object); 496 } 497 if (__predict_false((char *)object < (char *)page + ph->ph_off)) { 498 panic("%s: [%s] item %p below item space", __func__, 499 pp->pr_wchan, object); 500 } 501 if (__predict_false(ph->ph_poolid != pp->pr_poolid)) { 502 panic("%s: [%s] item %p poolid %u != %u", __func__, 503 pp->pr_wchan, object, ph->ph_poolid, pp->pr_poolid); 504 } 505 } 506 507 static inline void 508 pc_phinpage_check(pool_cache_t pc, void *object) 509 { 510 struct pool_item_header *ph; 511 struct pool *pp; 512 void *page; 513 514 pp = &pc->pc_pool; 515 page = POOL_OBJ_TO_PAGE(pp, object); 516 ph = (struct pool_item_header *)page; 517 518 pr_phinpage_check(pp, ph, page, object); 519 } 520 521 /* -------------------------------------------------------------------------- */ 522 523 static inline int 524 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 525 { 526 527 /* 528 * We consider pool_item_header with smaller ph_page bigger. This 529 * unnatural ordering is for the benefit of pr_find_pagehead. 530 */ 531 if (a->ph_page < b->ph_page) 532 return 1; 533 else if (a->ph_page > b->ph_page) 534 return -1; 535 else 536 return 0; 537 } 538 539 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 540 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 541 542 static inline struct pool_item_header * 543 pr_find_pagehead_noalign(struct pool *pp, void *v) 544 { 545 struct pool_item_header *ph, tmp; 546 547 tmp.ph_page = (void *)(uintptr_t)v; 548 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 549 if (ph == NULL) { 550 ph = SPLAY_ROOT(&pp->pr_phtree); 551 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 552 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 553 } 554 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 555 } 556 557 return ph; 558 } 559 560 /* 561 * Return the pool page header based on item address. 
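 *
 * For PR_PHINPAGE pools the header sits at the start of the page and is
 * found by masking the item address with the allocator's page mask.
 * Off-page headers are looked up in the pool's splay tree, keyed on
 * ph_page; PR_NOALIGN pools cannot use the page mask at all and go
 * through pr_find_pagehead_noalign().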
562 */ 563 static inline struct pool_item_header * 564 pr_find_pagehead(struct pool *pp, void *v) 565 { 566 struct pool_item_header *ph, tmp; 567 568 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 569 ph = pr_find_pagehead_noalign(pp, v); 570 } else { 571 void *page = POOL_OBJ_TO_PAGE(pp, v); 572 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 573 ph = (struct pool_item_header *)page; 574 pr_phinpage_check(pp, ph, page, v); 575 } else { 576 tmp.ph_page = page; 577 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 578 } 579 } 580 581 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 582 ((char *)ph->ph_page <= (char *)v && 583 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 584 return ph; 585 } 586 587 static void 588 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 589 { 590 struct pool_item_header *ph; 591 592 while ((ph = LIST_FIRST(pq)) != NULL) { 593 LIST_REMOVE(ph, ph_pagelist); 594 pool_allocator_free(pp, ph->ph_page); 595 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 596 pool_put(pp->pr_phpool, ph); 597 } 598 } 599 600 /* 601 * Remove a page from the pool. 602 */ 603 static inline void 604 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 605 struct pool_pagelist *pq) 606 { 607 608 KASSERT(mutex_owned(&pp->pr_lock)); 609 610 /* 611 * If the page was idle, decrement the idle page count. 612 */ 613 if (ph->ph_nmissing == 0) { 614 KASSERT(pp->pr_nidle != 0); 615 KASSERTMSG((pp->pr_nitems >= pp->pr_itemsperpage), 616 "%s: [%s] nitems=%u < itemsperpage=%u", __func__, 617 pp->pr_wchan, pp->pr_nitems, pp->pr_itemsperpage); 618 pp->pr_nidle--; 619 } 620 621 pp->pr_nitems -= pp->pr_itemsperpage; 622 623 /* 624 * Unlink the page from the pool and queue it for release. 625 */ 626 LIST_REMOVE(ph, ph_pagelist); 627 if (pp->pr_roflags & PR_PHINPAGE) { 628 if (__predict_false(ph->ph_poolid != pp->pr_poolid)) { 629 panic("%s: [%s] ph %p poolid %u != %u", 630 __func__, pp->pr_wchan, ph, ph->ph_poolid, 631 pp->pr_poolid); 632 } 633 } else { 634 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 635 } 636 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 637 638 pp->pr_npages--; 639 pp->pr_npagefree++; 640 641 pool_update_curpage(pp); 642 } 643 644 /* 645 * Initialize all the pools listed in the "pools" link set. 646 */ 647 void 648 pool_subsystem_init(void) 649 { 650 size_t size; 651 int idx; 652 653 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 654 mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 655 cv_init(&pool_busy, "poolbusy"); 656 657 /* 658 * Initialize private page header pool and cache magazine pool if we 659 * haven't done so yet. 
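 *
 * phpool[idx] supplies off-page page headers whose embedded bitmap can
 * track up to PHPOOL_FREELIST_NELEM(idx) items; pool_init() later picks
 * the smallest such pool that covers pr_itemsperpage.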
660 */ 661 for (idx = 0; idx < PHPOOL_MAX; idx++) { 662 static char phpool_names[PHPOOL_MAX][6+1+6+1]; 663 int nelem; 664 size_t sz; 665 666 nelem = PHPOOL_FREELIST_NELEM(idx); 667 KASSERT(nelem != 0); 668 snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 669 "phpool-%d", nelem); 670 sz = offsetof(struct pool_item_header, 671 ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 672 pool_init(&phpool[idx], sz, 0, 0, 0, 673 phpool_names[idx], &pool_allocator_meta, IPL_VM); 674 } 675 676 size = sizeof(pcg_t) + 677 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 678 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 679 "pcgnormal", &pool_allocator_meta, IPL_VM); 680 681 size = sizeof(pcg_t) + 682 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 683 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 684 "pcglarge", &pool_allocator_meta, IPL_VM); 685 686 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 687 0, 0, "pcache", &pool_allocator_meta, IPL_NONE); 688 689 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 690 0, 0, "pcachecpu", &pool_allocator_meta, IPL_NONE); 691 } 692 693 static inline bool 694 pool_init_is_phinpage(const struct pool *pp) 695 { 696 size_t pagesize; 697 698 if (pp->pr_roflags & PR_PHINPAGE) { 699 return true; 700 } 701 if (pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) { 702 return false; 703 } 704 705 pagesize = pp->pr_alloc->pa_pagesz; 706 707 /* 708 * Threshold: the item size is below 1/16 of a page size, and below 709 * 8 times the page header size. The latter ensures we go off-page 710 * if the page header would make us waste a rather big item. 711 */ 712 if (pp->pr_size < MIN(pagesize / 16, PHSIZE * 8)) { 713 return true; 714 } 715 716 /* Put the header into the page if it doesn't waste any items. */ 717 if (pagesize / pp->pr_size == (pagesize - PHSIZE) / pp->pr_size) { 718 return true; 719 } 720 721 return false; 722 } 723 724 static inline bool 725 pool_init_is_usebmap(const struct pool *pp) 726 { 727 size_t bmapsize; 728 729 if (pp->pr_roflags & PR_NOTOUCH) { 730 return true; 731 } 732 733 /* 734 * If we're off-page, go with a bitmap. 735 */ 736 if (!(pp->pr_roflags & PR_PHINPAGE)) { 737 return true; 738 } 739 740 /* 741 * If we're on-page, and the page header can already contain a bitmap 742 * big enough to cover all the items of the page, go with a bitmap. 743 */ 744 bmapsize = roundup(PHSIZE, pp->pr_align) - 745 offsetof(struct pool_item_header, ph_bitmap[0]); 746 KASSERT(bmapsize % sizeof(pool_item_bitmap_t) == 0); 747 if (pp->pr_itemsperpage <= bmapsize * CHAR_BIT) { 748 return true; 749 } 750 751 return false; 752 } 753 754 /* 755 * Initialize the given pool resource structure. 756 * 757 * We export this routine to allow other kernel parts to declare 758 * static pools that must be initialized before kmem(9) is available. 759 */ 760 void 761 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 762 const char *wchan, struct pool_allocator *palloc, int ipl) 763 { 764 struct pool *pp1; 765 size_t prsize; 766 int itemspace, slack; 767 768 /* XXX ioff will be removed. */ 769 KASSERT(ioff == 0); 770 771 #ifdef DEBUG 772 if (__predict_true(!cold)) 773 mutex_enter(&pool_head_lock); 774 /* 775 * Check that the pool hasn't already been initialised and 776 * added to the list of all pools. 
777 */ 778 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 779 if (pp == pp1) 780 panic("%s: [%s] already initialised", __func__, 781 wchan); 782 } 783 if (__predict_true(!cold)) 784 mutex_exit(&pool_head_lock); 785 #endif 786 787 if (palloc == NULL) 788 palloc = &pool_allocator_kmem; 789 790 if (!cold) 791 mutex_enter(&pool_allocator_lock); 792 if (palloc->pa_refcnt++ == 0) { 793 if (palloc->pa_pagesz == 0) 794 palloc->pa_pagesz = PAGE_SIZE; 795 796 TAILQ_INIT(&palloc->pa_list); 797 798 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 799 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 800 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 801 } 802 if (!cold) 803 mutex_exit(&pool_allocator_lock); 804 805 if (align == 0) 806 align = ALIGN(1); 807 808 prsize = size; 809 if ((flags & PR_NOTOUCH) == 0 && prsize < sizeof(struct pool_item)) 810 prsize = sizeof(struct pool_item); 811 812 prsize = roundup(prsize, align); 813 KASSERTMSG((prsize <= palloc->pa_pagesz), 814 "%s: [%s] pool item size (%zu) larger than page size (%u)", 815 __func__, wchan, prsize, palloc->pa_pagesz); 816 817 /* 818 * Initialize the pool structure. 819 */ 820 LIST_INIT(&pp->pr_emptypages); 821 LIST_INIT(&pp->pr_fullpages); 822 LIST_INIT(&pp->pr_partpages); 823 pp->pr_cache = NULL; 824 pp->pr_curpage = NULL; 825 pp->pr_npages = 0; 826 pp->pr_minitems = 0; 827 pp->pr_minpages = 0; 828 pp->pr_maxpages = UINT_MAX; 829 pp->pr_roflags = flags; 830 pp->pr_flags = 0; 831 pp->pr_size = prsize; 832 pp->pr_reqsize = size; 833 pp->pr_align = align; 834 pp->pr_wchan = wchan; 835 pp->pr_alloc = palloc; 836 pp->pr_poolid = atomic_inc_uint_nv(&poolid_counter); 837 pp->pr_nitems = 0; 838 pp->pr_nout = 0; 839 pp->pr_hardlimit = UINT_MAX; 840 pp->pr_hardlimit_warning = NULL; 841 pp->pr_hardlimit_ratecap.tv_sec = 0; 842 pp->pr_hardlimit_ratecap.tv_usec = 0; 843 pp->pr_hardlimit_warning_last.tv_sec = 0; 844 pp->pr_hardlimit_warning_last.tv_usec = 0; 845 pp->pr_drain_hook = NULL; 846 pp->pr_drain_hook_arg = NULL; 847 pp->pr_freecheck = NULL; 848 pp->pr_redzone = false; 849 pool_redzone_init(pp, size); 850 pool_quarantine_init(pp); 851 852 /* 853 * Decide whether to put the page header off-page to avoid wasting too 854 * large a part of the page or too big an item. Off-page page headers 855 * go on a hash table, so we can match a returned item with its header 856 * based on the page address. 857 */ 858 if (pool_init_is_phinpage(pp)) { 859 /* Use the beginning of the page for the page header */ 860 itemspace = palloc->pa_pagesz - roundup(PHSIZE, align); 861 pp->pr_itemoffset = roundup(PHSIZE, align); 862 pp->pr_roflags |= PR_PHINPAGE; 863 } else { 864 /* The page header will be taken from our page header pool */ 865 itemspace = palloc->pa_pagesz; 866 pp->pr_itemoffset = 0; 867 SPLAY_INIT(&pp->pr_phtree); 868 } 869 870 pp->pr_itemsperpage = itemspace / pp->pr_size; 871 KASSERT(pp->pr_itemsperpage != 0); 872 873 /* 874 * Decide whether to use a bitmap or a linked list to manage freed 875 * items. 876 */ 877 if (pool_init_is_usebmap(pp)) { 878 pp->pr_roflags |= PR_USEBMAP; 879 } 880 881 /* 882 * If we're off-page, then we're using a bitmap; choose the appropriate 883 * pool to allocate page headers, whose size varies depending on the 884 * bitmap. If we're on-page, nothing to do. 
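 *
 * For example, a hypothetical pool with 100 items per page would take its
 * page headers from "phpool-128", the smallest header pool whose bitmap
 * covers that many items.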
885 */ 886 if (!(pp->pr_roflags & PR_PHINPAGE)) { 887 int idx; 888 889 KASSERT(pp->pr_roflags & PR_USEBMAP); 890 891 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 892 idx++) { 893 /* nothing */ 894 } 895 if (idx >= PHPOOL_MAX) { 896 /* 897 * if you see this panic, consider to tweak 898 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 899 */ 900 panic("%s: [%s] too large itemsperpage(%d) for " 901 "PR_USEBMAP", __func__, 902 pp->pr_wchan, pp->pr_itemsperpage); 903 } 904 pp->pr_phpool = &phpool[idx]; 905 } else { 906 pp->pr_phpool = NULL; 907 } 908 909 /* 910 * Use the slack between the chunks and the page header 911 * for "cache coloring". 912 */ 913 slack = itemspace - pp->pr_itemsperpage * pp->pr_size; 914 pp->pr_maxcolor = rounddown(slack, align); 915 pp->pr_curcolor = 0; 916 917 pp->pr_nget = 0; 918 pp->pr_nfail = 0; 919 pp->pr_nput = 0; 920 pp->pr_npagealloc = 0; 921 pp->pr_npagefree = 0; 922 pp->pr_hiwat = 0; 923 pp->pr_nidle = 0; 924 pp->pr_refcnt = 0; 925 926 mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl); 927 cv_init(&pp->pr_cv, wchan); 928 pp->pr_ipl = ipl; 929 930 /* Insert into the list of all pools. */ 931 if (!cold) 932 mutex_enter(&pool_head_lock); 933 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 934 if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0) 935 break; 936 } 937 if (pp1 == NULL) 938 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 939 else 940 TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist); 941 if (!cold) 942 mutex_exit(&pool_head_lock); 943 944 /* Insert this into the list of pools using this allocator. */ 945 if (!cold) 946 mutex_enter(&palloc->pa_lock); 947 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 948 if (!cold) 949 mutex_exit(&palloc->pa_lock); 950 } 951 952 /* 953 * De-commision a pool resource. 954 */ 955 void 956 pool_destroy(struct pool *pp) 957 { 958 struct pool_pagelist pq; 959 struct pool_item_header *ph; 960 961 pool_quarantine_flush(pp); 962 963 /* Remove from global pool list */ 964 mutex_enter(&pool_head_lock); 965 while (pp->pr_refcnt != 0) 966 cv_wait(&pool_busy, &pool_head_lock); 967 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 968 if (drainpp == pp) 969 drainpp = NULL; 970 mutex_exit(&pool_head_lock); 971 972 /* Remove this pool from its allocator's list of pools. 
*/ 973 mutex_enter(&pp->pr_alloc->pa_lock); 974 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 975 mutex_exit(&pp->pr_alloc->pa_lock); 976 977 mutex_enter(&pool_allocator_lock); 978 if (--pp->pr_alloc->pa_refcnt == 0) 979 mutex_destroy(&pp->pr_alloc->pa_lock); 980 mutex_exit(&pool_allocator_lock); 981 982 mutex_enter(&pp->pr_lock); 983 984 KASSERT(pp->pr_cache == NULL); 985 KASSERTMSG((pp->pr_nout == 0), 986 "%s: [%s] pool busy: still out: %u", __func__, pp->pr_wchan, 987 pp->pr_nout); 988 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 989 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 990 991 /* Remove all pages */ 992 LIST_INIT(&pq); 993 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 994 pr_rmpage(pp, ph, &pq); 995 996 mutex_exit(&pp->pr_lock); 997 998 pr_pagelist_free(pp, &pq); 999 cv_destroy(&pp->pr_cv); 1000 mutex_destroy(&pp->pr_lock); 1001 } 1002 1003 void 1004 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 1005 { 1006 1007 /* XXX no locking -- must be used just after pool_init() */ 1008 KASSERTMSG((pp->pr_drain_hook == NULL), 1009 "%s: [%s] already set", __func__, pp->pr_wchan); 1010 pp->pr_drain_hook = fn; 1011 pp->pr_drain_hook_arg = arg; 1012 } 1013 1014 static struct pool_item_header * 1015 pool_alloc_item_header(struct pool *pp, void *storage, int flags) 1016 { 1017 struct pool_item_header *ph; 1018 1019 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 1020 ph = storage; 1021 else 1022 ph = pool_get(pp->pr_phpool, flags); 1023 1024 return ph; 1025 } 1026 1027 /* 1028 * Grab an item from the pool. 1029 */ 1030 void * 1031 pool_get(struct pool *pp, int flags) 1032 { 1033 struct pool_item_header *ph; 1034 void *v; 1035 1036 KASSERT(!(flags & PR_NOWAIT) != !(flags & PR_WAITOK)); 1037 KASSERTMSG((pp->pr_itemsperpage != 0), 1038 "%s: [%s] pr_itemsperpage is zero, " 1039 "pool not initialized?", __func__, pp->pr_wchan); 1040 KASSERTMSG((!(cpu_intr_p() || cpu_softintr_p()) 1041 || pp->pr_ipl != IPL_NONE || cold || panicstr != NULL), 1042 "%s: [%s] is IPL_NONE, but called from interrupt context", 1043 __func__, pp->pr_wchan); 1044 if (flags & PR_WAITOK) { 1045 ASSERT_SLEEPABLE(); 1046 } 1047 1048 mutex_enter(&pp->pr_lock); 1049 startover: 1050 /* 1051 * Check to see if we've reached the hard limit. If we have, 1052 * and we can wait, then wait until an item has been returned to 1053 * the pool. 1054 */ 1055 KASSERTMSG((pp->pr_nout <= pp->pr_hardlimit), 1056 "%s: %s: crossed hard limit", __func__, pp->pr_wchan); 1057 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 1058 if (pp->pr_drain_hook != NULL) { 1059 /* 1060 * Since the drain hook is going to free things 1061 * back to the pool, unlock, call the hook, re-lock, 1062 * and check the hardlimit condition again. 1063 */ 1064 mutex_exit(&pp->pr_lock); 1065 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 1066 mutex_enter(&pp->pr_lock); 1067 if (pp->pr_nout < pp->pr_hardlimit) 1068 goto startover; 1069 } 1070 1071 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 1072 /* 1073 * XXX: A warning isn't logged in this case. Should 1074 * it be? 1075 */ 1076 pp->pr_flags |= PR_WANTED; 1077 do { 1078 cv_wait(&pp->pr_cv, &pp->pr_lock); 1079 } while (pp->pr_flags & PR_WANTED); 1080 goto startover; 1081 } 1082 1083 /* 1084 * Log a message that the hard limit has been hit. 
1085 */ 1086 if (pp->pr_hardlimit_warning != NULL && 1087 ratecheck(&pp->pr_hardlimit_warning_last, 1088 &pp->pr_hardlimit_ratecap)) 1089 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 1090 1091 pp->pr_nfail++; 1092 1093 mutex_exit(&pp->pr_lock); 1094 KASSERT((flags & (PR_NOWAIT|PR_LIMITFAIL)) != 0); 1095 return NULL; 1096 } 1097 1098 /* 1099 * The convention we use is that if `curpage' is not NULL, then 1100 * it points at a non-empty bucket. In particular, `curpage' 1101 * never points at a page header which has PR_PHINPAGE set and 1102 * has no items in its bucket. 1103 */ 1104 if ((ph = pp->pr_curpage) == NULL) { 1105 int error; 1106 1107 KASSERTMSG((pp->pr_nitems == 0), 1108 "%s: [%s] curpage NULL, inconsistent nitems %u", 1109 __func__, pp->pr_wchan, pp->pr_nitems); 1110 1111 /* 1112 * Call the back-end page allocator for more memory. 1113 * Release the pool lock, as the back-end page allocator 1114 * may block. 1115 */ 1116 error = pool_grow(pp, flags); 1117 if (error != 0) { 1118 /* 1119 * pool_grow aborts when another thread 1120 * is allocating a new page. Retry if it 1121 * waited for it. 1122 */ 1123 if (error == ERESTART) 1124 goto startover; 1125 1126 /* 1127 * We were unable to allocate a page or item 1128 * header, but we released the lock during 1129 * allocation, so perhaps items were freed 1130 * back to the pool. Check for this case. 1131 */ 1132 if (pp->pr_curpage != NULL) 1133 goto startover; 1134 1135 pp->pr_nfail++; 1136 mutex_exit(&pp->pr_lock); 1137 KASSERT((flags & (PR_WAITOK|PR_NOWAIT)) == PR_NOWAIT); 1138 return NULL; 1139 } 1140 1141 /* Start the allocation process over. */ 1142 goto startover; 1143 } 1144 if (pp->pr_roflags & PR_USEBMAP) { 1145 KASSERTMSG((ph->ph_nmissing < pp->pr_itemsperpage), 1146 "%s: [%s] pool page empty", __func__, pp->pr_wchan); 1147 v = pr_item_bitmap_get(pp, ph); 1148 } else { 1149 v = pr_item_linkedlist_get(pp, ph); 1150 } 1151 pp->pr_nitems--; 1152 pp->pr_nout++; 1153 if (ph->ph_nmissing == 0) { 1154 KASSERT(pp->pr_nidle > 0); 1155 pp->pr_nidle--; 1156 1157 /* 1158 * This page was previously empty. Move it to the list of 1159 * partially-full pages. This page is already curpage. 1160 */ 1161 LIST_REMOVE(ph, ph_pagelist); 1162 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1163 } 1164 ph->ph_nmissing++; 1165 if (ph->ph_nmissing == pp->pr_itemsperpage) { 1166 KASSERTMSG(((pp->pr_roflags & PR_USEBMAP) || 1167 LIST_EMPTY(&ph->ph_itemlist)), 1168 "%s: [%s] nmissing (%u) inconsistent", __func__, 1169 pp->pr_wchan, ph->ph_nmissing); 1170 /* 1171 * This page is now full. Move it to the full list 1172 * and select a new current page. 1173 */ 1174 LIST_REMOVE(ph, ph_pagelist); 1175 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1176 pool_update_curpage(pp); 1177 } 1178 1179 pp->pr_nget++; 1180 1181 /* 1182 * If we have a low water mark and we are now below that low 1183 * water mark, add more items to the pool. 1184 */ 1185 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1186 /* 1187 * XXX: Should we log a warning? Should we set up a timeout 1188 * to try again in a second or so? The latter could break 1189 * a caller's assumptions about interrupt protection, etc. 1190 */ 1191 } 1192 1193 mutex_exit(&pp->pr_lock); 1194 KASSERT((((vaddr_t)v) & (pp->pr_align - 1)) == 0); 1195 FREECHECK_OUT(&pp->pr_freecheck, v); 1196 pool_redzone_fill(pp, v); 1197 if (flags & PR_ZERO) 1198 memset(v, 0, pp->pr_reqsize); 1199 else 1200 pool_kleak_fill(pp, v); 1201 return v; 1202 } 1203 1204 /* 1205 * Internal version of pool_put(). 
Pool is already locked/entered. 1206 */ 1207 static void 1208 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1209 { 1210 struct pool_item_header *ph; 1211 1212 KASSERT(mutex_owned(&pp->pr_lock)); 1213 pool_redzone_check(pp, v); 1214 FREECHECK_IN(&pp->pr_freecheck, v); 1215 LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1216 1217 KASSERTMSG((pp->pr_nout > 0), 1218 "%s: [%s] putting with none out", __func__, pp->pr_wchan); 1219 1220 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1221 panic("%s: [%s] page header missing", __func__, pp->pr_wchan); 1222 } 1223 1224 /* 1225 * Return to item list. 1226 */ 1227 if (pp->pr_roflags & PR_USEBMAP) { 1228 pr_item_bitmap_put(pp, ph, v); 1229 } else { 1230 pr_item_linkedlist_put(pp, ph, v); 1231 } 1232 KDASSERT(ph->ph_nmissing != 0); 1233 ph->ph_nmissing--; 1234 pp->pr_nput++; 1235 pp->pr_nitems++; 1236 pp->pr_nout--; 1237 1238 /* Cancel "pool empty" condition if it exists */ 1239 if (pp->pr_curpage == NULL) 1240 pp->pr_curpage = ph; 1241 1242 if (pp->pr_flags & PR_WANTED) { 1243 pp->pr_flags &= ~PR_WANTED; 1244 cv_broadcast(&pp->pr_cv); 1245 } 1246 1247 /* 1248 * If this page is now empty, do one of two things: 1249 * 1250 * (1) If we have more pages than the page high water mark, 1251 * free the page back to the system. ONLY CONSIDER 1252 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1253 * CLAIM. 1254 * 1255 * (2) Otherwise, move the page to the empty page list. 1256 * 1257 * Either way, select a new current page (so we use a partially-full 1258 * page if one is available). 1259 */ 1260 if (ph->ph_nmissing == 0) { 1261 pp->pr_nidle++; 1262 if (pp->pr_npages > pp->pr_minpages && 1263 pp->pr_npages > pp->pr_maxpages) { 1264 pr_rmpage(pp, ph, pq); 1265 } else { 1266 LIST_REMOVE(ph, ph_pagelist); 1267 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1268 1269 /* 1270 * Update the timestamp on the page. A page must 1271 * be idle for some period of time before it can 1272 * be reclaimed by the pagedaemon. This minimizes 1273 * ping-pong'ing for memory. 1274 * 1275 * note for 64-bit time_t: truncating to 32-bit is not 1276 * a problem for our usage. 1277 */ 1278 ph->ph_time = time_uptime; 1279 } 1280 pool_update_curpage(pp); 1281 } 1282 1283 /* 1284 * If the page was previously completely full, move it to the 1285 * partially-full list and make it the current page. The next 1286 * allocation will get the item from this page, instead of 1287 * further fragmenting the pool. 1288 */ 1289 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1290 LIST_REMOVE(ph, ph_pagelist); 1291 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1292 pp->pr_curpage = ph; 1293 } 1294 } 1295 1296 void 1297 pool_put(struct pool *pp, void *v) 1298 { 1299 struct pool_pagelist pq; 1300 1301 LIST_INIT(&pq); 1302 1303 mutex_enter(&pp->pr_lock); 1304 if (!pool_put_quarantine(pp, v, &pq)) { 1305 pool_do_put(pp, v, &pq); 1306 } 1307 mutex_exit(&pp->pr_lock); 1308 1309 pr_pagelist_free(pp, &pq); 1310 } 1311 1312 /* 1313 * pool_grow: grow a pool by a page. 1314 * 1315 * => called with pool locked. 1316 * => unlock and relock the pool. 1317 * => return with pool locked. 1318 */ 1319 1320 static int 1321 pool_grow(struct pool *pp, int flags) 1322 { 1323 struct pool_item_header *ph; 1324 char *storage; 1325 1326 /* 1327 * If there's a pool_grow in progress, wait for it to complete 1328 * and try again from the top. 
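 *
 * A PR_WAITOK caller sleeps on pr_cv until PR_GROWING clears and then
 * returns ERESTART so callers such as pool_get() start over.  A PR_NOWAIT
 * caller fails with EWOULDBLOCK, unless the grow in progress is itself
 * PR_NOWAIT (PR_GROWINGNOWAIT), in which case the lock is dropped and
 * retaken and ERESTART is returned; that path is effectively a busy-wait.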
1329 */ 1330 if (pp->pr_flags & PR_GROWING) { 1331 if (flags & PR_WAITOK) { 1332 do { 1333 cv_wait(&pp->pr_cv, &pp->pr_lock); 1334 } while (pp->pr_flags & PR_GROWING); 1335 return ERESTART; 1336 } else { 1337 if (pp->pr_flags & PR_GROWINGNOWAIT) { 1338 /* 1339 * This needs an unlock/relock dance so 1340 * that the other caller has a chance to 1341 * run and actually do the thing. Note 1342 * that this is effectively a busy-wait. 1343 */ 1344 mutex_exit(&pp->pr_lock); 1345 mutex_enter(&pp->pr_lock); 1346 return ERESTART; 1347 } 1348 return EWOULDBLOCK; 1349 } 1350 } 1351 pp->pr_flags |= PR_GROWING; 1352 if (flags & PR_WAITOK) 1353 mutex_exit(&pp->pr_lock); 1354 else 1355 pp->pr_flags |= PR_GROWINGNOWAIT; 1356 1357 storage = pool_allocator_alloc(pp, flags); 1358 if (__predict_false(storage == NULL)) 1359 goto out; 1360 1361 ph = pool_alloc_item_header(pp, storage, flags); 1362 if (__predict_false(ph == NULL)) { 1363 pool_allocator_free(pp, storage); 1364 goto out; 1365 } 1366 1367 if (flags & PR_WAITOK) 1368 mutex_enter(&pp->pr_lock); 1369 pool_prime_page(pp, storage, ph); 1370 pp->pr_npagealloc++; 1371 KASSERT(pp->pr_flags & PR_GROWING); 1372 pp->pr_flags &= ~(PR_GROWING|PR_GROWINGNOWAIT); 1373 /* 1374 * If anyone was waiting for pool_grow, notify them that we 1375 * may have just done it. 1376 */ 1377 cv_broadcast(&pp->pr_cv); 1378 return 0; 1379 out: 1380 if (flags & PR_WAITOK) 1381 mutex_enter(&pp->pr_lock); 1382 KASSERT(pp->pr_flags & PR_GROWING); 1383 pp->pr_flags &= ~(PR_GROWING|PR_GROWINGNOWAIT); 1384 return ENOMEM; 1385 } 1386 1387 /* 1388 * Add N items to the pool. 1389 */ 1390 int 1391 pool_prime(struct pool *pp, int n) 1392 { 1393 int newpages; 1394 int error = 0; 1395 1396 mutex_enter(&pp->pr_lock); 1397 1398 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1399 1400 while (newpages > 0) { 1401 error = pool_grow(pp, PR_NOWAIT); 1402 if (error) { 1403 if (error == ERESTART) 1404 continue; 1405 break; 1406 } 1407 pp->pr_minpages++; 1408 newpages--; 1409 } 1410 1411 if (pp->pr_minpages >= pp->pr_maxpages) 1412 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1413 1414 mutex_exit(&pp->pr_lock); 1415 return error; 1416 } 1417 1418 /* 1419 * Add a page worth of items to the pool. 1420 * 1421 * Note, we must be called with the pool descriptor LOCKED. 1422 */ 1423 static void 1424 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1425 { 1426 const unsigned int align = pp->pr_align; 1427 struct pool_item *pi; 1428 void *cp = storage; 1429 int n; 1430 1431 KASSERT(mutex_owned(&pp->pr_lock)); 1432 KASSERTMSG(((pp->pr_roflags & PR_NOALIGN) || 1433 (((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) == 0)), 1434 "%s: [%s] unaligned page: %p", __func__, pp->pr_wchan, cp); 1435 1436 /* 1437 * Insert page header. 1438 */ 1439 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1440 LIST_INIT(&ph->ph_itemlist); 1441 ph->ph_page = storage; 1442 ph->ph_nmissing = 0; 1443 ph->ph_time = time_uptime; 1444 if (pp->pr_roflags & PR_PHINPAGE) 1445 ph->ph_poolid = pp->pr_poolid; 1446 else 1447 SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1448 1449 pp->pr_nidle++; 1450 1451 /* 1452 * The item space starts after the on-page header, if any. 1453 */ 1454 ph->ph_off = pp->pr_itemoffset; 1455 1456 /* 1457 * Color this page. 
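 *
 * Successive pages stagger their item space by pr_align, wrapping once
 * pr_curcolor exceeds pr_maxcolor (the page slack rounded down to the
 * alignment), so items at the same index in different pages do not all
 * land on the same cache lines.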
1458 */ 1459 ph->ph_off += pp->pr_curcolor; 1460 cp = (char *)cp + ph->ph_off; 1461 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1462 pp->pr_curcolor = 0; 1463 1464 KASSERT((((vaddr_t)cp) & (align - 1)) == 0); 1465 1466 /* 1467 * Insert remaining chunks on the bucket list. 1468 */ 1469 n = pp->pr_itemsperpage; 1470 pp->pr_nitems += n; 1471 1472 if (pp->pr_roflags & PR_USEBMAP) { 1473 pr_item_bitmap_init(pp, ph); 1474 } else { 1475 while (n--) { 1476 pi = (struct pool_item *)cp; 1477 1478 KASSERT((((vaddr_t)pi) & (align - 1)) == 0); 1479 1480 /* Insert on page list */ 1481 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1482 #ifdef POOL_CHECK_MAGIC 1483 pi->pi_magic = PI_MAGIC; 1484 #endif 1485 cp = (char *)cp + pp->pr_size; 1486 1487 KASSERT((((vaddr_t)cp) & (align - 1)) == 0); 1488 } 1489 } 1490 1491 /* 1492 * If the pool was depleted, point at the new page. 1493 */ 1494 if (pp->pr_curpage == NULL) 1495 pp->pr_curpage = ph; 1496 1497 if (++pp->pr_npages > pp->pr_hiwat) 1498 pp->pr_hiwat = pp->pr_npages; 1499 } 1500 1501 /* 1502 * Used by pool_get() when nitems drops below the low water mark. This 1503 * is used to catch up pr_nitems with the low water mark. 1504 * 1505 * Note 1, we never wait for memory here, we let the caller decide what to do. 1506 * 1507 * Note 2, we must be called with the pool already locked, and we return 1508 * with it locked. 1509 */ 1510 static int 1511 pool_catchup(struct pool *pp) 1512 { 1513 int error = 0; 1514 1515 while (POOL_NEEDS_CATCHUP(pp)) { 1516 error = pool_grow(pp, PR_NOWAIT); 1517 if (error) { 1518 if (error == ERESTART) 1519 continue; 1520 break; 1521 } 1522 } 1523 return error; 1524 } 1525 1526 static void 1527 pool_update_curpage(struct pool *pp) 1528 { 1529 1530 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1531 if (pp->pr_curpage == NULL) { 1532 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1533 } 1534 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) || 1535 (pp->pr_curpage != NULL && pp->pr_nitems > 0)); 1536 } 1537 1538 void 1539 pool_setlowat(struct pool *pp, int n) 1540 { 1541 1542 mutex_enter(&pp->pr_lock); 1543 1544 pp->pr_minitems = n; 1545 pp->pr_minpages = (n == 0) 1546 ? 0 1547 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1548 1549 /* Make sure we're caught up with the newly-set low water mark. */ 1550 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1551 /* 1552 * XXX: Should we log a warning? Should we set up a timeout 1553 * to try again in a second or so? The latter could break 1554 * a caller's assumptions about interrupt protection, etc. 1555 */ 1556 } 1557 1558 mutex_exit(&pp->pr_lock); 1559 } 1560 1561 void 1562 pool_sethiwat(struct pool *pp, int n) 1563 { 1564 1565 mutex_enter(&pp->pr_lock); 1566 1567 pp->pr_maxpages = (n == 0) 1568 ? 0 1569 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1570 1571 mutex_exit(&pp->pr_lock); 1572 } 1573 1574 void 1575 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1576 { 1577 1578 mutex_enter(&pp->pr_lock); 1579 1580 pp->pr_hardlimit = n; 1581 pp->pr_hardlimit_warning = warnmess; 1582 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1583 pp->pr_hardlimit_warning_last.tv_sec = 0; 1584 pp->pr_hardlimit_warning_last.tv_usec = 0; 1585 1586 /* 1587 * In-line version of pool_sethiwat(), because we don't want to 1588 * release the lock. 1589 */ 1590 pp->pr_maxpages = (n == 0) 1591 ? 
0 1592 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1593 1594 mutex_exit(&pp->pr_lock); 1595 } 1596 1597 /* 1598 * Release all complete pages that have not been used recently. 1599 * 1600 * Must not be called from interrupt context. 1601 */ 1602 int 1603 pool_reclaim(struct pool *pp) 1604 { 1605 struct pool_item_header *ph, *phnext; 1606 struct pool_pagelist pq; 1607 uint32_t curtime; 1608 bool klock; 1609 int rv; 1610 1611 KASSERT(!cpu_intr_p() && !cpu_softintr_p()); 1612 1613 if (pp->pr_drain_hook != NULL) { 1614 /* 1615 * The drain hook must be called with the pool unlocked. 1616 */ 1617 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1618 } 1619 1620 /* 1621 * XXXSMP Because we do not want to cause non-MPSAFE code 1622 * to block. 1623 */ 1624 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1625 pp->pr_ipl == IPL_SOFTSERIAL) { 1626 KERNEL_LOCK(1, NULL); 1627 klock = true; 1628 } else 1629 klock = false; 1630 1631 /* Reclaim items from the pool's cache (if any). */ 1632 if (pp->pr_cache != NULL) 1633 pool_cache_invalidate(pp->pr_cache); 1634 1635 if (mutex_tryenter(&pp->pr_lock) == 0) { 1636 if (klock) { 1637 KERNEL_UNLOCK_ONE(NULL); 1638 } 1639 return 0; 1640 } 1641 1642 LIST_INIT(&pq); 1643 1644 curtime = time_uptime; 1645 1646 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1647 phnext = LIST_NEXT(ph, ph_pagelist); 1648 1649 /* Check our minimum page claim */ 1650 if (pp->pr_npages <= pp->pr_minpages) 1651 break; 1652 1653 KASSERT(ph->ph_nmissing == 0); 1654 if (curtime - ph->ph_time < pool_inactive_time) 1655 continue; 1656 1657 /* 1658 * If freeing this page would put us below 1659 * the low water mark, stop now. 1660 */ 1661 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1662 pp->pr_minitems) 1663 break; 1664 1665 pr_rmpage(pp, ph, &pq); 1666 } 1667 1668 mutex_exit(&pp->pr_lock); 1669 1670 if (LIST_EMPTY(&pq)) 1671 rv = 0; 1672 else { 1673 pr_pagelist_free(pp, &pq); 1674 rv = 1; 1675 } 1676 1677 if (klock) { 1678 KERNEL_UNLOCK_ONE(NULL); 1679 } 1680 1681 return rv; 1682 } 1683 1684 /* 1685 * Drain pools, one at a time. The drained pool is returned within ppp. 1686 * 1687 * Note, must never be called from interrupt context. 1688 */ 1689 bool 1690 pool_drain(struct pool **ppp) 1691 { 1692 bool reclaimed; 1693 struct pool *pp; 1694 1695 KASSERT(!TAILQ_EMPTY(&pool_head)); 1696 1697 pp = NULL; 1698 1699 /* Find next pool to drain, and add a reference. */ 1700 mutex_enter(&pool_head_lock); 1701 do { 1702 if (drainpp == NULL) { 1703 drainpp = TAILQ_FIRST(&pool_head); 1704 } 1705 if (drainpp != NULL) { 1706 pp = drainpp; 1707 drainpp = TAILQ_NEXT(pp, pr_poollist); 1708 } 1709 /* 1710 * Skip completely idle pools. We depend on at least 1711 * one pool in the system being active. 1712 */ 1713 } while (pp == NULL || pp->pr_npages == 0); 1714 pp->pr_refcnt++; 1715 mutex_exit(&pool_head_lock); 1716 1717 /* Drain the cache (if any) and pool.. */ 1718 reclaimed = pool_reclaim(pp); 1719 1720 /* Finally, unlock the pool. */ 1721 mutex_enter(&pool_head_lock); 1722 pp->pr_refcnt--; 1723 cv_broadcast(&pool_busy); 1724 mutex_exit(&pool_head_lock); 1725 1726 if (ppp != NULL) 1727 *ppp = pp; 1728 1729 return reclaimed; 1730 } 1731 1732 /* 1733 * Calculate the total number of pages consumed by pools. 
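 *
 * For pools flagged PR_RECURSIVE the bytes of outstanding items are
 * subtracted before the byte total is converted to pages with atop().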
1734 */ 1735 int 1736 pool_totalpages(void) 1737 { 1738 1739 mutex_enter(&pool_head_lock); 1740 int pages = pool_totalpages_locked(); 1741 mutex_exit(&pool_head_lock); 1742 1743 return pages; 1744 } 1745 1746 int 1747 pool_totalpages_locked(void) 1748 { 1749 struct pool *pp; 1750 uint64_t total = 0; 1751 1752 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1753 uint64_t bytes = pp->pr_npages * pp->pr_alloc->pa_pagesz; 1754 1755 if ((pp->pr_roflags & PR_RECURSIVE) != 0) 1756 bytes -= (pp->pr_nout * pp->pr_size); 1757 total += bytes; 1758 } 1759 1760 return atop(total); 1761 } 1762 1763 /* 1764 * Diagnostic helpers. 1765 */ 1766 1767 void 1768 pool_printall(const char *modif, void (*pr)(const char *, ...)) 1769 { 1770 struct pool *pp; 1771 1772 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1773 pool_printit(pp, modif, pr); 1774 } 1775 } 1776 1777 void 1778 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1779 { 1780 1781 if (pp == NULL) { 1782 (*pr)("Must specify a pool to print.\n"); 1783 return; 1784 } 1785 1786 pool_print1(pp, modif, pr); 1787 } 1788 1789 static void 1790 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1791 void (*pr)(const char *, ...)) 1792 { 1793 struct pool_item_header *ph; 1794 1795 LIST_FOREACH(ph, pl, ph_pagelist) { 1796 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1797 ph->ph_page, ph->ph_nmissing, ph->ph_time); 1798 #ifdef POOL_CHECK_MAGIC 1799 struct pool_item *pi; 1800 if (!(pp->pr_roflags & PR_USEBMAP)) { 1801 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1802 if (pi->pi_magic != PI_MAGIC) { 1803 (*pr)("\t\t\titem %p, magic 0x%x\n", 1804 pi, pi->pi_magic); 1805 } 1806 } 1807 } 1808 #endif 1809 } 1810 } 1811 1812 static void 1813 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1814 { 1815 struct pool_item_header *ph; 1816 pool_cache_t pc; 1817 pcg_t *pcg; 1818 pool_cache_cpu_t *cc; 1819 uint64_t cpuhit, cpumiss; 1820 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1821 char c; 1822 1823 while ((c = *modif++) != '\0') { 1824 if (c == 'l') 1825 print_log = 1; 1826 if (c == 'p') 1827 print_pagelist = 1; 1828 if (c == 'c') 1829 print_cache = 1; 1830 } 1831 1832 if ((pc = pp->pr_cache) != NULL) { 1833 (*pr)("POOL CACHE"); 1834 } else { 1835 (*pr)("POOL"); 1836 } 1837 1838 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1839 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1840 pp->pr_roflags); 1841 (*pr)("\talloc %p\n", pp->pr_alloc); 1842 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1843 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1844 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1845 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1846 1847 (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1848 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1849 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1850 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1851 1852 if (print_pagelist == 0) 1853 goto skip_pagelist; 1854 1855 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1856 (*pr)("\n\tempty page list:\n"); 1857 pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1858 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1859 (*pr)("\n\tfull page list:\n"); 1860 pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1861 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1862 (*pr)("\n\tpartial-page list:\n"); 1863 pool_print_pagelist(pp, &pp->pr_partpages, pr); 1864 1865 if 
(pp->pr_curpage == NULL) 1866 (*pr)("\tno current page\n"); 1867 else 1868 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1869 1870 skip_pagelist: 1871 if (print_log == 0) 1872 goto skip_log; 1873 1874 (*pr)("\n"); 1875 1876 skip_log: 1877 1878 #define PR_GROUPLIST(pcg) \ 1879 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1880 for (i = 0; i < pcg->pcg_size; i++) { \ 1881 if (pcg->pcg_objects[i].pcgo_pa != \ 1882 POOL_PADDR_INVALID) { \ 1883 (*pr)("\t\t\t%p, 0x%llx\n", \ 1884 pcg->pcg_objects[i].pcgo_va, \ 1885 (unsigned long long) \ 1886 pcg->pcg_objects[i].pcgo_pa); \ 1887 } else { \ 1888 (*pr)("\t\t\t%p\n", \ 1889 pcg->pcg_objects[i].pcgo_va); \ 1890 } \ 1891 } 1892 1893 if (pc != NULL) { 1894 cpuhit = 0; 1895 cpumiss = 0; 1896 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1897 if ((cc = pc->pc_cpus[i]) == NULL) 1898 continue; 1899 cpuhit += cc->cc_hits; 1900 cpumiss += cc->cc_misses; 1901 } 1902 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1903 (*pr)("\tcache layer hits %llu misses %llu\n", 1904 pc->pc_hits, pc->pc_misses); 1905 (*pr)("\tcache layer entry uncontended %llu contended %llu\n", 1906 pc->pc_hits + pc->pc_misses - pc->pc_contended, 1907 pc->pc_contended); 1908 (*pr)("\tcache layer empty groups %u full groups %u\n", 1909 pc->pc_nempty, pc->pc_nfull); 1910 if (print_cache) { 1911 (*pr)("\tfull cache groups:\n"); 1912 for (pcg = pc->pc_fullgroups; pcg != NULL; 1913 pcg = pcg->pcg_next) { 1914 PR_GROUPLIST(pcg); 1915 } 1916 (*pr)("\tempty cache groups:\n"); 1917 for (pcg = pc->pc_emptygroups; pcg != NULL; 1918 pcg = pcg->pcg_next) { 1919 PR_GROUPLIST(pcg); 1920 } 1921 } 1922 } 1923 #undef PR_GROUPLIST 1924 } 1925 1926 static int 1927 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 1928 { 1929 struct pool_item *pi; 1930 void *page; 1931 int n; 1932 1933 if ((pp->pr_roflags & PR_NOALIGN) == 0) { 1934 page = POOL_OBJ_TO_PAGE(pp, ph); 1935 if (page != ph->ph_page && 1936 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1937 if (label != NULL) 1938 printf("%s: ", label); 1939 printf("pool(%p:%s): page inconsistency: page %p;" 1940 " at page head addr %p (p %p)\n", pp, 1941 pp->pr_wchan, ph->ph_page, 1942 ph, page); 1943 return 1; 1944 } 1945 } 1946 1947 if ((pp->pr_roflags & PR_USEBMAP) != 0) 1948 return 0; 1949 1950 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 1951 pi != NULL; 1952 pi = LIST_NEXT(pi,pi_list), n++) { 1953 1954 #ifdef POOL_CHECK_MAGIC 1955 if (pi->pi_magic != PI_MAGIC) { 1956 if (label != NULL) 1957 printf("%s: ", label); 1958 printf("pool(%s): free list modified: magic=%x;" 1959 " page %p; item ordinal %d; addr %p\n", 1960 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1961 n, pi); 1962 panic("pool"); 1963 } 1964 #endif 1965 if ((pp->pr_roflags & PR_NOALIGN) != 0) { 1966 continue; 1967 } 1968 page = POOL_OBJ_TO_PAGE(pp, pi); 1969 if (page == ph->ph_page) 1970 continue; 1971 1972 if (label != NULL) 1973 printf("%s: ", label); 1974 printf("pool(%p:%s): page inconsistency: page %p;" 1975 " item ordinal %d; addr %p (p %p)\n", pp, 1976 pp->pr_wchan, ph->ph_page, 1977 n, pi, page); 1978 return 1; 1979 } 1980 return 0; 1981 } 1982 1983 1984 int 1985 pool_chk(struct pool *pp, const char *label) 1986 { 1987 struct pool_item_header *ph; 1988 int r = 0; 1989 1990 mutex_enter(&pp->pr_lock); 1991 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 1992 r = pool_chk_page(pp, label, ph); 1993 if (r) { 1994 goto out; 1995 } 1996 } 1997 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1998 r = pool_chk_page(pp, label, ph); 1999 
if (r) { 2000 goto out; 2001 } 2002 } 2003 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 2004 r = pool_chk_page(pp, label, ph); 2005 if (r) { 2006 goto out; 2007 } 2008 } 2009 2010 out: 2011 mutex_exit(&pp->pr_lock); 2012 return r; 2013 } 2014 2015 /* 2016 * pool_cache_init: 2017 * 2018 * Initialize a pool cache. 2019 */ 2020 pool_cache_t 2021 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 2022 const char *wchan, struct pool_allocator *palloc, int ipl, 2023 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 2024 { 2025 pool_cache_t pc; 2026 2027 pc = pool_get(&cache_pool, PR_WAITOK); 2028 if (pc == NULL) 2029 return NULL; 2030 2031 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 2032 palloc, ipl, ctor, dtor, arg); 2033 2034 return pc; 2035 } 2036 2037 /* 2038 * pool_cache_bootstrap: 2039 * 2040 * Kernel-private version of pool_cache_init(). The caller 2041 * provides initial storage. 2042 */ 2043 void 2044 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 2045 u_int align_offset, u_int flags, const char *wchan, 2046 struct pool_allocator *palloc, int ipl, 2047 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 2048 void *arg) 2049 { 2050 CPU_INFO_ITERATOR cii; 2051 pool_cache_t pc1; 2052 struct cpu_info *ci; 2053 struct pool *pp; 2054 2055 pp = &pc->pc_pool; 2056 if (palloc == NULL && ipl == IPL_NONE) { 2057 if (size > PAGE_SIZE) { 2058 int bigidx = pool_bigidx(size); 2059 2060 palloc = &pool_allocator_big[bigidx]; 2061 flags |= PR_NOALIGN; 2062 } else 2063 palloc = &pool_allocator_nointr; 2064 } 2065 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl); 2066 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl); 2067 2068 if (ctor == NULL) { 2069 ctor = NO_CTOR; 2070 } 2071 if (dtor == NULL) { 2072 dtor = NO_DTOR; 2073 } 2074 2075 pc->pc_emptygroups = NULL; 2076 pc->pc_fullgroups = NULL; 2077 pc->pc_partgroups = NULL; 2078 pc->pc_ctor = ctor; 2079 pc->pc_dtor = dtor; 2080 pc->pc_arg = arg; 2081 pc->pc_hits = 0; 2082 pc->pc_misses = 0; 2083 pc->pc_nempty = 0; 2084 pc->pc_npart = 0; 2085 pc->pc_nfull = 0; 2086 pc->pc_contended = 0; 2087 pc->pc_refcnt = 0; 2088 pc->pc_freecheck = NULL; 2089 2090 if ((flags & PR_LARGECACHE) != 0) { 2091 pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2092 pc->pc_pcgpool = &pcg_large_pool; 2093 } else { 2094 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2095 pc->pc_pcgpool = &pcg_normal_pool; 2096 } 2097 2098 /* Allocate per-CPU caches. */ 2099 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2100 pc->pc_ncpu = 0; 2101 if (ncpu < 2) { 2102 /* XXX For sparc: boot CPU is not attached yet. */ 2103 pool_cache_cpu_init1(curcpu(), pc); 2104 } else { 2105 for (CPU_INFO_FOREACH(cii, ci)) { 2106 pool_cache_cpu_init1(ci, pc); 2107 } 2108 } 2109 2110 /* Add to list of all pools. */ 2111 if (__predict_true(!cold)) 2112 mutex_enter(&pool_head_lock); 2113 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2114 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2115 break; 2116 } 2117 if (pc1 == NULL) 2118 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2119 else 2120 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2121 if (__predict_true(!cold)) 2122 mutex_exit(&pool_head_lock); 2123 2124 membar_sync(); 2125 pp->pr_cache = pc; 2126 } 2127 2128 /* 2129 * pool_cache_destroy: 2130 * 2131 * Destroy a pool cache. 
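 *
 *	A minimal usage sketch (hypothetical names, not from this file),
 *	pairing pool_cache_destroy() with pool_cache_init() above:
 *
 *		pool_cache_t foo_cache;
 *		void *obj;
 *
 *		foo_cache = pool_cache_init(sizeof(struct foo), 0, 0, 0,
 *		    "foocache", NULL, IPL_NONE, NULL, NULL, NULL);
 *		...
 *		obj = pool_cache_get(foo_cache, PR_WAITOK);
 *		...
 *		pool_cache_put(foo_cache, obj);
 *		pool_cache_destroy(foo_cache);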
2132 */ 2133 void 2134 pool_cache_destroy(pool_cache_t pc) 2135 { 2136 2137 pool_cache_bootstrap_destroy(pc); 2138 pool_put(&cache_pool, pc); 2139 } 2140 2141 /* 2142 * pool_cache_bootstrap_destroy: 2143 * 2144 * Destroy a pool cache. 2145 */ 2146 void 2147 pool_cache_bootstrap_destroy(pool_cache_t pc) 2148 { 2149 struct pool *pp = &pc->pc_pool; 2150 u_int i; 2151 2152 /* Remove it from the global list. */ 2153 mutex_enter(&pool_head_lock); 2154 while (pc->pc_refcnt != 0) 2155 cv_wait(&pool_busy, &pool_head_lock); 2156 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2157 mutex_exit(&pool_head_lock); 2158 2159 /* First, invalidate the entire cache. */ 2160 pool_cache_invalidate(pc); 2161 2162 /* Disassociate it from the pool. */ 2163 mutex_enter(&pp->pr_lock); 2164 pp->pr_cache = NULL; 2165 mutex_exit(&pp->pr_lock); 2166 2167 /* Destroy per-CPU data */ 2168 for (i = 0; i < __arraycount(pc->pc_cpus); i++) 2169 pool_cache_invalidate_cpu(pc, i); 2170 2171 /* Finally, destroy it. */ 2172 mutex_destroy(&pc->pc_lock); 2173 pool_destroy(pp); 2174 } 2175 2176 /* 2177 * pool_cache_cpu_init1: 2178 * 2179 * Called for each pool_cache whenever a new CPU is attached. 2180 */ 2181 static void 2182 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2183 { 2184 pool_cache_cpu_t *cc; 2185 int index; 2186 2187 index = ci->ci_index; 2188 2189 KASSERT(index < __arraycount(pc->pc_cpus)); 2190 2191 if ((cc = pc->pc_cpus[index]) != NULL) { 2192 KASSERT(cc->cc_cpuindex == index); 2193 return; 2194 } 2195 2196 /* 2197 * The first CPU is 'free'. This needs to be the case for 2198 * bootstrap - we may not be able to allocate yet. 2199 */ 2200 if (pc->pc_ncpu == 0) { 2201 cc = &pc->pc_cpu0; 2202 pc->pc_ncpu = 1; 2203 } else { 2204 mutex_enter(&pc->pc_lock); 2205 pc->pc_ncpu++; 2206 mutex_exit(&pc->pc_lock); 2207 cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2208 } 2209 2210 cc->cc_ipl = pc->pc_pool.pr_ipl; 2211 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl); 2212 cc->cc_cache = pc; 2213 cc->cc_cpuindex = index; 2214 cc->cc_hits = 0; 2215 cc->cc_misses = 0; 2216 cc->cc_current = __UNCONST(&pcg_dummy); 2217 cc->cc_previous = __UNCONST(&pcg_dummy); 2218 2219 pc->pc_cpus[index] = cc; 2220 } 2221 2222 /* 2223 * pool_cache_cpu_init: 2224 * 2225 * Called whenever a new CPU is attached. 2226 */ 2227 void 2228 pool_cache_cpu_init(struct cpu_info *ci) 2229 { 2230 pool_cache_t pc; 2231 2232 mutex_enter(&pool_head_lock); 2233 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2234 pc->pc_refcnt++; 2235 mutex_exit(&pool_head_lock); 2236 2237 pool_cache_cpu_init1(ci, pc); 2238 2239 mutex_enter(&pool_head_lock); 2240 pc->pc_refcnt--; 2241 cv_broadcast(&pool_busy); 2242 } 2243 mutex_exit(&pool_head_lock); 2244 } 2245 2246 /* 2247 * pool_cache_reclaim: 2248 * 2249 * Reclaim memory from a pool cache. 2250 */ 2251 bool 2252 pool_cache_reclaim(pool_cache_t pc) 2253 { 2254 2255 return pool_reclaim(&pc->pc_pool); 2256 } 2257 2258 static void 2259 pool_cache_destruct_object1(pool_cache_t pc, void *object) 2260 { 2261 (*pc->pc_dtor)(pc->pc_arg, object); 2262 pool_put(&pc->pc_pool, object); 2263 } 2264 2265 /* 2266 * pool_cache_destruct_object: 2267 * 2268 * Force destruction of an object and its release back into 2269 * the pool. 
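 *
 *	A hedged usage sketch (hypothetical caller): unlike pool_cache_put(),
 *	the object is not recycled through the per-CPU groups; the dtor runs
 *	and the storage goes straight back to the underlying pool, e.g.:
 *
 *		if (obj_is_unusable(obj))	/* hypothetical predicate */
 *			pool_cache_destruct_object(pc, obj);
 *		else
 *			pool_cache_put(pc, obj);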
2270 */ 2271 void 2272 pool_cache_destruct_object(pool_cache_t pc, void *object) 2273 { 2274 2275 FREECHECK_IN(&pc->pc_freecheck, object); 2276 2277 pool_cache_destruct_object1(pc, object); 2278 } 2279 2280 /* 2281 * pool_cache_invalidate_groups: 2282 * 2283 * Invalidate a chain of groups and destruct all objects. 2284 */ 2285 static void 2286 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2287 { 2288 void *object; 2289 pcg_t *next; 2290 int i; 2291 2292 for (; pcg != NULL; pcg = next) { 2293 next = pcg->pcg_next; 2294 2295 for (i = 0; i < pcg->pcg_avail; i++) { 2296 object = pcg->pcg_objects[i].pcgo_va; 2297 pool_cache_destruct_object1(pc, object); 2298 } 2299 2300 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2301 pool_put(&pcg_large_pool, pcg); 2302 } else { 2303 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2304 pool_put(&pcg_normal_pool, pcg); 2305 } 2306 } 2307 } 2308 2309 /* 2310 * pool_cache_invalidate: 2311 * 2312 * Invalidate a pool cache (destruct and release all of the 2313 * cached objects). Does not reclaim objects from the pool. 2314 * 2315 * Note: For pool caches that provide constructed objects, there 2316 * is an assumption that another level of synchronization is occurring 2317 * between the input to the constructor and the cache invalidation. 2318 * 2319 * Invalidation is a costly process and should not be called from 2320 * interrupt context. 2321 */ 2322 void 2323 pool_cache_invalidate(pool_cache_t pc) 2324 { 2325 uint64_t where; 2326 pcg_t *full, *empty, *part; 2327 2328 KASSERT(!cpu_intr_p() && !cpu_softintr_p()); 2329 2330 if (ncpu < 2 || !mp_online) { 2331 /* 2332 * We might be called early enough in the boot process 2333 * for the CPU data structures to not be fully initialized. 2334 * In this case, transfer the content of the local CPU's 2335 * cache back into global cache as only this CPU is currently 2336 * running. 2337 */ 2338 pool_cache_transfer(pc); 2339 } else { 2340 /* 2341 * Signal all CPUs that they must transfer their local 2342 * cache back to the global pool then wait for the xcall to 2343 * complete. 2344 */ 2345 where = xc_broadcast(0, 2346 __FPTRCAST(xcfunc_t, pool_cache_transfer), pc, NULL); 2347 xc_wait(where); 2348 } 2349 2350 /* Empty pool caches, then invalidate objects */ 2351 mutex_enter(&pc->pc_lock); 2352 full = pc->pc_fullgroups; 2353 empty = pc->pc_emptygroups; 2354 part = pc->pc_partgroups; 2355 pc->pc_fullgroups = NULL; 2356 pc->pc_emptygroups = NULL; 2357 pc->pc_partgroups = NULL; 2358 pc->pc_nfull = 0; 2359 pc->pc_nempty = 0; 2360 pc->pc_npart = 0; 2361 mutex_exit(&pc->pc_lock); 2362 2363 pool_cache_invalidate_groups(pc, full); 2364 pool_cache_invalidate_groups(pc, empty); 2365 pool_cache_invalidate_groups(pc, part); 2366 } 2367 2368 /* 2369 * pool_cache_invalidate_cpu: 2370 * 2371 * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2372 * identified by its associated index. 2373 * It is caller's responsibility to ensure that no operation is 2374 * taking place on this pool cache while doing this invalidation. 2375 * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2376 * pool cached objects from a CPU different from the one currently running 2377 * may result in an undefined behaviour. 
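 *
 *	Its caller in this file is pool_cache_bootstrap_destroy(), which
 *	invokes it once per possible CPU index only after the cache has been
 *	invalidated and removed from the global list, so the constraint
 *	above is satisfied there.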
2378 */ 2379 static void 2380 pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2381 { 2382 pool_cache_cpu_t *cc; 2383 pcg_t *pcg; 2384 2385 if ((cc = pc->pc_cpus[index]) == NULL) 2386 return; 2387 2388 if ((pcg = cc->cc_current) != &pcg_dummy) { 2389 pcg->pcg_next = NULL; 2390 pool_cache_invalidate_groups(pc, pcg); 2391 } 2392 if ((pcg = cc->cc_previous) != &pcg_dummy) { 2393 pcg->pcg_next = NULL; 2394 pool_cache_invalidate_groups(pc, pcg); 2395 } 2396 if (cc != &pc->pc_cpu0) 2397 pool_put(&cache_cpu_pool, cc); 2398 2399 } 2400 2401 void 2402 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2403 { 2404 2405 pool_set_drain_hook(&pc->pc_pool, fn, arg); 2406 } 2407 2408 void 2409 pool_cache_setlowat(pool_cache_t pc, int n) 2410 { 2411 2412 pool_setlowat(&pc->pc_pool, n); 2413 } 2414 2415 void 2416 pool_cache_sethiwat(pool_cache_t pc, int n) 2417 { 2418 2419 pool_sethiwat(&pc->pc_pool, n); 2420 } 2421 2422 void 2423 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2424 { 2425 2426 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2427 } 2428 2429 static bool __noinline 2430 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp, 2431 paddr_t *pap, int flags) 2432 { 2433 pcg_t *pcg, *cur; 2434 uint64_t ncsw; 2435 pool_cache_t pc; 2436 void *object; 2437 2438 KASSERT(cc->cc_current->pcg_avail == 0); 2439 KASSERT(cc->cc_previous->pcg_avail == 0); 2440 2441 pc = cc->cc_cache; 2442 cc->cc_misses++; 2443 2444 /* 2445 * Nothing was available locally. Try and grab a group 2446 * from the cache. 2447 */ 2448 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2449 ncsw = curlwp->l_ncsw; 2450 mutex_enter(&pc->pc_lock); 2451 pc->pc_contended++; 2452 2453 /* 2454 * If we context switched while locking, then 2455 * our view of the per-CPU data is invalid: 2456 * retry. 2457 */ 2458 if (curlwp->l_ncsw != ncsw) { 2459 mutex_exit(&pc->pc_lock); 2460 return true; 2461 } 2462 } 2463 2464 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) { 2465 /* 2466 * If there's a full group, release our empty 2467 * group back to the cache. Install the full 2468 * group as cc_current and return. 2469 */ 2470 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2471 KASSERT(cur->pcg_avail == 0); 2472 cur->pcg_next = pc->pc_emptygroups; 2473 pc->pc_emptygroups = cur; 2474 pc->pc_nempty++; 2475 } 2476 KASSERT(pcg->pcg_avail == pcg->pcg_size); 2477 cc->cc_current = pcg; 2478 pc->pc_fullgroups = pcg->pcg_next; 2479 pc->pc_hits++; 2480 pc->pc_nfull--; 2481 mutex_exit(&pc->pc_lock); 2482 return true; 2483 } 2484 2485 /* 2486 * Nothing available locally or in cache. Take the slow 2487 * path: fetch a new object from the pool and construct 2488 * it. 
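 *	The object is handed straight back to the caller through *objectp
 *	(it never passes through a cache group), and false is returned so
 *	that the retry loop in pool_cache_get_paddr() terminates.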
2489 */ 2490 pc->pc_misses++; 2491 mutex_exit(&pc->pc_lock); 2492 splx(s); 2493 2494 object = pool_get(&pc->pc_pool, flags); 2495 *objectp = object; 2496 if (__predict_false(object == NULL)) { 2497 KASSERT((flags & (PR_WAITOK|PR_NOWAIT)) == PR_NOWAIT); 2498 return false; 2499 } 2500 2501 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2502 pool_put(&pc->pc_pool, object); 2503 *objectp = NULL; 2504 return false; 2505 } 2506 2507 KASSERT((((vaddr_t)object) & (pc->pc_pool.pr_align - 1)) == 0); 2508 2509 if (pap != NULL) { 2510 #ifdef POOL_VTOPHYS 2511 *pap = POOL_VTOPHYS(object); 2512 #else 2513 *pap = POOL_PADDR_INVALID; 2514 #endif 2515 } 2516 2517 FREECHECK_OUT(&pc->pc_freecheck, object); 2518 pool_cache_kleak_fill(pc, object); 2519 return false; 2520 } 2521 2522 /* 2523 * pool_cache_get{,_paddr}: 2524 * 2525 * Get an object from a pool cache (optionally returning 2526 * the physical address of the object). 2527 */ 2528 void * 2529 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2530 { 2531 pool_cache_cpu_t *cc; 2532 pcg_t *pcg; 2533 void *object; 2534 int s; 2535 2536 KASSERT(!(flags & PR_NOWAIT) != !(flags & PR_WAITOK)); 2537 KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()) || 2538 (pc->pc_pool.pr_ipl != IPL_NONE || cold || panicstr != NULL), 2539 "%s: [%s] is IPL_NONE, but called from interrupt context", 2540 __func__, pc->pc_pool.pr_wchan); 2541 2542 if (flags & PR_WAITOK) { 2543 ASSERT_SLEEPABLE(); 2544 } 2545 2546 /* Lock out interrupts and disable preemption. */ 2547 s = splvm(); 2548 while (/* CONSTCOND */ true) { 2549 /* Try and allocate an object from the current group. */ 2550 cc = pc->pc_cpus[curcpu()->ci_index]; 2551 KASSERT(cc->cc_cache == pc); 2552 pcg = cc->cc_current; 2553 if (__predict_true(pcg->pcg_avail > 0)) { 2554 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2555 if (__predict_false(pap != NULL)) 2556 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2557 #if defined(DIAGNOSTIC) 2558 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2559 KASSERT(pcg->pcg_avail < pcg->pcg_size); 2560 KASSERT(object != NULL); 2561 #endif 2562 cc->cc_hits++; 2563 splx(s); 2564 FREECHECK_OUT(&pc->pc_freecheck, object); 2565 pool_redzone_fill(&pc->pc_pool, object); 2566 pool_cache_kleak_fill(pc, object); 2567 return object; 2568 } 2569 2570 /* 2571 * That failed. If the previous group isn't empty, swap 2572 * it with the current group and allocate from there. 2573 */ 2574 pcg = cc->cc_previous; 2575 if (__predict_true(pcg->pcg_avail > 0)) { 2576 cc->cc_previous = cc->cc_current; 2577 cc->cc_current = pcg; 2578 continue; 2579 } 2580 2581 /* 2582 * Can't allocate from either group: try the slow path. 2583 * If get_slow() allocated an object for us, or if 2584 * no more objects are available, it will return false. 2585 * Otherwise, we need to retry. 2586 */ 2587 if (!pool_cache_get_slow(cc, s, &object, pap, flags)) 2588 break; 2589 } 2590 2591 /* 2592 * We would like to KASSERT(object || (flags & PR_NOWAIT)), but 2593 * pool_cache_get can fail even in the PR_WAITOK case, if the 2594 * constructor fails. 
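 *	Callers therefore must check for NULL even with PR_WAITOK; a hedged
 *	sketch (hypothetical names), using the pool_cache_get() wrapper:
 *
 *		obj = pool_cache_get(foo_cache, PR_WAITOK);
 *		if (obj == NULL)
 *			return ENOMEM;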
2595 */ 2596 return object; 2597 } 2598 2599 static bool __noinline 2600 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) 2601 { 2602 struct lwp *l = curlwp; 2603 pcg_t *pcg, *cur; 2604 uint64_t ncsw; 2605 pool_cache_t pc; 2606 2607 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2608 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2609 2610 pc = cc->cc_cache; 2611 pcg = NULL; 2612 cc->cc_misses++; 2613 ncsw = l->l_ncsw; 2614 2615 /* 2616 * If there are no empty groups in the cache then allocate one 2617 * while still unlocked. 2618 */ 2619 if (__predict_false(pc->pc_emptygroups == NULL)) { 2620 if (__predict_true(!pool_cache_disable)) { 2621 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2622 } 2623 /* 2624 * If pool_get() blocked, then our view of 2625 * the per-CPU data is invalid: retry. 2626 */ 2627 if (__predict_false(l->l_ncsw != ncsw)) { 2628 if (pcg != NULL) { 2629 pool_put(pc->pc_pcgpool, pcg); 2630 } 2631 return true; 2632 } 2633 if (__predict_true(pcg != NULL)) { 2634 pcg->pcg_avail = 0; 2635 pcg->pcg_size = pc->pc_pcgsize; 2636 } 2637 } 2638 2639 /* Lock the cache. */ 2640 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { 2641 mutex_enter(&pc->pc_lock); 2642 pc->pc_contended++; 2643 2644 /* 2645 * If we context switched while locking, then our view of 2646 * the per-CPU data is invalid: retry. 2647 */ 2648 if (__predict_false(l->l_ncsw != ncsw)) { 2649 mutex_exit(&pc->pc_lock); 2650 if (pcg != NULL) { 2651 pool_put(pc->pc_pcgpool, pcg); 2652 } 2653 return true; 2654 } 2655 } 2656 2657 /* If there are no empty groups in the cache then allocate one. */ 2658 if (pcg == NULL && pc->pc_emptygroups != NULL) { 2659 pcg = pc->pc_emptygroups; 2660 pc->pc_emptygroups = pcg->pcg_next; 2661 pc->pc_nempty--; 2662 } 2663 2664 /* 2665 * If there's a empty group, release our full group back 2666 * to the cache. Install the empty group to the local CPU 2667 * and return. 2668 */ 2669 if (pcg != NULL) { 2670 KASSERT(pcg->pcg_avail == 0); 2671 if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2672 cc->cc_previous = pcg; 2673 } else { 2674 cur = cc->cc_current; 2675 if (__predict_true(cur != &pcg_dummy)) { 2676 KASSERT(cur->pcg_avail == cur->pcg_size); 2677 cur->pcg_next = pc->pc_fullgroups; 2678 pc->pc_fullgroups = cur; 2679 pc->pc_nfull++; 2680 } 2681 cc->cc_current = pcg; 2682 } 2683 pc->pc_hits++; 2684 mutex_exit(&pc->pc_lock); 2685 return true; 2686 } 2687 2688 /* 2689 * Nothing available locally or in cache, and we didn't 2690 * allocate an empty group. Take the slow path and destroy 2691 * the object here and now. 2692 */ 2693 pc->pc_misses++; 2694 mutex_exit(&pc->pc_lock); 2695 splx(s); 2696 pool_cache_destruct_object(pc, object); 2697 2698 return false; 2699 } 2700 2701 /* 2702 * pool_cache_put{,_paddr}: 2703 * 2704 * Put an object back to the pool cache (optionally caching the 2705 * physical address of the object). 2706 */ 2707 void 2708 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2709 { 2710 pool_cache_cpu_t *cc; 2711 pcg_t *pcg; 2712 int s; 2713 2714 KASSERT(object != NULL); 2715 pool_cache_redzone_check(pc, object); 2716 FREECHECK_IN(&pc->pc_freecheck, object); 2717 2718 if (pc->pc_pool.pr_roflags & PR_PHINPAGE) { 2719 pc_phinpage_check(pc, object); 2720 } 2721 2722 if (pool_cache_put_quarantine(pc, object, pa)) { 2723 return; 2724 } 2725 2726 /* Lock out interrupts and disable preemption. */ 2727 s = splvm(); 2728 while (/* CONSTCOND */ true) { 2729 /* If the current group isn't full, release it there. 
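		 * This is the common fast path: the object is stashed in the
		 * current per-CPU group under splvm() alone, without taking
		 * pc_lock.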
*/ 2730 cc = pc->pc_cpus[curcpu()->ci_index]; 2731 KASSERT(cc->cc_cache == pc); 2732 pcg = cc->cc_current; 2733 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2734 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2735 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2736 pcg->pcg_avail++; 2737 cc->cc_hits++; 2738 splx(s); 2739 return; 2740 } 2741 2742 /* 2743 * That failed. If the previous group isn't full, swap 2744 * it with the current group and try again. 2745 */ 2746 pcg = cc->cc_previous; 2747 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2748 cc->cc_previous = cc->cc_current; 2749 cc->cc_current = pcg; 2750 continue; 2751 } 2752 2753 /* 2754 * Can't free to either group: try the slow path. 2755 * If put_slow() releases the object for us, it 2756 * will return false. Otherwise we need to retry. 2757 */ 2758 if (!pool_cache_put_slow(cc, s, object)) 2759 break; 2760 } 2761 } 2762 2763 /* 2764 * pool_cache_transfer: 2765 * 2766 * Transfer objects from the per-CPU cache to the global cache. 2767 * Run within a cross-call thread. 2768 */ 2769 static void 2770 pool_cache_transfer(pool_cache_t pc) 2771 { 2772 pool_cache_cpu_t *cc; 2773 pcg_t *prev, *cur, **list; 2774 int s; 2775 2776 s = splvm(); 2777 mutex_enter(&pc->pc_lock); 2778 cc = pc->pc_cpus[curcpu()->ci_index]; 2779 cur = cc->cc_current; 2780 cc->cc_current = __UNCONST(&pcg_dummy); 2781 prev = cc->cc_previous; 2782 cc->cc_previous = __UNCONST(&pcg_dummy); 2783 if (cur != &pcg_dummy) { 2784 if (cur->pcg_avail == cur->pcg_size) { 2785 list = &pc->pc_fullgroups; 2786 pc->pc_nfull++; 2787 } else if (cur->pcg_avail == 0) { 2788 list = &pc->pc_emptygroups; 2789 pc->pc_nempty++; 2790 } else { 2791 list = &pc->pc_partgroups; 2792 pc->pc_npart++; 2793 } 2794 cur->pcg_next = *list; 2795 *list = cur; 2796 } 2797 if (prev != &pcg_dummy) { 2798 if (prev->pcg_avail == prev->pcg_size) { 2799 list = &pc->pc_fullgroups; 2800 pc->pc_nfull++; 2801 } else if (prev->pcg_avail == 0) { 2802 list = &pc->pc_emptygroups; 2803 pc->pc_nempty++; 2804 } else { 2805 list = &pc->pc_partgroups; 2806 pc->pc_npart++; 2807 } 2808 prev->pcg_next = *list; 2809 *list = prev; 2810 } 2811 mutex_exit(&pc->pc_lock); 2812 splx(s); 2813 } 2814 2815 static int 2816 pool_bigidx(size_t size) 2817 { 2818 int i; 2819 2820 for (i = 0; i < __arraycount(pool_allocator_big); i++) { 2821 if (1 << (i + POOL_ALLOCATOR_BIG_BASE) >= size) 2822 return i; 2823 } 2824 panic("pool item size %zu too large, use a custom allocator", size); 2825 } 2826 2827 static void * 2828 pool_allocator_alloc(struct pool *pp, int flags) 2829 { 2830 struct pool_allocator *pa = pp->pr_alloc; 2831 void *res; 2832 2833 res = (*pa->pa_alloc)(pp, flags); 2834 if (res == NULL && (flags & PR_WAITOK) == 0) { 2835 /* 2836 * We only run the drain hook here if PR_NOWAIT. 2837 * In other cases, the hook will be run in 2838 * pool_reclaim(). 2839 */ 2840 if (pp->pr_drain_hook != NULL) { 2841 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 2842 res = (*pa->pa_alloc)(pp, flags); 2843 } 2844 } 2845 return res; 2846 } 2847 2848 static void 2849 pool_allocator_free(struct pool *pp, void *v) 2850 { 2851 struct pool_allocator *pa = pp->pr_alloc; 2852 2853 if (pp->pr_redzone) { 2854 kasan_mark(v, pa->pa_pagesz, pa->pa_pagesz, 0); 2855 } 2856 (*pa->pa_free)(pp, v); 2857 } 2858 2859 void * 2860 pool_page_alloc(struct pool *pp, int flags) 2861 { 2862 const vm_flag_t vflags = (flags & PR_WAITOK) ? 
VM_SLEEP: VM_NOSLEEP; 2863 vmem_addr_t va; 2864 int ret; 2865 2866 ret = uvm_km_kmem_alloc(kmem_va_arena, pp->pr_alloc->pa_pagesz, 2867 vflags | VM_INSTANTFIT, &va); 2868 2869 return ret ? NULL : (void *)va; 2870 } 2871 2872 void 2873 pool_page_free(struct pool *pp, void *v) 2874 { 2875 2876 uvm_km_kmem_free(kmem_va_arena, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 2877 } 2878 2879 static void * 2880 pool_page_alloc_meta(struct pool *pp, int flags) 2881 { 2882 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2883 vmem_addr_t va; 2884 int ret; 2885 2886 ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 2887 vflags | VM_INSTANTFIT, &va); 2888 2889 return ret ? NULL : (void *)va; 2890 } 2891 2892 static void 2893 pool_page_free_meta(struct pool *pp, void *v) 2894 { 2895 2896 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 2897 } 2898 2899 #ifdef KLEAK 2900 static void 2901 pool_kleak_fill(struct pool *pp, void *p) 2902 { 2903 if (__predict_false(pp->pr_roflags & PR_NOTOUCH)) { 2904 return; 2905 } 2906 kleak_fill_area(p, pp->pr_size); 2907 } 2908 2909 static void 2910 pool_cache_kleak_fill(pool_cache_t pc, void *p) 2911 { 2912 if (__predict_false(pc_has_ctor(pc) || pc_has_dtor(pc))) { 2913 return; 2914 } 2915 pool_kleak_fill(&pc->pc_pool, p); 2916 } 2917 #endif 2918 2919 #ifdef POOL_QUARANTINE 2920 static void 2921 pool_quarantine_init(struct pool *pp) 2922 { 2923 pp->pr_quar.rotor = 0; 2924 memset(&pp->pr_quar, 0, sizeof(pp->pr_quar)); 2925 } 2926 2927 static void 2928 pool_quarantine_flush(struct pool *pp) 2929 { 2930 pool_quar_t *quar = &pp->pr_quar; 2931 struct pool_pagelist pq; 2932 size_t i; 2933 2934 LIST_INIT(&pq); 2935 2936 mutex_enter(&pp->pr_lock); 2937 for (i = 0; i < POOL_QUARANTINE_DEPTH; i++) { 2938 if (quar->list[i] == 0) 2939 continue; 2940 pool_do_put(pp, (void *)quar->list[i], &pq); 2941 } 2942 mutex_exit(&pp->pr_lock); 2943 2944 pr_pagelist_free(pp, &pq); 2945 } 2946 2947 static bool 2948 pool_put_quarantine(struct pool *pp, void *v, struct pool_pagelist *pq) 2949 { 2950 pool_quar_t *quar = &pp->pr_quar; 2951 uintptr_t old; 2952 2953 if (pp->pr_roflags & PR_NOTOUCH) { 2954 return false; 2955 } 2956 2957 pool_redzone_check(pp, v); 2958 2959 old = quar->list[quar->rotor]; 2960 quar->list[quar->rotor] = (uintptr_t)v; 2961 quar->rotor = (quar->rotor + 1) % POOL_QUARANTINE_DEPTH; 2962 if (old != 0) { 2963 pool_do_put(pp, (void *)old, pq); 2964 } 2965 2966 return true; 2967 } 2968 2969 static bool 2970 pool_cache_put_quarantine(pool_cache_t pc, void *p, paddr_t pa) 2971 { 2972 pool_cache_destruct_object(pc, p); 2973 return true; 2974 } 2975 #endif 2976 2977 #ifdef POOL_REDZONE 2978 #if defined(_LP64) 2979 # define PRIME 0x9e37fffffffc0000UL 2980 #else /* defined(_LP64) */ 2981 # define PRIME 0x9e3779b1 2982 #endif /* defined(_LP64) */ 2983 #define STATIC_BYTE 0xFE 2984 CTASSERT(POOL_REDZONE_SIZE > 1); 2985 2986 #ifndef KASAN 2987 static inline uint8_t 2988 pool_pattern_generate(const void *p) 2989 { 2990 return (uint8_t)(((uintptr_t)p) * PRIME 2991 >> ((sizeof(uintptr_t) - sizeof(uint8_t))) * CHAR_BIT); 2992 } 2993 #endif 2994 2995 static void 2996 pool_redzone_init(struct pool *pp, size_t requested_size) 2997 { 2998 size_t redzsz; 2999 size_t nsz; 3000 3001 #ifdef KASAN 3002 redzsz = requested_size; 3003 kasan_add_redzone(&redzsz); 3004 redzsz -= requested_size; 3005 #else 3006 redzsz = POOL_REDZONE_SIZE; 3007 #endif 3008 3009 if (pp->pr_roflags & PR_NOTOUCH) { 3010 pp->pr_redzone = false; 3011 return; 3012 } 3013 3014 /* 3015 * We 
may have extended the requested size earlier; check if 3016 * there's naturally space in the padding for a red zone. 3017 */ 3018 if (pp->pr_size - requested_size >= redzsz) { 3019 pp->pr_reqsize_with_redzone = requested_size + redzsz; 3020 pp->pr_redzone = true; 3021 return; 3022 } 3023 3024 /* 3025 * No space in the natural padding; check if we can extend a 3026 * bit the size of the pool. 3027 */ 3028 nsz = roundup(pp->pr_size + redzsz, pp->pr_align); 3029 if (nsz <= pp->pr_alloc->pa_pagesz) { 3030 /* Ok, we can */ 3031 pp->pr_size = nsz; 3032 pp->pr_reqsize_with_redzone = requested_size + redzsz; 3033 pp->pr_redzone = true; 3034 } else { 3035 /* No space for a red zone... snif :'( */ 3036 pp->pr_redzone = false; 3037 printf("pool redzone disabled for '%s'\n", pp->pr_wchan); 3038 } 3039 } 3040 3041 static void 3042 pool_redzone_fill(struct pool *pp, void *p) 3043 { 3044 if (!pp->pr_redzone) 3045 return; 3046 #ifdef KASAN 3047 kasan_mark(p, pp->pr_reqsize, pp->pr_reqsize_with_redzone, 3048 KASAN_POOL_REDZONE); 3049 #else 3050 uint8_t *cp, pat; 3051 const uint8_t *ep; 3052 3053 cp = (uint8_t *)p + pp->pr_reqsize; 3054 ep = cp + POOL_REDZONE_SIZE; 3055 3056 /* 3057 * We really don't want the first byte of the red zone to be '\0'; 3058 * an off-by-one in a string may not be properly detected. 3059 */ 3060 pat = pool_pattern_generate(cp); 3061 *cp = (pat == '\0') ? STATIC_BYTE: pat; 3062 cp++; 3063 3064 while (cp < ep) { 3065 *cp = pool_pattern_generate(cp); 3066 cp++; 3067 } 3068 #endif 3069 } 3070 3071 static void 3072 pool_redzone_check(struct pool *pp, void *p) 3073 { 3074 if (!pp->pr_redzone) 3075 return; 3076 #ifdef KASAN 3077 kasan_mark(p, 0, pp->pr_reqsize_with_redzone, KASAN_POOL_FREED); 3078 #else 3079 uint8_t *cp, pat, expected; 3080 const uint8_t *ep; 3081 3082 cp = (uint8_t *)p + pp->pr_reqsize; 3083 ep = cp + POOL_REDZONE_SIZE; 3084 3085 pat = pool_pattern_generate(cp); 3086 expected = (pat == '\0') ? STATIC_BYTE: pat; 3087 if (__predict_false(expected != *cp)) { 3088 printf("%s: %p: 0x%02x != 0x%02x\n", 3089 __func__, cp, *cp, expected); 3090 } 3091 cp++; 3092 3093 while (cp < ep) { 3094 expected = pool_pattern_generate(cp); 3095 if (__predict_false(*cp != expected)) { 3096 printf("%s: %p: 0x%02x != 0x%02x\n", 3097 __func__, cp, *cp, expected); 3098 } 3099 cp++; 3100 } 3101 #endif 3102 } 3103 3104 static void 3105 pool_cache_redzone_check(pool_cache_t pc, void *p) 3106 { 3107 #ifdef KASAN 3108 /* If there is a ctor/dtor, leave the data as valid. 
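	 * Cached objects stay in their constructed state and the dtor will
	 * still run on them later (see pool_cache_invalidate_groups()), so
	 * poisoning them here would make KASAN flag those legitimate
	 * accesses.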
*/ 3109 if (__predict_false(pc_has_ctor(pc) || pc_has_dtor(pc))) { 3110 return; 3111 } 3112 #endif 3113 pool_redzone_check(&pc->pc_pool, p); 3114 } 3115 3116 #endif /* POOL_REDZONE */ 3117 3118 #if defined(DDB) 3119 static bool 3120 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 3121 { 3122 3123 return (uintptr_t)ph->ph_page <= addr && 3124 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 3125 } 3126 3127 static bool 3128 pool_in_item(struct pool *pp, void *item, uintptr_t addr) 3129 { 3130 3131 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 3132 } 3133 3134 static bool 3135 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 3136 { 3137 int i; 3138 3139 if (pcg == NULL) { 3140 return false; 3141 } 3142 for (i = 0; i < pcg->pcg_avail; i++) { 3143 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 3144 return true; 3145 } 3146 } 3147 return false; 3148 } 3149 3150 static bool 3151 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 3152 { 3153 3154 if ((pp->pr_roflags & PR_USEBMAP) != 0) { 3155 unsigned int idx = pr_item_bitmap_index(pp, ph, (void *)addr); 3156 pool_item_bitmap_t *bitmap = 3157 ph->ph_bitmap + (idx / BITMAP_SIZE); 3158 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK); 3159 3160 return (*bitmap & mask) == 0; 3161 } else { 3162 struct pool_item *pi; 3163 3164 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 3165 if (pool_in_item(pp, pi, addr)) { 3166 return false; 3167 } 3168 } 3169 return true; 3170 } 3171 } 3172 3173 void 3174 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 3175 { 3176 struct pool *pp; 3177 3178 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 3179 struct pool_item_header *ph; 3180 uintptr_t item; 3181 bool allocated = true; 3182 bool incache = false; 3183 bool incpucache = false; 3184 char cpucachestr[32]; 3185 3186 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 3187 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 3188 if (pool_in_page(pp, ph, addr)) { 3189 goto found; 3190 } 3191 } 3192 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 3193 if (pool_in_page(pp, ph, addr)) { 3194 allocated = 3195 pool_allocated(pp, ph, addr); 3196 goto found; 3197 } 3198 } 3199 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 3200 if (pool_in_page(pp, ph, addr)) { 3201 allocated = false; 3202 goto found; 3203 } 3204 } 3205 continue; 3206 } else { 3207 ph = pr_find_pagehead_noalign(pp, (void *)addr); 3208 if (ph == NULL || !pool_in_page(pp, ph, addr)) { 3209 continue; 3210 } 3211 allocated = pool_allocated(pp, ph, addr); 3212 } 3213 found: 3214 if (allocated && pp->pr_cache) { 3215 pool_cache_t pc = pp->pr_cache; 3216 struct pool_cache_group *pcg; 3217 int i; 3218 3219 for (pcg = pc->pc_fullgroups; pcg != NULL; 3220 pcg = pcg->pcg_next) { 3221 if (pool_in_cg(pp, pcg, addr)) { 3222 incache = true; 3223 goto print; 3224 } 3225 } 3226 for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 3227 pool_cache_cpu_t *cc; 3228 3229 if ((cc = pc->pc_cpus[i]) == NULL) { 3230 continue; 3231 } 3232 if (pool_in_cg(pp, cc->cc_current, addr) || 3233 pool_in_cg(pp, cc->cc_previous, addr)) { 3234 struct cpu_info *ci = 3235 cpu_lookup(i); 3236 3237 incpucache = true; 3238 snprintf(cpucachestr, 3239 sizeof(cpucachestr), 3240 "cached by CPU %u", 3241 ci->ci_index); 3242 goto print; 3243 } 3244 } 3245 } 3246 print: 3247 item = (uintptr_t)ph->ph_page + ph->ph_off; 3248 item = item + rounddown(addr - item, pp->pr_size); 3249 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 3250 (void 
*)addr, item, (size_t)(addr - item), 3251 pp->pr_wchan, 3252 incpucache ? cpucachestr : 3253 incache ? "cached" : allocated ? "allocated" : "free"); 3254 } 3255 } 3256 #endif /* defined(DDB) */ 3257 3258 static int 3259 pool_sysctl(SYSCTLFN_ARGS) 3260 { 3261 struct pool_sysctl data; 3262 struct pool *pp; 3263 struct pool_cache *pc; 3264 pool_cache_cpu_t *cc; 3265 int error; 3266 size_t i, written; 3267 3268 if (oldp == NULL) { 3269 *oldlenp = 0; 3270 TAILQ_FOREACH(pp, &pool_head, pr_poollist) 3271 *oldlenp += sizeof(data); 3272 return 0; 3273 } 3274 3275 memset(&data, 0, sizeof(data)); 3276 error = 0; 3277 written = 0; 3278 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 3279 if (written + sizeof(data) > *oldlenp) 3280 break; 3281 strlcpy(data.pr_wchan, pp->pr_wchan, sizeof(data.pr_wchan)); 3282 data.pr_pagesize = pp->pr_alloc->pa_pagesz; 3283 data.pr_flags = pp->pr_roflags | pp->pr_flags; 3284 #define COPY(field) data.field = pp->field 3285 COPY(pr_size); 3286 3287 COPY(pr_itemsperpage); 3288 COPY(pr_nitems); 3289 COPY(pr_nout); 3290 COPY(pr_hardlimit); 3291 COPY(pr_npages); 3292 COPY(pr_minpages); 3293 COPY(pr_maxpages); 3294 3295 COPY(pr_nget); 3296 COPY(pr_nfail); 3297 COPY(pr_nput); 3298 COPY(pr_npagealloc); 3299 COPY(pr_npagefree); 3300 COPY(pr_hiwat); 3301 COPY(pr_nidle); 3302 #undef COPY 3303 3304 data.pr_cache_nmiss_pcpu = 0; 3305 data.pr_cache_nhit_pcpu = 0; 3306 if (pp->pr_cache) { 3307 pc = pp->pr_cache; 3308 data.pr_cache_meta_size = pc->pc_pcgsize; 3309 data.pr_cache_nfull = pc->pc_nfull; 3310 data.pr_cache_npartial = pc->pc_npart; 3311 data.pr_cache_nempty = pc->pc_nempty; 3312 data.pr_cache_ncontended = pc->pc_contended; 3313 data.pr_cache_nmiss_global = pc->pc_misses; 3314 data.pr_cache_nhit_global = pc->pc_hits; 3315 for (i = 0; i < pc->pc_ncpu; ++i) { 3316 cc = pc->pc_cpus[i]; 3317 if (cc == NULL) 3318 continue; 3319 data.pr_cache_nmiss_pcpu += cc->cc_misses; 3320 data.pr_cache_nhit_pcpu += cc->cc_hits; 3321 } 3322 } else { 3323 data.pr_cache_meta_size = 0; 3324 data.pr_cache_nfull = 0; 3325 data.pr_cache_npartial = 0; 3326 data.pr_cache_nempty = 0; 3327 data.pr_cache_ncontended = 0; 3328 data.pr_cache_nmiss_global = 0; 3329 data.pr_cache_nhit_global = 0; 3330 } 3331 3332 error = sysctl_copyout(l, &data, oldp, sizeof(data)); 3333 if (error) 3334 break; 3335 written += sizeof(data); 3336 oldp = (char *)oldp + sizeof(data); 3337 } 3338 3339 *oldlenp = written; 3340 return error; 3341 } 3342 3343 SYSCTL_SETUP(sysctl_pool_setup, "sysctl kern.pool setup") 3344 { 3345 const struct sysctlnode *rnode = NULL; 3346 3347 sysctl_createv(clog, 0, NULL, &rnode, 3348 CTLFLAG_PERMANENT, 3349 CTLTYPE_STRUCT, "pool", 3350 SYSCTL_DESCR("Get pool statistics"), 3351 pool_sysctl, 0, NULL, 0, 3352 CTL_KERN, CTL_CREATE, CTL_EOL); 3353 } 3354
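/*
 * Illustrative userland consumer of the "kern.pool" node created above
 * (a sketch only, not kernel code; assumes struct pool_sysctl is visible
 * to userland via <sys/pool.h>):
 *
 *	size_t len;
 *	struct pool_sysctl *ps;
 *
 *	if (sysctlbyname("kern.pool", NULL, &len, NULL, 0) == -1)
 *		err(1, "sysctlbyname");
 *	if ((ps = malloc(len)) == NULL ||
 *	    sysctlbyname("kern.pool", ps, &len, NULL, 0) == -1)
 *		err(1, "kern.pool");
 *	for (size_t i = 0; i < len / sizeof(*ps); i++)
 *		printf("%-13s %" PRIu64 " pages\n",
 *		    ps[i].pr_wchan, ps[i].pr_npages);
 *	free(ps);
 */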