1 /* $OpenBSD: subr_pool.c,v 1.237 2025/01/04 09:26:01 mvs Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/percpu.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/*
 * List of all pools.  pool_init() and pool_destroy() modify it with
 * `pool_lock' held for writing.
 */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it.  The counter
 * must never wrap: pool_init() panics if it does.  That many pools are
 * not expected to be created.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

/*
 * Table of lock operations used by a pool.  pool_init() points
 * pr_lock_ops at the rwlock table for PR_RWLOCK pools and at the mutex
 * table otherwise; the pl_*() helpers below dispatch through it.
 */
struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

/*
 * Initialize lock `pl' of pool `pp'.  With WITNESS a static lock_type
 * named after the `pl' argument expression is passed along; without it
 * the lock type is NULL.
 */
#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

/* Thin wrappers that forward to the pool's lock operation table. */
static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

/*
 * Header written at the start of every free item.  pi_magic holds
 * POOL_IMAGIC() for the item and is verified before the item is handed
 * out again or its page is freed.
 */
struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
/* expected pi_magic: the item's address XORed with its page's magic */
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;	/* random, set per page */
	uint64_t		ph_timestamp;	/* when page became empty */
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/* true if the items of this page are poisoned while free */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/*
 * Whether the cached item is poisoned is stored in a flag bit of
 * ci_nitems (0x8000000, i.e. bit 27) just above the bits that hold the
 * item count.
 */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

/* initial debug level: 1 when built with POOL_DEBUG, 0 otherwise */
#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

/* true if the page header lives inside the page, at pr_phoffset */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
226 */ 227 void *pool_page_alloc(struct pool *, int, int *); 228 void pool_page_free(struct pool *, void *); 229 230 /* 231 * safe for interrupts; this is the default allocator 232 */ 233 struct pool_allocator pool_allocator_single = { 234 pool_page_alloc, 235 pool_page_free, 236 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED) 237 }; 238 239 void *pool_multi_alloc(struct pool *, int, int *); 240 void pool_multi_free(struct pool *, void *); 241 242 struct pool_allocator pool_allocator_multi = { 243 pool_multi_alloc, 244 pool_multi_free, 245 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 246 }; 247 248 void *pool_multi_alloc_ni(struct pool *, int, int *); 249 void pool_multi_free_ni(struct pool *, void *); 250 251 struct pool_allocator pool_allocator_multi_ni = { 252 pool_multi_alloc_ni, 253 pool_multi_free_ni, 254 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 255 }; 256 257 #ifdef DDB 258 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) 259 __attribute__((__format__(__kprintf__,1,2)))); 260 void pool_print1(struct pool *, const char *, int (*)(const char *, ...) 
261 __attribute__((__format__(__kprintf__,1,2)))); 262 #endif 263 264 /* stale page garbage collectors */ 265 void pool_gc_sched(void *); 266 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); 267 void pool_gc_pages(void *); 268 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); 269 270 #define POOL_WAIT_FREE SEC_TO_NSEC(1) 271 #define POOL_WAIT_GC SEC_TO_NSEC(8) 272 273 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 274 275 static inline int 276 phtree_compare(const struct pool_page_header *a, 277 const struct pool_page_header *b) 278 { 279 vaddr_t va = (vaddr_t)a->ph_page; 280 vaddr_t vb = (vaddr_t)b->ph_page; 281 282 /* the compares in this order are important for the NFIND to work */ 283 if (vb < va) 284 return (-1); 285 if (vb > va) 286 return (1); 287 288 return (0); 289 } 290 291 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 292 293 /* 294 * Return the pool page header based on page address. 295 */ 296 static inline struct pool_page_header * 297 pr_find_pagehead(struct pool *pp, void *v) 298 { 299 struct pool_page_header *ph, key; 300 301 if (POOL_INPGHDR(pp)) { 302 caddr_t page; 303 304 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 305 306 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 307 } 308 309 key.ph_page = v; 310 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 311 if (ph == NULL) 312 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 313 314 KASSERT(ph->ph_page <= (caddr_t)v); 315 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 316 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 317 318 return (ph); 319 } 320 321 /* 322 * Initialize the given pool resource structure. 323 * 324 * We export this routine to allow other kernel parts to declare 325 * static pools that must be initialized before malloc() is available. 
 *
 * A zero `align' selects ALIGN(1).  `size' is raised to at least
 * sizeof(struct pool_item) and rounded up to `align'.  With a NULL
 * `palloc' one of the built-in allocators is chosen from the computed
 * page size; otherwise the page size is shrunk to one the allocator
 * advertises in pa_pagesz, and the function panics if a single item
 * does not fit.  The first call also initializes the private `phpool'.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	/* a free item must be able to hold the pool_item header */
	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	/* grow the page size until a page holds at least 8 items */
	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		/* supported page sizes without the "aligned" flag bit */
		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			/* the header fits in the slack after the items */
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			/* give up item slots to make room for the header */
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	refcnt_init(&pp->pr_refcnt);
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	/* a zero pr_size means phpool has not been set up yet */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 *
 * Unlinks the pool from the global list, waits for outstanding
 * references to drain, destroys the per-CPU cache if one exists and
 * frees every page on the empty list.  Under DIAGNOSTIC it panics if
 * items are still outstanding.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		/* singly-linked list: track the predecessor to unlink pp */
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Wait for concurrent sysctl_dopool() */
	refcnt_finalize(&pp->pr_refcnt, "pooldtor");

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		/* pool_p_free() asserts that pr_lock is not held */
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

/*
 * Prepare a pool request: record the completion handler and its
 * cookie, and clear the item slot.
 */
void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

/*
 * Append a request to the pool's request queue and immediately try to
 * service the queue with PR_NOWAIT.
 */
void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

/*
 * Rendezvous between a pool_get() waiting for an item and
 * pool_get_done(), which stores the item in `v' under `lock'.
 */
struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 *
 * `flags' must contain PR_WAITOK or PR_NOWAIT (PR_WAITOK for PR_RWLOCK
 * pools).  Returns NULL only on the failure path: when no item could
 * be obtained and PR_NOWAIT is set, or when the hard limit is reached
 * and PR_NOWAIT or PR_LIMITFAIL is set.  Otherwise the caller queues a
 * request and sleeps until an item is delivered.  With PR_ZERO the
 * item is zeroed before it is returned.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	/* try the pool cache first, if the pool has one */
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		/* nothing available: queue a request and sleep for it */
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	TRACEPOINT(uvm, pool_get, pp, v, flags);

	return (v);

fail:
	/* reached with pr_lock still held */
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

/*
 * Request handler installed by pool_get(): publish the item to the
 * sleeping caller and wake it up.
 */
void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}

/*
 * Try to satisfy queued requests.
 *
 * Called with pr_requests_lock held and pr_lock not held (both are
 * asserted).  pr_requesting serves as a re-entry guard: a caller that
 * finds it non-zero only bumps it and returns, which makes the active
 * runner go around its loop again.  Requests that could not be
 * satisfied are put back on the pool's queue before returning.
 */
void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* move the pending requests onto the private list */
		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		/* run the handlers of satisfied requests with no locks held */
		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

/*
 * Take one item off the pool's current page.
 *
 * pr_lock is held on entry and on return, but is released while a new
 * page is allocated.  Returns NULL if no page could be allocated.
 * Panics if the free list of the current page is empty or if the
 * item's magic (or, under DIAGNOSTIC with poisoning, its poison
 * pattern) has been modified.
 */
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			/* undo the accounting done above */
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		/* the poisoned region starts right after the item header */
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
779 */ 780 void 781 pool_put(struct pool *pp, void *v) 782 { 783 struct pool_page_header *ph, *freeph = NULL; 784 785 #ifdef DIAGNOSTIC 786 if (v == NULL) 787 panic("%s: NULL item", __func__); 788 #endif 789 790 TRACEPOINT(uvm, pool_put, pp, v); 791 792 #ifdef MULTIPROCESSOR 793 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) { 794 pool_cache_put(pp, v); 795 return; 796 } 797 #endif 798 799 pl_enter(pp, &pp->pr_lock); 800 801 pool_do_put(pp, v); 802 803 pp->pr_nout--; 804 pp->pr_nput++; 805 806 /* is it time to free a page? */ 807 if (pp->pr_nidle > pp->pr_maxpages && 808 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 809 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) { 810 freeph = ph; 811 pool_p_remove(pp, freeph); 812 } 813 814 pl_leave(pp, &pp->pr_lock); 815 816 if (freeph != NULL) 817 pool_p_free(pp, freeph); 818 819 pool_wakeup(pp); 820 } 821 822 void 823 pool_wakeup(struct pool *pp) 824 { 825 if (!TAILQ_EMPTY(&pp->pr_requests)) { 826 pl_enter(pp, &pp->pr_requests_lock); 827 pool_runqueue(pp, PR_NOWAIT); 828 pl_leave(pp, &pp->pr_requests_lock); 829 } 830 } 831 832 void 833 pool_do_put(struct pool *pp, void *v) 834 { 835 struct pool_item *pi = v; 836 struct pool_page_header *ph; 837 838 splassert(pp->pr_ipl); 839 840 ph = pr_find_pagehead(pp, v); 841 842 #ifdef DIAGNOSTIC 843 if (pool_debug) { 844 struct pool_item *qi; 845 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) { 846 if (pi == qi) { 847 panic("%s: %s: double pool_put: %p", __func__, 848 pp->pr_wchan, pi); 849 } 850 } 851 } 852 #endif /* DIAGNOSTIC */ 853 854 pi->pi_magic = POOL_IMAGIC(ph, pi); 855 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 856 #ifdef DIAGNOSTIC 857 if (POOL_PHPOISON(ph)) 858 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 859 #endif /* DIAGNOSTIC */ 860 861 if (ph->ph_nmissing-- == pp->pr_itemsperpage) { 862 /* 863 * The page was previously completely full, move it to the 864 * partially-full list. 
865 */ 866 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 867 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 868 } 869 870 if (ph->ph_nmissing == 0) { 871 /* 872 * The page is now empty, so move it to the empty page list. 873 */ 874 pp->pr_nidle++; 875 876 ph->ph_timestamp = getnsecuptime(); 877 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 878 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 879 pool_update_curpage(pp); 880 } 881 } 882 883 /* 884 * Add N items to the pool. 885 */ 886 int 887 pool_prime(struct pool *pp, int n) 888 { 889 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 890 struct pool_page_header *ph; 891 int newpages; 892 893 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 894 895 while (newpages-- > 0) { 896 int slowdown = 0; 897 898 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 899 if (ph == NULL) /* or slowdown? */ 900 break; 901 902 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 903 } 904 905 pl_enter(pp, &pp->pr_lock); 906 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 907 TAILQ_REMOVE(&pl, ph, ph_entry); 908 pool_p_insert(pp, ph); 909 } 910 pl_leave(pp, &pp->pr_lock); 911 912 return (0); 913 } 914 915 struct pool_page_header * 916 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 917 { 918 struct pool_page_header *ph; 919 struct pool_item *pi; 920 caddr_t addr; 921 unsigned int order; 922 int o; 923 int n; 924 925 pl_assert_unlocked(pp, &pp->pr_lock); 926 KASSERT(pp->pr_size >= sizeof(*pi)); 927 928 addr = pool_allocator_alloc(pp, flags, slowdown); 929 if (addr == NULL) 930 return (NULL); 931 932 if (POOL_INPGHDR(pp)) 933 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 934 else { 935 ph = pool_get(&phpool, flags); 936 if (ph == NULL) { 937 pool_allocator_free(pp, addr); 938 return (NULL); 939 } 940 } 941 942 XSIMPLEQ_INIT(&ph->ph_items); 943 ph->ph_page = addr; 944 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 945 ph->ph_colored = addr; 946 ph->ph_nmissing = 0; 947 
arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 948 #ifdef DIAGNOSTIC 949 /* use a bit in ph_magic to record if we poison page items */ 950 if (pool_debug) 951 SET(ph->ph_magic, POOL_MAGICBIT); 952 else 953 CLR(ph->ph_magic, POOL_MAGICBIT); 954 #endif /* DIAGNOSTIC */ 955 956 n = pp->pr_itemsperpage; 957 o = 32; 958 while (n--) { 959 pi = (struct pool_item *)addr; 960 pi->pi_magic = POOL_IMAGIC(ph, pi); 961 962 if (o == 32) { 963 order = arc4random(); 964 o = 0; 965 } 966 if (ISSET(order, 1U << o++)) 967 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 968 else 969 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 970 971 #ifdef DIAGNOSTIC 972 if (POOL_PHPOISON(ph)) 973 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 974 #endif /* DIAGNOSTIC */ 975 976 addr += pp->pr_size; 977 } 978 979 return (ph); 980 } 981 982 void 983 pool_p_free(struct pool *pp, struct pool_page_header *ph) 984 { 985 struct pool_item *pi; 986 987 pl_assert_unlocked(pp, &pp->pr_lock); 988 KASSERT(ph->ph_nmissing == 0); 989 990 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 991 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 992 panic("%s: %s free list modified: " 993 "page %p; item addr %p; offset 0x%x=0x%lx", 994 __func__, pp->pr_wchan, ph->ph_page, pi, 995 0, pi->pi_magic); 996 } 997 998 #ifdef DIAGNOSTIC 999 if (POOL_PHPOISON(ph)) { 1000 size_t pidx; 1001 uint32_t pval; 1002 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1003 &pidx, &pval)) { 1004 int *ip = (int *)(pi + 1); 1005 panic("%s: %s free list modified: " 1006 "page %p; item addr %p; offset 0x%zx=0x%x", 1007 __func__, pp->pr_wchan, ph->ph_page, pi, 1008 pidx * sizeof(int), ip[pidx]); 1009 } 1010 } 1011 #endif 1012 } 1013 1014 pool_allocator_free(pp, ph->ph_page); 1015 1016 if (!POOL_INPGHDR(pp)) 1017 pool_put(&phpool, ph); 1018 } 1019 1020 void 1021 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1022 { 1023 pl_assert_locked(pp, &pp->pr_lock); 1024 1025 /* If the pool was depleted, point at the new 
page */ 1026 if (pp->pr_curpage == NULL) 1027 pp->pr_curpage = ph; 1028 1029 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1030 if (!POOL_INPGHDR(pp)) 1031 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1032 1033 pp->pr_nitems += pp->pr_itemsperpage; 1034 pp->pr_nidle++; 1035 1036 pp->pr_npagealloc++; 1037 if (++pp->pr_npages > pp->pr_hiwat) 1038 pp->pr_hiwat = pp->pr_npages; 1039 } 1040 1041 void 1042 pool_p_remove(struct pool *pp, struct pool_page_header *ph) 1043 { 1044 pl_assert_locked(pp, &pp->pr_lock); 1045 1046 pp->pr_npagefree++; 1047 pp->pr_npages--; 1048 pp->pr_nidle--; 1049 pp->pr_nitems -= pp->pr_itemsperpage; 1050 1051 if (!POOL_INPGHDR(pp)) 1052 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1053 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1054 1055 pool_update_curpage(pp); 1056 } 1057 1058 void 1059 pool_update_curpage(struct pool *pp) 1060 { 1061 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1062 if (pp->pr_curpage == NULL) { 1063 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1064 } 1065 } 1066 1067 void 1068 pool_setlowat(struct pool *pp, int n) 1069 { 1070 int prime = 0; 1071 1072 pl_enter(pp, &pp->pr_lock); 1073 pp->pr_minitems = n; 1074 pp->pr_minpages = (n == 0) 1075 ? 0 1076 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1077 1078 if (pp->pr_nitems < n) 1079 prime = n - pp->pr_nitems; 1080 pl_leave(pp, &pp->pr_lock); 1081 1082 if (prime > 0) 1083 pool_prime(pp, prime); 1084 } 1085 1086 void 1087 pool_sethiwat(struct pool *pp, int n) 1088 { 1089 pp->pr_maxpages = (n == 0) 1090 ? 
0 1091 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1092 } 1093 1094 int 1095 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1096 { 1097 int error = 0; 1098 1099 if (n < pp->pr_nout) { 1100 error = EINVAL; 1101 goto done; 1102 } 1103 1104 pp->pr_hardlimit = n; 1105 pp->pr_hardlimit_warning = warnmsg; 1106 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1107 pp->pr_hardlimit_warning_last.tv_sec = 0; 1108 pp->pr_hardlimit_warning_last.tv_usec = 0; 1109 1110 done: 1111 return (error); 1112 } 1113 1114 void 1115 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1116 { 1117 pp->pr_crange = mode; 1118 } 1119 1120 /* 1121 * Release all complete pages that have not been used recently. 1122 * 1123 * Returns non-zero if any pages have been reclaimed. 1124 */ 1125 int 1126 pool_reclaim(struct pool *pp) 1127 { 1128 struct pool_page_header *ph, *phnext; 1129 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1130 1131 pl_enter(pp, &pp->pr_lock); 1132 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1133 phnext = TAILQ_NEXT(ph, ph_entry); 1134 1135 /* Check our minimum page claim */ 1136 if (pp->pr_npages <= pp->pr_minpages) 1137 break; 1138 1139 /* 1140 * If freeing this page would put us below 1141 * the low water mark, stop now. 1142 */ 1143 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1144 pp->pr_minitems) 1145 break; 1146 1147 pool_p_remove(pp, ph); 1148 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1149 } 1150 pl_leave(pp, &pp->pr_lock); 1151 1152 if (TAILQ_EMPTY(&pl)) 1153 return (0); 1154 1155 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1156 TAILQ_REMOVE(&pl, ph, ph_entry); 1157 pool_p_free(pp, ph); 1158 } 1159 1160 return (1); 1161 } 1162 1163 /* 1164 * Release all complete pages that have not been used recently 1165 * from all pools. 
1166 */ 1167 void 1168 pool_reclaim_all(void) 1169 { 1170 struct pool *pp; 1171 1172 rw_enter_read(&pool_lock); 1173 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1174 pool_reclaim(pp); 1175 rw_exit_read(&pool_lock); 1176 } 1177 1178 #ifdef DDB 1179 #include <machine/db_machdep.h> 1180 #include <ddb/db_output.h> 1181 1182 /* 1183 * Diagnostic helpers. 1184 */ 1185 void 1186 pool_printit(struct pool *pp, const char *modif, 1187 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1188 { 1189 pool_print1(pp, modif, pr); 1190 } 1191 1192 void 1193 pool_print_pagelist(struct pool_pagelist *pl, 1194 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1195 { 1196 struct pool_page_header *ph; 1197 struct pool_item *pi; 1198 1199 TAILQ_FOREACH(ph, pl, ph_entry) { 1200 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1201 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1202 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1203 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1204 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1205 pi, pi->pi_magic); 1206 } 1207 } 1208 } 1209 } 1210 1211 void 1212 pool_print1(struct pool *pp, const char *modif, 1213 int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) 1214 { 1215 struct pool_page_header *ph; 1216 int print_pagelist = 0; 1217 char c; 1218 1219 while ((c = *modif++) != '\0') { 1220 if (c == 'p') 1221 print_pagelist = 1; 1222 modif++; 1223 } 1224 1225 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1226 pp->pr_maxcolors); 1227 (*pr)("\talloc %p\n", pp->pr_alloc); 1228 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1229 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1230 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1231 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1232 1233 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1234 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1235 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1236 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1237 1238 if (print_pagelist == 0) 1239 return; 1240 1241 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1242 (*pr)("\n\tempty page list:\n"); 1243 pool_print_pagelist(&pp->pr_emptypages, pr); 1244 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1245 (*pr)("\n\tfull page list:\n"); 1246 pool_print_pagelist(&pp->pr_fullpages, pr); 1247 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1248 (*pr)("\n\tpartial-page list:\n"); 1249 pool_print_pagelist(&pp->pr_partpages, pr); 1250 1251 if (pp->pr_curpage == NULL) 1252 (*pr)("\tno current page\n"); 1253 else 1254 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1255 } 1256 1257 void 1258 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1259 { 1260 struct pool *pp; 1261 char maxp[16]; 1262 int ovflw; 1263 char mode; 1264 1265 mode = modif[0]; 1266 if (mode != '\0' && mode != 'a') { 1267 db_printf("usage: show all pools [/a]\n"); 1268 return; 1269 } 1270 1271 if (mode == '\0') 1272 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1273 "Name", 1274 "Size", 1275 "Requests", 1276 "Fail", 1277 
"Releases", 1278 "Pgreq", 1279 "Pgrel", 1280 "Npage", 1281 "Hiwat", 1282 "Minpg", 1283 "Maxpg", 1284 "Idle"); 1285 else 1286 db_printf("%-12s %18s %18s\n", 1287 "Name", "Address", "Allocator"); 1288 1289 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1290 if (mode == 'a') { 1291 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1292 pp->pr_alloc); 1293 continue; 1294 } 1295 1296 if (!pp->pr_nget) 1297 continue; 1298 1299 if (pp->pr_maxpages == UINT_MAX) 1300 snprintf(maxp, sizeof maxp, "inf"); 1301 else 1302 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1303 1304 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1305 (ovflw) += db_printf((fmt), \ 1306 (width) - (fixed) - (ovflw) > 0 ? \ 1307 (width) - (fixed) - (ovflw) : 0, \ 1308 (val)) - (width); \ 1309 if ((ovflw) < 0) \ 1310 (ovflw) = 0; \ 1311 } while (/* CONSTCOND */0) 1312 1313 ovflw = 0; 1314 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1315 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1316 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1317 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1318 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1319 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1320 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1321 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1322 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1323 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1324 PRWORD(ovflw, " %*s", 6, 1, maxp); 1325 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1326 1327 pool_chk(pp); 1328 } 1329 } 1330 #endif /* DDB */ 1331 1332 #if defined(POOL_DEBUG) || defined(DDB) 1333 int 1334 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1335 { 1336 struct pool_item *pi; 1337 caddr_t page; 1338 int n; 1339 const char *label = pp->pr_wchan; 1340 1341 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1342 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1343 printf("%s: ", label); 1344 printf("pool(%p:%s): page inconsistency: page %p; " 1345 "at page head addr %p (p %p)\n", 1346 pp, 
pp->pr_wchan, ph->ph_page, ph, page); 1347 return 1; 1348 } 1349 1350 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1351 pi != NULL; 1352 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1353 if ((caddr_t)pi < ph->ph_page || 1354 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1355 printf("%s: ", label); 1356 printf("pool(%p:%s): page inconsistency: page %p;" 1357 " item ordinal %d; addr %p\n", pp, 1358 pp->pr_wchan, ph->ph_page, n, pi); 1359 return (1); 1360 } 1361 1362 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1363 printf("%s: ", label); 1364 printf("pool(%p:%s): free list modified: " 1365 "page %p; item ordinal %d; addr %p " 1366 "(p %p); offset 0x%x=0x%lx\n", 1367 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1368 0, pi->pi_magic); 1369 } 1370 1371 #ifdef DIAGNOSTIC 1372 if (POOL_PHPOISON(ph)) { 1373 size_t pidx; 1374 uint32_t pval; 1375 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1376 &pidx, &pval)) { 1377 int *ip = (int *)(pi + 1); 1378 printf("pool(%s): free list modified: " 1379 "page %p; item ordinal %d; addr %p " 1380 "(p %p); offset 0x%zx=0x%x\n", 1381 pp->pr_wchan, ph->ph_page, n, pi, 1382 page, pidx * sizeof(int), ip[pidx]); 1383 } 1384 } 1385 #endif /* DIAGNOSTIC */ 1386 } 1387 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1388 printf("pool(%p:%s): page inconsistency: page %p;" 1389 " %d on list, %d missing, %d items per page\n", pp, 1390 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1391 pp->pr_itemsperpage); 1392 return 1; 1393 } 1394 if (expected >= 0 && n != expected) { 1395 printf("pool(%p:%s): page inconsistency: page %p;" 1396 " %d on list, %d missing, %d expected\n", pp, 1397 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1398 expected); 1399 return 1; 1400 } 1401 return 0; 1402 } 1403 1404 int 1405 pool_chk(struct pool *pp) 1406 { 1407 struct pool_page_header *ph; 1408 int r = 0; 1409 1410 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1411 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1412 TAILQ_FOREACH(ph, 
&pp->pr_fullpages, ph_entry) 1413 r += pool_chk_page(pp, ph, 0); 1414 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1415 r += pool_chk_page(pp, ph, -1); 1416 1417 return (r); 1418 } 1419 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1420 1421 #ifdef DDB 1422 void 1423 pool_walk(struct pool *pp, int full, 1424 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1425 void (*func)(void *, int, int (*)(const char *, ...) 1426 __attribute__((__format__(__kprintf__,1,2))))) 1427 { 1428 struct pool_page_header *ph; 1429 struct pool_item *pi; 1430 caddr_t cp; 1431 int n; 1432 1433 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1434 cp = ph->ph_colored; 1435 n = ph->ph_nmissing; 1436 1437 while (n--) { 1438 func(cp, full, pr); 1439 cp += pp->pr_size; 1440 } 1441 } 1442 1443 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1444 cp = ph->ph_colored; 1445 n = ph->ph_nmissing; 1446 1447 do { 1448 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1449 if (cp == (caddr_t)pi) 1450 break; 1451 } 1452 if (cp != (caddr_t)pi) { 1453 func(cp, full, pr); 1454 n--; 1455 } 1456 1457 cp += pp->pr_size; 1458 } while (n > 0); 1459 } 1460 } 1461 #endif 1462 1463 /* 1464 * We have three different sysctls. 1465 * kern.pool.npools - the number of pools. 1466 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1467 * kern.pool.name.<pool#> - the name for pool#. 
 */
/*
 * Handler for the kern.pool sysctl nodes.
 *
 * name[0] selects the node.  KERN_POOL_NPOOLS takes no further
 * component and returns pool_count.  KERN_POOL_NAME, KERN_POOL_POOL,
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS take the serial number of a
 * pool in name[1].
 *
 * Returns ENOTDIR if `namelen' does not fit the node, EOPNOTSUPP for
 * an unknown node, ENOENT if no pool has the requested serial number,
 * and otherwise the result of the sysctl_rd*() or pool_cache_*info()
 * call that produced the data.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = EOPNOTSUPP;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	/*
	 * Look the pool up by serial number.  A reference is taken
	 * while pool_lock is still held; it is released at the end of
	 * this function.  pp is NULL if the loop ran to completion.
	 */
	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial) {
			refcnt_take(&pp->pr_refcnt);
			break;
		}
	}
	rw_exit_read(&pool_lock);

	if (pp == NULL)
		return (ENOENT);

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		/* take a snapshot of the counters under the pool lock */
		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		/* add the per-CPU cache counters, if the pool has caches */
		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

	refcnt_rele_wake(&pp->pr_refcnt);

	return (rv);
}

/*
 * Queue pool_gc_task on the systqmp task queue.  The argument is not
 * used.
 */
void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

/*
 * Periodic garbage collection of idle pages.
 *
 * Every pool is visited with pool_lock held for reading.  On
 * MULTIPROCESSOR kernels the per-CPU cache of the pool is collected
 * first.  Then, if the pool has more idle pages than pr_minpages and
 * its lock can be taken without waiting, the first empty page is freed
 * provided its timestamp is older than POOL_WAIT_GC.  At most one page
 * per pool is freed on each run.  The function re-arms pool_gc_tick to
 * fire one second later.  The argument is not used.
 */
void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		/*
		 * The first test reads pr_nidle without the lock, so it
		 * is repeated below once the lock is held.
		 */
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		/* the page is freed after the pool lock has been dropped */
		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

/*
 * Allocate one page for the pool through the pool's backend allocator.
 * `flags' and `slowdown' are passed on to the backend unchanged.
 *
 * With DIAGNOSTIC, a page returned for a pool for which POOL_INPGHDR()
 * is true must be aligned according to pr_pgmask, otherwise the kernel
 * panics.  Returns the page, or NULL if the backend returned NULL.
 */
void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

/*
 * Give the page `v' back to the pool's backend allocator.
 */
void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

/*
 * Backend allocator using km_alloc() with the kv_page mode.  The
 * allocation may wait only if PR_WAITOK is set in `flags'; `slowdown'
 * is handed to km_alloc() through kd_slowdown.
 */
void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

/*
 * Release a page obtained from pool_page_alloc().
 */
void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

/*
 * Backend allocator based on a private copy of kv_intrsafe.  If
 * POOL_INPGHDR() is true for the pool the allocation is aligned to the
 * pool page size.  km_alloc() is called at splvm.
 */
void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

/*
 * Release a page obtained from pool_multi_alloc().  The same mode,
 * including the alignment, is rebuilt for km_free().
 */
void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

/*
 * Variant of pool_multi_alloc() based on kv_any.  Instead of raising
 * the interrupt priority level, the kernel lock is held around
 * km_alloc().
 */
void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

/*
 * Release a page obtained from pool_multi_alloc_ni().
 */
void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

/*
 * Set up per-CPU item caches for the pool.
 *
 * The pool_caches pool is initialised the first time this function is
 * called (detected by pr_size being 0).  The items of `pp' must be
 * large enough to hold a struct pool_cache_item.  The cache lock, the
 * random magic, the global list of item lists and the counters are
 * initialised, then the cache of every CPU is reset.  pr_cache is
 * assigned last, after a producer memory barrier.
 */
void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_timestamp = getnsecuptime();
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	/* the stores above are ordered before pr_cache is published */
	membar_producer();

	pp->pr_cache = cm;
}

/*
 * Stamp a cached item with its magic values.  The two words starting
 * at ci_nextl are overwritten with the pool's random magic XORed with
 * the address of the item and with its ci_next pointer respectively.
 */
static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

/*
 * Verify the values written by pool_cache_item_magic().  Panics if
 * either word does not match.
 */
static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

/*
 * Take the cache lock of the pool.  If the lock cannot be taken
 * immediately, wait for it and count the event in
 * pr_cache_contention.
 */
static inline void
pool_list_enter(struct pool *pp)
{
	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
		pl_enter(pp, &pp->pr_cache_lock);
		pp->pr_cache_contention++;
	}
}

/*
 * Release the cache lock of the pool.
 */
static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}

/*
 * Take the first list of items off the pool's global cache lists.
 *
 * Returns the head item of the list, or NULL if there is none.  The
 * per-CPU pc_nlget or pc_nlfail counter is updated accordingly.
 */
static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		/*
		 * ci_nextl was used as the list linkage; write the
		 * magic values back into that storage.
		 */
		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

/*
 * Append the list of items headed by `ci' to the pool's global cache
 * lists.  pr_cache_timestamp is refreshed if the global lists were
 * empty before the insertion.
 */
static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_timestamp = getnsecuptime();

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

/*
 * Get access to the cache of the current CPU.  The interrupt priority
 * level is raised to the pool's IPL and the previous level is stored
 * in `*s'.  pc_gen is incremented here and again in
 * pool_cache_leave(); the statistics readers in this file wait while
 * it is odd and retry if it changed while they were reading.
 */
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

/*
 * Undo pool_cache_enter(): bump pc_gen again, restore the interrupt
 * priority level `s' and release the per-CPU cache.
 */
static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

/*
 * Get an item from the cache of the current CPU.
 *
 * The item is taken from the active list, else from the previous
 * list, else from a list fetched from the global cache lists.  Returns
 * NULL (and counts a failure in pc_nfail) if all three are empty.  The
 * item's magic values are verified before it is handed out, and with
 * DIAGNOSTIC and pool_debug a poisoned item is checked for
 * modifications; both checks panic on failure.
 */
void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	/* the rest of the list ci was the head of becomes the active list */
	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

/*
 * Put the item `v' into the cache of the current CPU.
 *
 * If the active list already holds pr_cache_items items or more it
 * becomes the previous list, and the list that was the previous one
 * (if any) is moved to the pool's global cache lists.  The item is
 * then linked in as the new head of the active list.  With DIAGNOSTIC
 * and pool_debug the part of the item after the pool_cache_item header
 * is poisoned first, if the item is larger than that header.
 */
void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	/* the head item records the length of the list it leads */
	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

/*
 * Return every item on the list headed by `pl' to the pool with
 * pool_do_put(), holding pr_lock.
 *
 * Returns the list that follows `pl' on the ci_nextl linkage, read
 * before any item is released, or NULL if `pl' is NULL.
 */
struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	pl_enter(pp, &pp->pr_lock);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	pl_leave(pp, &pp->pr_lock);

	return (rpl);
}

/*
 * Tear down the per-CPU caches of the pool.  pr_cache is cleared with
 * pool_lock held for writing, then the active and previous lists of
 * every CPU and all lists on the global cache lists are returned to
 * the pool, and the per-CPU memory is released.
 */
void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

/*
 * Garbage collect the global cache lists of the pool and tune the
 * length of the per-CPU lists.
 *
 * If pr_cache_timestamp is older than POOL_WAIT_GC, the global lists
 * are not empty and the cache lock can be taken without waiting, one
 * list is removed and its items are returned to the pool.
 */
void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		/* pool_cache_list_put() accepts NULL */
		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on the pr_cache_lock then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.  if there was no contention at all since the last
	 * run, shrink the length by one, but not below 8.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

/*
 * Add the per-CPU cache counters of the pool to `pi'.  Does nothing if
 * the pool has no caches.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		/* wait while pc_gen is odd, retry if it changed meanwhile */
		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the cache lock so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

/*
 * Copy the global cache statistics of the pool out through
 * sysctl_rdstruct().  Returns EOPNOTSUPP if the pool has no caches,
 * otherwise the result of sysctl_rdstruct().
 */
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

/*
 * Copy the per-CPU cache statistics of the pool out through
 * sysctl_rdstruct(), as an array with one entry per CPU.
 *
 * Returns EOPNOTSUPP if the pool has no caches, EINVAL if *oldlenp is
 * not a multiple of the entry size, and EIO if the temporary array
 * cannot be allocated or if more per-CPU caches than ncpusfound are
 * encountered.  Otherwise the result of sysctl_rdstruct() is returned.
 */
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		/* wait while pc_gen is odd, retry if it changed meanwhile */
		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /*
MULTIPROCESSOR */
/*
 * Without MULTIPROCESSOR there are no per-CPU caches.  The functions
 * below keep the same interface and either do nothing or return
 * EOPNOTSUPP.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


/*
 * Pool lock operations backed by a mutex (the prl_mtx member of union
 * pool_lock).
 */

/*
 * Initialise the mutex with the pool's IPL, using the pool's wait
 * channel name as the lock name.
 */
void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

/*
 * Try to take the mutex; the result of mtx_enter_try() is returned
 * unchanged.
 */
int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

/*
 * Sleep on `ident' with msleep_nsec(), passing the mutex and INFSLP
 * as the timeout.  The result of msleep_nsec() is returned.
 */
int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

/*
 * Operations table for mutex-backed pools.  The initialisers are
 * positional, so their order has to match the member order of struct
 * pool_lock_ops.
 */
static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

/*
 * Pool lock operations backed by an rwlock (the prl_rwlock member of
 * union pool_lock).  The lock is always taken for writing.
 */

/*
 * Initialise the rwlock, using the pool's wait channel name as the
 * lock name.
 */
void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

/*
 * Try to take the write lock without sleeping.  Returns non-zero if
 * rw_enter() returned 0, and 0 otherwise.
 */
int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

/*
 * Assert that rw_status() does not report RW_WRITE for the lock.
 */
void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

/*
 * Sleep on `ident' with rwsleep_nsec(), passing the rwlock and INFSLP
 * as the timeout.  The result of rwsleep_nsec() is returned.
 */
int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

/*
 * Operations table for rwlock-backed pools.  The initialisers are
 * positional, so their order has to match the member order of struct
 * pool_lock_ops.
 */
static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};
