1 /* $OpenBSD: subr_pool.c,v 1.231 2021/01/02 03:23:59 cheloha Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/proc.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 #include <sys/task.h> 44 #include <sys/time.h> 45 #include <sys/timeout.h> 46 #include <sys/percpu.h> 47 48 #include <uvm/uvm_extern.h> 49 50 /* 51 * Pool resource management utility. 52 * 53 * Memory is allocated in pages which are split into pieces according to 54 * the pool item size. Each page is kept on one of three lists in the 55 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 56 * for empty, full and partially-full pages respectively. The individual 57 * pool items are on a linked list headed by `ph_items' in each page 58 * header. The memory for building the page list is either taken from 59 * the allocated pages themselves (for small pool items) or taken from 60 * an internal pool of page headers (`phpool'). 61 */ 62 63 /* List of all pools */ 64 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 65 66 /* 67 * Every pool gets a unique serial number assigned to it. If this counter 68 * wraps, we're screwed, but we shouldn't create so many pools anyway. 
*/
unsigned int pool_serial;	/* pre-incremented by pool_init() */
unsigned int pool_count;	/* ++ in pool_init(), -- in pool_destroy() */

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

/*
 * Table of locking primitives for a pool's locks.  pool_init() points
 * pr_lock_ops at pool_lock_ops_rw when PR_RWLOCK is set and at
 * pool_lock_ops_mtx otherwise; the pl_*() helpers below dispatch
 * through that pointer.
 */
struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};

/* tentative definitions; the initialised tables are not in this part */
static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

/*
 * pl_init(pp, pl): initialise lock `pl' of pool `pp'.  Under WITNESS
 * every expansion defines its own static lock_type named after the
 * stringified `pl' argument; otherwise NULL is passed as the lock type.
 */
#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

/*
 * Thin wrappers: each forwards its arguments to the matching entry in
 * pp->pr_lock_ops and returns whatever that entry returns.
 */
static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

/*
 * Overlay written at the start of every free item: pool_do_put() and
 * pool_p_alloc() cast the item address to this type.
 */
struct pool_item {
	u_long
pi_magic;	/* POOL_IMAGIC(ph, pi) while the item is free */
	XSIMPLEQ_ENTRY(pool_item)	pi_list;	/* link on ph_items */
};
/* expected pi_magic: the item's address XORed with its page's ph_magic */
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;	/* filled by arc4random_buf()
						   in pool_p_alloc() */
	uint64_t		ph_timestamp;	/* getnsecuptime() taken when
						   pool_do_put() empties the
						   page */
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/* true when POOL_MAGICBIT is set in ph_magic, i.e. items are poisoned */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
/*
 * NOTE(review): the poison flag below is 0x8000000 (bit 27) and the mask
 * covers bits 0-26, so this is not the top bit of an unsigned long on
 * either 32-bit or 64-bit targets.  Confirm whether that is intended.
 */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

/* item count with the poison flag masked off */
#define POOL_CACHE_ITEM_NITEMS(_ci)					\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

/* non-zero when the poison flag is set in ci_nitems */
#define POOL_CACHE_ITEM_POISONED(_ci)					\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

/* cache entry points; only declared on MULTIPROCESSOR kernels */
void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

/*
 * Debug level.  Non-zero makes pool_p_alloc() mark new pages as
 * poisoned and enables the double-put scan in pool_do_put() (both under
 * DIAGNOSTIC); the value 2 additionally makes PR_WAITOK callers of
 * pool_get() yield().
 */
#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

/* true when the page header is kept inside the page at pr_phoffset */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

/* page-level helpers */
struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

/* item-level helpers */
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

/* wrappers around the pool's backend allocator (defined elsewhere) */
void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
*/
void	*pool_page_alloc(struct pool *, int, int *);
void	pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 *
 * pool_init() selects it when no allocator is passed and the computed
 * page size does not exceed PAGE_SIZE.
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	pool_multi_free(struct pool *, void *);

/*
 * Selected by pool_init() when no allocator is passed, the computed
 * page size exceeds PAGE_SIZE and PR_WAITOK is not set.
 */
struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	pool_multi_free_ni(struct pool *, void *);

/* as above, but chosen by pool_init() when PR_WAITOK is set */
struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

/* idle age, in nanoseconds, after which pool_put() may free an empty page */
#define POOL_WAIT_FREE	SEC_TO_NSEC(1)
/* NOTE(review): presumably the idle age used by pool_gc_pages(); not
   visible in this part of the file */
#define POOL_WAIT_GC	SEC_TO_NSEC(8)

/*
 * TODO Move getnsecuptime() to kern_tc.c and document it when we
 * have callers in other modules.
277 */ 278 static uint64_t 279 getnsecuptime(void) 280 { 281 struct timespec now; 282 283 getnanouptime(&now); 284 return TIMESPEC_TO_NSEC(&now); 285 } 286 287 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 288 289 static inline int 290 phtree_compare(const struct pool_page_header *a, 291 const struct pool_page_header *b) 292 { 293 vaddr_t va = (vaddr_t)a->ph_page; 294 vaddr_t vb = (vaddr_t)b->ph_page; 295 296 /* the compares in this order are important for the NFIND to work */ 297 if (vb < va) 298 return (-1); 299 if (vb > va) 300 return (1); 301 302 return (0); 303 } 304 305 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 306 307 /* 308 * Return the pool page header based on page address. 309 */ 310 static inline struct pool_page_header * 311 pr_find_pagehead(struct pool *pp, void *v) 312 { 313 struct pool_page_header *ph, key; 314 315 if (POOL_INPGHDR(pp)) { 316 caddr_t page; 317 318 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 319 320 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 321 } 322 323 key.ph_page = v; 324 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 325 if (ph == NULL) 326 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 327 328 KASSERT(ph->ph_page <= (caddr_t)v); 329 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 330 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 331 332 return (ph); 333 } 334 335 /* 336 * Initialize the given pool resource structure. 337 * 338 * We export this routine to allow other kernel parts to declare 339 * static pools that must be initialized before malloc() is available. 
*/
/*
 * Arguments, as used below:
 *   pp     - pool to set up; it is zeroed with memset() first.
 *   size   - item size; raised to sizeof(struct pool_item) if smaller,
 *            then rounded up to `align'.
 *   align  - item alignment; 0 selects ALIGN(1).
 *   ipl    - stored in pr_ipl.
 *   flags  - stored in pr_flags; PR_RWLOCK selects the rwlock lock ops
 *            and requires PR_WAITOK.
 *   wchan  - stored in pr_wchan; used in panic messages and passed as
 *            the wait message when pool_get() sleeps.
 *   palloc - backend allocator, or NULL to pick a default one.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	/* grow the page until at least eight items fit */
	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		/* no allocator given: choose one from the page size */
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		/* supported page sizes with the ALIGNED flag masked off */
		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			/* the slack after the items already holds a header */
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			/* small items: give up item space for the header */
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}
	/* off stays 0 otherwise, which POOL_INPGHDR() reads as "off page" */

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);	/* clears the in-page bits */
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	/*
	 * Set up phpool on first use.  The nested call has already stored
	 * a non-zero pr_size in phpool when it reaches this test, so it
	 * does not nest any further.
	 */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool wont "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools.
*/ 483 rw_enter_write(&pool_lock); 484 #ifdef DIAGNOSTIC 485 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 486 if (iter == pp) 487 panic("%s: pool %s already on list", __func__, wchan); 488 } 489 #endif 490 491 pp->pr_serial = ++pool_serial; 492 if (pool_serial == 0) 493 panic("%s: too much uptime", __func__); 494 495 SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); 496 pool_count++; 497 rw_exit_write(&pool_lock); 498 } 499 500 /* 501 * Decommission a pool resource. 502 */ 503 void 504 pool_destroy(struct pool *pp) 505 { 506 struct pool_page_header *ph; 507 struct pool *prev, *iter; 508 509 #ifdef MULTIPROCESSOR 510 if (pp->pr_cache != NULL) 511 pool_cache_destroy(pp); 512 #endif 513 514 #ifdef DIAGNOSTIC 515 if (pp->pr_nout != 0) 516 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); 517 #endif 518 519 /* Remove from global pool list */ 520 rw_enter_write(&pool_lock); 521 pool_count--; 522 if (pp == SIMPLEQ_FIRST(&pool_head)) 523 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); 524 else { 525 prev = SIMPLEQ_FIRST(&pool_head); 526 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 527 if (iter == pp) { 528 SIMPLEQ_REMOVE_AFTER(&pool_head, prev, 529 pr_poollist); 530 break; 531 } 532 prev = iter; 533 } 534 } 535 rw_exit_write(&pool_lock); 536 537 /* Remove all pages */ 538 while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { 539 pl_enter(pp, &pp->pr_lock); 540 pool_p_remove(pp, ph); 541 pl_leave(pp, &pp->pr_lock); 542 pool_p_free(pp, ph); 543 } 544 KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); 545 KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); 546 } 547 548 void 549 pool_request_init(struct pool_request *pr, 550 void (*handler)(struct pool *, void *, void *), void *cookie) 551 { 552 pr->pr_handler = handler; 553 pr->pr_cookie = cookie; 554 pr->pr_item = NULL; 555 } 556 557 void 558 pool_request(struct pool *pp, struct pool_request *pr) 559 { 560 pl_enter(pp, &pp->pr_requests_lock); 561 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 562 pool_runqueue(pp, 
PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

/*
 * Rendezvous between a sleeping pool_get() and pool_get_done(): `v' is
 * written by the handler under `lock' and the sleeper waits on the
 * address of this structure.
 */
struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 *
 * `flags' must contain PR_WAITOK or PR_NOWAIT (and PR_WAITOK for
 * PR_RWLOCK pools).  Returns the item, zeroed when PR_ZERO is set, or
 * NULL after counting the failure in pr_nfail.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	/* try the cache first when the pool has one */
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		/* at the hard limit: fail if either flag is set */
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		/*
		 * Nothing available and the caller may wait: queue a
		 * request and sleep until pool_get_done() hands us an item.
		 */
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	/* reached with pr_lock still held */
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

/*
 * Request handler installed by pool_get(): publish item `v' in the
 * waiter's pool_get_memory (`xmem') and wake the waiter.
 */
void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct
pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	/* entered with pr_requests_lock held and pr_lock not held */
	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	/*
	 * Only one caller services the queue.  A later caller bumps
	 * pr_requesting and leaves; the bump makes the loop below run
	 * one more pass.
	 */
	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* move everything queued so far onto the private list */
		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;	/* jumps to the while () test */

		pl_leave(pp, &pp->pr_requests_lock);

		/* assign items in queue order until the pool runs dry */
		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		/* run handlers for satisfied requests, no pool lock held */
		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	/* put the requests that got no item back on the pool's queue */
	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

/*
 * Take one item off the current page; pr_lock must be held.  The lock
 * is dropped and retaken when a new page has to be allocated.  Returns
 * NULL when that allocation fails.
 */
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
*/
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		/* no page with free items: allocate one without pr_lock */
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			/* undo the accounting done above */
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	/*
	 * Re-read pr_curpage: pool_p_insert() only installs the new page
	 * when pr_curpage is still NULL.
	 */
	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	/* the free item must still carry the magic written when it was freed */
	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	/* verify the poison pattern behind the item header */
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			/* reported offset is relative to the item start */
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
*/
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	/* hand the item to the cache unless requests are waiting */
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/*
	 * is it time to free a page?
	 *
	 * At most one page per call: the first page on the empty list,
	 * and only when more than pr_maxpages pages are idle and that
	 * page's timestamp is older than POOL_WAIT_FREE.
	 */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	/* pool_p_free() asserts that pr_lock is not held */
	if (freeph != NULL)
		pool_p_free(pp, freeph);

	pool_wakeup(pp);
}

/*
 * Service pending item requests, if any.  The emptiness test is made
 * before pr_requests_lock is taken.
 */
void
pool_wakeup(struct pool *pp)
{
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

/*
 * Put item `v' back on its page's free list and move the page between
 * the full, partial and empty lists as needed.  The caller (pool_put())
 * holds pr_lock and adjusts pr_nout and pr_nput itself.
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		/* an item already on the free list is being freed twice */
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	/* poison everything behind the item header */
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
871 */ 872 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 873 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 874 } 875 876 if (ph->ph_nmissing == 0) { 877 /* 878 * The page is now empty, so move it to the empty page list. 879 */ 880 pp->pr_nidle++; 881 882 ph->ph_timestamp = getnsecuptime(); 883 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 884 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 885 pool_update_curpage(pp); 886 } 887 } 888 889 /* 890 * Add N items to the pool. 891 */ 892 int 893 pool_prime(struct pool *pp, int n) 894 { 895 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 896 struct pool_page_header *ph; 897 int newpages; 898 899 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 900 901 while (newpages-- > 0) { 902 int slowdown = 0; 903 904 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 905 if (ph == NULL) /* or slowdown? */ 906 break; 907 908 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 909 } 910 911 pl_enter(pp, &pp->pr_lock); 912 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 913 TAILQ_REMOVE(&pl, ph, ph_entry); 914 pool_p_insert(pp, ph); 915 } 916 pl_leave(pp, &pp->pr_lock); 917 918 return (0); 919 } 920 921 struct pool_page_header * 922 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 923 { 924 struct pool_page_header *ph; 925 struct pool_item *pi; 926 caddr_t addr; 927 unsigned int order; 928 int o; 929 int n; 930 931 pl_assert_unlocked(pp, &pp->pr_lock); 932 KASSERT(pp->pr_size >= sizeof(*pi)); 933 934 addr = pool_allocator_alloc(pp, flags, slowdown); 935 if (addr == NULL) 936 return (NULL); 937 938 if (POOL_INPGHDR(pp)) 939 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 940 else { 941 ph = pool_get(&phpool, flags); 942 if (ph == NULL) { 943 pool_allocator_free(pp, addr); 944 return (NULL); 945 } 946 } 947 948 XSIMPLEQ_INIT(&ph->ph_items); 949 ph->ph_page = addr; 950 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 951 ph->ph_colored = addr; 952 ph->ph_nmissing = 0; 953 
arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 954 #ifdef DIAGNOSTIC 955 /* use a bit in ph_magic to record if we poison page items */ 956 if (pool_debug) 957 SET(ph->ph_magic, POOL_MAGICBIT); 958 else 959 CLR(ph->ph_magic, POOL_MAGICBIT); 960 #endif /* DIAGNOSTIC */ 961 962 n = pp->pr_itemsperpage; 963 o = 32; 964 while (n--) { 965 pi = (struct pool_item *)addr; 966 pi->pi_magic = POOL_IMAGIC(ph, pi); 967 968 if (o == 32) { 969 order = arc4random(); 970 o = 0; 971 } 972 if (ISSET(order, 1 << o++)) 973 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 974 else 975 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 976 977 #ifdef DIAGNOSTIC 978 if (POOL_PHPOISON(ph)) 979 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 980 #endif /* DIAGNOSTIC */ 981 982 addr += pp->pr_size; 983 } 984 985 return (ph); 986 } 987 988 void 989 pool_p_free(struct pool *pp, struct pool_page_header *ph) 990 { 991 struct pool_item *pi; 992 993 pl_assert_unlocked(pp, &pp->pr_lock); 994 KASSERT(ph->ph_nmissing == 0); 995 996 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 997 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 998 panic("%s: %s free list modified: " 999 "page %p; item addr %p; offset 0x%x=0x%lx", 1000 __func__, pp->pr_wchan, ph->ph_page, pi, 1001 0, pi->pi_magic); 1002 } 1003 1004 #ifdef DIAGNOSTIC 1005 if (POOL_PHPOISON(ph)) { 1006 size_t pidx; 1007 uint32_t pval; 1008 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1009 &pidx, &pval)) { 1010 int *ip = (int *)(pi + 1); 1011 panic("%s: %s free list modified: " 1012 "page %p; item addr %p; offset 0x%zx=0x%x", 1013 __func__, pp->pr_wchan, ph->ph_page, pi, 1014 pidx * sizeof(int), ip[pidx]); 1015 } 1016 } 1017 #endif 1018 } 1019 1020 pool_allocator_free(pp, ph->ph_page); 1021 1022 if (!POOL_INPGHDR(pp)) 1023 pool_put(&phpool, ph); 1024 } 1025 1026 void 1027 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1028 { 1029 pl_assert_locked(pp, &pp->pr_lock); 1030 1031 /* If the pool was depleted, point at the 
new page */ 1032 if (pp->pr_curpage == NULL) 1033 pp->pr_curpage = ph; 1034 1035 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1036 if (!POOL_INPGHDR(pp)) 1037 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1038 1039 pp->pr_nitems += pp->pr_itemsperpage; 1040 pp->pr_nidle++; 1041 1042 pp->pr_npagealloc++; 1043 if (++pp->pr_npages > pp->pr_hiwat) 1044 pp->pr_hiwat = pp->pr_npages; 1045 } 1046 1047 void 1048 pool_p_remove(struct pool *pp, struct pool_page_header *ph) 1049 { 1050 pl_assert_locked(pp, &pp->pr_lock); 1051 1052 pp->pr_npagefree++; 1053 pp->pr_npages--; 1054 pp->pr_nidle--; 1055 pp->pr_nitems -= pp->pr_itemsperpage; 1056 1057 if (!POOL_INPGHDR(pp)) 1058 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1059 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1060 1061 pool_update_curpage(pp); 1062 } 1063 1064 void 1065 pool_update_curpage(struct pool *pp) 1066 { 1067 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1068 if (pp->pr_curpage == NULL) { 1069 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1070 } 1071 } 1072 1073 void 1074 pool_setlowat(struct pool *pp, int n) 1075 { 1076 int prime = 0; 1077 1078 pl_enter(pp, &pp->pr_lock); 1079 pp->pr_minitems = n; 1080 pp->pr_minpages = (n == 0) 1081 ? 0 1082 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1083 1084 if (pp->pr_nitems < n) 1085 prime = n - pp->pr_nitems; 1086 pl_leave(pp, &pp->pr_lock); 1087 1088 if (prime > 0) 1089 pool_prime(pp, prime); 1090 } 1091 1092 void 1093 pool_sethiwat(struct pool *pp, int n) 1094 { 1095 pp->pr_maxpages = (n == 0) 1096 ? 
0 1097 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1098 } 1099 1100 int 1101 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1102 { 1103 int error = 0; 1104 1105 if (n < pp->pr_nout) { 1106 error = EINVAL; 1107 goto done; 1108 } 1109 1110 pp->pr_hardlimit = n; 1111 pp->pr_hardlimit_warning = warnmsg; 1112 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1113 pp->pr_hardlimit_warning_last.tv_sec = 0; 1114 pp->pr_hardlimit_warning_last.tv_usec = 0; 1115 1116 done: 1117 return (error); 1118 } 1119 1120 void 1121 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1122 { 1123 pp->pr_crange = mode; 1124 } 1125 1126 /* 1127 * Release all complete pages that have not been used recently. 1128 * 1129 * Returns non-zero if any pages have been reclaimed. 1130 */ 1131 int 1132 pool_reclaim(struct pool *pp) 1133 { 1134 struct pool_page_header *ph, *phnext; 1135 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1136 1137 pl_enter(pp, &pp->pr_lock); 1138 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1139 phnext = TAILQ_NEXT(ph, ph_entry); 1140 1141 /* Check our minimum page claim */ 1142 if (pp->pr_npages <= pp->pr_minpages) 1143 break; 1144 1145 /* 1146 * If freeing this page would put us below 1147 * the low water mark, stop now. 1148 */ 1149 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1150 pp->pr_minitems) 1151 break; 1152 1153 pool_p_remove(pp, ph); 1154 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1155 } 1156 pl_leave(pp, &pp->pr_lock); 1157 1158 if (TAILQ_EMPTY(&pl)) 1159 return (0); 1160 1161 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1162 TAILQ_REMOVE(&pl, ph, ph_entry); 1163 pool_p_free(pp, ph); 1164 } 1165 1166 return (1); 1167 } 1168 1169 /* 1170 * Release all complete pages that have not been used recently 1171 * from all pools. 
1172 */ 1173 void 1174 pool_reclaim_all(void) 1175 { 1176 struct pool *pp; 1177 1178 rw_enter_read(&pool_lock); 1179 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1180 pool_reclaim(pp); 1181 rw_exit_read(&pool_lock); 1182 } 1183 1184 #ifdef DDB 1185 #include <machine/db_machdep.h> 1186 #include <ddb/db_output.h> 1187 1188 /* 1189 * Diagnostic helpers. 1190 */ 1191 void 1192 pool_printit(struct pool *pp, const char *modif, 1193 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1194 { 1195 pool_print1(pp, modif, pr); 1196 } 1197 1198 void 1199 pool_print_pagelist(struct pool_pagelist *pl, 1200 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1201 { 1202 struct pool_page_header *ph; 1203 struct pool_item *pi; 1204 1205 TAILQ_FOREACH(ph, pl, ph_entry) { 1206 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1207 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1208 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1209 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1210 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1211 pi, pi->pi_magic); 1212 } 1213 } 1214 } 1215 } 1216 1217 void 1218 pool_print1(struct pool *pp, const char *modif, 1219 int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) 1220 { 1221 struct pool_page_header *ph; 1222 int print_pagelist = 0; 1223 char c; 1224 1225 while ((c = *modif++) != '\0') { 1226 if (c == 'p') 1227 print_pagelist = 1; 1228 modif++; 1229 } 1230 1231 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1232 pp->pr_maxcolors); 1233 (*pr)("\talloc %p\n", pp->pr_alloc); 1234 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1235 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1236 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1237 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1238 1239 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1240 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1241 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1242 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1243 1244 if (print_pagelist == 0) 1245 return; 1246 1247 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1248 (*pr)("\n\tempty page list:\n"); 1249 pool_print_pagelist(&pp->pr_emptypages, pr); 1250 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1251 (*pr)("\n\tfull page list:\n"); 1252 pool_print_pagelist(&pp->pr_fullpages, pr); 1253 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1254 (*pr)("\n\tpartial-page list:\n"); 1255 pool_print_pagelist(&pp->pr_partpages, pr); 1256 1257 if (pp->pr_curpage == NULL) 1258 (*pr)("\tno current page\n"); 1259 else 1260 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1261 } 1262 1263 void 1264 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1265 { 1266 struct pool *pp; 1267 char maxp[16]; 1268 int ovflw; 1269 char mode; 1270 1271 mode = modif[0]; 1272 if (mode != '\0' && mode != 'a') { 1273 db_printf("usage: show all pools [/a]\n"); 1274 return; 1275 } 1276 1277 if (mode == '\0') 1278 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1279 "Name", 1280 "Size", 1281 "Requests", 1282 "Fail", 1283 
"Releases", 1284 "Pgreq", 1285 "Pgrel", 1286 "Npage", 1287 "Hiwat", 1288 "Minpg", 1289 "Maxpg", 1290 "Idle"); 1291 else 1292 db_printf("%-12s %18s %18s\n", 1293 "Name", "Address", "Allocator"); 1294 1295 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1296 if (mode == 'a') { 1297 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1298 pp->pr_alloc); 1299 continue; 1300 } 1301 1302 if (!pp->pr_nget) 1303 continue; 1304 1305 if (pp->pr_maxpages == UINT_MAX) 1306 snprintf(maxp, sizeof maxp, "inf"); 1307 else 1308 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1309 1310 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1311 (ovflw) += db_printf((fmt), \ 1312 (width) - (fixed) - (ovflw) > 0 ? \ 1313 (width) - (fixed) - (ovflw) : 0, \ 1314 (val)) - (width); \ 1315 if ((ovflw) < 0) \ 1316 (ovflw) = 0; \ 1317 } while (/* CONSTCOND */0) 1318 1319 ovflw = 0; 1320 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1321 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1322 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1323 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1324 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1325 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1326 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1327 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1328 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1329 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1330 PRWORD(ovflw, " %*s", 6, 1, maxp); 1331 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1332 1333 pool_chk(pp); 1334 } 1335 } 1336 #endif /* DDB */ 1337 1338 #if defined(POOL_DEBUG) || defined(DDB) 1339 int 1340 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1341 { 1342 struct pool_item *pi; 1343 caddr_t page; 1344 int n; 1345 const char *label = pp->pr_wchan; 1346 1347 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1348 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1349 printf("%s: ", label); 1350 printf("pool(%p:%s): page inconsistency: page %p; " 1351 "at page head addr %p (p %p)\n", 1352 pp, 
pp->pr_wchan, ph->ph_page, ph, page); 1353 return 1; 1354 } 1355 1356 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1357 pi != NULL; 1358 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1359 if ((caddr_t)pi < ph->ph_page || 1360 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1361 printf("%s: ", label); 1362 printf("pool(%p:%s): page inconsistency: page %p;" 1363 " item ordinal %d; addr %p\n", pp, 1364 pp->pr_wchan, ph->ph_page, n, pi); 1365 return (1); 1366 } 1367 1368 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1369 printf("%s: ", label); 1370 printf("pool(%p:%s): free list modified: " 1371 "page %p; item ordinal %d; addr %p " 1372 "(p %p); offset 0x%x=0x%lx\n", 1373 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1374 0, pi->pi_magic); 1375 } 1376 1377 #ifdef DIAGNOSTIC 1378 if (POOL_PHPOISON(ph)) { 1379 size_t pidx; 1380 uint32_t pval; 1381 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1382 &pidx, &pval)) { 1383 int *ip = (int *)(pi + 1); 1384 printf("pool(%s): free list modified: " 1385 "page %p; item ordinal %d; addr %p " 1386 "(p %p); offset 0x%zx=0x%x\n", 1387 pp->pr_wchan, ph->ph_page, n, pi, 1388 page, pidx * sizeof(int), ip[pidx]); 1389 } 1390 } 1391 #endif /* DIAGNOSTIC */ 1392 } 1393 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1394 printf("pool(%p:%s): page inconsistency: page %p;" 1395 " %d on list, %d missing, %d items per page\n", pp, 1396 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1397 pp->pr_itemsperpage); 1398 return 1; 1399 } 1400 if (expected >= 0 && n != expected) { 1401 printf("pool(%p:%s): page inconsistency: page %p;" 1402 " %d on list, %d missing, %d expected\n", pp, 1403 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1404 expected); 1405 return 1; 1406 } 1407 return 0; 1408 } 1409 1410 int 1411 pool_chk(struct pool *pp) 1412 { 1413 struct pool_page_header *ph; 1414 int r = 0; 1415 1416 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1417 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1418 TAILQ_FOREACH(ph, 
&pp->pr_fullpages, ph_entry) 1419 r += pool_chk_page(pp, ph, 0); 1420 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1421 r += pool_chk_page(pp, ph, -1); 1422 1423 return (r); 1424 } 1425 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1426 1427 #ifdef DDB 1428 void 1429 pool_walk(struct pool *pp, int full, 1430 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1431 void (*func)(void *, int, int (*)(const char *, ...) 1432 __attribute__((__format__(__kprintf__,1,2))))) 1433 { 1434 struct pool_page_header *ph; 1435 struct pool_item *pi; 1436 caddr_t cp; 1437 int n; 1438 1439 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1440 cp = ph->ph_colored; 1441 n = ph->ph_nmissing; 1442 1443 while (n--) { 1444 func(cp, full, pr); 1445 cp += pp->pr_size; 1446 } 1447 } 1448 1449 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1450 cp = ph->ph_colored; 1451 n = ph->ph_nmissing; 1452 1453 do { 1454 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1455 if (cp == (caddr_t)pi) 1456 break; 1457 } 1458 if (cp != (caddr_t)pi) { 1459 func(cp, full, pr); 1460 n--; 1461 } 1462 1463 cp += pp->pr_size; 1464 } while (n > 0); 1465 } 1466 } 1467 #endif 1468 1469 /* 1470 * We have three different sysctls. 1471 * kern.pool.npools - the number of pools. 1472 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1473 * kern.pool.name.<pool#> - the name for pool#. 
1474 */ 1475 int 1476 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) 1477 { 1478 struct kinfo_pool pi; 1479 struct pool *pp; 1480 int rv = ENOENT; 1481 1482 switch (name[0]) { 1483 case KERN_POOL_NPOOLS: 1484 if (namelen != 1) 1485 return (ENOTDIR); 1486 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); 1487 1488 case KERN_POOL_NAME: 1489 case KERN_POOL_POOL: 1490 case KERN_POOL_CACHE: 1491 case KERN_POOL_CACHE_CPUS: 1492 break; 1493 default: 1494 return (EOPNOTSUPP); 1495 } 1496 1497 if (namelen != 2) 1498 return (ENOTDIR); 1499 1500 rw_enter_read(&pool_lock); 1501 1502 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1503 if (name[1] == pp->pr_serial) 1504 break; 1505 } 1506 1507 if (pp == NULL) 1508 goto done; 1509 1510 switch (name[0]) { 1511 case KERN_POOL_NAME: 1512 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); 1513 break; 1514 case KERN_POOL_POOL: 1515 memset(&pi, 0, sizeof(pi)); 1516 1517 pl_enter(pp, &pp->pr_lock); 1518 pi.pr_size = pp->pr_size; 1519 pi.pr_pgsize = pp->pr_pgsize; 1520 pi.pr_itemsperpage = pp->pr_itemsperpage; 1521 pi.pr_npages = pp->pr_npages; 1522 pi.pr_minpages = pp->pr_minpages; 1523 pi.pr_maxpages = pp->pr_maxpages; 1524 pi.pr_hardlimit = pp->pr_hardlimit; 1525 pi.pr_nout = pp->pr_nout; 1526 pi.pr_nitems = pp->pr_nitems; 1527 pi.pr_nget = pp->pr_nget; 1528 pi.pr_nput = pp->pr_nput; 1529 pi.pr_nfail = pp->pr_nfail; 1530 pi.pr_npagealloc = pp->pr_npagealloc; 1531 pi.pr_npagefree = pp->pr_npagefree; 1532 pi.pr_hiwat = pp->pr_hiwat; 1533 pi.pr_nidle = pp->pr_nidle; 1534 pl_leave(pp, &pp->pr_lock); 1535 1536 pool_cache_pool_info(pp, &pi); 1537 1538 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); 1539 break; 1540 1541 case KERN_POOL_CACHE: 1542 rv = pool_cache_info(pp, oldp, oldlenp); 1543 break; 1544 1545 case KERN_POOL_CACHE_CPUS: 1546 rv = pool_cache_cpus_info(pp, oldp, oldlenp); 1547 break; 1548 } 1549 1550 done: 1551 rw_exit_read(&pool_lock); 1552 1553 return (rv); 1554 } 1555 1556 void 
1557 pool_gc_sched(void *null) 1558 { 1559 task_add(systqmp, &pool_gc_task); 1560 } 1561 1562 void 1563 pool_gc_pages(void *null) 1564 { 1565 struct pool *pp; 1566 struct pool_page_header *ph, *freeph; 1567 int s; 1568 1569 rw_enter_read(&pool_lock); 1570 s = splvm(); /* XXX go to splvm until all pools _setipl properly */ 1571 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1572 #ifdef MULTIPROCESSOR 1573 if (pp->pr_cache != NULL) 1574 pool_cache_gc(pp); 1575 #endif 1576 1577 if (pp->pr_nidle <= pp->pr_minpages || /* guess */ 1578 !pl_enter_try(pp, &pp->pr_lock)) /* try */ 1579 continue; 1580 1581 /* is it time to free a page? */ 1582 if (pp->pr_nidle > pp->pr_minpages && 1583 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 1584 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) { 1585 freeph = ph; 1586 pool_p_remove(pp, freeph); 1587 } else 1588 freeph = NULL; 1589 1590 pl_leave(pp, &pp->pr_lock); 1591 1592 if (freeph != NULL) 1593 pool_p_free(pp, freeph); 1594 } 1595 splx(s); 1596 rw_exit_read(&pool_lock); 1597 1598 timeout_add_sec(&pool_gc_tick, 1); 1599 } 1600 1601 /* 1602 * Pool backend allocators. 
1603 */ 1604 1605 void * 1606 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1607 { 1608 void *v; 1609 1610 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); 1611 1612 #ifdef DIAGNOSTIC 1613 if (v != NULL && POOL_INPGHDR(pp)) { 1614 vaddr_t addr = (vaddr_t)v; 1615 if ((addr & pp->pr_pgmask) != addr) { 1616 panic("%s: %s page address %p isnt aligned to %u", 1617 __func__, pp->pr_wchan, v, pp->pr_pgsize); 1618 } 1619 } 1620 #endif 1621 1622 return (v); 1623 } 1624 1625 void 1626 pool_allocator_free(struct pool *pp, void *v) 1627 { 1628 struct pool_allocator *pa = pp->pr_alloc; 1629 1630 (*pa->pa_free)(pp, v); 1631 } 1632 1633 void * 1634 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1635 { 1636 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1637 1638 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1639 kd.kd_slowdown = slowdown; 1640 1641 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); 1642 } 1643 1644 void 1645 pool_page_free(struct pool *pp, void *v) 1646 { 1647 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); 1648 } 1649 1650 void * 1651 pool_multi_alloc(struct pool *pp, int flags, int *slowdown) 1652 { 1653 struct kmem_va_mode kv = kv_intrsafe; 1654 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1655 void *v; 1656 int s; 1657 1658 if (POOL_INPGHDR(pp)) 1659 kv.kv_align = pp->pr_pgsize; 1660 1661 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1662 kd.kd_slowdown = slowdown; 1663 1664 s = splvm(); 1665 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1666 splx(s); 1667 1668 return (v); 1669 } 1670 1671 void 1672 pool_multi_free(struct pool *pp, void *v) 1673 { 1674 struct kmem_va_mode kv = kv_intrsafe; 1675 int s; 1676 1677 if (POOL_INPGHDR(pp)) 1678 kv.kv_align = pp->pr_pgsize; 1679 1680 s = splvm(); 1681 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1682 splx(s); 1683 } 1684 1685 void * 1686 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown) 1687 { 1688 struct kmem_va_mode kv = kv_any; 1689 struct 
kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1690 void *v; 1691 1692 if (POOL_INPGHDR(pp)) 1693 kv.kv_align = pp->pr_pgsize; 1694 1695 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1696 kd.kd_slowdown = slowdown; 1697 1698 KERNEL_LOCK(); 1699 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1700 KERNEL_UNLOCK(); 1701 1702 return (v); 1703 } 1704 1705 void 1706 pool_multi_free_ni(struct pool *pp, void *v) 1707 { 1708 struct kmem_va_mode kv = kv_any; 1709 1710 if (POOL_INPGHDR(pp)) 1711 kv.kv_align = pp->pr_pgsize; 1712 1713 KERNEL_LOCK(); 1714 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1715 KERNEL_UNLOCK(); 1716 } 1717 1718 #ifdef MULTIPROCESSOR 1719 1720 struct pool pool_caches; /* per cpu cache entries */ 1721 1722 void 1723 pool_cache_init(struct pool *pp) 1724 { 1725 struct cpumem *cm; 1726 struct pool_cache *pc; 1727 struct cpumem_iter i; 1728 1729 if (pool_caches.pr_size == 0) { 1730 pool_init(&pool_caches, sizeof(struct pool_cache), 1731 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK, 1732 "plcache", NULL); 1733 } 1734 1735 /* must be able to use the pool items as cache list items */ 1736 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item)); 1737 1738 cm = cpumem_get(&pool_caches); 1739 1740 pl_init(pp, &pp->pr_cache_lock); 1741 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); 1742 TAILQ_INIT(&pp->pr_cache_lists); 1743 pp->pr_cache_nitems = 0; 1744 pp->pr_cache_timestamp = getnsecuptime(); 1745 pp->pr_cache_items = 8; 1746 pp->pr_cache_contention = 0; 1747 pp->pr_cache_ngc = 0; 1748 1749 CPUMEM_FOREACH(pc, &i, cm) { 1750 pc->pc_actv = NULL; 1751 pc->pc_nactv = 0; 1752 pc->pc_prev = NULL; 1753 1754 pc->pc_nget = 0; 1755 pc->pc_nfail = 0; 1756 pc->pc_nput = 0; 1757 pc->pc_nlget = 0; 1758 pc->pc_nlfail = 0; 1759 pc->pc_nlput = 0; 1760 pc->pc_nout = 0; 1761 } 1762 1763 membar_producer(); 1764 1765 pp->pr_cache = cm; 1766 } 1767 1768 static inline void 1769 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci) 1770 { 1771 unsigned long 
*entry = (unsigned long *)&ci->ci_nextl; 1772 1773 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci; 1774 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1775 } 1776 1777 static inline void 1778 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci) 1779 { 1780 unsigned long *entry; 1781 unsigned long val; 1782 1783 entry = (unsigned long *)&ci->ci_nextl; 1784 val = pp->pr_cache_magic[0] ^ (u_long)ci; 1785 if (*entry != val) 1786 goto fail; 1787 1788 entry++; 1789 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1790 if (*entry != val) 1791 goto fail; 1792 1793 return; 1794 1795 fail: 1796 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx", 1797 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci, 1798 *entry, val); 1799 } 1800 1801 static inline void 1802 pool_list_enter(struct pool *pp) 1803 { 1804 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) { 1805 pl_enter(pp, &pp->pr_cache_lock); 1806 pp->pr_cache_contention++; 1807 } 1808 } 1809 1810 static inline void 1811 pool_list_leave(struct pool *pp) 1812 { 1813 pl_leave(pp, &pp->pr_cache_lock); 1814 } 1815 1816 static inline struct pool_cache_item * 1817 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc) 1818 { 1819 struct pool_cache_item *pl; 1820 1821 pool_list_enter(pp); 1822 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1823 if (pl != NULL) { 1824 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 1825 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 1826 1827 pool_cache_item_magic(pp, pl); 1828 1829 pc->pc_nlget++; 1830 } else 1831 pc->pc_nlfail++; 1832 1833 /* fold this cpus nout into the global while we have the lock */ 1834 pp->pr_cache_nout += pc->pc_nout; 1835 pc->pc_nout = 0; 1836 pool_list_leave(pp); 1837 1838 return (pl); 1839 } 1840 1841 static inline void 1842 pool_cache_list_free(struct pool *pp, struct pool_cache *pc, 1843 struct pool_cache_item *ci) 1844 { 1845 pool_list_enter(pp); 1846 if (TAILQ_EMPTY(&pp->pr_cache_lists)) 1847 
pp->pr_cache_timestamp = getnsecuptime(); 1848 1849 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); 1850 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); 1851 1852 pc->pc_nlput++; 1853 1854 /* fold this cpus nout into the global while we have the lock */ 1855 pp->pr_cache_nout += pc->pc_nout; 1856 pc->pc_nout = 0; 1857 pool_list_leave(pp); 1858 } 1859 1860 static inline struct pool_cache * 1861 pool_cache_enter(struct pool *pp, int *s) 1862 { 1863 struct pool_cache *pc; 1864 1865 pc = cpumem_enter(pp->pr_cache); 1866 *s = splraise(pp->pr_ipl); 1867 pc->pc_gen++; 1868 1869 return (pc); 1870 } 1871 1872 static inline void 1873 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s) 1874 { 1875 pc->pc_gen++; 1876 splx(s); 1877 cpumem_leave(pp->pr_cache, pc); 1878 } 1879 1880 void * 1881 pool_cache_get(struct pool *pp) 1882 { 1883 struct pool_cache *pc; 1884 struct pool_cache_item *ci; 1885 int s; 1886 1887 pc = pool_cache_enter(pp, &s); 1888 1889 if (pc->pc_actv != NULL) { 1890 ci = pc->pc_actv; 1891 } else if (pc->pc_prev != NULL) { 1892 ci = pc->pc_prev; 1893 pc->pc_prev = NULL; 1894 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) { 1895 pc->pc_nfail++; 1896 goto done; 1897 } 1898 1899 pool_cache_item_magic_check(pp, ci); 1900 #ifdef DIAGNOSTIC 1901 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) { 1902 size_t pidx; 1903 uint32_t pval; 1904 1905 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci), 1906 &pidx, &pval)) { 1907 int *ip = (int *)(ci + 1); 1908 ip += pidx; 1909 1910 panic("%s: %s cpu free list modified: " 1911 "item addr %p+%zu 0x%x!=0x%x", 1912 __func__, pp->pr_wchan, ci, 1913 (caddr_t)ip - (caddr_t)ci, *ip, pval); 1914 } 1915 } 1916 #endif 1917 1918 pc->pc_actv = ci->ci_next; 1919 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1; 1920 pc->pc_nget++; 1921 pc->pc_nout++; 1922 1923 done: 1924 pool_cache_leave(pp, pc, s); 1925 1926 return (ci); 1927 } 1928 1929 void 1930 pool_cache_put(struct pool *pp, void *v) 1931 { 1932 struct 
pool_cache *pc; 1933 struct pool_cache_item *ci = v; 1934 unsigned long nitems; 1935 int s; 1936 #ifdef DIAGNOSTIC 1937 int poison = pool_debug && pp->pr_size > sizeof(*ci); 1938 1939 if (poison) 1940 poison_mem(ci + 1, pp->pr_size - sizeof(*ci)); 1941 #endif 1942 1943 pc = pool_cache_enter(pp, &s); 1944 1945 nitems = pc->pc_nactv; 1946 if (nitems >= pp->pr_cache_items) { 1947 if (pc->pc_prev != NULL) 1948 pool_cache_list_free(pp, pc, pc->pc_prev); 1949 1950 pc->pc_prev = pc->pc_actv; 1951 1952 pc->pc_actv = NULL; 1953 pc->pc_nactv = 0; 1954 nitems = 0; 1955 } 1956 1957 ci->ci_next = pc->pc_actv; 1958 ci->ci_nitems = ++nitems; 1959 #ifdef DIAGNOSTIC 1960 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0; 1961 #endif 1962 pool_cache_item_magic(pp, ci); 1963 1964 pc->pc_actv = ci; 1965 pc->pc_nactv = nitems; 1966 1967 pc->pc_nput++; 1968 pc->pc_nout--; 1969 1970 pool_cache_leave(pp, pc, s); 1971 } 1972 1973 struct pool_cache_item * 1974 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl) 1975 { 1976 struct pool_cache_item *rpl, *next; 1977 1978 if (pl == NULL) 1979 return (NULL); 1980 1981 rpl = TAILQ_NEXT(pl, ci_nextl); 1982 1983 pl_enter(pp, &pp->pr_lock); 1984 do { 1985 next = pl->ci_next; 1986 pool_do_put(pp, pl); 1987 pl = next; 1988 } while (pl != NULL); 1989 pl_leave(pp, &pp->pr_lock); 1990 1991 return (rpl); 1992 } 1993 1994 void 1995 pool_cache_destroy(struct pool *pp) 1996 { 1997 struct pool_cache *pc; 1998 struct pool_cache_item *pl; 1999 struct cpumem_iter i; 2000 struct cpumem *cm; 2001 2002 rw_enter_write(&pool_lock); /* serialise with the gc */ 2003 cm = pp->pr_cache; 2004 pp->pr_cache = NULL; /* make pool_put avoid the cache */ 2005 rw_exit_write(&pool_lock); 2006 2007 CPUMEM_FOREACH(pc, &i, cm) { 2008 pool_cache_list_put(pp, pc->pc_actv); 2009 pool_cache_list_put(pp, pc->pc_prev); 2010 } 2011 2012 cpumem_put(&pool_caches, cm); 2013 2014 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2015 while (pl != NULL) 2016 pl = 
pool_cache_list_put(pp, pl); 2017 } 2018 2019 void 2020 pool_cache_gc(struct pool *pp) 2021 { 2022 unsigned int contention, delta; 2023 2024 if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC && 2025 !TAILQ_EMPTY(&pp->pr_cache_lists) && 2026 pl_enter_try(pp, &pp->pr_cache_lock)) { 2027 struct pool_cache_item *pl = NULL; 2028 2029 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2030 if (pl != NULL) { 2031 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 2032 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 2033 pp->pr_cache_timestamp = getnsecuptime(); 2034 2035 pp->pr_cache_ngc++; 2036 } 2037 2038 pl_leave(pp, &pp->pr_cache_lock); 2039 2040 pool_cache_list_put(pp, pl); 2041 } 2042 2043 /* 2044 * if there's a lot of contention on the pr_cache_mtx then consider 2045 * growing the length of the list to reduce the need to access the 2046 * global pool. 2047 */ 2048 2049 contention = pp->pr_cache_contention; 2050 delta = contention - pp->pr_cache_contention_prev; 2051 if (delta > 8 /* magic */) { 2052 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) 2053 pp->pr_cache_items += 8; 2054 } else if (delta == 0) { 2055 if (pp->pr_cache_items > 8) 2056 pp->pr_cache_items--; 2057 } 2058 pp->pr_cache_contention_prev = contention; 2059 } 2060 2061 void 2062 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2063 { 2064 struct pool_cache *pc; 2065 struct cpumem_iter i; 2066 2067 if (pp->pr_cache == NULL) 2068 return; 2069 2070 /* loop through the caches twice to collect stats */ 2071 2072 /* once without the lock so we can yield while reading nget/nput */ 2073 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2074 uint64_t gen, nget, nput; 2075 2076 do { 2077 while ((gen = pc->pc_gen) & 1) 2078 yield(); 2079 2080 nget = pc->pc_nget; 2081 nput = pc->pc_nput; 2082 } while (gen != pc->pc_gen); 2083 2084 pi->pr_nget += nget; 2085 pi->pr_nput += nput; 2086 } 2087 2088 /* and once with the mtx so we can get consistent nout values */ 2089 pl_enter(pp, &pp->pr_cache_lock); 2090 
CPUMEM_FOREACH(pc, &i, pp->pr_cache) 2091 pi->pr_nout += pc->pc_nout; 2092 2093 pi->pr_nout += pp->pr_cache_nout; 2094 pl_leave(pp, &pp->pr_cache_lock); 2095 } 2096 2097 int 2098 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2099 { 2100 struct kinfo_pool_cache kpc; 2101 2102 if (pp->pr_cache == NULL) 2103 return (EOPNOTSUPP); 2104 2105 memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ 2106 2107 pl_enter(pp, &pp->pr_cache_lock); 2108 kpc.pr_ngc = pp->pr_cache_ngc; 2109 kpc.pr_len = pp->pr_cache_items; 2110 kpc.pr_nitems = pp->pr_cache_nitems; 2111 kpc.pr_contention = pp->pr_cache_contention; 2112 pl_leave(pp, &pp->pr_cache_lock); 2113 2114 return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc))); 2115 } 2116 2117 int 2118 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2119 { 2120 struct pool_cache *pc; 2121 struct kinfo_pool_cache_cpu *kpcc, *info; 2122 unsigned int cpu = 0; 2123 struct cpumem_iter i; 2124 int error = 0; 2125 size_t len; 2126 2127 if (pp->pr_cache == NULL) 2128 return (EOPNOTSUPP); 2129 if (*oldlenp % sizeof(*kpcc)) 2130 return (EINVAL); 2131 2132 kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP, 2133 M_WAITOK|M_CANFAIL|M_ZERO); 2134 if (kpcc == NULL) 2135 return (EIO); 2136 2137 len = ncpusfound * sizeof(*kpcc); 2138 2139 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2140 uint64_t gen; 2141 2142 if (cpu >= ncpusfound) { 2143 error = EIO; 2144 goto err; 2145 } 2146 2147 info = &kpcc[cpu]; 2148 info->pr_cpu = cpu; 2149 2150 do { 2151 while ((gen = pc->pc_gen) & 1) 2152 yield(); 2153 2154 info->pr_nget = pc->pc_nget; 2155 info->pr_nfail = pc->pc_nfail; 2156 info->pr_nput = pc->pc_nput; 2157 info->pr_nlget = pc->pc_nlget; 2158 info->pr_nlfail = pc->pc_nlfail; 2159 info->pr_nlput = pc->pc_nlput; 2160 } while (gen != pc->pc_gen); 2161 2162 cpu++; 2163 } 2164 2165 error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len); 2166 err: 2167 free(kpcc, M_TEMP, len); 2168 2169 return (error); 2170 } 2171 #else /* 
MULTIPROCESSOR */ 2172 void 2173 pool_cache_init(struct pool *pp) 2174 { 2175 /* nop */ 2176 } 2177 2178 void 2179 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2180 { 2181 /* nop */ 2182 } 2183 2184 int 2185 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2186 { 2187 return (EOPNOTSUPP); 2188 } 2189 2190 int 2191 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2192 { 2193 return (EOPNOTSUPP); 2194 } 2195 #endif /* MULTIPROCESSOR */ 2196 2197 2198 void 2199 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock, 2200 const struct lock_type *type) 2201 { 2202 _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type); 2203 } 2204 2205 void 2206 pool_lock_mtx_enter(union pool_lock *lock) 2207 { 2208 mtx_enter(&lock->prl_mtx); 2209 } 2210 2211 int 2212 pool_lock_mtx_enter_try(union pool_lock *lock) 2213 { 2214 return (mtx_enter_try(&lock->prl_mtx)); 2215 } 2216 2217 void 2218 pool_lock_mtx_leave(union pool_lock *lock) 2219 { 2220 mtx_leave(&lock->prl_mtx); 2221 } 2222 2223 void 2224 pool_lock_mtx_assert_locked(union pool_lock *lock) 2225 { 2226 MUTEX_ASSERT_LOCKED(&lock->prl_mtx); 2227 } 2228 2229 void 2230 pool_lock_mtx_assert_unlocked(union pool_lock *lock) 2231 { 2232 MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx); 2233 } 2234 2235 int 2236 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority, 2237 const char *wmesg) 2238 { 2239 return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP); 2240 } 2241 2242 static const struct pool_lock_ops pool_lock_ops_mtx = { 2243 pool_lock_mtx_init, 2244 pool_lock_mtx_enter, 2245 pool_lock_mtx_enter_try, 2246 pool_lock_mtx_leave, 2247 pool_lock_mtx_assert_locked, 2248 pool_lock_mtx_assert_unlocked, 2249 pool_lock_mtx_sleep, 2250 }; 2251 2252 void 2253 pool_lock_rw_init(struct pool *pp, union pool_lock *lock, 2254 const struct lock_type *type) 2255 { 2256 _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type); 2257 } 2258 2259 void 2260 
pool_lock_rw_enter(union pool_lock *lock) 2261 { 2262 rw_enter_write(&lock->prl_rwlock); 2263 } 2264 2265 int 2266 pool_lock_rw_enter_try(union pool_lock *lock) 2267 { 2268 return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0); 2269 } 2270 2271 void 2272 pool_lock_rw_leave(union pool_lock *lock) 2273 { 2274 rw_exit_write(&lock->prl_rwlock); 2275 } 2276 2277 void 2278 pool_lock_rw_assert_locked(union pool_lock *lock) 2279 { 2280 rw_assert_wrlock(&lock->prl_rwlock); 2281 } 2282 2283 void 2284 pool_lock_rw_assert_unlocked(union pool_lock *lock) 2285 { 2286 KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE); 2287 } 2288 2289 int 2290 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority, 2291 const char *wmesg) 2292 { 2293 return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP); 2294 } 2295 2296 static const struct pool_lock_ops pool_lock_ops_rw = { 2297 pool_lock_rw_init, 2298 pool_lock_rw_enter, 2299 pool_lock_rw_enter_try, 2300 pool_lock_rw_leave, 2301 pool_lock_rw_assert_locked, 2302 pool_lock_rw_assert_unlocked, 2303 pool_lock_rw_sleep, 2304 }; 2305