/*	$OpenBSD: subr_pool.c,v 1.234 2021/06/15 05:24:46 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/percpu.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
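
/*
 * A minimal usage sketch (illustrative only, not compiled here; the
 * "struct foo" type and "foo_pool" name are hypothetical):
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */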

/* List of all pools */
SIMPLEQ_HEAD(, pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
			ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
			ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
			ph_node;	/* off-page page headers */
	unsigned int	ph_nmissing;	/* # of chunks in use */
	caddr_t		ph_page;	/* this page's address */
	caddr_t		ph_colored;	/* page's colored address */
	unsigned long	ph_magic;
	uint64_t	ph_timestamp;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
	((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
	ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
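
/*
 * Worked example of the encoding above (illustrative): a poisoned list
 * of 3 items stores ci_nitems = 3 | POOL_CACHE_ITEM_NITEMS_POISON,
 * i.e. 0x8000003; POOL_CACHE_ITEM_NITEMS() masks the flag off and
 * yields 3, while POOL_CACHE_ITEM_POISONED() tests the flag bit.
 */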

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};
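
/*
 * A pool may also supply its own backend allocator instead of the
 * defaults above. Illustrative sketch (hypothetical names, not
 * compiled here); the callbacks have the same signatures as
 * pool_page_alloc()/pool_page_free():
 *
 *	void *myalloc(struct pool *, int, int *);
 *	void  myfree(struct pool *, void *);
 *
 *	struct pool_allocator my_allocator = {
 *		myalloc,
 *		myfree,
 *		POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, 0,
 *	    "foopl", &my_allocator);
 */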

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

#define POOL_WAIT_FREE	SEC_TO_NSEC(1)
#define POOL_WAIT_GC	SEC_TO_NSEC(8)

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;
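
	/*
	 * Worked example (illustrative, assuming PAGE_SIZE is 4096 and
	 * ALIGN(1) is 8): a request for 180-byte items rounds up to
	 * size = 184; 184 * 8 = 1472 <= 4096, so pgsize stays 4096 and
	 * items = 4096 / 184 = 22. A 704-byte item would instead double
	 * pgsize to 8192 (704 * 8 = 5632 > 4096), giving items = 11.
	 */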

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}
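
/*
 * Continuing the worked example above (illustrative; the numbers assume
 * an off-page header): with pgsize 4096, size 184 and items 22,
 * space = 4096 - 22 * 184 = 48 bytes remain, so with align 8 the pool
 * cycles through pr_maxcolors = 48 / 8 + 1 = 7 item start offsets,
 * staggering items across cache lines from page to page.
 */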

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	TRACEPOINT(uvm, pool_get, pp, v, flags);

	return (v);

fail:
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}
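
/*
 * pool_get()'s slow path above is itself a client of the asynchronous
 * request API: pool_request_init()/pool_request() queue a callback
 * that fires once an item becomes available. Illustrative sketch of
 * direct use (hypothetical handler and softc names, not compiled
 * here):
 *
 *	void
 *	foo_fill(struct pool *pp, void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *		... hand "item" to sc ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_fill, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 */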

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}
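
/*
 * The pi_magic check above is an xor tag: each free item stores
 * POOL_IMAGIC(ph, pi) == (u_long)pi ^ ph->ph_magic. Illustrative
 * example with small numbers: for ph_magic 0x5a5a and an item at
 * 0x1000, pi_magic must read 0x4a5a (0x1000 ^ 0x5a5a); any write to a
 * freed item is very likely to break this identity and be caught at
 * the next pool_do_get().
 */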

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	TRACEPOINT(uvm, pool_put, pp, v);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	pool_wakeup(pp);
}

void
pool_wakeup(struct pool *pp)
{
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_timestamp = getnsecuptime();
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}
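
/*
 * pool_prime() below pre-allocates whole pages. Illustrative example
 * (the "foo_pool" name is hypothetical): with pr_itemsperpage 22,
 * pool_prime(&foo_pool, 50) rounds 50 up to 66 items and therefore
 * allocates and inserts 3 empty pages.
 */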

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	pl_enter(pp, &pp->pr_lock);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	pl_leave(pp, &pp->pr_lock);

	return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	unsigned int order;
	int o;
	int n;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	o = 32;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);

		if (o == 32) {
			order = arc4random();
			o = 0;
		}
		if (ISSET(order, 1 << o++))
			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
		else
			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}
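
/*
 * The loop above shuffles the initial free list cheaply: one
 * arc4random() call yields 32 bits, and each item goes to the tail or
 * the head of the list depending on its bit. Illustrative example:
 * with bits 1,0,1 for items A,B,C the list is built as A (tail),
 * B (head), C (tail), giving B, A, C rather than the sequential
 * A, B, C, which makes allocation addresses harder to predict.
 */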

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	pl_enter(pp, &pp->pr_lock);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	pl_leave(pp, &pp->pr_lock);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	pl_enter(pp, &pp->pr_lock);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	pl_leave(pp, &pp->pr_lock);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}
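
/*
 * The watermark knobs above, in one illustrative sketch (hypothetical
 * pool name, not compiled here): keep at least 32 items allocated, let
 * the idle-page trimming in pool_put() start above 16 pages, and
 * refuse to hand out more than 1024 items at once:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 16 * foo_pool.pr_itemsperpage);
 *	pool_sethardlimit(&foo_pool, 1024, "out of foos", 10);
 */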

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
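
/*
 * From userland the interface above is reached via sysctl(2), e.g. to
 * read the kinfo_pool for the pool with serial number 1 (illustrative
 * sketch; error handling omitted):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */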

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_timestamp = getnsecuptime();
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	membar_producer();

	pp->pr_cache = cm;
}

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
		pl_enter(pp, &pp->pr_cache_lock);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_timestamp = getnsecuptime();

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}
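
/*
 * pc_gen below works like a seqlock: pool_cache_enter() bumps it to an
 * odd value before the owning CPU touches its cache, and
 * pool_cache_leave() bumps it back to even. Cross-CPU readers (see
 * pool_cache_pool_info()) can therefore snapshot the counters without
 * a lock, along the lines of this sketch:
 *
 *	do {
 *		while ((gen = pc->pc_gen) & 1)
 *			yield();
 *		... copy the pc_* fields ...
 *	} while (gen != pc->pc_gen);
 */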

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	pl_enter(pp, &pp->pr_lock);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	pl_leave(pp, &pp->pr_lock);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on the pr_cache_mtx then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};