/*	$OpenBSD: subr_pool.c,v 1.236 2022/08/14 01:58:28 jsg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/percpu.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
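
/*
 * A typical consumer initializes a pool once and then gets/puts items.
 * A minimal sketch (not from this file; `struct foo' and `foo_pool' are
 * hypothetical):
 *
 *	struct foo { int f_state; };
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 *
 * With PR_WAITOK, pool_get() may sleep until an item becomes available.
 */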

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;
	uint64_t		ph_timestamp;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
	((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
	ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

#define POOL_WAIT_FREE	SEC_TO_NSEC(1)
#define POOL_WAIT_GC	SEC_TO_NSEC(8)

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}
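
/*
 * Worked example of the header placement and coloring arithmetic above
 * (numbers are illustrative; assume PAGE_SIZE == 4096, 8-byte alignment
 * and a page header of H == 96 bytes): for 64-byte items,
 * items = 4096 / 64 = 64 leaves no slack, but since 2 * H >= 64 the
 * header is stolen from the page itself: off = 4096 - 96 = 4000 and
 * items = 4000 / 64 = 62. The leftover space = 4000 - 62 * 64 = 32
 * then yields pr_maxcolors = 32 / 8 + 1 = 5 cache-coloring offsets.
 * Because off != 0, POOL_INPGHDR() is true and no RB tree lookup is
 * needed to find the header for a returned item.
 */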

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	TRACEPOINT(uvm, pool_get, pp, v, flags);

	return (v);

fail:
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}
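
/*
 * pool_get() above is itself a consumer of the asynchronous request
 * interface: a struct pool_request carries a handler and a cookie, and
 * the handler fires once an item becomes available. A minimal sketch of
 * an external caller (`foo_handler', `foo_softc' and `sc_pr' are
 * hypothetical):
 *
 *	void
 *	foo_handler(struct pool *pp, void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *		... hand `item' to the driver without sleeping ...
 *	}
 *
 *	pool_request_init(&sc->sc_pr, foo_handler, sc);
 *	pool_request(&foo_pool, &sc->sc_pr);
 */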

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	TRACEPOINT(uvm, pool_put, pp, v);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	pool_wakeup(pp);
}

void
pool_wakeup(struct pool *pp)
{
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_timestamp = getnsecuptime();
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	pl_enter(pp, &pp->pr_lock);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	pl_leave(pp, &pp->pr_lock);

	return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	unsigned int order;
	int o;
	int n;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	o = 32;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);

		if (o == 32) {
			order = arc4random();
			o = 0;
		}
		if (ISSET(order, 1U << o++))
			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
		else
			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	pl_enter(pp, &pp->pr_lock);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	pl_leave(pp, &pp->pr_lock);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	pl_enter(pp, &pp->pr_lock);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	pl_leave(pp, &pp->pr_lock);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}
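
/*
 * The water marks above are tuned by consumers at attach time. A minimal
 * sketch (`foo_pool' and the numbers are hypothetical): keep at least 32
 * items primed, start freeing idle pages beyond roughly 128 items worth,
 * and refuse to hand out more than 1024 items at once, warning at most
 * once a minute:
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 128);
 *	pool_sethardlimit(&foo_pool, 1024, "foo_pool limit reached", 60);
 */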

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have the following sysctls:
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS additionally report on the
 * per-CPU item caches for the pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
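
/*
 * Userland reads these through sysctl(2). A minimal sketch, assuming the
 * usual { CTL_KERN, KERN_POOL, ... } MIB layout and with error handling
 * omitted:
 *
 *	int mib[] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 * Per-pool queries add the pool's serial number as a fourth MIB entry.
 */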

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_timestamp = getnsecuptime();
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	membar_producer();

	pp->pr_cache = cm;
}
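
/*
 * A pool opts in to the per-CPU caches by calling pool_cache_init()
 * once after pool_init(). Minimal sketch (`foo_pool' and IPL_NET are
 * hypothetical; the item size must be at least
 * sizeof(struct pool_cache_item), as asserted above):
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NET, 0,
 *	    "foopl", NULL);
 *	pool_cache_init(&foo_pool);
 *
 * After this, pool_get() and pool_put() prefer the local CPU's cache
 * and only fall back to the shared pool lists when it runs dry.
 */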

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
		pl_enter(pp, &pp->pr_cache_lock);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_timestamp = getnsecuptime();

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	pl_enter(pp, &pp->pr_lock);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	pl_leave(pp, &pp->pr_lock);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on pr_cache_lock then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};