1 /* $OpenBSD: subr_pool.c,v 1.221 2018/01/18 18:08:51 bluhm Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/proc.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 #include <sys/task.h> 44 #include <sys/timeout.h> 45 #include <sys/percpu.h> 46 47 #include <uvm/uvm_extern.h> 48 49 /* 50 * Pool resource management utility. 51 * 52 * Memory is allocated in pages which are split into pieces according to 53 * the pool item size. Each page is kept on one of three lists in the 54 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 55 * for empty, full and partially-full pages respectively. The individual 56 * pool items are on a linked list headed by `ph_items' in each page 57 * header. The memory for building the page list is either taken from 58 * the allocated pages themselves (for small pool items) or taken from 59 * an internal pool of page headers (`phpool'). 60 */ 61 62 /* List of all pools */ 63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 64 65 /* 66 * Every pool gets a unique serial number assigned to it. If this counter 67 * wraps, we're screwed, but we shouldn't create so many pools anyway. 
 */
unsigned int pool_serial;	/* next serial number to assign; 0 is never used */
unsigned int pool_count;	/* number of pools currently on pool_head */

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

/*
 * Per-pool lock operations. A pool uses either a mutex (default) or an
 * rwlock (PR_RWLOCK); this vtable hides the difference from the rest of
 * the code. Dispatched through pp->pr_lock_ops via the pl_* wrappers.
 */
struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    struct lock_type *);
	void	(*pl_enter)(union pool_lock * LOCK_FL_VARS);
	int	(*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
	void	(*pl_leave)(union pool_lock * LOCK_FL_VARS);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

#ifdef WITNESS
/* Give each lock a static lock_type named after the lock expression. */
#define pl_init(pp, pl) do {						\
	static struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

/* Thin dispatch wrappers around pp->pr_lock_ops. */
static inline void
pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
}

#ifdef WITNESS
/* Route file/line information of the caller into the wrappers above. */
# define pl_enter(pp,pl)	pl_enter(pp,pl LOCK_FILE_LINE)
# define pl_enter_try(pp,pl)	pl_enter_try(pp,pl LOCK_FILE_LINE)
# define pl_leave(pp,pl)	pl_leave(pp,pl LOCK_FILE_LINE)
#endif

/*
 * A free item on a pool page. pi_magic is the item's address XORed with
 * the page's random ph_magic, so free-list corruption is detectable.
 */
struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;	/* random per-page cookie */
	int			ph_tick;	/* ticks when page went idle */
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
/* A free item on a per-CPU cache list; links lists of items together. */
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)		\
	((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)		\
	ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

/* Per-CPU cache state; one of these per CPU per cached pool. */
struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;	/* # of items handed out */
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

/* True when the page header lives inside the page itself. */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

/* multi-page allocations, not safe to sleep */
struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

/* multi-page allocations for PR_WAITOK pools (may sleep) */
struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;		/* seconds a page must be idle before free */
int pool_wait_gc = 8;		/* seconds a page must be idle before gc */

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Order off-page page headers by descending page address.
 */
static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 *
 * In-page headers are found arithmetically via the page mask; off-page
 * headers are looked up in the pool's red-black tree. Panics if the
 * address does not belong to any page of this pool.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	/*
	 * NFIND with the descending compare above finds the header whose
	 * page starts at or below v.
	 */
	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	/* every item must be able to hold a free-list entry */
	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	/* grow the page size until at least 8 items fit on a page */
	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		/* pa_pagesz is a bitmask of supported page sizes */
		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	/* lazily bootstrap the pool that backs off-page headers */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool wont "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource. The pool must have no outstanding
 * items; all empty pages are returned to the backing allocator.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/*
	 * Remove all pages. pool_p_remove() requires pr_lock held but
	 * pool_p_free() may sleep, so drop the lock between the two.
	 */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

/*
 * Prepare an asynchronous pool request; handler is called with cookie
 * and the allocated item once one becomes available.
 */
void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

/*
 * Queue an asynchronous request and try to service the queue now.
 */
void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

/* On-stack rendezvous used by pool_get() to wait for an item. */
struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool. With PR_WAITOK this sleeps (via the
 * request queue) until an item is available; with PR_NOWAIT it may
 * return NULL.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	/* fast path: per-CPU cache, no pool lock needed */
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		/* queue a request and sleep until pool_get_done fills it */
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

/*
 * Request handler for pool_get(): publish the item and wake the sleeper.
 */
void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}

/*
 * Service the queue of asynchronous requests. Called with
 * pr_requests_lock held and pr_lock unheld; returns the same way.
 * pr_requesting serializes runners: a nested caller only bumps the
 * counter and the first runner loops until it drains to zero.
 */
void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		/* fill as many queued requests as the pool allows */
		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		/* run handlers for the requests that got an item */
		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

/*
 * Take one item off the current page. Called with pr_lock held;
 * may drop and retake it to allocate a fresh page. Sets *slowdown
 * when the backing allocator signals memory pressure.
 */
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	/* verify the XOR magic before trusting the free-list entry */
	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	/* fast path: per-CPU cache, unless waiters are queued */
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	/* actual page free happens outside pr_lock (may sleep) */
	if (freeph != NULL)
		pool_p_free(pp, freeph);

	/* the returned item may satisfy a queued async request */
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

/*
 * Put an item back on its page's free list and move the page between
 * the full/partial/empty lists as needed. Called with pr_lock held.
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		/* scan the free list to catch double pool_put */
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		/* remember when it went idle for the page reaper */
		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}

/*
 * Add N items to the pool. Pages are allocated unlocked, then
 * inserted under pr_lock. Always returns 0, even on partial failure.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	pl_enter(pp, &pp->pr_lock);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	pl_leave(pp, &pp->pr_lock);

	return (0);
}

/*
 * Allocate and initialize one page: get memory from the backing
 * allocator, set up the page header (in-page or from phpool) and
 * build the page's free list. Called without pr_lock; may sleep
 * depending on flags.
 */
struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	/* offset the items by this page's cache color */
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	/* carve the page into items and chain them on the free list */
	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

/*
 * Release one (fully free) page back to the backing allocator,
 * verifying the free list first. Called without pr_lock.
 */
void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	/* off-page headers go back to phpool */
	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

/*
 * Hook a freshly allocated (empty) page into the pool's bookkeeping.
 * Called with pr_lock held.
 */
void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Unhook an empty page from the pool's bookkeeping; the caller frees
 * it with pool_p_free() after dropping pr_lock.
 */
void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

/*
 * Pick a new curpage: prefer partially-full pages, then empty pages;
 * NULL if neither exists.
 */
void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

/*
 * Set the low water mark and pre-allocate enough items to reach it.
 */
void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	pl_enter(pp, &pp->pr_lock);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	pl_leave(pp, &pp->pr_lock);

	if (prime > 0)
		pool_prime(pp, prime);
}

/*
 * Set the maximum number of idle pages kept around (in pages,
 * converted from an item count). Unlocked write.
 */
void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

/*
 * Set the hard limit on outstanding items, with an optional rate-capped
 * warning message. Fails with EINVAL if more items are already out.
 */
int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

/*
 * Set the physical-memory constraints used by the page allocator.
 */
void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	pl_enter(pp, &pp->pr_lock);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	pl_leave(pp, &pp->pr_lock);

	if (TAILQ_EMPTY(&pl))
		return (0);

	/* free the collected pages outside pr_lock */
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
1154 */ 1155 void 1156 pool_reclaim_all(void) 1157 { 1158 struct pool *pp; 1159 1160 rw_enter_read(&pool_lock); 1161 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1162 pool_reclaim(pp); 1163 rw_exit_read(&pool_lock); 1164 } 1165 1166 #ifdef DDB 1167 #include <machine/db_machdep.h> 1168 #include <ddb/db_output.h> 1169 1170 /* 1171 * Diagnostic helpers. 1172 */ 1173 void 1174 pool_printit(struct pool *pp, const char *modif, 1175 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1176 { 1177 pool_print1(pp, modif, pr); 1178 } 1179 1180 void 1181 pool_print_pagelist(struct pool_pagelist *pl, 1182 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1183 { 1184 struct pool_page_header *ph; 1185 struct pool_item *pi; 1186 1187 TAILQ_FOREACH(ph, pl, ph_entry) { 1188 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1189 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1190 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1191 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1192 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1193 pi, pi->pi_magic); 1194 } 1195 } 1196 } 1197 } 1198 1199 void 1200 pool_print1(struct pool *pp, const char *modif, 1201 int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) 1202 { 1203 struct pool_page_header *ph; 1204 int print_pagelist = 0; 1205 char c; 1206 1207 while ((c = *modif++) != '\0') { 1208 if (c == 'p') 1209 print_pagelist = 1; 1210 modif++; 1211 } 1212 1213 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1214 pp->pr_maxcolors); 1215 (*pr)("\talloc %p\n", pp->pr_alloc); 1216 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1217 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1218 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1219 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1220 1221 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1222 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1223 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1224 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1225 1226 if (print_pagelist == 0) 1227 return; 1228 1229 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1230 (*pr)("\n\tempty page list:\n"); 1231 pool_print_pagelist(&pp->pr_emptypages, pr); 1232 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1233 (*pr)("\n\tfull page list:\n"); 1234 pool_print_pagelist(&pp->pr_fullpages, pr); 1235 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1236 (*pr)("\n\tpartial-page list:\n"); 1237 pool_print_pagelist(&pp->pr_partpages, pr); 1238 1239 if (pp->pr_curpage == NULL) 1240 (*pr)("\tno current page\n"); 1241 else 1242 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1243 } 1244 1245 void 1246 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1247 { 1248 struct pool *pp; 1249 char maxp[16]; 1250 int ovflw; 1251 char mode; 1252 1253 mode = modif[0]; 1254 if (mode != '\0' && mode != 'a') { 1255 db_printf("usage: show all pools [/a]\n"); 1256 return; 1257 } 1258 1259 if (mode == '\0') 1260 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1261 "Name", 1262 "Size", 1263 "Requests", 1264 "Fail", 1265 
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		/* skip pools that have never been used */
		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

/*
 * Print one value right-justified in a column, carrying overflow from
 * earlier columns of the same row in `ovflw'.
 */
#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
/*
 * Verify the consistency of one page: the header back-pointer, every
 * free item's bounds and magic, the poison pattern (DIAGNOSTIC only)
 * and the item counts.  `expected' is the expected number of free
 * items, or -1 to skip that check.  Returns non-zero on inconsistency.
 */
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	/* for in-page headers, the header must live on its own page */
	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		/* every free item must lie within the page */
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	/* free items plus allocated items must account for the page */
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

/*
 * Check every page in the pool: empty pages must have all items free,
 * full pages none, partial pages any number (-1 = don't check).
 */
int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
/*
 * Call `func' on every allocated item in the pool.  On full pages all
 * items are in use; on partial pages an item is in use iff it is not
 * on the page's free list.
 */
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			/* is this item on the page's free list? */
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
/*
 * Handler for the kern.pool sysctl tree.  name[0] selects the kind of
 * information; name[1] (when required) is the pool's serial number.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	/* look the pool up by its serial number */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		/* snapshot the counters under the pool lock */
		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		/* fold in the per-cpu cache statistics */
		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

/* Requeue the pool gc task; runs from timeout (interrupt) context. */
void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

/*
 * Garbage collect idle pages: free at most one long-idle empty page
 * per pool per pass, then rearm the timeout for the next pass.
 */
void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		/* free the page outside the pool lock */
		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

/*
 * Get a page from the pool's backend allocator, sanity checking the
 * alignment when the page header lives inside the page.
 */
void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

/* Return a page to the pool's backend allocator. */
void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

/* Single-page allocations from the kv_page arena. */
void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

/* Multi-page allocations from the interrupt safe arena (at splvm). */
void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	/* in-page headers require page-size aligned allocations */
	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

/* Non-interrupt-safe multi-page allocations, under the kernel lock. */
void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

/*
 * Attach a per-cpu item cache to `pp'.  The per-cpu structures come
 * from the shared `pool_caches' pool, initialised on first use.
 */
void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_tick = ticks;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	/* make the cache state visible before publishing pr_cache */
	membar_producer();

	pp->pr_cache = cm;
}

/*
 * Stamp the two magic words of a free cache item; they encode the item
 * address and its ci_next pointer so corruption can be detected.
 */
static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry =
	    (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

/*
 * Verify both magic words of a free cache item; panic if either has
 * been modified while the item sat on a free list.
 */
static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

/*
 * Take the shared cache lock, counting contended acquisitions so
 * pool_cache_gc() can tune the per-cpu list length.
 */
static inline void
pool_list_enter(struct pool *pp)
{
	if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
		pl_enter(pp, &pp->pr_cache_lock);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}

/*
 * Take a whole list of items from the pool's shared cache; NULL when
 * the cache is empty.
 */
static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		/* the TAILQ linkage overwrote the magic; restamp it */
		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

/* Return a full list of items to the pool's shared cache. */
static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	/* restart the gc idle clock when the cache had been drained */
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_tick = ticks;

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

/*
 * Pin this cpu's cache: raise ipl and bump pc_gen so lockless readers
 * (sysctl) can detect that an update is in progress.
 */
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;	/* pc_gen is odd while we are inside */

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;	/* even again: the update is complete */
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

/*
 * Fast path allocation from the per-cpu cache.  Tries the active list,
 * then the previous list, then the shared cache; returns NULL when all
 * are empty.
 */
void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;	/* ci is NULL: miss */
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

/*
 * Fast path free to the per-cpu cache.  When the active list is full
 * it is rotated to pc_prev and the old pc_prev list is returned to the
 * shared cache.
 */
void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	/* push the item onto the active list and restamp its magic */
	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

/*
 * Return every item on the list `pl' to the pool proper.  Returns the
 * next list in the shared cache (for iteration), or NULL.
 */
struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	pl_enter(pp, &pp->pr_lock);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	pl_leave(pp, &pp->pr_lock);

	return (rpl);
}

/*
 * Tear down the per-cpu caches, returning every cached item to the
 * pool proper.
 */
void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	/* free one long-idle list from the shared cache, if any */
	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_tick = ticks;

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on the pr_cache_mtx then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		/* no contention this round: slowly shrink back */
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

/* Fold the per-cpu cache statistics into `pi' for sysctl. */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			/* odd pc_gen means an update is in progress */
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

/* sysctl handler for KERN_POOL_CACHE: global cache statistics. */
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

/*
 * sysctl handler for KERN_POOL_CACHE_CPUS: per-cpu counters, read via
 * the pc_gen protocol instead of taking the cache lock.
 */
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			/* wait for a consistent snapshot of this cpu */
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop
 */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


/*
 * Pool lock backends behind the pl_* operations vector: one built on
 * a mutex, one built on a write-only rwlock.
 */
void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS)
{
	_mtx_enter(&lock->prl_mtx LOCK_FL_ARGS);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS)
{
	return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS));
}

void
pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS)
{
	_mtx_leave(&lock->prl_mtx LOCK_FL_ARGS);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS)
{
	_rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS);
}

int
pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS)
{
	return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP
	    LOCK_FL_ARGS) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS)
{
	_rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};