1 /* $OpenBSD: subr_pool.c,v 1.228 2019/07/19 09:03:03 bluhm Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/proc.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 #include <sys/task.h> 44 #include <sys/timeout.h> 45 #include <sys/percpu.h> 46 47 #include <uvm/uvm_extern.h> 48 49 /* 50 * Pool resource management utility. 51 * 52 * Memory is allocated in pages which are split into pieces according to 53 * the pool item size. Each page is kept on one of three lists in the 54 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 55 * for empty, full and partially-full pages respectively. The individual 56 * pool items are on a linked list headed by `ph_items' in each page 57 * header. The memory for building the page list is either taken from 58 * the allocated pages themselves (for small pool items) or taken from 59 * an internal pool of page headers (`phpool'). 60 */ 61 62 /* List of all pools */ 63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 64 65 /* 66 * Every pool gets a unique serial number assigned to it. If this counter 67 * wraps, we're screwed, but we shouldn't create so many pools anyway. 68 */ 69 unsigned int pool_serial; 70 unsigned int pool_count; 71 72 /* Lock the previous variables making up the global pool state */ 73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools"); 74 75 /* Private pool for page header structures */ 76 struct pool phpool; 77 78 struct pool_lock_ops { 79 void (*pl_init)(struct pool *, union pool_lock *, 80 const struct lock_type *); 81 void (*pl_enter)(union pool_lock *); 82 int (*pl_enter_try)(union pool_lock *); 83 void (*pl_leave)(union pool_lock *); 84 void (*pl_assert_locked)(union pool_lock *); 85 void (*pl_assert_unlocked)(union pool_lock *); 86 int (*pl_sleep)(void *, union pool_lock *, int, const char *, int); 87 }; 88 89 static const struct pool_lock_ops pool_lock_ops_mtx; 90 static const struct pool_lock_ops pool_lock_ops_rw; 91 92 #ifdef WITNESS 93 #define pl_init(pp, pl) do { \ 94 static const struct lock_type __lock_type = { .lt_name = #pl }; \ 95 (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type); \ 96 } while (0) 97 #else /* WITNESS */ 98 #define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL) 99 #endif /* WITNESS */ 100 101 static inline void 102 pl_enter(struct pool *pp, union pool_lock *pl) 103 { 104 pp->pr_lock_ops->pl_enter(pl); 105 } 106 static inline int 107 pl_enter_try(struct pool *pp, union pool_lock *pl) 108 { 109 return pp->pr_lock_ops->pl_enter_try(pl); 110 } 111 static inline void 112 pl_leave(struct pool *pp, union pool_lock *pl) 113 { 114 pp->pr_lock_ops->pl_leave(pl); 115 } 116 static inline void 117 pl_assert_locked(struct pool *pp, union pool_lock *pl) 118 { 119 pp->pr_lock_ops->pl_assert_locked(pl); 120 } 121 static inline void 122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl) 123 { 124 pp->pr_lock_ops->pl_assert_unlocked(pl); 125 } 126 static inline int 127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority, 128 const char *wmesg, int timo) 129 { 130 return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo); 131 } 132 133 struct pool_item { 134 u_long pi_magic; 135 XSIMPLEQ_ENTRY(pool_item) pi_list; 136 }; 137 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic) 138 139 struct pool_page_header { 140 /* Page headers */ 141 TAILQ_ENTRY(pool_page_header) 142 ph_entry; /* pool page list */ 143 XSIMPLEQ_HEAD(, pool_item) 144 ph_items; /* free items on the page */ 145 RBT_ENTRY(pool_page_header) 146 ph_node; /* off-page page headers */ 147 unsigned int ph_nmissing; /* # of chunks in use */ 148 caddr_t ph_page; /* this page's address */ 149 caddr_t ph_colored; /* page's colored address */ 150 unsigned long ph_magic; 151 int ph_tick; 152 }; 153 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ 154 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) 155 156 #ifdef MULTIPROCESSOR 157 struct pool_cache_item { 158 struct pool_cache_item *ci_next; /* next item in list */ 159 unsigned long ci_nitems; /* number of items in list */ 160 TAILQ_ENTRY(pool_cache_item) 161 ci_nextl; /* entry in list of lists */ 162 }; 163 164 /* we store whether the cached item is poisoned in the high bit of nitems */ 165 #define POOL_CACHE_ITEM_NITEMS_MASK 0x7ffffffUL 166 #define POOL_CACHE_ITEM_NITEMS_POISON 0x8000000UL 167 168 #define POOL_CACHE_ITEM_NITEMS(_ci) \ 169 ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK) 170 171 #define POOL_CACHE_ITEM_POISONED(_ci) \ 172 ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON) 173 174 struct pool_cache { 175 struct pool_cache_item *pc_actv; /* active list of items */ 176 unsigned long pc_nactv; /* actv head nitems cache */ 177 struct pool_cache_item *pc_prev; /* previous list of items */ 178 179 uint64_t pc_gen; /* generation number */ 180 uint64_t pc_nget; /* # of successful requests */ 181 uint64_t pc_nfail; /* # of unsuccessful reqs */ 182 uint64_t pc_nput; /* # of releases */ 183 uint64_t pc_nlget; /* # of list requests */ 184 uint64_t pc_nlfail; /* # of fails getting a list */ 185 uint64_t pc_nlput; /* # of list releases */ 186 187 int pc_nout; 188 }; 189 190 void *pool_cache_get(struct pool *); 191 void pool_cache_put(struct pool *, void *); 192 void pool_cache_destroy(struct pool *); 193 void pool_cache_gc(struct pool *); 194 #endif 195 void pool_cache_pool_info(struct pool *, struct kinfo_pool *); 196 int pool_cache_info(struct pool *, void *, size_t *); 197 int pool_cache_cpus_info(struct pool *, void *, size_t *); 198 199 #ifdef POOL_DEBUG 200 int pool_debug = 1; 201 #else 202 int pool_debug = 0; 203 #endif 204 205 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0) 206 207 struct pool_page_header * 208 pool_p_alloc(struct pool *, int, int *); 209 void pool_p_insert(struct pool *, struct pool_page_header *); 210 void pool_p_remove(struct pool *, struct pool_page_header *); 211 void pool_p_free(struct pool *, struct pool_page_header *); 212 213 void pool_update_curpage(struct pool *); 214 void *pool_do_get(struct pool *, int, int *); 215 void pool_do_put(struct pool *, void *); 216 int pool_chk_page(struct pool *, struct pool_page_header *, int); 217 int pool_chk(struct pool *); 218 void pool_get_done(struct pool *, void *, void *); 219 void pool_runqueue(struct pool *, int); 220 221 void *pool_allocator_alloc(struct pool *, int, int *); 222 void pool_allocator_free(struct pool *, void *); 223 224 /* 225 * The default pool allocator. 226 */ 227 void *pool_page_alloc(struct pool *, int, int *); 228 void pool_page_free(struct pool *, void *); 229 230 /* 231 * safe for interrupts; this is the default allocator 232 */ 233 struct pool_allocator pool_allocator_single = { 234 pool_page_alloc, 235 pool_page_free, 236 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED) 237 }; 238 239 void *pool_multi_alloc(struct pool *, int, int *); 240 void pool_multi_free(struct pool *, void *); 241 242 struct pool_allocator pool_allocator_multi = { 243 pool_multi_alloc, 244 pool_multi_free, 245 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 246 }; 247 248 void *pool_multi_alloc_ni(struct pool *, int, int *); 249 void pool_multi_free_ni(struct pool *, void *); 250 251 struct pool_allocator pool_allocator_multi_ni = { 252 pool_multi_alloc_ni, 253 pool_multi_free_ni, 254 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 255 }; 256 257 #ifdef DDB 258 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) 259 __attribute__((__format__(__kprintf__,1,2)))); 260 void pool_print1(struct pool *, const char *, int (*)(const char *, ...) 261 __attribute__((__format__(__kprintf__,1,2)))); 262 #endif 263 264 /* stale page garbage collectors */ 265 void pool_gc_sched(void *); 266 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); 267 void pool_gc_pages(void *); 268 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); 269 int pool_wait_free = 1; 270 int pool_wait_gc = 8; 271 272 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 273 274 static inline int 275 phtree_compare(const struct pool_page_header *a, 276 const struct pool_page_header *b) 277 { 278 vaddr_t va = (vaddr_t)a->ph_page; 279 vaddr_t vb = (vaddr_t)b->ph_page; 280 281 /* the compares in this order are important for the NFIND to work */ 282 if (vb < va) 283 return (-1); 284 if (vb > va) 285 return (1); 286 287 return (0); 288 } 289 290 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 291 292 /* 293 * Return the pool page header based on page address. 294 */ 295 static inline struct pool_page_header * 296 pr_find_pagehead(struct pool *pp, void *v) 297 { 298 struct pool_page_header *ph, key; 299 300 if (POOL_INPGHDR(pp)) { 301 caddr_t page; 302 303 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 304 305 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 306 } 307 308 key.ph_page = v; 309 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 310 if (ph == NULL) 311 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 312 313 KASSERT(ph->ph_page <= (caddr_t)v); 314 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 315 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 316 317 return (ph); 318 } 319 320 /* 321 * Initialize the given pool resource structure. 322 * 323 * We export this routine to allow other kernel parts to declare 324 * static pools that must be initialized before malloc() is available. 325 */ 326 void 327 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags, 328 const char *wchan, struct pool_allocator *palloc) 329 { 330 int off = 0, space; 331 unsigned int pgsize = PAGE_SIZE, items; 332 size_t pa_pagesz; 333 #ifdef DIAGNOSTIC 334 struct pool *iter; 335 #endif 336 337 if (align == 0) 338 align = ALIGN(1); 339 340 if (size < sizeof(struct pool_item)) 341 size = sizeof(struct pool_item); 342 343 size = roundup(size, align); 344 345 while (size * 8 > pgsize) 346 pgsize <<= 1; 347 348 if (palloc == NULL) { 349 if (pgsize > PAGE_SIZE) { 350 palloc = ISSET(flags, PR_WAITOK) ? 351 &pool_allocator_multi_ni : &pool_allocator_multi; 352 } else 353 palloc = &pool_allocator_single; 354 355 pa_pagesz = palloc->pa_pagesz; 356 } else { 357 size_t pgsizes; 358 359 pa_pagesz = palloc->pa_pagesz; 360 if (pa_pagesz == 0) 361 pa_pagesz = POOL_ALLOC_DEFAULT; 362 363 pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED; 364 365 /* make sure the allocator can fit at least one item */ 366 if (size > pgsizes) { 367 panic("%s: pool %s item size 0x%zx > " 368 "allocator %p sizes 0x%zx", __func__, wchan, 369 size, palloc, pgsizes); 370 } 371 372 /* shrink pgsize until it fits into the range */ 373 while (!ISSET(pgsizes, pgsize)) 374 pgsize >>= 1; 375 } 376 KASSERT(ISSET(pa_pagesz, pgsize)); 377 378 items = pgsize / size; 379 380 /* 381 * Decide whether to put the page header off page to avoid 382 * wasting too large a part of the page. Off-page page headers 383 * go into an RB tree, so we can match a returned item with 384 * its header based on the page address. 385 */ 386 if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) { 387 if (pgsize - (size * items) > 388 sizeof(struct pool_page_header)) { 389 off = pgsize - sizeof(struct pool_page_header); 390 } else if (sizeof(struct pool_page_header) * 2 >= size) { 391 off = pgsize - sizeof(struct pool_page_header); 392 items = off / size; 393 } 394 } 395 396 KASSERT(items > 0); 397 398 /* 399 * Initialize the pool structure. 400 */ 401 memset(pp, 0, sizeof(*pp)); 402 if (ISSET(flags, PR_RWLOCK)) { 403 KASSERT(flags & PR_WAITOK); 404 pp->pr_lock_ops = &pool_lock_ops_rw; 405 } else 406 pp->pr_lock_ops = &pool_lock_ops_mtx; 407 TAILQ_INIT(&pp->pr_emptypages); 408 TAILQ_INIT(&pp->pr_fullpages); 409 TAILQ_INIT(&pp->pr_partpages); 410 pp->pr_curpage = NULL; 411 pp->pr_npages = 0; 412 pp->pr_minitems = 0; 413 pp->pr_minpages = 0; 414 pp->pr_maxpages = 8; 415 pp->pr_size = size; 416 pp->pr_pgsize = pgsize; 417 pp->pr_pgmask = ~0UL ^ (pgsize - 1); 418 pp->pr_phoffset = off; 419 pp->pr_itemsperpage = items; 420 pp->pr_wchan = wchan; 421 pp->pr_alloc = palloc; 422 pp->pr_nitems = 0; 423 pp->pr_nout = 0; 424 pp->pr_hardlimit = UINT_MAX; 425 pp->pr_hardlimit_warning = NULL; 426 pp->pr_hardlimit_ratecap.tv_sec = 0; 427 pp->pr_hardlimit_ratecap.tv_usec = 0; 428 pp->pr_hardlimit_warning_last.tv_sec = 0; 429 pp->pr_hardlimit_warning_last.tv_usec = 0; 430 RBT_INIT(phtree, &pp->pr_phtree); 431 432 /* 433 * Use the space between the chunks and the page header 434 * for cache coloring. 435 */ 436 space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize; 437 space -= pp->pr_itemsperpage * pp->pr_size; 438 pp->pr_align = align; 439 pp->pr_maxcolors = (space / align) + 1; 440 441 pp->pr_nget = 0; 442 pp->pr_nfail = 0; 443 pp->pr_nput = 0; 444 pp->pr_npagealloc = 0; 445 pp->pr_npagefree = 0; 446 pp->pr_hiwat = 0; 447 pp->pr_nidle = 0; 448 449 pp->pr_ipl = ipl; 450 pp->pr_flags = flags; 451 452 pl_init(pp, &pp->pr_lock); 453 pl_init(pp, &pp->pr_requests_lock); 454 TAILQ_INIT(&pp->pr_requests); 455 456 if (phpool.pr_size == 0) { 457 pool_init(&phpool, sizeof(struct pool_page_header), 0, 458 IPL_HIGH, 0, "phpool", NULL); 459 460 /* make sure phpool wont "recurse" */ 461 KASSERT(POOL_INPGHDR(&phpool)); 462 } 463 464 /* pglistalloc/constraint parameters */ 465 pp->pr_crange = &kp_dirty; 466 467 /* Insert this into the list of all pools. */ 468 rw_enter_write(&pool_lock); 469 #ifdef DIAGNOSTIC 470 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 471 if (iter == pp) 472 panic("%s: pool %s already on list", __func__, wchan); 473 } 474 #endif 475 476 pp->pr_serial = ++pool_serial; 477 if (pool_serial == 0) 478 panic("%s: too much uptime", __func__); 479 480 SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); 481 pool_count++; 482 rw_exit_write(&pool_lock); 483 } 484 485 /* 486 * Decommission a pool resource. 487 */ 488 void 489 pool_destroy(struct pool *pp) 490 { 491 struct pool_page_header *ph; 492 struct pool *prev, *iter; 493 494 #ifdef MULTIPROCESSOR 495 if (pp->pr_cache != NULL) 496 pool_cache_destroy(pp); 497 #endif 498 499 #ifdef DIAGNOSTIC 500 if (pp->pr_nout != 0) 501 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); 502 #endif 503 504 /* Remove from global pool list */ 505 rw_enter_write(&pool_lock); 506 pool_count--; 507 if (pp == SIMPLEQ_FIRST(&pool_head)) 508 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); 509 else { 510 prev = SIMPLEQ_FIRST(&pool_head); 511 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 512 if (iter == pp) { 513 SIMPLEQ_REMOVE_AFTER(&pool_head, prev, 514 pr_poollist); 515 break; 516 } 517 prev = iter; 518 } 519 } 520 rw_exit_write(&pool_lock); 521 522 /* Remove all pages */ 523 while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { 524 pl_enter(pp, &pp->pr_lock); 525 pool_p_remove(pp, ph); 526 pl_leave(pp, &pp->pr_lock); 527 pool_p_free(pp, ph); 528 } 529 KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); 530 KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); 531 } 532 533 void 534 pool_request_init(struct pool_request *pr, 535 void (*handler)(struct pool *, void *, void *), void *cookie) 536 { 537 pr->pr_handler = handler; 538 pr->pr_cookie = cookie; 539 pr->pr_item = NULL; 540 } 541 542 void 543 pool_request(struct pool *pp, struct pool_request *pr) 544 { 545 pl_enter(pp, &pp->pr_requests_lock); 546 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 547 pool_runqueue(pp, PR_NOWAIT); 548 pl_leave(pp, &pp->pr_requests_lock); 549 } 550 551 struct pool_get_memory { 552 union pool_lock lock; 553 void * volatile v; 554 }; 555 556 /* 557 * Grab an item from the pool. 558 */ 559 void * 560 pool_get(struct pool *pp, int flags) 561 { 562 void *v = NULL; 563 int slowdown = 0; 564 565 KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); 566 if (pp->pr_flags & PR_RWLOCK) 567 KASSERT(flags & PR_WAITOK); 568 569 #ifdef MULTIPROCESSOR 570 if (pp->pr_cache != NULL) { 571 v = pool_cache_get(pp); 572 if (v != NULL) 573 goto good; 574 } 575 #endif 576 577 pl_enter(pp, &pp->pr_lock); 578 if (pp->pr_nout >= pp->pr_hardlimit) { 579 if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL)) 580 goto fail; 581 } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) { 582 if (ISSET(flags, PR_NOWAIT)) 583 goto fail; 584 } 585 pl_leave(pp, &pp->pr_lock); 586 587 if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK)) 588 yield(); 589 590 if (v == NULL) { 591 struct pool_get_memory mem = { .v = NULL }; 592 struct pool_request pr; 593 594 #ifdef DIAGNOSTIC 595 if (ISSET(flags, PR_WAITOK) && curproc == &proc0) 596 panic("%s: cannot sleep for memory during boot", 597 __func__); 598 #endif 599 pl_init(pp, &mem.lock); 600 pool_request_init(&pr, pool_get_done, &mem); 601 pool_request(pp, &pr); 602 603 pl_enter(pp, &mem.lock); 604 while (mem.v == NULL) 605 pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0); 606 pl_leave(pp, &mem.lock); 607 608 v = mem.v; 609 } 610 611 #ifdef MULTIPROCESSOR 612 good: 613 #endif 614 if (ISSET(flags, PR_ZERO)) 615 memset(v, 0, pp->pr_size); 616 617 return (v); 618 619 fail: 620 pp->pr_nfail++; 621 pl_leave(pp, &pp->pr_lock); 622 return (NULL); 623 } 624 625 void 626 pool_get_done(struct pool *pp, void *xmem, void *v) 627 { 628 struct pool_get_memory *mem = xmem; 629 630 pl_enter(pp, &mem->lock); 631 mem->v = v; 632 pl_leave(pp, &mem->lock); 633 634 wakeup_one(mem); 635 } 636 637 void 638 pool_runqueue(struct pool *pp, int flags) 639 { 640 struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl); 641 struct pool_request *pr; 642 643 pl_assert_unlocked(pp, &pp->pr_lock); 644 pl_assert_locked(pp, &pp->pr_requests_lock); 645 646 if (pp->pr_requesting++) 647 return; 648 649 do { 650 pp->pr_requesting = 1; 651 652 /* no TAILQ_JOIN? :( */ 653 while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) { 654 TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry); 655 TAILQ_INSERT_TAIL(&prl, pr, pr_entry); 656 } 657 if (TAILQ_EMPTY(&prl)) 658 continue; 659 660 pl_leave(pp, &pp->pr_requests_lock); 661 662 pl_enter(pp, &pp->pr_lock); 663 pr = TAILQ_FIRST(&prl); 664 while (pr != NULL) { 665 int slowdown = 0; 666 667 if (pp->pr_nout >= pp->pr_hardlimit) 668 break; 669 670 pr->pr_item = pool_do_get(pp, flags, &slowdown); 671 if (pr->pr_item == NULL) /* || slowdown ? */ 672 break; 673 674 pr = TAILQ_NEXT(pr, pr_entry); 675 } 676 pl_leave(pp, &pp->pr_lock); 677 678 while ((pr = TAILQ_FIRST(&prl)) != NULL && 679 pr->pr_item != NULL) { 680 TAILQ_REMOVE(&prl, pr, pr_entry); 681 (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item); 682 } 683 684 pl_enter(pp, &pp->pr_requests_lock); 685 } while (--pp->pr_requesting); 686 687 /* no TAILQ_JOIN :( */ 688 while ((pr = TAILQ_FIRST(&prl)) != NULL) { 689 TAILQ_REMOVE(&prl, pr, pr_entry); 690 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 691 } 692 } 693 694 void * 695 pool_do_get(struct pool *pp, int flags, int *slowdown) 696 { 697 struct pool_item *pi; 698 struct pool_page_header *ph; 699 700 pl_assert_locked(pp, &pp->pr_lock); 701 702 splassert(pp->pr_ipl); 703 704 /* 705 * Account for this item now to avoid races if we need to give up 706 * pr_lock to allocate a page. 707 */ 708 pp->pr_nout++; 709 710 if (pp->pr_curpage == NULL) { 711 pl_leave(pp, &pp->pr_lock); 712 ph = pool_p_alloc(pp, flags, slowdown); 713 pl_enter(pp, &pp->pr_lock); 714 715 if (ph == NULL) { 716 pp->pr_nout--; 717 return (NULL); 718 } 719 720 pool_p_insert(pp, ph); 721 } 722 723 ph = pp->pr_curpage; 724 pi = XSIMPLEQ_FIRST(&ph->ph_items); 725 if (__predict_false(pi == NULL)) 726 panic("%s: %s: page empty", __func__, pp->pr_wchan); 727 728 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 729 panic("%s: %s free list modified: " 730 "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx", 731 __func__, pp->pr_wchan, ph->ph_page, pi, 732 0, pi->pi_magic, POOL_IMAGIC(ph, pi)); 733 } 734 735 XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list); 736 737 #ifdef DIAGNOSTIC 738 if (pool_debug && POOL_PHPOISON(ph)) { 739 size_t pidx; 740 uint32_t pval; 741 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 742 &pidx, &pval)) { 743 int *ip = (int *)(pi + 1); 744 panic("%s: %s free list modified: " 745 "page %p; item addr %p; offset 0x%zx=0x%x", 746 __func__, pp->pr_wchan, ph->ph_page, pi, 747 (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]); 748 } 749 } 750 #endif /* DIAGNOSTIC */ 751 752 if (ph->ph_nmissing++ == 0) { 753 /* 754 * This page was previously empty. Move it to the list of 755 * partially-full pages. This page is already curpage. 756 */ 757 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 758 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 759 760 pp->pr_nidle--; 761 } 762 763 if (ph->ph_nmissing == pp->pr_itemsperpage) { 764 /* 765 * This page is now full. Move it to the full list 766 * and select a new current page. 767 */ 768 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 769 TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry); 770 pool_update_curpage(pp); 771 } 772 773 pp->pr_nget++; 774 775 return (pi); 776 } 777 778 /* 779 * Return resource to the pool. 780 */ 781 void 782 pool_put(struct pool *pp, void *v) 783 { 784 struct pool_page_header *ph, *freeph = NULL; 785 786 #ifdef DIAGNOSTIC 787 if (v == NULL) 788 panic("%s: NULL item", __func__); 789 #endif 790 791 #ifdef MULTIPROCESSOR 792 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) { 793 pool_cache_put(pp, v); 794 return; 795 } 796 #endif 797 798 pl_enter(pp, &pp->pr_lock); 799 800 pool_do_put(pp, v); 801 802 pp->pr_nout--; 803 pp->pr_nput++; 804 805 /* is it time to free a page? */ 806 if (pp->pr_nidle > pp->pr_maxpages && 807 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 808 (ticks - ph->ph_tick) > (hz * pool_wait_free)) { 809 freeph = ph; 810 pool_p_remove(pp, freeph); 811 } 812 813 pl_leave(pp, &pp->pr_lock); 814 815 if (freeph != NULL) 816 pool_p_free(pp, freeph); 817 818 pool_wakeup(pp); 819 } 820 821 void 822 pool_wakeup(struct pool *pp) 823 { 824 if (!TAILQ_EMPTY(&pp->pr_requests)) { 825 pl_enter(pp, &pp->pr_requests_lock); 826 pool_runqueue(pp, PR_NOWAIT); 827 pl_leave(pp, &pp->pr_requests_lock); 828 } 829 } 830 831 void 832 pool_do_put(struct pool *pp, void *v) 833 { 834 struct pool_item *pi = v; 835 struct pool_page_header *ph; 836 837 splassert(pp->pr_ipl); 838 839 ph = pr_find_pagehead(pp, v); 840 841 #ifdef DIAGNOSTIC 842 if (pool_debug) { 843 struct pool_item *qi; 844 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) { 845 if (pi == qi) { 846 panic("%s: %s: double pool_put: %p", __func__, 847 pp->pr_wchan, pi); 848 } 849 } 850 } 851 #endif /* DIAGNOSTIC */ 852 853 pi->pi_magic = POOL_IMAGIC(ph, pi); 854 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 855 #ifdef DIAGNOSTIC 856 if (POOL_PHPOISON(ph)) 857 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 858 #endif /* DIAGNOSTIC */ 859 860 if (ph->ph_nmissing-- == pp->pr_itemsperpage) { 861 /* 862 * The page was previously completely full, move it to the 863 * partially-full list. 864 */ 865 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 866 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 867 } 868 869 if (ph->ph_nmissing == 0) { 870 /* 871 * The page is now empty, so move it to the empty page list. 872 */ 873 pp->pr_nidle++; 874 875 ph->ph_tick = ticks; 876 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 877 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 878 pool_update_curpage(pp); 879 } 880 } 881 882 /* 883 * Add N items to the pool. 884 */ 885 int 886 pool_prime(struct pool *pp, int n) 887 { 888 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 889 struct pool_page_header *ph; 890 int newpages; 891 892 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 893 894 while (newpages-- > 0) { 895 int slowdown = 0; 896 897 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 898 if (ph == NULL) /* or slowdown? */ 899 break; 900 901 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 902 } 903 904 pl_enter(pp, &pp->pr_lock); 905 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 906 TAILQ_REMOVE(&pl, ph, ph_entry); 907 pool_p_insert(pp, ph); 908 } 909 pl_leave(pp, &pp->pr_lock); 910 911 return (0); 912 } 913 914 struct pool_page_header * 915 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 916 { 917 struct pool_page_header *ph; 918 struct pool_item *pi; 919 caddr_t addr; 920 unsigned int order; 921 int o; 922 int n; 923 924 pl_assert_unlocked(pp, &pp->pr_lock); 925 KASSERT(pp->pr_size >= sizeof(*pi)); 926 927 addr = pool_allocator_alloc(pp, flags, slowdown); 928 if (addr == NULL) 929 return (NULL); 930 931 if (POOL_INPGHDR(pp)) 932 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 933 else { 934 ph = pool_get(&phpool, flags); 935 if (ph == NULL) { 936 pool_allocator_free(pp, addr); 937 return (NULL); 938 } 939 } 940 941 XSIMPLEQ_INIT(&ph->ph_items); 942 ph->ph_page = addr; 943 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 944 ph->ph_colored = addr; 945 ph->ph_nmissing = 0; 946 arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 947 #ifdef DIAGNOSTIC 948 /* use a bit in ph_magic to record if we poison page items */ 949 if (pool_debug) 950 SET(ph->ph_magic, POOL_MAGICBIT); 951 else 952 CLR(ph->ph_magic, POOL_MAGICBIT); 953 #endif /* DIAGNOSTIC */ 954 955 n = pp->pr_itemsperpage; 956 o = 32; 957 while (n--) { 958 pi = (struct pool_item *)addr; 959 pi->pi_magic = POOL_IMAGIC(ph, pi); 960 961 if (o == 32) { 962 order = arc4random(); 963 o = 0; 964 } 965 if (ISSET(order, 1 << o++)) 966 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 967 else 968 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 969 970 #ifdef DIAGNOSTIC 971 if (POOL_PHPOISON(ph)) 972 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 973 #endif /* DIAGNOSTIC */ 974 975 addr += pp->pr_size; 976 } 977 978 return (ph); 979 } 980 981 void 982 pool_p_free(struct pool *pp, struct pool_page_header *ph) 983 { 984 struct pool_item *pi; 985 986 pl_assert_unlocked(pp, &pp->pr_lock); 987 KASSERT(ph->ph_nmissing == 0); 988 989 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 990 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 991 panic("%s: %s free list modified: " 992 "page %p; item addr %p; offset 0x%x=0x%lx", 993 __func__, pp->pr_wchan, ph->ph_page, pi, 994 0, pi->pi_magic); 995 } 996 997 #ifdef DIAGNOSTIC 998 if (POOL_PHPOISON(ph)) { 999 size_t pidx; 1000 uint32_t pval; 1001 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1002 &pidx, &pval)) { 1003 int *ip = (int *)(pi + 1); 1004 panic("%s: %s free list modified: " 1005 "page %p; item addr %p; offset 0x%zx=0x%x", 1006 __func__, pp->pr_wchan, ph->ph_page, pi, 1007 pidx * sizeof(int), ip[pidx]); 1008 } 1009 } 1010 #endif 1011 } 1012 1013 pool_allocator_free(pp, ph->ph_page); 1014 1015 if (!POOL_INPGHDR(pp)) 1016 pool_put(&phpool, ph); 1017 } 1018 1019 void 1020 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1021 { 1022 pl_assert_locked(pp, &pp->pr_lock); 1023 1024 /* If the pool was depleted, point at the new page */ 1025 if (pp->pr_curpage == NULL) 1026 pp->pr_curpage = ph; 1027 1028 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1029 if (!POOL_INPGHDR(pp)) 1030 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1031 1032 pp->pr_nitems += pp->pr_itemsperpage; 1033 pp->pr_nidle++; 1034 1035 pp->pr_npagealloc++; 1036 if (++pp->pr_npages > pp->pr_hiwat) 1037 pp->pr_hiwat = pp->pr_npages; 1038 } 1039 1040 void 1041 pool_p_remove(struct pool *pp, struct pool_page_header *ph) 1042 { 1043 pl_assert_locked(pp, &pp->pr_lock); 1044 1045 pp->pr_npagefree++; 1046 pp->pr_npages--; 1047 pp->pr_nidle--; 1048 pp->pr_nitems -= pp->pr_itemsperpage; 1049 1050 if (!POOL_INPGHDR(pp)) 1051 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1052 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1053 1054 pool_update_curpage(pp); 1055 } 1056 1057 void 1058 pool_update_curpage(struct pool *pp) 1059 { 1060 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1061 if (pp->pr_curpage == NULL) { 1062 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1063 } 1064 } 1065 1066 void 1067 pool_setlowat(struct pool *pp, int n) 1068 { 1069 int prime = 0; 1070 1071 pl_enter(pp, &pp->pr_lock); 1072 pp->pr_minitems = n; 1073 pp->pr_minpages = (n == 0) 1074 ? 0 1075 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1076 1077 if (pp->pr_nitems < n) 1078 prime = n - pp->pr_nitems; 1079 pl_leave(pp, &pp->pr_lock); 1080 1081 if (prime > 0) 1082 pool_prime(pp, prime); 1083 } 1084 1085 void 1086 pool_sethiwat(struct pool *pp, int n) 1087 { 1088 pp->pr_maxpages = (n == 0) 1089 ? 0 1090 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1091 } 1092 1093 int 1094 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1095 { 1096 int error = 0; 1097 1098 if (n < pp->pr_nout) { 1099 error = EINVAL; 1100 goto done; 1101 } 1102 1103 pp->pr_hardlimit = n; 1104 pp->pr_hardlimit_warning = warnmsg; 1105 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1106 pp->pr_hardlimit_warning_last.tv_sec = 0; 1107 pp->pr_hardlimit_warning_last.tv_usec = 0; 1108 1109 done: 1110 return (error); 1111 } 1112 1113 void 1114 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1115 { 1116 pp->pr_crange = mode; 1117 } 1118 1119 /* 1120 * Release all complete pages that have not been used recently. 1121 * 1122 * Returns non-zero if any pages have been reclaimed. 1123 */ 1124 int 1125 pool_reclaim(struct pool *pp) 1126 { 1127 struct pool_page_header *ph, *phnext; 1128 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1129 1130 pl_enter(pp, &pp->pr_lock); 1131 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1132 phnext = TAILQ_NEXT(ph, ph_entry); 1133 1134 /* Check our minimum page claim */ 1135 if (pp->pr_npages <= pp->pr_minpages) 1136 break; 1137 1138 /* 1139 * If freeing this page would put us below 1140 * the low water mark, stop now. 1141 */ 1142 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1143 pp->pr_minitems) 1144 break; 1145 1146 pool_p_remove(pp, ph); 1147 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1148 } 1149 pl_leave(pp, &pp->pr_lock); 1150 1151 if (TAILQ_EMPTY(&pl)) 1152 return (0); 1153 1154 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1155 TAILQ_REMOVE(&pl, ph, ph_entry); 1156 pool_p_free(pp, ph); 1157 } 1158 1159 return (1); 1160 } 1161 1162 /* 1163 * Release all complete pages that have not been used recently 1164 * from all pools. 1165 */ 1166 void 1167 pool_reclaim_all(void) 1168 { 1169 struct pool *pp; 1170 1171 rw_enter_read(&pool_lock); 1172 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1173 pool_reclaim(pp); 1174 rw_exit_read(&pool_lock); 1175 } 1176 1177 #ifdef DDB 1178 #include <machine/db_machdep.h> 1179 #include <ddb/db_output.h> 1180 1181 /* 1182 * Diagnostic helpers. 1183 */ 1184 void 1185 pool_printit(struct pool *pp, const char *modif, 1186 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1187 { 1188 pool_print1(pp, modif, pr); 1189 } 1190 1191 void 1192 pool_print_pagelist(struct pool_pagelist *pl, 1193 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1194 { 1195 struct pool_page_header *ph; 1196 struct pool_item *pi; 1197 1198 TAILQ_FOREACH(ph, pl, ph_entry) { 1199 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1200 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1201 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1202 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1203 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1204 pi, pi->pi_magic); 1205 } 1206 } 1207 } 1208 } 1209 1210 void 1211 pool_print1(struct pool *pp, const char *modif, 1212 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1213 { 1214 struct pool_page_header *ph; 1215 int print_pagelist = 0; 1216 char c; 1217 1218 while ((c = *modif++) != '\0') { 1219 if (c == 'p') 1220 print_pagelist = 1; 1221 modif++; 1222 } 1223 1224 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1225 pp->pr_maxcolors); 1226 (*pr)("\talloc %p\n", pp->pr_alloc); 1227 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1228 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1229 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1230 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1231 1232 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1233 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1234 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1235 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1236 1237 if (print_pagelist == 0) 1238 return; 1239 1240 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1241 (*pr)("\n\tempty page list:\n"); 1242 pool_print_pagelist(&pp->pr_emptypages, pr); 1243 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1244 (*pr)("\n\tfull page list:\n"); 1245 pool_print_pagelist(&pp->pr_fullpages, pr); 1246 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1247 (*pr)("\n\tpartial-page list:\n"); 1248 pool_print_pagelist(&pp->pr_partpages, pr); 1249 1250 if (pp->pr_curpage == NULL) 1251 (*pr)("\tno current page\n"); 1252 else 1253 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1254 } 1255 1256 void 1257 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1258 { 1259 struct pool *pp; 1260 char maxp[16]; 1261 int ovflw; 1262 char mode; 1263 1264 mode = modif[0]; 1265 if (mode != '\0' && mode != 'a') { 1266 db_printf("usage: show all pools [/a]\n"); 1267 return; 1268 } 1269 1270 if (mode == '\0') 1271 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1272 "Name", 1273 "Size", 1274 "Requests", 1275 "Fail", 1276 "Releases", 1277 "Pgreq", 1278 "Pgrel", 1279 "Npage", 1280 "Hiwat", 1281 "Minpg", 1282 "Maxpg", 1283 "Idle"); 1284 else 1285 db_printf("%-12s %18s %18s\n", 1286 "Name", "Address", "Allocator"); 1287 1288 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1289 if (mode == 'a') { 1290 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1291 pp->pr_alloc); 1292 continue; 1293 } 1294 1295 if (!pp->pr_nget) 1296 continue; 1297 1298 if (pp->pr_maxpages == UINT_MAX) 1299 snprintf(maxp, sizeof maxp, "inf"); 1300 else 1301 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1302 1303 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1304 (ovflw) += db_printf((fmt), \ 1305 (width) - (fixed) - (ovflw) > 0 ? \ 1306 (width) - (fixed) - (ovflw) : 0, \ 1307 (val)) - (width); \ 1308 if ((ovflw) < 0) \ 1309 (ovflw) = 0; \ 1310 } while (/* CONSTCOND */0) 1311 1312 ovflw = 0; 1313 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1314 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1315 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1316 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1317 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1318 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1319 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1320 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1321 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1322 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1323 PRWORD(ovflw, " %*s", 6, 1, maxp); 1324 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1325 1326 pool_chk(pp); 1327 } 1328 } 1329 #endif /* DDB */ 1330 1331 #if defined(POOL_DEBUG) || defined(DDB) 1332 int 1333 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1334 { 1335 struct pool_item *pi; 1336 caddr_t page; 1337 int n; 1338 const char *label = pp->pr_wchan; 1339 1340 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1341 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1342 printf("%s: ", label); 1343 printf("pool(%p:%s): page inconsistency: page %p; " 1344 "at page head addr %p (p %p)\n", 1345 pp, pp->pr_wchan, ph->ph_page, ph, page); 1346 return 1; 1347 } 1348 1349 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1350 pi != NULL; 1351 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1352 if ((caddr_t)pi < ph->ph_page || 1353 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1354 printf("%s: ", label); 1355 printf("pool(%p:%s): page inconsistency: page %p;" 1356 " item ordinal %d; addr %p\n", pp, 1357 pp->pr_wchan, ph->ph_page, n, pi); 1358 return (1); 1359 } 1360 1361 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1362 printf("%s: ", label); 1363 printf("pool(%p:%s): free list modified: " 1364 "page %p; item ordinal %d; addr %p " 1365 "(p %p); offset 0x%x=0x%lx\n", 1366 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1367 0, pi->pi_magic); 1368 } 1369 1370 #ifdef DIAGNOSTIC 1371 if (POOL_PHPOISON(ph)) { 1372 size_t pidx; 1373 uint32_t pval; 1374 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1375 &pidx, &pval)) { 1376 int *ip = (int *)(pi + 1); 1377 printf("pool(%s): free list modified: " 1378 "page %p; item ordinal %d; addr %p " 1379 "(p %p); offset 0x%zx=0x%x\n", 1380 pp->pr_wchan, ph->ph_page, n, pi, 1381 page, pidx * sizeof(int), ip[pidx]); 1382 } 1383 } 1384 #endif /* DIAGNOSTIC */ 1385 } 1386 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1387 printf("pool(%p:%s): page inconsistency: page %p;" 1388 " %d on list, %d missing, %d items per page\n", pp, 1389 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1390 pp->pr_itemsperpage); 1391 return 1; 1392 } 1393 if (expected >= 0 && n != expected) { 1394 printf("pool(%p:%s): page inconsistency: page %p;" 1395 " %d on list, %d missing, %d expected\n", pp, 1396 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1397 expected); 1398 return 1; 1399 } 1400 return 0; 1401 } 1402 1403 int 1404 pool_chk(struct pool *pp) 1405 { 1406 struct pool_page_header *ph; 1407 int r = 0; 1408 1409 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1410 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1411 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) 1412 r += pool_chk_page(pp, ph, 0); 1413 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1414 r += pool_chk_page(pp, ph, -1); 1415 1416 return (r); 1417 } 1418 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1419 1420 #ifdef DDB 1421 void 1422 pool_walk(struct pool *pp, int full, 1423 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1424 void (*func)(void *, int, int (*)(const char *, ...) 1425 __attribute__((__format__(__kprintf__,1,2))))) 1426 { 1427 struct pool_page_header *ph; 1428 struct pool_item *pi; 1429 caddr_t cp; 1430 int n; 1431 1432 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1433 cp = ph->ph_colored; 1434 n = ph->ph_nmissing; 1435 1436 while (n--) { 1437 func(cp, full, pr); 1438 cp += pp->pr_size; 1439 } 1440 } 1441 1442 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1443 cp = ph->ph_colored; 1444 n = ph->ph_nmissing; 1445 1446 do { 1447 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1448 if (cp == (caddr_t)pi) 1449 break; 1450 } 1451 if (cp != (caddr_t)pi) { 1452 func(cp, full, pr); 1453 n--; 1454 } 1455 1456 cp += pp->pr_size; 1457 } while (n > 0); 1458 } 1459 } 1460 #endif 1461 1462 /* 1463 * We have three different sysctls. 1464 * kern.pool.npools - the number of pools. 1465 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1466 * kern.pool.name.<pool#> - the name for pool#. 1467 */ 1468 int 1469 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) 1470 { 1471 struct kinfo_pool pi; 1472 struct pool *pp; 1473 int rv = ENOENT; 1474 1475 switch (name[0]) { 1476 case KERN_POOL_NPOOLS: 1477 if (namelen != 1) 1478 return (ENOTDIR); 1479 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); 1480 1481 case KERN_POOL_NAME: 1482 case KERN_POOL_POOL: 1483 case KERN_POOL_CACHE: 1484 case KERN_POOL_CACHE_CPUS: 1485 break; 1486 default: 1487 return (EOPNOTSUPP); 1488 } 1489 1490 if (namelen != 2) 1491 return (ENOTDIR); 1492 1493 rw_enter_read(&pool_lock); 1494 1495 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1496 if (name[1] == pp->pr_serial) 1497 break; 1498 } 1499 1500 if (pp == NULL) 1501 goto done; 1502 1503 switch (name[0]) { 1504 case KERN_POOL_NAME: 1505 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); 1506 break; 1507 case KERN_POOL_POOL: 1508 memset(&pi, 0, sizeof(pi)); 1509 1510 pl_enter(pp, &pp->pr_lock); 1511 pi.pr_size = pp->pr_size; 1512 pi.pr_pgsize = pp->pr_pgsize; 1513 pi.pr_itemsperpage = pp->pr_itemsperpage; 1514 pi.pr_npages = pp->pr_npages; 1515 pi.pr_minpages = pp->pr_minpages; 1516 pi.pr_maxpages = pp->pr_maxpages; 1517 pi.pr_hardlimit = pp->pr_hardlimit; 1518 pi.pr_nout = pp->pr_nout; 1519 pi.pr_nitems = pp->pr_nitems; 1520 pi.pr_nget = pp->pr_nget; 1521 pi.pr_nput = pp->pr_nput; 1522 pi.pr_nfail = pp->pr_nfail; 1523 pi.pr_npagealloc = pp->pr_npagealloc; 1524 pi.pr_npagefree = pp->pr_npagefree; 1525 pi.pr_hiwat = pp->pr_hiwat; 1526 pi.pr_nidle = pp->pr_nidle; 1527 pl_leave(pp, &pp->pr_lock); 1528 1529 pool_cache_pool_info(pp, &pi); 1530 1531 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); 1532 break; 1533 1534 case KERN_POOL_CACHE: 1535 rv = pool_cache_info(pp, oldp, oldlenp); 1536 break; 1537 1538 case KERN_POOL_CACHE_CPUS: 1539 rv = pool_cache_cpus_info(pp, oldp, oldlenp); 1540 break; 1541 } 1542 1543 done: 1544 rw_exit_read(&pool_lock); 1545 1546 return (rv); 1547 } 1548 1549 void 1550 pool_gc_sched(void *null) 1551 { 1552 task_add(systqmp, &pool_gc_task); 1553 } 1554 1555 void 1556 pool_gc_pages(void *null) 1557 { 1558 struct pool *pp; 1559 struct pool_page_header *ph, *freeph; 1560 int s; 1561 1562 rw_enter_read(&pool_lock); 1563 s = splvm(); /* XXX go to splvm until all pools _setipl properly */ 1564 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1565 #ifdef MULTIPROCESSOR 1566 if (pp->pr_cache != NULL) 1567 pool_cache_gc(pp); 1568 #endif 1569 1570 if (pp->pr_nidle <= pp->pr_minpages || /* guess */ 1571 !pl_enter_try(pp, &pp->pr_lock)) /* try */ 1572 continue; 1573 1574 /* is it time to free a page? */ 1575 if (pp->pr_nidle > pp->pr_minpages && 1576 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 1577 (ticks - ph->ph_tick) > (hz * pool_wait_gc)) { 1578 freeph = ph; 1579 pool_p_remove(pp, freeph); 1580 } else 1581 freeph = NULL; 1582 1583 pl_leave(pp, &pp->pr_lock); 1584 1585 if (freeph != NULL) 1586 pool_p_free(pp, freeph); 1587 } 1588 splx(s); 1589 rw_exit_read(&pool_lock); 1590 1591 timeout_add_sec(&pool_gc_tick, 1); 1592 } 1593 1594 /* 1595 * Pool backend allocators. 1596 */ 1597 1598 void * 1599 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1600 { 1601 void *v; 1602 1603 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); 1604 1605 #ifdef DIAGNOSTIC 1606 if (v != NULL && POOL_INPGHDR(pp)) { 1607 vaddr_t addr = (vaddr_t)v; 1608 if ((addr & pp->pr_pgmask) != addr) { 1609 panic("%s: %s page address %p isnt aligned to %u", 1610 __func__, pp->pr_wchan, v, pp->pr_pgsize); 1611 } 1612 } 1613 #endif 1614 1615 return (v); 1616 } 1617 1618 void 1619 pool_allocator_free(struct pool *pp, void *v) 1620 { 1621 struct pool_allocator *pa = pp->pr_alloc; 1622 1623 (*pa->pa_free)(pp, v); 1624 } 1625 1626 void * 1627 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1628 { 1629 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1630 1631 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1632 kd.kd_slowdown = slowdown; 1633 1634 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); 1635 } 1636 1637 void 1638 pool_page_free(struct pool *pp, void *v) 1639 { 1640 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); 1641 } 1642 1643 void * 1644 pool_multi_alloc(struct pool *pp, int flags, int *slowdown) 1645 { 1646 struct kmem_va_mode kv = kv_intrsafe; 1647 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1648 void *v; 1649 int s; 1650 1651 if (POOL_INPGHDR(pp)) 1652 kv.kv_align = pp->pr_pgsize; 1653 1654 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1655 kd.kd_slowdown = slowdown; 1656 1657 s = splvm(); 1658 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1659 splx(s); 1660 1661 return (v); 1662 } 1663 1664 void 1665 pool_multi_free(struct pool *pp, void *v) 1666 { 1667 struct kmem_va_mode kv = kv_intrsafe; 1668 int s; 1669 1670 if (POOL_INPGHDR(pp)) 1671 kv.kv_align = pp->pr_pgsize; 1672 1673 s = splvm(); 1674 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1675 splx(s); 1676 } 1677 1678 void * 1679 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown) 1680 { 1681 struct kmem_va_mode kv = kv_any; 1682 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1683 void *v; 1684 1685 if (POOL_INPGHDR(pp)) 1686 kv.kv_align = pp->pr_pgsize; 1687 1688 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1689 kd.kd_slowdown = slowdown; 1690 1691 KERNEL_LOCK(); 1692 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1693 KERNEL_UNLOCK(); 1694 1695 return (v); 1696 } 1697 1698 void 1699 pool_multi_free_ni(struct pool *pp, void *v) 1700 { 1701 struct kmem_va_mode kv = kv_any; 1702 1703 if (POOL_INPGHDR(pp)) 1704 kv.kv_align = pp->pr_pgsize; 1705 1706 KERNEL_LOCK(); 1707 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1708 KERNEL_UNLOCK(); 1709 } 1710 1711 #ifdef MULTIPROCESSOR 1712 1713 struct pool pool_caches; /* per cpu cache entries */ 1714 1715 void 1716 pool_cache_init(struct pool *pp) 1717 { 1718 struct cpumem *cm; 1719 struct pool_cache *pc; 1720 struct cpumem_iter i; 1721 1722 if (pool_caches.pr_size == 0) { 1723 pool_init(&pool_caches, sizeof(struct pool_cache), 1724 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK, 1725 "plcache", NULL); 1726 } 1727 1728 /* must be able to use the pool items as cache list items */ 1729 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item)); 1730 1731 cm = cpumem_get(&pool_caches); 1732 1733 pl_init(pp, &pp->pr_cache_lock); 1734 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); 1735 TAILQ_INIT(&pp->pr_cache_lists); 1736 pp->pr_cache_nitems = 0; 1737 pp->pr_cache_tick = ticks; 1738 pp->pr_cache_items = 8; 1739 pp->pr_cache_contention = 0; 1740 pp->pr_cache_ngc = 0; 1741 1742 CPUMEM_FOREACH(pc, &i, cm) { 1743 pc->pc_actv = NULL; 1744 pc->pc_nactv = 0; 1745 pc->pc_prev = NULL; 1746 1747 pc->pc_nget = 0; 1748 pc->pc_nfail = 0; 1749 pc->pc_nput = 0; 1750 pc->pc_nlget = 0; 1751 pc->pc_nlfail = 0; 1752 pc->pc_nlput = 0; 1753 pc->pc_nout = 0; 1754 } 1755 1756 membar_producer(); 1757 1758 pp->pr_cache = cm; 1759 } 1760 1761 static inline void 1762 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci) 1763 { 1764 unsigned long *entry = (unsigned long *)&ci->ci_nextl; 1765 1766 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci; 1767 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1768 } 1769 1770 static inline void 1771 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci) 1772 { 1773 unsigned long *entry; 1774 unsigned long val; 1775 1776 entry = (unsigned long *)&ci->ci_nextl; 1777 val = pp->pr_cache_magic[0] ^ (u_long)ci; 1778 if (*entry != val) 1779 goto fail; 1780 1781 entry++; 1782 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1783 if (*entry != val) 1784 goto fail; 1785 1786 return; 1787 1788 fail: 1789 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx", 1790 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci, 1791 *entry, val); 1792 } 1793 1794 static inline void 1795 pool_list_enter(struct pool *pp) 1796 { 1797 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) { 1798 pl_enter(pp, &pp->pr_cache_lock); 1799 pp->pr_cache_contention++; 1800 } 1801 } 1802 1803 static inline void 1804 pool_list_leave(struct pool *pp) 1805 { 1806 pl_leave(pp, &pp->pr_cache_lock); 1807 } 1808 1809 static inline struct pool_cache_item * 1810 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc) 1811 { 1812 struct pool_cache_item *pl; 1813 1814 pool_list_enter(pp); 1815 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1816 if (pl != NULL) { 1817 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 1818 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 1819 1820 pool_cache_item_magic(pp, pl); 1821 1822 pc->pc_nlget++; 1823 } else 1824 pc->pc_nlfail++; 1825 1826 /* fold this cpus nout into the global while we have the lock */ 1827 pp->pr_cache_nout += pc->pc_nout; 1828 pc->pc_nout = 0; 1829 pool_list_leave(pp); 1830 1831 return (pl); 1832 } 1833 1834 static inline void 1835 pool_cache_list_free(struct pool *pp, struct pool_cache *pc, 1836 struct pool_cache_item *ci) 1837 { 1838 pool_list_enter(pp); 1839 if (TAILQ_EMPTY(&pp->pr_cache_lists)) 1840 pp->pr_cache_tick = ticks; 1841 1842 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); 1843 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); 1844 1845 pc->pc_nlput++; 1846 1847 /* fold this cpus nout into the global while we have the lock */ 1848 pp->pr_cache_nout += pc->pc_nout; 1849 pc->pc_nout = 0; 1850 pool_list_leave(pp); 1851 } 1852 1853 static inline struct pool_cache * 1854 pool_cache_enter(struct pool *pp, int *s) 1855 { 1856 struct pool_cache *pc; 1857 1858 pc = cpumem_enter(pp->pr_cache); 1859 *s = splraise(pp->pr_ipl); 1860 pc->pc_gen++; 1861 1862 return (pc); 1863 } 1864 1865 static inline void 1866 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s) 1867 { 1868 pc->pc_gen++; 1869 splx(s); 1870 cpumem_leave(pp->pr_cache, pc); 1871 } 1872 1873 void * 1874 pool_cache_get(struct pool *pp) 1875 { 1876 struct pool_cache *pc; 1877 struct pool_cache_item *ci; 1878 int s; 1879 1880 pc = pool_cache_enter(pp, &s); 1881 1882 if (pc->pc_actv != NULL) { 1883 ci = pc->pc_actv; 1884 } else if (pc->pc_prev != NULL) { 1885 ci = pc->pc_prev; 1886 pc->pc_prev = NULL; 1887 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) { 1888 pc->pc_nfail++; 1889 goto done; 1890 } 1891 1892 pool_cache_item_magic_check(pp, ci); 1893 #ifdef DIAGNOSTIC 1894 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) { 1895 size_t pidx; 1896 uint32_t pval; 1897 1898 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci), 1899 &pidx, &pval)) { 1900 int *ip = (int *)(ci + 1); 1901 ip += pidx; 1902 1903 panic("%s: %s cpu free list modified: " 1904 "item addr %p+%zu 0x%x!=0x%x", 1905 __func__, pp->pr_wchan, ci, 1906 (caddr_t)ip - (caddr_t)ci, *ip, pval); 1907 } 1908 } 1909 #endif 1910 1911 pc->pc_actv = ci->ci_next; 1912 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1; 1913 pc->pc_nget++; 1914 pc->pc_nout++; 1915 1916 done: 1917 pool_cache_leave(pp, pc, s); 1918 1919 return (ci); 1920 } 1921 1922 void 1923 pool_cache_put(struct pool *pp, void *v) 1924 { 1925 struct pool_cache *pc; 1926 struct pool_cache_item *ci = v; 1927 unsigned long nitems; 1928 int s; 1929 #ifdef DIAGNOSTIC 1930 int poison = pool_debug && pp->pr_size > sizeof(*ci); 1931 1932 if (poison) 1933 poison_mem(ci + 1, pp->pr_size - sizeof(*ci)); 1934 #endif 1935 1936 pc = pool_cache_enter(pp, &s); 1937 1938 nitems = pc->pc_nactv; 1939 if (nitems >= pp->pr_cache_items) { 1940 if (pc->pc_prev != NULL) 1941 pool_cache_list_free(pp, pc, pc->pc_prev); 1942 1943 pc->pc_prev = pc->pc_actv; 1944 1945 pc->pc_actv = NULL; 1946 pc->pc_nactv = 0; 1947 nitems = 0; 1948 } 1949 1950 ci->ci_next = pc->pc_actv; 1951 ci->ci_nitems = ++nitems; 1952 #ifdef DIAGNOSTIC 1953 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0; 1954 #endif 1955 pool_cache_item_magic(pp, ci); 1956 1957 pc->pc_actv = ci; 1958 pc->pc_nactv = nitems; 1959 1960 pc->pc_nput++; 1961 pc->pc_nout--; 1962 1963 pool_cache_leave(pp, pc, s); 1964 } 1965 1966 struct pool_cache_item * 1967 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl) 1968 { 1969 struct pool_cache_item *rpl, *next; 1970 1971 if (pl == NULL) 1972 return (NULL); 1973 1974 rpl = TAILQ_NEXT(pl, ci_nextl); 1975 1976 pl_enter(pp, &pp->pr_lock); 1977 do { 1978 next = pl->ci_next; 1979 pool_do_put(pp, pl); 1980 pl = next; 1981 } while (pl != NULL); 1982 pl_leave(pp, &pp->pr_lock); 1983 1984 return (rpl); 1985 } 1986 1987 void 1988 pool_cache_destroy(struct pool *pp) 1989 { 1990 struct pool_cache *pc; 1991 struct pool_cache_item *pl; 1992 struct cpumem_iter i; 1993 struct cpumem *cm; 1994 1995 rw_enter_write(&pool_lock); /* serialise with the gc */ 1996 cm = pp->pr_cache; 1997 pp->pr_cache = NULL; /* make pool_put avoid the cache */ 1998 rw_exit_write(&pool_lock); 1999 2000 CPUMEM_FOREACH(pc, &i, cm) { 2001 pool_cache_list_put(pp, pc->pc_actv); 2002 pool_cache_list_put(pp, pc->pc_prev); 2003 } 2004 2005 cpumem_put(&pool_caches, cm); 2006 2007 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2008 while (pl != NULL) 2009 pl = pool_cache_list_put(pp, pl); 2010 } 2011 2012 void 2013 pool_cache_gc(struct pool *pp) 2014 { 2015 unsigned int contention, delta; 2016 2017 if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) && 2018 !TAILQ_EMPTY(&pp->pr_cache_lists) && 2019 pl_enter_try(pp, &pp->pr_cache_lock)) { 2020 struct pool_cache_item *pl = NULL; 2021 2022 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2023 if (pl != NULL) { 2024 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 2025 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 2026 pp->pr_cache_tick = ticks; 2027 2028 pp->pr_cache_ngc++; 2029 } 2030 2031 pl_leave(pp, &pp->pr_cache_lock); 2032 2033 pool_cache_list_put(pp, pl); 2034 } 2035 2036 /* 2037 * if there's a lot of contention on the pr_cache_mtx then consider 2038 * growing the length of the list to reduce the need to access the 2039 * global pool. 2040 */ 2041 2042 contention = pp->pr_cache_contention; 2043 delta = contention - pp->pr_cache_contention_prev; 2044 if (delta > 8 /* magic */) { 2045 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) 2046 pp->pr_cache_items += 8; 2047 } else if (delta == 0) { 2048 if (pp->pr_cache_items > 8) 2049 pp->pr_cache_items--; 2050 } 2051 pp->pr_cache_contention_prev = contention; 2052 } 2053 2054 void 2055 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2056 { 2057 struct pool_cache *pc; 2058 struct cpumem_iter i; 2059 2060 if (pp->pr_cache == NULL) 2061 return; 2062 2063 /* loop through the caches twice to collect stats */ 2064 2065 /* once without the lock so we can yield while reading nget/nput */ 2066 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2067 uint64_t gen, nget, nput; 2068 2069 do { 2070 while ((gen = pc->pc_gen) & 1) 2071 yield(); 2072 2073 nget = pc->pc_nget; 2074 nput = pc->pc_nput; 2075 } while (gen != pc->pc_gen); 2076 2077 pi->pr_nget += nget; 2078 pi->pr_nput += nput; 2079 } 2080 2081 /* and once with the mtx so we can get consistent nout values */ 2082 pl_enter(pp, &pp->pr_cache_lock); 2083 CPUMEM_FOREACH(pc, &i, pp->pr_cache) 2084 pi->pr_nout += pc->pc_nout; 2085 2086 pi->pr_nout += pp->pr_cache_nout; 2087 pl_leave(pp, &pp->pr_cache_lock); 2088 } 2089 2090 int 2091 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2092 { 2093 struct kinfo_pool_cache kpc; 2094 2095 if (pp->pr_cache == NULL) 2096 return (EOPNOTSUPP); 2097 2098 memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ 2099 2100 pl_enter(pp, &pp->pr_cache_lock); 2101 kpc.pr_ngc = pp->pr_cache_ngc; 2102 kpc.pr_len = pp->pr_cache_items; 2103 kpc.pr_nitems = pp->pr_cache_nitems; 2104 kpc.pr_contention = pp->pr_cache_contention; 2105 pl_leave(pp, &pp->pr_cache_lock); 2106 2107 return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc))); 2108 } 2109 2110 int 2111 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2112 { 2113 struct pool_cache *pc; 2114 struct kinfo_pool_cache_cpu *kpcc, *info; 2115 unsigned int cpu = 0; 2116 struct cpumem_iter i; 2117 int error = 0; 2118 size_t len; 2119 2120 if (pp->pr_cache == NULL) 2121 return (EOPNOTSUPP); 2122 if (*oldlenp % sizeof(*kpcc)) 2123 return (EINVAL); 2124 2125 kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP, 2126 M_WAITOK|M_CANFAIL|M_ZERO); 2127 if (kpcc == NULL) 2128 return (EIO); 2129 2130 len = ncpusfound * sizeof(*kpcc); 2131 2132 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2133 uint64_t gen; 2134 2135 if (cpu >= ncpusfound) { 2136 error = EIO; 2137 goto err; 2138 } 2139 2140 info = &kpcc[cpu]; 2141 info->pr_cpu = cpu; 2142 2143 do { 2144 while ((gen = pc->pc_gen) & 1) 2145 yield(); 2146 2147 info->pr_nget = pc->pc_nget; 2148 info->pr_nfail = pc->pc_nfail; 2149 info->pr_nput = pc->pc_nput; 2150 info->pr_nlget = pc->pc_nlget; 2151 info->pr_nlfail = pc->pc_nlfail; 2152 info->pr_nlput = pc->pc_nlput; 2153 } while (gen != pc->pc_gen); 2154 2155 cpu++; 2156 } 2157 2158 error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len); 2159 err: 2160 free(kpcc, M_TEMP, len); 2161 2162 return (error); 2163 } 2164 #else /* MULTIPROCESSOR */ 2165 void 2166 pool_cache_init(struct pool *pp) 2167 { 2168 /* nop */ 2169 } 2170 2171 void 2172 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2173 { 2174 /* nop */ 2175 } 2176 2177 int 2178 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2179 { 2180 return (EOPNOTSUPP); 2181 } 2182 2183 int 2184 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2185 { 2186 return (EOPNOTSUPP); 2187 } 2188 #endif /* MULTIPROCESSOR */ 2189 2190 2191 void 2192 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock, 2193 const struct lock_type *type) 2194 { 2195 _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type); 2196 } 2197 2198 void 2199 pool_lock_mtx_enter(union pool_lock *lock) 2200 { 2201 mtx_enter(&lock->prl_mtx); 2202 } 2203 2204 int 2205 pool_lock_mtx_enter_try(union pool_lock *lock) 2206 { 2207 return (mtx_enter_try(&lock->prl_mtx)); 2208 } 2209 2210 void 2211 pool_lock_mtx_leave(union pool_lock *lock) 2212 { 2213 mtx_leave(&lock->prl_mtx); 2214 } 2215 2216 void 2217 pool_lock_mtx_assert_locked(union pool_lock *lock) 2218 { 2219 MUTEX_ASSERT_LOCKED(&lock->prl_mtx); 2220 } 2221 2222 void 2223 pool_lock_mtx_assert_unlocked(union pool_lock *lock) 2224 { 2225 MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx); 2226 } 2227 2228 int 2229 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority, 2230 const char *wmesg, int timo) 2231 { 2232 return msleep(ident, &lock->prl_mtx, priority, wmesg, timo); 2233 } 2234 2235 static const struct pool_lock_ops pool_lock_ops_mtx = { 2236 pool_lock_mtx_init, 2237 pool_lock_mtx_enter, 2238 pool_lock_mtx_enter_try, 2239 pool_lock_mtx_leave, 2240 pool_lock_mtx_assert_locked, 2241 pool_lock_mtx_assert_unlocked, 2242 pool_lock_mtx_sleep, 2243 }; 2244 2245 void 2246 pool_lock_rw_init(struct pool *pp, union pool_lock *lock, 2247 const struct lock_type *type) 2248 { 2249 _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type); 2250 } 2251 2252 void 2253 pool_lock_rw_enter(union pool_lock *lock) 2254 { 2255 rw_enter_write(&lock->prl_rwlock); 2256 } 2257 2258 int 2259 pool_lock_rw_enter_try(union pool_lock *lock) 2260 { 2261 return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0); 2262 } 2263 2264 void 2265 pool_lock_rw_leave(union pool_lock *lock) 2266 { 2267 rw_exit_write(&lock->prl_rwlock); 2268 } 2269 2270 void 2271 pool_lock_rw_assert_locked(union pool_lock *lock) 2272 { 2273 rw_assert_wrlock(&lock->prl_rwlock); 2274 } 2275 2276 void 2277 pool_lock_rw_assert_unlocked(union pool_lock *lock) 2278 { 2279 KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE); 2280 } 2281 2282 int 2283 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority, 2284 const char *wmesg, int timo) 2285 { 2286 return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo); 2287 } 2288 2289 static const struct pool_lock_ops pool_lock_ops_rw = { 2290 pool_lock_rw_init, 2291 pool_lock_rw_enter, 2292 pool_lock_rw_enter_try, 2293 pool_lock_rw_leave, 2294 pool_lock_rw_assert_locked, 2295 pool_lock_rw_assert_unlocked, 2296 pool_lock_rw_sleep, 2297 }; 2298