1 /* $OpenBSD: subr_pool.c,v 1.224 2019/02/10 20:02:37 tedu Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/proc.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 #include <sys/task.h> 44 #include <sys/timeout.h> 45 #include <sys/percpu.h> 46 47 #include <uvm/uvm_extern.h> 48 49 /* 50 * Pool resource management utility. 51 * 52 * Memory is allocated in pages which are split into pieces according to 53 * the pool item size. Each page is kept on one of three lists in the 54 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 55 * for empty, full and partially-full pages respectively. The individual 56 * pool items are on a linked list headed by `ph_items' in each page 57 * header. The memory for building the page list is either taken from 58 * the allocated pages themselves (for small pool items) or taken from 59 * an internal pool of page headers (`phpool'). 60 */ 61 62 /* List of all pools */ 63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 64 65 /* 66 * Every pool gets a unique serial number assigned to it. If this counter 67 * wraps, we're screwed, but we shouldn't create so many pools anyway. 68 */ 69 unsigned int pool_serial; 70 unsigned int pool_count; 71 72 /* Lock the previous variables making up the global pool state */ 73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools"); 74 75 /* Private pool for page header structures */ 76 struct pool phpool; 77 78 struct pool_lock_ops { 79 void (*pl_init)(struct pool *, union pool_lock *, 80 const struct lock_type *); 81 void (*pl_enter)(union pool_lock * LOCK_FL_VARS); 82 int (*pl_enter_try)(union pool_lock * LOCK_FL_VARS); 83 void (*pl_leave)(union pool_lock * LOCK_FL_VARS); 84 void (*pl_assert_locked)(union pool_lock *); 85 void (*pl_assert_unlocked)(union pool_lock *); 86 int (*pl_sleep)(void *, union pool_lock *, int, const char *, int); 87 }; 88 89 static const struct pool_lock_ops pool_lock_ops_mtx; 90 static const struct pool_lock_ops pool_lock_ops_rw; 91 92 #ifdef WITNESS 93 #define pl_init(pp, pl) do { \ 94 static const struct lock_type __lock_type = { .lt_name = #pl }; \ 95 (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type); \ 96 } while (0) 97 #else /* WITNESS */ 98 #define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL) 99 #endif /* WITNESS */ 100 101 static inline void 102 pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS) 103 { 104 pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS); 105 } 106 static inline int 107 pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS) 108 { 109 return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS); 110 } 111 static inline void 112 pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS) 113 { 114 pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS); 115 } 116 static inline void 117 pl_assert_locked(struct pool *pp, union pool_lock *pl) 118 { 119 pp->pr_lock_ops->pl_assert_locked(pl); 120 } 121 static inline void 122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl) 123 { 124 pp->pr_lock_ops->pl_assert_unlocked(pl); 125 } 126 static inline int 127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority, 128 const char *wmesg, int timo) 129 { 130 return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo); 131 } 132 133 #ifdef WITNESS 134 # define pl_enter(pp,pl) pl_enter(pp,pl LOCK_FILE_LINE) 135 # define pl_enter_try(pp,pl) pl_enter_try(pp,pl LOCK_FILE_LINE) 136 # define pl_leave(pp,pl) pl_leave(pp,pl LOCK_FILE_LINE) 137 #endif 138 139 struct pool_item { 140 u_long pi_magic; 141 XSIMPLEQ_ENTRY(pool_item) pi_list; 142 }; 143 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic) 144 145 struct pool_page_header { 146 /* Page headers */ 147 TAILQ_ENTRY(pool_page_header) 148 ph_entry; /* pool page list */ 149 XSIMPLEQ_HEAD(, pool_item) 150 ph_items; /* free items on the page */ 151 RBT_ENTRY(pool_page_header) 152 ph_node; /* off-page page headers */ 153 unsigned int ph_nmissing; /* # of chunks in use */ 154 caddr_t ph_page; /* this page's address */ 155 caddr_t ph_colored; /* page's colored address */ 156 unsigned long ph_magic; 157 int ph_tick; 158 int ph_flags; 159 }; 160 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ 161 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) 162 163 #ifdef MULTIPROCESSOR 164 struct pool_cache_item { 165 struct pool_cache_item *ci_next; /* next item in list */ 166 unsigned long ci_nitems; /* number of items in list */ 167 TAILQ_ENTRY(pool_cache_item) 168 ci_nextl; /* entry in list of lists */ 169 }; 170 171 /* we store whether the cached item is poisoned in the high bit of nitems */ 172 #define POOL_CACHE_ITEM_NITEMS_MASK 0x7ffffffUL 173 #define POOL_CACHE_ITEM_NITEMS_POISON 0x8000000UL 174 175 #define POOL_CACHE_ITEM_NITEMS(_ci) \ 176 ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK) 177 178 #define POOL_CACHE_ITEM_POISONED(_ci) \ 179 ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON) 180 181 struct pool_cache { 182 struct pool_cache_item *pc_actv; /* active list of items */ 183 unsigned long pc_nactv; /* actv head nitems cache */ 184 struct pool_cache_item *pc_prev; /* previous list of items */ 185 186 uint64_t pc_gen; /* generation number */ 187 uint64_t pc_nget; /* # of successful requests */ 188 uint64_t pc_nfail; /* # of unsuccessful reqs */ 189 uint64_t pc_nput; /* # of releases */ 190 uint64_t pc_nlget; /* # of list requests */ 191 uint64_t pc_nlfail; /* # of fails getting a list */ 192 uint64_t pc_nlput; /* # of list releases */ 193 194 int pc_nout; 195 }; 196 197 void *pool_cache_get(struct pool *); 198 void pool_cache_put(struct pool *, void *); 199 void pool_cache_destroy(struct pool *); 200 void pool_cache_gc(struct pool *); 201 #endif 202 void pool_cache_pool_info(struct pool *, struct kinfo_pool *); 203 int pool_cache_info(struct pool *, void *, size_t *); 204 int pool_cache_cpus_info(struct pool *, void *, size_t *); 205 206 #ifdef POOL_DEBUG 207 int pool_debug = 1; 208 #else 209 int pool_debug = 0; 210 #endif 211 212 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0) 213 214 struct pool_page_header * 215 pool_p_alloc(struct pool *, int, int *); 216 void pool_p_insert(struct pool *, struct pool_page_header *); 217 void pool_p_remove(struct pool *, struct pool_page_header *); 218 void pool_p_free(struct pool *, struct pool_page_header *); 219 220 void pool_update_curpage(struct pool *); 221 void *pool_do_get(struct pool *, int, int *); 222 void pool_do_put(struct pool *, void *); 223 int pool_chk_page(struct pool *, struct pool_page_header *, int); 224 int pool_chk(struct pool *); 225 void pool_get_done(struct pool *, void *, void *); 226 void pool_runqueue(struct pool *, int); 227 228 void *pool_allocator_alloc(struct pool *, int, int *); 229 void pool_allocator_free(struct pool *, int, void *); 230 231 /* 232 * The default pool allocator. 233 */ 234 void *pool_page_alloc(struct pool *, int, int *); 235 void pool_page_free(struct pool *, int, void *); 236 237 /* 238 * safe for interrupts; this is the default allocator 239 */ 240 struct pool_allocator pool_allocator_single = { 241 pool_page_alloc, 242 pool_page_free, 243 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED) 244 }; 245 246 void *pool_multi_alloc(struct pool *, int, int *); 247 void pool_multi_free(struct pool *, int, void *); 248 249 struct pool_allocator pool_allocator_multi = { 250 pool_multi_alloc, 251 pool_multi_free, 252 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 253 }; 254 255 void *pool_multi_alloc_ni(struct pool *, int, int *); 256 void pool_multi_free_ni(struct pool *, int, void *); 257 258 struct pool_allocator pool_allocator_multi_ni = { 259 pool_multi_alloc_ni, 260 pool_multi_free_ni, 261 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 262 }; 263 264 #ifdef DDB 265 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) 266 __attribute__((__format__(__kprintf__,1,2)))); 267 void pool_print1(struct pool *, const char *, int (*)(const char *, ...) 268 __attribute__((__format__(__kprintf__,1,2)))); 269 #endif 270 271 /* stale page garbage collectors */ 272 void pool_gc_sched(void *); 273 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); 274 void pool_gc_pages(void *); 275 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); 276 int pool_wait_free = 1; 277 int pool_wait_gc = 8; 278 279 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 280 281 static inline int 282 phtree_compare(const struct pool_page_header *a, 283 const struct pool_page_header *b) 284 { 285 vaddr_t va = (vaddr_t)a->ph_page; 286 vaddr_t vb = (vaddr_t)b->ph_page; 287 288 /* the compares in this order are important for the NFIND to work */ 289 if (vb < va) 290 return (-1); 291 if (vb > va) 292 return (1); 293 294 return (0); 295 } 296 297 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 298 299 /* 300 * Return the pool page header based on page address. 301 */ 302 static inline struct pool_page_header * 303 pr_find_pagehead(struct pool *pp, void *v) 304 { 305 struct pool_page_header *ph, key; 306 307 if (POOL_INPGHDR(pp)) { 308 caddr_t page; 309 310 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 311 312 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 313 } 314 315 key.ph_page = v; 316 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 317 if (ph == NULL) 318 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 319 320 KASSERT(ph->ph_page <= (caddr_t)v); 321 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 322 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 323 324 return (ph); 325 } 326 327 /* 328 * Initialize the given pool resource structure. 329 * 330 * We export this routine to allow other kernel parts to declare 331 * static pools that must be initialized before malloc() is available. 332 */ 333 void 334 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags, 335 const char *wchan, struct pool_allocator *palloc) 336 { 337 int off = 0, space; 338 unsigned int pgsize = PAGE_SIZE, items; 339 size_t pa_pagesz; 340 #ifdef DIAGNOSTIC 341 struct pool *iter; 342 #endif 343 344 if (align == 0) 345 align = ALIGN(1); 346 347 if (size < sizeof(struct pool_item)) 348 size = sizeof(struct pool_item); 349 350 size = roundup(size, align); 351 352 while (size * 8 > pgsize) 353 pgsize <<= 1; 354 355 if (palloc == NULL) { 356 if (pgsize > PAGE_SIZE) { 357 palloc = ISSET(flags, PR_WAITOK) ? 358 &pool_allocator_multi_ni : &pool_allocator_multi; 359 } else 360 palloc = &pool_allocator_single; 361 362 pa_pagesz = palloc->pa_pagesz; 363 } else { 364 size_t pgsizes; 365 366 pa_pagesz = palloc->pa_pagesz; 367 if (pa_pagesz == 0) 368 pa_pagesz = POOL_ALLOC_DEFAULT; 369 370 pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED; 371 372 /* make sure the allocator can fit at least one item */ 373 if (size > pgsizes) { 374 panic("%s: pool %s item size 0x%zx > " 375 "allocator %p sizes 0x%zx", __func__, wchan, 376 size, palloc, pgsizes); 377 } 378 379 /* shrink pgsize until it fits into the range */ 380 while (!ISSET(pgsizes, pgsize)) 381 pgsize >>= 1; 382 } 383 KASSERT(ISSET(pa_pagesz, pgsize)); 384 385 items = pgsize / size; 386 387 /* 388 * Decide whether to put the page header off page to avoid 389 * wasting too large a part of the page. Off-page page headers 390 * go into an RB tree, so we can match a returned item with 391 * its header based on the page address. 392 */ 393 if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) { 394 if (pgsize - (size * items) > 395 sizeof(struct pool_page_header)) { 396 off = pgsize - sizeof(struct pool_page_header); 397 } else if (sizeof(struct pool_page_header) * 2 >= size) { 398 off = pgsize - sizeof(struct pool_page_header); 399 items = off / size; 400 } 401 } 402 403 KASSERT(items > 0); 404 405 /* 406 * Initialize the pool structure. 407 */ 408 memset(pp, 0, sizeof(*pp)); 409 if (ISSET(flags, PR_RWLOCK)) { 410 KASSERT(flags & PR_WAITOK); 411 pp->pr_lock_ops = &pool_lock_ops_rw; 412 } else 413 pp->pr_lock_ops = &pool_lock_ops_mtx; 414 TAILQ_INIT(&pp->pr_emptypages); 415 TAILQ_INIT(&pp->pr_fullpages); 416 TAILQ_INIT(&pp->pr_partpages); 417 pp->pr_curpage = NULL; 418 pp->pr_npages = 0; 419 pp->pr_minitems = 0; 420 pp->pr_minpages = 0; 421 pp->pr_maxpages = 8; 422 pp->pr_size = size; 423 pp->pr_pgsize = pgsize; 424 pp->pr_pgmask = ~0UL ^ (pgsize - 1); 425 pp->pr_phoffset = off; 426 pp->pr_itemsperpage = items; 427 pp->pr_wchan = wchan; 428 pp->pr_alloc = palloc; 429 pp->pr_nitems = 0; 430 pp->pr_nout = 0; 431 pp->pr_hardlimit = UINT_MAX; 432 pp->pr_hardlimit_warning = NULL; 433 pp->pr_hardlimit_ratecap.tv_sec = 0; 434 pp->pr_hardlimit_ratecap.tv_usec = 0; 435 pp->pr_hardlimit_warning_last.tv_sec = 0; 436 pp->pr_hardlimit_warning_last.tv_usec = 0; 437 RBT_INIT(phtree, &pp->pr_phtree); 438 439 /* 440 * Use the space between the chunks and the page header 441 * for cache coloring. 442 */ 443 space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize; 444 space -= pp->pr_itemsperpage * pp->pr_size; 445 pp->pr_align = align; 446 pp->pr_maxcolors = (space / align) + 1; 447 448 pp->pr_nget = 0; 449 pp->pr_nfail = 0; 450 pp->pr_nput = 0; 451 pp->pr_npagealloc = 0; 452 pp->pr_npagefree = 0; 453 pp->pr_hiwat = 0; 454 pp->pr_nidle = 0; 455 456 pp->pr_ipl = ipl; 457 pp->pr_flags = flags; 458 459 pl_init(pp, &pp->pr_lock); 460 pl_init(pp, &pp->pr_requests_lock); 461 TAILQ_INIT(&pp->pr_requests); 462 463 if (phpool.pr_size == 0) { 464 pool_init(&phpool, sizeof(struct pool_page_header), 0, 465 IPL_HIGH, 0, "phpool", NULL); 466 467 /* make sure phpool wont "recurse" */ 468 KASSERT(POOL_INPGHDR(&phpool)); 469 } 470 471 /* pglistalloc/constraint parameters */ 472 pp->pr_crange = &kp_dirty; 473 474 /* Insert this into the list of all pools. */ 475 rw_enter_write(&pool_lock); 476 #ifdef DIAGNOSTIC 477 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 478 if (iter == pp) 479 panic("%s: pool %s already on list", __func__, wchan); 480 } 481 #endif 482 483 pp->pr_serial = ++pool_serial; 484 if (pool_serial == 0) 485 panic("%s: too much uptime", __func__); 486 487 SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); 488 pool_count++; 489 rw_exit_write(&pool_lock); 490 } 491 492 /* 493 * Decommission a pool resource. 494 */ 495 void 496 pool_destroy(struct pool *pp) 497 { 498 struct pool_page_header *ph; 499 struct pool *prev, *iter; 500 501 #ifdef MULTIPROCESSOR 502 if (pp->pr_cache != NULL) 503 pool_cache_destroy(pp); 504 #endif 505 506 #ifdef DIAGNOSTIC 507 if (pp->pr_nout != 0) 508 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); 509 #endif 510 511 /* Remove from global pool list */ 512 rw_enter_write(&pool_lock); 513 pool_count--; 514 if (pp == SIMPLEQ_FIRST(&pool_head)) 515 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); 516 else { 517 prev = SIMPLEQ_FIRST(&pool_head); 518 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 519 if (iter == pp) { 520 SIMPLEQ_REMOVE_AFTER(&pool_head, prev, 521 pr_poollist); 522 break; 523 } 524 prev = iter; 525 } 526 } 527 rw_exit_write(&pool_lock); 528 529 /* Remove all pages */ 530 while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { 531 pl_enter(pp, &pp->pr_lock); 532 pool_p_remove(pp, ph); 533 pl_leave(pp, &pp->pr_lock); 534 pool_p_free(pp, ph); 535 } 536 KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); 537 KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); 538 } 539 540 void 541 pool_request_init(struct pool_request *pr, 542 void (*handler)(struct pool *, void *, void *), void *cookie) 543 { 544 pr->pr_handler = handler; 545 pr->pr_cookie = cookie; 546 pr->pr_item = NULL; 547 } 548 549 void 550 pool_request(struct pool *pp, struct pool_request *pr) 551 { 552 pl_enter(pp, &pp->pr_requests_lock); 553 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 554 pool_runqueue(pp, PR_NOWAIT); 555 pl_leave(pp, &pp->pr_requests_lock); 556 } 557 558 struct pool_get_memory { 559 union pool_lock lock; 560 void * volatile v; 561 }; 562 563 /* 564 * Grab an item from the pool. 565 */ 566 void * 567 pool_get(struct pool *pp, int flags) 568 { 569 void *v = NULL; 570 int slowdown = 0; 571 572 KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); 573 if (pp->pr_flags & PR_RWLOCK) 574 KASSERT(flags & PR_WAITOK); 575 576 #ifdef MULTIPROCESSOR 577 if (pp->pr_cache != NULL) { 578 v = pool_cache_get(pp); 579 if (v != NULL) 580 goto good; 581 } 582 #endif 583 584 pl_enter(pp, &pp->pr_lock); 585 if (pp->pr_nout >= pp->pr_hardlimit) { 586 if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL)) 587 goto fail; 588 } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) { 589 if (ISSET(flags, PR_NOWAIT)) 590 goto fail; 591 } 592 pl_leave(pp, &pp->pr_lock); 593 594 if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK)) 595 yield(); 596 597 if (v == NULL) { 598 struct pool_get_memory mem = { .v = NULL }; 599 struct pool_request pr; 600 601 #ifdef DIAGNOSTIC 602 if (ISSET(flags, PR_WAITOK) && curproc == &proc0) 603 panic("%s: cannot sleep for memory during boot", 604 __func__); 605 #endif 606 pl_init(pp, &mem.lock); 607 pool_request_init(&pr, pool_get_done, &mem); 608 pool_request(pp, &pr); 609 610 pl_enter(pp, &mem.lock); 611 while (mem.v == NULL) 612 pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0); 613 pl_leave(pp, &mem.lock); 614 615 v = mem.v; 616 } 617 618 #ifdef MULTIPROCESSOR 619 good: 620 #endif 621 if (ISSET(flags, PR_ZERO)) 622 memset(v, 0, pp->pr_size); 623 624 return (v); 625 626 fail: 627 pp->pr_nfail++; 628 pl_leave(pp, &pp->pr_lock); 629 return (NULL); 630 } 631 632 void 633 pool_get_done(struct pool *pp, void *xmem, void *v) 634 { 635 struct pool_get_memory *mem = xmem; 636 637 pl_enter(pp, &mem->lock); 638 mem->v = v; 639 pl_leave(pp, &mem->lock); 640 641 wakeup_one(mem); 642 } 643 644 void 645 pool_runqueue(struct pool *pp, int flags) 646 { 647 struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl); 648 struct pool_request *pr; 649 650 pl_assert_unlocked(pp, &pp->pr_lock); 651 pl_assert_locked(pp, &pp->pr_requests_lock); 652 653 if (pp->pr_requesting++) 654 return; 655 656 do { 657 pp->pr_requesting = 1; 658 659 /* no TAILQ_JOIN? :( */ 660 while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) { 661 TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry); 662 TAILQ_INSERT_TAIL(&prl, pr, pr_entry); 663 } 664 if (TAILQ_EMPTY(&prl)) 665 continue; 666 667 pl_leave(pp, &pp->pr_requests_lock); 668 669 pl_enter(pp, &pp->pr_lock); 670 pr = TAILQ_FIRST(&prl); 671 while (pr != NULL) { 672 int slowdown = 0; 673 674 if (pp->pr_nout >= pp->pr_hardlimit) 675 break; 676 677 pr->pr_item = pool_do_get(pp, flags, &slowdown); 678 if (pr->pr_item == NULL) /* || slowdown ? */ 679 break; 680 681 pr = TAILQ_NEXT(pr, pr_entry); 682 } 683 pl_leave(pp, &pp->pr_lock); 684 685 while ((pr = TAILQ_FIRST(&prl)) != NULL && 686 pr->pr_item != NULL) { 687 TAILQ_REMOVE(&prl, pr, pr_entry); 688 (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item); 689 } 690 691 pl_enter(pp, &pp->pr_requests_lock); 692 } while (--pp->pr_requesting); 693 694 /* no TAILQ_JOIN :( */ 695 while ((pr = TAILQ_FIRST(&prl)) != NULL) { 696 TAILQ_REMOVE(&prl, pr, pr_entry); 697 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 698 } 699 } 700 701 void * 702 pool_do_get(struct pool *pp, int flags, int *slowdown) 703 { 704 struct pool_item *pi; 705 struct pool_page_header *ph; 706 707 pl_assert_locked(pp, &pp->pr_lock); 708 709 splassert(pp->pr_ipl); 710 711 /* 712 * Account for this item now to avoid races if we need to give up 713 * pr_lock to allocate a page. 714 */ 715 pp->pr_nout++; 716 717 if (pp->pr_curpage == NULL) { 718 pl_leave(pp, &pp->pr_lock); 719 ph = pool_p_alloc(pp, flags, slowdown); 720 pl_enter(pp, &pp->pr_lock); 721 722 if (ph == NULL) { 723 pp->pr_nout--; 724 return (NULL); 725 } 726 727 pool_p_insert(pp, ph); 728 } 729 730 ph = pp->pr_curpage; 731 pi = XSIMPLEQ_FIRST(&ph->ph_items); 732 if (__predict_false(pi == NULL)) 733 panic("%s: %s: page empty", __func__, pp->pr_wchan); 734 735 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 736 panic("%s: %s free list modified: " 737 "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx", 738 __func__, pp->pr_wchan, ph->ph_page, pi, 739 0, pi->pi_magic, POOL_IMAGIC(ph, pi)); 740 } 741 742 XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list); 743 744 #ifdef DIAGNOSTIC 745 if (pool_debug && POOL_PHPOISON(ph)) { 746 size_t pidx; 747 uint32_t pval; 748 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 749 &pidx, &pval)) { 750 int *ip = (int *)(pi + 1); 751 panic("%s: %s free list modified: " 752 "page %p; item addr %p; offset 0x%zx=0x%x", 753 __func__, pp->pr_wchan, ph->ph_page, pi, 754 (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]); 755 } 756 } 757 #endif /* DIAGNOSTIC */ 758 759 if (ph->ph_nmissing++ == 0) { 760 /* 761 * This page was previously empty. Move it to the list of 762 * partially-full pages. This page is already curpage. 763 */ 764 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 765 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 766 767 pp->pr_nidle--; 768 } 769 770 if (ph->ph_nmissing == pp->pr_itemsperpage) { 771 /* 772 * This page is now full. Move it to the full list 773 * and select a new current page. 774 */ 775 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 776 TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry); 777 pool_update_curpage(pp); 778 } 779 780 pp->pr_nget++; 781 782 return (pi); 783 } 784 785 /* 786 * Return resource to the pool. 787 */ 788 void 789 pool_put(struct pool *pp, void *v) 790 { 791 792 #ifdef DIAGNOSTIC 793 if (v == NULL) 794 panic("%s: NULL item", __func__); 795 #endif 796 797 #ifdef MULTIPROCESSOR 798 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) { 799 pool_cache_put(pp, v); 800 return; 801 } 802 #endif 803 804 pl_enter(pp, &pp->pr_lock); 805 806 pool_do_put(pp, v); 807 808 pp->pr_nout--; 809 pp->pr_nput++; 810 811 pl_leave(pp, &pp->pr_lock); 812 813 if (!TAILQ_EMPTY(&pp->pr_requests)) { 814 pl_enter(pp, &pp->pr_requests_lock); 815 pool_runqueue(pp, PR_NOWAIT); 816 pl_leave(pp, &pp->pr_requests_lock); 817 } 818 } 819 820 void 821 pool_do_put(struct pool *pp, void *v) 822 { 823 struct pool_item *pi = v; 824 struct pool_page_header *ph; 825 826 splassert(pp->pr_ipl); 827 828 ph = pr_find_pagehead(pp, v); 829 830 #ifdef DIAGNOSTIC 831 if (pool_debug) { 832 struct pool_item *qi; 833 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) { 834 if (pi == qi) { 835 panic("%s: %s: double pool_put: %p", __func__, 836 pp->pr_wchan, pi); 837 } 838 } 839 } 840 #endif /* DIAGNOSTIC */ 841 842 pi->pi_magic = POOL_IMAGIC(ph, pi); 843 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 844 #ifdef DIAGNOSTIC 845 if (POOL_PHPOISON(ph)) 846 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 847 #endif /* DIAGNOSTIC */ 848 849 if (ph->ph_nmissing-- == pp->pr_itemsperpage) { 850 /* 851 * The page was previously completely full, move it to the 852 * partially-full list. 853 */ 854 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 855 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 856 } 857 858 if (ph->ph_nmissing == 0) { 859 /* 860 * The page is now empty, so move it to the empty page list. 861 */ 862 pp->pr_nidle++; 863 864 ph->ph_tick = ticks; 865 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 866 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 867 pool_update_curpage(pp); 868 } 869 } 870 871 /* 872 * Add N items to the pool. 873 */ 874 int 875 pool_prime(struct pool *pp, int n) 876 { 877 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 878 struct pool_page_header *ph; 879 int newpages; 880 881 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 882 883 while (newpages-- > 0) { 884 int slowdown = 0; 885 886 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 887 if (ph == NULL) /* or slowdown? */ 888 break; 889 890 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 891 } 892 893 pl_enter(pp, &pp->pr_lock); 894 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 895 TAILQ_REMOVE(&pl, ph, ph_entry); 896 pool_p_insert(pp, ph); 897 } 898 pl_leave(pp, &pp->pr_lock); 899 900 return (0); 901 } 902 903 struct pool_page_header * 904 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 905 { 906 struct pool_page_header *ph; 907 struct pool_item *pi; 908 caddr_t addr; 909 unsigned int order; 910 int o; 911 int n; 912 913 pl_assert_unlocked(pp, &pp->pr_lock); 914 KASSERT(pp->pr_size >= sizeof(*pi)); 915 916 addr = pool_allocator_alloc(pp, flags, slowdown); 917 if (addr == NULL) 918 return (NULL); 919 920 if (POOL_INPGHDR(pp)) 921 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 922 else { 923 ph = pool_get(&phpool, flags); 924 if (ph == NULL) { 925 pool_allocator_free(pp, flags, addr); 926 return (NULL); 927 } 928 } 929 ph->ph_flags = flags; 930 931 XSIMPLEQ_INIT(&ph->ph_items); 932 ph->ph_page = addr; 933 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 934 ph->ph_colored = addr; 935 ph->ph_nmissing = 0; 936 arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 937 #ifdef DIAGNOSTIC 938 /* use a bit in ph_magic to record if we poison page items */ 939 if (pool_debug) 940 SET(ph->ph_magic, POOL_MAGICBIT); 941 else 942 CLR(ph->ph_magic, POOL_MAGICBIT); 943 #endif /* DIAGNOSTIC */ 944 945 n = pp->pr_itemsperpage; 946 o = 32; 947 while (n--) { 948 pi = (struct pool_item *)addr; 949 pi->pi_magic = POOL_IMAGIC(ph, pi); 950 951 if (o == 32) { 952 order = arc4random(); 953 o = 0; 954 } 955 if (ISSET(order, 1 << o++)) 956 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 957 else 958 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 959 960 #ifdef DIAGNOSTIC 961 if (POOL_PHPOISON(ph)) 962 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 963 #endif /* DIAGNOSTIC */ 964 965 addr += pp->pr_size; 966 } 967 968 return (ph); 969 } 970 971 void 972 pool_p_free(struct pool *pp, struct pool_page_header *ph) 973 { 974 struct pool_item *pi; 975 976 pl_assert_unlocked(pp, &pp->pr_lock); 977 KASSERT(ph->ph_nmissing == 0); 978 979 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 980 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 981 panic("%s: %s free list modified: " 982 "page %p; item addr %p; offset 0x%x=0x%lx", 983 __func__, pp->pr_wchan, ph->ph_page, pi, 984 0, pi->pi_magic); 985 } 986 987 #ifdef DIAGNOSTIC 988 if (POOL_PHPOISON(ph)) { 989 size_t pidx; 990 uint32_t pval; 991 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 992 &pidx, &pval)) { 993 int *ip = (int *)(pi + 1); 994 panic("%s: %s free list modified: " 995 "page %p; item addr %p; offset 0x%zx=0x%x", 996 __func__, pp->pr_wchan, ph->ph_page, pi, 997 pidx * sizeof(int), ip[pidx]); 998 } 999 } 1000 #endif 1001 } 1002 1003 pool_allocator_free(pp, ph->ph_flags, ph->ph_page); 1004 1005 if (!POOL_INPGHDR(pp)) 1006 pool_put(&phpool, ph); 1007 } 1008 1009 void 1010 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1011 { 1012 pl_assert_locked(pp, &pp->pr_lock); 1013 1014 /* If the pool was depleted, point at the new page */ 1015 if (pp->pr_curpage == NULL) 1016 pp->pr_curpage = ph; 1017 1018 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1019 if (!POOL_INPGHDR(pp)) 1020 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1021 1022 pp->pr_nitems += pp->pr_itemsperpage; 1023 pp->pr_nidle++; 1024 1025 pp->pr_npagealloc++; 1026 if (++pp->pr_npages > pp->pr_hiwat) 1027 pp->pr_hiwat = pp->pr_npages; 1028 } 1029 1030 void 1031 pool_p_remove(struct pool *pp, struct pool_page_header *ph) 1032 { 1033 pl_assert_locked(pp, &pp->pr_lock); 1034 1035 pp->pr_npagefree++; 1036 pp->pr_npages--; 1037 pp->pr_nidle--; 1038 pp->pr_nitems -= pp->pr_itemsperpage; 1039 1040 if (!POOL_INPGHDR(pp)) 1041 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1042 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1043 1044 pool_update_curpage(pp); 1045 } 1046 1047 void 1048 pool_update_curpage(struct pool *pp) 1049 { 1050 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1051 if (pp->pr_curpage == NULL) { 1052 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1053 } 1054 } 1055 1056 void 1057 pool_setlowat(struct pool *pp, int n) 1058 { 1059 int prime = 0; 1060 1061 pl_enter(pp, &pp->pr_lock); 1062 pp->pr_minitems = n; 1063 pp->pr_minpages = (n == 0) 1064 ? 0 1065 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1066 1067 if (pp->pr_nitems < n) 1068 prime = n - pp->pr_nitems; 1069 pl_leave(pp, &pp->pr_lock); 1070 1071 if (prime > 0) 1072 pool_prime(pp, prime); 1073 } 1074 1075 void 1076 pool_sethiwat(struct pool *pp, int n) 1077 { 1078 pp->pr_maxpages = (n == 0) 1079 ? 0 1080 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1081 } 1082 1083 int 1084 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1085 { 1086 int error = 0; 1087 1088 if (n < pp->pr_nout) { 1089 error = EINVAL; 1090 goto done; 1091 } 1092 1093 pp->pr_hardlimit = n; 1094 pp->pr_hardlimit_warning = warnmsg; 1095 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1096 pp->pr_hardlimit_warning_last.tv_sec = 0; 1097 pp->pr_hardlimit_warning_last.tv_usec = 0; 1098 1099 done: 1100 return (error); 1101 } 1102 1103 void 1104 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1105 { 1106 pp->pr_crange = mode; 1107 } 1108 1109 /* 1110 * Release all complete pages that have not been used recently. 1111 * 1112 * Returns non-zero if any pages have been reclaimed. 1113 */ 1114 int 1115 pool_reclaim(struct pool *pp) 1116 { 1117 struct pool_page_header *ph, *phnext; 1118 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1119 1120 pl_enter(pp, &pp->pr_lock); 1121 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1122 phnext = TAILQ_NEXT(ph, ph_entry); 1123 1124 /* Check our minimum page claim */ 1125 if (pp->pr_npages <= pp->pr_minpages) 1126 break; 1127 1128 /* 1129 * If freeing this page would put us below 1130 * the low water mark, stop now. 1131 */ 1132 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1133 pp->pr_minitems) 1134 break; 1135 1136 pool_p_remove(pp, ph); 1137 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1138 } 1139 pl_leave(pp, &pp->pr_lock); 1140 1141 if (TAILQ_EMPTY(&pl)) 1142 return (0); 1143 1144 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1145 TAILQ_REMOVE(&pl, ph, ph_entry); 1146 pool_p_free(pp, ph); 1147 } 1148 1149 return (1); 1150 } 1151 1152 /* 1153 * Release all complete pages that have not been used recently 1154 * from all pools. 1155 */ 1156 void 1157 pool_reclaim_all(void) 1158 { 1159 struct pool *pp; 1160 1161 rw_enter_read(&pool_lock); 1162 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1163 pool_reclaim(pp); 1164 rw_exit_read(&pool_lock); 1165 } 1166 1167 #ifdef DDB 1168 #include <machine/db_machdep.h> 1169 #include <ddb/db_output.h> 1170 1171 /* 1172 * Diagnostic helpers. 1173 */ 1174 void 1175 pool_printit(struct pool *pp, const char *modif, 1176 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1177 { 1178 pool_print1(pp, modif, pr); 1179 } 1180 1181 void 1182 pool_print_pagelist(struct pool_pagelist *pl, 1183 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1184 { 1185 struct pool_page_header *ph; 1186 struct pool_item *pi; 1187 1188 TAILQ_FOREACH(ph, pl, ph_entry) { 1189 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1190 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1191 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1192 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1193 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1194 pi, pi->pi_magic); 1195 } 1196 } 1197 } 1198 } 1199 1200 void 1201 pool_print1(struct pool *pp, const char *modif, 1202 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1203 { 1204 struct pool_page_header *ph; 1205 int print_pagelist = 0; 1206 char c; 1207 1208 while ((c = *modif++) != '\0') { 1209 if (c == 'p') 1210 print_pagelist = 1; 1211 modif++; 1212 } 1213 1214 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1215 pp->pr_maxcolors); 1216 (*pr)("\talloc %p\n", pp->pr_alloc); 1217 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1218 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1219 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1220 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1221 1222 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1223 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1224 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1225 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1226 1227 if (print_pagelist == 0) 1228 return; 1229 1230 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1231 (*pr)("\n\tempty page list:\n"); 1232 pool_print_pagelist(&pp->pr_emptypages, pr); 1233 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1234 (*pr)("\n\tfull page list:\n"); 1235 pool_print_pagelist(&pp->pr_fullpages, pr); 1236 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1237 (*pr)("\n\tpartial-page list:\n"); 1238 pool_print_pagelist(&pp->pr_partpages, pr); 1239 1240 if (pp->pr_curpage == NULL) 1241 (*pr)("\tno current page\n"); 1242 else 1243 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1244 } 1245 1246 void 1247 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1248 { 1249 struct pool *pp; 1250 char maxp[16]; 1251 int ovflw; 1252 char mode; 1253 1254 mode = modif[0]; 1255 if (mode != '\0' && mode != 'a') { 1256 db_printf("usage: show all pools [/a]\n"); 1257 return; 1258 } 1259 1260 if (mode == '\0') 1261 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1262 "Name", 1263 "Size", 1264 "Requests", 1265 "Fail", 1266 "Releases", 1267 "Pgreq", 1268 "Pgrel", 1269 "Npage", 1270 "Hiwat", 1271 "Minpg", 1272 "Maxpg", 1273 "Idle"); 1274 else 1275 db_printf("%-12s %18s %18s\n", 1276 "Name", "Address", "Allocator"); 1277 1278 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1279 if (mode == 'a') { 1280 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1281 pp->pr_alloc); 1282 continue; 1283 } 1284 1285 if (!pp->pr_nget) 1286 continue; 1287 1288 if (pp->pr_maxpages == UINT_MAX) 1289 snprintf(maxp, sizeof maxp, "inf"); 1290 else 1291 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1292 1293 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1294 (ovflw) += db_printf((fmt), \ 1295 (width) - (fixed) - (ovflw) > 0 ? \ 1296 (width) - (fixed) - (ovflw) : 0, \ 1297 (val)) - (width); \ 1298 if ((ovflw) < 0) \ 1299 (ovflw) = 0; \ 1300 } while (/* CONSTCOND */0) 1301 1302 ovflw = 0; 1303 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1304 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1305 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1306 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1307 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1308 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1309 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1310 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1311 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1312 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1313 PRWORD(ovflw, " %*s", 6, 1, maxp); 1314 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1315 1316 pool_chk(pp); 1317 } 1318 } 1319 #endif /* DDB */ 1320 1321 #if defined(POOL_DEBUG) || defined(DDB) 1322 int 1323 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1324 { 1325 struct pool_item *pi; 1326 caddr_t page; 1327 int n; 1328 const char *label = pp->pr_wchan; 1329 1330 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1331 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1332 printf("%s: ", label); 1333 printf("pool(%p:%s): page inconsistency: page %p; " 1334 "at page head addr %p (p %p)\n", 1335 pp, pp->pr_wchan, ph->ph_page, ph, page); 1336 return 1; 1337 } 1338 1339 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1340 pi != NULL; 1341 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1342 if ((caddr_t)pi < ph->ph_page || 1343 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1344 printf("%s: ", label); 1345 printf("pool(%p:%s): page inconsistency: page %p;" 1346 " item ordinal %d; addr %p\n", pp, 1347 pp->pr_wchan, ph->ph_page, n, pi); 1348 return (1); 1349 } 1350 1351 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1352 printf("%s: ", label); 1353 printf("pool(%p:%s): free list modified: " 1354 "page %p; item ordinal %d; addr %p " 1355 "(p %p); offset 0x%x=0x%lx\n", 1356 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1357 0, pi->pi_magic); 1358 } 1359 1360 #ifdef DIAGNOSTIC 1361 if (POOL_PHPOISON(ph)) { 1362 size_t pidx; 1363 uint32_t pval; 1364 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1365 &pidx, &pval)) { 1366 int *ip = (int *)(pi + 1); 1367 printf("pool(%s): free list modified: " 1368 "page %p; item ordinal %d; addr %p " 1369 "(p %p); offset 0x%zx=0x%x\n", 1370 pp->pr_wchan, ph->ph_page, n, pi, 1371 page, pidx * sizeof(int), ip[pidx]); 1372 } 1373 } 1374 #endif /* DIAGNOSTIC */ 1375 } 1376 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1377 printf("pool(%p:%s): page inconsistency: page %p;" 1378 " %d on list, %d missing, %d items per page\n", pp, 1379 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1380 pp->pr_itemsperpage); 1381 return 1; 1382 } 1383 if (expected >= 0 && n != expected) { 1384 printf("pool(%p:%s): page inconsistency: page %p;" 1385 " %d on list, %d missing, %d expected\n", pp, 1386 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1387 expected); 1388 return 1; 1389 } 1390 return 0; 1391 } 1392 1393 int 1394 pool_chk(struct pool *pp) 1395 { 1396 struct pool_page_header *ph; 1397 int r = 0; 1398 1399 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1400 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1401 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) 1402 r += pool_chk_page(pp, ph, 0); 1403 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1404 r += pool_chk_page(pp, ph, -1); 1405 1406 return (r); 1407 } 1408 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1409 1410 #ifdef DDB 1411 void 1412 pool_walk(struct pool *pp, int full, 1413 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1414 void (*func)(void *, int, int (*)(const char *, ...) 1415 __attribute__((__format__(__kprintf__,1,2))))) 1416 { 1417 struct pool_page_header *ph; 1418 struct pool_item *pi; 1419 caddr_t cp; 1420 int n; 1421 1422 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1423 cp = ph->ph_colored; 1424 n = ph->ph_nmissing; 1425 1426 while (n--) { 1427 func(cp, full, pr); 1428 cp += pp->pr_size; 1429 } 1430 } 1431 1432 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1433 cp = ph->ph_colored; 1434 n = ph->ph_nmissing; 1435 1436 do { 1437 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1438 if (cp == (caddr_t)pi) 1439 break; 1440 } 1441 if (cp != (caddr_t)pi) { 1442 func(cp, full, pr); 1443 n--; 1444 } 1445 1446 cp += pp->pr_size; 1447 } while (n > 0); 1448 } 1449 } 1450 #endif 1451 1452 /* 1453 * We have three different sysctls. 1454 * kern.pool.npools - the number of pools. 1455 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1456 * kern.pool.name.<pool#> - the name for pool#. 1457 */ 1458 int 1459 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) 1460 { 1461 struct kinfo_pool pi; 1462 struct pool *pp; 1463 int rv = ENOENT; 1464 1465 switch (name[0]) { 1466 case KERN_POOL_NPOOLS: 1467 if (namelen != 1) 1468 return (ENOTDIR); 1469 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); 1470 1471 case KERN_POOL_NAME: 1472 case KERN_POOL_POOL: 1473 case KERN_POOL_CACHE: 1474 case KERN_POOL_CACHE_CPUS: 1475 break; 1476 default: 1477 return (EOPNOTSUPP); 1478 } 1479 1480 if (namelen != 2) 1481 return (ENOTDIR); 1482 1483 rw_enter_read(&pool_lock); 1484 1485 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1486 if (name[1] == pp->pr_serial) 1487 break; 1488 } 1489 1490 if (pp == NULL) 1491 goto done; 1492 1493 switch (name[0]) { 1494 case KERN_POOL_NAME: 1495 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); 1496 break; 1497 case KERN_POOL_POOL: 1498 memset(&pi, 0, sizeof(pi)); 1499 1500 pl_enter(pp, &pp->pr_lock); 1501 pi.pr_size = pp->pr_size; 1502 pi.pr_pgsize = pp->pr_pgsize; 1503 pi.pr_itemsperpage = pp->pr_itemsperpage; 1504 pi.pr_npages = pp->pr_npages; 1505 pi.pr_minpages = pp->pr_minpages; 1506 pi.pr_maxpages = pp->pr_maxpages; 1507 pi.pr_hardlimit = pp->pr_hardlimit; 1508 pi.pr_nout = pp->pr_nout; 1509 pi.pr_nitems = pp->pr_nitems; 1510 pi.pr_nget = pp->pr_nget; 1511 pi.pr_nput = pp->pr_nput; 1512 pi.pr_nfail = pp->pr_nfail; 1513 pi.pr_npagealloc = pp->pr_npagealloc; 1514 pi.pr_npagefree = pp->pr_npagefree; 1515 pi.pr_hiwat = pp->pr_hiwat; 1516 pi.pr_nidle = pp->pr_nidle; 1517 pl_leave(pp, &pp->pr_lock); 1518 1519 pool_cache_pool_info(pp, &pi); 1520 1521 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); 1522 break; 1523 1524 case KERN_POOL_CACHE: 1525 rv = pool_cache_info(pp, oldp, oldlenp); 1526 break; 1527 1528 case KERN_POOL_CACHE_CPUS: 1529 rv = pool_cache_cpus_info(pp, oldp, oldlenp); 1530 break; 1531 } 1532 1533 done: 1534 rw_exit_read(&pool_lock); 1535 1536 return (rv); 1537 } 1538 1539 void 1540 pool_gc_sched(void *null) 1541 { 1542 task_add(systqmp, &pool_gc_task); 1543 } 1544 1545 void 1546 pool_gc_pages(void *null) 1547 { 1548 struct pool *pp; 1549 struct pool_page_header *ph, *freeph; 1550 int s; 1551 1552 rw_enter_read(&pool_lock); 1553 s = splvm(); /* XXX go to splvm until all pools _setipl properly */ 1554 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1555 #ifdef MULTIPROCESSOR 1556 if (pp->pr_cache != NULL) 1557 pool_cache_gc(pp); 1558 #endif 1559 1560 if (pp->pr_nidle <= pp->pr_minpages || /* guess */ 1561 !pl_enter_try(pp, &pp->pr_lock)) /* try */ 1562 continue; 1563 1564 /* is it time to free a page? */ 1565 if (pp->pr_nidle > pp->pr_minpages && 1566 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 1567 (ticks - ph->ph_tick) > (hz * pool_wait_gc)) { 1568 freeph = ph; 1569 pool_p_remove(pp, freeph); 1570 } else 1571 freeph = NULL; 1572 1573 pl_leave(pp, &pp->pr_lock); 1574 1575 if (freeph != NULL) 1576 pool_p_free(pp, freeph); 1577 } 1578 splx(s); 1579 rw_exit_read(&pool_lock); 1580 1581 timeout_add_sec(&pool_gc_tick, 1); 1582 } 1583 1584 /* 1585 * Pool backend allocators. 1586 */ 1587 1588 void * 1589 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1590 { 1591 void *v; 1592 1593 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); 1594 1595 #ifdef DIAGNOSTIC 1596 if (v != NULL && POOL_INPGHDR(pp)) { 1597 vaddr_t addr = (vaddr_t)v; 1598 if ((addr & pp->pr_pgmask) != addr) { 1599 panic("%s: %s page address %p isnt aligned to %u", 1600 __func__, pp->pr_wchan, v, pp->pr_pgsize); 1601 } 1602 } 1603 #endif 1604 1605 return (v); 1606 } 1607 1608 void 1609 pool_allocator_free(struct pool *pp, int flags, void *v) 1610 { 1611 struct pool_allocator *pa = pp->pr_alloc; 1612 1613 (*pa->pa_free)(pp, flags, v); 1614 } 1615 1616 void * 1617 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1618 { 1619 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1620 1621 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1622 kd.kd_slowdown = slowdown; 1623 1624 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); 1625 } 1626 1627 void 1628 pool_page_free(struct pool *pp, int flags, void *v) 1629 { 1630 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); 1631 } 1632 1633 void * 1634 pool_multi_alloc(struct pool *pp, int flags, int *slowdown) 1635 { 1636 struct kmem_va_mode kv = kv_intrsafe; 1637 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1638 void *v; 1639 int s; 1640 1641 if (POOL_INPGHDR(pp)) 1642 kv.kv_align = pp->pr_pgsize; 1643 1644 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1645 kd.kd_slowdown = slowdown; 1646 1647 s = splvm(); 1648 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1649 splx(s); 1650 1651 return (v); 1652 } 1653 1654 void 1655 pool_multi_free(struct pool *pp, int flags, void *v) 1656 { 1657 struct kmem_va_mode kv = kv_intrsafe; 1658 int s; 1659 1660 if (POOL_INPGHDR(pp)) 1661 kv.kv_align = pp->pr_pgsize; 1662 1663 s = splvm(); 1664 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1665 splx(s); 1666 } 1667 1668 void * 1669 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown) 1670 { 1671 struct kmem_va_mode kv = kv_any; 1672 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1673 void *v; 1674 1675 if (POOL_INPGHDR(pp)) 1676 kv.kv_align = pp->pr_pgsize; 1677 1678 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1679 kd.kd_slowdown = slowdown; 1680 1681 KERNEL_LOCK(); 1682 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1683 KERNEL_UNLOCK(); 1684 1685 return (v); 1686 } 1687 1688 void 1689 pool_multi_free_ni(struct pool *pp, int flags, void *v) 1690 { 1691 struct kmem_va_mode kv = kv_any; 1692 1693 if (POOL_INPGHDR(pp)) 1694 kv.kv_align = pp->pr_pgsize; 1695 1696 KERNEL_LOCK(); 1697 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1698 KERNEL_UNLOCK(); 1699 } 1700 1701 #ifdef MULTIPROCESSOR 1702 1703 struct pool pool_caches; /* per cpu cache entries */ 1704 1705 void 1706 pool_cache_init(struct pool *pp) 1707 { 1708 struct cpumem *cm; 1709 struct pool_cache *pc; 1710 struct cpumem_iter i; 1711 1712 if (pool_caches.pr_size == 0) { 1713 pool_init(&pool_caches, sizeof(struct pool_cache), 1714 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK, 1715 "plcache", NULL); 1716 } 1717 1718 /* must be able to use the pool items as cache list items */ 1719 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item)); 1720 1721 cm = cpumem_get(&pool_caches); 1722 1723 pl_init(pp, &pp->pr_cache_lock); 1724 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); 1725 TAILQ_INIT(&pp->pr_cache_lists); 1726 pp->pr_cache_nitems = 0; 1727 pp->pr_cache_tick = ticks; 1728 pp->pr_cache_items = 8; 1729 pp->pr_cache_contention = 0; 1730 pp->pr_cache_ngc = 0; 1731 1732 CPUMEM_FOREACH(pc, &i, cm) { 1733 pc->pc_actv = NULL; 1734 pc->pc_nactv = 0; 1735 pc->pc_prev = NULL; 1736 1737 pc->pc_nget = 0; 1738 pc->pc_nfail = 0; 1739 pc->pc_nput = 0; 1740 pc->pc_nlget = 0; 1741 pc->pc_nlfail = 0; 1742 pc->pc_nlput = 0; 1743 pc->pc_nout = 0; 1744 } 1745 1746 membar_producer(); 1747 1748 pp->pr_cache = cm; 1749 } 1750 1751 static inline void 1752 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci) 1753 { 1754 unsigned long *entry = (unsigned long *)&ci->ci_nextl; 1755 1756 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci; 1757 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1758 } 1759 1760 static inline void 1761 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci) 1762 { 1763 unsigned long *entry; 1764 unsigned long val; 1765 1766 entry = (unsigned long *)&ci->ci_nextl; 1767 val = pp->pr_cache_magic[0] ^ (u_long)ci; 1768 if (*entry != val) 1769 goto fail; 1770 1771 entry++; 1772 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1773 if (*entry != val) 1774 goto fail; 1775 1776 return; 1777 1778 fail: 1779 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx", 1780 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci, 1781 *entry, val); 1782 } 1783 1784 static inline void 1785 pool_list_enter(struct pool *pp) 1786 { 1787 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) { 1788 pl_enter(pp, &pp->pr_cache_lock); 1789 pp->pr_cache_contention++; 1790 } 1791 } 1792 1793 static inline void 1794 pool_list_leave(struct pool *pp) 1795 { 1796 pl_leave(pp, &pp->pr_cache_lock); 1797 } 1798 1799 static inline struct pool_cache_item * 1800 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc) 1801 { 1802 struct pool_cache_item *pl; 1803 1804 pool_list_enter(pp); 1805 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1806 if (pl != NULL) { 1807 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 1808 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 1809 1810 pool_cache_item_magic(pp, pl); 1811 1812 pc->pc_nlget++; 1813 } else 1814 pc->pc_nlfail++; 1815 1816 /* fold this cpus nout into the global while we have the lock */ 1817 pp->pr_cache_nout += pc->pc_nout; 1818 pc->pc_nout = 0; 1819 pool_list_leave(pp); 1820 1821 return (pl); 1822 } 1823 1824 static inline void 1825 pool_cache_list_free(struct pool *pp, struct pool_cache *pc, 1826 struct pool_cache_item *ci) 1827 { 1828 pool_list_enter(pp); 1829 if (TAILQ_EMPTY(&pp->pr_cache_lists)) 1830 pp->pr_cache_tick = ticks; 1831 1832 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); 1833 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); 1834 1835 pc->pc_nlput++; 1836 1837 /* fold this cpus nout into the global while we have the lock */ 1838 pp->pr_cache_nout += pc->pc_nout; 1839 pc->pc_nout = 0; 1840 pool_list_leave(pp); 1841 } 1842 1843 static inline struct pool_cache * 1844 pool_cache_enter(struct pool *pp, int *s) 1845 { 1846 struct pool_cache *pc; 1847 1848 pc = cpumem_enter(pp->pr_cache); 1849 *s = splraise(pp->pr_ipl); 1850 pc->pc_gen++; 1851 1852 return (pc); 1853 } 1854 1855 static inline void 1856 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s) 1857 { 1858 pc->pc_gen++; 1859 splx(s); 1860 cpumem_leave(pp->pr_cache, pc); 1861 } 1862 1863 void * 1864 pool_cache_get(struct pool *pp) 1865 { 1866 struct pool_cache *pc; 1867 struct pool_cache_item *ci; 1868 int s; 1869 1870 pc = pool_cache_enter(pp, &s); 1871 1872 if (pc->pc_actv != NULL) { 1873 ci = pc->pc_actv; 1874 } else if (pc->pc_prev != NULL) { 1875 ci = pc->pc_prev; 1876 pc->pc_prev = NULL; 1877 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) { 1878 pc->pc_nfail++; 1879 goto done; 1880 } 1881 1882 pool_cache_item_magic_check(pp, ci); 1883 #ifdef DIAGNOSTIC 1884 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) { 1885 size_t pidx; 1886 uint32_t pval; 1887 1888 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci), 1889 &pidx, &pval)) { 1890 int *ip = (int *)(ci + 1); 1891 ip += pidx; 1892 1893 panic("%s: %s cpu free list modified: " 1894 "item addr %p+%zu 0x%x!=0x%x", 1895 __func__, pp->pr_wchan, ci, 1896 (caddr_t)ip - (caddr_t)ci, *ip, pval); 1897 } 1898 } 1899 #endif 1900 1901 pc->pc_actv = ci->ci_next; 1902 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1; 1903 pc->pc_nget++; 1904 pc->pc_nout++; 1905 1906 done: 1907 pool_cache_leave(pp, pc, s); 1908 1909 return (ci); 1910 } 1911 1912 void 1913 pool_cache_put(struct pool *pp, void *v) 1914 { 1915 struct pool_cache *pc; 1916 struct pool_cache_item *ci = v; 1917 unsigned long nitems; 1918 int s; 1919 #ifdef DIAGNOSTIC 1920 int poison = pool_debug && pp->pr_size > sizeof(*ci); 1921 1922 if (poison) 1923 poison_mem(ci + 1, pp->pr_size - sizeof(*ci)); 1924 #endif 1925 1926 pc = pool_cache_enter(pp, &s); 1927 1928 nitems = pc->pc_nactv; 1929 if (nitems >= pp->pr_cache_items) { 1930 if (pc->pc_prev != NULL) 1931 pool_cache_list_free(pp, pc, pc->pc_prev); 1932 1933 pc->pc_prev = pc->pc_actv; 1934 1935 pc->pc_actv = NULL; 1936 pc->pc_nactv = 0; 1937 nitems = 0; 1938 } 1939 1940 ci->ci_next = pc->pc_actv; 1941 ci->ci_nitems = ++nitems; 1942 #ifdef DIAGNOSTIC 1943 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0; 1944 #endif 1945 pool_cache_item_magic(pp, ci); 1946 1947 pc->pc_actv = ci; 1948 pc->pc_nactv = nitems; 1949 1950 pc->pc_nput++; 1951 pc->pc_nout--; 1952 1953 pool_cache_leave(pp, pc, s); 1954 } 1955 1956 struct pool_cache_item * 1957 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl) 1958 { 1959 struct pool_cache_item *rpl, *next; 1960 1961 if (pl == NULL) 1962 return (NULL); 1963 1964 rpl = TAILQ_NEXT(pl, ci_nextl); 1965 1966 pl_enter(pp, &pp->pr_lock); 1967 do { 1968 next = pl->ci_next; 1969 pool_do_put(pp, pl); 1970 pl = next; 1971 } while (pl != NULL); 1972 pl_leave(pp, &pp->pr_lock); 1973 1974 return (rpl); 1975 } 1976 1977 void 1978 pool_cache_destroy(struct pool *pp) 1979 { 1980 struct pool_cache *pc; 1981 struct pool_cache_item *pl; 1982 struct cpumem_iter i; 1983 struct cpumem *cm; 1984 1985 rw_enter_write(&pool_lock); /* serialise with the gc */ 1986 cm = pp->pr_cache; 1987 pp->pr_cache = NULL; /* make pool_put avoid the cache */ 1988 rw_exit_write(&pool_lock); 1989 1990 CPUMEM_FOREACH(pc, &i, cm) { 1991 pool_cache_list_put(pp, pc->pc_actv); 1992 pool_cache_list_put(pp, pc->pc_prev); 1993 } 1994 1995 cpumem_put(&pool_caches, cm); 1996 1997 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1998 while (pl != NULL) 1999 pl = pool_cache_list_put(pp, pl); 2000 } 2001 2002 void 2003 pool_cache_gc(struct pool *pp) 2004 { 2005 unsigned int contention, delta; 2006 2007 if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) && 2008 !TAILQ_EMPTY(&pp->pr_cache_lists) && 2009 pl_enter_try(pp, &pp->pr_cache_lock)) { 2010 struct pool_cache_item *pl = NULL; 2011 2012 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2013 if (pl != NULL) { 2014 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 2015 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 2016 pp->pr_cache_tick = ticks; 2017 2018 pp->pr_cache_ngc++; 2019 } 2020 2021 pl_leave(pp, &pp->pr_cache_lock); 2022 2023 pool_cache_list_put(pp, pl); 2024 } 2025 2026 /* 2027 * if there's a lot of contention on the pr_cache_mtx then consider 2028 * growing the length of the list to reduce the need to access the 2029 * global pool. 2030 */ 2031 2032 contention = pp->pr_cache_contention; 2033 delta = contention - pp->pr_cache_contention_prev; 2034 if (delta > 8 /* magic */) { 2035 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) 2036 pp->pr_cache_items += 8; 2037 } else if (delta == 0) { 2038 if (pp->pr_cache_items > 8) 2039 pp->pr_cache_items--; 2040 } 2041 pp->pr_cache_contention_prev = contention; 2042 } 2043 2044 void 2045 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2046 { 2047 struct pool_cache *pc; 2048 struct cpumem_iter i; 2049 2050 if (pp->pr_cache == NULL) 2051 return; 2052 2053 /* loop through the caches twice to collect stats */ 2054 2055 /* once without the lock so we can yield while reading nget/nput */ 2056 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2057 uint64_t gen, nget, nput; 2058 2059 do { 2060 while ((gen = pc->pc_gen) & 1) 2061 yield(); 2062 2063 nget = pc->pc_nget; 2064 nput = pc->pc_nput; 2065 } while (gen != pc->pc_gen); 2066 2067 pi->pr_nget += nget; 2068 pi->pr_nput += nput; 2069 } 2070 2071 /* and once with the mtx so we can get consistent nout values */ 2072 pl_enter(pp, &pp->pr_cache_lock); 2073 CPUMEM_FOREACH(pc, &i, pp->pr_cache) 2074 pi->pr_nout += pc->pc_nout; 2075 2076 pi->pr_nout += pp->pr_cache_nout; 2077 pl_leave(pp, &pp->pr_cache_lock); 2078 } 2079 2080 int 2081 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2082 { 2083 struct kinfo_pool_cache kpc; 2084 2085 if (pp->pr_cache == NULL) 2086 return (EOPNOTSUPP); 2087 2088 memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ 2089 2090 pl_enter(pp, &pp->pr_cache_lock); 2091 kpc.pr_ngc = pp->pr_cache_ngc; 2092 kpc.pr_len = pp->pr_cache_items; 2093 kpc.pr_nitems = pp->pr_cache_nitems; 2094 kpc.pr_contention = pp->pr_cache_contention; 2095 pl_leave(pp, &pp->pr_cache_lock); 2096 2097 return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc))); 2098 } 2099 2100 int 2101 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2102 { 2103 struct pool_cache *pc; 2104 struct kinfo_pool_cache_cpu *kpcc, *info; 2105 unsigned int cpu = 0; 2106 struct cpumem_iter i; 2107 int error = 0; 2108 size_t len; 2109 2110 if (pp->pr_cache == NULL) 2111 return (EOPNOTSUPP); 2112 if (*oldlenp % sizeof(*kpcc)) 2113 return (EINVAL); 2114 2115 kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP, 2116 M_WAITOK|M_CANFAIL|M_ZERO); 2117 if (kpcc == NULL) 2118 return (EIO); 2119 2120 len = ncpusfound * sizeof(*kpcc); 2121 2122 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2123 uint64_t gen; 2124 2125 if (cpu >= ncpusfound) { 2126 error = EIO; 2127 goto err; 2128 } 2129 2130 info = &kpcc[cpu]; 2131 info->pr_cpu = cpu; 2132 2133 do { 2134 while ((gen = pc->pc_gen) & 1) 2135 yield(); 2136 2137 info->pr_nget = pc->pc_nget; 2138 info->pr_nfail = pc->pc_nfail; 2139 info->pr_nput = pc->pc_nput; 2140 info->pr_nlget = pc->pc_nlget; 2141 info->pr_nlfail = pc->pc_nlfail; 2142 info->pr_nlput = pc->pc_nlput; 2143 } while (gen != pc->pc_gen); 2144 2145 cpu++; 2146 } 2147 2148 error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len); 2149 err: 2150 free(kpcc, M_TEMP, len); 2151 2152 return (error); 2153 } 2154 #else /* MULTIPROCESSOR */ 2155 void 2156 pool_cache_init(struct pool *pp) 2157 { 2158 /* nop */ 2159 } 2160 2161 void 2162 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2163 { 2164 /* nop */ 2165 } 2166 2167 int 2168 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2169 { 2170 return (EOPNOTSUPP); 2171 } 2172 2173 int 2174 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2175 { 2176 return (EOPNOTSUPP); 2177 } 2178 #endif /* MULTIPROCESSOR */ 2179 2180 2181 void 2182 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock, 2183 const struct lock_type *type) 2184 { 2185 _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type); 2186 } 2187 2188 void 2189 pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS) 2190 { 2191 _mtx_enter(&lock->prl_mtx LOCK_FL_ARGS); 2192 } 2193 2194 int 2195 pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS) 2196 { 2197 return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS)); 2198 } 2199 2200 void 2201 pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS) 2202 { 2203 _mtx_leave(&lock->prl_mtx LOCK_FL_ARGS); 2204 } 2205 2206 void 2207 pool_lock_mtx_assert_locked(union pool_lock *lock) 2208 { 2209 MUTEX_ASSERT_LOCKED(&lock->prl_mtx); 2210 } 2211 2212 void 2213 pool_lock_mtx_assert_unlocked(union pool_lock *lock) 2214 { 2215 MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx); 2216 } 2217 2218 int 2219 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority, 2220 const char *wmesg, int timo) 2221 { 2222 return msleep(ident, &lock->prl_mtx, priority, wmesg, timo); 2223 } 2224 2225 static const struct pool_lock_ops pool_lock_ops_mtx = { 2226 pool_lock_mtx_init, 2227 pool_lock_mtx_enter, 2228 pool_lock_mtx_enter_try, 2229 pool_lock_mtx_leave, 2230 pool_lock_mtx_assert_locked, 2231 pool_lock_mtx_assert_unlocked, 2232 pool_lock_mtx_sleep, 2233 }; 2234 2235 void 2236 pool_lock_rw_init(struct pool *pp, union pool_lock *lock, 2237 const struct lock_type *type) 2238 { 2239 _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type); 2240 } 2241 2242 void 2243 pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS) 2244 { 2245 _rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS); 2246 } 2247 2248 int 2249 pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS) 2250 { 2251 return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP 2252 LOCK_FL_ARGS) == 0); 2253 } 2254 2255 void 2256 pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS) 2257 { 2258 _rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS); 2259 } 2260 2261 void 2262 pool_lock_rw_assert_locked(union pool_lock *lock) 2263 { 2264 rw_assert_wrlock(&lock->prl_rwlock); 2265 } 2266 2267 void 2268 pool_lock_rw_assert_unlocked(union pool_lock *lock) 2269 { 2270 KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE); 2271 } 2272 2273 int 2274 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority, 2275 const char *wmesg, int timo) 2276 { 2277 return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo); 2278 } 2279 2280 static const struct pool_lock_ops pool_lock_ops_rw = { 2281 pool_lock_rw_init, 2282 pool_lock_rw_enter, 2283 pool_lock_rw_enter_try, 2284 pool_lock_rw_leave, 2285 pool_lock_rw_assert_locked, 2286 pool_lock_rw_assert_unlocked, 2287 pool_lock_rw_sleep, 2288 }; 2289