1 /* $OpenBSD: subr_pool.c,v 1.220 2017/08/13 20:26:33 guenther Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/syslog.h> 41 #include <sys/sysctl.h> 42 #include <sys/task.h> 43 #include <sys/timeout.h> 44 #include <sys/percpu.h> 45 46 #include <uvm/uvm_extern.h> 47 48 /* 49 * Pool resource management utility. 50 * 51 * Memory is allocated in pages which are split into pieces according to 52 * the pool item size. Each page is kept on one of three lists in the 53 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 54 * for empty, full and partially-full pages respectively. The individual 55 * pool items are on a linked list headed by `ph_items' in each page 56 * header. The memory for building the page list is either taken from 57 * the allocated pages themselves (for small pool items) or taken from 58 * an internal pool of page headers (`phpool'). 59 */ 60 61 /* List of all pools */ 62 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 63 64 /* 65 * Every pool gets a unique serial number assigned to it. If this counter 66 * wraps, we're screwed, but we shouldn't create so many pools anyway. 
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

/*
 * Table of lock operations.  Every pool points at one of the two
 * instances declared below through pr_lock_ops; pool_init() picks the
 * rwlock flavour when PR_RWLOCK is set and the mutex flavour otherwise.
 * All locking in this file goes through the pl_*() wrappers so the
 * rest of the code does not care which flavour is in use.
 */
struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    struct lock_type *);
	void	(*pl_enter)(union pool_lock * LOCK_FL_VARS);
	int	(*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
	void	(*pl_leave)(union pool_lock * LOCK_FL_VARS);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *, int);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

/*
 * pl_init() is a macro rather than a function so that, under WITNESS,
 * each call site gets its own static lock_type named after the lock
 * expression passed in.
 */
#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl)		(pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

/* Acquire `pl' using the pool's lock flavour. */
static inline void
pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
}
/* Try to acquire `pl'; the result of the flavour's try-op is returned. */
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
}
/* Release `pl'. */
static inline void
pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
{
	pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
}
/* Assert that `pl' is currently held. */
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
/* Assert that `pl' is currently not held. */
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
/* Sleep on `ident' with `lock' handed to the flavour's sleep-op. */
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg, int timo)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
}

/*
 * Under WITNESS the wrappers above take extra file/line arguments;
 * these macros append them at every call site.
 */
#ifdef WITNESS
# define pl_enter(pp,pl)	pl_enter(pp,pl LOCK_FILE_LINE)
# define pl_enter_try(pp,pl)	pl_enter_try(pp,pl LOCK_FILE_LINE)
# define pl_leave(pp,pl)	pl_leave(pp,pl LOCK_FILE_LINE)
#endif

/*
 * Header overlaid on every free item.  pi_magic holds
 * POOL_IMAGIC(ph, pi) while the item sits on its page's free list and
 * is verified before the item is handed out or the page is released.
 */
struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
/* Expected pi_magic: the item's address xor'ed with the page's magic. */
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;	/* random; see POOL_MAGICBIT */
	int			ph_tick;	/* ticks when page went empty */
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/* True if the items on this page are poisoned while free. */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/*
 * ci_nitems carries two things: the low 27 bits (the NITEMS_MASK) are
 * the item count, and bit 27 (NITEMS_POISON) records whether the
 * cached item is poisoned.  Note that this is not the topmost bit of
 * an unsigned long.
 */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

/*
 * pool_debug gates the extra DIAGNOSTIC checks in this file (item
 * poisoning, double-put detection); the value 2 additionally makes
 * PR_WAITOK callers of pool_get() yield.
 */
#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

/* True if the page header lives inside the page (pr_phoffset != 0). */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

/* Page-level primitives. */
struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

/* Item-level internals. */
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
231 */ 232 void *pool_page_alloc(struct pool *, int, int *); 233 void pool_page_free(struct pool *, void *); 234 235 /* 236 * safe for interrupts; this is the default allocator 237 */ 238 struct pool_allocator pool_allocator_single = { 239 pool_page_alloc, 240 pool_page_free, 241 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED) 242 }; 243 244 void *pool_multi_alloc(struct pool *, int, int *); 245 void pool_multi_free(struct pool *, void *); 246 247 struct pool_allocator pool_allocator_multi = { 248 pool_multi_alloc, 249 pool_multi_free, 250 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 251 }; 252 253 void *pool_multi_alloc_ni(struct pool *, int, int *); 254 void pool_multi_free_ni(struct pool *, void *); 255 256 struct pool_allocator pool_allocator_multi_ni = { 257 pool_multi_alloc_ni, 258 pool_multi_free_ni, 259 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 260 }; 261 262 #ifdef DDB 263 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) 264 __attribute__((__format__(__kprintf__,1,2)))); 265 void pool_print1(struct pool *, const char *, int (*)(const char *, ...) 
266 __attribute__((__format__(__kprintf__,1,2)))); 267 #endif 268 269 /* stale page garbage collectors */ 270 void pool_gc_sched(void *); 271 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); 272 void pool_gc_pages(void *); 273 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); 274 int pool_wait_free = 1; 275 int pool_wait_gc = 8; 276 277 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 278 279 static inline int 280 phtree_compare(const struct pool_page_header *a, 281 const struct pool_page_header *b) 282 { 283 vaddr_t va = (vaddr_t)a->ph_page; 284 vaddr_t vb = (vaddr_t)b->ph_page; 285 286 /* the compares in this order are important for the NFIND to work */ 287 if (vb < va) 288 return (-1); 289 if (vb > va) 290 return (1); 291 292 return (0); 293 } 294 295 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 296 297 /* 298 * Return the pool page header based on page address. 299 */ 300 static inline struct pool_page_header * 301 pr_find_pagehead(struct pool *pp, void *v) 302 { 303 struct pool_page_header *ph, key; 304 305 if (POOL_INPGHDR(pp)) { 306 caddr_t page; 307 308 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 309 310 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 311 } 312 313 key.ph_page = v; 314 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 315 if (ph == NULL) 316 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 317 318 KASSERT(ph->ph_page <= (caddr_t)v); 319 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 320 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 321 322 return (ph); 323 } 324 325 /* 326 * Initialize the given pool resource structure. 327 * 328 * We export this routine to allow other kernel parts to declare 329 * static pools that must be initialized before malloc() is available. 
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	/* default alignment, and never hand out less than a pool_item */
	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	/* grow the page size until at least 8 items fit on a page */
	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		/* no allocator given: pick one of the built-in ones */
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		/* bitmask of supported page sizes, without the flag bit */
		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			/* the header fits into the slack after the items */
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			/* small items: give up item space for the header */
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
		/* otherwise off stays 0 and the header goes off page */
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		/* rwlock-protected pools are only usable with PR_WAITOK */
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	/* the first pool_init() call also sets up the page header pool */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool wont "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 *
 * Tears down the per-CPU cache (MULTIPROCESSOR), unlinks the pool from
 * the global list and frees every page on the empty list.  All items
 * must have been returned: DIAGNOSTIC kernels panic if pr_nout is not
 * zero, and the full and partial page lists are asserted empty.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		/* singly linked: track the predecessor while searching */
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		/* unlink under pr_lock, free with the lock released */
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

/*
 * Prepare a request for pool_request(): `handler' is called with the
 * pool, `cookie' and the allocated item once an item is available.
 */
void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

/*
 * Queue `pr' at the tail of the pool's request list and try to
 * service the list right away without sleeping.
 */
void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}

/*
 * State shared between a pool_get() caller that has to wait and
 * pool_get_done(): `v' is filled in with the item under `lock'.
 */
struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 *
 * `flags' must contain PR_WAITOK or PR_NOWAIT; pools created with
 * PR_RWLOCK require PR_WAITOK.  NULL is returned only on the failure
 * paths below: PR_NOWAIT, or PR_LIMITFAIL when the hard limit has
 * been reached.  Otherwise the caller queues a request and sleeps
 * until pool_get_done() delivers an item.  With PR_ZERO the item is
 * zeroed before it is returned.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	/* try the cache first */
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	/* back off if the page allocator asked for it */
	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		/* nothing available: queue a request and sleep on it */
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	/* reached with pr_lock still held */
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

/*
 * Request handler used by pool_get(): publish the item in the
 * sleeper's pool_get_memory and wake the sleeper up.
 */
void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}

/*
 * Service the queued requests of a pool.
 *
 * Must be called with pr_requests_lock held and pr_lock not held; the
 * requests lock is dropped while items are allocated and handlers run,
 * and is held again on return.  pr_requesting prevents concurrent
 * runners: a caller that finds it non-zero just bumps it and returns,
 * which makes the active runner go around its loop once more.
 */
void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	/* somebody else is already running the queue */
	if (pp->pr_requesting++)
		return;

	do {
		/* collapse any bumps made by other callers into one pass */
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		/* allocate items for as many requests as possible */
		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		/* run the handlers of satisfied requests with no locks held */
		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	/* put the requests that could not be satisfied back on the pool */
	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

/*
 * Take one item off the current page, allocating a new page if the
 * pool has none.
 *
 * Called with pr_lock held; the lock is dropped and retaken around the
 * page allocation.  Returns NULL if no page could be allocated.  The
 * free-list magic (and, on DIAGNOSTIC kernels with pool_debug set, the
 * poison pattern) of the item is verified before it is handed out.
 */
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			/* undo the accounting done above */
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	/* re-read curpage: it may have changed while the lock was dropped */
	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		/* everything after the pool_item header must be poison */
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
780 */ 781 void 782 pool_put(struct pool *pp, void *v) 783 { 784 struct pool_page_header *ph, *freeph = NULL; 785 786 #ifdef DIAGNOSTIC 787 if (v == NULL) 788 panic("%s: NULL item", __func__); 789 #endif 790 791 #ifdef MULTIPROCESSOR 792 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) { 793 pool_cache_put(pp, v); 794 return; 795 } 796 #endif 797 798 pl_enter(pp, &pp->pr_lock); 799 800 pool_do_put(pp, v); 801 802 pp->pr_nout--; 803 pp->pr_nput++; 804 805 /* is it time to free a page? */ 806 if (pp->pr_nidle > pp->pr_maxpages && 807 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 808 (ticks - ph->ph_tick) > (hz * pool_wait_free)) { 809 freeph = ph; 810 pool_p_remove(pp, freeph); 811 } 812 813 pl_leave(pp, &pp->pr_lock); 814 815 if (freeph != NULL) 816 pool_p_free(pp, freeph); 817 818 if (!TAILQ_EMPTY(&pp->pr_requests)) { 819 pl_enter(pp, &pp->pr_requests_lock); 820 pool_runqueue(pp, PR_NOWAIT); 821 pl_leave(pp, &pp->pr_requests_lock); 822 } 823 } 824 825 void 826 pool_do_put(struct pool *pp, void *v) 827 { 828 struct pool_item *pi = v; 829 struct pool_page_header *ph; 830 831 splassert(pp->pr_ipl); 832 833 ph = pr_find_pagehead(pp, v); 834 835 #ifdef DIAGNOSTIC 836 if (pool_debug) { 837 struct pool_item *qi; 838 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) { 839 if (pi == qi) { 840 panic("%s: %s: double pool_put: %p", __func__, 841 pp->pr_wchan, pi); 842 } 843 } 844 } 845 #endif /* DIAGNOSTIC */ 846 847 pi->pi_magic = POOL_IMAGIC(ph, pi); 848 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 849 #ifdef DIAGNOSTIC 850 if (POOL_PHPOISON(ph)) 851 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 852 #endif /* DIAGNOSTIC */ 853 854 if (ph->ph_nmissing-- == pp->pr_itemsperpage) { 855 /* 856 * The page was previously completely full, move it to the 857 * partially-full list. 
858 */ 859 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 860 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 861 } 862 863 if (ph->ph_nmissing == 0) { 864 /* 865 * The page is now empty, so move it to the empty page list. 866 */ 867 pp->pr_nidle++; 868 869 ph->ph_tick = ticks; 870 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 871 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 872 pool_update_curpage(pp); 873 } 874 } 875 876 /* 877 * Add N items to the pool. 878 */ 879 int 880 pool_prime(struct pool *pp, int n) 881 { 882 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 883 struct pool_page_header *ph; 884 int newpages; 885 886 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 887 888 while (newpages-- > 0) { 889 int slowdown = 0; 890 891 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 892 if (ph == NULL) /* or slowdown? */ 893 break; 894 895 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 896 } 897 898 pl_enter(pp, &pp->pr_lock); 899 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 900 TAILQ_REMOVE(&pl, ph, ph_entry); 901 pool_p_insert(pp, ph); 902 } 903 pl_leave(pp, &pp->pr_lock); 904 905 return (0); 906 } 907 908 struct pool_page_header * 909 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 910 { 911 struct pool_page_header *ph; 912 struct pool_item *pi; 913 caddr_t addr; 914 int n; 915 916 pl_assert_unlocked(pp, &pp->pr_lock); 917 KASSERT(pp->pr_size >= sizeof(*pi)); 918 919 addr = pool_allocator_alloc(pp, flags, slowdown); 920 if (addr == NULL) 921 return (NULL); 922 923 if (POOL_INPGHDR(pp)) 924 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 925 else { 926 ph = pool_get(&phpool, flags); 927 if (ph == NULL) { 928 pool_allocator_free(pp, addr); 929 return (NULL); 930 } 931 } 932 933 XSIMPLEQ_INIT(&ph->ph_items); 934 ph->ph_page = addr; 935 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 936 ph->ph_colored = addr; 937 ph->ph_nmissing = 0; 938 arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 939 #ifdef DIAGNOSTIC 
940 /* use a bit in ph_magic to record if we poison page items */ 941 if (pool_debug) 942 SET(ph->ph_magic, POOL_MAGICBIT); 943 else 944 CLR(ph->ph_magic, POOL_MAGICBIT); 945 #endif /* DIAGNOSTIC */ 946 947 n = pp->pr_itemsperpage; 948 while (n--) { 949 pi = (struct pool_item *)addr; 950 pi->pi_magic = POOL_IMAGIC(ph, pi); 951 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 952 953 #ifdef DIAGNOSTIC 954 if (POOL_PHPOISON(ph)) 955 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 956 #endif /* DIAGNOSTIC */ 957 958 addr += pp->pr_size; 959 } 960 961 return (ph); 962 } 963 964 void 965 pool_p_free(struct pool *pp, struct pool_page_header *ph) 966 { 967 struct pool_item *pi; 968 969 pl_assert_unlocked(pp, &pp->pr_lock); 970 KASSERT(ph->ph_nmissing == 0); 971 972 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 973 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 974 panic("%s: %s free list modified: " 975 "page %p; item addr %p; offset 0x%x=0x%lx", 976 __func__, pp->pr_wchan, ph->ph_page, pi, 977 0, pi->pi_magic); 978 } 979 980 #ifdef DIAGNOSTIC 981 if (POOL_PHPOISON(ph)) { 982 size_t pidx; 983 uint32_t pval; 984 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 985 &pidx, &pval)) { 986 int *ip = (int *)(pi + 1); 987 panic("%s: %s free list modified: " 988 "page %p; item addr %p; offset 0x%zx=0x%x", 989 __func__, pp->pr_wchan, ph->ph_page, pi, 990 pidx * sizeof(int), ip[pidx]); 991 } 992 } 993 #endif 994 } 995 996 pool_allocator_free(pp, ph->ph_page); 997 998 if (!POOL_INPGHDR(pp)) 999 pool_put(&phpool, ph); 1000 } 1001 1002 void 1003 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1004 { 1005 pl_assert_locked(pp, &pp->pr_lock); 1006 1007 /* If the pool was depleted, point at the new page */ 1008 if (pp->pr_curpage == NULL) 1009 pp->pr_curpage = ph; 1010 1011 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1012 if (!POOL_INPGHDR(pp)) 1013 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1014 1015 pp->pr_nitems += pp->pr_itemsperpage; 1016 
pp->pr_nidle++; 1017 1018 pp->pr_npagealloc++; 1019 if (++pp->pr_npages > pp->pr_hiwat) 1020 pp->pr_hiwat = pp->pr_npages; 1021 } 1022 1023 void 1024 pool_p_remove(struct pool *pp, struct pool_page_header *ph) 1025 { 1026 pl_assert_locked(pp, &pp->pr_lock); 1027 1028 pp->pr_npagefree++; 1029 pp->pr_npages--; 1030 pp->pr_nidle--; 1031 pp->pr_nitems -= pp->pr_itemsperpage; 1032 1033 if (!POOL_INPGHDR(pp)) 1034 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1035 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1036 1037 pool_update_curpage(pp); 1038 } 1039 1040 void 1041 pool_update_curpage(struct pool *pp) 1042 { 1043 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1044 if (pp->pr_curpage == NULL) { 1045 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1046 } 1047 } 1048 1049 void 1050 pool_setlowat(struct pool *pp, int n) 1051 { 1052 int prime = 0; 1053 1054 pl_enter(pp, &pp->pr_lock); 1055 pp->pr_minitems = n; 1056 pp->pr_minpages = (n == 0) 1057 ? 0 1058 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1059 1060 if (pp->pr_nitems < n) 1061 prime = n - pp->pr_nitems; 1062 pl_leave(pp, &pp->pr_lock); 1063 1064 if (prime > 0) 1065 pool_prime(pp, prime); 1066 } 1067 1068 void 1069 pool_sethiwat(struct pool *pp, int n) 1070 { 1071 pp->pr_maxpages = (n == 0) 1072 ? 
0 1073 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1074 } 1075 1076 int 1077 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1078 { 1079 int error = 0; 1080 1081 if (n < pp->pr_nout) { 1082 error = EINVAL; 1083 goto done; 1084 } 1085 1086 pp->pr_hardlimit = n; 1087 pp->pr_hardlimit_warning = warnmsg; 1088 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1089 pp->pr_hardlimit_warning_last.tv_sec = 0; 1090 pp->pr_hardlimit_warning_last.tv_usec = 0; 1091 1092 done: 1093 return (error); 1094 } 1095 1096 void 1097 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1098 { 1099 pp->pr_crange = mode; 1100 } 1101 1102 /* 1103 * Release all complete pages that have not been used recently. 1104 * 1105 * Returns non-zero if any pages have been reclaimed. 1106 */ 1107 int 1108 pool_reclaim(struct pool *pp) 1109 { 1110 struct pool_page_header *ph, *phnext; 1111 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1112 1113 pl_enter(pp, &pp->pr_lock); 1114 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1115 phnext = TAILQ_NEXT(ph, ph_entry); 1116 1117 /* Check our minimum page claim */ 1118 if (pp->pr_npages <= pp->pr_minpages) 1119 break; 1120 1121 /* 1122 * If freeing this page would put us below 1123 * the low water mark, stop now. 1124 */ 1125 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1126 pp->pr_minitems) 1127 break; 1128 1129 pool_p_remove(pp, ph); 1130 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1131 } 1132 pl_leave(pp, &pp->pr_lock); 1133 1134 if (TAILQ_EMPTY(&pl)) 1135 return (0); 1136 1137 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1138 TAILQ_REMOVE(&pl, ph, ph_entry); 1139 pool_p_free(pp, ph); 1140 } 1141 1142 return (1); 1143 } 1144 1145 /* 1146 * Release all complete pages that have not been used recently 1147 * from all pools. 
1148 */ 1149 void 1150 pool_reclaim_all(void) 1151 { 1152 struct pool *pp; 1153 1154 rw_enter_read(&pool_lock); 1155 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1156 pool_reclaim(pp); 1157 rw_exit_read(&pool_lock); 1158 } 1159 1160 #ifdef DDB 1161 #include <machine/db_machdep.h> 1162 #include <ddb/db_output.h> 1163 1164 /* 1165 * Diagnostic helpers. 1166 */ 1167 void 1168 pool_printit(struct pool *pp, const char *modif, 1169 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1170 { 1171 pool_print1(pp, modif, pr); 1172 } 1173 1174 void 1175 pool_print_pagelist(struct pool_pagelist *pl, 1176 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1177 { 1178 struct pool_page_header *ph; 1179 struct pool_item *pi; 1180 1181 TAILQ_FOREACH(ph, pl, ph_entry) { 1182 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1183 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1184 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1185 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1186 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1187 pi, pi->pi_magic); 1188 } 1189 } 1190 } 1191 } 1192 1193 void 1194 pool_print1(struct pool *pp, const char *modif, 1195 int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) 1196 { 1197 struct pool_page_header *ph; 1198 int print_pagelist = 0; 1199 char c; 1200 1201 while ((c = *modif++) != '\0') { 1202 if (c == 'p') 1203 print_pagelist = 1; 1204 modif++; 1205 } 1206 1207 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1208 pp->pr_maxcolors); 1209 (*pr)("\talloc %p\n", pp->pr_alloc); 1210 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1211 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1212 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1213 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1214 1215 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1216 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1217 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1218 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1219 1220 if (print_pagelist == 0) 1221 return; 1222 1223 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1224 (*pr)("\n\tempty page list:\n"); 1225 pool_print_pagelist(&pp->pr_emptypages, pr); 1226 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1227 (*pr)("\n\tfull page list:\n"); 1228 pool_print_pagelist(&pp->pr_fullpages, pr); 1229 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1230 (*pr)("\n\tpartial-page list:\n"); 1231 pool_print_pagelist(&pp->pr_partpages, pr); 1232 1233 if (pp->pr_curpage == NULL) 1234 (*pr)("\tno current page\n"); 1235 else 1236 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1237 } 1238 1239 void 1240 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1241 { 1242 struct pool *pp; 1243 char maxp[16]; 1244 int ovflw; 1245 char mode; 1246 1247 mode = modif[0]; 1248 if (mode != '\0' && mode != 'a') { 1249 db_printf("usage: show all pools [/a]\n"); 1250 return; 1251 } 1252 1253 if (mode == '\0') 1254 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1255 "Name", 1256 "Size", 1257 "Requests", 1258 "Fail", 1259 
"Releases", 1260 "Pgreq", 1261 "Pgrel", 1262 "Npage", 1263 "Hiwat", 1264 "Minpg", 1265 "Maxpg", 1266 "Idle"); 1267 else 1268 db_printf("%-12s %18s %18s\n", 1269 "Name", "Address", "Allocator"); 1270 1271 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1272 if (mode == 'a') { 1273 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1274 pp->pr_alloc); 1275 continue; 1276 } 1277 1278 if (!pp->pr_nget) 1279 continue; 1280 1281 if (pp->pr_maxpages == UINT_MAX) 1282 snprintf(maxp, sizeof maxp, "inf"); 1283 else 1284 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1285 1286 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1287 (ovflw) += db_printf((fmt), \ 1288 (width) - (fixed) - (ovflw) > 0 ? \ 1289 (width) - (fixed) - (ovflw) : 0, \ 1290 (val)) - (width); \ 1291 if ((ovflw) < 0) \ 1292 (ovflw) = 0; \ 1293 } while (/* CONSTCOND */0) 1294 1295 ovflw = 0; 1296 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1297 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1298 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1299 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1300 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1301 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1302 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1303 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1304 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1305 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1306 PRWORD(ovflw, " %*s", 6, 1, maxp); 1307 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1308 1309 pool_chk(pp); 1310 } 1311 } 1312 #endif /* DDB */ 1313 1314 #if defined(POOL_DEBUG) || defined(DDB) 1315 int 1316 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1317 { 1318 struct pool_item *pi; 1319 caddr_t page; 1320 int n; 1321 const char *label = pp->pr_wchan; 1322 1323 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1324 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1325 printf("%s: ", label); 1326 printf("pool(%p:%s): page inconsistency: page %p; " 1327 "at page head addr %p (p %p)\n", 1328 pp, 
pp->pr_wchan, ph->ph_page, ph, page); 1329 return 1; 1330 } 1331 1332 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1333 pi != NULL; 1334 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1335 if ((caddr_t)pi < ph->ph_page || 1336 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1337 printf("%s: ", label); 1338 printf("pool(%p:%s): page inconsistency: page %p;" 1339 " item ordinal %d; addr %p\n", pp, 1340 pp->pr_wchan, ph->ph_page, n, pi); 1341 return (1); 1342 } 1343 1344 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1345 printf("%s: ", label); 1346 printf("pool(%p:%s): free list modified: " 1347 "page %p; item ordinal %d; addr %p " 1348 "(p %p); offset 0x%x=0x%lx\n", 1349 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1350 0, pi->pi_magic); 1351 } 1352 1353 #ifdef DIAGNOSTIC 1354 if (POOL_PHPOISON(ph)) { 1355 size_t pidx; 1356 uint32_t pval; 1357 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1358 &pidx, &pval)) { 1359 int *ip = (int *)(pi + 1); 1360 printf("pool(%s): free list modified: " 1361 "page %p; item ordinal %d; addr %p " 1362 "(p %p); offset 0x%zx=0x%x\n", 1363 pp->pr_wchan, ph->ph_page, n, pi, 1364 page, pidx * sizeof(int), ip[pidx]); 1365 } 1366 } 1367 #endif /* DIAGNOSTIC */ 1368 } 1369 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1370 printf("pool(%p:%s): page inconsistency: page %p;" 1371 " %d on list, %d missing, %d items per page\n", pp, 1372 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1373 pp->pr_itemsperpage); 1374 return 1; 1375 } 1376 if (expected >= 0 && n != expected) { 1377 printf("pool(%p:%s): page inconsistency: page %p;" 1378 " %d on list, %d missing, %d expected\n", pp, 1379 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1380 expected); 1381 return 1; 1382 } 1383 return 0; 1384 } 1385 1386 int 1387 pool_chk(struct pool *pp) 1388 { 1389 struct pool_page_header *ph; 1390 int r = 0; 1391 1392 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1393 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1394 TAILQ_FOREACH(ph, 
&pp->pr_fullpages, ph_entry) 1395 r += pool_chk_page(pp, ph, 0); 1396 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1397 r += pool_chk_page(pp, ph, -1); 1398 1399 return (r); 1400 } 1401 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1402 1403 #ifdef DDB 1404 void 1405 pool_walk(struct pool *pp, int full, 1406 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1407 void (*func)(void *, int, int (*)(const char *, ...) 1408 __attribute__((__format__(__kprintf__,1,2))))) 1409 { 1410 struct pool_page_header *ph; 1411 struct pool_item *pi; 1412 caddr_t cp; 1413 int n; 1414 1415 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1416 cp = ph->ph_colored; 1417 n = ph->ph_nmissing; 1418 1419 while (n--) { 1420 func(cp, full, pr); 1421 cp += pp->pr_size; 1422 } 1423 } 1424 1425 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1426 cp = ph->ph_colored; 1427 n = ph->ph_nmissing; 1428 1429 do { 1430 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1431 if (cp == (caddr_t)pi) 1432 break; 1433 } 1434 if (cp != (caddr_t)pi) { 1435 func(cp, full, pr); 1436 n--; 1437 } 1438 1439 cp += pp->pr_size; 1440 } while (n > 0); 1441 } 1442 } 1443 #endif 1444 1445 /* 1446 * We have three different sysctls. 1447 * kern.pool.npools - the number of pools. 1448 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1449 * kern.pool.name.<pool#> - the name for pool#. 
1450 */ 1451 int 1452 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) 1453 { 1454 struct kinfo_pool pi; 1455 struct pool *pp; 1456 int rv = ENOENT; 1457 1458 switch (name[0]) { 1459 case KERN_POOL_NPOOLS: 1460 if (namelen != 1) 1461 return (ENOTDIR); 1462 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); 1463 1464 case KERN_POOL_NAME: 1465 case KERN_POOL_POOL: 1466 case KERN_POOL_CACHE: 1467 case KERN_POOL_CACHE_CPUS: 1468 break; 1469 default: 1470 return (EOPNOTSUPP); 1471 } 1472 1473 if (namelen != 2) 1474 return (ENOTDIR); 1475 1476 rw_enter_read(&pool_lock); 1477 1478 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1479 if (name[1] == pp->pr_serial) 1480 break; 1481 } 1482 1483 if (pp == NULL) 1484 goto done; 1485 1486 switch (name[0]) { 1487 case KERN_POOL_NAME: 1488 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); 1489 break; 1490 case KERN_POOL_POOL: 1491 memset(&pi, 0, sizeof(pi)); 1492 1493 pl_enter(pp, &pp->pr_lock); 1494 pi.pr_size = pp->pr_size; 1495 pi.pr_pgsize = pp->pr_pgsize; 1496 pi.pr_itemsperpage = pp->pr_itemsperpage; 1497 pi.pr_npages = pp->pr_npages; 1498 pi.pr_minpages = pp->pr_minpages; 1499 pi.pr_maxpages = pp->pr_maxpages; 1500 pi.pr_hardlimit = pp->pr_hardlimit; 1501 pi.pr_nout = pp->pr_nout; 1502 pi.pr_nitems = pp->pr_nitems; 1503 pi.pr_nget = pp->pr_nget; 1504 pi.pr_nput = pp->pr_nput; 1505 pi.pr_nfail = pp->pr_nfail; 1506 pi.pr_npagealloc = pp->pr_npagealloc; 1507 pi.pr_npagefree = pp->pr_npagefree; 1508 pi.pr_hiwat = pp->pr_hiwat; 1509 pi.pr_nidle = pp->pr_nidle; 1510 pl_leave(pp, &pp->pr_lock); 1511 1512 pool_cache_pool_info(pp, &pi); 1513 1514 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); 1515 break; 1516 1517 case KERN_POOL_CACHE: 1518 rv = pool_cache_info(pp, oldp, oldlenp); 1519 break; 1520 1521 case KERN_POOL_CACHE_CPUS: 1522 rv = pool_cache_cpus_info(pp, oldp, oldlenp); 1523 break; 1524 } 1525 1526 done: 1527 rw_exit_read(&pool_lock); 1528 1529 return (rv); 1530 } 1531 1532 void 
1533 pool_gc_sched(void *null) 1534 { 1535 task_add(systqmp, &pool_gc_task); 1536 } 1537 1538 void 1539 pool_gc_pages(void *null) 1540 { 1541 struct pool *pp; 1542 struct pool_page_header *ph, *freeph; 1543 int s; 1544 1545 rw_enter_read(&pool_lock); 1546 s = splvm(); /* XXX go to splvm until all pools _setipl properly */ 1547 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1548 #ifdef MULTIPROCESSOR 1549 if (pp->pr_cache != NULL) 1550 pool_cache_gc(pp); 1551 #endif 1552 1553 if (pp->pr_nidle <= pp->pr_minpages || /* guess */ 1554 !pl_enter_try(pp, &pp->pr_lock)) /* try */ 1555 continue; 1556 1557 /* is it time to free a page? */ 1558 if (pp->pr_nidle > pp->pr_minpages && 1559 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 1560 (ticks - ph->ph_tick) > (hz * pool_wait_gc)) { 1561 freeph = ph; 1562 pool_p_remove(pp, freeph); 1563 } else 1564 freeph = NULL; 1565 1566 pl_leave(pp, &pp->pr_lock); 1567 1568 if (freeph != NULL) 1569 pool_p_free(pp, freeph); 1570 } 1571 splx(s); 1572 rw_exit_read(&pool_lock); 1573 1574 timeout_add_sec(&pool_gc_tick, 1); 1575 } 1576 1577 /* 1578 * Pool backend allocators. 
1579 */ 1580 1581 void * 1582 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1583 { 1584 void *v; 1585 1586 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); 1587 1588 #ifdef DIAGNOSTIC 1589 if (v != NULL && POOL_INPGHDR(pp)) { 1590 vaddr_t addr = (vaddr_t)v; 1591 if ((addr & pp->pr_pgmask) != addr) { 1592 panic("%s: %s page address %p isnt aligned to %u", 1593 __func__, pp->pr_wchan, v, pp->pr_pgsize); 1594 } 1595 } 1596 #endif 1597 1598 return (v); 1599 } 1600 1601 void 1602 pool_allocator_free(struct pool *pp, void *v) 1603 { 1604 struct pool_allocator *pa = pp->pr_alloc; 1605 1606 (*pa->pa_free)(pp, v); 1607 } 1608 1609 void * 1610 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1611 { 1612 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1613 1614 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1615 kd.kd_slowdown = slowdown; 1616 1617 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); 1618 } 1619 1620 void 1621 pool_page_free(struct pool *pp, void *v) 1622 { 1623 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); 1624 } 1625 1626 void * 1627 pool_multi_alloc(struct pool *pp, int flags, int *slowdown) 1628 { 1629 struct kmem_va_mode kv = kv_intrsafe; 1630 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1631 void *v; 1632 int s; 1633 1634 if (POOL_INPGHDR(pp)) 1635 kv.kv_align = pp->pr_pgsize; 1636 1637 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1638 kd.kd_slowdown = slowdown; 1639 1640 s = splvm(); 1641 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1642 splx(s); 1643 1644 return (v); 1645 } 1646 1647 void 1648 pool_multi_free(struct pool *pp, void *v) 1649 { 1650 struct kmem_va_mode kv = kv_intrsafe; 1651 int s; 1652 1653 if (POOL_INPGHDR(pp)) 1654 kv.kv_align = pp->pr_pgsize; 1655 1656 s = splvm(); 1657 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1658 splx(s); 1659 } 1660 1661 void * 1662 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown) 1663 { 1664 struct kmem_va_mode kv = kv_any; 1665 struct 
kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1666 void *v; 1667 1668 if (POOL_INPGHDR(pp)) 1669 kv.kv_align = pp->pr_pgsize; 1670 1671 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1672 kd.kd_slowdown = slowdown; 1673 1674 KERNEL_LOCK(); 1675 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1676 KERNEL_UNLOCK(); 1677 1678 return (v); 1679 } 1680 1681 void 1682 pool_multi_free_ni(struct pool *pp, void *v) 1683 { 1684 struct kmem_va_mode kv = kv_any; 1685 1686 if (POOL_INPGHDR(pp)) 1687 kv.kv_align = pp->pr_pgsize; 1688 1689 KERNEL_LOCK(); 1690 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1691 KERNEL_UNLOCK(); 1692 } 1693 1694 #ifdef MULTIPROCESSOR 1695 1696 struct pool pool_caches; /* per cpu cache entries */ 1697 1698 void 1699 pool_cache_init(struct pool *pp) 1700 { 1701 struct cpumem *cm; 1702 struct pool_cache *pc; 1703 struct cpumem_iter i; 1704 1705 if (pool_caches.pr_size == 0) { 1706 pool_init(&pool_caches, sizeof(struct pool_cache), 1707 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK, 1708 "plcache", NULL); 1709 } 1710 1711 /* must be able to use the pool items as cache list items */ 1712 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item)); 1713 1714 cm = cpumem_get(&pool_caches); 1715 1716 pl_init(pp, &pp->pr_cache_lock); 1717 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); 1718 TAILQ_INIT(&pp->pr_cache_lists); 1719 pp->pr_cache_nitems = 0; 1720 pp->pr_cache_tick = ticks; 1721 pp->pr_cache_items = 8; 1722 pp->pr_cache_contention = 0; 1723 pp->pr_cache_ngc = 0; 1724 1725 CPUMEM_FOREACH(pc, &i, cm) { 1726 pc->pc_actv = NULL; 1727 pc->pc_nactv = 0; 1728 pc->pc_prev = NULL; 1729 1730 pc->pc_nget = 0; 1731 pc->pc_nfail = 0; 1732 pc->pc_nput = 0; 1733 pc->pc_nlget = 0; 1734 pc->pc_nlfail = 0; 1735 pc->pc_nlput = 0; 1736 pc->pc_nout = 0; 1737 } 1738 1739 membar_producer(); 1740 1741 pp->pr_cache = cm; 1742 } 1743 1744 static inline void 1745 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci) 1746 { 1747 unsigned long *entry = 
(unsigned long *)&ci->ci_nextl; 1748 1749 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci; 1750 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1751 } 1752 1753 static inline void 1754 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci) 1755 { 1756 unsigned long *entry; 1757 unsigned long val; 1758 1759 entry = (unsigned long *)&ci->ci_nextl; 1760 val = pp->pr_cache_magic[0] ^ (u_long)ci; 1761 if (*entry != val) 1762 goto fail; 1763 1764 entry++; 1765 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1766 if (*entry != val) 1767 goto fail; 1768 1769 return; 1770 1771 fail: 1772 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx", 1773 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci, 1774 *entry, val); 1775 } 1776 1777 static inline void 1778 pool_list_enter(struct pool *pp) 1779 { 1780 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) { 1781 pl_enter(pp, &pp->pr_cache_lock); 1782 pp->pr_cache_contention++; 1783 } 1784 } 1785 1786 static inline void 1787 pool_list_leave(struct pool *pp) 1788 { 1789 pl_leave(pp, &pp->pr_cache_lock); 1790 } 1791 1792 static inline struct pool_cache_item * 1793 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc) 1794 { 1795 struct pool_cache_item *pl; 1796 1797 pool_list_enter(pp); 1798 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1799 if (pl != NULL) { 1800 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 1801 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 1802 1803 pool_cache_item_magic(pp, pl); 1804 1805 pc->pc_nlget++; 1806 } else 1807 pc->pc_nlfail++; 1808 1809 /* fold this cpus nout into the global while we have the lock */ 1810 pp->pr_cache_nout += pc->pc_nout; 1811 pc->pc_nout = 0; 1812 pool_list_leave(pp); 1813 1814 return (pl); 1815 } 1816 1817 static inline void 1818 pool_cache_list_free(struct pool *pp, struct pool_cache *pc, 1819 struct pool_cache_item *ci) 1820 { 1821 pool_list_enter(pp); 1822 if (TAILQ_EMPTY(&pp->pr_cache_lists)) 1823 pp->pr_cache_tick = ticks; 
1824 1825 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); 1826 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); 1827 1828 pc->pc_nlput++; 1829 1830 /* fold this cpus nout into the global while we have the lock */ 1831 pp->pr_cache_nout += pc->pc_nout; 1832 pc->pc_nout = 0; 1833 pool_list_leave(pp); 1834 } 1835 1836 static inline struct pool_cache * 1837 pool_cache_enter(struct pool *pp, int *s) 1838 { 1839 struct pool_cache *pc; 1840 1841 pc = cpumem_enter(pp->pr_cache); 1842 *s = splraise(pp->pr_ipl); 1843 pc->pc_gen++; 1844 1845 return (pc); 1846 } 1847 1848 static inline void 1849 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s) 1850 { 1851 pc->pc_gen++; 1852 splx(s); 1853 cpumem_leave(pp->pr_cache, pc); 1854 } 1855 1856 void * 1857 pool_cache_get(struct pool *pp) 1858 { 1859 struct pool_cache *pc; 1860 struct pool_cache_item *ci; 1861 int s; 1862 1863 pc = pool_cache_enter(pp, &s); 1864 1865 if (pc->pc_actv != NULL) { 1866 ci = pc->pc_actv; 1867 } else if (pc->pc_prev != NULL) { 1868 ci = pc->pc_prev; 1869 pc->pc_prev = NULL; 1870 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) { 1871 pc->pc_nfail++; 1872 goto done; 1873 } 1874 1875 pool_cache_item_magic_check(pp, ci); 1876 #ifdef DIAGNOSTIC 1877 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) { 1878 size_t pidx; 1879 uint32_t pval; 1880 1881 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci), 1882 &pidx, &pval)) { 1883 int *ip = (int *)(ci + 1); 1884 ip += pidx; 1885 1886 panic("%s: %s cpu free list modified: " 1887 "item addr %p+%zu 0x%x!=0x%x", 1888 __func__, pp->pr_wchan, ci, 1889 (caddr_t)ip - (caddr_t)ci, *ip, pval); 1890 } 1891 } 1892 #endif 1893 1894 pc->pc_actv = ci->ci_next; 1895 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1; 1896 pc->pc_nget++; 1897 pc->pc_nout++; 1898 1899 done: 1900 pool_cache_leave(pp, pc, s); 1901 1902 return (ci); 1903 } 1904 1905 void 1906 pool_cache_put(struct pool *pp, void *v) 1907 { 1908 struct pool_cache *pc; 1909 struct pool_cache_item *ci 
= v; 1910 unsigned long nitems; 1911 int s; 1912 #ifdef DIAGNOSTIC 1913 int poison = pool_debug && pp->pr_size > sizeof(*ci); 1914 1915 if (poison) 1916 poison_mem(ci + 1, pp->pr_size - sizeof(*ci)); 1917 #endif 1918 1919 pc = pool_cache_enter(pp, &s); 1920 1921 nitems = pc->pc_nactv; 1922 if (nitems >= pp->pr_cache_items) { 1923 if (pc->pc_prev != NULL) 1924 pool_cache_list_free(pp, pc, pc->pc_prev); 1925 1926 pc->pc_prev = pc->pc_actv; 1927 1928 pc->pc_actv = NULL; 1929 pc->pc_nactv = 0; 1930 nitems = 0; 1931 } 1932 1933 ci->ci_next = pc->pc_actv; 1934 ci->ci_nitems = ++nitems; 1935 #ifdef DIAGNOSTIC 1936 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0; 1937 #endif 1938 pool_cache_item_magic(pp, ci); 1939 1940 pc->pc_actv = ci; 1941 pc->pc_nactv = nitems; 1942 1943 pc->pc_nput++; 1944 pc->pc_nout--; 1945 1946 pool_cache_leave(pp, pc, s); 1947 } 1948 1949 struct pool_cache_item * 1950 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl) 1951 { 1952 struct pool_cache_item *rpl, *next; 1953 1954 if (pl == NULL) 1955 return (NULL); 1956 1957 rpl = TAILQ_NEXT(pl, ci_nextl); 1958 1959 pl_enter(pp, &pp->pr_lock); 1960 do { 1961 next = pl->ci_next; 1962 pool_do_put(pp, pl); 1963 pl = next; 1964 } while (pl != NULL); 1965 pl_leave(pp, &pp->pr_lock); 1966 1967 return (rpl); 1968 } 1969 1970 void 1971 pool_cache_destroy(struct pool *pp) 1972 { 1973 struct pool_cache *pc; 1974 struct pool_cache_item *pl; 1975 struct cpumem_iter i; 1976 struct cpumem *cm; 1977 1978 rw_enter_write(&pool_lock); /* serialise with the gc */ 1979 cm = pp->pr_cache; 1980 pp->pr_cache = NULL; /* make pool_put avoid the cache */ 1981 rw_exit_write(&pool_lock); 1982 1983 CPUMEM_FOREACH(pc, &i, cm) { 1984 pool_cache_list_put(pp, pc->pc_actv); 1985 pool_cache_list_put(pp, pc->pc_prev); 1986 } 1987 1988 cpumem_put(&pool_caches, cm); 1989 1990 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1991 while (pl != NULL) 1992 pl = pool_cache_list_put(pp, pl); 1993 } 1994 1995 void 1996 
pool_cache_gc(struct pool *pp) 1997 { 1998 unsigned int contention, delta; 1999 2000 if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) && 2001 !TAILQ_EMPTY(&pp->pr_cache_lists) && 2002 pl_enter_try(pp, &pp->pr_cache_lock)) { 2003 struct pool_cache_item *pl = NULL; 2004 2005 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2006 if (pl != NULL) { 2007 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 2008 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 2009 pp->pr_cache_tick = ticks; 2010 2011 pp->pr_cache_ngc++; 2012 } 2013 2014 pl_leave(pp, &pp->pr_cache_lock); 2015 2016 pool_cache_list_put(pp, pl); 2017 } 2018 2019 /* 2020 * if there's a lot of contention on the pr_cache_mtx then consider 2021 * growing the length of the list to reduce the need to access the 2022 * global pool. 2023 */ 2024 2025 contention = pp->pr_cache_contention; 2026 delta = contention - pp->pr_cache_contention_prev; 2027 if (delta > 8 /* magic */) { 2028 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) 2029 pp->pr_cache_items += 8; 2030 } else if (delta == 0) { 2031 if (pp->pr_cache_items > 8) 2032 pp->pr_cache_items--; 2033 } 2034 pp->pr_cache_contention_prev = contention; 2035 } 2036 2037 void 2038 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2039 { 2040 struct pool_cache *pc; 2041 struct cpumem_iter i; 2042 2043 if (pp->pr_cache == NULL) 2044 return; 2045 2046 /* loop through the caches twice to collect stats */ 2047 2048 /* once without the lock so we can yield while reading nget/nput */ 2049 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2050 uint64_t gen, nget, nput; 2051 2052 do { 2053 while ((gen = pc->pc_gen) & 1) 2054 yield(); 2055 2056 nget = pc->pc_nget; 2057 nput = pc->pc_nput; 2058 } while (gen != pc->pc_gen); 2059 2060 pi->pr_nget += nget; 2061 pi->pr_nput += nput; 2062 } 2063 2064 /* and once with the mtx so we can get consistent nout values */ 2065 pl_enter(pp, &pp->pr_cache_lock); 2066 CPUMEM_FOREACH(pc, &i, pp->pr_cache) 2067 pi->pr_nout += pc->pc_nout; 2068 2069 
pi->pr_nout += pp->pr_cache_nout; 2070 pl_leave(pp, &pp->pr_cache_lock); 2071 } 2072 2073 int 2074 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2075 { 2076 struct kinfo_pool_cache kpc; 2077 2078 if (pp->pr_cache == NULL) 2079 return (EOPNOTSUPP); 2080 2081 memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ 2082 2083 pl_enter(pp, &pp->pr_cache_lock); 2084 kpc.pr_ngc = pp->pr_cache_ngc; 2085 kpc.pr_len = pp->pr_cache_items; 2086 kpc.pr_nitems = pp->pr_cache_nitems; 2087 kpc.pr_contention = pp->pr_cache_contention; 2088 pl_leave(pp, &pp->pr_cache_lock); 2089 2090 return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc))); 2091 } 2092 2093 int 2094 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2095 { 2096 struct pool_cache *pc; 2097 struct kinfo_pool_cache_cpu *kpcc, *info; 2098 unsigned int cpu = 0; 2099 struct cpumem_iter i; 2100 int error = 0; 2101 size_t len; 2102 2103 if (pp->pr_cache == NULL) 2104 return (EOPNOTSUPP); 2105 if (*oldlenp % sizeof(*kpcc)) 2106 return (EINVAL); 2107 2108 kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP, 2109 M_WAITOK|M_CANFAIL|M_ZERO); 2110 if (kpcc == NULL) 2111 return (EIO); 2112 2113 len = ncpusfound * sizeof(*kpcc); 2114 2115 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2116 uint64_t gen; 2117 2118 if (cpu >= ncpusfound) { 2119 error = EIO; 2120 goto err; 2121 } 2122 2123 info = &kpcc[cpu]; 2124 info->pr_cpu = cpu; 2125 2126 do { 2127 while ((gen = pc->pc_gen) & 1) 2128 yield(); 2129 2130 info->pr_nget = pc->pc_nget; 2131 info->pr_nfail = pc->pc_nfail; 2132 info->pr_nput = pc->pc_nput; 2133 info->pr_nlget = pc->pc_nlget; 2134 info->pr_nlfail = pc->pc_nlfail; 2135 info->pr_nlput = pc->pc_nlput; 2136 } while (gen != pc->pc_gen); 2137 2138 cpu++; 2139 } 2140 2141 error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len); 2142 err: 2143 free(kpcc, M_TEMP, len); 2144 2145 return (error); 2146 } 2147 #else /* MULTIPROCESSOR */ 2148 void 2149 pool_cache_init(struct pool *pp) 2150 { 2151 /* nop 
*/ 2152 } 2153 2154 void 2155 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2156 { 2157 /* nop */ 2158 } 2159 2160 int 2161 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2162 { 2163 return (EOPNOTSUPP); 2164 } 2165 2166 int 2167 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2168 { 2169 return (EOPNOTSUPP); 2170 } 2171 #endif /* MULTIPROCESSOR */ 2172 2173 2174 void 2175 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock, 2176 struct lock_type *type) 2177 { 2178 _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type); 2179 } 2180 2181 void 2182 pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS) 2183 { 2184 _mtx_enter(&lock->prl_mtx LOCK_FL_ARGS); 2185 } 2186 2187 int 2188 pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS) 2189 { 2190 return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS)); 2191 } 2192 2193 void 2194 pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS) 2195 { 2196 _mtx_leave(&lock->prl_mtx LOCK_FL_ARGS); 2197 } 2198 2199 void 2200 pool_lock_mtx_assert_locked(union pool_lock *lock) 2201 { 2202 MUTEX_ASSERT_LOCKED(&lock->prl_mtx); 2203 } 2204 2205 void 2206 pool_lock_mtx_assert_unlocked(union pool_lock *lock) 2207 { 2208 MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx); 2209 } 2210 2211 int 2212 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority, 2213 const char *wmesg, int timo) 2214 { 2215 return msleep(ident, &lock->prl_mtx, priority, wmesg, timo); 2216 } 2217 2218 static const struct pool_lock_ops pool_lock_ops_mtx = { 2219 pool_lock_mtx_init, 2220 pool_lock_mtx_enter, 2221 pool_lock_mtx_enter_try, 2222 pool_lock_mtx_leave, 2223 pool_lock_mtx_assert_locked, 2224 pool_lock_mtx_assert_unlocked, 2225 pool_lock_mtx_sleep, 2226 }; 2227 2228 void 2229 pool_lock_rw_init(struct pool *pp, union pool_lock *lock, 2230 struct lock_type *type) 2231 { 2232 _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type); 2233 } 2234 2235 void 2236 pool_lock_rw_enter(union 
pool_lock *lock LOCK_FL_VARS) 2237 { 2238 _rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS); 2239 } 2240 2241 int 2242 pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS) 2243 { 2244 return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP 2245 LOCK_FL_ARGS) == 0); 2246 } 2247 2248 void 2249 pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS) 2250 { 2251 _rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS); 2252 } 2253 2254 void 2255 pool_lock_rw_assert_locked(union pool_lock *lock) 2256 { 2257 rw_assert_wrlock(&lock->prl_rwlock); 2258 } 2259 2260 void 2261 pool_lock_rw_assert_unlocked(union pool_lock *lock) 2262 { 2263 KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE); 2264 } 2265 2266 int 2267 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority, 2268 const char *wmesg, int timo) 2269 { 2270 return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo); 2271 } 2272 2273 static const struct pool_lock_ops pool_lock_ops_rw = { 2274 pool_lock_rw_init, 2275 pool_lock_rw_enter, 2276 pool_lock_rw_enter_try, 2277 pool_lock_rw_leave, 2278 pool_lock_rw_assert_locked, 2279 pool_lock_rw_assert_unlocked, 2280 pool_lock_rw_sleep, 2281 }; 2282