/*	$OpenBSD: subr_pool.c,v 1.217 2017/06/23 01:21:55 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/percpu.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

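/*
 * Illustrative sketch of typical pool usage by a subsystem; the names
 * "example_pool", "struct example" and "examplepl" are hypothetical and
 * not part of this file:
 *
 *	struct pool example_pool;
 *
 *	pool_init(&example_pool, sizeof(struct example), 0, IPL_NONE, 0,
 *	    "examplepl", NULL);
 *
 *	struct example *e = pool_get(&example_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&example_pool, e);
 */
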
/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)		\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)		\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

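/*
 * Backend allocators for pools whose page size ends up larger than
 * PAGE_SIZE (see the palloc selection in pool_init()): pool_allocator_multi
 * is interrupt safe, while pool_allocator_multi_ni is picked for PR_WAITOK
 * pools and may sleep.
 */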
void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ?
	    pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	mtx_init_flags(&pp->pr_mtx, pp->pr_ipl, wchan, 0);
	mtx_init_flags(&pp->pr_requests_mtx, pp->pr_ipl, wchan, 0);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

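/*
 * Sketch of the asynchronous allocation interface above (the caller,
 * "example_done" and "sc" are hypothetical, not part of this file):
 * a caller that cannot sleep queues a request and has pr_handler
 * called with its cookie and the item once one becomes available:
 *
 *	void
 *	example_done(void *cookie, void *item)
 *	{
 *		struct example_softc *sc = cookie;
 *		...
 *	}
 *
 *	pool_request_init(&sc->sc_request, example_done, sc);
 *	pool_request(&example_pool, &sc->sc_request);
 */
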
struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER(pp->pr_ipl),
		    NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

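/*
 * Fetch one item from the pool. Called with pr_mtx held; pr_mtx is
 * dropped and retaken if a fresh page has to be allocated.
 */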
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	mtx_enter(&pp->pr_mtx);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

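/*
 * Return one item to its page's free list. Called with pr_mtx held
 * (from pool_put() and pool_cache_list_put()).
 */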
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

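/*
 * Allocate a page from the backend allocator and carve it into items,
 * each tagged with its magic value. Called without pr_mtx held since
 * the allocation may sleep.
 */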
struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

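/*
 * Unlink an empty page from the pool. The caller frees the page with
 * pool_p_free() once pr_mtx has been dropped.
 */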
void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
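/*
 * Walk every item currently handed out by the pool (items missing from
 * the per-page free lists) and call func on each one.
 */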
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have several different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS additionally export the
 * per-pool cache state and its per-CPU counters for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		mtx_leave(&pp->pr_mtx);

		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

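/*
 * Runs from pool_gc_task and re-arms its timeout every second: for each
 * pool, give back at most one page that has sat empty for at least
 * pool_wait_gc seconds, and run the per-CPU cache GC.
 */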
void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

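/*
 * Attach per-CPU item caches to a pool. Once pr_cache is set,
 * pool_get() and pool_put() try these caches before falling back to
 * the pool itself.
 */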
void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK, "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_tick = ticks;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	membar_producer();

	pp->pr_cache = cm;
}

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
		mtx_enter(&pp->pr_cache_mtx);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	mtx_leave(&pp->pr_cache_mtx);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);

		pool_cache_item_magic(pp, pl);

		pc->pc_nlget++;
	} else
		pc->pc_nlfail++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	if (TAILQ_EMPTY(&pp->pr_cache_lists))
		pp->pr_cache_tick = ticks;

	pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);

	pc->pc_nlput++;

	/* fold this cpus nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

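/*
 * pc_gen is bumped on both sides of a per-CPU cache operation, so it is
 * odd while an update is in progress. Readers in pool_cache_pool_info()
 * and pool_cache_cpus_info() yield while it is odd and retry if it
 * changed, which lets them read the counters without taking a lock.
 */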
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

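/*
 * Give a whole list of cached items back to the pool proper and return
 * the next list, so callers can drain pr_cache_lists with a simple loop.
 */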
struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	mtx_enter(&pp->pr_mtx);
	do {
		next = pl->ci_next;
		pool_do_put(pp, pl);
		pl = next;
	} while (pl != NULL);
	mtx_leave(&pp->pr_mtx);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

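/*
 * Called from pool_gc_pages() for every pool with a cache: return one
 * idle list of items to the pool, and grow the per-CPU list length if
 * pr_cache_mtx has seen a lot of contention since the last run.
 */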
void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention;

	if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    mtx_enter_try(&pp->pr_cache_mtx)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_tick = ticks;

			pp->pr_cache_ngc++;
		}

		mtx_leave(&pp->pr_cache_mtx);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * if there's a lot of contention on the pr_cache_mtx then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.
	 */

	contention = pp->pr_cache_contention;
	if ((contention - pp->pr_cache_contention_prev) > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	}
	pp->pr_cache_contention_prev = contention;
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the mtx so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	mtx_enter(&pp->pr_cache_mtx);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	mtx_leave(&pp->pr_cache_mtx);
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	mtx_enter(&pp->pr_cache_mtx);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	mtx_leave(&pp->pr_cache_mtx);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */