/*	$OpenBSD: subr_pool.c,v 1.204 2016/11/21 01:44:06 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/percpu.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
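/*
 * A minimal usage sketch of the interface implemented below (illustrative
 * only; "struct foo", "foo_pool" and the "foopl" wait channel are made-up
 * names, not part of this file).  pool_cache_init() may additionally be
 * called on a pool to enable the per-CPU item caches implemented further
 * down.
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 *
 *	pool_destroy(&foo_pool);
 */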
/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)		\
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)		\
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_gets;
	uint64_t		 pc_puts;
	uint64_t		 pc_fails;

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
#endif
void	 pool_cache_info(struct pool *, struct kinfo_pool *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int	pool_wait_free = 1;
int	pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ?
	    pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, pp->pr_ipl);
	mtx_init(&pp->pr_requests_mtx, pp->pr_ipl);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};
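/*
 * Illustrative sketch of the asynchronous interface above (hypothetical
 * names, not part of this file): a caller that cannot sleep queues a
 * pool_request, and pool_runqueue() hands the item to the callback once
 * an allocation can be satisfied.  The first callback argument is the
 * cookie given to pool_request_init(), the second is the allocated item.
 *
 *	void
 *	foo_ready(void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *		... use item ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_ready, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 */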
/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER(pp->pr_ipl),
		    NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	mtx_enter(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		mtx_leave(&pp->pr_mtx);

		pool_cache_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
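/*
 * Illustrative sketch of reading these nodes from userland (not part of
 * this file; error handling omitted, "serial" stands for a pool's
 * pr_serial number):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 *	struct kinfo_pool kp;
 *	mib[2] = KERN_POOL_POOL;
 *	mib[3] = serial;
 *	len = sizeof(kp);
 *	sysctl(mib, 4, &kp, &len, NULL, 0);
 */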
void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
		    IPL_NONE, PR_WAITOK, "plcache", NULL);
	}

	KASSERT(pp->pr_size >= sizeof(*pc));

	cm = cpumem_get(&pool_caches);

	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nlist = 0;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_gets = 0;
		pc->pc_puts = 0;
		pc->pc_fails = 0;
		pc->pc_nout = 0;
	}

	pp->pr_cache = cm;
}

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&ci->ci_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)ci;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
		mtx_enter(&pp->pr_cache_mtx);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	mtx_leave(&pp->pr_cache_mtx);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_cache_item *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
		pp->pr_cache_nlist--;

		pool_cache_item_magic(pp, pl);
	}

	/* fold this cpu's nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
	pool_list_enter(pp);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
	pp->pr_cache_nlist++;

	/* fold this cpu's nout into the global while we have the lock */
	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_fails++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_gets++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_puts++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
	struct pool_cache_item *rpl, *next;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, ci_nextl);

	do {
		next = pl->ci_next;
		pool_put(pp, pl);
		pl = next;
	} while (pl != NULL);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the mtx so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_gets;
			nput = pc->pc_puts;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	mtx_enter(&pp->pr_cache_mtx);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	mtx_leave(&pp->pr_cache_mtx);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
#endif /* MULTIPROCESSOR */