/*	$OpenBSD: subr_pool.c,v 1.206 2017/02/08 05:28:30 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/percpu.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
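
/*
 * An illustrative sketch of the consumer-facing API described above; the
 * names "foo_pool" and "struct foo" are hypothetical and not part of this
 * file.  A subsystem declares a pool, initializes it once, and then gets
 * and puts fixed-size items:
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */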

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item {
        u_long                          pi_magic;
        XSIMPLEQ_ENTRY(pool_item)       pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
        /* Page headers */
        TAILQ_ENTRY(pool_page_header)
                                ph_entry;       /* pool page list */
        XSIMPLEQ_HEAD(, pool_item)
                                ph_items;       /* free items on the page */
        RBT_ENTRY(pool_page_header)
                                ph_node;        /* off-page page headers */
        unsigned int            ph_nmissing;    /* # of chunks in use */
        caddr_t                 ph_page;        /* this page's address */
        caddr_t                 ph_colored;     /* page's colored address */
        unsigned long           ph_magic;
        int                     ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
        struct pool_cache_item  *ci_next;       /* next item in list */
        unsigned long            ci_nitems;     /* number of items in list */
        TAILQ_ENTRY(pool_cache_item)
                                 ci_nextl;      /* entry in list of lists */
};

/* we store whether the cached item is poisoned in the high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK     0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON   0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)                                     \
        ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)                                   \
        ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)

struct pool_cache {
        struct pool_cache_item  *pc_actv;       /* active list of items */
        unsigned long            pc_nactv;      /* actv head nitems cache */
        struct pool_cache_item  *pc_prev;       /* previous list of items */

        uint64_t                 pc_gen;        /* generation number */
        uint64_t                 pc_gets;
        uint64_t                 pc_puts;
        uint64_t                 pc_fails;

        int                      pc_nout;
};

void    *pool_cache_get(struct pool *);
void     pool_cache_put(struct pool *, void *);
void     pool_cache_destroy(struct pool *);
#endif
void     pool_cache_info(struct pool *, struct kinfo_pool *);

#ifdef POOL_DEBUG
int     pool_debug = 1;
#else
int     pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
         pool_p_alloc(struct pool *, int, int *);
void     pool_p_insert(struct pool *, struct pool_page_header *);
void     pool_p_remove(struct pool *, struct pool_page_header *);
void     pool_p_free(struct pool *, struct pool_page_header *);

void     pool_update_curpage(struct pool *);
void    *pool_do_get(struct pool *, int, int *);
int      pool_chk_page(struct pool *, struct pool_page_header *, int);
int      pool_chk(struct pool *);
void     pool_get_done(void *, void *);
void     pool_runqueue(struct pool *, int);

void    *pool_allocator_alloc(struct pool *, int, int *);
void     pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void    *pool_page_alloc(struct pool *, int, int *);
void     pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
        pool_page_alloc,
        pool_page_free,
        POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void    *pool_multi_alloc(struct pool *, int, int *);
void     pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
        pool_multi_alloc,
        pool_multi_free,
        POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void    *pool_multi_alloc_ni(struct pool *, int, int *);
void     pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
        pool_multi_alloc_ni,
        pool_multi_free_ni,
        POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

#ifdef DDB
void     pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
             __attribute__((__format__(__kprintf__,1,2))));
void     pool_print1(struct pool *, const char *, int (*)(const char *, ...)
             __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void    pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void    pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
        vaddr_t va = (vaddr_t)a->ph_page;
        vaddr_t vb = (vaddr_t)b->ph_page;

        /* the compares in this order are important for the NFIND to work */
        if (vb < va)
                return (-1);
        if (vb > va)
                return (1);

        return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
        struct pool_page_header *ph, key;

        if (POOL_INPGHDR(pp)) {
                caddr_t page;

                page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

                return ((struct pool_page_header *)(page + pp->pr_phoffset));
        }

        key.ph_page = v;
        ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
        if (ph == NULL)
                panic("%s: %s: page header missing", __func__, pp->pr_wchan);

        KASSERT(ph->ph_page <= (caddr_t)v);
        if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
                panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

        return (ph);
}
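
/*
 * A note on the lookup above: for pools with off-page headers the item
 * address alone does not locate its header, so pr_find_pagehead() does an
 * RBT_NFIND on the header tree.  Because phtree_compare() compares its
 * arguments in reverse, the tree is ordered by descending ph_page and
 * NFIND returns the header with the largest ph_page that is still <= the
 * item address; the KASSERT and range check above then verify the item
 * really belongs to that page.
 */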

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
        int off = 0, space;
        unsigned int pgsize = PAGE_SIZE, items;
        size_t pa_pagesz;
#ifdef DIAGNOSTIC
        struct pool *iter;
#endif

        if (align == 0)
                align = ALIGN(1);

        if (size < sizeof(struct pool_item))
                size = sizeof(struct pool_item);

        size = roundup(size, align);

        while (size * 8 > pgsize)
                pgsize <<= 1;

        if (palloc == NULL) {
                if (pgsize > PAGE_SIZE) {
                        palloc = ISSET(flags, PR_WAITOK) ?
                            &pool_allocator_multi_ni : &pool_allocator_multi;
                } else
                        palloc = &pool_allocator_single;

                pa_pagesz = palloc->pa_pagesz;
        } else {
                size_t pgsizes;

                pa_pagesz = palloc->pa_pagesz;
                if (pa_pagesz == 0)
                        pa_pagesz = POOL_ALLOC_DEFAULT;

                pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

                /* make sure the allocator can fit at least one item */
                if (size > pgsizes) {
                        panic("%s: pool %s item size 0x%zx > "
                            "allocator %p sizes 0x%zx", __func__, wchan,
                            size, palloc, pgsizes);
                }

                /* shrink pgsize until it fits into the range */
                while (!ISSET(pgsizes, pgsize))
                        pgsize >>= 1;
        }
        KASSERT(ISSET(pa_pagesz, pgsize));

        items = pgsize / size;

        /*
         * Decide whether to put the page header off page to avoid
         * wasting too large a part of the page. Off-page page headers
         * go into an RB tree, so we can match a returned item with
         * its header based on the page address.
         */
        if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
                if (pgsize - (size * items) >
                    sizeof(struct pool_page_header)) {
                        off = pgsize - sizeof(struct pool_page_header);
                } else if (sizeof(struct pool_page_header) * 2 >= size) {
                        off = pgsize - sizeof(struct pool_page_header);
                        items = off / size;
                }
        }

        KASSERT(items > 0);

        /*
         * Initialize the pool structure.
         */
        memset(pp, 0, sizeof(*pp));
        TAILQ_INIT(&pp->pr_emptypages);
        TAILQ_INIT(&pp->pr_fullpages);
        TAILQ_INIT(&pp->pr_partpages);
        pp->pr_curpage = NULL;
        pp->pr_npages = 0;
        pp->pr_minitems = 0;
        pp->pr_minpages = 0;
        pp->pr_maxpages = 8;
        pp->pr_size = size;
        pp->pr_pgsize = pgsize;
        pp->pr_pgmask = ~0UL ^ (pgsize - 1);
        pp->pr_phoffset = off;
        pp->pr_itemsperpage = items;
        pp->pr_wchan = wchan;
        pp->pr_alloc = palloc;
        pp->pr_nitems = 0;
        pp->pr_nout = 0;
        pp->pr_hardlimit = UINT_MAX;
        pp->pr_hardlimit_warning = NULL;
        pp->pr_hardlimit_ratecap.tv_sec = 0;
        pp->pr_hardlimit_ratecap.tv_usec = 0;
        pp->pr_hardlimit_warning_last.tv_sec = 0;
        pp->pr_hardlimit_warning_last.tv_usec = 0;
        RBT_INIT(phtree, &pp->pr_phtree);

        /*
         * Use the space between the chunks and the page header
         * for cache coloring.
         */
        space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
        space -= pp->pr_itemsperpage * pp->pr_size;
        pp->pr_align = align;
        pp->pr_maxcolors = (space / align) + 1;
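
        /*
         * For example (hypothetical numbers): with 128 bytes of slack per
         * page and an item alignment of 8, pr_maxcolors is 128 / 8 + 1 = 17,
         * so pool_p_alloc() starts the items of successive pages at offsets
         * 0, 8, 16, ..., 128 to spread them across cache lines.
         */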

        pp->pr_nget = 0;
        pp->pr_nfail = 0;
        pp->pr_nput = 0;
        pp->pr_npagealloc = 0;
        pp->pr_npagefree = 0;
        pp->pr_hiwat = 0;
        pp->pr_nidle = 0;

        pp->pr_ipl = ipl;
        mtx_init(&pp->pr_mtx, pp->pr_ipl);
        mtx_init(&pp->pr_requests_mtx, pp->pr_ipl);
        TAILQ_INIT(&pp->pr_requests);

        if (phpool.pr_size == 0) {
                pool_init(&phpool, sizeof(struct pool_page_header), 0,
                    IPL_HIGH, 0, "phpool", NULL);

                /* make sure phpool won't "recurse" */
                KASSERT(POOL_INPGHDR(&phpool));
        }

        /* pglistalloc/constraint parameters */
        pp->pr_crange = &kp_dirty;

        /* Insert this into the list of all pools. */
        rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
        SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
                if (iter == pp)
                        panic("%s: pool %s already on list", __func__, wchan);
        }
#endif

        pp->pr_serial = ++pool_serial;
        if (pool_serial == 0)
                panic("%s: too much uptime", __func__);

        SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
        pool_count++;
        rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
        struct pool_page_header *ph;
        struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
        if (pp->pr_cache != NULL)
                pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
        if (pp->pr_nout != 0)
                panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

        /* Remove from global pool list */
        rw_enter_write(&pool_lock);
        pool_count--;
        if (pp == SIMPLEQ_FIRST(&pool_head))
                SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
        else {
                prev = SIMPLEQ_FIRST(&pool_head);
                SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
                        if (iter == pp) {
                                SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
                                    pr_poollist);
                                break;
                        }
                        prev = iter;
                }
        }
        rw_exit_write(&pool_lock);

        /* Remove all pages */
        while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
                mtx_enter(&pp->pr_mtx);
                pool_p_remove(pp, ph);
                mtx_leave(&pp->pr_mtx);
                pool_p_free(pp, ph);
        }
        KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
        KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
        pr->pr_handler = handler;
        pr->pr_cookie = cookie;
        pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
        mtx_enter(&pp->pr_requests_mtx);
        TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
        pool_runqueue(pp, PR_NOWAIT);
        mtx_leave(&pp->pr_requests_mtx);
}
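
/*
 * A sketch of the asynchronous interface above (the handler and softc are
 * hypothetical): pool_request() queues a request that is completed either
 * immediately or later from pool_put()/pool_runqueue(), so code that must
 * not sleep can still have an item delivered eventually:
 *
 *	void
 *	foo_got_item(void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *
 *		sc->sc_buf = item;
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_got_item, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 */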

struct pool_get_memory {
        struct mutex mtx;
        void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
        void *v = NULL;
        int slowdown = 0;

#ifdef MULTIPROCESSOR
        if (pp->pr_cache != NULL) {
                v = pool_cache_get(pp);
                if (v != NULL)
                        goto good;
        }
#endif

        KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

        mtx_enter(&pp->pr_mtx);
        if (pp->pr_nout >= pp->pr_hardlimit) {
                if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
                        goto fail;
        } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
                if (ISSET(flags, PR_NOWAIT))
                        goto fail;
        }
        mtx_leave(&pp->pr_mtx);

        if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
                yield();

        if (v == NULL) {
                struct pool_get_memory mem = {
                    MUTEX_INITIALIZER(pp->pr_ipl),
                    NULL };
                struct pool_request pr;

                pool_request_init(&pr, pool_get_done, &mem);
                pool_request(pp, &pr);

                mtx_enter(&mem.mtx);
                while (mem.v == NULL)
                        msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
                mtx_leave(&mem.mtx);

                v = mem.v;
        }

#ifdef MULTIPROCESSOR
good:
#endif
        if (ISSET(flags, PR_ZERO))
                memset(v, 0, pp->pr_size);

        return (v);

fail:
        pp->pr_nfail++;
        mtx_leave(&pp->pr_mtx);
        return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
        struct pool_get_memory *mem = xmem;

        mtx_enter(&mem->mtx);
        mem->v = v;
        mtx_leave(&mem->mtx);

        wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
        struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
        struct pool_request *pr;

        MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
        MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

        if (pp->pr_requesting++)
                return;

        do {
                pp->pr_requesting = 1;

                /* no TAILQ_JOIN? :( */
                while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
                        TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
                        TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
                }
                if (TAILQ_EMPTY(&prl))
                        continue;

                mtx_leave(&pp->pr_requests_mtx);

                mtx_enter(&pp->pr_mtx);
                pr = TAILQ_FIRST(&prl);
                while (pr != NULL) {
                        int slowdown = 0;

                        if (pp->pr_nout >= pp->pr_hardlimit)
                                break;

                        pr->pr_item = pool_do_get(pp, flags, &slowdown);
                        if (pr->pr_item == NULL) /* || slowdown ? */
                                break;

                        pr = TAILQ_NEXT(pr, pr_entry);
                }
                mtx_leave(&pp->pr_mtx);

                while ((pr = TAILQ_FIRST(&prl)) != NULL &&
                    pr->pr_item != NULL) {
                        TAILQ_REMOVE(&prl, pr, pr_entry);
                        (*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
                }

                mtx_enter(&pp->pr_requests_mtx);
        } while (--pp->pr_requesting);

        /* no TAILQ_JOIN :( */
        while ((pr = TAILQ_FIRST(&prl)) != NULL) {
                TAILQ_REMOVE(&prl, pr, pr_entry);
                TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
        }
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
        struct pool_item *pi;
        struct pool_page_header *ph;

        MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

        splassert(pp->pr_ipl);

        /*
         * Account for this item now to avoid races if we need to give up
         * pr_mtx to allocate a page.
         */
        pp->pr_nout++;

        if (pp->pr_curpage == NULL) {
                mtx_leave(&pp->pr_mtx);
                ph = pool_p_alloc(pp, flags, slowdown);
                mtx_enter(&pp->pr_mtx);

                if (ph == NULL) {
                        pp->pr_nout--;
                        return (NULL);
                }

                pool_p_insert(pp, ph);
        }

        ph = pp->pr_curpage;
        pi = XSIMPLEQ_FIRST(&ph->ph_items);
        if (__predict_false(pi == NULL))
                panic("%s: %s: page empty", __func__, pp->pr_wchan);

        if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
                panic("%s: %s free list modified: "
                    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
                    __func__, pp->pr_wchan, ph->ph_page, pi,
                    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
        }

        XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
        if (pool_debug && POOL_PHPOISON(ph)) {
                size_t pidx;
                uint32_t pval;
                if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                    &pidx, &pval)) {
                        int *ip = (int *)(pi + 1);
                        panic("%s: %s free list modified: "
                            "page %p; item addr %p; offset 0x%zx=0x%x",
                            __func__, pp->pr_wchan, ph->ph_page, pi,
                            pidx * sizeof(int), ip[pidx]);
                }
        }
#endif /* DIAGNOSTIC */

        if (ph->ph_nmissing++ == 0) {
                /*
                 * This page was previously empty. Move it to the list of
                 * partially-full pages. This page is already curpage.
                 */
                TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

                pp->pr_nidle--;
        }

        if (ph->ph_nmissing == pp->pr_itemsperpage) {
                /*
                 * This page is now full. Move it to the full list
                 * and select a new current page.
                 */
                TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
                pool_update_curpage(pp);
        }

        pp->pr_nget++;

        return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
        struct pool_item *pi = v;
        struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
        if (v == NULL)
                panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
        if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
                pool_cache_put(pp, v);
                return;
        }
#endif

        mtx_enter(&pp->pr_mtx);

        splassert(pp->pr_ipl);

        ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
        if (pool_debug) {
                struct pool_item *qi;
                XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
                        if (pi == qi) {
                                panic("%s: %s: double pool_put: %p", __func__,
                                    pp->pr_wchan, pi);
                        }
                }
        }
#endif /* DIAGNOSTIC */

        pi->pi_magic = POOL_IMAGIC(ph, pi);
        XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
        if (POOL_PHPOISON(ph))
                poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

        if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
                /*
                 * The page was previously completely full, move it to the
                 * partially-full list.
                 */
                TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
        }

        if (ph->ph_nmissing == 0) {
                /*
                 * The page is now empty, so move it to the empty page list.
                 */
                pp->pr_nidle++;

                ph->ph_tick = ticks;
                TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
                pool_update_curpage(pp);
        }

        pp->pr_nout--;
        pp->pr_nput++;

        /* is it time to free a page? */
        if (pp->pr_nidle > pp->pr_maxpages &&
            (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
            (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
                freeph = ph;
                pool_p_remove(pp, freeph);
        }
        mtx_leave(&pp->pr_mtx);

        if (freeph != NULL)
                pool_p_free(pp, freeph);

        if (!TAILQ_EMPTY(&pp->pr_requests)) {
                mtx_enter(&pp->pr_requests_mtx);
                pool_runqueue(pp, PR_NOWAIT);
                mtx_leave(&pp->pr_requests_mtx);
        }
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
        struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
        struct pool_page_header *ph;
        int newpages;

        newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

        while (newpages-- > 0) {
                int slowdown = 0;

                ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
                if (ph == NULL) /* or slowdown? */
                        break;

                TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
        }

        mtx_enter(&pp->pr_mtx);
        while ((ph = TAILQ_FIRST(&pl)) != NULL) {
                TAILQ_REMOVE(&pl, ph, ph_entry);
                pool_p_insert(pp, ph);
        }
        mtx_leave(&pp->pr_mtx);

        return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
        struct pool_page_header *ph;
        struct pool_item *pi;
        caddr_t addr;
        int n;

        MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
        KASSERT(pp->pr_size >= sizeof(*pi));

        addr = pool_allocator_alloc(pp, flags, slowdown);
        if (addr == NULL)
                return (NULL);

        if (POOL_INPGHDR(pp))
                ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
        else {
                ph = pool_get(&phpool, flags);
                if (ph == NULL) {
                        pool_allocator_free(pp, addr);
                        return (NULL);
                }
        }

        XSIMPLEQ_INIT(&ph->ph_items);
        ph->ph_page = addr;
        addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
        ph->ph_colored = addr;
        ph->ph_nmissing = 0;
        arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
        /* use a bit in ph_magic to record if we poison page items */
        if (pool_debug)
                SET(ph->ph_magic, POOL_MAGICBIT);
        else
                CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

        n = pp->pr_itemsperpage;
        while (n--) {
                pi = (struct pool_item *)addr;
                pi->pi_magic = POOL_IMAGIC(ph, pi);
                XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
                if (POOL_PHPOISON(ph))
                        poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

                addr += pp->pr_size;
        }

        return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
        struct pool_item *pi;

        MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
        KASSERT(ph->ph_nmissing == 0);

        XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
                        panic("%s: %s free list modified: "
                            "page %p; item addr %p; offset 0x%x=0x%lx",
                            __func__, pp->pr_wchan, ph->ph_page, pi,
                            0, pi->pi_magic);
                }

#ifdef DIAGNOSTIC
                if (POOL_PHPOISON(ph)) {
                        size_t pidx;
                        uint32_t pval;
                        if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                            &pidx, &pval)) {
                                int *ip = (int *)(pi + 1);
                                panic("%s: %s free list modified: "
                                    "page %p; item addr %p; offset 0x%zx=0x%x",
                                    __func__, pp->pr_wchan, ph->ph_page, pi,
                                    pidx * sizeof(int), ip[pidx]);
                        }
                }
#endif
        }

        pool_allocator_free(pp, ph->ph_page);

        if (!POOL_INPGHDR(pp))
                pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
        MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

        /* If the pool was depleted, point at the new page */
        if (pp->pr_curpage == NULL)
                pp->pr_curpage = ph;

        TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
        if (!POOL_INPGHDR(pp))
                RBT_INSERT(phtree, &pp->pr_phtree, ph);

        pp->pr_nitems += pp->pr_itemsperpage;
        pp->pr_nidle++;

        pp->pr_npagealloc++;
        if (++pp->pr_npages > pp->pr_hiwat)
                pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
        MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

        pp->pr_npagefree++;
        pp->pr_npages--;
        pp->pr_nidle--;
        pp->pr_nitems -= pp->pr_itemsperpage;

        if (!POOL_INPGHDR(pp))
                RBT_REMOVE(phtree, &pp->pr_phtree, ph);
        TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

        pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
        pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
        if (pp->pr_curpage == NULL) {
                pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
        }
}

void
pool_setlowat(struct pool *pp, int n)
{
        int prime = 0;

        mtx_enter(&pp->pr_mtx);
        pp->pr_minitems = n;
        pp->pr_minpages = (n == 0)
            ? 0
            : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

        if (pp->pr_nitems < n)
                prime = n - pp->pr_nitems;
        mtx_leave(&pp->pr_mtx);

        if (prime > 0)
                pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
        pp->pr_maxpages = (n == 0)
            ? 0
            : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
        int error = 0;

        if (n < pp->pr_nout) {
                error = EINVAL;
                goto done;
        }

        pp->pr_hardlimit = n;
        pp->pr_hardlimit_warning = warnmsg;
        pp->pr_hardlimit_ratecap.tv_sec = ratecap;
        pp->pr_hardlimit_warning_last.tv_sec = 0;
        pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
        return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
        pp->pr_crange = mode;
}
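
/*
 * Tuning sketch (hypothetical pool and numbers): the calls above let a
 * subsystem pre-reserve items and bound a pool after pool_init(), e.g.
 *
 *	pool_setlowat(&foo_pool, 32);
 *	pool_sethiwat(&foo_pool, 256);
 *	pool_sethardlimit(&foo_pool, 1024, "foopl limit", 60);
 *
 * pool_setlowat() primes the pool so pages for at least 32 items stay
 * allocated, pool_sethiwat() lets pool_put() free idle pages beyond
 * roughly 256 items' worth, and pool_sethardlimit() makes pool_get()
 * fail (PR_NOWAIT/PR_LIMITFAIL) or wait (PR_WAITOK) once 1024 items
 * are outstanding.
 */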

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
        struct pool_page_header *ph, *phnext;
        struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

        mtx_enter(&pp->pr_mtx);
        for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
                phnext = TAILQ_NEXT(ph, ph_entry);

                /* Check our minimum page claim */
                if (pp->pr_npages <= pp->pr_minpages)
                        break;

                /*
                 * If freeing this page would put us below
                 * the low water mark, stop now.
                 */
                if ((pp->pr_nitems - pp->pr_itemsperpage) <
                    pp->pr_minitems)
                        break;

                pool_p_remove(pp, ph);
                TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
        }
        mtx_leave(&pp->pr_mtx);

        if (TAILQ_EMPTY(&pl))
                return (0);

        while ((ph = TAILQ_FIRST(&pl)) != NULL) {
                TAILQ_REMOVE(&pl, ph, ph_entry);
                pool_p_free(pp, ph);
        }

        return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
        struct pool *pp;

        rw_enter_read(&pool_lock);
        SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
                pool_reclaim(pp);
        rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
        pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
        struct pool_page_header *ph;
        struct pool_item *pi;

        TAILQ_FOREACH(ph, pl, ph_entry) {
                (*pr)("\t\tpage %p, color %p, nmissing %d\n",
                    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
                XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                        if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
                                (*pr)("\t\t\titem %p, magic 0x%lx\n",
                                    pi, pi->pi_magic);
                        }
                }
        }
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
        struct pool_page_header *ph;
        int print_pagelist = 0;
        char c;

        while ((c = *modif++) != '\0') {
                if (c == 'p')
                        print_pagelist = 1;
                modif++;
        }

        (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
            pp->pr_maxcolors);
        (*pr)("\talloc %p\n", pp->pr_alloc);
        (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
            pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
        (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
            pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

        (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
            pp->pr_nget, pp->pr_nfail, pp->pr_nput);
        (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
            pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

        if (print_pagelist == 0)
                return;

        if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
                (*pr)("\n\tempty page list:\n");
        pool_print_pagelist(&pp->pr_emptypages, pr);
        if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
                (*pr)("\n\tfull page list:\n");
        pool_print_pagelist(&pp->pr_fullpages, pr);
        if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
                (*pr)("\n\tpartial-page list:\n");
        pool_print_pagelist(&pp->pr_partpages, pr);

        if (pp->pr_curpage == NULL)
                (*pr)("\tno current page\n");
        else
                (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
        struct pool *pp;
        char maxp[16];
        int ovflw;
        char mode;

        mode = modif[0];
        if (mode != '\0' && mode != 'a') {
                db_printf("usage: show all pools [/a]\n");
                return;
        }

        if (mode == '\0')
                db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
                    "Name",
                    "Size",
                    "Requests",
                    "Fail",
                    "Releases",
                    "Pgreq",
                    "Pgrel",
                    "Npage",
                    "Hiwat",
                    "Minpg",
                    "Maxpg",
                    "Idle");
        else
                db_printf("%-12s %18s %18s\n",
                    "Name", "Address", "Allocator");

        SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
                if (mode == 'a') {
                        db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
                            pp->pr_alloc);
                        continue;
                }

                if (!pp->pr_nget)
                        continue;

                if (pp->pr_maxpages == UINT_MAX)
                        snprintf(maxp, sizeof maxp, "inf");
                else
                        snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {      \
        (ovflw) += db_printf((fmt),                     \
            (width) - (fixed) - (ovflw) > 0 ?           \
            (width) - (fixed) - (ovflw) : 0,            \
            (val)) - (width);                           \
        if ((ovflw) < 0)                                \
                (ovflw) = 0;                            \
} while (/* CONSTCOND */0)

                ovflw = 0;
                PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
                PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
                PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
                PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
                PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
                PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
                PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
                PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
                PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
                PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
                PRWORD(ovflw, " %*s", 6, 1, maxp);
                PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

                pool_chk(pp);
        }
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
        struct pool_item *pi;
        caddr_t page;
        int n;
        const char *label = pp->pr_wchan;

        page = (caddr_t)((u_long)ph & pp->pr_pgmask);
        if (page != ph->ph_page && POOL_INPGHDR(pp)) {
                printf("%s: ", label);
                printf("pool(%p:%s): page inconsistency: page %p; "
                    "at page head addr %p (p %p)\n",
                    pp, pp->pr_wchan, ph->ph_page, ph, page);
                return 1;
        }

        for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
            pi != NULL;
            pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
                if ((caddr_t)pi < ph->ph_page ||
                    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
                        printf("%s: ", label);
                        printf("pool(%p:%s): page inconsistency: page %p;"
                            " item ordinal %d; addr %p\n", pp,
                            pp->pr_wchan, ph->ph_page, n, pi);
                        return (1);
                }

                if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
                        printf("%s: ", label);
                        printf("pool(%p:%s): free list modified: "
                            "page %p; item ordinal %d; addr %p "
                            "(p %p); offset 0x%x=0x%lx\n",
                            pp, pp->pr_wchan, ph->ph_page, n, pi, page,
                            0, pi->pi_magic);
                }

#ifdef DIAGNOSTIC
                if (POOL_PHPOISON(ph)) {
                        size_t pidx;
                        uint32_t pval;
                        if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                            &pidx, &pval)) {
                                int *ip = (int *)(pi + 1);
                                printf("pool(%s): free list modified: "
                                    "page %p; item ordinal %d; addr %p "
                                    "(p %p); offset 0x%zx=0x%x\n",
                                    pp->pr_wchan, ph->ph_page, n, pi,
                                    page, pidx * sizeof(int), ip[pidx]);
                        }
                }
#endif /* DIAGNOSTIC */
        }
        if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
                printf("pool(%p:%s): page inconsistency: page %p;"
                    " %d on list, %d missing, %d items per page\n", pp,
                    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
                    pp->pr_itemsperpage);
                return 1;
        }
        if (expected >= 0 && n != expected) {
                printf("pool(%p:%s): page inconsistency: page %p;"
                    " %d on list, %d missing, %d expected\n", pp,
                    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
                    expected);
                return 1;
        }
        return 0;
}

int
pool_chk(struct pool *pp)
{
        struct pool_page_header *ph;
        int r = 0;

        TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
                r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
        TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
                r += pool_chk_page(pp, ph, 0);
        TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
                r += pool_chk_page(pp, ph, -1);

        return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
    __attribute__((__format__(__kprintf__,1,2)))))
{
        struct pool_page_header *ph;
        struct pool_item *pi;
        caddr_t cp;
        int n;

        TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
                cp = ph->ph_colored;
                n = ph->ph_nmissing;

                while (n--) {
                        func(cp, full, pr);
                        cp += pp->pr_size;
                }
        }

        TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
                cp = ph->ph_colored;
                n = ph->ph_nmissing;

                do {
                        XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                                if (cp == (caddr_t)pi)
                                        break;
                        }
                        if (cp != (caddr_t)pi) {
                                func(cp, full, pr);
                                n--;
                        }

                        cp += pp->pr_size;
                } while (n > 0);
        }
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
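
/*
 * Userland sketch (hypothetical, error handling omitted) of reading the
 * kinfo_pool for the pool with serial number 1 through sysctl(2):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */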
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
        struct kinfo_pool pi;
        struct pool *pp;
        int rv = ENOENT;

        switch (name[0]) {
        case KERN_POOL_NPOOLS:
                if (namelen != 1)
                        return (ENOTDIR);
                return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

        case KERN_POOL_NAME:
        case KERN_POOL_POOL:
                break;
        default:
                return (EOPNOTSUPP);
        }

        if (namelen != 2)
                return (ENOTDIR);

        rw_enter_read(&pool_lock);

        SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
                if (name[1] == pp->pr_serial)
                        break;
        }

        if (pp == NULL)
                goto done;

        switch (name[0]) {
        case KERN_POOL_NAME:
                rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
                break;
        case KERN_POOL_POOL:
                memset(&pi, 0, sizeof(pi));

                mtx_enter(&pp->pr_mtx);
                pi.pr_size = pp->pr_size;
                pi.pr_pgsize = pp->pr_pgsize;
                pi.pr_itemsperpage = pp->pr_itemsperpage;
                pi.pr_npages = pp->pr_npages;
                pi.pr_minpages = pp->pr_minpages;
                pi.pr_maxpages = pp->pr_maxpages;
                pi.pr_hardlimit = pp->pr_hardlimit;
                pi.pr_nout = pp->pr_nout;
                pi.pr_nitems = pp->pr_nitems;
                pi.pr_nget = pp->pr_nget;
                pi.pr_nput = pp->pr_nput;
                pi.pr_nfail = pp->pr_nfail;
                pi.pr_npagealloc = pp->pr_npagealloc;
                pi.pr_npagefree = pp->pr_npagefree;
                pi.pr_hiwat = pp->pr_hiwat;
                pi.pr_nidle = pp->pr_nidle;
                mtx_leave(&pp->pr_mtx);

                pool_cache_info(pp, &pi);

                rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
                break;
        }

done:
        rw_exit_read(&pool_lock);

        return (rv);
}

void
pool_gc_sched(void *null)
{
        task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
        struct pool *pp;
        struct pool_page_header *ph, *freeph;

        rw_enter_read(&pool_lock);
        SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
                if (pp->pr_nidle <= pp->pr_minpages || /* guess */
                    !mtx_enter_try(&pp->pr_mtx)) /* try */
                        continue;

                /* is it time to free a page? */
                if (pp->pr_nidle > pp->pr_minpages &&
                    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
                    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
                        freeph = ph;
                        pool_p_remove(pp, freeph);
                } else
                        freeph = NULL;

                mtx_leave(&pp->pr_mtx);

                if (freeph != NULL)
                        pool_p_free(pp, freeph);
        }
        rw_exit_read(&pool_lock);

        timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
        void *v;

        v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
        if (v != NULL && POOL_INPGHDR(pp)) {
                vaddr_t addr = (vaddr_t)v;
                if ((addr & pp->pr_pgmask) != addr) {
                        panic("%s: %s page address %p isn't aligned to %u",
                            __func__, pp->pr_wchan, v, pp->pr_pgsize);
                }
        }
#endif

        return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
        struct pool_allocator *pa = pp->pr_alloc;

        (*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
        struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

        kd.kd_waitok = ISSET(flags, PR_WAITOK);
        kd.kd_slowdown = slowdown;

        return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
        km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
        struct kmem_va_mode kv = kv_intrsafe;
        struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
        void *v;
        int s;

        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;

        kd.kd_waitok = ISSET(flags, PR_WAITOK);
        kd.kd_slowdown = slowdown;

        s = splvm();
        v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
        splx(s);

        return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
        struct kmem_va_mode kv = kv_intrsafe;
        int s;

        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;

        s = splvm();
        km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
        splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
        struct kmem_va_mode kv = kv_any;
        struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
        void *v;

        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;

        kd.kd_waitok = ISSET(flags, PR_WAITOK);
        kd.kd_slowdown = slowdown;

        KERNEL_LOCK();
        v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
        KERNEL_UNLOCK();

        return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
        struct kmem_va_mode kv = kv_any;

        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;

        KERNEL_LOCK();
        km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
        KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */
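
/*
 * A sketch of how a pool opts in to the per-CPU caches below ("foo_pool"
 * is hypothetical): after pool_init(), and once the cpumem/percpu code is
 * up, calling
 *
 *	pool_cache_init(&foo_pool);
 *
 * makes subsequent pool_get()/pool_put() calls go through this CPU's
 * pc_actv/pc_prev free lists before falling back to the pool itself.
 */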
void
pool_cache_init(struct pool *pp)
{
        struct cpumem *cm;
        struct pool_cache *pc;
        struct cpumem_iter i;

        if (pool_caches.pr_size == 0) {
                pool_init(&pool_caches, sizeof(struct pool_cache), 64,
                    IPL_NONE, PR_WAITOK, "plcache", NULL);
        }

        KASSERT(pp->pr_size >= sizeof(*pc));

        cm = cpumem_get(&pool_caches);

        mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
        arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
        TAILQ_INIT(&pp->pr_cache_lists);
        pp->pr_cache_nlist = 0;
        pp->pr_cache_items = 8;
        pp->pr_cache_contention = 0;

        CPUMEM_FOREACH(pc, &i, cm) {
                pc->pc_actv = NULL;
                pc->pc_nactv = 0;
                pc->pc_prev = NULL;

                pc->pc_gets = 0;
                pc->pc_puts = 0;
                pc->pc_fails = 0;
                pc->pc_nout = 0;
        }

        pp->pr_cache = cm;
}

static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
        unsigned long *entry = (unsigned long *)&ci->ci_nextl;

        entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
        entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}

static inline void
pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
{
        unsigned long *entry;
        unsigned long val;

        entry = (unsigned long *)&ci->ci_nextl;
        val = pp->pr_cache_magic[0] ^ (u_long)ci;
        if (*entry != val)
                goto fail;

        entry++;
        val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
        if (*entry != val)
                goto fail;

        return;

fail:
        panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
            __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
            *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
        if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
                mtx_enter(&pp->pr_cache_mtx);
                pp->pr_cache_contention++;
        }
}

static inline void
pool_list_leave(struct pool *pp)
{
        mtx_leave(&pp->pr_cache_mtx);
}

static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
{
        struct pool_cache_item *pl;

        pool_list_enter(pp);
        pl = TAILQ_FIRST(&pp->pr_cache_lists);
        if (pl != NULL) {
                TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
                pp->pr_cache_nlist--;

                pool_cache_item_magic(pp, pl);
        }

        /* fold this cpu's nout into the global while we have the lock */
        pp->pr_cache_nout += pc->pc_nout;
        pc->pc_nout = 0;
        pool_list_leave(pp);

        return (pl);
}

static inline void
pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
    struct pool_cache_item *ci)
{
        pool_list_enter(pp);
        TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
        pp->pr_cache_nlist++;

        /* fold this cpu's nout into the global while we have the lock */
        pp->pr_cache_nout += pc->pc_nout;
        pc->pc_nout = 0;
        pool_list_leave(pp);
}
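
/*
 * pc_gen works like a sequence number: pool_cache_enter() and
 * pool_cache_leave() below both increment it, so it is odd while the
 * owning CPU is working on its cache and even otherwise.
 * pool_cache_info() uses this to read pc_gets/pc_puts from other CPUs
 * without a lock: it waits for an even value and rereads until the
 * generation is unchanged.
 */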
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
        struct pool_cache *pc;

        pc = cpumem_enter(pp->pr_cache);
        *s = splraise(pp->pr_ipl);
        pc->pc_gen++;

        return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
        pc->pc_gen++;
        splx(s);
        cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
        struct pool_cache *pc;
        struct pool_cache_item *ci;
        int s;

        pc = pool_cache_enter(pp, &s);

        if (pc->pc_actv != NULL) {
                ci = pc->pc_actv;
        } else if (pc->pc_prev != NULL) {
                ci = pc->pc_prev;
                pc->pc_prev = NULL;
        } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
                pc->pc_fails++;
                goto done;
        }

        pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
        if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
                size_t pidx;
                uint32_t pval;

                if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
                    &pidx, &pval)) {
                        int *ip = (int *)(ci + 1);
                        ip += pidx;

                        panic("%s: %s cpu free list modified: "
                            "item addr %p+%zu 0x%x!=0x%x",
                            __func__, pp->pr_wchan, ci,
                            (caddr_t)ip - (caddr_t)ci, *ip, pval);
                }
        }
#endif

        pc->pc_actv = ci->ci_next;
        pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
        pc->pc_gets++;
        pc->pc_nout++;

done:
        pool_cache_leave(pp, pc, s);

        return (ci);
}

void
pool_cache_put(struct pool *pp, void *v)
{
        struct pool_cache *pc;
        struct pool_cache_item *ci = v;
        unsigned long nitems;
        int s;
#ifdef DIAGNOSTIC
        int poison = pool_debug && pp->pr_size > sizeof(*ci);

        if (poison)
                poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

        pc = pool_cache_enter(pp, &s);

        nitems = pc->pc_nactv;
        if (nitems >= pp->pr_cache_items) {
                if (pc->pc_prev != NULL)
                        pool_cache_list_free(pp, pc, pc->pc_prev);

                pc->pc_prev = pc->pc_actv;

                pc->pc_actv = NULL;
                pc->pc_nactv = 0;
                nitems = 0;
        }

        ci->ci_next = pc->pc_actv;
        ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
        ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
        pool_cache_item_magic(pp, ci);

        pc->pc_actv = ci;
        pc->pc_nactv = nitems;

        pc->pc_puts++;
        pc->pc_nout--;

        pool_cache_leave(pp, pc, s);
}

struct pool_cache_item *
pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
{
        struct pool_cache_item *rpl, *next;

        if (pl == NULL)
                return (NULL);

        rpl = TAILQ_NEXT(pl, ci_nextl);

        do {
                next = pl->ci_next;
                pool_put(pp, pl);
                pl = next;
        } while (pl != NULL);

        return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
        struct pool_cache *pc;
        struct pool_cache_item *pl;
        struct cpumem_iter i;
        struct cpumem *cm;

        cm = pp->pr_cache;
        pp->pr_cache = NULL; /* make pool_put avoid the cache */

        CPUMEM_FOREACH(pc, &i, cm) {
                pool_cache_list_put(pp, pc->pc_actv);
                pool_cache_list_put(pp, pc->pc_prev);
        }

        cpumem_put(&pool_caches, cm);

        pl = TAILQ_FIRST(&pp->pr_cache_lists);
        while (pl != NULL)
                pl = pool_cache_list_put(pp, pl);
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
        struct pool_cache *pc;
        struct cpumem_iter i;

        if (pp->pr_cache == NULL)
                return;

        /* loop through the caches twice to collect stats */

        /* once without the mtx so we can yield while reading nget/nput */
        CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
                uint64_t gen, nget, nput;

                do {
                        while ((gen = pc->pc_gen) & 1)
                                yield();

                        nget = pc->pc_gets;
                        nput = pc->pc_puts;
                } while (gen != pc->pc_gen);

                pi->pr_nget += nget;
                pi->pr_nput += nput;
        }

        /* and once with the mtx so we can get consistent nout values */
        mtx_enter(&pp->pr_cache_mtx);
        CPUMEM_FOREACH(pc, &i, pp->pr_cache)
                pi->pr_nout += pc->pc_nout;

        pi->pr_nout += pp->pr_cache_nout;
        mtx_leave(&pp->pr_cache_mtx);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
        /* nop */
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
        /* nop */
}
#endif /* MULTIPROCESSOR */