/*	$OpenBSD: subr_pool.c,v 1.199 2016/11/02 01:20:50 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/percpu.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
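 * The serial number is what the kern.pool sysctl handler below uses to look
 * up an individual pool, so it is assigned once and never reused.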
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock protecting the variables above that make up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RBT_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef MULTIPROCESSOR
struct pool_list {
	struct pool_list	*pl_next;	/* next in list */
	unsigned long		 pl_cookie;
	struct pool_list	*pl_nextl;	/* next list */
	unsigned long		 pl_nitems;	/* items in list */
};

struct pool_cache {
	struct pool_list	*pc_actv;
	unsigned long		 pc_nactv;	/* cache pc_actv nitems */
	struct pool_list	*pc_prev;

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_gets;
	uint64_t		 pc_puts;
	uint64_t		 pc_fails;

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
#endif
void	 pool_cache_info(struct pool *, struct kinfo_pool *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_item_header *a,
    const struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

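	/*
	 * A worked example of the header placement above, with made-up
	 * numbers: on a 4096 byte page, 72 byte items give
	 * items = 4096 / 72 = 56 and leave 64 bytes of slack. If the page
	 * header is, say, 48 bytes, that slack is enough to keep the header
	 * in the page itself (off = 4096 - 48, so POOL_INPGHDR() is true).
	 * When there is no such slack and the items are large, off stays 0
	 * and the page header is allocated from phpool instead.
	 */
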
	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, pp->pr_ipl);
	mtx_init(&pp->pr_requests_mtx, pp->pr_ipl);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
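 *
 * The pool must have no outstanding items: the per-CPU cache, if any, is
 * torn down first and every idle page is handed back to the allocator.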
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER(pp->pr_ipl),
		    NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
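		/*
		 * Move the pending requests onto a local list so that
		 * pr_requests_mtx can be released while they are serviced.
		 */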
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
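 *
 * If this put leaves the pool with more than pr_maxpages idle pages and the
 * oldest empty page has not been touched for pool_wait_free seconds, that
 * page is released back to the page allocator as well.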
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	mtx_enter(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
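	/* callers only ever remove idle pages, so the page is on the empty list */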
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}
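
/*
 * Dump each page header on the list and point out any free item whose
 * magic value no longer matches, i.e. a corrupted free list.
 */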
void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
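/*
 * Call `func' on every item currently allocated out of the pool: all items
 * on full pages, and the items on partial pages that are not on the page's
 * free list.
 */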
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		mtx_leave(&pp->pr_mtx);

		pool_cache_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
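		/*
		 * Only free a page if the pool is above its page low water
		 * mark and its oldest empty page has been idle for at least
		 * pool_wait_gc seconds.
		 */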
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
		    IPL_NONE, PR_WAITOK, "plcache", NULL);
	}

	KASSERT(pp->pr_size >= sizeof(*pc));

	cm = cpumem_get(&pool_caches);

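	/*
	 * pr_cache_list is the shared stack of full item lists handed back
	 * by CPUs, pr_cache_items is how many items a per-CPU list holds
	 * before it is retired, and pr_cache_contention counts collisions
	 * on pr_cache_mtx.
	 */
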
	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
	pp->pr_cache_list = NULL;
	pp->pr_cache_nlist = 0;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_gets = 0;
		pc->pc_puts = 0;
		pc->pc_fails = 0;
		pc->pc_nout = 0;
	}

	pp->pr_cache = cm;
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
		mtx_enter(&pp->pr_cache_mtx);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	mtx_leave(&pp->pr_cache_mtx);
}

static inline struct pool_list *
pool_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_list *pl;

	pool_list_enter(pp);
	pl = pp->pr_cache_list;
	if (pl != NULL) {
		pp->pr_cache_list = pl->pl_nextl;
		pp->pr_cache_nlist--;
	}

	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_list_free(struct pool *pp, struct pool_cache *pc, struct pool_list *pl)
{
	pool_list_enter(pp);
	pl->pl_nextl = pp->pr_cache_list;
	pp->pr_cache_list = pl;
	pp->pr_cache_nlist++;

	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_list *pl;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		pl = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		pl = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((pl = pool_list_alloc(pp, pc)) == NULL) {
		pc->pc_fails++;
		goto done;
	}

	pc->pc_actv = pl->pl_next;
	pc->pc_nactv = pl->pl_nitems - 1;
	pc->pc_gets++;
	pc->pc_nout++;
done:
	pool_cache_leave(pp, pc, s);

	return (pl);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_list *pl = v;
	unsigned long cache_items = pp->pr_cache_items;
	unsigned long nitems;
	int s;

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= cache_items) {
		if (pc->pc_prev != NULL)
			pool_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	pl->pl_next = pc->pc_actv;
	pl->pl_nitems = ++nitems;

	pc->pc_actv = pl;
	pc->pc_nactv = nitems;

	pc->pc_puts++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_list *
pool_list_put(struct pool *pp, struct pool_list *pl)
{
	struct pool_list *rpl, *npl;

	if (pl == NULL)
		return (NULL);

	rpl = (struct pool_list *)pl->pl_nextl;

	do {
		npl = pl->pl_next;
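		/*
		 * pl_next lives in the item itself, so it was saved in npl
		 * above before pool_put() reuses that storage.
		 */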
		pool_put(pp, pl);
		pl = npl;
	} while (pl != NULL);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_list *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_list_put(pp, pc->pc_actv);
		pool_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = pp->pr_cache_list;
	while (pl != NULL)
		pl = pool_list_put(pp, pl);
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the mtx so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_gets;
			nput = pc->pc_puts;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	mtx_enter(&pp->pr_cache_mtx);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	mtx_leave(&pp->pr_cache_mtx);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
#endif /* MULTIPROCESSOR */