/*	$OpenBSD: subr_pool.c,v 1.198 2016/09/15 02:00:16 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
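
/*
 * Illustrative sketch (not part of the original source): a typical
 * consumer declares a struct pool, initializes it once, and then gets
 * and puts fixed-size items.  The "struct foo"/"foo_pool"/"foopl" names
 * below are hypothetical; the calls mirror the API implemented in this
 * file.
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, 0,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */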

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RBT_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_item_header *a,
    const struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, pp->pr_ipl);
	mtx_init(&pp->pr_requests_mtx, pp->pr_ipl);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool wont "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER(pp->pr_ipl),
		    NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...)
    __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {		\
	(ovflw) += db_printf((fmt),				\
	    (width) - (fixed) - (ovflw) > 0 ?			\
	    (width) - (fixed) - (ovflw) : 0,			\
	    (val)) - (width);					\
	if ((ovflw) < 0)					\
		(ovflw) = 0;					\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
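
/*
 * Illustrative sketch (not part of the original source): from userland
 * these nodes are reached through the kern.pool subtree, so the number
 * of pools can be read roughly like this (error handling trimmed; the
 * CTL_KERN/KERN_POOL mib layout is an assumption based on the switch
 * above, the KERN_POOL_* names come straight from it):
 *
 *	int mib[3] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *
 *	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * A specific pool is then addressed by its serial number, e.g.
 * { CTL_KERN, KERN_POOL, KERN_POOL_POOL, <serial> }, which returns a
 * struct kinfo_pool.
 */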

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}