/*	$OpenBSD: subr_pool.c,v 1.194 2016/01/15 11:21:58 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
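
/*
 * A rough usage sketch (the "frob" structure, pool and IPL below are
 * made-up examples, not something defined in this file): a subsystem
 * initializes a pool once and then gets and puts fixed-size items.
 *
 *	struct frob { ... };
 *	struct pool frob_pool;
 *
 *	pool_init(&frob_pool, sizeof(struct frob), 0, 0, 0, "frobpl", NULL);
 *	pool_setipl(&frob_pool, IPL_NET);
 *
 *	struct frob *f = pool_get(&frob_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&frob_pool, f);
 *
 * PR_NOWAIT may be passed instead of PR_WAITOK where sleeping is not
 * allowed; pool_get() then returns NULL if no item is available
 * immediately.
 */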

/* List of all pools */
SIMPLEQ_HEAD(, pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
	KASSERT(ioff == 0);
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RB_INIT(&pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);
	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
	mtx_init(&pp->pr_requests_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
		    IPL_NONE : pp->pr_ipl), NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}
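
/*
 * A sketch of the asynchronous interface (the handler, cookie and pool
 * below are made-up examples): a caller that cannot sleep may queue a
 * pool_request and have its handler called with an item once one can
 * be allocated.
 *
 *	void
 *	frob_ready(void *cookie, void *item)
 *	{
 *		struct frob_softc *sc = cookie;
 *		... the item now belongs to the caller ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, frob_ready, sc);
 *	pool_request(&frob_pool, &sc->sc_request);
 *
 * pool_get() itself relies on this mechanism (via pool_get_done() above)
 * when it has to sleep for memory.
 */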

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		if (pp->pr_ipl != -1)
			mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		if (pp->pr_ipl != -1)
			mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
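
/*
 * A sketch of how userland can read these sysctls (roughly what
 * vmstat(8) -m does; error handling is omitted and the serial number 1
 * is only an example):
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool kp;
 *	size_t len = sizeof(kp);
 *
 *	if (sysctl(mib, 4, &kp, &len, NULL, 0) == 0)
 *		printf("%u out of %u items in use\n", kp.pr_nout, kp.pr_nitems);
 *
 * kern.pool.npools (KERN_POOL_NPOOLS) is read with just the three-level
 * name and returns the number of pools as an integer.
 */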

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}