/*	$OpenBSD: subr_pool.c,v 1.191 2015/09/08 13:37:21 kettenis Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
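
/*
 * Typical use, as a rough sketch only (the pool and structure names
 * below are hypothetical, not part of this file):
 *
 *	struct pool ex_pool;
 *
 *	pool_init(&ex_pool, sizeof(struct ex_item), 0, 0, 0, "expl", NULL);
 *	pool_setipl(&ex_pool, IPL_NET);
 *
 *	item = pool_get(&ex_pool, PR_WAITOK);
 *	...
 *	pool_put(&ex_pool, item);
 */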

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
			ph_node;	/* Off-page page headers */
	int		ph_nmissing;	/* # of chunks in use */
	caddr_t		ph_page;	/* this page's address */
	caddr_t		ph_colored;	/* page's colored address */
	u_long		ph_magic;
	int		ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
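
/*
 * Free list corruption detection: every page header gets a random
 * ph_magic when the page is set up, and each free item stores its own
 * address XORed with that magic in pi_magic.  pool_do_get() recomputes
 * POOL_IMAGIC() before handing an item out and panics on a mismatch.
 * One bit of ph_magic (POOL_MAGICBIT) is reserved to remember whether
 * the rest of each free item was poisoned while pool_debug is set.
 */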

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; the name is preserved for compatibility.
 * This is the default allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);

struct pool_allocator pool_allocator_large = {
	pool_large_alloc,
	pool_large_free
};

void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_large_ni = {
	pool_large_alloc_ni,
	pool_large_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
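
/*
 * Note that phtree_compare() deliberately inverts the comparison, so
 * the tree is ordered by descending page address.  RB_NFIND on a key
 * built from an item address therefore returns the header with the
 * largest ph_page that is still <= that address, i.e. the header of
 * the page the item lives on; pr_find_pagehead() below relies on this.
 */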

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
	KASSERT(ioff == 0);
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_large_ni : &pool_allocator_large;
		} else
			palloc = &pool_allocator_nointr;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RB_INIT(&pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);
	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}
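
/*
 * A note on the defaults picked by pool_init() above: when no
 * allocator is given, the page size is doubled until at least eight
 * items fit on a page.  Pools whose page ends up larger than PAGE_SIZE
 * are backed by the "large" allocators (the non-interrupt variant when
 * PR_WAITOK is set), everything else by the single-page allocator.
 */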

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
	mtx_init(&pp->pr_requests_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
		    IPL_NONE : pp->pr_ipl), NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}
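
/*
 * The request machinery above is what lets PR_WAITOK allocations
 * sleep safely: when pool_get() cannot be satisfied right away (no
 * memory, or the pool is at its hard limit), the caller queues a
 * pool_request with pool_get_done() as the handler and sleeps on its
 * pool_get_memory cookie.  pool_runqueue() later fills queued
 * requests, e.g. when pool_put() returns items, and firing the
 * handler wakes the sleeping caller.
 */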

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}
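
/*
 * pr_requesting doubles as a lock and a retry counter in
 * pool_runqueue(): the caller that bumps it from zero services the
 * queue, while concurrent callers only increment it and return.  The
 * servicing thread keeps looping as long as new increments arrived
 * while it was working, so requests queued in the meantime are not
 * left stranded.
 */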

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	mtx_enter(&pp->pr_requests_mtx);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}
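
/*
 * Completely free pages are not returned to the system right away:
 * pool_put() only releases a page once the pool holds more idle pages
 * than pr_maxpages and that page has been empty for at least
 * pool_wait_free seconds.  The periodic garbage collector
 * (pool_gc_pages() below) applies the longer pool_wait_gc timeout to
 * pools that have gone quiet on their own.
 */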

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}
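
/*
 * Locking pattern for the pool_p_* helpers above: pool_p_alloc() and
 * pool_p_free() may sleep in the backend allocator and are therefore
 * called with pr_mtx released, while pool_p_insert() and
 * pool_p_remove() only touch the pool's lists and counters and must be
 * called with pr_mtx held (see the MUTEX_ASSERT_* checks).
 */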

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}
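
/*
 * Illustrative sketch (hypothetical pool and numbers): keep a floor of
 * items allocated up front and cap how many may be handed out at once:
 *
 *	pool_setlowat(&ex_pool, 32);
 *	pool_sethardlimit(&ex_pool, 1024, "out of ex_pool items", 60);
 *
 * pool_setlowat() primes the pool up to the low watermark right away;
 * once 1024 items are outstanding, pool_get() fails immediately for
 * PR_NOWAIT/PR_LIMITFAIL callers and queues a request otherwise.
 */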

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */
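
/*
 * The listing above is reached from ddb(4) with "show all pools"; the
 * "/a" modifier switches from the statistics table to the
 * name/address/allocator form.  In the default form each pool is also
 * run through pool_chk(), so inconsistencies are reported inline.
 */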

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		if (pp->pr_ipl != -1)
			mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		if (pp->pr_ipl != -1)
			mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
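
/*
 * Userland can read these nodes with sysctl(2).  A minimal sketch,
 * illustrative only, assuming the matching KERN_POOL* constants from
 * <sys/sysctl.h>:
 *
 *	int mib[] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *
 *	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * A single pool is fetched the same way with { CTL_KERN, KERN_POOL,
 * KERN_POOL_POOL, <serial> } into a struct kinfo_pool.
 */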

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}