/*	$OpenBSD: subr_pool.c,v 1.193 2015/09/11 09:26:13 kettenis Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
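/*
 * Illustrative sketch (not part of this implementation): a typical consumer
 * declares a pool, initializes it once at attach/init time, and then gets
 * and puts fixed-size items.  The "foo" names below are hypothetical.
 *
 *	struct foo { ... };
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_NONE);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */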
/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
	KASSERT(ioff == 0);
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RB_INIT(&pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);
	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
	mtx_init(&pp->pr_requests_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
		    IPL_NONE : pp->pr_ipl), NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	mtx_enter(&pp->pr_requests_mtx);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
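/*
 * Illustrative userland sketch (assumptions: the top-level MIB node for
 * this handler is KERN_POOL, and struct kinfo_pool plus the KERN_POOL_*
 * second-level names come from <sys/pool.h>); it reads the pool count and
 * then the stats of the pool with serial number 1:
 *
 *	int mib[3] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools;
 *	size_t len = sizeof(npools);
 *	sysctl(mib, 3, &npools, &len, NULL, 0);
 *
 *	int mib2[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool kp;
 *	len = sizeof(kp);
 *	sysctl(mib2, 4, &kp, &len, NULL, 0);
 */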
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		if (pp->pr_ipl != -1)
			mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		if (pp->pr_ipl != -1)
			mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}