/*	$OpenBSD: subr_pool.c,v 1.197 2016/09/15 01:24:08 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
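
/*
 * Illustrative usage (not part of this file): a typical consumer declares
 * a pool for one fixed-size structure, initializes it once, and then gets
 * and puts items.  The names below are hypothetical.
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_NONE);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */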

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock protecting the preceding variables that make up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RBT_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_item_header *a,
    const struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
	KASSERT(ioff == 0);
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);
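
	/*
	 * Worked example of the sizing above (figures are illustrative;
	 * the real page header size is platform dependent).  With a 4096
	 * byte page and a page header of roughly 64 bytes:
	 *
	 *  - a 512 byte item gives items = 8 with no slack; since
	 *    2 * 64 < 512 the header stays off-page (off == 0) and is
	 *    allocated from phpool;
	 *  - a 64 byte item gives items = 64 with no slack, but since
	 *    2 * 64 >= 64 one item's worth of space is given up and the
	 *    header is kept on-page: off = 4096 - 64, items = off / 64.
	 */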

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);
	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}
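
/*
 * Set the interrupt protection level the pool is used at and reinitialize
 * its mutexes to match.  A pool touched from interrupt context is expected
 * to be set to the highest IPL it is used from; for example (illustrative
 * only, not from this file), a pool used by network interrupt handlers
 * would call pool_setipl(&foo_pool, IPL_NET).
 */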
void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
	mtx_init(&pp->pr_requests_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
		    IPL_NONE : pp->pr_ipl), NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}
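
/*
 * Service the list of queued pool_requests.  Only one context runs the
 * queue at a time: later callers just bump pr_requesting and leave, and
 * the running context keeps looping until that count drains to zero.
 * Requests that could not be satisfied are put back on the pool's queue.
 */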
void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
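/*
 * Walk every allocated (in-use) item in a pool and call "func" on it.
 * Items on full pages are all in use; on partial pages, items that are
 * still on the page's free list are skipped.
 */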
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		if (pp->pr_ipl != -1)
			mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		if (pp->pr_ipl != -1)
			mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}
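
/*
 * Run from pool_gc_task on the systqmp taskq: walk all pools and, for each
 * pool holding more idle pages than its minimum, free at most one empty
 * page that has sat unused for longer than pool_wait_gc seconds, then
 * rearm the timeout for another pass in a second.
 */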
void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}