/*	$OpenBSD: subr_pool.c,v 1.185 2015/04/21 13:15:54 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
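
/*
 * Typical use of this API, as a rough sketch only; the foo_pool name,
 * the foo item type and the IPL below are invented for illustration
 * and are not part of this file:
 *
 *	struct pool foo_pool;
 *
 *	void
 *	foo_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
 *		    "foopl", NULL);
 *		pool_setipl(&foo_pool, IPL_BIO);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 */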

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; name preserved for compat, this is the default
 * allocator
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);

struct pool_allocator pool_allocator_large = {
	pool_large_alloc,
	pool_large_free
};

void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_large_ni = {
	pool_large_alloc_ni,
	pool_large_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
	KASSERT(ioff == 0);
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_large_ni : &pool_allocator_large;
		} else
			palloc = &pool_allocator_nointr;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);
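
	/*
	 * Worked example of the decision above (a sketch; it assumes
	 * sizeof(struct pool_item_header) is roughly 96 bytes on LP64,
	 * which is not guaranteed):
	 *
	 *	size 128, pgsize 4096: items = 32 with no slack left over,
	 *	    but 2 * 96 >= 128, so keep the header in the page:
	 *	    off = 4096 - 96 = 4000 and items = 4000 / 128 = 31.
	 *	size 1024, pgsize 4096: items = 4 with no slack, and
	 *	    2 * 96 < 1024, so off stays 0 and the header is
	 *	    allocated off-page from phpool, keeping all 4 items.
	 */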

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RB_INIT(&pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);
	mtx_init(&pp->pr_requests_mtx, IPL_NONE);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
	mtx_init(&pp->pr_requests_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
		    IPL_NONE : pp->pr_ipl), NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}
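
/*
 * The request machinery can also be used directly by callers that cannot
 * sleep but want an item handed to them once one becomes available.
 * A rough sketch only; foo_softc, foo_pool, sc_request, sc_buf and
 * foo_handler are invented names, not part of this file:
 *
 *	void
 *	foo_handler(void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *
 *		sc->sc_buf = item;
 *		wakeup(sc);
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_handler, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 *
 * pool_get() itself uses this path via pool_get_done() when it has to
 * wait for memory.
 */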

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;
	extern int ticks;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	mtx_enter(&pp->pr_mtx);

	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	mtx_enter(&pp->pr_requests_mtx);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}
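
/*
 * A sketch of how a subsystem might combine the tuning knobs above;
 * the pool name and the numbers are invented for illustration.  Keep at
 * least 128 items around, allow up to 32 idle pages before pool_put()
 * starts freeing them, and cap outstanding items at 1024:
 *
 *	pool_setlowat(&foo_pool, 128);
 *	pool_sethiwat(&foo_pool, 32 * foo_pool.pr_itemsperpage);
 *	pool_sethardlimit(&foo_pool, 1024, "out of foo items", 30);
 */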

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		if (pp->pr_ipl != -1)
			mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		if (pp->pr_ipl != -1)
			mtx_leave(&pp->pr_mtx);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}
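
/*
 * From userland these nodes sit below kern.pool. A rough sketch of how
 * a tool might read the stats for one pool, given its serial number;
 * the variable names are invented and error handling is omitted:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */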

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	extern int ticks;
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	KERNEL_LOCK();
	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
	KERNEL_UNLOCK();

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	KERNEL_LOCK();
	(*pa->pa_free)(pp, v);
	KERNEL_UNLOCK();
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
}