/*	$OpenBSD: subr_pool.c,v 1.202 2016/11/02 06:26:16 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
#include <sys/percpu.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RBT_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	u_long			ph_magic;
	int			ph_tick;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

#ifdef MULTIPROCESSOR
struct pool_list {
	struct pool_list	*pl_next;	/* next in list */
	unsigned long		 pl_nitems;	/* items in list */
	TAILQ_ENTRY(pool_list)	 pl_nextl;	/* list of lists */
};

#define POOL_LIST_NITEMS_MASK		0x7ffffffUL
#define POOL_LIST_NITEMS_POISON		0x8000000UL

#define POOL_LIST_POISONED(_pl) \
	ISSET((_pl)->pl_nitems, POOL_LIST_NITEMS_POISON)

#define POOL_LIST_NITEMS(_pl) \
	((_pl)->pl_nitems & POOL_LIST_NITEMS_MASK)

struct pool_cache {
	struct pool_list	*pc_actv;
	unsigned long		 pc_nactv;	/* cache pc_actv nitems */
	struct pool_list	*pc_prev;

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_gets;
	uint64_t		 pc_puts;
	uint64_t		 pc_fails;

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
#endif
void	 pool_cache_info(struct pool *, struct kinfo_pool *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_item_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_item_header *);
void	 pool_p_remove(struct pool *, struct pool_item_header *);
void	 pool_p_free(struct pool *, struct pool_item_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni
};

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
int pool_wait_free = 1;
int pool_wait_gc = 8;

RBT_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_item_header *a,
    const struct pool_item_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/* the compares in this order are important for the NFIND to work */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	if (palloc == NULL) {
		while (size * 8 > pgsize)
			pgsize <<= 1;

		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;
	} else
		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
		off = pgsize - sizeof(struct pool_item_header);
	} else if (sizeof(struct pool_item_header) * 2 >= size) {
		off = pgsize - sizeof(struct pool_item_header);
		items = off / size;
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, pp->pr_ipl);
	mtx_init(&pp->pr_requests_mtx, pp->pr_ipl);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		mtx_enter(&pp->pr_mtx);
		pool_p_remove(pp, ph);
		mtx_leave(&pp->pr_mtx);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	mtx_enter(&pp->pr_requests_mtx);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	mtx_leave(&pp->pr_requests_mtx);
}

struct pool_get_memory {
	struct mutex mtx;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	mtx_enter(&pp->pr_mtx);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	mtx_leave(&pp->pr_mtx);

	if (slowdown && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = {
		    MUTEX_INITIALIZER(pp->pr_ipl),
		    NULL };
		struct pool_request pr;

		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		mtx_enter(&mem.mtx);
		while (mem.v == NULL)
			msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
		mtx_leave(&mem.mtx);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	return (v);

fail:
	pp->pr_nfail++;
	mtx_leave(&pp->pr_mtx);
	return (NULL);
}

void
pool_get_done(void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	mtx_enter(&mem->mtx);
	mem->v = v;
	mtx_leave(&mem->mtx);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		/* no TAILQ_JOIN? :( */
		while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
			TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
			TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
		}
		if (TAILQ_EMPTY(&prl))
			continue;

		mtx_leave(&pp->pr_requests_mtx);

		mtx_enter(&pp->pr_mtx);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		mtx_leave(&pp->pr_mtx);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
		}

		mtx_enter(&pp->pr_requests_mtx);
	} while (--pp->pr_requesting);

	/* no TAILQ_JOIN :( */
	while ((pr = TAILQ_FIRST(&prl)) != NULL) {
		TAILQ_REMOVE(&prl, pr, pr_entry);
		TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	}
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_item_header *ph;

	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_mtx to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		mtx_leave(&pp->pr_mtx);
		ph = pool_p_alloc(pp, flags, slowdown);
		mtx_enter(&pp->pr_mtx);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	mtx_enter(&pp->pr_mtx);

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_tick = ticks;
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}
	mtx_leave(&pp->pr_mtx);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		mtx_enter(&pp->pr_requests_mtx);
		pool_runqueue(pp, PR_NOWAIT);
		mtx_leave(&pp->pr_requests_mtx);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_item_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}

	mtx_enter(&pp->pr_mtx);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_insert(pp, ph);
	}
	mtx_leave(&pp->pr_mtx);

	return (0);
}

struct pool_item_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	int n;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_item_header *ph)
{
	struct pool_item *pi;

	MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_item_header *ph)
{
	MUTEX_ASSERT_LOCKED(&pp->pr_mtx);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	mtx_enter(&pp->pr_mtx);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	mtx_leave(&pp->pr_mtx);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	mtx_enter(&pp->pr_mtx);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
	}
	mtx_leave(&pp->pr_mtx);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_pagelist);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
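/*
 * Walk the allocated (in-use) items of a pool and invoke func on each.
 * Full pages contribute every item; on partially-full pages, addresses
 * still present on the page's free list are skipped.
 */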
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		mtx_enter(&pp->pr_mtx);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		mtx_leave(&pp->pr_mtx);

		pool_cache_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_item_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !mtx_enter_try(&pp->pr_mtx)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		mtx_leave(&pp->pr_mtx);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isnt aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
		    IPL_NONE, PR_WAITOK, "plcache", NULL);
	}

	KASSERT(pp->pr_size >= sizeof(*pc));

	cm = cpumem_get(&pool_caches);

	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nlist = 0;
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_gets = 0;
		pc->pc_puts = 0;
		pc->pc_fails = 0;
		pc->pc_nout = 0;
	}

	pp->pr_cache = cm;
}

static inline void
pool_list_magic(struct pool *pp, struct pool_list *pl)
{
	unsigned long *entry = (unsigned long *)&pl->pl_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)pl;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)pl->pl_next;
}

static inline void
pool_list_magic_check(struct pool *pp, struct pool_list *pl)
{
	unsigned long *entry;
	unsigned long val;

	entry = (unsigned long *)&pl->pl_nextl;
	val = pp->pr_cache_magic[0] ^ (u_long)pl;
	if (*entry != val)
		goto fail;

	entry++;
	val = pp->pr_cache_magic[1] ^ (u_long)pl->pl_next;
	if (*entry != val)
		goto fail;

	return;

fail:
	panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
	    __func__, pp->pr_wchan, pl, (caddr_t)entry - (caddr_t)pl,
	    *entry, val);
}

static inline void
pool_list_enter(struct pool *pp)
{
	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
		mtx_enter(&pp->pr_cache_mtx);
		pp->pr_cache_contention++;
	}
}

static inline void
pool_list_leave(struct pool *pp)
{
	mtx_leave(&pp->pr_cache_mtx);
}

static inline struct pool_list *
pool_list_alloc(struct pool *pp, struct pool_cache *pc)
{
	struct pool_list *pl;

	pool_list_enter(pp);
	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	if (pl != NULL) {
		TAILQ_REMOVE(&pp->pr_cache_lists, pl, pl_nextl);
		pp->pr_cache_nlist--;

		pool_list_magic(pp, pl);
	}

	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);

	return (pl);
}

static inline void
pool_list_free(struct pool *pp, struct pool_cache *pc, struct pool_list *pl)
{
	pool_list_enter(pp);
	TAILQ_INSERT_TAIL(&pp->pr_cache_lists, pl, pl_nextl);
	pp->pr_cache_nlist++;

	pp->pr_cache_nout += pc->pc_nout;
	pc->pc_nout = 0;
	pool_list_leave(pp);
}

static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}

static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}

void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_list *pl;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		pl = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		pl = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((pl = pool_list_alloc(pp, pc)) == NULL) {
		pc->pc_fails++;
		goto done;
	}

	pool_list_magic_check(pp, pl);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_LIST_POISONED(pl)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(pl + 1, pp->pr_size - sizeof(*pl),
		    &pidx, &pval)) {
			int *ip = (int *)(pl + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, pl,
			    (caddr_t)ip - (caddr_t)pl, *ip, pval);
		}
	}
#endif

	pc->pc_actv = pl->pl_next;
	pc->pc_nactv = POOL_LIST_NITEMS(pl) - 1;
	pc->pc_gets++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (pl);
}

void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_list *pl = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*pl);

	if (poison)
		poison_mem(pl + 1, pp->pr_size - sizeof(*pl));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		if (pc->pc_prev != NULL)
			pool_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	pl->pl_next = pc->pc_actv;
	pl->pl_nitems = ++nitems;
#ifdef DIAGNOSTIC
	pl->pl_nitems |= poison ? POOL_LIST_NITEMS_POISON : 0;
#endif
	pool_list_magic(pp, pl);

	pc->pc_actv = pl;
	pc->pc_nactv = nitems;

	pc->pc_puts++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}

struct pool_list *
pool_list_put(struct pool *pp, struct pool_list *pl)
{
	struct pool_list *rpl, *npl;

	if (pl == NULL)
		return (NULL);

	rpl = TAILQ_NEXT(pl, pl_nextl);

	do {
		npl = pl->pl_next;
		pool_put(pp, pl);
		pl = npl;
	} while (pl != NULL);

	return (rpl);
}

void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_list *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_list_put(pp, pc->pc_actv);
		pool_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_list_put(pp, pl);
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the mtx so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_gets;
			nput = pc->pc_puts;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	mtx_enter(&pp->pr_cache_mtx);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	mtx_leave(&pp->pr_cache_mtx);
}
#else /* MULTIPROCESSOR */
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}
#endif /* MULTIPROCESSOR */