/*	$OpenBSD: subr_pool.c,v 1.138 2014/07/10 13:34:39 tedu Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm_extern.h>
#include <dev/rndvar.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
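
/*
 * Illustrative usage sketch (not part of this file; the names are
 * hypothetical): a subsystem typically initializes its pool once,
 * optionally raises its IPL, and then gets and puts items from it:
 *
 *	struct pool foopool;
 *
 *	pool_init(&foopool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foopool, IPL_BIO);
 *
 *	fp = pool_get(&foopool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foopool, fp);
 *
 *	pool_destroy(&foopool);
 */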

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
			ph_node;	/* Off-page page headers */
	int		ph_nmissing;	/* # of chunks in use */
	caddr_t		ph_page;	/* this page's address */
	caddr_t		ph_colored;	/* page's colored address */
	int		ph_pagesize;
	int		ph_magic;
};

struct pool_item {
	u_int32_t pi_magic;
	/* Other entries use only this list entry */
	XSIMPLEQ_ENTRY(pool_item) pi_list;
};

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	 pool_swizzle_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 *       allocator. For now, instead of having the allocator figure out
 *       the allocation size from the pool (which can be done trivially
 *       with round_page(pr_itemsperpage * pr_size)) which would require
 *       lots of changes everywhere, we just create allocators for each
 *       size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
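
/*
 * Illustrative note (not from the original source): the comparison above
 * treats overlapping ranges as equal, so looking up a key header with
 * ph_pagesize == 0 finds the tree node whose page contains the key
 * address.  For example, with a node at ph_page == 0x1000 and
 * ph_pagesize == 0x2000, a key of ph_page == 0x1800 gives diff == 0x800,
 * which is smaller than the node's page size, so the two compare equal
 * and RB_FIND() returns that node.
 */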

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	pp->pr_npages--;
	pp->pr_npagefree++;
	pool_update_curpage(pp);

	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;
#ifdef DIAGNOSTIC
	struct pool *iter;

	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("init pool already on list");
	}
#endif

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
			 */
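			/*
			 * Worked example (illustrative only, assuming a
			 * 4096-byte PAGE_SIZE): for size == 5000,
			 * size % PAGE_SIZE == 904, which rounds up to 1024,
			 * so psize == 4096 / 1024 == 4 and the pool's
			 * backing allocator hands out 16384-byte pages.
			 */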
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;
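
	/*
	 * Worked example (illustrative only): with a 4096-byte page, an
	 * off-page header (off == 4096), a 600-byte item and 8-byte
	 * alignment, pr_itemsperpage == 6, so slack == 4096 - 6 * 600 ==
	 * 496 and pr_maxcolor == 496.  Successive pages then place their
	 * first item at offsets 0, 8, 16, ..., 496 before wrapping back
	 * to 0, which spreads items across cache lines.
	 */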

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

	/* Remove from global pool list */
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				goto removed;
			}
			prev = iter;
		}
#ifdef DIAGNOSTIC
		panic("destroyed pool not on list");
#endif
	}
removed:
#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
		    PR_NOWAIT);
#ifdef DIAGNOSTIC
	if (pool_debug && ph != NULL)
		ph->ph_magic = poison_value(ph);
#endif
	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

	if ((flags & PR_WAITOK) != 0) {
#ifdef DIAGNOSTIC
		assertwaitok();
		if (pool_debug == 2)
			yield();
#endif
		if (!cold && pool_debug) {
			KERNEL_UNLOCK();
			KERNEL_LOCK();
		}
	}

	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_get");
	}
#endif
	v = pool_do_get(pp, flags);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_get");
	}
#endif
	if (v != NULL)
		pp->pr_nget++;
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (flags & PR_ZERO)
		memset(v, 0, pp->pr_size);

	return (v);
}
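
/*
 * Illustrative call patterns (not from the original source): PR_WAITOK
 * callers sleep until an item is available and so normally need not
 * check for NULL (unless they also pass PR_LIMITFAIL), while PR_NOWAIT
 * callers must handle failure:
 *
 *	fp = pool_get(&foopool, PR_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOMEM);
 */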

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit)
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	pool_swizzle_curpage(pp);
	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (v != NULL)
			ph = pool_alloc_item_header(pp, v, flags);

		if (v == NULL || ph == NULL) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if ((v = pi = XSIMPLEQ_FIRST(&ph->ph_itemlist)) == NULL) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (pi->pi_magic != poison_value(pi))
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
	if (pool_debug && ph->ph_magic) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("pool_do_get(%s): free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (XSIMPLEQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (ph->ph_nmissing != pp->pr_itemsperpage) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}
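
/*
 * Illustrative note (not from the original source): the hard limit and
 * the sleep above cooperate with pool_do_put().  A subsystem that caps
 * its pool, e.g. with pool_sethardlimit(&foopool, 1024, "out of foos", 60),
 * and allocates with PR_WAITOK simply sleeps here once 1024 items are
 * outstanding; the next pool_put() clears PR_WANTED and wakes it up.
 */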

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_put");
	}
#endif
	pool_do_put(pp, v);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_put");
	}
#endif
	pp->pr_nput++;
	mtx_leave(&pp->pr_mtx);
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (pp->pr_nout == 0) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list)
			if (pi == qi)
				panic("double pool_put: %p", pi);
	}
	pi->pi_magic = poison_value(pi);
	if (ph->ph_magic) {
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
	}
#endif /* DIAGNOSTIC */

	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		wakeup(pp);
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 * (1) If we have more pages than the page high water mark,
	 *     free the page back to the system.
	 *
	 * (2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
			pool_update_curpage(pp);
		}
	}
	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}
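
/*
 * Illustrative note (not from the original source): pool_prime() is a
 * best-effort, PR_NOWAIT pre-allocation, so it is typically called once
 * during initialization, e.g. pool_prime(&foopool, 64) rounds 64 items
 * up to whole pages and allocates them up front; it always returns 0,
 * even if some of the pages could not be allocated.
 */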

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = poison_value(pi);
		if (ph->ph_magic) {
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
		}
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_swizzle_curpage(struct pool *pp)
{
	struct pool_item_header *ph, *next;

	if ((ph = pp->pr_curpage) == NULL)
		return;
	if (arc4random_uniform(16) != 0)
		return;
	next = LIST_FIRST(&pp->pr_partpages);
	if (next == ph)
		next = LIST_NEXT(next, ph_pagelist);
	if (next == NULL) {
		next = LIST_FIRST(&pp->pr_emptypages);
		if (next == ph)
			next = LIST_NEXT(next, ph_pagelist);
	}
	if (next != NULL)
		pp->pr_curpage = next;
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}
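
/*
 * Illustrative tuning sketch (not from the original source; the numbers
 * are hypothetical): a subsystem might keep at least 32 items ready,
 * keep at most roughly 256 items' worth of pages idle, and cap
 * outstanding items with a rate-limited warning:
 *
 *	pool_setlowat(&foopool, 32);
 *	pool_sethiwat(&foopool, 256);
 *	pool_sethardlimit(&foopool, 1024, "foopl hard limit reached", 60);
 */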

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;
	int s;

	s = splhigh();
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	splx(s);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != poison_value(pi)) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != poison_value(pi)) {
			printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
		if (pool_debug && ph->ph_magic) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct kinfo_pool pi;
	struct pool *pp;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(pi))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial)
			break;
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && pp == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(pp->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(pp->pr_wchan, where, len);
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_alloc->pa_pagesz;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		return copyout(&pi, where, buflen);
	}
	/* NOTREACHED */
	return (0);	/* XXX - Stupid gcc */
}
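
/*
 * Illustrative userland query (not part of this file; it assumes the
 * usual CTL_KERN/KERN_POOL mib names from <sys/sysctl.h>): fetching the
 * kinfo_pool of the pool with serial number 1 looks roughly like this:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */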

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation and
 * deallocation of the pool's pages.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; name preserved for compatibility.
 * This is the default allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set pool_allocator_free will wakeup all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	int waitok = flags & PR_WAITOK;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
	    &kd);
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
}