/*	$OpenBSD: subr_pool.c,v 1.109 2011/09/23 07:27:09 dlg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>
#include <dev/rndvar.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
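
/*
 * Illustrative sketch of how a subsystem typically uses this API.
 * The pool, the item structure and the IPL chosen here are hypothetical
 * and exist only for illustration; the block is disabled.
 */
#if 0
struct example_item {
	TAILQ_ENTRY(example_item) ei_list;
	int			  ei_value;
};

struct pool example_pool;

void
example_init(void)
{
	/* One pool per object type; items are example_item-sized. */
	pool_init(&example_pool, sizeof(struct example_item), 0, 0, 0,
	    "examplepl", NULL);
	/* Protect the pool for use from, e.g., network interrupt context. */
	pool_setipl(&example_pool, IPL_NET);
}

struct example_item *
example_alloc(void)
{
	/* PR_WAITOK may sleep for memory; PR_ZERO clears the item. */
	return (pool_get(&example_pool, PR_WAITOK | PR_ZERO));
}

void
example_free(struct example_item *ei)
{
	pool_put(&example_pool, ei);
}
#endif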

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
	int			ph_magic;
};

struct pool_item {
#ifdef DIAGNOSTIC
	u_int32_t pi_magic;
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#ifdef DEADBEEF1
#define	PI_MAGIC DEADBEEF1
#else
#define	PI_MAGIC 0xdeafbeef
#endif

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 *       allocator. For now, instead of having the allocator figure out
 *       the allocation size from the pool (which can be done trivially
 *       with round_page(pr_itemsperpage * pr_size)) which would require
 *       lots of changes everywhere, we just create allocators for each
 *       size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *,
	    int (*)(const char *, ...));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;

	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
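 *
 * For example, with 4 KB pages, an off-page header with ph_page = 0x4000
 * and ph_pagesize = 0x1000 compares "equal" to the zero-sized probe
 * { ph_page = 0x4321, ph_pagesize = 0 } built below, so RB_FIND() returns
 * the header of the page containing that item address.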
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and release it (or queue it for
	 * release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	pp->pr_npages--;
	pp->pr_npagefree++;
	pool_update_curpage(pp);

	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
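			 *
			 * For example, with 4 KB pages and size = 5000,
			 * size % PAGE_SIZE = 904, which rounds up to 1024,
			 * so psize = 4096 / 1024 = 4 and each 16 KB
			 * allocation holds three items.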
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/* constructor, destructor, and arg */
	pp->pr_ctor = NULL;
	pp->pr_dtor = NULL;
	pp->pr_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
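	 *
	 * For example, if off were 4000 with pr_size = 96 and align = 8,
	 * itemsperpage would be 41 and slack = 4000 - 3936 = 64, so
	 * pr_maxcolor = 64 and successive pages start their item area at
	 * offsets 0, 8, 16, ... 64 before wrapping, spreading items across
	 * cache lines.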
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
		    PR_NOWAIT);
	if (pool_debug && ph != NULL)
		ph->ph_magic = PI_MAGIC;
	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0)
		assertwaitok();
#endif /* DIAGNOSTIC */

	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_get");
	}
#endif
	v = pool_do_get(pp, flags);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_get");
	}
#endif
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (pp->pr_ctor) {
		if (flags & PR_ZERO)
			panic("pool_get: PR_ZERO when ctor set");
		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
			mtx_enter(&pp->pr_mtx);
			pool_do_put(pp, v);
			mtx_leave(&pp->pr_mtx);
			v = NULL;
		}
	} else {
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	if (v != NULL)
		pp->pr_nget++;
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit.
	 * If we have, and we can wait, then wait until an item has been
	 * returned to the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit)
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (v != NULL)
			ph = pool_alloc_item_header(pp, v, flags);

		if (v == NULL || ph == NULL) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (pi->pi_magic != PI_MAGIC)
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
	if (pool_debug && ph->ph_magic) {
		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
		    i < pp->pr_size / sizeof(int); i++) {
			if (ip[i] != ph->ph_magic) {
				panic("pool_do_get(%s): free list modified: "
				    "page %p; item addr %p; offset 0x%x=0x%x",
				    pp->pr_wchan, ph->ph_page, pi,
				    i * sizeof(int), ip[i]);
			}
		}
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages.
		 * This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (ph->ph_nmissing != pp->pr_itemsperpage) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_put");
	}
#endif
	pool_do_put(pp, v);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_put");
	}
#endif
	mtx_leave(&pp->pr_mtx);
	pp->pr_nput++;
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (pp->pr_nout == 0) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
	if (ph->ph_magic) {
		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
		    i < pp->pr_size / sizeof(int); i++)
			ip[i] = ph->ph_magic;
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		wakeup(pp);
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 * (1) If we have more pages than the page high water mark,
	 *     free the page back to the system.
	 *
	 * (2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
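	 *
	 * For example, with the default pr_maxpages of 8, once more than
	 * eight pages are idle the newly emptied page is freed back to the
	 * allocator right away instead of being cached on pr_emptypages.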
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
			pool_update_curpage(pp);
		}
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
		if (ph->ph_magic) {
			for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
			    i < pp->pr_size / sizeof(int); i++)
				ip[i] = ph->ph_magic;
		}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}
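
/*
 * Illustrative sketch of how a subsystem might combine the tuning knobs
 * above.  The pool, the limits and the ctor/dtor shown here are
 * hypothetical; the block is disabled.
 */
#if 0
int
example_ctor(void *arg, void *v, int flags)
{
	/* Runs after a fresh item is taken; non-zero return fails pool_get(). */
	return (0);
}

void
example_dtor(void *arg, void *v)
{
	/* Runs just before the item is handed back to the pool. */
}

void
example_tune(struct pool *pp)
{
	/* Keep at least 32 items primed; pool_get() tops the pool up. */
	pool_setlowat(pp, 32);
	/* Keep roughly 1024 items' worth of pages cached at most. */
	pool_sethiwat(pp, 1024);
	/* Never hand out more than 4096 items; warn at most once a minute. */
	pool_sethardlimit(pp, 4096, "example: pool limit reached", 60);
	pool_set_ctordtor(pp, example_ctor, example_dtor, NULL);
}
#endif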

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	TAILQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-10s %18s %18s\n",
		    "Name", "Address", "Allocator");

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {		\
	(ovflw) += db_printf((fmt),				\
	    (width) - (fixed) - (ovflw) > 0 ?			\
	    (width) - (fixed) - (ovflw) : 0,			\
	    (val)) - (width);					\
	if ((ovflw) < 0)					\
		(ovflw) = 0;					\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}

int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
#ifdef POOL_DEBUG
		if (pool_debug && ph->ph_magic) {
			for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
			    i < pp->pr_size / sizeof(int); i++) {
				if (ip[i] != ph->ph_magic) {
					printf("pool(%s): free list modified: "
					    "page %p; item ordinal %d; addr %p "
					    "(p %p); offset 0x%x=0x%x\n",
					    pp->pr_wchan, ph->ph_page, n, pi,
					    page, i * sizeof(int), ip[i]);
				}
			}
		}

#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}

void
pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...),
    void (*func)(void *, int, int (*)(const char *, ...)))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0);	/* XXX - Stupid gcc */
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation and
 * deallocation of the backing pages.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; the name is preserved for compatibility.
 * This is the default allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set, pool_allocator_free will wake up all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
	    &kd);
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
}
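
/*
 * Illustrative sketch of a pool wired to a custom backend allocator.
 * The allocator hands out two-page chunks of kernel virtual space,
 * mirroring the pool_large_alloc_ni()/pool_large_free_ni() pattern above.
 * All names in this block are hypothetical and the block is disabled.
 */
#if 0
void *
example_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange,
	    &kd));
}

void
example_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
}

struct pool_allocator example_allocator = {
	example_page_alloc, example_page_free, 0,
};

struct pool example_big_pool;

void
example_big_init(void)
{
	/* Two-page chunks; pool_init() derives the mask and shift from this. */
	example_allocator.pa_pagesz = 2 * PAGE_SIZE;
	pool_init(&example_big_pool, 3000, 0, 0, 0, "examplebig",
	    &example_allocator);
}
#endif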