1 /* $OpenBSD: subr_pool.c,v 1.112 2012/12/24 19:43:11 guenther Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/proc.h> 37 #include <sys/errno.h> 38 #include <sys/kernel.h> 39 #include <sys/malloc.h> 40 #include <sys/pool.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 44 #include <uvm/uvm.h> 45 #include <dev/rndvar.h> 46 47 /* 48 * Pool resource management utility. 49 * 50 * Memory is allocated in pages which are split into pieces according to 51 * the pool item size. Each page is kept on one of three lists in the 52 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 53 * for empty, full and partially-full pages respectively. The individual 54 * pool items are on a linked list headed by `ph_itemlist' in each page 55 * header. The memory for building the page list is either taken from 56 * the allocated pages themselves (for small pool items) or taken from 57 * an internal pool of page headers (`phpool'). 
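 *
 * As a worked illustration of that layout (numbers assume 4096-byte
 * pages and are approximate, since the header size is platform
 * dependent): a pool of 128-byte items keeps its page header at the end
 * of each page, leaving roughly 4000 bytes for about 31 items, whereas a
 * pool of 2048-byte items takes its header from `phpool' and finds it
 * again through an RB tree keyed on the page address.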
58 */ 59 60 /* List of all pools */ 61 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head); 62 63 /* Private pool for page header structures */ 64 struct pool phpool; 65 66 struct pool_item_header { 67 /* Page headers */ 68 LIST_ENTRY(pool_item_header) 69 ph_pagelist; /* pool page list */ 70 TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */ 71 RB_ENTRY(pool_item_header) 72 ph_node; /* Off-page page headers */ 73 int ph_nmissing; /* # of chunks in use */ 74 caddr_t ph_page; /* this page's address */ 75 caddr_t ph_colored; /* page's colored address */ 76 int ph_pagesize; 77 int ph_magic; 78 }; 79 80 struct pool_item { 81 #ifdef DIAGNOSTIC 82 u_int32_t pi_magic; 83 #endif 84 /* Other entries use only this list entry */ 85 TAILQ_ENTRY(pool_item) pi_list; 86 }; 87 88 #ifdef DEADBEEF1 89 #define PI_MAGIC DEADBEEF1 90 #else 91 #define PI_MAGIC 0xdeafbeef 92 #endif 93 94 #ifdef POOL_DEBUG 95 int pool_debug = 1; 96 #else 97 int pool_debug = 0; 98 #endif 99 100 #define POOL_NEEDS_CATCHUP(pp) \ 101 ((pp)->pr_nitems < (pp)->pr_minitems) 102 103 /* 104 * Every pool gets a unique serial number assigned to it. If this counter 105 * wraps, we're screwed, but we shouldn't create so many pools anyway. 106 */ 107 unsigned int pool_serial; 108 109 int pool_catchup(struct pool *); 110 void pool_prime_page(struct pool *, caddr_t, struct pool_item_header *); 111 void pool_update_curpage(struct pool *); 112 void *pool_do_get(struct pool *, int); 113 void pool_do_put(struct pool *, void *); 114 void pr_rmpage(struct pool *, struct pool_item_header *, 115 struct pool_pagelist *); 116 int pool_chk_page(struct pool *, struct pool_item_header *, int); 117 int pool_chk(struct pool *); 118 struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int); 119 120 void *pool_allocator_alloc(struct pool *, int, int *); 121 void pool_allocator_free(struct pool *, void *); 122 123 /* 124 * XXX - quick hack. For pools with large items we want to use a special 125 * allocator. For now, instead of having the allocator figure out 126 * the allocation size from the pool (which can be done trivially 127 * with round_page(pr_itemsperpage * pr_size)) which would require 128 * lots of changes everywhere, we just create allocators for each 129 * size. We limit those to 128 pages. 130 */ 131 #define POOL_LARGE_MAXPAGES 128 132 struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES]; 133 struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES]; 134 void *pool_large_alloc(struct pool *, int, int *); 135 void pool_large_free(struct pool *, void *); 136 void *pool_large_alloc_ni(struct pool *, int, int *); 137 void pool_large_free_ni(struct pool *, void *); 138 139 140 #ifdef DDB 141 void pool_print_pagelist(struct pool_pagelist *, 142 int (*)(const char *, ...)); 143 void pool_print1(struct pool *, const char *, int (*)(const char *, ...)); 144 #endif 145 146 #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0) 147 148 static __inline int 149 phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 150 { 151 long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page; 152 if (diff < 0) 153 return -(-diff >= a->ph_pagesize); 154 else if (diff > 0) 155 return (diff >= b->ph_pagesize); 156 else 157 return (0); 158 } 159 160 RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 161 RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 162 163 /* 164 * Return the pool page header based on page address. 
165 */ 166 static __inline struct pool_item_header * 167 pr_find_pagehead(struct pool *pp, void *v) 168 { 169 struct pool_item_header *ph, tmp; 170 171 if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 172 caddr_t page; 173 174 page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask); 175 176 return ((struct pool_item_header *)(page + pp->pr_phoffset)); 177 } 178 179 /* 180 * The trick we're using in the tree compare function is to compare 181 * two elements equal when they overlap. We want to return the 182 * page header that belongs to the element just before this address. 183 * We don't want this element to compare equal to the next element, 184 * so the compare function takes the pagesize from the lower element. 185 * If this header is the lower, its pagesize is zero, so it can't 186 * overlap with the next header. But if the header we're looking for 187 * is lower, we'll use its pagesize and it will overlap and return 188 * equal. 189 */ 190 tmp.ph_page = v; 191 tmp.ph_pagesize = 0; 192 ph = RB_FIND(phtree, &pp->pr_phtree, &tmp); 193 194 if (ph) { 195 KASSERT(ph->ph_page <= (caddr_t)v); 196 KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v); 197 } 198 return ph; 199 } 200 201 /* 202 * Remove a page from the pool. 203 */ 204 void 205 pr_rmpage(struct pool *pp, struct pool_item_header *ph, 206 struct pool_pagelist *pq) 207 { 208 209 /* 210 * If the page was idle, decrement the idle page count. 211 */ 212 if (ph->ph_nmissing == 0) { 213 #ifdef DIAGNOSTIC 214 if (pp->pr_nidle == 0) 215 panic("pr_rmpage: nidle inconsistent"); 216 if (pp->pr_nitems < pp->pr_itemsperpage) 217 panic("pr_rmpage: nitems inconsistent"); 218 #endif 219 pp->pr_nidle--; 220 } 221 222 pp->pr_nitems -= pp->pr_itemsperpage; 223 224 /* 225 * Unlink a page from the pool and release it (or queue it for release). 226 */ 227 LIST_REMOVE(ph, ph_pagelist); 228 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 229 RB_REMOVE(phtree, &pp->pr_phtree, ph); 230 pp->pr_npages--; 231 pp->pr_npagefree++; 232 pool_update_curpage(pp); 233 234 if (pq) { 235 LIST_INSERT_HEAD(pq, ph, ph_pagelist); 236 } else { 237 pool_allocator_free(pp, ph->ph_page); 238 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 239 pool_put(&phpool, ph); 240 } 241 } 242 243 /* 244 * Initialize the given pool resource structure. 245 * 246 * We export this routine to allow other kernel parts to declare 247 * static pools that must be initialized before malloc() is available. 248 */ 249 void 250 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 251 const char *wchan, struct pool_allocator *palloc) 252 { 253 int off, slack; 254 255 #ifdef MALLOC_DEBUG 256 if ((flags & PR_DEBUG) && (ioff != 0 || align != 0)) 257 flags &= ~PR_DEBUG; 258 #endif 259 /* 260 * Check arguments and construct default values. 261 */ 262 if (palloc == NULL) { 263 if (size > PAGE_SIZE) { 264 int psize; 265 266 /* 267 * XXX - should take align into account as well. 
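			 *
			 * For example (assuming 4096-byte pages), a
			 * 5000-byte item leaves a 904-byte remainder,
			 * which rounds up to 1024, so psize becomes
			 * 4096 / 1024 = 4: a 16384-byte allocation
			 * holding three items.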
268 */ 269 if (size == round_page(size)) 270 psize = size / PAGE_SIZE; 271 else 272 psize = PAGE_SIZE / roundup(size % PAGE_SIZE, 273 1024); 274 if (psize > POOL_LARGE_MAXPAGES) 275 psize = POOL_LARGE_MAXPAGES; 276 if (flags & PR_WAITOK) 277 palloc = &pool_allocator_large_ni[psize-1]; 278 else 279 palloc = &pool_allocator_large[psize-1]; 280 if (palloc->pa_pagesz == 0) { 281 palloc->pa_pagesz = psize * PAGE_SIZE; 282 if (flags & PR_WAITOK) { 283 palloc->pa_alloc = pool_large_alloc_ni; 284 palloc->pa_free = pool_large_free_ni; 285 } else { 286 palloc->pa_alloc = pool_large_alloc; 287 palloc->pa_free = pool_large_free; 288 } 289 } 290 } else { 291 palloc = &pool_allocator_nointr; 292 } 293 } 294 if (palloc->pa_pagesz == 0) { 295 palloc->pa_pagesz = PAGE_SIZE; 296 } 297 if (palloc->pa_pagemask == 0) { 298 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 299 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 300 } 301 302 if (align == 0) 303 align = ALIGN(1); 304 305 if (size < sizeof(struct pool_item)) 306 size = sizeof(struct pool_item); 307 308 size = roundup(size, align); 309 #ifdef DIAGNOSTIC 310 if (size > palloc->pa_pagesz) 311 panic("pool_init: pool item size (%lu) too large", 312 (u_long)size); 313 #endif 314 315 /* 316 * Initialize the pool structure. 317 */ 318 LIST_INIT(&pp->pr_emptypages); 319 LIST_INIT(&pp->pr_fullpages); 320 LIST_INIT(&pp->pr_partpages); 321 pp->pr_curpage = NULL; 322 pp->pr_npages = 0; 323 pp->pr_minitems = 0; 324 pp->pr_minpages = 0; 325 pp->pr_maxpages = 8; 326 pp->pr_roflags = flags; 327 pp->pr_flags = 0; 328 pp->pr_size = size; 329 pp->pr_align = align; 330 pp->pr_wchan = wchan; 331 pp->pr_alloc = palloc; 332 pp->pr_nitems = 0; 333 pp->pr_nout = 0; 334 pp->pr_hardlimit = UINT_MAX; 335 pp->pr_hardlimit_warning = NULL; 336 pp->pr_hardlimit_ratecap.tv_sec = 0; 337 pp->pr_hardlimit_ratecap.tv_usec = 0; 338 pp->pr_hardlimit_warning_last.tv_sec = 0; 339 pp->pr_hardlimit_warning_last.tv_usec = 0; 340 pp->pr_serial = ++pool_serial; 341 if (pool_serial == 0) 342 panic("pool_init: too much uptime"); 343 344 /* constructor, destructor, and arg */ 345 pp->pr_ctor = NULL; 346 pp->pr_dtor = NULL; 347 pp->pr_arg = NULL; 348 349 /* 350 * Decide whether to put the page header off page to avoid 351 * wasting too large a part of the page. Off-page page headers 352 * go into an RB tree, so we can match a returned item with 353 * its header based on the page address. 354 * We use 1/16 of the page size as the threshold (XXX: tune) 355 */ 356 if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) { 357 /* Use the end of the page for the page header */ 358 pp->pr_roflags |= PR_PHINPAGE; 359 pp->pr_phoffset = off = palloc->pa_pagesz - 360 ALIGN(sizeof(struct pool_item_header)); 361 } else { 362 /* The page header will be taken from our page header pool */ 363 pp->pr_phoffset = 0; 364 off = palloc->pa_pagesz; 365 RB_INIT(&pp->pr_phtree); 366 } 367 368 /* 369 * Alignment is to take place at `ioff' within the item. This means 370 * we must reserve up to `align - 1' bytes on the page to allow 371 * appropriate positioning of each item. 372 * 373 * Silently enforce `0 <= ioff < align'. 374 */ 375 pp->pr_itemoffset = ioff = ioff % align; 376 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; 377 KASSERT(pp->pr_itemsperpage != 0); 378 379 /* 380 * Use the slack between the chunks and the page header 381 * for "cache coloring". 
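	 *
	 * For example, with 96 bytes of slack and 16-byte alignment,
	 * successive pages start their items at offsets 0, 16, 32, ...,
	 * 96 and then wrap back to 0, spreading otherwise identical
	 * access patterns across the cache.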
382 */ 383 slack = off - pp->pr_itemsperpage * pp->pr_size; 384 pp->pr_maxcolor = (slack / align) * align; 385 pp->pr_curcolor = 0; 386 387 pp->pr_nget = 0; 388 pp->pr_nfail = 0; 389 pp->pr_nput = 0; 390 pp->pr_npagealloc = 0; 391 pp->pr_npagefree = 0; 392 pp->pr_hiwat = 0; 393 pp->pr_nidle = 0; 394 395 pp->pr_ipl = -1; 396 mtx_init(&pp->pr_mtx, IPL_NONE); 397 398 if (phpool.pr_size == 0) { 399 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 400 0, "phpool", NULL); 401 pool_setipl(&phpool, IPL_HIGH); 402 } 403 404 /* pglistalloc/constraint parameters */ 405 pp->pr_crange = &kp_dirty; 406 407 /* Insert this into the list of all pools. */ 408 TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist); 409 } 410 411 void 412 pool_setipl(struct pool *pp, int ipl) 413 { 414 pp->pr_ipl = ipl; 415 mtx_init(&pp->pr_mtx, ipl); 416 } 417 418 /* 419 * Decommission a pool resource. 420 */ 421 void 422 pool_destroy(struct pool *pp) 423 { 424 struct pool_item_header *ph; 425 426 #ifdef DIAGNOSTIC 427 if (pp->pr_nout != 0) 428 panic("pool_destroy: pool busy: still out: %u", pp->pr_nout); 429 #endif 430 431 /* Remove all pages */ 432 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 433 pr_rmpage(pp, ph, NULL); 434 KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 435 KASSERT(LIST_EMPTY(&pp->pr_partpages)); 436 437 /* Remove from global pool list */ 438 TAILQ_REMOVE(&pool_head, pp, pr_poollist); 439 } 440 441 struct pool_item_header * 442 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) 443 { 444 struct pool_item_header *ph; 445 446 if ((pp->pr_roflags & PR_PHINPAGE) != 0) 447 ph = (struct pool_item_header *)(storage + pp->pr_phoffset); 448 else 449 ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) | 450 PR_NOWAIT); 451 if (pool_debug && ph != NULL) 452 ph->ph_magic = PI_MAGIC; 453 return (ph); 454 } 455 456 /* 457 * Grab an item from the pool; must be called at appropriate spl level 458 */ 459 void * 460 pool_get(struct pool *pp, int flags) 461 { 462 void *v; 463 464 KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); 465 466 #ifdef DIAGNOSTIC 467 if ((flags & PR_WAITOK) != 0) 468 assertwaitok(); 469 #endif /* DIAGNOSTIC */ 470 471 mtx_enter(&pp->pr_mtx); 472 #ifdef POOL_DEBUG 473 if (pp->pr_roflags & PR_DEBUGCHK) { 474 if (pool_chk(pp)) 475 panic("before pool_get"); 476 } 477 #endif 478 v = pool_do_get(pp, flags); 479 #ifdef POOL_DEBUG 480 if (pp->pr_roflags & PR_DEBUGCHK) { 481 if (pool_chk(pp)) 482 panic("after pool_get"); 483 } 484 #endif 485 if (v != NULL) 486 pp->pr_nget++; 487 mtx_leave(&pp->pr_mtx); 488 if (v == NULL) 489 return (v); 490 491 if (pp->pr_ctor) { 492 if (flags & PR_ZERO) 493 panic("pool_get: PR_ZERO when ctor set"); 494 if (pp->pr_ctor(pp->pr_arg, v, flags)) { 495 mtx_enter(&pp->pr_mtx); 496 pp->pr_nget--; 497 pool_do_put(pp, v); 498 mtx_leave(&pp->pr_mtx); 499 v = NULL; 500 } 501 } else { 502 if (flags & PR_ZERO) 503 memset(v, 0, pp->pr_size); 504 } 505 return (v); 506 } 507 508 void * 509 pool_do_get(struct pool *pp, int flags) 510 { 511 struct pool_item *pi; 512 struct pool_item_header *ph; 513 void *v; 514 int slowdown = 0; 515 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG) 516 int i, *ip; 517 #endif 518 519 #ifdef MALLOC_DEBUG 520 if (pp->pr_roflags & PR_DEBUG) { 521 void *addr; 522 523 addr = NULL; 524 debug_malloc(pp->pr_size, M_DEBUG, 525 (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr); 526 return (addr); 527 } 528 #endif 529 530 startover: 531 /* 532 * Check to see if we've reached the hard limit. 
If we have, 533 * and we can wait, then wait until an item has been returned to 534 * the pool. 535 */ 536 #ifdef DIAGNOSTIC 537 if (pp->pr_nout > pp->pr_hardlimit) 538 panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan); 539 #endif 540 if (pp->pr_nout == pp->pr_hardlimit) { 541 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 542 /* 543 * XXX: A warning isn't logged in this case. Should 544 * it be? 545 */ 546 pp->pr_flags |= PR_WANTED; 547 pool_sleep(pp); 548 goto startover; 549 } 550 551 /* 552 * Log a message that the hard limit has been hit. 553 */ 554 if (pp->pr_hardlimit_warning != NULL && 555 ratecheck(&pp->pr_hardlimit_warning_last, 556 &pp->pr_hardlimit_ratecap)) 557 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 558 559 pp->pr_nfail++; 560 return (NULL); 561 } 562 563 /* 564 * The convention we use is that if `curpage' is not NULL, then 565 * it points at a non-empty bucket. In particular, `curpage' 566 * never points at a page header which has PR_PHINPAGE set and 567 * has no items in its bucket. 568 */ 569 if ((ph = pp->pr_curpage) == NULL) { 570 #ifdef DIAGNOSTIC 571 if (pp->pr_nitems != 0) { 572 printf("pool_do_get: %s: curpage NULL, nitems %u\n", 573 pp->pr_wchan, pp->pr_nitems); 574 panic("pool_do_get: nitems inconsistent"); 575 } 576 #endif 577 578 /* 579 * Call the back-end page allocator for more memory. 580 */ 581 v = pool_allocator_alloc(pp, flags, &slowdown); 582 if (v != NULL) 583 ph = pool_alloc_item_header(pp, v, flags); 584 585 if (v == NULL || ph == NULL) { 586 if (v != NULL) 587 pool_allocator_free(pp, v); 588 589 if ((flags & PR_WAITOK) == 0) { 590 pp->pr_nfail++; 591 return (NULL); 592 } 593 594 /* 595 * Wait for items to be returned to this pool. 596 * 597 * XXX: maybe we should wake up once a second and 598 * try again? 599 */ 600 pp->pr_flags |= PR_WANTED; 601 pool_sleep(pp); 602 goto startover; 603 } 604 605 /* We have more memory; add it to the pool */ 606 pool_prime_page(pp, v, ph); 607 pp->pr_npagealloc++; 608 609 if (slowdown && (flags & PR_WAITOK)) { 610 mtx_leave(&pp->pr_mtx); 611 yield(); 612 mtx_enter(&pp->pr_mtx); 613 } 614 615 /* Start the allocation process over. */ 616 goto startover; 617 } 618 if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) { 619 panic("pool_do_get: %s: page empty", pp->pr_wchan); 620 } 621 #ifdef DIAGNOSTIC 622 if (pp->pr_nitems == 0) { 623 printf("pool_do_get: %s: items on itemlist, nitems %u\n", 624 pp->pr_wchan, pp->pr_nitems); 625 panic("pool_do_get: nitems inconsistent"); 626 } 627 #endif 628 629 #ifdef DIAGNOSTIC 630 if (pi->pi_magic != PI_MAGIC) 631 panic("pool_do_get(%s): free list modified: " 632 "page %p; item addr %p; offset 0x%x=0x%x", 633 pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic); 634 #ifdef POOL_DEBUG 635 if (pool_debug && ph->ph_magic) { 636 for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); 637 i < pp->pr_size / sizeof(int); i++) { 638 if (ip[i] != ph->ph_magic) { 639 panic("pool_do_get(%s): free list modified: " 640 "page %p; item addr %p; offset 0x%x=0x%x", 641 pp->pr_wchan, ph->ph_page, pi, 642 i * sizeof(int), ip[i]); 643 } 644 } 645 } 646 #endif /* POOL_DEBUG */ 647 #endif /* DIAGNOSTIC */ 648 649 /* 650 * Remove from item list. 651 */ 652 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list); 653 pp->pr_nitems--; 654 pp->pr_nout++; 655 if (ph->ph_nmissing == 0) { 656 #ifdef DIAGNOSTIC 657 if (pp->pr_nidle == 0) 658 panic("pool_do_get: nidle inconsistent"); 659 #endif 660 pp->pr_nidle--; 661 662 /* 663 * This page was previously empty. Move it to the list of 664 * partially-full pages. 
This page is already curpage. 665 */ 666 LIST_REMOVE(ph, ph_pagelist); 667 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 668 } 669 ph->ph_nmissing++; 670 if (TAILQ_EMPTY(&ph->ph_itemlist)) { 671 #ifdef DIAGNOSTIC 672 if (ph->ph_nmissing != pp->pr_itemsperpage) { 673 panic("pool_do_get: %s: nmissing inconsistent", 674 pp->pr_wchan); 675 } 676 #endif 677 /* 678 * This page is now full. Move it to the full list 679 * and select a new current page. 680 */ 681 LIST_REMOVE(ph, ph_pagelist); 682 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 683 pool_update_curpage(pp); 684 } 685 686 /* 687 * If we have a low water mark and we are now below that low 688 * water mark, add more items to the pool. 689 */ 690 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 691 /* 692 * XXX: Should we log a warning? Should we set up a timeout 693 * to try again in a second or so? The latter could break 694 * a caller's assumptions about interrupt protection, etc. 695 */ 696 } 697 return (v); 698 } 699 700 /* 701 * Return resource to the pool; must be called at appropriate spl level 702 */ 703 void 704 pool_put(struct pool *pp, void *v) 705 { 706 if (pp->pr_dtor) 707 pp->pr_dtor(pp->pr_arg, v); 708 mtx_enter(&pp->pr_mtx); 709 #ifdef POOL_DEBUG 710 if (pp->pr_roflags & PR_DEBUGCHK) { 711 if (pool_chk(pp)) 712 panic("before pool_put"); 713 } 714 #endif 715 pool_do_put(pp, v); 716 #ifdef POOL_DEBUG 717 if (pp->pr_roflags & PR_DEBUGCHK) { 718 if (pool_chk(pp)) 719 panic("after pool_put"); 720 } 721 #endif 722 pp->pr_nput++; 723 mtx_leave(&pp->pr_mtx); 724 } 725 726 /* 727 * Internal version of pool_put(). 728 */ 729 void 730 pool_do_put(struct pool *pp, void *v) 731 { 732 struct pool_item *pi = v; 733 struct pool_item_header *ph; 734 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG) 735 int i, *ip; 736 #endif 737 738 if (v == NULL) 739 panic("pool_put of NULL"); 740 741 #ifdef MALLOC_DEBUG 742 if (pp->pr_roflags & PR_DEBUG) { 743 debug_free(v, M_DEBUG); 744 return; 745 } 746 #endif 747 748 #ifdef DIAGNOSTIC 749 if (pp->pr_ipl != -1) 750 splassert(pp->pr_ipl); 751 752 if (pp->pr_nout == 0) { 753 printf("pool %s: putting with none out\n", 754 pp->pr_wchan); 755 panic("pool_do_put"); 756 } 757 #endif 758 759 if ((ph = pr_find_pagehead(pp, v)) == NULL) { 760 panic("pool_do_put: %s: page header missing", pp->pr_wchan); 761 } 762 763 /* 764 * Return to item list. 765 */ 766 #ifdef DIAGNOSTIC 767 pi->pi_magic = PI_MAGIC; 768 #ifdef POOL_DEBUG 769 if (ph->ph_magic) { 770 for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); 771 i < pp->pr_size / sizeof(int); i++) 772 ip[i] = ph->ph_magic; 773 } 774 #endif /* POOL_DEBUG */ 775 #endif /* DIAGNOSTIC */ 776 777 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 778 ph->ph_nmissing--; 779 pp->pr_nitems++; 780 pp->pr_nout--; 781 782 /* Cancel "pool empty" condition if it exists */ 783 if (pp->pr_curpage == NULL) 784 pp->pr_curpage = ph; 785 786 if (pp->pr_flags & PR_WANTED) { 787 pp->pr_flags &= ~PR_WANTED; 788 wakeup(pp); 789 } 790 791 /* 792 * If this page is now empty, do one of two things: 793 * 794 * (1) If we have more pages than the page high water mark, 795 * free the page back to the system. 796 * 797 * (2) Otherwise, move the page to the empty page list. 798 * 799 * Either way, select a new current page (so we use a partially-full 800 * page if one is available). 
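	 *
	 * With the default pr_maxpages of 8 set by pool_init() (and
	 * adjustable with pool_sethiwat()), for instance, up to 8 idle
	 * pages stay cached on the empty page list before further empty
	 * pages are handed back to the allocator.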
801 */ 802 if (ph->ph_nmissing == 0) { 803 pp->pr_nidle++; 804 if (pp->pr_nidle > pp->pr_maxpages) { 805 pr_rmpage(pp, ph, NULL); 806 } else { 807 LIST_REMOVE(ph, ph_pagelist); 808 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 809 pool_update_curpage(pp); 810 } 811 } 812 813 /* 814 * If the page was previously completely full, move it to the 815 * partially-full list and make it the current page. The next 816 * allocation will get the item from this page, instead of 817 * further fragmenting the pool. 818 */ 819 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 820 LIST_REMOVE(ph, ph_pagelist); 821 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 822 pp->pr_curpage = ph; 823 } 824 } 825 826 /* 827 * Add N items to the pool. 828 */ 829 int 830 pool_prime(struct pool *pp, int n) 831 { 832 struct pool_item_header *ph; 833 caddr_t cp; 834 int newpages; 835 int slowdown; 836 837 mtx_enter(&pp->pr_mtx); 838 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 839 840 while (newpages-- > 0) { 841 cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); 842 if (cp != NULL) 843 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 844 if (cp == NULL || ph == NULL) { 845 if (cp != NULL) 846 pool_allocator_free(pp, cp); 847 break; 848 } 849 850 pool_prime_page(pp, cp, ph); 851 pp->pr_npagealloc++; 852 pp->pr_minpages++; 853 } 854 855 if (pp->pr_minpages >= pp->pr_maxpages) 856 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 857 858 mtx_leave(&pp->pr_mtx); 859 return (0); 860 } 861 862 /* 863 * Add a page worth of items to the pool. 864 * 865 * Note, we must be called with the pool descriptor LOCKED. 866 */ 867 void 868 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) 869 { 870 struct pool_item *pi; 871 caddr_t cp = storage; 872 unsigned int align = pp->pr_align; 873 unsigned int ioff = pp->pr_itemoffset; 874 int n; 875 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG) 876 int i, *ip; 877 #endif 878 879 /* 880 * Insert page header. 881 */ 882 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 883 TAILQ_INIT(&ph->ph_itemlist); 884 ph->ph_page = storage; 885 ph->ph_pagesize = pp->pr_alloc->pa_pagesz; 886 ph->ph_nmissing = 0; 887 if ((pp->pr_roflags & PR_PHINPAGE) == 0) 888 RB_INSERT(phtree, &pp->pr_phtree, ph); 889 890 pp->pr_nidle++; 891 892 /* 893 * Color this page. 894 */ 895 cp = (caddr_t)(cp + pp->pr_curcolor); 896 if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 897 pp->pr_curcolor = 0; 898 899 /* 900 * Adjust storage to apply alignment to `pr_itemoffset' in each item. 901 */ 902 if (ioff != 0) 903 cp = (caddr_t)(cp + (align - ioff)); 904 ph->ph_colored = cp; 905 906 /* 907 * Insert remaining chunks on the bucket list. 908 */ 909 n = pp->pr_itemsperpage; 910 pp->pr_nitems += n; 911 912 while (n--) { 913 pi = (struct pool_item *)cp; 914 915 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); 916 917 /* Insert on page list */ 918 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); 919 920 #ifdef DIAGNOSTIC 921 pi->pi_magic = PI_MAGIC; 922 #ifdef POOL_DEBUG 923 if (ph->ph_magic) { 924 for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); 925 i < pp->pr_size / sizeof(int); i++) 926 ip[i] = ph->ph_magic; 927 } 928 #endif /* POOL_DEBUG */ 929 #endif /* DIAGNOSTIC */ 930 cp = (caddr_t)(cp + pp->pr_size); 931 } 932 933 /* 934 * If the pool was depleted, point at the new page. 
935 */ 936 if (pp->pr_curpage == NULL) 937 pp->pr_curpage = ph; 938 939 if (++pp->pr_npages > pp->pr_hiwat) 940 pp->pr_hiwat = pp->pr_npages; 941 } 942 943 /* 944 * Used by pool_get() when nitems drops below the low water mark. This 945 * is used to catch up pr_nitems with the low water mark. 946 * 947 * Note we never wait for memory here, we let the caller decide what to do. 948 */ 949 int 950 pool_catchup(struct pool *pp) 951 { 952 struct pool_item_header *ph; 953 caddr_t cp; 954 int error = 0; 955 int slowdown; 956 957 while (POOL_NEEDS_CATCHUP(pp)) { 958 /* 959 * Call the page back-end allocator for more memory. 960 */ 961 cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); 962 if (cp != NULL) 963 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 964 if (cp == NULL || ph == NULL) { 965 if (cp != NULL) 966 pool_allocator_free(pp, cp); 967 error = ENOMEM; 968 break; 969 } 970 pool_prime_page(pp, cp, ph); 971 pp->pr_npagealloc++; 972 } 973 974 return (error); 975 } 976 977 void 978 pool_update_curpage(struct pool *pp) 979 { 980 981 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 982 if (pp->pr_curpage == NULL) { 983 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 984 } 985 } 986 987 void 988 pool_setlowat(struct pool *pp, int n) 989 { 990 991 pp->pr_minitems = n; 992 pp->pr_minpages = (n == 0) 993 ? 0 994 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 995 996 mtx_enter(&pp->pr_mtx); 997 /* Make sure we're caught up with the newly-set low water mark. */ 998 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 999 /* 1000 * XXX: Should we log a warning? Should we set up a timeout 1001 * to try again in a second or so? The latter could break 1002 * a caller's assumptions about interrupt protection, etc. 1003 */ 1004 } 1005 mtx_leave(&pp->pr_mtx); 1006 } 1007 1008 void 1009 pool_sethiwat(struct pool *pp, int n) 1010 { 1011 1012 pp->pr_maxpages = (n == 0) 1013 ? 0 1014 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1015 } 1016 1017 int 1018 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1019 { 1020 int error = 0; 1021 1022 if (n < pp->pr_nout) { 1023 error = EINVAL; 1024 goto done; 1025 } 1026 1027 pp->pr_hardlimit = n; 1028 pp->pr_hardlimit_warning = warnmsg; 1029 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1030 pp->pr_hardlimit_warning_last.tv_sec = 0; 1031 pp->pr_hardlimit_warning_last.tv_usec = 0; 1032 1033 done: 1034 return (error); 1035 } 1036 1037 void 1038 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1039 { 1040 pp->pr_crange = mode; 1041 } 1042 1043 void 1044 pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int), 1045 void (*dtor)(void *, void *), void *arg) 1046 { 1047 pp->pr_ctor = ctor; 1048 pp->pr_dtor = dtor; 1049 pp->pr_arg = arg; 1050 } 1051 /* 1052 * Release all complete pages that have not been used recently. 1053 * 1054 * Returns non-zero if any pages have been reclaimed. 1055 */ 1056 int 1057 pool_reclaim(struct pool *pp) 1058 { 1059 struct pool_item_header *ph, *phnext; 1060 struct pool_pagelist pq; 1061 1062 LIST_INIT(&pq); 1063 1064 mtx_enter(&pp->pr_mtx); 1065 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1066 phnext = LIST_NEXT(ph, ph_pagelist); 1067 1068 /* Check our minimum page claim */ 1069 if (pp->pr_npages <= pp->pr_minpages) 1070 break; 1071 1072 KASSERT(ph->ph_nmissing == 0); 1073 1074 /* 1075 * If freeing this page would put us below 1076 * the low water mark, stop now. 
1077 */ 1078 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1079 pp->pr_minitems) 1080 break; 1081 1082 pr_rmpage(pp, ph, &pq); 1083 } 1084 mtx_leave(&pp->pr_mtx); 1085 1086 if (LIST_EMPTY(&pq)) 1087 return (0); 1088 while ((ph = LIST_FIRST(&pq)) != NULL) { 1089 LIST_REMOVE(ph, ph_pagelist); 1090 pool_allocator_free(pp, ph->ph_page); 1091 if (pp->pr_roflags & PR_PHINPAGE) 1092 continue; 1093 pool_put(&phpool, ph); 1094 } 1095 1096 return (1); 1097 } 1098 1099 /* 1100 * Release all complete pages that have not been used recently 1101 * from all pools. 1102 */ 1103 void 1104 pool_reclaim_all(void) 1105 { 1106 struct pool *pp; 1107 int s; 1108 1109 s = splhigh(); 1110 TAILQ_FOREACH(pp, &pool_head, pr_poollist) 1111 pool_reclaim(pp); 1112 splx(s); 1113 } 1114 1115 #ifdef DDB 1116 #include <machine/db_machdep.h> 1117 #include <ddb/db_interface.h> 1118 #include <ddb/db_output.h> 1119 1120 /* 1121 * Diagnostic helpers. 1122 */ 1123 void 1124 pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...)) 1125 { 1126 pool_print1(pp, modif, pr); 1127 } 1128 1129 void 1130 pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...)) 1131 { 1132 struct pool_item_header *ph; 1133 #ifdef DIAGNOSTIC 1134 struct pool_item *pi; 1135 #endif 1136 1137 LIST_FOREACH(ph, pl, ph_pagelist) { 1138 (*pr)("\t\tpage %p, nmissing %d\n", 1139 ph->ph_page, ph->ph_nmissing); 1140 #ifdef DIAGNOSTIC 1141 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1142 if (pi->pi_magic != PI_MAGIC) { 1143 (*pr)("\t\t\titem %p, magic 0x%x\n", 1144 pi, pi->pi_magic); 1145 } 1146 } 1147 #endif 1148 } 1149 } 1150 1151 void 1152 pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...)) 1153 { 1154 struct pool_item_header *ph; 1155 int print_pagelist = 0; 1156 char c; 1157 1158 while ((c = *modif++) != '\0') { 1159 if (c == 'p') 1160 print_pagelist = 1; 1161 modif++; 1162 } 1163 1164 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1165 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1166 pp->pr_roflags); 1167 (*pr)("\talloc %p\n", pp->pr_alloc); 1168 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1169 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1170 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1171 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1172 1173 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1174 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1175 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1176 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1177 1178 if (print_pagelist == 0) 1179 return; 1180 1181 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1182 (*pr)("\n\tempty page list:\n"); 1183 pool_print_pagelist(&pp->pr_emptypages, pr); 1184 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1185 (*pr)("\n\tfull page list:\n"); 1186 pool_print_pagelist(&pp->pr_fullpages, pr); 1187 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1188 (*pr)("\n\tpartial-page list:\n"); 1189 pool_print_pagelist(&pp->pr_partpages, pr); 1190 1191 if (pp->pr_curpage == NULL) 1192 (*pr)("\tno current page\n"); 1193 else 1194 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1195 } 1196 1197 void 1198 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1199 { 1200 struct pool *pp; 1201 char maxp[16]; 1202 int ovflw; 1203 char mode; 1204 1205 mode = modif[0]; 1206 if (mode != '\0' && mode != 'a') { 1207 db_printf("usage: show all pools [/a]\n"); 1208 return; 
1209 } 1210 1211 if (mode == '\0') 1212 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1213 "Name", 1214 "Size", 1215 "Requests", 1216 "Fail", 1217 "Releases", 1218 "Pgreq", 1219 "Pgrel", 1220 "Npage", 1221 "Hiwat", 1222 "Minpg", 1223 "Maxpg", 1224 "Idle"); 1225 else 1226 db_printf("%-10s %18s %18s\n", 1227 "Name", "Address", "Allocator"); 1228 1229 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1230 if (mode == 'a') { 1231 db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp, 1232 pp->pr_alloc); 1233 continue; 1234 } 1235 1236 if (!pp->pr_nget) 1237 continue; 1238 1239 if (pp->pr_maxpages == UINT_MAX) 1240 snprintf(maxp, sizeof maxp, "inf"); 1241 else 1242 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1243 1244 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1245 (ovflw) += db_printf((fmt), \ 1246 (width) - (fixed) - (ovflw) > 0 ? \ 1247 (width) - (fixed) - (ovflw) : 0, \ 1248 (val)) - (width); \ 1249 if ((ovflw) < 0) \ 1250 (ovflw) = 0; \ 1251 } while (/* CONSTCOND */0) 1252 1253 ovflw = 0; 1254 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1255 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1256 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1257 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1258 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1259 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1260 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1261 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1262 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1263 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1264 PRWORD(ovflw, " %*s", 6, 1, maxp); 1265 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1266 1267 pool_chk(pp); 1268 } 1269 } 1270 #endif /* DDB */ 1271 1272 #if defined(POOL_DEBUG) || defined(DDB) 1273 int 1274 pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected) 1275 { 1276 struct pool_item *pi; 1277 caddr_t page; 1278 int n; 1279 #if defined(DIAGNOSTIC) && defined(POOL_DEBUG) 1280 int i, *ip; 1281 #endif 1282 const char *label = pp->pr_wchan; 1283 1284 page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask); 1285 if (page != ph->ph_page && 1286 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1287 printf("%s: ", label); 1288 printf("pool(%p:%s): page inconsistency: page %p; " 1289 "at page head addr %p (p %p)\n", 1290 pp, pp->pr_wchan, ph->ph_page, ph, page); 1291 return 1; 1292 } 1293 1294 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0; 1295 pi != NULL; 1296 pi = TAILQ_NEXT(pi,pi_list), n++) { 1297 1298 #ifdef DIAGNOSTIC 1299 if (pi->pi_magic != PI_MAGIC) { 1300 printf("%s: ", label); 1301 printf("pool(%s): free list modified: " 1302 "page %p; item ordinal %d; addr %p " 1303 "(p %p); offset 0x%x=0x%x\n", 1304 pp->pr_wchan, ph->ph_page, n, pi, page, 1305 0, pi->pi_magic); 1306 } 1307 #ifdef POOL_DEBUG 1308 if (pool_debug && ph->ph_magic) { 1309 for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); 1310 i < pp->pr_size / sizeof(int); i++) { 1311 if (ip[i] != ph->ph_magic) { 1312 printf("pool(%s): free list modified: " 1313 "page %p; item ordinal %d; addr %p " 1314 "(p %p); offset 0x%x=0x%x\n", 1315 pp->pr_wchan, ph->ph_page, n, pi, 1316 page, i * sizeof(int), ip[i]); 1317 } 1318 } 1319 } 1320 1321 #endif /* POOL_DEBUG */ 1322 #endif /* DIAGNOSTIC */ 1323 page = 1324 (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); 1325 if (page == ph->ph_page) 1326 continue; 1327 1328 printf("%s: ", label); 1329 printf("pool(%p:%s): page inconsistency: page %p;" 1330 " item ordinal %d; addr %p (p %p)\n", pp, 1331 pp->pr_wchan, ph->ph_page, n, pi, page); 1332 return 1; 1333 } 1334 if (n + ph->ph_nmissing 
!= pp->pr_itemsperpage) { 1335 printf("pool(%p:%s): page inconsistency: page %p;" 1336 " %d on list, %d missing, %d items per page\n", pp, 1337 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1338 pp->pr_itemsperpage); 1339 return 1; 1340 } 1341 if (expected >= 0 && n != expected) { 1342 printf("pool(%p:%s): page inconsistency: page %p;" 1343 " %d on list, %d missing, %d expected\n", pp, 1344 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1345 expected); 1346 return 1; 1347 } 1348 return 0; 1349 } 1350 1351 int 1352 pool_chk(struct pool *pp) 1353 { 1354 struct pool_item_header *ph; 1355 int r = 0; 1356 1357 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) 1358 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1359 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) 1360 r += pool_chk_page(pp, ph, 0); 1361 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) 1362 r += pool_chk_page(pp, ph, -1); 1363 1364 return (r); 1365 } 1366 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1367 1368 #ifdef DDB 1369 void 1370 pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...), 1371 void (*func)(void *, int, int (*)(const char *, ...))) 1372 { 1373 struct pool_item_header *ph; 1374 struct pool_item *pi; 1375 caddr_t cp; 1376 int n; 1377 1378 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 1379 cp = ph->ph_colored; 1380 n = ph->ph_nmissing; 1381 1382 while (n--) { 1383 func(cp, full, pr); 1384 cp += pp->pr_size; 1385 } 1386 } 1387 1388 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 1389 cp = ph->ph_colored; 1390 n = ph->ph_nmissing; 1391 1392 do { 1393 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1394 if (cp == (caddr_t)pi) 1395 break; 1396 } 1397 if (cp != (caddr_t)pi) { 1398 func(cp, full, pr); 1399 n--; 1400 } 1401 1402 cp += pp->pr_size; 1403 } while (n > 0); 1404 } 1405 } 1406 #endif 1407 1408 /* 1409 * We have three different sysctls. 1410 * kern.pool.npools - the number of pools. 1411 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1412 * kern.pool.name.<pool#> - the name for pool#. 1413 */ 1414 int 1415 sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep) 1416 { 1417 struct pool *pp, *foundpool = NULL; 1418 size_t buflen = where != NULL ? *sizep : 0; 1419 int npools = 0, s; 1420 unsigned int lookfor; 1421 size_t len; 1422 1423 switch (*name) { 1424 case KERN_POOL_NPOOLS: 1425 if (namelen != 1 || buflen != sizeof(int)) 1426 return (EINVAL); 1427 lookfor = 0; 1428 break; 1429 case KERN_POOL_NAME: 1430 if (namelen != 2 || buflen < 1) 1431 return (EINVAL); 1432 lookfor = name[1]; 1433 break; 1434 case KERN_POOL_POOL: 1435 if (namelen != 2 || buflen != sizeof(struct pool)) 1436 return (EINVAL); 1437 lookfor = name[1]; 1438 break; 1439 default: 1440 return (EINVAL); 1441 } 1442 1443 s = splvm(); 1444 1445 TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1446 npools++; 1447 if (lookfor == pp->pr_serial) { 1448 foundpool = pp; 1449 break; 1450 } 1451 } 1452 1453 splx(s); 1454 1455 if (*name != KERN_POOL_NPOOLS && foundpool == NULL) 1456 return (ENOENT); 1457 1458 switch (*name) { 1459 case KERN_POOL_NPOOLS: 1460 return copyout(&npools, where, buflen); 1461 case KERN_POOL_NAME: 1462 len = strlen(foundpool->pr_wchan) + 1; 1463 if (*sizep < len) 1464 return (ENOMEM); 1465 *sizep = len; 1466 return copyout(foundpool->pr_wchan, where, len); 1467 case KERN_POOL_POOL: 1468 return copyout(foundpool, where, buflen); 1469 } 1470 /* NOTREACHED */ 1471 return (0); /* XXX - Stupid gcc */ 1472 } 1473 1474 /* 1475 * Pool backend allocators. 
1476 * 1477 * Each pool has a backend allocator that handles allocation, deallocation 1478 */ 1479 void *pool_page_alloc(struct pool *, int, int *); 1480 void pool_page_free(struct pool *, void *); 1481 1482 /* 1483 * safe for interrupts, name preserved for compat this is the default 1484 * allocator 1485 */ 1486 struct pool_allocator pool_allocator_nointr = { 1487 pool_page_alloc, pool_page_free, 0, 1488 }; 1489 1490 /* 1491 * XXX - we have at least three different resources for the same allocation 1492 * and each resource can be depleted. First we have the ready elements in 1493 * the pool. Then we have the resource (typically a vm_map) for this 1494 * allocator, then we have physical memory. Waiting for any of these can 1495 * be unnecessary when any other is freed, but the kernel doesn't support 1496 * sleeping on multiple addresses, so we have to fake. The caller sleeps on 1497 * the pool (so that we can be awakened when an item is returned to the pool), 1498 * but we set PA_WANT on the allocator. When a page is returned to 1499 * the allocator and PA_WANT is set pool_allocator_free will wakeup all 1500 * sleeping pools belonging to this allocator. (XXX - thundering herd). 1501 * We also wake up the allocator in case someone without a pool (malloc) 1502 * is sleeping waiting for this allocator. 1503 */ 1504 1505 void * 1506 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1507 { 1508 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 1509 void *v; 1510 1511 if (waitok) 1512 mtx_leave(&pp->pr_mtx); 1513 v = pp->pr_alloc->pa_alloc(pp, flags, slowdown); 1514 if (waitok) 1515 mtx_enter(&pp->pr_mtx); 1516 1517 return (v); 1518 } 1519 1520 void 1521 pool_allocator_free(struct pool *pp, void *v) 1522 { 1523 struct pool_allocator *pa = pp->pr_alloc; 1524 1525 (*pa->pa_free)(pp, v); 1526 } 1527 1528 void * 1529 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1530 { 1531 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1532 1533 kd.kd_waitok = (flags & PR_WAITOK); 1534 kd.kd_slowdown = slowdown; 1535 1536 return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd)); 1537 } 1538 1539 void 1540 pool_page_free(struct pool *pp, void *v) 1541 { 1542 km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange); 1543 } 1544 1545 void * 1546 pool_large_alloc(struct pool *pp, int flags, int *slowdown) 1547 { 1548 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1549 void *v; 1550 int s; 1551 1552 kd.kd_waitok = (flags & PR_WAITOK); 1553 kd.kd_slowdown = slowdown; 1554 1555 s = splvm(); 1556 v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange, 1557 &kd); 1558 splx(s); 1559 1560 return (v); 1561 } 1562 1563 void 1564 pool_large_free(struct pool *pp, void *v) 1565 { 1566 int s; 1567 1568 s = splvm(); 1569 km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange); 1570 splx(s); 1571 } 1572 1573 void * 1574 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown) 1575 { 1576 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1577 1578 kd.kd_waitok = (flags & PR_WAITOK); 1579 kd.kd_slowdown = slowdown; 1580 1581 return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd)); 1582 } 1583 1584 void 1585 pool_large_free_ni(struct pool *pp, void *v) 1586 { 1587 km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange); 1588 } 1589
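
/*
 * Illustrative usage sketch of the pool API implemented above.  The
 * consumer below ("struct foo", foo_pool, foo_init(), foo_get(),
 * foo_put() and the "foopl" wait channel) is hypothetical and the block
 * is kept out of compilation; it only shows the expected calling
 * sequence.
 */
#if 0
struct foo {
	int	f_state;
};

struct pool foo_pool;

void
foo_init(void)
{
	/* Items smaller than sizeof(struct pool_item) are rounded up. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
	/* Initialize the pool mutex at the IPL the pool is used from. */
	pool_setipl(&foo_pool, IPL_BIO);
}

struct foo *
foo_get(void)
{
	/* PR_WAITOK may sleep for memory; PR_ZERO clears the item. */
	return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
}

void
foo_put(struct foo *f)
{
	/* Returning the item may wake sleepers and free idle pages. */
	pool_put(&foo_pool, f);
}
#endif	/* usage example */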