/*	$OpenBSD: subr_pool.c,v 1.121 2013/05/31 20:44:10 tedu Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>
#include <dev/rndvar.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
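
/*
 * Illustrative usage sketch (not part of the original file): a typical
 * kernel subsystem consumes this API roughly as follows.  The pool name,
 * "struct foo" and the chosen IPL are hypothetical.
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_setipl(&foo_pool, IPL_BIO);
 *
 *	struct foo *fp = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, fp);
 */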

/* List of all pools */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	XSIMPLEQ_HEAD(,pool_item) ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
	int			ph_magic;
};

struct pool_item {
	u_int32_t pi_magic;
	/* Other entries use only this list entry */
	XSIMPLEQ_ENTRY(pool_item) pi_list;
};

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 * allocator. For now, instead of having the allocator figure out
 * the allocation size from the pool (which can be done trivially
 * with round_page(pr_itemsperpage * pr_size)) which would require
 * lots of changes everywhere, we just create allocators for each
 * size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     /* __attribute__((__format__(__kprintf__,1,2))) */);
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     /* __attribute__((__format__(__kprintf__,1,2))) */);
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	pp->pr_npages--;
	pp->pr_npagefree++;
	pool_update_curpage(pp);

	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;
#ifdef DIAGNOSTIC
	struct pool *iter;

	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("init pool already on list");
	}
#endif

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/* constructor, destructor, and arg */
	pp->pr_ctor = NULL;
	pp->pr_dtor = NULL;
	pp->pr_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool *prev, *iter;

	/* Remove from global pool list */
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				goto removed;
			}
			prev = iter;
		}
#ifdef DIAGNOSTIC
		panic("destroyed pool not on list");
#endif
	}
removed:
#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
		    PR_NOWAIT);
#ifdef DIAGNOSTIC
	if (pool_debug && ph != NULL)
		ph->ph_magic = poison_value(ph);
#endif
	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0) {
		assertwaitok();
		if (pool_debug == 2)
			yield();
	}
#endif /* DIAGNOSTIC */

	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_get");
	}
#endif
	v = pool_do_get(pp, flags);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_get");
	}
#endif
	if (v != NULL)
		pp->pr_nget++;
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (pp->pr_ctor) {
		if (flags & PR_ZERO)
			panic("pool_get: PR_ZERO when ctor set");
		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
			mtx_enter(&pp->pr_mtx);
			pp->pr_nget--;
			pool_do_put(pp, v);
			mtx_leave(&pp->pr_mtx);
			v = NULL;
		}
	} else {
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit)
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (v != NULL)
			ph = pool_alloc_item_header(pp, v, flags);

		if (v == NULL || ph == NULL) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if ((v = pi = XSIMPLEQ_FIRST(&ph->ph_itemlist)) == NULL) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (pi->pi_magic != poison_value(pi))
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
	if (pool_debug && ph->ph_magic) {
		size_t pidx;
		int pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("pool_do_get(%s): free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    pp->pr_wchan, ph->ph_page, pi,
			    pidx * sizeof(int), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (XSIMPLEQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (ph->ph_nmissing != pp->pr_itemsperpage) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_put");
	}
#endif
	pool_do_put(pp, v);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_put");
	}
#endif
	pp->pr_nput++;
	mtx_leave(&pp->pr_mtx);
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (pp->pr_nout == 0) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = poison_value(pi);
	if (ph->ph_magic) {
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
	}
#endif /* DIAGNOSTIC */

	XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		wakeup(pp);
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water mark,
	 *	    free the page back to the system.
	 *
	 *	(2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
			pool_update_curpage(pp);
		}
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	XSIMPLEQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = poison_value(pi);
		if (ph->ph_magic) {
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
		}
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}
/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;
	int s;

	s = splhigh();
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	splx(s);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != poison_value(pi)) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != poison_value(pi)) {
			printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
		if (pool_debug && ph->ph_magic) {
			size_t pidx;
			int pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */
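
/*
 * Illustrative note (not part of the original file): in a kernel built
 * with POOL_DEBUG defined, the consistency checks above also run on every
 * pool_get()/pool_put() for pools created with PR_DEBUGCHK in their flags,
 * e.g. for a hypothetical pool:
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, PR_DEBUGCHK,
 *	    "foopl", NULL);
 */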

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */,
    void (*func)(void *, int, int (*)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0); /* XXX - Stupid gcc */
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	pool_page_free(struct pool *, void *);

/*
 * safe for interrupts, name preserved for compat this is the default
 * allocator
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set pool_allocator_free will wakeup all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
	    &kd);
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
}
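
/*
 * Illustrative sketch (not part of the original file): a subsystem that
 * wants its pages from somewhere other than the default allocator can
 * supply its own struct pool_allocator, mirroring pool_allocator_nointr
 * above.  The "mydev" names below are hypothetical.
 *
 *	void	*mydev_page_alloc(struct pool *, int, int *);
 *	void	 mydev_page_free(struct pool *, void *);
 *
 *	struct pool_allocator mydev_allocator = {
 *		mydev_page_alloc, mydev_page_free, 0,
 *	};
 *
 *	pool_init(&mydev_pool, sizeof(struct mydev_buf), 0, 0, 0,
 *	    "mydevpl", &mydev_allocator);
 */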