/*	$OpenBSD: subr_pool.c,v 1.93 2010/06/27 03:03:48 thib Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>


/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
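 *
 * Typical caller usage, as a sketch (the pool "foopool" and item type
 * "struct foo" below are illustrative only and are not defined in this
 * file):
 *
 *	struct pool foopool;
 *	struct foo *f;
 *
 *	pool_init(&foopool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	...
 *	f = pool_get(&foopool, PR_WAITOK);
 *	...
 *	pool_put(&foopool, f);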
 */

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
};

struct pool_item {
#ifdef DIAGNOSTIC
	u_int32_t pi_magic;
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#ifdef DEADBEEF1
#define	PI_MAGIC DEADBEEF1
#else
#define	PI_MAGIC 0xdeafbeef
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Default constraint range for pools, covering the whole
 * address space.
 */
struct uvm_constraint_range pool_full_range = { 0x0, (paddr_t)-1 };

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, const char *, struct pool_item_header *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 *	 allocator. For now, instead of having the allocator figure out
 *	 the allocation size from the pool (which can be done trivially
 *	 with round_page(pr_itemsperpage * pr_size)) which would require
 *	 lots of changes everywhere, we just create allocators for each
 *	 size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *,
	    int (*)(const char *, ...));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink the page from the pool and release it (or queue it for
	 * release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
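			 *
			 * As a rough illustration (hypothetical numbers,
			 * assuming PAGE_SIZE is 4096): for size 6144,
			 * size % PAGE_SIZE is 2048 and roundup(2048, 1024)
			 * is 2048, so psize = 4096 / 2048 = 2 and an
			 * 8192-byte page size is used; for size 12288 (an
			 * exact page multiple), psize = 12288 / 4096 = 3.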
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/* constructor, destructor, and arg */
	pp->pr_ctor = NULL;
	pp->pr_dtor = NULL;
	pp->pr_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
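	 *
	 * For instance (illustrative numbers only): with off = 4000,
	 * pr_size = 96 and align = 8, itemsperpage is 41, the slack is
	 * 4000 - 41 * 96 = 64 and maxcolor is 64, so successive pages
	 * place their first item at offsets 0, 8, 16, ..., 64 before
	 * the color wraps back to 0.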
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &pool_full_range;
	pp->pr_pa_nsegs = 0;

	/* Insert this into the list of all pools. */
	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, flags & ~(PR_WAITOK | PR_ZERO));

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0)
		splassert(IPL_NONE);
#endif /* DIAGNOSTIC */

	mtx_enter(&pp->pr_mtx);
	v = pool_do_get(pp, flags);
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (pp->pr_ctor) {
		if (flags & PR_ZERO)
			panic("pool_get: PR_ZERO when ctor set");
		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
			mtx_enter(&pp->pr_mtx);
			pool_do_put(pp, v);
			mtx_leave(&pp->pr_mtx);
			v = NULL;
		}
	} else {
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	if (v != NULL)
		pp->pr_nget++;
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit))
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (__predict_false(pi->pi_magic != PI_MAGIC))
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
	    i < pp->pr_size / sizeof(int); i++) {
		if (ip[i] != PI_MAGIC) {
			panic("pool_do_get(%s): free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%x",
			    pp->pr_wchan, ph->ph_page, pi,
			    i * sizeof(int), ip[i]);
		}
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
	pool_do_put(pp, v);
	mtx_leave(&pp->pr_mtx);
	pp->pr_nput++;
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
	    i < pp->pr_size / sizeof(int); i++)
		ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup(pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 * (1) If we have more pages than the page high water mark,
	 *     free the page back to the system.
	 *
	 * (2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
		}
		pool_update_curpage(pp);
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
		    i < pp->pr_size / sizeof(int); i++)
			ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat().
	 */
	pp->pr_maxpages = (n == 0 || n == UINT_MAX)
	    ? n
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, struct uvm_constraint_range *range,
    int nsegs)
{
	/*
	 * Subsequent changes to the constraints are only
	 * allowed to make them _more_ strict.
	 */
	KASSERT(pp->pr_crange->ucr_high >= range->ucr_high &&
	    pp->pr_crange->ucr_low <= range->ucr_low);

	pp->pr_crange = range;
	pp->pr_pa_nsegs = nsegs;
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-10s %18s %18s\n",
		    "Name", "Address", "Allocator");

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp, pp->pr_wchan);
	}
}

int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	    pi != NULL;
	    pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
		    i < pp->pr_size / sizeof(int); i++) {
			if (ip[i] != PI_MAGIC) {
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%x=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, i * sizeof(int), ip[i]);
			}
		}

#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);

	return (r);
}

void
pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...),
    void (*func)(void *, int, int (*)(const char *, ...)))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0); /* XXX - Stupid gcc */
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles page allocation and
 * deallocation.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; name preserved for compatibility.
 * This is the default allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set, pool_allocator_free will wake up all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT;

	return (uvm_km_getpage_pla(kfl, slowdown, pp->pr_crange->ucr_low,
	    pp->pr_crange->ucr_high, 0, 0));
}

void
pool_page_free(struct pool *pp, void *v)
{
	uvm_km_putpage(v);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT;
	vaddr_t va;
	int s;

	s = splvm();
	va = uvm_km_kmemalloc_pla(kmem_map, NULL, pp->pr_alloc->pa_pagesz, kfl,
	    pp->pr_crange->ucr_low, pp->pr_crange->ucr_high,
	    0, 0, pp->pr_pa_nsegs);
	splx(s);

	return ((void *)va);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	uvm_km_free(kmem_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT;

	return ((void *)uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object,
	    pp->pr_alloc->pa_pagesz, kfl,
	    pp->pr_crange->ucr_low, pp->pr_crange->ucr_high,
	    0, 0, pp->pr_pa_nsegs));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	uvm_km_free(kernel_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz);
}