/*	$OpenBSD: subr_pool.c,v 1.98 2010/09/26 21:03:57 tedu Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>


/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
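
/*
 * Illustrative sketch, not part of the original file: roughly how a
 * client subsystem uses the interface described above.  The guard macro
 * and the names (POOL_EXAMPLE, example_softc, example_pool, "examplpl")
 * are hypothetical; only pool_init(), pool_get(), pool_put() and
 * pool_destroy() are the real interface implemented below.
 */
#ifdef POOL_EXAMPLE	/* hypothetical guard, never defined in-tree */
struct example_softc {
	int	sc_state;
};

struct pool example_pool;

void
pool_example(void)
{
	struct example_softc *sc;

	/* One-time setup, typically from the subsystem's init path. */
	pool_init(&example_pool, sizeof(struct example_softc), 0, 0, 0,
	    "examplpl", NULL);

	/* PR_WAITOK may sleep until a page can be allocated. */
	sc = pool_get(&example_pool, PR_WAITOK | PR_ZERO);
	sc->sc_state = 1;

	/* Items must be returned to the pool they were taken from. */
	pool_put(&example_pool, sc);

	/* Tear down once every item has been put back. */
	pool_destroy(&example_pool);
}
#endif /* POOL_EXAMPLE */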

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
};

struct pool_item {
#ifdef DIAGNOSTIC
	u_int32_t pi_magic;
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#ifdef DEADBEEF1
#define	PI_MAGIC DEADBEEF1
#else
#define	PI_MAGIC 0xdeafbeef
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, const char *, struct pool_item_header *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 *       allocator. For now, instead of having the allocator figure out
 *       the allocation size from the pool (which can be done trivially
 *       with round_page(pr_itemsperpage * pr_size)) which would require
 *       lots of changes everywhere, we just create allocators for each
 *       size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *,
	    int (*)(const char *, ...));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}
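
/*
 * Illustrative sketch, not part of the original file: the overlap trick
 * used by phtree_compare(), with concrete numbers.  Suppose an off-page
 * header ph with ph_page = 0x1000 and ph_pagesize = 0x1000 sits in the
 * tree and we look up the item at address 0x1234.  The key below uses
 * ph_pagesize = 0, so the key itself can never "swallow" a later header,
 * but it falls inside [0x1000, 0x2000) and therefore compares equal to
 * ph, which RB_FIND returns.  The function name is hypothetical.
 */
#ifdef POOL_EXAMPLE	/* hypothetical guard, never defined in-tree */
static struct pool_item_header *
pool_example_lookup(struct pool *pp, void *item)
{
	struct pool_item_header key;

	key.ph_page = (caddr_t)item;	/* e.g. 0x1234 */
	key.ph_pagesize = 0;		/* zero-sized key never overlaps a later page */
	return (RB_FIND(phtree, &pp->pr_phtree, &key));
}
#endif /* POOL_EXAMPLE */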

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/* constructor, destructor, and arg */
	pp->pr_ctor = NULL;
	pp->pr_dtor = NULL;
	pp->pr_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &no_constraint;
	pp->pr_pa_nsegs = 0;

	/* Insert this into the list of all pools. */
	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
		    PR_NOWAIT);

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0)
		assertwaitok();
#endif /* DIAGNOSTIC */

	mtx_enter(&pp->pr_mtx);
	v = pool_do_get(pp, flags);
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (pp->pr_ctor) {
		if (flags & PR_ZERO)
			panic("pool_get: PR_ZERO when ctor set");
		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
			mtx_enter(&pp->pr_mtx);
			pool_do_put(pp, v);
			mtx_leave(&pp->pr_mtx);
			v = NULL;
		}
	} else {
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	if (v != NULL)
		pp->pr_nget++;
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit))
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (__predict_false(pi->pi_magic != PI_MAGIC))
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
	    i < pp->pr_size / sizeof(int); i++) {
		if (ip[i] != PI_MAGIC) {
			panic("pool_do_get(%s): free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%x",
			    pp->pr_wchan, ph->ph_page, pi,
			    i * sizeof(int), ip[i]);
		}
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages.
		 * This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
	pool_do_put(pp, v);
	mtx_leave(&pp->pr_mtx);
	pp->pr_nput++;
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
	    i < pp->pr_size / sizeof(int); i++)
		ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup(pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water mark,
	 *	    free the page back to the system.
	 *
	 *	(2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
		}
		pool_update_curpage(pp);
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
		    i < pp->pr_size / sizeof(int); i++)
			ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat().
	 */
	pp->pr_maxpages = (n == 0 || n == UINT_MAX)
		? n
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, struct uvm_constraint_range *range,
    int nsegs)
{
	/*
	 * Subsequent changes to the constraints are only
	 * allowed to make them _more_ strict.
	 */
	KASSERT(pp->pr_crange->ucr_high >= range->ucr_high &&
	    pp->pr_crange->ucr_low <= range->ucr_low);

	pp->pr_crange = range;
	pp->pr_pa_nsegs = nsegs;
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}
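
/*
 * Illustrative sketch, not part of the original file: tuning knobs a
 * subsystem might apply right after pool_init().  The function names,
 * constructor/destructor, message string and numbers are hypothetical;
 * the pool_set*() routines are the real ones defined above.
 */
#ifdef POOL_EXAMPLE	/* hypothetical guard, never defined in-tree */
int	example_ctor(void *, void *, int);
void	example_dtor(void *, void *);

void
pool_example_tune(struct pool *pp)
{
	/* Keep at least 32 items primed; pool_catchup() maintains this. */
	pool_setlowat(pp, 32);

	/* Cap the idle pages kept around at roughly 256 items' worth. */
	pool_sethiwat(pp, 256);

	/* Refuse more than 1024 outstanding items; warn at most every 10s. */
	pool_sethardlimit(pp, 1024, "example: pool limit reached", 10);

	/* Run a constructor/destructor around every get/put. */
	pool_set_ctordtor(pp, example_ctor, example_dtor, NULL);
}
#endif /* POOL_EXAMPLE */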

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr,
    db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-10s %18s %18s\n",
		    "Name", "Address", "Allocator");

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp, pp->pr_wchan);
	}
}

int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = TAILQ_NEXT(pi, pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
		    i < pp->pr_size / sizeof(int); i++) {
			if (ip[i] != PI_MAGIC) {
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%x=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, i * sizeof(int), ip[i]);
			}
		}

#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): "
		    "page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);

	return (r);
}

void
pool_walk(struct pool *pp, int full, int (*pr)(const char *, ...),
    void (*func)(void *, int, int (*)(const char *, ...)))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0); /* XXX - Stupid gcc */
}
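
/*
 * Illustrative sketch, not part of the original file: roughly how the
 * sysctl interface above could be reached from userland.  KERN_POOL is
 * assumed to be the kern.pool node in <sys/sysctl.h>; only the
 * KERN_POOL_NPOOLS/KERN_POOL_NAME/KERN_POOL_POOL names come from this
 * file.  Treat the exact mib layout as an assumption.
 */
#if 0	/* userland code, never compiled into the kernel */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
print_pool_count(void)
{
	int mib[] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
	int npools;
	size_t len = sizeof(npools);	/* must be exactly sizeof(int) */

	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
		return (-1);
	printf("%d pools\n", npools);
	return (0);
}
#endif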
1422 * 1423 * Each pool has a backend allocator that handles allocation, deallocation 1424 */ 1425 void *pool_page_alloc(struct pool *, int, int *); 1426 void pool_page_free(struct pool *, void *); 1427 1428 /* 1429 * safe for interrupts, name preserved for compat this is the default 1430 * allocator 1431 */ 1432 struct pool_allocator pool_allocator_nointr = { 1433 pool_page_alloc, pool_page_free, 0, 1434 }; 1435 1436 /* 1437 * XXX - we have at least three different resources for the same allocation 1438 * and each resource can be depleted. First we have the ready elements in 1439 * the pool. Then we have the resource (typically a vm_map) for this 1440 * allocator, then we have physical memory. Waiting for any of these can 1441 * be unnecessary when any other is freed, but the kernel doesn't support 1442 * sleeping on multiple addresses, so we have to fake. The caller sleeps on 1443 * the pool (so that we can be awakened when an item is returned to the pool), 1444 * but we set PA_WANT on the allocator. When a page is returned to 1445 * the allocator and PA_WANT is set pool_allocator_free will wakeup all 1446 * sleeping pools belonging to this allocator. (XXX - thundering herd). 1447 * We also wake up the allocator in case someone without a pool (malloc) 1448 * is sleeping waiting for this allocator. 1449 */ 1450 1451 void * 1452 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1453 { 1454 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE; 1455 void *v; 1456 1457 if (waitok) 1458 mtx_leave(&pp->pr_mtx); 1459 v = pp->pr_alloc->pa_alloc(pp, flags, slowdown); 1460 if (waitok) 1461 mtx_enter(&pp->pr_mtx); 1462 1463 return (v); 1464 } 1465 1466 void 1467 pool_allocator_free(struct pool *pp, void *v) 1468 { 1469 struct pool_allocator *pa = pp->pr_alloc; 1470 1471 (*pa->pa_free)(pp, v); 1472 } 1473 1474 void * 1475 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1476 { 1477 int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT; 1478 1479 return (uvm_km_getpage_pla(kfl, slowdown, pp->pr_crange->ucr_low, 1480 pp->pr_crange->ucr_high, 0, 0)); 1481 } 1482 1483 void 1484 pool_page_free(struct pool *pp, void *v) 1485 { 1486 uvm_km_putpage(v); 1487 } 1488 1489 void * 1490 pool_large_alloc(struct pool *pp, int flags, int *slowdown) 1491 { 1492 int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT; 1493 vaddr_t va; 1494 int s; 1495 1496 s = splvm(); 1497 va = uvm_km_kmemalloc_pla(kmem_map, NULL, pp->pr_alloc->pa_pagesz, 0, 1498 kfl, pp->pr_crange->ucr_low, pp->pr_crange->ucr_high, 1499 0, 0, pp->pr_pa_nsegs); 1500 splx(s); 1501 1502 return ((void *)va); 1503 } 1504 1505 void 1506 pool_large_free(struct pool *pp, void *v) 1507 { 1508 int s; 1509 1510 s = splvm(); 1511 uvm_km_free(kmem_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 1512 splx(s); 1513 } 1514 1515 void * 1516 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown) 1517 { 1518 int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT; 1519 1520 return ((void *)uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object, 1521 pp->pr_alloc->pa_pagesz, 0, kfl, 1522 pp->pr_crange->ucr_low, pp->pr_crange->ucr_high, 1523 0, 0, pp->pr_pa_nsegs)); 1524 } 1525 1526 void 1527 pool_large_free_ni(struct pool *pp, void *v) 1528 { 1529 uvm_km_free(kernel_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 1530 } 1531