/*	$OpenBSD: subr_pool.c,v 1.78 2009/02/17 07:53:55 deraadt Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>


/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
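 *
 * A minimal usage sketch for orientation (struct mything and the pool
 * name "mythingpl" are placeholders, not part of this file):
 *
 *	struct pool mything_pool;
 *
 *	pool_init(&mything_pool, sizeof(struct mything), 0, 0, 0,
 *	    "mythingpl", NULL);
 *	...
 *	struct mything *m = pool_get(&mything_pool, PR_WAITOK);
 *	...
 *	pool_put(&mything_pool, m);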
 */

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	SPLAY_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
};

struct pool_item {
#ifdef DIAGNOSTIC
	u_int32_t pi_magic;
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#ifdef DEADBEEF1
#define	PI_MAGIC DEADBEEF1
#else
#define	PI_MAGIC 0xdeafbeef
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, const char *, struct pool_item_header *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 *       allocator. For now, instead of having the allocator figure out
 *       the allocation size from the pool (which can be done trivially
 *       with round_page(pr_itemsperpage * pr_size)) which would require
 *       lots of changes everywhere, we just create allocators for each
 *       size. We limit those to 128 pages.
 */
#define POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *,
	    int (*)(const char *, ...));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...));
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
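 *
 * With PR_PHINPAGE the header lives at a fixed offset inside the page
 * itself, so it is found by masking the item address down to the page
 * boundary. Otherwise the header is off-page and is looked up in the
 * pool's splay tree of page headers.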
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
	pp->pr_npages--;
	pp->pr_npagefree++;

	pool_update_curpage(pp);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
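			 *
			 * Worked example (assuming PAGE_SIZE is 4096): a
			 * 5000-byte item is not a page multiple and leaves
			 * a 904-byte remainder, which rounds up to 1024,
			 * so psize = 4096 / 1024 = 4 and the pool gets a
			 * 16 KB allocator, each chunk holding three items.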
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into a splay tree, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		SPLAY_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
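	 * Successive pages start their first item at an offset that is
	 * stepped by the alignment and wraps at pr_maxcolor, so items on
	 * different pages don't all land on the same cache lines.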
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* Insert this into the list of all pools. */
	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	mtx_enter(&pp->pr_mtx);
	v = pool_do_get(pp, flags);
	mtx_leave(&pp->pr_mtx);
	if (v && pp->pr_ctor && pp->pr_ctor(pp->pr_arg, v, flags)) {
		mtx_enter(&pp->pr_mtx);
		pool_do_put(pp, v);
		mtx_leave(&pp->pr_mtx);
		v = NULL;
	}
	if (v) {
		pp->pr_nget++;
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;
#ifdef POOL_DEBUG
	int i, *ip;
#endif

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0)
		splassert(IPL_NONE);
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);
#endif /* DIAGNOSTIC */

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit))
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
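		 * The warning is rate-limited through ratecheck() using
		 * pr_hardlimit_ratecap, so a pool stuck at its limit
		 * doesn't flood the log.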
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (__predict_false(pi->pi_magic != PI_MAGIC))
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
	    i < pp->pr_size / sizeof(int); i++) {
		if (ip[i] != PI_MAGIC) {
			panic("pool_do_get(%s): free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%x",
			    pp->pr_wchan, ph->ph_page, pi,
			    i * sizeof(int), ip[i]);
		}
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
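		 * (pool_update_curpage() prefers a partially-full page and
		 * falls back to an empty one, so a full page isn't looked
		 * at again until one of its items is returned.)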
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
	pool_do_put(pp, v);
	mtx_leave(&pp->pr_mtx);
	pp->pr_nput++;
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
#ifdef POOL_DEBUG
	int i, *ip;
#endif

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
	for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
	    i < pp->pr_size / sizeof(int); i++)
		ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup(pp);
		return;
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 * (1) If we have more idle pages than the page high water mark,
	 *     free the page back to the system.
	 *
	 * (2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
		}
		pool_update_curpage(pp);
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
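 *
 * The request is rounded up to whole pages, and pr_minpages is raised
 * for each page added, so pool_reclaim() won't immediately take the
 * primed pages back.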
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
#ifdef POOL_DEBUG
	int i, *ip;
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		SPLAY_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
		    i < pp->pr_size / sizeof(int); i++)
			ip[i] = PI_MAGIC;
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat().
	 */
	pp->pr_maxpages = (n == 0 || n == UINT_MAX)
		? n
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

done:
	return (error);
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
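 *
 * These back ddb's "show all pools" output (see the usage string in
 * db_show_all_pools() below); they walk pool state without taking
 * pr_mtx, so they are only intended to be run from the debugger.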
 */
void
pool_printit(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif, int (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-10s %18s %18s\n",
		    "Name", "Address", "Allocator");

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-10s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp, pp->pr_wchan);
	}
}

int
pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
#ifdef POOL_DEBUG
	int i, *ip;
#endif

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
#ifdef POOL_DEBUG
		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
		    i < pp->pr_size / sizeof(int); i++) {
			if (ip[i] != PI_MAGIC) {
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%x=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, i * sizeof(int), ip[i]);
			}
		}

#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		if (label != NULL)
			printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, label, ph);

	return (r);
}

void
pool_walk(struct pool *pp, void (*func)(void *))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0); /* XXX - Stupid gcc */
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles the allocation and
 * deallocation of pool pages.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; the name is preserved for compatibility.
 * This is the default allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set pool_allocator_free will wakeup all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return (uvm_km_getpage(waitok, slowdown));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_putpage(v);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT;
	vaddr_t va;
	int s;

	s = splvm();
	va = uvm_km_kmemalloc(kmem_map, NULL, pp->pr_alloc->pa_pagesz, kfl);
	splx(s);

	return ((void *)va);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	uvm_km_free(kmem_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	int kfl = (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT;

	return ((void *)uvm_km_kmemalloc(kernel_map, uvm.kernel_object,
	    pp->pr_alloc->pa_pagesz, kfl));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	uvm_km_free(kernel_map, (vaddr_t)v, pp->pr_alloc->pa_pagesz);
}