/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/globaldata.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/objcache.h>
#include <sys/spinlock.h>
#include <sys/thread.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>

static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
static MALLOC_DEFINE(M_OBJMAG, "objcache mag", "Object Cache Magazine");

#define INITIAL_MAG_CAPACITY    64

struct magazine {
        int                     rounds;
        int                     capacity;
        SLIST_ENTRY(magazine)   nextmagazine;
        void                    *objects[];
};

SLIST_HEAD(magazinelist, magazine);

#define MAGAZINE_HDRSIZE        __offsetof(struct magazine, objects[0])
#define MAGAZINE_CAPACITY_MAX   4096
#define MAGAZINE_CAPACITY_MIN   4

/*
 * per-cluster cache of magazines
 *
 * All fields in this structure are protected by the spinlock.
 */
struct magazinedepot {
        /*
         * The per-cpu object caches only exchange completely full or
         * completely empty magazines with the depot layer, so we only
         * have to cache these two types of magazines.
         */
        struct magazinelist     fullmagazines;
        struct magazinelist     emptymagazines;
        int                     magcapacity;

        /* protect this structure */
        struct spinlock         spin;

        /* objects not yet allocated towards the limit */
        int                     unallocated_objects;
        int                     cluster_limit;  /* ref for adjustments */

        /* infrequently used fields */
        int                     waiting;        /* waiting for another cpu to
                                                 * return a full magazine to
                                                 * the depot */
        int                     contested;      /* depot contention count */
} __cachealign;

/*
 * per-cpu object cache
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
        struct magazine *loaded_magazine;       /* active magazine */
        struct magazine *previous_magazine;     /* backup magazine */

        /* statistics */
        u_long          gets_cumulative;        /* total calls to get */
        u_long          gets_null;              /* objcache_get returned NULL */
        u_long          allocs_cumulative;      /* total calls to alloc */
        u_long          puts_cumulative;        /* total calls to put */
        u_long          gets_exhausted;         /* # of gets that hit exhaustion */
#ifdef notyet
        u_long          puts_othercluster;      /* returned to other cluster */
#endif

        /* infrequently used fields */
        int             waiting;        /* waiting for a thread on this
                                         * cpu to return an obj to the
                                         * per-cpu cache */
} __cachealign;

/* only until we have NUMA cluster topology information XXX */
#define MAXCLUSTERS     1
#define myclusterid     0
#define CLUSTER_OF(obj) 0

/*
 * Rarely accessed but useful bits of objcache.
 */
struct objcache_desc {
        LIST_ENTRY(objcache_desc)       next;
        struct objcache                 *objcache;
        int                             total_objects;
#define OBJCACHE_NAMELEN                36
        char                            name[OBJCACHE_NAMELEN];
};

/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 */
struct objcache {
        /* object constructor and destructor from blank storage */
        objcache_ctor_fn        *ctor;
        objcache_dtor_fn        *dtor;
        void                    *privdata;

        /* interface to underlying allocator */
        objcache_alloc_fn       *alloc;
        objcache_free_fn        *free;
        void                    *allocator_args;

        struct objcache_desc    *desc;

        /* NUMA-cluster level caches */
        struct magazinedepot    depot[MAXCLUSTERS];

        struct percpu_objcache  cache_percpu[]; /* per-cpu caches */
};

static struct spinlock objcachelist_spin;
static LIST_HEAD(objcachelist, objcache_desc) allobjcaches;
static int magazine_capmin;
static int magazine_capmax;

static struct magazine *
mag_alloc(int capacity)
{
        struct magazine *mag;
        int size;

        size = __offsetof(struct magazine, objects[capacity]);
        KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
            ("magazine size is not a multiple of the cache line size"));

        mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
        mag->capacity = capacity;
        mag->rounds = 0;
        return (mag);
}

static int
mag_capacity_align(int mag_capacity)
{
        int mag_size;

        mag_size = __VM_CACHELINE_ALIGN(
            __offsetof(struct magazine, objects[mag_capacity]));
        mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);

        return mag_capacity;
}
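
/*
 * Worked sizing example (illustrative only; assumes 64-byte cache lines
 * and 8-byte pointers, which this file does not guarantee): the magazine
 * header ends at MAGAZINE_HDRSIZE = 16 bytes, so
 * __offsetof(struct magazine, objects[4]) = 48 bytes.  mag_capacity_align()
 * rounds that up to one full cache line (64 bytes) and returns
 * (64 - 16) / 8 = 6, i.e. a requested capacity of 4 becomes 6 rounds.
 * mag_alloc() then asserts that the final size is an exact multiple of
 * the cache line size.
 */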

/*
 * Utility routines for objects that don't require any de-construction.
 */

static void
null_dtor(void *obj, void *privdata)
{
        /* do nothing */
}

static boolean_t
null_ctor(void *obj, void *privdata, int ocflags)
{
        return TRUE;
}

/*
 * Create an object cache.
 */
struct objcache *
objcache_create(const char *name, int cluster_limit, int nom_cache,
                objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
                objcache_alloc_fn *alloc, objcache_free_fn *free,
                void *allocator_args)
{
        struct objcache_desc *desc;
        struct objcache *oc;
        struct magazinedepot *depot;
        int cpuid;
        int nmagdepot;
        int mag_capacity;
        int i;

        /*
         * Allocate objcache descriptor.
         */
        desc = kmalloc(sizeof(*desc), M_OBJCACHE, M_WAITOK | M_ZERO);

        /*
         * Allocate object cache structure
         */
        oc = kmalloc_cachealign(
            __offsetof(struct objcache, cache_percpu[ncpus]),
            M_OBJCACHE, M_WAITOK | M_ZERO);
        oc->ctor = ctor ? ctor : null_ctor;
        oc->dtor = dtor ? dtor : null_dtor;
        oc->privdata = privdata;
        oc->alloc = alloc;
        oc->free = free;
        oc->allocator_args = allocator_args;

        /*
         * Link objcache and its descriptor.
         */
        oc->desc = desc;
        desc->objcache = oc;
        strlcpy(desc->name, name, sizeof(desc->name));

        /*
         * Initialize depot list(s).
         */
        depot = &oc->depot[0];

        spin_init(&depot->spin, "objcachedepot");
        SLIST_INIT(&depot->fullmagazines);
        SLIST_INIT(&depot->emptymagazines);

        /*
         * Figure out the nominal number of free objects to cache and
         * the magazine capacity.  By default we want to cache up to
         * half the cluster_limit.  If there is no cluster_limit then
         * we want to cache up to 128 objects.
         */
        if (nom_cache == 0)
                nom_cache = cluster_limit / 2;
        if (cluster_limit && nom_cache > cluster_limit)
                nom_cache = cluster_limit;
        if (nom_cache == 0)
                nom_cache = INITIAL_MAG_CAPACITY * 2;

        /*
         * Magazine capacity for 2 active magazines per cpu plus 2
         * magazines in the depot.
         */
        mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
        if (mag_capacity > magazine_capmax)
                mag_capacity = magazine_capmax;
        else if (mag_capacity < magazine_capmin)
                mag_capacity = magazine_capmin;
        depot->magcapacity = mag_capacity;

        /*
         * The cluster_limit must be sufficient to have two magazines per
         * cpu plus at least two magazines in the depot.  However, because
         * partial magazines can stay on the cpus, what we really need here
         * is to specify the number of extra magazines we allocate for the
         * depot.
         *
         * Use ~1B objects to mean 'unlimited'.  A negative unallocated
         * object count is possible due to dynamic adjustments so we can't
         * use a negative number to mean 'unlimited'.  We need some overflow
         * capacity too due to the preallocated mags.
         */
        if (cluster_limit == 0) {
                depot->unallocated_objects = 0x40000000;
        } else {
                depot->unallocated_objects = ncpus * mag_capacity * 2 +
                    cluster_limit;
        }

        /* Save # of total objects. */
        desc->total_objects = depot->unallocated_objects;

        /*
         * This is a dynamic adjustment aid initialized to the caller's
         * expectations of the current limit.
         */
        depot->cluster_limit = cluster_limit;

        /*
         * Initialize per-cpu caches
         */
        for (cpuid = 0; cpuid < ncpus; cpuid++) {
                struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];

                cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
                cache_percpu->previous_magazine = mag_alloc(mag_capacity);
        }

        /*
         * Compute how many empty magazines to place in the depot.  This
         * determines the retained cache size and is based on nom_cache.
         *
         * The actual cache size is larger because there are two magazines
         * for each cpu as well but those can be in any fill state so we
         * just can't count them.
         *
         * There is a minimum of two magazines in the depot.
         */
        nmagdepot = nom_cache / mag_capacity + 1;
        if (nmagdepot < 2)
                nmagdepot = 2;

        /*
         * Put empty magazines in depot
         */
        for (i = 0; i < nmagdepot; i++) {
                struct magazine *mag = mag_alloc(mag_capacity);
                SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
        }

        spin_lock(&objcachelist_spin);
        LIST_INSERT_HEAD(&allobjcaches, desc, next);
        spin_unlock(&objcachelist_spin);

        return (oc);
}

/*
 * Adjust the cluster limit.  This is allowed to cause unallocated_objects
 * to go negative.  Note that due to the magazine hysteresis there is a
 * limit to how much of the objcache can be reclaimed using this API to
 * reduce its size.
 */
void
objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
{
        struct magazinedepot *depot;

        depot = &oc->depot[myclusterid];
        if (depot->cluster_limit != cluster_limit) {
                int delta;

                spin_lock(&depot->spin);
                delta = cluster_limit - depot->cluster_limit;
                depot->unallocated_objects += delta;
                depot->cluster_limit = cluster_limit;
                spin_unlock(&depot->spin);
                wakeup(depot);

                oc->desc->total_objects += delta;
        }
}

struct objcache *
objcache_create_simple(malloc_type_t mtype, size_t objsize)
{
        struct objcache_malloc_args *margs;
        struct objcache *oc;

        margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
        margs->objsize = objsize;
        margs->mtype = mtype;
        oc = objcache_create(mtype->ks_shortdesc, 0, 0,
                             NULL, NULL, NULL,
                             objcache_malloc_alloc, objcache_malloc_free,
                             margs);
        return (oc);
}

struct objcache *
objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
                        int cluster_limit, int nom_cache,
                        objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
                        void *privdata)
{
        struct objcache_malloc_args *margs;
        struct objcache *oc;

        margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
        margs->objsize = objsize;
        margs->mtype = mtype;
        oc = objcache_create(mtype->ks_shortdesc,
                             cluster_limit, nom_cache,
                             ctor, dtor, privdata,
                             objcache_malloc_alloc, objcache_malloc_free,
                             margs);
        return(oc);
}


#define MAGAZINE_EMPTY(mag)     (mag->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)  (mag->rounds != 0)
#define MAGAZINE_FULL(mag)      (mag->rounds == mag->capacity)

#define swap(x, y)      ({ struct magazine *t = x; x = y; y = t; })

/*
 * Get an object from the object cache.
 *
 * WARNING!  ocflags are only used when we have to go to the underlying
 * allocator, so we cannot depend on flags such as M_ZERO.
 */
void *
objcache_get(struct objcache *oc, int ocflags)
{
        struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
        struct magazine *loadedmag;
        struct magazine *emptymag;
        void *obj;
        struct magazinedepot *depot;

        KKASSERT((ocflags & M_ZERO) == 0);
        crit_enter();
        ++cpucache->gets_cumulative;

retry:
        /*
         * Loaded magazine has an object.  This is the hot path.
         * It is lock-free and uses a critical section to block
         * out interrupt handlers on the same processor.
         */
        loadedmag = cpucache->loaded_magazine;
        if (MAGAZINE_NOTEMPTY(loadedmag)) {
                obj = loadedmag->objects[--loadedmag->rounds];
                crit_exit();
                return (obj);
        }

        /* Previous magazine has an object. */
        if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
                obj = loadedmag->objects[--loadedmag->rounds];
                crit_exit();
                return (obj);
        }

        /*
         * Both magazines empty.  Get a full magazine from the depot and
         * move one of the empty ones to the depot.
         *
         * Obtain the depot spinlock.
         *
         * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
         */
        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);

        /*
         * Recheck the cpucache after obtaining the depot spinlock.  This
         * shouldn't be necessary now but don't take any chances.
         */
        if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
            MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
        ) {
                spin_unlock(&depot->spin);
                goto retry;
        }

        /* Check if depot has a full magazine. */
        if (!SLIST_EMPTY(&depot->fullmagazines)) {
                emptymag = cpucache->previous_magazine;
                cpucache->previous_magazine = cpucache->loaded_magazine;
                cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
                SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);

                /*
                 * Return emptymag to the depot.
                 */
                KKASSERT(MAGAZINE_EMPTY(emptymag));
                SLIST_INSERT_HEAD(&depot->emptymagazines,
                                  emptymag, nextmagazine);
                spin_unlock(&depot->spin);
                goto retry;
        }

        /*
         * The depot does not have any non-empty magazines.  If we have
         * not hit our object limit we can allocate a new object using
         * the back-end allocator.
         *
         * NOTE: unallocated_objects can wind up being negative due to
         *       objcache_set_cluster_limit() calls.
         */
        if (__predict_true(depot->unallocated_objects > 0)) {
                --depot->unallocated_objects;
                spin_unlock(&depot->spin);
                ++cpucache->allocs_cumulative;
                crit_exit();

                obj = oc->alloc(oc->allocator_args, ocflags);
                if (obj) {
                        if (oc->ctor(obj, oc->privdata, ocflags))
                                return (obj);
                        oc->free(obj, oc->allocator_args);
                        obj = NULL;
                }
                if (obj == NULL) {
                        spin_lock(&depot->spin);
                        ++depot->unallocated_objects;
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);

                        crit_enter();
                        /*
                         * Makes debugging easier when gets_cumulative does
                         * not include gets_null.
                         */
                        ++cpucache->gets_null;
                        --cpucache->gets_cumulative;
                        crit_exit();
                }
                return(obj);
        }
        if (__predict_false(cpucache->gets_exhausted++ == 0)) {
                kprintf("Warning: objcache(%s) exhausted on cpu%d!\n",
                    oc->desc->name, mycpuid);
        }

        /*
         * Otherwise block if allowed to.
         */
        if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
                ++cpucache->waiting;
                ++depot->waiting;
                ssleep(depot, &depot->spin, 0, "objcache_get", 0);
                --cpucache->waiting;
                --depot->waiting;
                spin_unlock(&depot->spin);
                goto retry;
        }

        /*
         * Otherwise fail
         */
        ++cpucache->gets_null;
        --cpucache->gets_cumulative;
        crit_exit();
        spin_unlock(&depot->spin);
        return (NULL);
}
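
/*
 * Note on ocflags handling in objcache_get(): callers pass normal kmalloc
 * M_* flags.  M_WAITOK without M_NULLOK allows objcache_get() to sleep on
 * the depot until another cpu returns an object; any other combination
 * results in a NULL return once the object limit is hit.  M_ZERO is
 * rejected outright because an object recycled from a magazine still holds
 * its previous contents; initialization belongs in the ctor or in the
 * backing allocator (e.g. objcache_malloc_alloc_zero below).
 */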

/*
 * Wrapper for malloc allocation routines.
 */
void *
objcache_malloc_alloc(void *allocator_args, int ocflags)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        return (kmalloc(alloc_args->objsize, alloc_args->mtype,
                        ocflags & OC_MFLAGS));
}

/*
 * Wrapper for malloc allocation routines, with initial zeroing
 * (but objects are not zeroed on reuse from cache).
 */
void *
objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        return (kmalloc(alloc_args->objsize, alloc_args->mtype,
                        (ocflags & OC_MFLAGS) | M_ZERO));
}


void
objcache_malloc_free(void *obj, void *allocator_args)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        kfree(obj, alloc_args->mtype);
}

/*
 * Wrapper for allocation policies that pre-allocate at initialization time
 * and don't do run-time allocation.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
        return (NULL);
}

void
objcache_nop_free(void *obj, void *allocator_args)
{
}

/*
 * Return an object to the object cache.
 */
void
objcache_put(struct objcache *oc, void *obj)
{
        struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
        struct magazine *loadedmag;
        struct magazinedepot *depot;

        crit_enter();
        ++cpucache->puts_cumulative;

        if (CLUSTER_OF(obj) != myclusterid) {
#ifdef notyet
                /* use lazy IPI to send object to owning cluster XXX todo */
                ++cpucache->puts_othercluster;
                crit_exit();
                return;
#endif
        }

retry:
        /*
         * Free slot available in loaded magazine.  This is the hot path.
         * It is lock-free and uses a critical section to block out interrupt
         * handlers on the same processor.
         */
        loadedmag = cpucache->loaded_magazine;
        if (!MAGAZINE_FULL(loadedmag)) {
                loadedmag->objects[loadedmag->rounds++] = obj;
                if (cpucache->waiting)
                        wakeup_mycpu(&oc->depot[myclusterid]);
                crit_exit();
                return;
        }

        /*
         * Current magazine full, but previous magazine has room.  XXX
         */
        if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
                loadedmag->objects[loadedmag->rounds++] = obj;
                if (cpucache->waiting)
                        wakeup_mycpu(&oc->depot[myclusterid]);
                crit_exit();
                return;
        }

        /*
         * Both magazines full.  Get an empty magazine from the depot and
         * move a full loaded magazine to the depot.  Even though the
         * magazine may wind up with space available after we block on
         * the spinlock, we still cycle it through to avoid the non-optimal
         * corner-case.
         *
         * Obtain the depot spinlock.
         */
        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);

        /*
         * If an empty magazine is available in the depot, cycle it
         * through and retry.
         */
        if (!SLIST_EMPTY(&depot->emptymagazines)) {
                loadedmag = cpucache->previous_magazine;
                cpucache->previous_magazine = cpucache->loaded_magazine;
                cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
                SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);

                /*
                 * Return loadedmag to the depot.  Due to blocking it may
                 * not be entirely full and could even be empty.
                 */
                if (MAGAZINE_EMPTY(loadedmag)) {
                        SLIST_INSERT_HEAD(&depot->emptymagazines,
                                          loadedmag, nextmagazine);
                        spin_unlock(&depot->spin);
                } else {
                        SLIST_INSERT_HEAD(&depot->fullmagazines,
                                          loadedmag, nextmagazine);
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);
                }
                goto retry;
        }

        /*
         * An empty mag is not available.  This is a corner case which can
         * occur due to cpus holding partially full magazines.  Do not try
         * to allocate a mag, just free the object.
         */
        ++depot->unallocated_objects;
        spin_unlock(&depot->spin);
        if (depot->waiting)
                wakeup(depot);
        crit_exit();
        oc->dtor(obj, oc->privdata);
        oc->free(obj, oc->allocator_args);
}

/*
 * The object is being put back into the cache, but the caller has
 * indicated that the object is not in any shape to be reused and should
 * be dtor'd immediately.
 */
void
objcache_dtor(struct objcache *oc, void *obj)
{
        struct magazinedepot *depot;

        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);
        ++depot->unallocated_objects;
        spin_unlock(&depot->spin);
        if (depot->waiting)
                wakeup(depot);
        oc->dtor(obj, oc->privdata);
        oc->free(obj, oc->allocator_args);
}

/*
 * Deallocate all objects in a magazine and free the magazine if requested.
 * When freeit is TRUE the magazine must already be disassociated from the
 * depot.
 *
 * Must be called with a critical section held when called with a per-cpu
 * magazine.  The magazine may be indirectly modified during the loop.
 *
 * If the magazine moves during a dtor the operation is aborted.  This is
 * only allowed when freeit is FALSE.
 *
 * The number of objects freed is returned.
 */
static int
mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
{
        struct magazine *mag = *magp;
        int count;
        void *obj;

        count = 0;
        while (mag->rounds) {
                obj = mag->objects[--mag->rounds];
                oc->dtor(obj, oc->privdata);            /* MAY BLOCK */
                oc->free(obj, oc->allocator_args);      /* MAY BLOCK */
                ++count;

                /*
                 * Cycle for interrupts.
                 */
                if ((count & 15) == 0) {
                        crit_exit();
                        crit_enter();
                }

                /*
                 * mag may have become invalid either due to dtor/free
                 * blocking or interrupt cycling, do not dereference it
                 * until we check.
                 */
                if (*magp != mag) {
                        kprintf("mag_purge: mag ripped out\n");
                        break;
                }
        }
        if (freeit) {
                KKASSERT(*magp == mag);
                *magp = NULL;
                kfree(mag, M_OBJMAG);
        }
        return(count);
}

/*
 * Disassociate zero or more magazines from a magazine list associated with
 * the depot, update the depot, and move the magazines to a temporary
 * list.
 *
 * The caller must check the depot for waiters and wake it up, typically
 * after disposing of the magazines this function loads onto the temporary
 * list.
 */
static void
maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
                     struct magazinelist *tmplist, boolean_t purgeall)
{
        struct magazine *mag;

        while ((mag = SLIST_FIRST(maglist)) != NULL) {
                SLIST_REMOVE_HEAD(maglist, nextmagazine);
                SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
                depot->unallocated_objects += mag->rounds;
        }
}

/*
 * Deallocate all magazines and their contents from the passed temporary
 * list.  The magazines have already been accounted for by their depots.
 *
 * The total number of rounds freed is returned.  This number is typically
 * only used to determine whether a wakeup on the depot is needed or not.
 */
static int
maglist_purge(struct objcache *oc, struct magazinelist *maglist)
{
        struct magazine *mag;
        int count = 0;

        /*
         * can't use SLIST_FOREACH because blocking releases the depot
         * spinlock
         */
        crit_enter();
        while ((mag = SLIST_FIRST(maglist)) != NULL) {
                SLIST_REMOVE_HEAD(maglist, nextmagazine);
                count += mag_purge(oc, &mag, TRUE);
        }
        crit_exit();
        return(count);
}

/*
 * De-allocates all magazines on the full and empty magazine lists.
 *
 * Because this routine is called with a spinlock held, the magazines
 * can only be disassociated and moved to a temporary list, not freed.
 *
 * The caller is responsible for freeing the magazines.
 */
static void
depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
{
        maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
        maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
}

/*
 * Try to free up some memory.  Return as soon as some free memory is found.
 * For each object cache on the reclaim list, first try the current per-cpu
 * cache, then the full magazine depot.
 */
boolean_t
objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
{
        struct objcache *oc;
        struct percpu_objcache *cpucache;
        struct magazinedepot *depot;
        struct magazinelist tmplist;
        int i, count;

        SLIST_INIT(&tmplist);

        for (i = 0; i < nlist; i++) {
                oc = oclist[i];
                cpucache = &oc->cache_percpu[mycpuid];
                depot = &oc->depot[myclusterid];

                crit_enter();
                count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
                if (count == 0)
                        count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
                crit_exit();
                if (count > 0) {
                        spin_lock(&depot->spin);
                        depot->unallocated_objects += count;
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);
                        return (TRUE);
                }
                spin_lock(&depot->spin);
                maglist_disassociate(depot, &depot->fullmagazines,
                                     &tmplist, FALSE);
                spin_unlock(&depot->spin);
                count = maglist_purge(oc, &tmplist);
                if (count > 0) {
                        if (depot->waiting)
                                wakeup(depot);
                        return (TRUE);
                }
        }
        return (FALSE);
}
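
/*
 * Illustrative use of objcache_reclaimlist() (hypothetical example; the
 * cache names below do not exist in this file): a subsystem under memory
 * pressure can try to recover objects cached by the objcaches it owns
 * before failing an allocation outright:
 *
 *      struct objcache *list[2] = { foo_cache, bar_cache };
 *
 *      if (objcache_reclaimlist(list, 2, M_NOWAIT))
 *              goto retry;
 *
 * The routine returns TRUE as soon as any rounds are freed, so callers
 * that need more memory may simply call it again.
 */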

/*
 * Destroy an object cache.  Must have no existing references.
 */
void
objcache_destroy(struct objcache *oc)
{
        struct objcache_desc *desc = oc->desc;
        struct percpu_objcache *cache_percpu;
        struct magazinedepot *depot;
        int clusterid, cpuid;
        struct magazinelist tmplist;

        spin_lock(&objcachelist_spin);
        LIST_REMOVE(desc, next);
        spin_unlock(&objcachelist_spin);

        SLIST_INIT(&tmplist);
        for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
                depot = &oc->depot[clusterid];
                spin_lock(&depot->spin);
                depot_disassociate(depot, &tmplist);
                spin_unlock(&depot->spin);
        }
        maglist_purge(oc, &tmplist);

        for (cpuid = 0; cpuid < ncpus; cpuid++) {
                cache_percpu = &oc->cache_percpu[cpuid];

                crit_enter();
                mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
                mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
                crit_exit();
                cache_percpu->loaded_magazine = NULL;
                cache_percpu->previous_magazine = NULL;
                /* don't bother adjusting depot->unallocated_objects */
        }

        kfree(desc, M_OBJCACHE);
        kfree(oc, M_OBJCACHE);
}

static void
objcache_init(void)
{
        spin_init(&objcachelist_spin, "objcachelist");

        magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
        magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
        if (bootverbose) {
                kprintf("objcache: magazine cap [%d, %d]\n",
                    magazine_capmin, magazine_capmax);
        }
#if 0
        callout_init_mp(&objcache_callout);
        objcache_rebalance_period = 60 * hz;
        callout_reset(&objcache_callout, objcache_rebalance_period,
            objcache_timer, NULL);
#endif
}
SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
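
/*
 * Usage sketch (illustrative only; "struct foo", M_FOO, foo_cache,
 * foo_ctor and foo_dtor are hypothetical names, not part of this file):
 *
 *      static MALLOC_DEFINE(M_FOO, "foo", "foo structures");
 *      static struct objcache *foo_cache;
 *
 *      static boolean_t
 *      foo_ctor(void *obj, void *privdata, int ocflags)
 *      {
 *              bzero(obj, sizeof(struct foo));
 *              return (TRUE);
 *      }
 *
 *      static void
 *      foo_dtor(void *obj, void *privdata)
 *      {
 *              (release anything attached to the object)
 *      }
 *
 *      foo_cache = objcache_create_mbacked(M_FOO, sizeof(struct foo),
 *                                          0, 0, foo_ctor, foo_dtor, NULL);
 *
 *      obj = objcache_get(foo_cache, M_WAITOK);
 *      ...
 *      objcache_put(foo_cache, obj);
 *
 * The ctor runs only when blank storage comes from the backing allocator;
 * objects recycled from a magazine are handed out by objcache_get() exactly
 * as objcache_put() received them.  objcache_destroy() may only be called
 * once there are no remaining references to the cache.
 */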