xref: /dflybsd-src/sys/kern/kern_objcache.c (revision f2ffee576e2565f43acdc3329060453575e5f3f2)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 
/* Malloc type used for objcache control structures (descriptor, cache, malloc args). */
static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
/* Malloc type used for the magazines themselves. */
static MALLOC_DEFINE(M_OBJMAG, "objcache mag", "Object Cache Magazine");

/* Twice this value is the default nominal cache size in objcache_create(). */
#define	INITIAL_MAG_CAPACITY	64
50 
/*
 * A magazine is a fixed-capacity stack of cached object pointers.
 * Objects are pushed at objects[rounds++] and popped from objects[--rounds].
 */
struct magazine {
	int			 rounds;	/* # of objects currently held */
	int			 capacity;	/* # of slots in objects[] */
	SLIST_ENTRY(magazine)	 nextmagazine;	/* depot / temporary list link */
	void			*objects[];	/* slots [0, rounds) are in use */
};
57 
/* Singly-linked list head type for magazines. */
SLIST_HEAD(magazinelist, magazine);

/* Size of the magazine header that precedes the objects[] array. */
#define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
/*
 * Requested bounds on magazine capacity.  objcache_init() runs both
 * through mag_capacity_align() to obtain the effective limits.
 */
#define MAGAZINE_CAPACITY_MAX	4096
#define MAGAZINE_CAPACITY_MIN	4
63 
/*
 * per-cluster cache of magazines
 *
 * All fields in this structure are protected by the spinlock.
 *
 * NOTE(review): several paths in this file read `waiting' after dropping
 * the spinlock, purely as a hint for whether to call wakeup().
 */
struct magazinedepot {
	/*
	 * The per-cpu object caches only exchange completely full or
	 * completely empty magazines with the depot layer, so we only have
	 * to cache these two types of magazines.
	 */
	struct magazinelist	fullmagazines;
	struct magazinelist	emptymagazines;
	int			magcapacity;	/* capacity of each magazine */

	/* protect this structure */
	struct spinlock		spin;

	/*
	 * Objects not yet allocated towards the limit.  May go negative
	 * after objcache_set_cluster_limit() lowers the limit.
	 */
	int			unallocated_objects;
	int			cluster_limit;	/* ref for adjustments */

	/* infrequently used fields */
	int			waiting;	/* waiting for another cpu to
						 * return a full magazine to
						 * the depot */
	int			contested;	/* depot contention count */
} __cachealign;
92 
/*
 * per-cpu object cache
 *
 * All fields in this structure are protected by crit_enter().
 * Each cpu owns two magazines: gets and puts operate on the loaded
 * magazine and fall back to (swap with) the previous magazine before
 * going to the depot.
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* active magazine */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	u_long		gets_cumulative;	/* total calls to get,
						 * excluding those counted
						 * in gets_null */
	u_long		gets_null;		/* objcache_get returned NULL */
	u_long		allocs_cumulative;	/* total calls to alloc */
	u_long		puts_cumulative;	/* total calls to put */
	u_long		gets_exhausted;		/* # of gets hit exhaustion */
#ifdef notyet
	u_long		puts_othercluster;	/* returned to other cluster */
#endif

	/* infrequently used fields */
	int		waiting;		/* waiting for a thread on this
						 * cpu to return an obj to the
						 * per-cpu cache */
} __cachealign;
116 
/*
 * Placeholders, only until we have NUMA cluster topology information XXX
 *
 * With a single cluster every cpu and every object maps to depot 0.
 */
#define MAXCLUSTERS 1		/* number of depots in each objcache */
#define myclusterid 0		/* depot index used by the current cpu */
#define CLUSTER_OF(obj) 0	/* depot index that owns obj */
121 
/*
 * Rarely accessed but useful bits of objcache.
 *
 * One descriptor is allocated per objcache and linked onto the global
 * allobjcaches list for the lifetime of the cache.
 */
struct objcache_desc {
	LIST_ENTRY(objcache_desc)	next;		/* allobjcaches link */
	struct objcache			*objcache;	/* back pointer */
	int				total_objects;	/* initial object
							 * budget, adjusted by
							 * cluster limit
							 * changes */
#define OBJCACHE_NAMELEN		36
	char				name[OBJCACHE_NAMELEN];
};
132 
/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 *
 * The structure is allocated with one trailing percpu_objcache per cpu
 * (ncpus entries), see objcache_create().
 */
struct objcache {
	/* object constructor and destructor from blank storage */
	objcache_ctor_fn	*ctor;		/* never NULL after create */
	objcache_dtor_fn	*dtor;		/* never NULL after create */
	void			*privdata;	/* passed to ctor and dtor */

	/* interface to underlying allocator */
	objcache_alloc_fn	*alloc;
	objcache_free_fn	*free;
	void			*allocator_args; /* passed to alloc and free */

	struct objcache_desc	*desc;		/* name and accounting */

	/* NUMA-cluster level caches */
	struct magazinedepot	depot[MAXCLUSTERS];

	struct percpu_objcache	cache_percpu[];	/* per-cpu caches */
};
156 
/* Protects the allobjcaches list. */
static struct spinlock objcachelist_spin;
/* Descriptors of every object cache currently in existence. */
static LIST_HEAD(objcachelist, objcache_desc) allobjcaches;
/* Cache-line aligned magazine capacity bounds, set by objcache_init(). */
static int magazine_capmin;
static int magazine_capmax;
161 
162 static struct magazine *
163 mag_alloc(int capacity)
164 {
165 	struct magazine *mag;
166 	int size;
167 
168 	size = __offsetof(struct magazine, objects[capacity]);
169 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
170 	    ("magazine size is not multiple cache line size"));
171 
172 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
173 	mag->capacity = capacity;
174 	mag->rounds = 0;
175 	return (mag);
176 }
177 
178 static int
179 mag_capacity_align(int mag_capacity)
180 {
181 	int mag_size;
182 
183 	mag_size = __VM_CACHELINE_ALIGN(
184 	    __offsetof(struct magazine, objects[mag_capacity]));
185 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
186 
187 	return mag_capacity;
188 }
189 
/*
 * Default destructor, installed by objcache_create() when the caller
 * supplies none.  Objects need no de-construction, so nothing happens.
 */

static void
null_dtor(void *obj, void *privdata)
{
	(void)obj;
	(void)privdata;
}
199 
200 static boolean_t
201 null_ctor(void *obj, void *privdata, int ocflags)
202 {
203 	return TRUE;
204 }
205 
206 /*
207  * Create an object cache.
208  */
209 struct objcache *
210 objcache_create(const char *name, int cluster_limit, int nom_cache,
211 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
212 		objcache_alloc_fn *alloc, objcache_free_fn *free,
213 		void *allocator_args)
214 {
215 	struct objcache_desc *desc;
216 	struct objcache *oc;
217 	struct magazinedepot *depot;
218 	int cpuid;
219 	int nmagdepot;
220 	int mag_capacity;
221 	int i;
222 
223 	/*
224 	 * Allocate objcache descriptor.
225 	 */
226 	desc = kmalloc(sizeof(*desc), M_OBJCACHE, M_WAITOK | M_ZERO);
227 
228 	/*
229 	 * Allocate object cache structure
230 	 */
231 	oc = kmalloc_cachealign(
232 	    __offsetof(struct objcache, cache_percpu[ncpus]),
233 	    M_OBJCACHE, M_WAITOK | M_ZERO);
234 	oc->ctor = ctor ? ctor : null_ctor;
235 	oc->dtor = dtor ? dtor : null_dtor;
236 	oc->privdata = privdata;
237 	oc->alloc = alloc;
238 	oc->free = free;
239 	oc->allocator_args = allocator_args;
240 
241 	/*
242 	 * Link objcache and its descriptor.
243 	 */
244 	oc->desc = desc;
245 	desc->objcache = oc;
246 	strlcpy(desc->name, name, sizeof(desc->name));
247 
248 	/*
249 	 * Initialize depot list(s).
250 	 */
251 	depot = &oc->depot[0];
252 
253 	spin_init(&depot->spin, "objcachedepot");
254 	SLIST_INIT(&depot->fullmagazines);
255 	SLIST_INIT(&depot->emptymagazines);
256 
257 	/*
258 	 * Figure out the nominal number of free objects to cache and
259 	 * the magazine capacity.  By default we want to cache up to
260 	 * half the cluster_limit.  If there is no cluster_limit then
261 	 * we want to cache up to 128 objects.
262 	 */
263 	if (nom_cache == 0)
264 		nom_cache = cluster_limit / 2;
265 	if (cluster_limit && nom_cache > cluster_limit)
266 		nom_cache = cluster_limit;
267 	if (nom_cache == 0)
268 		nom_cache = INITIAL_MAG_CAPACITY * 2;
269 
270 	/*
271 	 * Magazine capacity for 2 active magazines per cpu plus 2
272 	 * magazines in the depot.
273 	 */
274 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
275 	if (mag_capacity > magazine_capmax)
276 		mag_capacity = magazine_capmax;
277 	else if (mag_capacity < magazine_capmin)
278 		mag_capacity = magazine_capmin;
279 	depot->magcapacity = mag_capacity;
280 
281 	/*
282 	 * The cluster_limit must be sufficient to have two magazines per
283 	 * cpu plus at least two magazines in the depot.  However, because
284 	 * partial magazines can stay on the cpus what we really need here
285 	 * is to specify the number of extra magazines we allocate for the
286 	 * depot.
287 	 *
288 	 * Use ~1B objects to mean 'unlimited'.  A negative unallocated
289 	 * object count is possible due to dynamic adjustments so we can't
290 	 * use a negative number to mean 'unlimited'.  We need some overflow
291 	 * capacity too due to the preallocated mags.
292 	 */
293 	if (cluster_limit == 0) {
294 		depot->unallocated_objects = 0x40000000;
295 	} else {
296 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
297 					     cluster_limit;
298 	}
299 
300 	/* Save # of total objects. */
301 	desc->total_objects = depot->unallocated_objects;
302 
303 	/*
304 	 * This is a dynamic adjustment aid initialized to the callers
305 	 * expectations of the current limit.
306 	 */
307 	depot->cluster_limit = cluster_limit;
308 
309 	/*
310 	 * Initialize per-cpu caches
311 	 */
312 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
313 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
314 
315 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
316 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
317 	}
318 
319 	/*
320 	 * Compute how many empty magazines to place in the depot.  This
321 	 * determines the retained cache size and is based on nom_cache.
322 	 *
323 	 * The actual cache size is larger because there are two magazines
324 	 * for each cpu as well but those can be in any fill state so we
325 	 * just can't count them.
326 	 *
327 	 * There is a minimum of two magazines in the depot.
328 	 */
329 	nmagdepot = nom_cache / mag_capacity + 1;
330 	if (nmagdepot < 2)
331 		nmagdepot = 2;
332 
333 	/*
334 	 * Put empty magazines in depot
335 	 */
336 	for (i = 0; i < nmagdepot; i++) {
337 		struct magazine *mag = mag_alloc(mag_capacity);
338 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
339 	}
340 
341 	spin_lock(&objcachelist_spin);
342 	LIST_INSERT_HEAD(&allobjcaches, desc, next);
343 	spin_unlock(&objcachelist_spin);
344 
345 	return (oc);
346 }
347 
348 /*
349  * Adjust the cluster limit.  This is allowed to cause unallocated_objects
350  * to go negative.  Note that due to the magazine hysteresis there is a
351  * limit to how much of the objcache can be reclaimed using this API to
352  * reduce its size.
353  */
354 void
355 objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
356 {
357 	struct magazinedepot *depot;
358 
359 	depot = &oc->depot[myclusterid];
360 	if (depot->cluster_limit != cluster_limit) {
361 		int delta;
362 
363 		spin_lock(&depot->spin);
364 		delta = cluster_limit - depot->cluster_limit;
365 		depot->unallocated_objects += delta;
366 		depot->cluster_limit = cluster_limit;
367 		spin_unlock(&depot->spin);
368 		wakeup(depot);
369 
370 		oc->desc->total_objects += delta;
371 	}
372 }
373 
374 struct objcache *
375 objcache_create_simple(malloc_type_t mtype, size_t objsize)
376 {
377 	struct objcache_malloc_args *margs;
378 	struct objcache *oc;
379 
380 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
381 	margs->objsize = objsize;
382 	margs->mtype = mtype;
383 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
384 			     NULL, NULL, NULL,
385 			     objcache_malloc_alloc, objcache_malloc_free,
386 			     margs);
387 	return (oc);
388 }
389 
390 struct objcache *
391 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
392 			int cluster_limit, int nom_cache,
393 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
394 			void *privdata)
395 {
396 	struct objcache_malloc_args *margs;
397 	struct objcache *oc;
398 
399 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
400 	margs->objsize = objsize;
401 	margs->mtype = mtype;
402 	oc = objcache_create(mtype->ks_shortdesc,
403 			     cluster_limit, nom_cache,
404 			     ctor, dtor, privdata,
405 			     objcache_malloc_alloc, objcache_malloc_free,
406 			     margs);
407 	return(oc);
408 }
409 
410 
/*
 * Magazine fill-state predicates.  The argument is a struct magazine
 * pointer expression; it is fully parenthesized so that arguments such
 * as `mags + 1' or `cond ? a : b' expand correctly.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  Statement-only macro;
 * each argument is evaluated twice, so it must be free of side effects.
 */
#define	swap(x, y)	do {						\
	struct magazine *swap_tmp_ = (x);				\
	(x) = (y);							\
	(y) = swap_tmp_;						\
} while (0)
416 
/*
 * Get an object from the object cache.
 *
 * oc      - the cache to allocate from
 * ocflags - M_* style flags.  M_ZERO is not permitted (asserted).
 *
 * Returns an object, or NULL when:
 *   - the back-end allocator returned NULL or the constructor rejected
 *     the new object, or
 *   - the object limit is exhausted and the flags do not request
 *     blocking (M_WAITOK set without M_NULLOK).
 *
 * Objects taken from a magazine are returned as-is; the constructor only
 * runs on objects freshly obtained from the back-end allocator.
 *
 * WARNING!  ocflags are only used when we have to go to the underlying
 * allocator, so we cannot depend on flags such as M_ZERO.
 */
void *
objcache_get(struct objcache *oc, int ocflags)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazine *emptymag;
	void *obj;
	struct magazinedepot *depot;

	KKASSERT((ocflags & M_ZERO) == 0);
	crit_enter();
	++cpucache->gets_cumulative;

retry:
	/*
	 * Loaded magazine has an object.  This is the hot path.
	 * It is lock-free and uses a critical section to block
	 * out interrupt handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (MAGAZINE_NOTEMPTY(loadedmag)) {
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/*
	 * Previous magazine has an object.  Make it the loaded magazine
	 * and take the object from there.
	 */
	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/*
	 * Both magazines empty.  Get a full magazine from the depot and
	 * move one of the empty ones to the depot.
	 *
	 * Obtain the depot spinlock.
	 *
	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * Recheck the cpucache after obtaining the depot spinlock.  This
	 * shouldn't be necessary now but don't take any chances.
	 */
	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
	) {
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * Check if depot has a full magazine.  If so it becomes the loaded
	 * magazine, the old loaded magazine becomes the previous one, and
	 * the old previous magazine goes back to the depot.
	 */
	if (!SLIST_EMPTY(&depot->fullmagazines)) {
		emptymag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);

		/*
		 * Return emptymag to the depot.
		 */
		KKASSERT(MAGAZINE_EMPTY(emptymag));
		SLIST_INSERT_HEAD(&depot->emptymagazines,
				  emptymag, nextmagazine);
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * The depot does not have any non-empty magazines.  If we have
	 * not hit our object limit we can allocate a new object using
	 * the back-end allocator.
	 *
	 * NOTE: unallocated_objects can wind up being negative due to
	 *	 objcache_set_cluster_limit() calls.
	 */
	if (__predict_true(depot->unallocated_objects > 0)) {
		/*
		 * Reserve the object in the budget before dropping the
		 * lock and the critical section; the back-end allocator
		 * and constructor run outside of both.
		 */
		--depot->unallocated_objects;
		spin_unlock(&depot->spin);
		++cpucache->allocs_cumulative;
		crit_exit();

		obj = oc->alloc(oc->allocator_args, ocflags);
		if (obj) {
			if (oc->ctor(obj, oc->privdata, ocflags))
				return (obj);
			/* constructor failed, hand the storage back */
			oc->free(obj, oc->allocator_args);
			obj = NULL;
		}
		if (obj == NULL) {
			/*
			 * Undo the reservation and let any waiter retry.
			 */
			spin_lock(&depot->spin);
			++depot->unallocated_objects;
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);

			crit_enter();
			/*
			 * makes debugging easier when gets_cumulative does
			 * not include gets_null.
			 */
			++cpucache->gets_null;
			--cpucache->gets_cumulative;
			crit_exit();
		}
		return(obj);
	}

	/* Limit reached.  Warn once per cpu, on the first occurrence. */
	if (__predict_false(cpucache->gets_exhausted++ == 0)) {
		kprintf("Warning: objcache(%s) exhausted on cpu%d!\n",
		    oc->desc->name, mycpuid);
	}

	/*
	 * Otherwise block if allowed to.  The depot spinlock is passed to
	 * ssleep() and is explicitly released after it returns, then the
	 * whole lookup is retried.
	 */
	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
		++cpucache->waiting;
		++depot->waiting;
		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
		--cpucache->waiting;
		--depot->waiting;
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * Otherwise fail
	 *
	 * NOTE(review): the critical section is exited before the spinlock
	 * is released here, the reverse of the order used on the other
	 * paths in this function.
	 */
	++cpucache->gets_null;
	--cpucache->gets_cumulative;
	crit_exit();
	spin_unlock(&depot->spin);
	return (NULL);
}
563 
564 /*
565  * Wrapper for malloc allocation routines.
566  */
567 void *
568 objcache_malloc_alloc(void *allocator_args, int ocflags)
569 {
570 	struct objcache_malloc_args *alloc_args = allocator_args;
571 
572 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
573 		       ocflags & OC_MFLAGS));
574 }
575 
576 /*
577  * Wrapper for malloc allocation routines, with initial zeroing
578  * (but objects are not zerod on reuse from cache).
579  */
580 void *
581 objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
582 {
583 	struct objcache_malloc_args *alloc_args = allocator_args;
584 
585 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
586 		       (ocflags & OC_MFLAGS) | M_ZERO));
587 }
588 
589 
590 void
591 objcache_malloc_free(void *obj, void *allocator_args)
592 {
593 	struct objcache_malloc_args *alloc_args = allocator_args;
594 
595 	kfree(obj, alloc_args->mtype);
596 }
597 
/*
 * Back-end allocator for caches that are fully populated at
 * initialization time and never allocate at run time: every request
 * fails with NULL.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;

	return (NULL);
}
607 
/*
 * Back-end free routine paired with objcache_nop_alloc(): the object
 * is left untouched.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
612 
/*
 * Return an object to the object cache.
 *
 * oc  - the cache the object belongs to
 * obj - the object being returned
 *
 * The object is normally stored in one of the current cpu's magazines
 * without running the destructor.  Only when both per-cpu magazines are
 * full and the depot has no empty magazine to offer is the object
 * destructed and released to the back-end allocator.
 */
void
objcache_put(struct objcache *oc, void *obj)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazinedepot *depot;

	crit_enter();
	++cpucache->puts_cumulative;

	/*
	 * Cross-cluster returns are not implemented; with the current
	 * placeholder macros this condition is never true.
	 */
	if (CLUSTER_OF(obj) != myclusterid) {
#ifdef notyet
		/* use lazy IPI to send object to owning cluster XXX todo */
		++cpucache->puts_othercluster;
		crit_exit();
		return;
#endif
	}

retry:
	/*
	 * Free slot available in loaded magazine.  This is the hot path.
	 * It is lock-free and uses a critical section to block out interrupt
	 * handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (!MAGAZINE_FULL(loadedmag)) {
		loadedmag->objects[loadedmag->rounds++] = obj;
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Current magazine full, but previous magazine has room.  XXX
	 * Make it the loaded magazine and store the object there.
	 */
	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		loadedmag->objects[loadedmag->rounds++] = obj;
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Both magazines full.  Get an empty magazine from the depot and
	 * move a full magazine to the depot.  Even though the
	 * magazine may wind up with space available after we block on
	 * the spinlock, we still cycle it through to avoid the non-optimal
	 * corner-case.
	 *
	 * Obtain the depot spinlock.
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * If an empty magazine is available in the depot, cycle it
	 * through and retry.  The empty magazine becomes the loaded one,
	 * the old loaded magazine becomes the previous one, and the old
	 * previous magazine (held in loadedmag below) goes to the depot.
	 */
	if (!SLIST_EMPTY(&depot->emptymagazines)) {
		loadedmag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);

		/*
		 * Return loadedmag to the depot.  Due to blocking it may
		 * not be entirely full and could even be empty.  Anything
		 * non-empty is filed on the full list, and waiters are
		 * woken since objects have become available.
		 */
		if (MAGAZINE_EMPTY(loadedmag)) {
			SLIST_INSERT_HEAD(&depot->emptymagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
		} else {
			SLIST_INSERT_HEAD(&depot->fullmagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);
		}
		goto retry;
	}

	/*
	 * An empty mag is not available.  This is a corner case which can
	 * occur due to cpus holding partially full magazines.  Do not try
	 * to allocate a mag, just free the object.  The object is credited
	 * back to the budget first, then destructed and released outside
	 * the spinlock and the critical section.
	 */
	++depot->unallocated_objects;
	spin_unlock(&depot->spin);
	if (depot->waiting)
		wakeup(depot);
	crit_exit();
	oc->dtor(obj, oc->privdata);
	oc->free(obj, oc->allocator_args);
}
716 
717 /*
718  * The object is being put back into the cache, but the caller has
719  * indicated that the object is not in any shape to be reused and should
720  * be dtor'd immediately.
721  */
722 void
723 objcache_dtor(struct objcache *oc, void *obj)
724 {
725 	struct magazinedepot *depot;
726 
727 	depot = &oc->depot[myclusterid];
728 	spin_lock(&depot->spin);
729 	++depot->unallocated_objects;
730 	spin_unlock(&depot->spin);
731 	if (depot->waiting)
732 		wakeup(depot);
733 	oc->dtor(obj, oc->privdata);
734 	oc->free(obj, oc->allocator_args);
735 }
736 
/*
 * Deallocate all objects in a magazine and free the magazine if requested.
 * When freeit is TRUE the magazine must already be disassociated from the
 * depot.
 *
 * oc     - cache whose dtor/free hooks are applied to each object
 * magp   - location holding the magazine pointer; re-read after every
 *          object to detect the magazine being replaced underneath us.
 *          Set to NULL when the magazine is freed.
 * freeit - TRUE to also free the magazine itself
 *
 * Must be called with a critical section held when called with a per-cpu
 * magazine.  The magazine may be indirectly modified during the loop.
 *
 * If the magazine moves during a dtor the operation is aborted.  This is
 * only allowed when freeit is FALSE (asserted otherwise).
 *
 * The number of objects freed is returned.  The caller is responsible
 * for any depot accounting.
 */
static int
mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
{
	struct magazine *mag = *magp;
	int count;
	void *obj;

	count = 0;
	while (mag->rounds) {
		/* pop before calling out, the hooks may block */
		obj = mag->objects[--mag->rounds];
		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
		++count;

		/*
		 * Cycle for interrupts: briefly leave the critical section
		 * after every 16 objects.
		 */
		if ((count & 15) == 0) {
			crit_exit();
			crit_enter();
		}

		/*
		 * mag may have become invalid either due to dtor/free
		 * blocking or interrupt cycling, do not dereference it
		 * until we check.
		 */
		if (*magp != mag) {
			kprintf("mag_purge: mag ripped out\n");
			break;
		}
	}
	if (freeit) {
		KKASSERT(*magp == mag);
		*magp = NULL;
		kfree(mag, M_OBJMAG);
	}
	return(count);
}
789 
790 /*
791  * Disassociate zero or more magazines from a magazine list associated with
792  * the depot, update the depot, and move the magazines to a temporary
793  * list.
794  *
795  * The caller must check the depot for waiters and wake it up, typically
796  * after disposing of the magazines this function loads onto the temporary
797  * list.
798  */
799 static void
800 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
801 		     struct magazinelist *tmplist, boolean_t purgeall)
802 {
803 	struct magazine *mag;
804 
805 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
806 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
807 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
808 		depot->unallocated_objects += mag->rounds;
809 	}
810 }
811 
812 /*
813  * Deallocate all magazines and their contents from the passed temporary
814  * list.  The magazines have already been accounted for by their depots.
815  *
816  * The total number of rounds freed is returned.  This number is typically
817  * only used to determine whether a wakeup on the depot is needed or not.
818  */
819 static int
820 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
821 {
822 	struct magazine *mag;
823 	int count = 0;
824 
825 	/*
826 	 * can't use SLIST_FOREACH because blocking releases the depot
827 	 * spinlock
828 	 */
829 	crit_enter();
830 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
831 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
832 		count += mag_purge(oc, &mag, TRUE);
833 	}
834 	crit_exit();
835 	return(count);
836 }
837 
838 /*
839  * De-allocates all magazines on the full and empty magazine lists.
840  *
841  * Because this routine is called with a spinlock held, the magazines
842  * can only be disassociated and moved to a temporary list, not freed.
843  *
844  * The caller is responsible for freeing the magazines.
845  */
846 static void
847 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
848 {
849 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
850 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
851 }
852 
853 /*
854  * Try to free up some memory.  Return as soon as some free memory is found.
855  * For each object cache on the reclaim list, first try the current per-cpu
856  * cache, then the full magazine depot.
857  */
858 boolean_t
859 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
860 {
861 	struct objcache *oc;
862 	struct percpu_objcache *cpucache;
863 	struct magazinedepot *depot;
864 	struct magazinelist tmplist;
865 	int i, count;
866 
867 	SLIST_INIT(&tmplist);
868 
869 	for (i = 0; i < nlist; i++) {
870 		oc = oclist[i];
871 		cpucache = &oc->cache_percpu[mycpuid];
872 		depot = &oc->depot[myclusterid];
873 
874 		crit_enter();
875 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
876 		if (count == 0)
877 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
878 		crit_exit();
879 		if (count > 0) {
880 			spin_lock(&depot->spin);
881 			depot->unallocated_objects += count;
882 			spin_unlock(&depot->spin);
883 			if (depot->waiting)
884 				wakeup(depot);
885 			return (TRUE);
886 		}
887 		spin_lock(&depot->spin);
888 		maglist_disassociate(depot, &depot->fullmagazines,
889 				     &tmplist, FALSE);
890 		spin_unlock(&depot->spin);
891 		count = maglist_purge(oc, &tmplist);
892 		if (count > 0) {
893 			if (depot->waiting)
894 				wakeup(depot);
895 			return (TRUE);
896 		}
897 	}
898 	return (FALSE);
899 }
900 
901 /*
902  * Destroy an object cache.  Must have no existing references.
903  */
904 void
905 objcache_destroy(struct objcache *oc)
906 {
907 	struct objcache_desc *desc = oc->desc;
908 	struct percpu_objcache *cache_percpu;
909 	struct magazinedepot *depot;
910 	int clusterid, cpuid;
911 	struct magazinelist tmplist;
912 
913 	spin_lock(&objcachelist_spin);
914 	LIST_REMOVE(desc, next);
915 	spin_unlock(&objcachelist_spin);
916 
917 	SLIST_INIT(&tmplist);
918 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
919 		depot = &oc->depot[clusterid];
920 		spin_lock(&depot->spin);
921 		depot_disassociate(depot, &tmplist);
922 		spin_unlock(&depot->spin);
923 	}
924 	maglist_purge(oc, &tmplist);
925 
926 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
927 		cache_percpu = &oc->cache_percpu[cpuid];
928 
929 		crit_enter();
930 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
931 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
932 		crit_exit();
933 		cache_percpu->loaded_magazine = NULL;
934 		cache_percpu->previous_magazine = NULL;
935 		/* don't bother adjusting depot->unallocated_objects */
936 	}
937 
938 	kfree(desc, M_OBJCACHE);
939 	kfree(oc, M_OBJCACHE);
940 }
941 
942 static void
943 objcache_init(void)
944 {
945 	spin_init(&objcachelist_spin, "objcachelist");
946 
947 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
948 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
949 	if (bootverbose) {
950 		kprintf("objcache: magazine cap [%d, %d]\n",
951 		    magazine_capmin, magazine_capmax);
952 	}
953 #if 0
954 	callout_init_mp(&objcache_callout);
955 	objcache_rebalance_period = 60 * hz;
956 	callout_reset(&objcache_callout, objcache_rebalance_period,
957 		      objcache_timer, NULL);
958 #endif
959 }
960 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
961