/*	$NetBSD: subr_localcount.c,v 1.8 2024/08/16 21:54:17 riastradh Exp $	*/

/*-
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * CPU-local reference counts
 *
 * localcount(9) is a reference-counting scheme that involves no
 * interprocessor synchronization most of the time, at the cost of
 * eight bytes of memory per CPU per object and at the cost of
 * expensive interprocessor synchronization to drain references.
 *
 * localcount(9) references may be held across sleeps, may be
 * transferred from CPU to CPU or thread to thread: they behave
 * semantically like typical reference counts, with different
 * pragmatic performance characteristics.
 */
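
/*
 * Example usage (illustrative sketch)
 *
 * A typical client embeds a struct localcount in an object that
 * readers find through a pserialize(9)-protected data structure.
 * The "frotz" structure and the names f_lc, frotz_lock, frotz_cv,
 * and frotz_lookup used in the sketches in this file are
 * hypothetical, for illustration only:
 *
 *	struct frotz {
 *		struct localcount	f_lc;
 *		...
 *	} *f;
 *
 *	Attach:
 *		localcount_init(&f->f_lc);
 *		... publish f in the pserialized data structure ...
 *
 *	Detach:
 *		... unpublish f so it can no longer be found ...
 *		... wait for pserialize(9) readers, e.g. with
 *		    pserialize_perform ...
 *		mutex_enter(&frotz_lock);
 *		localcount_drain(&f->f_lc, &frotz_cv, &frotz_lock);
 *		mutex_exit(&frotz_lock);
 *		localcount_fini(&f->f_lc);
 *
 * The acquire/release side is sketched at the end of this file,
 * after localcount_debug_refcnt.
 */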

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_localcount.c,v 1.8 2024/08/16 21:54:17 riastradh Exp $");

#include <sys/param.h>
#include <sys/localcount.h>
#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/errno.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/xcall.h>
#if defined(DEBUG) && defined(LOCKDEBUG)
#include <sys/atomic.h>
#endif

static void localcount_xc(void *, void *);

/*
 * localcount_init(lc)
 *
 * Initialize a localcount object, allocating its per-CPU counters
 * with percpu(9).
 *
 * The caller must call localcount_drain and then localcount_fini
 * when done with lc.
 */
void
localcount_init(struct localcount *lc)
{

	lc->lc_totalp = NULL;
	lc->lc_percpu = percpu_alloc(sizeof(int64_t));
}

/*
 * localcount_drain(lc, cv, interlock)
 *
 * Wait for all acquired references to lc to drain.  Caller must
 * hold interlock; localcount_drain releases it during cross-calls
 * and waits on cv.  The cv and interlock passed here must be the
 * same as are passed to localcount_release for this lc.
 *
 * Caller must guarantee that no new references can be acquired
 * with localcount_acquire before calling localcount_drain.  For
 * example, any object that may be found in a list and acquired
 * must be removed from the list before localcount_drain.
 *
 * The localcount object lc may be used only with localcount_fini
 * after this, unless reinitialized after localcount_fini with
 * localcount_init.
 */
void
localcount_drain(struct localcount *lc, kcondvar_t *cv, kmutex_t *interlock)
{
	int64_t total = 0;

	KASSERT(mutex_owned(interlock));
	KASSERT(lc->lc_totalp == NULL);

	/* Mark it draining. */
	lc->lc_totalp = &total;

	/*
	 * Count up all references on all CPUs.
	 *
	 * This serves as a global memory barrier: after xc_wait, all
	 * CPUs will have witnessed the non-null value of lc->lc_totalp,
	 * so that it is safe to wait on the cv for them.
	 */
	mutex_exit(interlock);
	xc_wait(xc_broadcast(0, &localcount_xc, lc, interlock));
	mutex_enter(interlock);

	/* Wait for remaining references to drain. */
	while (total != 0) {
		/*
		 * At this point, now that we have added up all
		 * references on all CPUs, the total had better be
		 * nonnegative.
		 */
		KASSERTMSG((0 < total),
		    "negatively referenced localcount: %p, %"PRId64,
		    lc, total);
		cv_wait(cv, interlock);
	}

	/* Paranoia: Cause any further use of lc->lc_totalp to crash. */
	lc->lc_totalp = (void *)(uintptr_t)1;
}
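
/*
 * Example: interlock and condvar pairing (illustrative sketch)
 *
 * The cv and interlock passed to localcount_drain must be the same
 * ones passed to localcount_release for the same lc.  A hypothetical
 * client that only releases references from thread context might set
 * them up like this (frotz_lock and frotz_cv are illustrative names,
 * not part of this file):
 *
 *	static kmutex_t frotz_lock;
 *	static kcondvar_t frotz_cv;
 *
 *	mutex_init(&frotz_lock, MUTEX_DEFAULT, IPL_NONE);
 *	cv_init(&frotz_cv, "frotzdr");
 *
 *	...
 *	mutex_enter(&frotz_lock);
 *	localcount_drain(&f->f_lc, &frotz_cv, &frotz_lock);
 *	mutex_exit(&frotz_lock);
 */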

/*
 * localcount_fini(lc)
 *
 * Finalize a localcount object, releasing any memory allocated
 * for it.  The localcount object must already have been drained.
 */
void
localcount_fini(struct localcount *lc)
{

	KASSERT(lc->lc_totalp == (void *)(uintptr_t)1);
	percpu_free(lc->lc_percpu, sizeof(int64_t));
}

/*
 * localcount_xc(cookie0, cookie1)
 *
 * Accumulate and transfer the per-CPU reference counts to a
 * global total, resetting the per-CPU counter to zero.  Once
 * localcount_drain() has started, we only maintain the total
 * count in localcount_release().
 */
static void
localcount_xc(void *cookie0, void *cookie1)
{
	struct localcount *lc = cookie0;
	kmutex_t *interlock = cookie1;
	int64_t *localp;
	int s;

	mutex_enter(interlock);
	localp = percpu_getref(lc->lc_percpu);
	s = splhigh();
	*lc->lc_totalp += *localp;
	*localp -= *localp;		/* i.e., *localp = 0 */
	splx(s);
	percpu_putref(lc->lc_percpu);
	mutex_exit(interlock);
}

/*
 * localcount_adjust(lc, delta)
 *
 * Add delta -- positive or negative -- to the local CPU's count
 * for lc.
 */
static void
localcount_adjust(struct localcount *lc, int delta)
{
	int64_t *localp;
	int s;

	localp = percpu_getref(lc->lc_percpu);
	s = splhigh();
	*localp += delta;
	splx(s);
	percpu_putref(lc->lc_percpu);
}

/*
 * localcount_acquire(lc)
 *
 * Acquire a reference to lc.
 *
 * The reference may be held across sleeps and may be migrated
 * from CPU to CPU, or even thread to thread -- it is only
 * counted, not associated with a particular concrete owner.
 *
 * Involves no interprocessor synchronization.  May be used in any
 * context: while a lock is held, within a pserialize(9) read
 * section, in hard interrupt context (provided other users block
 * hard interrupts), in soft interrupt context, in thread context,
 * &c.
 *
 * Caller must guarantee that there is no concurrent
 * localcount_drain.  For example, any object that may be found in
 * a list and acquired must be removed from the list before
 * localcount_drain.
 */
void
localcount_acquire(struct localcount *lc)
{

	KASSERT(lc->lc_totalp == NULL);
	localcount_adjust(lc, +1);
#if defined(DEBUG) && defined(LOCKDEBUG)
	if (atomic_inc_32_nv(&lc->lc_refcnt) == 0)
		panic("counter overflow");
#endif
}

/*
 * localcount_release(lc, cv, interlock)
 *
 * Release a reference to lc.  If there is a concurrent
 * localcount_drain and this may be the last reference, notify
 * localcount_drain by acquiring interlock, waking cv, and
 * releasing interlock.  The cv and interlock passed here must be
 * the same as are passed to localcount_drain for this lc.
 *
 * Involves no interprocessor synchronization unless there is a
 * concurrent localcount_drain in progress.
 */
void
localcount_release(struct localcount *lc, kcondvar_t *cv, kmutex_t *interlock)
{

	/*
	 * Block xcall so that if someone begins draining after we see
	 * lc->lc_totalp as null, then they won't start cv_wait until
	 * after they have counted this CPU's contributions.
	 *
	 * Otherwise, localcount_drain may notice an extant reference
	 * from this CPU and cv_wait for it, but having seen
	 * lc->lc_totalp as null, this CPU will not wake
	 * localcount_drain.
	 */
	kpreempt_disable();

	KDASSERT(mutex_ownable(interlock));
	if (__predict_false(lc->lc_totalp != NULL)) {
		/*
		 * Slow path -- wake localcount_drain in case this is
		 * the last reference.
		 */
		mutex_enter(interlock);
		if (--*lc->lc_totalp == 0)
			cv_broadcast(cv);
		mutex_exit(interlock);
		goto out;
	}

	localcount_adjust(lc, -1);
#if defined(DEBUG) && defined(LOCKDEBUG)
	if (atomic_dec_32_nv(&lc->lc_refcnt) == UINT_MAX)
		panic("counter underflow");
#endif
out:	kpreempt_enable();
}

/*
 * localcount_debug_refcnt(lc)
 *
 * Return the total reference count of lc.  The value is meaningful
 * only if both DEBUG and LOCKDEBUG are enabled; otherwise it is
 * always 0.
 */
uint32_t
localcount_debug_refcnt(const struct localcount *lc)
{

#if defined(DEBUG) && defined(LOCKDEBUG)
	return lc->lc_refcnt;
#else
	return 0;
#endif
}
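
/*
 * Example: acquiring and releasing a reference (illustrative sketch)
 *
 * A reader typically acquires a reference while the object is still
 * visible, e.g. within a pserialize(9) read section, may then hold it
 * across sleeps, and finally releases it with the same cv and
 * interlock that the drainer uses.  The frotz names and
 * frotz_lookup() are hypothetical, as noted at the top of this file:
 *
 *	int s;
 *	struct frotz *f;
 *
 *	s = pserialize_read_enter();
 *	f = frotz_lookup(key);
 *	if (f != NULL)
 *		localcount_acquire(&f->f_lc);
 *	pserialize_read_exit(s);
 *
 *	if (f != NULL) {
 *		... use f, possibly sleeping ...
 *		localcount_release(&f->f_lc, &frotz_cv, &frotz_lock);
 *	}
 */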