/*	$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $");

/*
 * SRCU: Sleepable RCU
 *
 *	(This is not exactly SRCU as Linux implements it; it is my
 *	approximation of the semantics I think we need.)
 *
 *	For each srcu context, representing a related set of read
 *	sections, on each CPU we store two counts of numbers of
 *	readers in two epochs: active readers and draining readers.
 *
 *	All new srcu read sections get counted in the active epoch.
 *	When there's no synchronize_srcu in progress, the draining
 *	epoch has zero readers.  When a thread calls synchronize_srcu,
 *	which must be serialized by the caller, it swaps the sense of
 *	the epochs, issues an xcall to collect a global count of the
 *	number of readers in the now-draining epoch, and waits for the
 *	remainder to complete.
 *
 *	This is basically NetBSD localcount(9), but without the
 *	restriction that the caller of localcount_drain must guarantee
 *	no new readers -- srcu uses two counts per CPU instead of one
 *	like localcount(9), and synchronize_srcu just waits for all
 *	existing readers to drain while new ones count toward a new
 *	epoch.
 */
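
/*
 * Example usage -- an illustrative sketch only; foo_srcu, foo_object,
 * and foo_destroy are hypothetical names, not part of this file:
 *
 *	Reader:
 *		int ticket;
 *
 *		ticket = srcu_read_lock(&foo_srcu);
 *		... read the data published under foo_srcu ...
 *		srcu_read_unlock(&foo_srcu, ticket);
 *
 *	Updater:
 *		unpublish foo_object so new readers cannot find it;
 *		synchronize_srcu(&foo_srcu);
 *		foo_destroy(foo_object);	(safe: prior readers are done)
 */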

#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <linux/srcu.h>

struct srcu_cpu {
	int64_t	src_count[2];
};

/*
 * _init_srcu_struct(srcu, name)
 *
 *	Initialize the srcu state with the specified name.  Caller must
 *	call srcu_fini when done.
 *
 *	name should be no longer than 8 characters; longer will be
 *	truncated.
 *
 *	May sleep.
 */
void
_init_srcu_struct(struct srcu_struct *srcu, const char *name)
{

	ASSERT_SLEEPABLE();

	srcu->srcu_percpu = percpu_alloc(sizeof(struct srcu_cpu));
	mutex_init(&srcu->srcu_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&srcu->srcu_cv, name);
	srcu->srcu_sync = NULL;
	srcu->srcu_total = 0;
	srcu->srcu_gen = 0;
}

/*
 * cleanup_srcu_struct(srcu)
 *
 *	Finalize an srcu state, which must not be in use right now.  If
 *	any srcu read sections might be active, caller must wait for
 *	them to complete with synchronize_srcu.
 *
 *	May sleep.
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	KASSERTMSG((srcu->srcu_sync == NULL),
	    "%s in lwp %p while synchronize_srcu running in lwp %p",
	    __func__, curlwp, srcu->srcu_sync);
	cv_destroy(&srcu->srcu_cv);
	mutex_destroy(&srcu->srcu_lock);
	percpu_free(srcu->srcu_percpu, sizeof(struct srcu_cpu));
}

/*
 * srcu_adjust(srcu, gen, delta)
 *
 *	Internal subroutine: Add delta to the local CPU's count of
 *	readers in the generation gen.
 *
 *	Never sleeps.
 */
static void
srcu_adjust(struct srcu_struct *srcu, unsigned gen, int delta)
{
	struct srcu_cpu *cpu;
	unsigned epoch = gen & 1;	/* active epoch */

	cpu = percpu_getref(srcu->srcu_percpu);
	cpu->src_count[epoch] += delta;
	percpu_putref(srcu->srcu_percpu);
}

/*
 * srcu_read_lock(srcu)
 *
 *	Enter an srcu read section and return a ticket for it.  Any
 *	subsequent synchronize_srcu will wait until this thread calls
 *	srcu_read_unlock(srcu, ticket).
 *
 *	Never sleeps.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	unsigned gen;

	/*
	 * Prevent xcall while we fetch the generation and adjust the
	 * count.
	 */
	kpreempt_disable();
	gen = srcu->srcu_gen;
	srcu_adjust(srcu, gen, +1);
	kpreempt_enable();

	/*
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	return gen;
}

/*
 * srcu_read_unlock(srcu, ticket)
 *
 *	Exit an srcu read section started with srcu_read_lock returning
 *	ticket.  If there is a pending synchronize_srcu and we might be
 *	the last reader, notify it.
 *
 *	Never sleeps.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int ticket)
{
	unsigned gen = ticket;

	/*
	 * All side effects have completed on this CPU before we
	 * disable kpreemption.
	 *
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	/*
	 * Prevent xcall while we determine whether we need to notify a
	 * sync and decrement the count in our generation.
	 */
	kpreempt_disable();
	if (__predict_true(gen == srcu->srcu_gen)) {
		/*
		 * Fast path: just decrement the local count.  If a
		 * sync has begun and incremented gen after we observed
		 * it, it will issue an xcall that will run after this
		 * kpreempt_disable section to collect our local count.
		 */
		srcu_adjust(srcu, gen, -1);
	} else {
		/*
		 * Slow path: decrement the total count, and if it goes
		 * to zero, notify the sync in progress.  The xcall may
		 * have already run, or it may have yet to run; since
		 * we can't tell which, we must contribute to the
		 * global count, not to our local count.
		 */
		mutex_spin_enter(&srcu->srcu_lock);
		KASSERT(srcu->srcu_sync != NULL);
		if (--srcu->srcu_total == 0)
			cv_broadcast(&srcu->srcu_cv);
		mutex_spin_exit(&srcu->srcu_lock);
	}
	kpreempt_enable();
}
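
/*
 * Worked example of the generation/epoch arithmetic (an illustrative
 * sketch): suppose srcu_gen == 4, so the active epoch is 4 & 1 == 0
 * and readers take ticket 4.  synchronize_srcu advances srcu_gen to
 * 5; new readers now count in epoch 5 & 1 == 1, and the xcall below
 * drains epoch 1 ^ (5 & 1) == 0.  A reader holding ticket 4 that
 * unlocks after the advance sees ticket != srcu_gen and takes the
 * slow path above, decrementing the global total rather than its
 * local count, which the xcall may already have collected.
 */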

/*
 * synchronize_srcu_xc(a, b)
 *
 *	Cross-call function for synchronize_srcu: a is the struct
 *	srcu_struct pointer; b is ignored.  Transfer the local count of
 *	srcu readers on this CPU in the inactive epoch to the global
 *	count under the srcu sync lock.
 */
static void
synchronize_srcu_xc(void *a, void *b)
{
	struct srcu_struct *srcu = a;
	struct srcu_cpu *cpu;
	unsigned gen, epoch;
	uint64_t local;

	/* Operate under the sync lock.  Blocks preemption as a side effect. */
	mutex_spin_enter(&srcu->srcu_lock);

	gen = srcu->srcu_gen;		/* active generation */
	epoch = 1 ^ (gen & 1);		/* draining epoch */

	/* Transfer the local count to the global count.  */
	cpu = percpu_getref(srcu->srcu_percpu);
	local = cpu->src_count[epoch];
	srcu->srcu_total += local;
	cpu->src_count[epoch] -= local; /* i.e., cpu->src_count[epoch] = 0 */
	KASSERT(cpu->src_count[epoch] == 0);
	percpu_putref(srcu->srcu_percpu);

	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	/* Start a sync, and advance the active generation.  */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_sync != NULL)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	KASSERT(srcu->srcu_total == 0);
	srcu->srcu_sync = curlwp;
	srcu->srcu_gen++;
	mutex_spin_exit(&srcu->srcu_lock);

	/*
	 * Wait for all CPUs to witness the change to the active
	 * generation, and collect their local counts in the draining
	 * epoch into the global count.
	 */
	xc_wait(xc_broadcast(0, synchronize_srcu_xc, srcu, NULL));

	/*
	 * Wait for the global count of users in the draining epoch to
	 * drain to zero.
	 */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_total != 0)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	srcu->srcu_sync = NULL;
	cv_broadcast(&srcu->srcu_cv);
	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu_expedited(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.  Try to get an answer
 *	faster than synchronize_srcu, at the cost of more activity
 *	triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu_expedited(struct srcu_struct *srcu)
{

	synchronize_srcu(srcu);
}