/*	$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $");

/*
 * SRCU: Sleepable RCU
 *
 * (This is not exactly SRCU as Linux implements it; it is my
 * approximation of the semantics I think we need.)
 *
 * For each srcu context, representing a related set of read
 * sections, on each CPU we store two counts of numbers of
 * readers in two epochs: active readers and draining readers.
 *
 * All new srcu read sections get counted in the active epoch.
 * When there's no synchronize_srcu in progress, the draining
 * epoch has zero readers.  When a thread calls synchronize_srcu,
 * which must be serialized by the caller, it swaps the sense of
 * the epochs, issues an xcall to collect a global count of the
 * number of readers in the now-draining epoch, and waits for the
 * remainder to complete.
 *
 * This is basically NetBSD localcount(9), but without the
 * restriction that the caller of localcount_drain must guarantee
 * no new readers -- srcu uses two counts per CPU instead of one
 * like localcount(9), and synchronize_srcu just waits for all
 * existing readers to drain while new ones count toward a new
 * epoch.
 */
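
/*
 * Example read-side usage (an illustrative sketch, not part of the
 * original source; `frobs', `frobs_srcu', and `frobnicate' are
 * hypothetical):
 *
 *	struct frob *frob;
 *	int ticket;
 *
 *	ticket = srcu_read_lock(&frobs_srcu);
 *	frob = atomic_load_consume(&frobs);
 *	frobnicate(frob);
 *	srcu_read_unlock(&frobs_srcu, ticket);
 *
 * The ticket returned by srcu_read_lock records the generation in
 * which the read section was counted, and must be passed back to
 * srcu_read_unlock unchanged.
 */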

#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <linux/srcu.h>

struct srcu_cpu {
	int64_t	src_count[2];	/* reader counts for the two epochs */
};

/*
 * _init_srcu_struct(srcu, name)
 *
 *	Initialize the srcu state with the specified name.  Caller must
 *	call cleanup_srcu_struct when done.
 *
 *	name should be no longer than 8 characters; longer will be
 *	truncated.
 *
 *	May sleep.
 */
void
_init_srcu_struct(struct srcu_struct *srcu, const char *name)
{

	ASSERT_SLEEPABLE();

	srcu->srcu_percpu = percpu_alloc(sizeof(struct srcu_cpu));
	mutex_init(&srcu->srcu_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&srcu->srcu_cv, name);
	srcu->srcu_sync = NULL;
	srcu->srcu_total = 0;
	srcu->srcu_gen = 0;
}

/*
 * cleanup_srcu_struct(srcu)
 *
 *	Finalize an srcu state, which must not be in use right now.  If
 *	any srcu read sections might be active, caller must wait for
 *	them to complete with synchronize_srcu.
 *
 *	May sleep.
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	KASSERTMSG((srcu->srcu_sync == NULL),
	    "%s in lwp %p while synchronize_srcu running in lwp %p",
	    __func__, curlwp, srcu->srcu_sync);
	cv_destroy(&srcu->srcu_cv);
	mutex_destroy(&srcu->srcu_lock);
	percpu_free(srcu->srcu_percpu, sizeof(struct srcu_cpu));
}

/*
 * srcu_adjust(srcu, gen, delta)
 *
 *	Internal subroutine: Add delta to the local CPU's count of
 *	readers in the generation gen.
 *
 *	Never sleeps.
 */
static void
srcu_adjust(struct srcu_struct *srcu, unsigned gen, int delta)
{
	struct srcu_cpu *cpu;
	unsigned epoch = gen & 1;	/* active epoch */

	cpu = percpu_getref(srcu->srcu_percpu);
	cpu->src_count[epoch] += delta;
	percpu_putref(srcu->srcu_percpu);
}
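
/*
 * For example, in generation 6 the active epoch is 6 & 1 = 0; when
 * synchronize_srcu advances the generation to 7, epoch 1 becomes
 * active and epoch 0 becomes the draining epoch.
 */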

/*
 * srcu_read_lock(srcu)
 *
 *	Enter an srcu read section and return a ticket for it.  Any
 *	subsequent synchronize_srcu will wait until this thread calls
 *	srcu_read_unlock(srcu, ticket).
 *
 *	Never sleeps.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	unsigned gen;

	/*
	 * Prevent xcall while we fetch the generation and adjust the
	 * count.
	 */
	kpreempt_disable();
	gen = srcu->srcu_gen;
	srcu_adjust(srcu, gen, +1);
	kpreempt_enable();

	/*
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	return gen;
}

/*
 * srcu_read_unlock(srcu, ticket)
 *
 *	Exit an srcu read section started with srcu_read_lock returning
 *	ticket.  If there is a pending synchronize_srcu and we might be
 *	the last reader, notify it.
 *
 *	Never sleeps.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int ticket)
{
	unsigned gen = ticket;

	/*
	 * All side effects have completed on this CPU before we
	 * disable kpreemption.
	 *
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	/*
	 * Prevent xcall while we determine whether we need to notify a
	 * sync and decrement the count in our generation.
	 */
	kpreempt_disable();
	if (__predict_true(gen == srcu->srcu_gen)) {
		/*
		 * Fast path: just decrement the local count.  If a
		 * sync has begun and incremented gen after we observed
		 * it, it will issue an xcall that will run after this
		 * kpreempt_disable section to collect our local count.
		 */
		srcu_adjust(srcu, gen, -1);
	} else {
		/*
		 * Slow path: decrement the total count, and if it goes
		 * to zero, notify the sync in progress.  The xcall may
		 * have already run, or it may have yet to run; since
		 * we can't tell which, we must contribute to the
		 * global count, not to our local count.
		 */
		mutex_spin_enter(&srcu->srcu_lock);
		KASSERT(srcu->srcu_sync != NULL);
		if (--srcu->srcu_total == 0)
			cv_broadcast(&srcu->srcu_cv);
		mutex_spin_exit(&srcu->srcu_lock);
	}
	kpreempt_enable();
}
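
/*
 * For example, suppose a reader takes ticket G.  If no
 * synchronize_srcu intervenes, srcu_gen is still G at unlock time
 * and the fast path simply undoes the local increment.  If a sync
 * has advanced srcu_gen to G+1, its xcall may or may not have
 * already folded this CPU's epoch-G count into srcu_total, so the
 * slow path decrements the global total instead and wakes the sync
 * when the total reaches zero.
 */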

/*
 * synchronize_srcu_xc(a, b)
 *
 *	Cross-call function for synchronize_srcu: a is the struct
 *	srcu_struct pointer; b is ignored.  Transfer the local count of
 *	srcu readers on this CPU in the inactive epoch to the global
 *	count under the srcu sync lock.
 */
static void
synchronize_srcu_xc(void *a, void *b)
{
	struct srcu_struct *srcu = a;
	struct srcu_cpu *cpu;
	unsigned gen, epoch;
	uint64_t local;

	/* Operate under the sync lock.  Blocks preemption as side effect.  */
	mutex_spin_enter(&srcu->srcu_lock);

	gen = srcu->srcu_gen;		/* active generation */
	epoch = 1 ^ (gen & 1);		/* draining epoch */

	/* Transfer the local count to the global count.  */
	cpu = percpu_getref(srcu->srcu_percpu);
	local = cpu->src_count[epoch];
	srcu->srcu_total += local;
	cpu->src_count[epoch] -= local;	/* i.e., cpu->src_count[epoch] = 0 */
	KASSERT(cpu->src_count[epoch] == 0);
	percpu_putref(srcu->srcu_percpu);

	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	/* Start a sync, and advance the active generation.  */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_sync != NULL)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	KASSERT(srcu->srcu_total == 0);
	srcu->srcu_sync = curlwp;
	srcu->srcu_gen++;
	mutex_spin_exit(&srcu->srcu_lock);

	/*
	 * Wait for all CPUs to witness the change to the active
	 * generation, and collect their local counts in the draining
	 * epoch into the global count.
	 */
	xc_wait(xc_broadcast(0, synchronize_srcu_xc, srcu, NULL));

	/*
	 * Wait for the global count of users in the draining epoch to
	 * drain to zero.
	 */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_total != 0)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	srcu->srcu_sync = NULL;
	cv_broadcast(&srcu->srcu_cv);
	mutex_spin_exit(&srcu->srcu_lock);
}
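
/*
 * Example update-side usage (an illustrative sketch, not part of the
 * original source; `frobs', `frobs_srcu', and `free_frob' are
 * hypothetical, and calls to synchronize_srcu must be serialized by
 * the caller):
 *
 *	struct frob *oldfrob = frobs;
 *
 *	atomic_store_release(&frobs, newfrob);
 *	synchronize_srcu(&frobs_srcu);
 *	free_frob(oldfrob);
 *
 * Once synchronize_srcu returns, every read section that could have
 * observed oldfrob has exited, so it is safe to free.
 */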

/*
 * synchronize_srcu_expedited(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.  Try to get an answer
 *	faster than synchronize_srcu, at the cost of more activity
 *	triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu_expedited(struct srcu_struct *srcu)
{

	synchronize_srcu(srcu);
}