/*	$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $");

/*
 * SRCU: Sleepable RCU
 *
 *	(This is not exactly SRCU as Linux implements it; it is my
 *	approximation of the semantics I think we need.)
 *
 *	For each srcu context, representing a related set of read
 *	sections, on each CPU we store two counts of numbers of
 *	readers in two epochs: active readers and draining readers.
 *
 *	All new srcu read sections get counted in the active epoch.
 *	When there's no synchronize_srcu in progress, the draining
 *	epoch has zero readers.  When a thread calls synchronize_srcu,
 *	which must be serialized by the caller, it swaps the sense
 *	of the epochs, issues an xcall to collect a global count of the
 *	number of readers in the now-draining epoch, and waits for the
 *	remainder to complete.
 *
 *	This is basically NetBSD localcount(9), but without the
 *	restriction that the caller of localcount_drain must guarantee
 *	no new readers -- srcu uses two counts per CPU instead of one
 *	like localcount(9), and synchronize_srcu just waits for all
 *	existing readers to drain while new ones count toward a new
 *	epoch.
 */

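/*
 * Read-side usage sketch (illustrative only; frob_srcu, frob_lookup,
 * and frob_wait are hypothetical names, not part of this file): a
 * reader brackets its read section with srcu_read_lock and
 * srcu_read_unlock, may sleep in between, and must hand back the
 * ticket that srcu_read_lock returned:
 *
 *	struct frob *frob;
 *	int ticket;
 *
 *	ticket = srcu_read_lock(&frob_srcu);
 *	frob = frob_lookup(key);
 *	frob_wait(frob);
 *	srcu_read_unlock(&frob_srcu, ticket);
 */
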
#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <linux/srcu.h>

struct srcu_cpu {
	int64_t	src_count[2];
};

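/*
 * The struct srcu_struct layout assumed here is defined in
 * <linux/srcu.h>; the following is only a sketch of the fields this
 * file uses, with types inferred from how they are used below:
 *
 *	struct srcu_struct {
 *		percpu_t	*srcu_percpu;	(struct srcu_cpu per CPU)
 *		kmutex_t	srcu_lock;
 *		kcondvar_t	srcu_cv;
 *		struct lwp	*srcu_sync;	(lwp in synchronize_srcu)
 *		int64_t		srcu_total;	(global draining count)
 *		unsigned	srcu_gen;	(active generation)
 *	};
 */
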
/*
 * _init_srcu_struct(srcu, name)
 *
 *	Initialize the srcu state with the specified name.  Caller must
 *	call cleanup_srcu_struct when done.
 *
 *	name should be no longer than 8 characters; longer will be
 *	truncated.
 *
 *	May sleep.
 */
void
_init_srcu_struct(struct srcu_struct *srcu, const char *name)
{

	ASSERT_SLEEPABLE();

	srcu->srcu_percpu = percpu_alloc(sizeof(struct srcu_cpu));
	mutex_init(&srcu->srcu_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&srcu->srcu_cv, name);
	srcu->srcu_sync = NULL;
	srcu->srcu_total = 0;
	srcu->srcu_gen = 0;
}

/*
 * cleanup_srcu_struct(srcu)
 *
 *	Finalize an srcu state, which must not be in use right now.  If
 *	any srcu read sections might be active, caller must wait for
 *	them to complete with synchronize_srcu.
 *
 *	May sleep.
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	KASSERTMSG((srcu->srcu_sync == NULL),
	    "%s in lwp %p while synchronize_srcu running in lwp %p",
	    __func__, curlwp, srcu->srcu_sync);
	cv_destroy(&srcu->srcu_cv);
	mutex_destroy(&srcu->srcu_lock);
	percpu_free(srcu->srcu_percpu, sizeof(struct srcu_cpu));
}

/*
 * srcu_adjust(srcu, gen, delta)
 *
 *	Internal subroutine: Add delta to the local CPU's count of
 *	readers in the generation gen.
 *
 *	Never sleeps.
 */
static void
srcu_adjust(struct srcu_struct *srcu, unsigned gen, int delta)
{
	struct srcu_cpu *cpu;
	unsigned epoch = gen & 1; /* active epoch */

	cpu = percpu_getref(srcu->srcu_percpu);
	cpu->src_count[epoch] += delta;
	percpu_putref(srcu->srcu_percpu);
}

/*
 * srcu_read_lock(srcu)
 *
 *	Enter an srcu read section and return a ticket for it.  Any
 *	subsequent synchronize_srcu will wait until this thread calls
 *	srcu_read_unlock(srcu, ticket).
 *
 *	Never sleeps.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	unsigned gen;

	/*
	 * Prevent xcall while we fetch the generation and adjust the
	 * count.
	 */
	kpreempt_disable();
	gen = srcu->srcu_gen;
	srcu_adjust(srcu, gen, +1);
	kpreempt_enable();

	/*
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	return gen;
}

/*
 * srcu_read_unlock(srcu, ticket)
 *
 *	Exit an srcu read section started with srcu_read_lock returning
 *	ticket.  If there is a pending synchronize_srcu and we might be
 *	the last reader, notify it.
 *
 *	Never sleeps.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int ticket)
{
	unsigned gen = ticket;

	/*
	 * All side effects have completed on this CPU before we
	 * disable kpreemption.
	 *
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	/*
	 * Prevent xcall while we determine whether we need to notify a
	 * sync and decrement the count in our generation.
	 */
	kpreempt_disable();
	if (__predict_true(gen == srcu->srcu_gen)) {
		/*
		 * Fast path: just decrement the local count.  If a
		 * sync has begun and incremented gen after we observed
		 * it, it will issue an xcall that will run after this
		 * kpreempt_disable section to collect our local count.
		 */
		srcu_adjust(srcu, gen, -1);
	} else {
		/*
		 * Slow path: decrement the total count, and if it goes
		 * to zero, notify the sync in progress.  The xcall may
		 * have already run, or it may have yet to run; since
		 * we can't tell which, we must contribute to the
		 * global count, not to our local count.
		 */
		mutex_spin_enter(&srcu->srcu_lock);
		KASSERT(srcu->srcu_sync != NULL);
		if (--srcu->srcu_total == 0)
			cv_broadcast(&srcu->srcu_cv);
		mutex_spin_exit(&srcu->srcu_lock);
	}
	kpreempt_enable();
}

/*
 * synchronize_srcu_xc(a, b)
 *
 *	Cross-call function for synchronize_srcu: a is the struct srcu_struct
 *	pointer; b is ignored.  Transfer the local count of srcu
 *	readers on this CPU in the inactive epoch to the global count
 *	under the srcu sync lock.
 */
static void
synchronize_srcu_xc(void *a, void *b)
{
	struct srcu_struct *srcu = a;
	struct srcu_cpu *cpu;
	unsigned gen, epoch;
	uint64_t local;

	/* Operate under the sync lock.  Blocks preemption as side effect.  */
	mutex_spin_enter(&srcu->srcu_lock);

	gen = srcu->srcu_gen;	/* active generation */
	epoch = 1 ^ (gen & 1);	/* draining epoch */

	/* Transfer the local count to the global count.  */
	cpu = percpu_getref(srcu->srcu_percpu);
	local = cpu->src_count[epoch];
	srcu->srcu_total += local;
	cpu->src_count[epoch] -= local; /* i.e., cpu->src_count[epoch] = 0 */
	KASSERT(cpu->src_count[epoch] == 0);
	percpu_putref(srcu->srcu_percpu);

	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	/* Start a sync, and advance the active generation.  */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_sync != NULL)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	KASSERT(srcu->srcu_total == 0);
	srcu->srcu_sync = curlwp;
	srcu->srcu_gen++;
	mutex_spin_exit(&srcu->srcu_lock);

	/*
	 * Wait for all CPUs to witness the change to the active
	 * generation, and collect their local counts in the draining
	 * epoch into the global count.
	 */
	xc_wait(xc_broadcast(0, synchronize_srcu_xc, srcu, NULL));

	/*
	 * Wait for the global count of users in the draining epoch to
	 * drain to zero.
	 */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_total != 0)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	srcu->srcu_sync = NULL;
	cv_broadcast(&srcu->srcu_cv);
	mutex_spin_exit(&srcu->srcu_lock);
}

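/*
 * Update-side usage sketch (illustrative only; the frob names, list,
 * and lock are hypothetical): unpublish the object so no new read
 * section can find it, wait for any read sections that might still be
 * using it, and only then free it:
 *
 *	mutex_enter(&frob_list_lock);
 *	LIST_REMOVE(frob, f_entry);
 *	mutex_exit(&frob_list_lock);
 *
 *	synchronize_srcu(&frob_srcu);
 *	frob_free(frob);
 */
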
/*
 * synchronize_srcu_expedited(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.  Try to get an answer
 *	faster than synchronize_srcu, at the cost of more activity
 *	triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu_expedited(struct srcu_struct *srcu)
{

	synchronize_srcu(srcu);
}