/*	$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kthread.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/xcall.h>

#include <linux/rcupdate.h>
#include <linux/slab.h>

SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__start);
SDT_PROBE_DEFINE1(sdt, linux, rcu, synchronize__cpu, "unsigned"/*cpu*/);
SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__done);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__start);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__done);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__queue,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__run,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__done,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__queue,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__free,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__done,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);

static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	struct rcu_head	*first_callback;
	struct rcu_head	*first_kfree;
	struct lwp	*lwp;
	uint64_t	gen;
	bool		running;
	bool		dying;
} gc __cacheline_aligned;

static void
synchronize_rcu_xc(void *a, void *b)
{

	SDT_PROBE1(sdt, linux, rcu, synchronize__cpu,  cpu_index(curcpu()));
}

/*
 * synchronize_rcu()
 *
 *	Wait for any pending RCU read section on every CPU to complete
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu(void)
{

	SDT_PROBE0(sdt, linux, rcu, synchronize__start);
	xc_wait(xc_broadcast(0, &synchronize_rcu_xc, NULL, NULL));
	SDT_PROBE0(sdt, linux, rcu, synchronize__done);
}
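
/*
 * Illustrative sketch (not part of this file): the classic
 * unpublish-then-free pattern built on synchronize_rcu.  The
 * `the_frob' pointer and `struct frobnitz' are hypothetical;
 * rcu_read_lock, rcu_read_unlock, rcu_dereference, and
 * rcu_assign_pointer are assumed to be provided by <linux/rcupdate.h>
 * as in Linux.
 *
 *	A reader touches the object only inside a read section:
 *
 *		rcu_read_lock();
 *		f = rcu_dereference(the_frob);
 *		if (f != NULL)
 *			value = f->f_value;
 *		rcu_read_unlock();
 *
 *	A writer unpublishes the object, waits for all such read
 *	sections to drain, and only then frees it:
 *
 *		f = the_frob;
 *		rcu_assign_pointer(the_frob, NULL);
 *		synchronize_rcu();
 *		kfree(f);
 */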

/*
 * synchronize_rcu_expedited()
 *
 *	Wait for any pending RCU read section on every CPU to complete
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.  Try to get an answer faster than
 *	synchronize_rcu, at the cost of more activity triggered on
 *	other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu_expedited(void)
{

	synchronize_rcu();
}

/*
 * cookie = get_state_synchronize_rcu(), cond_synchronize_rcu(cookie)
 *
 *	Optimization for synchronize_rcu -- skip it if it has already
 *	happened between get_state_synchronize_rcu and
 *	cond_synchronize_rcu.  get_state_synchronize_rcu implies a full
 *	SMP memory barrier (membar_sync).
 */
unsigned long
get_state_synchronize_rcu(void)
{

	membar_sync();
	return 0;
}

void
cond_synchronize_rcu(unsigned long cookie)
{

	synchronize_rcu();
}
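
/*
 * Illustrative sketch (not part of this file): the cookie idiom as
 * Linux callers use it.  In this emulation the cookie is a dummy and
 * cond_synchronize_rcu always performs a full synchronize_rcu, but the
 * calling pattern is unchanged:
 *
 *		unsigned long cookie;
 *
 *		cookie = get_state_synchronize_rcu();
 *		... lengthy preparation, during which a grace period
 *		    may elapse on its own ...
 *		cond_synchronize_rcu(cookie);
 */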

/*
 * rcu_barrier()
 *
 *	Wait for all pending RCU callbacks to complete.
 *
 *	Does not imply, and is not implied by, synchronize_rcu.
 */
void
rcu_barrier(void)
{
	uint64_t gen;

	/*
	 * If the GC isn't running anything yet, then all callbacks of
	 * interest are queued, and it suffices to wait for the GC to
	 * advance one generation number.
	 *
	 * If the GC is already running, however, and there are any
	 * callbacks of interest queued but not in the GC's current
	 * batch of work, then when the GC advances the generation
	 * number it will not have completed the queued callbacks.  So
	 * we have to wait for one more generation -- or until the GC
	 * has stopped running because there's no work left.
	 */

	SDT_PROBE0(sdt, linux, rcu, barrier__start);
	mutex_enter(&gc.lock);
	gen = gc.gen;
	if (gc.running)
		gen++;
	while (gc.running || gc.first_callback || gc.first_kfree) {
		cv_wait(&gc.cv, &gc.lock);
		if (gc.gen > gen)
			break;
	}
	mutex_exit(&gc.lock);
	SDT_PROBE0(sdt, linux, rcu, barrier__done);
}
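
/*
 * Illustrative sketch (not part of this file): a typical detach path
 * for code that frees objects via call_rcu or kfree_rcu.  The mydrv_*
 * names are hypothetical.  Stop queueing new callbacks, wait for the
 * ones already queued, and only then destroy what they reference:
 *
 *		mydrv_unpublish_all(sc);
 *		rcu_barrier();
 *		mydrv_destroy_pool(sc);
 */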

/*
 * call_rcu(head, callback)
 *
 *	Arrange to call callback(head) after any pending RCU read
 *	section on every CPU has completed.  Return immediately.
 */
void
call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *))
{

	head->rcuh_u.callback = callback;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_callback;
	gc.first_callback = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, call__queue,  head, callback);
	mutex_exit(&gc.lock);
}
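
/*
 * Illustrative sketch (not part of this file): freeing a structure
 * through an embedded rcu_head without sleeping in the writer.
 * `struct frobnitz', its f_rcu member, and frob_free_cb are
 * hypothetical; container_of is assumed to be available as in Linux.
 *
 *	The callback recovers the enclosing object from its rcu_head
 *	and releases it:
 *
 *		static void
 *		frob_free_cb(struct rcu_head *head)
 *		{
 *			struct frobnitz *f = container_of(head,
 *			    struct frobnitz, f_rcu);
 *
 *			kfree(f);
 *		}
 *
 *	The writer unlinks the object and schedules the callback
 *	instead of waiting in synchronize_rcu:
 *
 *		rcu_assign_pointer(the_frob, NULL);
 *		call_rcu(&f->f_rcu, frob_free_cb);
 */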

/*
 * _kfree_rcu(head, obj)
 *
 *	kfree_rcu helper: schedule kfree(obj) using head for storage.
 */
void
_kfree_rcu(struct rcu_head *head, void *obj)
{

	LOCKDEBUG_MEM_CHECK(obj, ((struct linux_malloc *)obj - 1)->lm_size);

	head->rcuh_u.obj = obj;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_kfree;
	gc.first_kfree = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, kfree__queue,  head, obj);
	mutex_exit(&gc.lock);
}
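
/*
 * Illustrative sketch (not part of this file): callers normally reach
 * _kfree_rcu through the kfree_rcu() macro in <linux/rcupdate.h>,
 * naming the embedded rcu_head member; only kmalloc'd objects may be
 * freed this way, since the memory is returned with kfree.  `struct
 * frobnitz' and `the_frob' are hypothetical:
 *
 *		struct frobnitz {
 *			int		f_value;
 *			struct rcu_head	f_rcu;
 *		};
 *
 *		struct frobnitz *f = the_frob;
 *
 *		rcu_assign_pointer(the_frob, NULL);
 *		kfree_rcu(f, f_rcu);
 */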

static void
gc_thread(void *cookie)
{
	struct rcu_head *head_callback, *head_kfree, *head, *next;

	mutex_enter(&gc.lock);
	for (;;) {
		/* Start with no work.  */
		bool work = false;

		/* Grab the list of callbacks.  */
		if ((head_callback = gc.first_callback) != NULL) {
			gc.first_callback = NULL;
			work = true;
		}

		/* Grab the list of objects to kfree.  */
		if ((head_kfree = gc.first_kfree) != NULL) {
			gc.first_kfree = NULL;
			work = true;
		}

		/*
		 * If no work, then either stop, if we're dying, or
		 * wait for work, if not.
		 */
		if (!work) {
			if (gc.dying)
				break;
			cv_wait(&gc.cv, &gc.lock);
			continue;
		}

		/*
		 * We have work to do.  Drop the lock to do it, and
		 * notify rcu_barrier that we're still doing it.
		 */
		gc.running = true;
		mutex_exit(&gc.lock);

		/* Wait for activity on all CPUs.  */
		synchronize_rcu();

		/* Call the callbacks.  */
		for (head = head_callback; head != NULL; head = next) {
			void (*callback)(struct rcu_head *) =
			    head->rcuh_u.callback;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, call__run,
			    head, callback);
			(*callback)(head);
			/*
			 * Can't dereference head or invoke
			 * callback after this point.
			 */
			SDT_PROBE2(sdt, linux, rcu, call__done,
			    head, callback);
		}

		/* Free the objects to kfree.  */
		for (head = head_kfree; head != NULL; head = next) {
			void *obj = head->rcuh_u.obj;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, kfree__free,  head, obj);
			kfree(obj);
			/* Can't dereference head or obj after this point.  */
			SDT_PROBE2(sdt, linux, rcu, kfree__done,  head, obj);
		}

		/* Return to the lock.  */
		mutex_enter(&gc.lock);

		/* Finished a batch of work.  Notify rcu_barrier.  */
		gc.gen++;
		gc.running = false;
		cv_broadcast(&gc.cv);

		/*
		 * Limit ourselves to one batch per tick, in an attempt
		 * to make the batches larger.
		 *
		 * XXX We should maybe also limit the size of each
		 * batch.
		 */
		(void)kpause("lxrcubat", /*intr*/false, /*timo*/1, &gc.lock);
	}
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	mutex_exit(&gc.lock);

	kthread_exit(0);
}

void
init_rcu_head(struct rcu_head *head)
{
}

void
destroy_rcu_head(struct rcu_head *head)
{
}

int
linux_rcu_gc_init(void)
{
	int error;

	mutex_init(&gc.lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&gc.cv, "lnxrcugc");
	gc.first_callback = NULL;
	gc.first_kfree = NULL;
	gc.gen = 0;
	gc.dying = false;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL, &gc_thread, NULL,
	    &gc.lwp, "lnxrcugc");
	if (error)
		goto fail;

	/* Success!  */
	return 0;

fail:	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
	return error;
}

void
linux_rcu_gc_fini(void)
{

	mutex_enter(&gc.lock);
	gc.dying = true;
	cv_broadcast(&gc.cv);
	mutex_exit(&gc.lock);

	kthread_join(gc.lwp);
	gc.lwp = NULL;
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
}