/*	$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kthread.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/xcall.h>

#include <linux/rcupdate.h>
#include <linux/slab.h>

SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__start);
SDT_PROBE_DEFINE1(sdt, linux, rcu, synchronize__cpu, "unsigned"/*cpu*/);
SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__done);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__start);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__done);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__queue,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__run,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__done,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__queue,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__free,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__done,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);

static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	struct rcu_head	*first_callback;
	struct rcu_head	*first_kfree;
	struct lwp	*lwp;
	uint64_t	gen;
	bool		running;
	bool		dying;
} gc __cacheline_aligned;

static void
synchronize_rcu_xc(void *a, void *b)
{

	SDT_PROBE1(sdt, linux, rcu, synchronize__cpu, cpu_index(curcpu()));
}

/*
 * synchronize_rcu()
 *
 *	Wait for any pending RCU read section on every CPU to complete,
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu(void)
{

	SDT_PROBE0(sdt, linux, rcu, synchronize__start);
	xc_wait(xc_broadcast(0, &synchronize_rcu_xc, NULL, NULL));
	SDT_PROBE0(sdt, linux, rcu, synchronize__done);
}
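
/*
 * Illustrative sketch (not from this file): the writer-side pattern that
 * synchronize_rcu is meant for, assuming the usual Linux-style reader API
 * (rcu_read_lock, rcu_dereference, rcu_assign_pointer) provided by
 * <linux/rcupdate.h>.  The struct, variable, and function names are
 * hypothetical.
 *
 *	static struct frob *frobp;	// readers: rcu_dereference(frobp)
 *					// under rcu_read_lock()
 *
 *	static void
 *	frob_replace(struct frob *new)	// writers serialized by the caller
 *	{
 *		struct frob *old = frobp;
 *
 *		rcu_assign_pointer(frobp, new);	// publish the new version
 *		synchronize_rcu();	// wait out readers of the old one
 *		kfree(old);		// no reader can still see old
 *	}
 */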

/*
 * synchronize_rcu_expedited()
 *
 *	Wait for any pending RCU read section on every CPU to complete,
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.  Try to get an answer faster than synchronize_rcu,
 *	at the cost of more activity triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu_expedited(void)
{

	synchronize_rcu();
}

/*
 * cookie = get_state_synchronize_rcu(), cond_synchronize_rcu(cookie)
 *
 *	Optimization for synchronize_rcu -- skip it if a grace period has
 *	already elapsed between get_state_synchronize_rcu and
 *	cond_synchronize_rcu.  get_state_synchronize_rcu implies a full
 *	SMP memory barrier (membar_sync).
 */
unsigned long
get_state_synchronize_rcu(void)
{

	membar_sync();
	return 0;
}

void
cond_synchronize_rcu(unsigned long cookie)
{

	synchronize_rcu();
}
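
/*
 * Illustrative sketch (not from this file): how a caller might use the
 * cookie to avoid a redundant grace period.  The object name is
 * hypothetical.
 *
 *	unsigned long cookie = get_state_synchronize_rcu();
 *	...			// unpublish obj from RCU-readable structures
 *	cond_synchronize_rcu(cookie);	// may skip the wait if a grace
 *					// period has already elapsed
 *	kfree(obj);
 *
 * In this implementation the cookie is always 0 and cond_synchronize_rcu
 * unconditionally calls synchronize_rcu, so the skip never happens; the
 * stubs only preserve the API.
 */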

/*
 * rcu_barrier()
 *
 *	Wait for all pending RCU callbacks to complete.
 *
 *	Does not imply, and is not implied by, synchronize_rcu.
 */
void
rcu_barrier(void)
{
	uint64_t gen;

	/*
	 * If the GC isn't running anything yet, then all callbacks of
	 * interest are queued, and it suffices to wait for the GC to
	 * advance one generation number.
	 *
	 * If the GC is already running, however, and there are any
	 * callbacks of interest queued but not in the GC's current
	 * batch of work, then when the GC advances the generation
	 * number it will not have completed the queued callbacks.  So
	 * we have to wait for one more generation -- or until the GC
	 * has stopped running because there's no work left.
	 */

	SDT_PROBE0(sdt, linux, rcu, barrier__start);
	mutex_enter(&gc.lock);
	gen = gc.gen;
	if (gc.running)
		gen++;
	while (gc.running || gc.first_callback || gc.first_kfree) {
		cv_wait(&gc.cv, &gc.lock);
		if (gc.gen > gen)
			break;
	}
	mutex_exit(&gc.lock);
	SDT_PROBE0(sdt, linux, rcu, barrier__done);
}
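
/*
 * Illustrative sketch (not from this file): the typical teardown use of
 * rcu_barrier -- wait for already-queued call_rcu/kfree_rcu callbacks to
 * run before destroying the resources they touch.  The softc layout and
 * names are hypothetical.
 *
 *	static void
 *	frob_detach(struct frob_softc *sc)
 *	{
 *		...			// stop queueing new RCU callbacks
 *		rcu_barrier();		// all queued callbacks have now run
 *		kmem_free(sc->sc_table, sc->sc_tablesize);
 *	}
 */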

/*
 * call_rcu(head, callback)
 *
 *	Arrange to call callback(head) after any pending RCU read
 *	sections on every CPU have completed.  Return immediately.
 */
void
call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *))
{

	head->rcuh_u.callback = callback;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_callback;
	gc.first_callback = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, call__queue, head, callback);
	mutex_exit(&gc.lock);
}
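
/*
 * Illustrative sketch (not from this file): the usual way to drive
 * call_rcu -- embed a struct rcu_head in the object and recover the
 * object in the callback.  The struct and names are hypothetical;
 * container_of is the linux compat macro.
 *
 *	struct frob {
 *		...
 *		struct rcu_head	f_rcu;
 *	};
 *
 *	static void
 *	frob_free_cb(struct rcu_head *head)
 *	{
 *		struct frob *f = container_of(head, struct frob, f_rcu);
 *
 *		kfree(f);
 *	}
 *
 *	// after unpublishing f from all RCU-readable structures:
 *	call_rcu(&f->f_rcu, &frob_free_cb);
 */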

/*
 * _kfree_rcu(head, obj)
 *
 *	kfree_rcu helper: schedule kfree(obj) using head for storage.
 */
void
_kfree_rcu(struct rcu_head *head, void *obj)
{

	LOCKDEBUG_MEM_CHECK(obj, ((struct linux_malloc *)obj - 1)->lm_size);

	head->rcuh_u.obj = obj;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_kfree;
	gc.first_kfree = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, kfree__queue, head, obj);
	mutex_exit(&gc.lock);
}
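
/*
 * Illustrative sketch (not from this file): callers normally reach
 * _kfree_rcu through the kfree_rcu() macro in <linux/rcupdate.h>, passing
 * the object and the name of its embedded rcu_head member.  The object is
 * assumed to have been allocated with kmalloc/kzalloc; names are
 * hypothetical.
 *
 *	struct frob *f = kzalloc(sizeof(*f), GFP_KERNEL);
 *	...
 *	// after unpublishing f from all RCU-readable structures:
 *	kfree_rcu(f, f_rcu);	// schedules kfree(f) after a grace period
 */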

static void
gc_thread(void *cookie)
{
	struct rcu_head *head_callback, *head_kfree, *head, *next;

	mutex_enter(&gc.lock);
	for (;;) {
		/* Start with no work. */
		bool work = false;

		/* Grab the list of callbacks. */
		if ((head_callback = gc.first_callback) != NULL) {
			gc.first_callback = NULL;
			work = true;
		}

		/* Grab the list of objects to kfree. */
		if ((head_kfree = gc.first_kfree) != NULL) {
			gc.first_kfree = NULL;
			work = true;
		}

		/*
		 * If no work, then either stop, if we're dying, or
		 * wait for work, if not.
		 */
		if (!work) {
			if (gc.dying)
				break;
			cv_wait(&gc.cv, &gc.lock);
			continue;
		}

		/*
		 * We have work to do.  Drop the lock to do it, and
		 * notify rcu_barrier that we're still doing it.
		 */
		gc.running = true;
		mutex_exit(&gc.lock);

		/* Wait for activity on all CPUs. */
		synchronize_rcu();

		/* Call the callbacks. */
		for (head = head_callback; head != NULL; head = next) {
			void (*callback)(struct rcu_head *) =
			    head->rcuh_u.callback;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, call__run,
			    head, callback);
			(*callback)(head);
			/*
			 * Can't dereference head or invoke
			 * callback after this point.
			 */
			SDT_PROBE2(sdt, linux, rcu, call__done,
			    head, callback);
		}

		/* Free the objects to kfree. */
		for (head = head_kfree; head != NULL; head = next) {
			void *obj = head->rcuh_u.obj;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, kfree__free, head, obj);
			kfree(obj);
			/* Can't dereference head or obj after this point. */
			SDT_PROBE2(sdt, linux, rcu, kfree__done, head, obj);
		}

		/* Return to the lock. */
		mutex_enter(&gc.lock);

		/* Finished a batch of work.  Notify rcu_barrier. */
		gc.gen++;
		gc.running = false;
		cv_broadcast(&gc.cv);

		/*
		 * Limit ourselves to one batch per tick, in an attempt
		 * to make the batches larger.
		 *
		 * XXX We should maybe also limit the size of each
		 * batch.
		 */
		(void)kpause("lxrcubat", /*intr*/false, /*timo*/1, &gc.lock);
	}
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	mutex_exit(&gc.lock);

	kthread_exit(0);
}

void
init_rcu_head(struct rcu_head *head)
{
}

void
destroy_rcu_head(struct rcu_head *head)
{
}

int
linux_rcu_gc_init(void)
{
	int error;

	mutex_init(&gc.lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&gc.cv, "lnxrcugc");
	gc.first_callback = NULL;
	gc.first_kfree = NULL;
	gc.gen = 0;
	gc.dying = false;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL, &gc_thread, NULL,
	    &gc.lwp, "lnxrcugc");
	if (error)
		goto fail;

	/* Success! */
	return 0;

fail:	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
	return error;
}

void
linux_rcu_gc_fini(void)
{

	mutex_enter(&gc.lock);
	gc.dying = true;
	cv_broadcast(&gc.cv);
	mutex_exit(&gc.lock);

	kthread_join(gc.lwp);
	gc.lwp = NULL;
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
}