/*	$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kthread.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/xcall.h>

#include <linux/rcupdate.h>
#include <linux/slab.h>

SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__start);
SDT_PROBE_DEFINE1(sdt, linux, rcu, synchronize__cpu, "unsigned"/*cpu*/);
SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__done);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__start);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__done);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__queue,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__run,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__done,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__queue,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__free,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__done,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);

static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	struct rcu_head	*first_callback;
	struct rcu_head	*first_kfree;
	struct lwp	*lwp;
	uint64_t	gen;
	bool		running;
	bool		dying;
} gc __cacheline_aligned;
static void
synchronize_rcu_xc(void *a, void *b)
{

	SDT_PROBE1(sdt, linux, rcu, synchronize__cpu, cpu_index(curcpu()));
}

/*
 * synchronize_rcu()
 *
 * Wait for any pending RCU read section on every CPU to complete
 * by triggering, on every CPU, activity that is blocked by an RCU
 * read section.
 *
 * May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu(void)
{

	SDT_PROBE0(sdt, linux, rcu, synchronize__start);
	xc_wait(xc_broadcast(0, &synchronize_rcu_xc, NULL, NULL));
	SDT_PROBE0(sdt, linux, rcu, synchronize__done);
}
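
/*
 * Illustrative sketch (not part of this file's API): the usual
 * unpublish-then-free pattern built on synchronize_rcu().  The names
 * example_frob, frob_current, and frob_lock are hypothetical; readers
 * are assumed to access frob_current only inside an RCU read section
 * (rcu_read_lock()/rcu_read_unlock() in the Linux API).
 */
struct example_frob {
	int	f_value;
};

static struct example_frob *frob_current;	/* read under RCU */
static kmutex_t frob_lock;	/* serializes updaters; mutex_init'd elsewhere */

static void __unused
example_frob_replace(struct example_frob *new)
{
	struct example_frob *old;

	/* Publish the replacement; old may still be in use by readers. */
	mutex_enter(&frob_lock);
	old = frob_current;
	frob_current = new;
	mutex_exit(&frob_lock);

	/* Wait for every reader that might still be using old. */
	synchronize_rcu();

	/* No reader can now hold a reference to old; free it. */
	kfree(old);
}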

/*
 * synchronize_rcu_expedited()
 *
 * Wait for any pending RCU read section on every CPU to complete
 * by triggering, on every CPU, activity that is blocked by an RCU
 * read section.  Try to get an answer faster than synchronize_rcu,
 * at the cost of more activity triggered on other CPUs.
 *
 * May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu_expedited(void)
{

	synchronize_rcu();
}

/*
 * cookie = get_state_synchronize_rcu(), cond_synchronize_rcu(cookie)
 *
 * Optimization for synchronize_rcu -- skip the wait if a grace
 * period has already elapsed between get_state_synchronize_rcu and
 * cond_synchronize_rcu.  get_state_synchronize_rcu implies a full
 * SMP memory barrier (membar_sync).
 */
unsigned long
get_state_synchronize_rcu(void)
{

	membar_sync();
	return 0;
}

void
cond_synchronize_rcu(unsigned long cookie)
{

	synchronize_rcu();
}
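
/*
 * Illustrative sketch: how a caller is expected to use the cookie.
 * In this implementation the cookie is always 0 and
 * cond_synchronize_rcu always waits, but the calling pattern is the
 * same as on Linux.  example_retire is a hypothetical helper; obj is
 * assumed to have been kmalloc'd.
 */
static void __unused
example_retire(void *obj)
{
	unsigned long cookie;

	cookie = get_state_synchronize_rcu();

	/* ... unpublish obj so new readers can no longer find it ... */

	/*
	 * Waits only if a grace period has not already elapsed since
	 * the cookie was taken.
	 */
	cond_synchronize_rcu(cookie);

	kfree(obj);
}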

/*
 * rcu_barrier()
 *
 * Wait for all pending RCU callbacks to complete.
 *
 * Does not imply, and is not implied by, synchronize_rcu.
 */
void
rcu_barrier(void)
{
	uint64_t gen;

	/*
	 * If the GC isn't running anything yet, then all callbacks of
	 * interest are queued, and it suffices to wait for the GC to
	 * advance one generation number.
	 *
	 * If the GC is already running, however, and there are any
	 * callbacks of interest queued but not in the GC's current
	 * batch of work, then when the GC advances the generation
	 * number it will not have completed the queued callbacks.  So
	 * we have to wait for one more generation -- or until the GC
	 * has stopped running because there's no work left.
	 */

	SDT_PROBE0(sdt, linux, rcu, barrier__start);
	mutex_enter(&gc.lock);
	gen = gc.gen;
	if (gc.running)
		gen++;
	while (gc.running || gc.first_callback || gc.first_kfree) {
		cv_wait(&gc.cv, &gc.lock);
		if (gc.gen > gen)
			break;
	}
	mutex_exit(&gc.lock);
	SDT_PROBE0(sdt, linux, rcu, barrier__done);
}
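
/*
 * Illustrative sketch: typical teardown order.  Callbacks queued with
 * call_rcu or kfree_rcu may still reference driver data, so drain
 * them before destroying that data.  example_driver_detach is a
 * hypothetical function.
 */
static void __unused
example_driver_detach(void)
{

	/* 1.  Stop queueing new call_rcu/kfree_rcu work (driver logic). */

	/* 2.  Wait for every callback already queued to finish. */
	rcu_barrier();

	/* 3.  Only now is it safe to destroy data those callbacks use. */
}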

/*
 * call_rcu(head, callback)
 *
 * Arrange to call callback(head) after any pending RCU read
 * sections on every CPU have completed.  Return immediately.
 */
void
call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *))
{

	head->rcuh_u.callback = callback;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_callback;
	gc.first_callback = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, call__queue, head, callback);
	mutex_exit(&gc.lock);
}
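
/*
 * Illustrative sketch: deferring destruction of an object with an
 * embedded rcu_head.  struct example_node and its members are
 * hypothetical; container_of is assumed to be visible (e.g. via
 * <sys/container_of.h>).
 */
struct example_node {
	struct rcu_head	n_rcu;
	kcondvar_t	n_cv;
};

static void
example_node_reclaim(struct rcu_head *head)
{
	struct example_node *n = container_of(head, struct example_node,
	    n_rcu);

	/* Runs from gc_thread after a grace period has elapsed. */
	cv_destroy(&n->n_cv);
	kfree(n);
}

static void __unused
example_node_free(struct example_node *n)
{

	/* Returns immediately; example_node_reclaim runs later. */
	call_rcu(&n->n_rcu, &example_node_reclaim);
}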

/*
 * _kfree_rcu(head, obj)
 *
 * kfree_rcu helper: schedule kfree(obj) using head for storage.
 */
void
_kfree_rcu(struct rcu_head *head, void *obj)
{

	LOCKDEBUG_MEM_CHECK(obj, ((struct linux_malloc *)obj - 1)->lm_size);

	head->rcuh_u.obj = obj;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_kfree;
	gc.first_kfree = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, kfree__queue, head, obj);
	mutex_exit(&gc.lock);
}
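
/*
 * Illustrative sketch: freeing a kmalloc'd object after a grace
 * period without writing a callback.  struct example_blob is
 * hypothetical.  The kfree_rcu(obj, member) macro in
 * <linux/rcupdate.h> is the usual front end for this helper and is
 * assumed to expand to roughly _kfree_rcu(&(obj)->member, obj), as
 * done by hand here.
 */
struct example_blob {
	int		b_datum;
	struct rcu_head	b_rcu;
};

static void __unused
example_blob_free(struct example_blob *b)
{

	/* b must have come from kmalloc/kzalloc: gc_thread kfrees it. */
	_kfree_rcu(&b->b_rcu, b);
}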

static void
gc_thread(void *cookie)
{
	struct rcu_head *head_callback, *head_kfree, *head, *next;

	mutex_enter(&gc.lock);
	for (;;) {
		/* Start with no work. */
		bool work = false;

		/* Grab the list of callbacks. */
		if ((head_callback = gc.first_callback) != NULL) {
			gc.first_callback = NULL;
			work = true;
		}

		/* Grab the list of objects to kfree. */
		if ((head_kfree = gc.first_kfree) != NULL) {
			gc.first_kfree = NULL;
			work = true;
		}

		/*
		 * If there's no work: stop if we're dying, otherwise
		 * wait for work to arrive.
		 */
		if (!work) {
			if (gc.dying)
				break;
			cv_wait(&gc.cv, &gc.lock);
			continue;
		}

		/*
		 * We have work to do.  Drop the lock to do it, and
		 * notify rcu_barrier that we're still doing it.
		 */
		gc.running = true;
		mutex_exit(&gc.lock);

		/* Wait for activity on all CPUs. */
		synchronize_rcu();

		/* Call the callbacks. */
		for (head = head_callback; head != NULL; head = next) {
			void (*callback)(struct rcu_head *) =
			    head->rcuh_u.callback;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, call__run,
			    head, callback);
			(*callback)(head);
			/*
			 * Can't dereference head or invoke
			 * callback after this point.
			 */
			SDT_PROBE2(sdt, linux, rcu, call__done,
			    head, callback);
		}

		/* Free the objects to kfree. */
		for (head = head_kfree; head != NULL; head = next) {
			void *obj = head->rcuh_u.obj;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, kfree__free, head, obj);
			kfree(obj);
			/* Can't dereference head or obj after this point. */
			SDT_PROBE2(sdt, linux, rcu, kfree__done, head, obj);
		}

		/* Return to the lock. */
		mutex_enter(&gc.lock);

		/* Finished a batch of work.  Notify rcu_barrier. */
		gc.gen++;
		gc.running = false;
		cv_broadcast(&gc.cv);

		/*
		 * Limit ourselves to one batch per tick, in an attempt
		 * to make the batches larger.
		 *
		 * XXX We should maybe also limit the size of each
		 * batch.
		 */
		(void)kpause("lxrcubat", /*intr*/false, /*timo*/1, &gc.lock);
	}
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	mutex_exit(&gc.lock);

	kthread_exit(0);
}

void
init_rcu_head(struct rcu_head *head)
{
}

void
destroy_rcu_head(struct rcu_head *head)
{
}

int
linux_rcu_gc_init(void)
{
	int error;

	mutex_init(&gc.lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&gc.cv, "lnxrcugc");
	gc.first_callback = NULL;
	gc.first_kfree = NULL;
	gc.gen = 0;
	gc.dying = false;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL, &gc_thread, NULL,
	    &gc.lwp, "lnxrcugc");
	if (error)
		goto fail;

	/* Success! */
	return 0;

fail:	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
	return error;
}

void
linux_rcu_gc_fini(void)
{

	mutex_enter(&gc.lock);
	gc.dying = true;
	cv_broadcast(&gc.cv);
	mutex_exit(&gc.lock);

	kthread_join(gc.lwp);
	gc.lwp = NULL;
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
}