/*	$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kthread.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/xcall.h>

#include <linux/rcupdate.h>
#include <linux/slab.h>

SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__start);
SDT_PROBE_DEFINE1(sdt, linux, rcu, synchronize__cpu, "unsigned"/*cpu*/);
SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__done);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__start);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__done);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__queue,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__run,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__done,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__queue,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__free,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__done,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);

static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	struct rcu_head	*first_callback;
	struct rcu_head	*first_kfree;
	struct lwp	*lwp;
	uint64_t	gen;
	bool		running;
	bool		dying;
} gc __cacheline_aligned;

static void
synchronize_rcu_xc(void *a, void *b)
{

	SDT_PROBE1(sdt, linux, rcu, synchronize__cpu, cpu_index(curcpu()));
}

/*
 * synchronize_rcu()
 *
 *	Wait for any pending RCU read section on every CPU to complete,
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu(void)
{

	SDT_PROBE0(sdt, linux, rcu, synchronize__start);
	xc_wait(xc_broadcast(0, &synchronize_rcu_xc, NULL, NULL));
	SDT_PROBE0(sdt, linux, rcu, synchronize__done);
}
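/*
 * Illustrative sketch, not part of this file: the classic update-side
 * pattern synchronize_rcu supports.  An updater unpublishes the old
 * object under its own lock, waits for all in-flight readers, and only
 * then frees the object.  The names frob, frobtab, and frobtab_lock
 * are hypothetical; a Linux-API caller would publish with
 * rcu_assign_pointer and readers would use rcu_read_lock and
 * rcu_dereference.
 *
 *	static struct frob *frobtab;
 *
 *	void
 *	frob_replace(struct frob *new)
 *	{
 *		struct frob *old;
 *
 *		mutex_enter(&frobtab_lock);
 *		old = frobtab;
 *		rcu_assign_pointer(frobtab, new);
 *		mutex_exit(&frobtab_lock);
 *
 *		synchronize_rcu();
 *		kfree(old);
 *	}
 */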
/*
 * synchronize_rcu_expedited()
 *
 *	Wait for any pending RCU read section on every CPU to complete,
 *	by triggering, on every CPU, activity that is blocked by an RCU
 *	read section.  Try to get an answer faster than synchronize_rcu,
 *	at the cost of more activity triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu_expedited(void)
{

	synchronize_rcu();
}

/*
 * cookie = get_state_synchronize_rcu(), cond_synchronize_rcu(cookie)
 *
 *	Optimization for synchronize_rcu -- skip if it has already
 *	happened between get_state_synchronize_rcu and
 *	cond_synchronize_rcu.  get_state_synchronize_rcu implies a full
 *	SMP memory barrier (membar_sync).
 */
unsigned long
get_state_synchronize_rcu(void)
{

	membar_sync();
	return 0;
}

void
cond_synchronize_rcu(unsigned long cookie)
{

	/*
	 * The cookie is always 0 here, so we can't tell whether a
	 * grace period has already elapsed; conservatively synchronize
	 * every time.
	 */
	synchronize_rcu();
}

/*
 * rcu_barrier()
 *
 *	Wait for all pending RCU callbacks to complete.
 *
 *	Does not imply, and is not implied by, synchronize_rcu.
 */
void
rcu_barrier(void)
{
	uint64_t gen;

	/*
	 * If the GC isn't running anything yet, then all callbacks of
	 * interest are queued, and it suffices to wait for the GC to
	 * advance one generation number.
	 *
	 * If the GC is already running, however, and there are any
	 * callbacks of interest queued but not in the GC's current
	 * batch of work, then when the GC advances the generation
	 * number it will not have completed the queued callbacks.  So
	 * we have to wait for one more generation -- or until the GC
	 * has stopped running because there's no work left.
	 */

	SDT_PROBE0(sdt, linux, rcu, barrier__start);
	mutex_enter(&gc.lock);
	gen = gc.gen;
	if (gc.running)
		gen++;
	while (gc.running || gc.first_callback || gc.first_kfree) {
		cv_wait(&gc.cv, &gc.lock);
		if (gc.gen > gen)
			break;
	}
	mutex_exit(&gc.lock);
	SDT_PROBE0(sdt, linux, rcu, barrier__done);
}

/*
 * call_rcu(head, callback)
 *
 *	Arrange to call callback(head) after any pending RCU read
 *	sections on every CPU have completed.  Return immediately.
 */
void
call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *))
{

	head->rcuh_u.callback = callback;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_callback;
	gc.first_callback = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, call__queue, head, callback);
	mutex_exit(&gc.lock);
}
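/*
 * Illustrative sketch, not part of this file: a typical call_rcu user
 * embeds the rcu_head in its own structure -- the storage must stay
 * valid until the callback runs -- and recovers the containing object
 * in the callback.  struct frob and frob_free_cb are hypothetical
 * names; container_of is the Linux-API macro from the compatibility
 * headers.
 *
 *	struct frob {
 *		...
 *		struct rcu_head	f_rcu;
 *	};
 *
 *	static void
 *	frob_free_cb(struct rcu_head *head)
 *	{
 *		struct frob *f = container_of(head, struct frob, f_rcu);
 *
 *		kfree(f);
 *	}
 *
 *	... unpublish f, then:
 *	call_rcu(&f->f_rcu, frob_free_cb);
 */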
/*
 * _kfree_rcu(head, obj)
 *
 *	kfree_rcu helper: schedule kfree(obj) using head for storage.
 */
void
_kfree_rcu(struct rcu_head *head, void *obj)
{

	LOCKDEBUG_MEM_CHECK(obj, ((struct linux_malloc *)obj - 1)->lm_size);

	head->rcuh_u.obj = obj;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_kfree;
	gc.first_kfree = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, kfree__queue, head, obj);
	mutex_exit(&gc.lock);
}
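/*
 * Illustrative sketch, not part of this file: callers normally reach
 * _kfree_rcu through the Linux-API kfree_rcu macro rather than calling
 * it directly, naming the rcu_head member of a kmalloc'd object so the
 * object is freed once readers are done.  struct frob is a
 * hypothetical name.
 *
 *	struct frob {
 *		...
 *		struct rcu_head	f_rcu;
 *	};
 *
 *	... unpublish f, then:
 *	kfree_rcu(f, f_rcu);
 */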
static void
gc_thread(void *cookie)
{
	struct rcu_head *head_callback, *head_kfree, *head, *next;

	mutex_enter(&gc.lock);
	for (;;) {
		/* Start with no work.  */
		bool work = false;

		/* Grab the list of callbacks.  */
		if ((head_callback = gc.first_callback) != NULL) {
			gc.first_callback = NULL;
			work = true;
		}

		/* Grab the list of objects to kfree.  */
		if ((head_kfree = gc.first_kfree) != NULL) {
			gc.first_kfree = NULL;
			work = true;
		}

		/*
		 * If there's no work, stop if we're dying; otherwise
		 * wait for more work.
		 */
		if (!work) {
			if (gc.dying)
				break;
			cv_wait(&gc.cv, &gc.lock);
			continue;
		}

		/*
		 * We have work to do.  Drop the lock to do it, and
		 * notify rcu_barrier that we're still doing it.
		 */
		gc.running = true;
		mutex_exit(&gc.lock);

		/* Wait for activity on all CPUs.  */
		synchronize_rcu();

		/* Call the callbacks.  */
		for (head = head_callback; head != NULL; head = next) {
			void (*callback)(struct rcu_head *) =
			    head->rcuh_u.callback;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, call__run,
			    head, callback);
			(*callback)(head);
			/*
			 * Can't dereference head or invoke
			 * callback after this point.
			 */
			SDT_PROBE2(sdt, linux, rcu, call__done,
			    head, callback);
		}

		/* Free the objects to kfree.  */
		for (head = head_kfree; head != NULL; head = next) {
			void *obj = head->rcuh_u.obj;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, kfree__free, head, obj);
			kfree(obj);
			/* Can't dereference head or obj after this point.  */
			SDT_PROBE2(sdt, linux, rcu, kfree__done, head, obj);
		}

		/* Return to the lock.  */
		mutex_enter(&gc.lock);

		/* Finished a batch of work.  Notify rcu_barrier.  */
		gc.gen++;
		gc.running = false;
		cv_broadcast(&gc.cv);

		/*
		 * Limit ourselves to one batch per tick, in an attempt
		 * to make the batches larger.
		 *
		 * XXX We should maybe also limit the size of each
		 * batch.
		 */
		(void)kpause("lxrcubat", /*intr*/false, /*timo*/1, &gc.lock);
	}
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	mutex_exit(&gc.lock);

	kthread_exit(0);
}

void
init_rcu_head(struct rcu_head *head)
{
}

void
destroy_rcu_head(struct rcu_head *head)
{
}

int
linux_rcu_gc_init(void)
{
	int error;

	mutex_init(&gc.lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&gc.cv, "lnxrcugc");
	gc.first_callback = NULL;
	gc.first_kfree = NULL;
	gc.gen = 0;
	gc.running = false;
	gc.dying = false;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL, &gc_thread, NULL,
	    &gc.lwp, "lnxrcugc");
	if (error)
		goto fail;

	/* Success!  */
	return 0;

fail:	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
	return error;
}

void
linux_rcu_gc_fini(void)
{

	mutex_enter(&gc.lock);
	gc.dying = true;
	cv_broadcast(&gc.cv);
	mutex_exit(&gc.lock);

	kthread_join(gc.lwp);
	gc.lwp = NULL;
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
}
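/*
 * Illustrative sketch, not part of this file: the shutdown ordering
 * linux_rcu_gc_fini depends on.  The gc thread exits only once both
 * queues are empty, so callers must stop queueing new work before
 * tearing down, and may use rcu_barrier to drain what is already
 * queued.  frob_detach is a hypothetical name.
 *
 *	frob_detach();		(no more call_rcu/kfree_rcu after this)
 *	rcu_barrier();
 *	linux_rcu_gc_fini();
 */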