/*	$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $");

/*
 * SRCU: Sleepable RCU
 *
 * (This is not exactly SRCU as Linux implements it; it is my
 * approximation of the semantics I think we need.)
 *
 * For each srcu context, representing a related set of read
 * sections, on each CPU we store two counts of numbers of
 * readers in two epochs: active readers and draining readers.
 *
 * All new srcu read sections get counted in the active epoch.
 * When there's no synchronize_srcu in progress, the draining
 * epoch has zero readers.  When a thread calls synchronize_srcu,
 * which must be serialized by the caller, it swaps the sense
 * of the epochs, issues an xcall to collect a global count of the
 * number of readers in the now-draining epoch, and waits for the
 * remainder to complete.
 *
 * This is basically NetBSD localcount(9), but without the
 * restriction that the caller of localcount_drain must guarantee
 * no new readers -- srcu uses two counts per CPU instead of one
 * like localcount(9), and synchronize_srcu just waits for all
 * existing readers to drain while new ones count toward a new
 * epoch.
 */
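
/*
 * Illustrative calling pattern (a sketch, not code from any
 * particular caller):
 *
 *	Reader:
 *		int ticket;
 *
 *		ticket = srcu_read_lock(&srcu);
 *		... use data published under srcu ...
 *		srcu_read_unlock(&srcu, ticket);
 *
 *	Updater (callers of synchronize_srcu must serialize
 *	themselves):
 *		... unpublish old data ...
 *		synchronize_srcu(&srcu);
 *		... no reader can still see the old data; free it ...
 */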

#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <linux/srcu.h>

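/*
 * Per-CPU reader counts, indexed by epoch: src_count[gen & 1]
 * counts readers who entered under the active generation gen, and
 * src_count[1 ^ (gen & 1)] counts readers still draining from the
 * previous generation.
 */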
struct srcu_cpu {
	int64_t	src_count[2];
};

/*
 * _init_srcu_struct(srcu, name)
 *
 *	Initialize the srcu state with the specified name.  Caller must
 *	call cleanup_srcu_struct when done.
 *
 *	name should be no longer than 8 characters; longer will be
 *	truncated.
 *
 *	May sleep.
 */
void
_init_srcu_struct(struct srcu_struct *srcu, const char *name)
{

	ASSERT_SLEEPABLE();

	srcu->srcu_percpu = percpu_alloc(sizeof(struct srcu_cpu));
	mutex_init(&srcu->srcu_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&srcu->srcu_cv, name);
	srcu->srcu_sync = NULL;
	srcu->srcu_total = 0;
	srcu->srcu_gen = 0;
}

/*
 * cleanup_srcu_struct(srcu)
 *
 *	Finalize an srcu state, which must not be in use right now.  If
 *	any srcu read sections might be active, caller must wait for
 *	them to complete with synchronize_srcu.
 *
 *	May sleep.
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	KASSERTMSG((srcu->srcu_sync == NULL),
	    "%s in lwp %p while synchronize_srcu running in lwp %p",
	    __func__, curlwp, srcu->srcu_sync);
	cv_destroy(&srcu->srcu_cv);
	mutex_destroy(&srcu->srcu_lock);
	percpu_free(srcu->srcu_percpu, sizeof(struct srcu_cpu));
}

/*
 * srcu_adjust(srcu, gen, delta)
 *
 *	Internal subroutine: Add delta to the local CPU's count of
 *	readers in the generation gen.
 *
 *	Never sleeps.
 */
static void
srcu_adjust(struct srcu_struct *srcu, unsigned gen, int delta)
{
	struct srcu_cpu *cpu;
	unsigned epoch = gen & 1;	/* active epoch */

	cpu = percpu_getref(srcu->srcu_percpu);
	cpu->src_count[epoch] += delta;
	percpu_putref(srcu->srcu_percpu);
}

/*
 * srcu_read_lock(srcu)
 *
 *	Enter an srcu read section and return a ticket for it.  Any
 *	subsequent synchronize_srcu will wait until this thread calls
 *	srcu_read_unlock(srcu, ticket).
 *
 *	Never sleeps.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	unsigned gen;

	/*
	 * Prevent xcall while we fetch the generation and adjust the
	 * count.
	 */
	kpreempt_disable();
	gen = srcu->srcu_gen;
	srcu_adjust(srcu, gen, +1);
	kpreempt_enable();

	/*
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	return gen;
}

/*
 * srcu_read_unlock(srcu, ticket)
 *
 *	Exit an srcu read section started with srcu_read_lock returning
 *	ticket.  If there is a pending synchronize_srcu and we might be
 *	the last reader, notify it.
 *
 *	Never sleeps.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int ticket)
{
	unsigned gen = ticket;

	/*
	 * All side effects have completed on this CPU before we
	 * disable kpreemption.
	 *
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	/*
	 * Prevent xcall while we determine whether we need to notify a
	 * sync and decrement the count in our generation.
	 */
	kpreempt_disable();
	if (__predict_true(gen == srcu->srcu_gen)) {
		/*
		 * Fast path: just decrement the local count.  If a
		 * sync has begun and incremented gen after we observed
		 * it, it will issue an xcall that will run after this
		 * kpreempt_disable section to collect our local count.
		 */
		srcu_adjust(srcu, gen, -1);
	} else {
		/*
		 * Slow path: decrement the total count, and if it goes
		 * to zero, notify the sync in progress.  The xcall may
		 * have already run, or it may have yet to run; since
		 * we can't tell which, we must contribute to the
		 * global count, not to our local count.
		 */
		mutex_spin_enter(&srcu->srcu_lock);
		KASSERT(srcu->srcu_sync != NULL);
		if (--srcu->srcu_total == 0)
			cv_broadcast(&srcu->srcu_cv);
		mutex_spin_exit(&srcu->srcu_lock);
	}
	kpreempt_enable();
}
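
/*
 * Worked example of the fast/slow paths above: a reader calls
 * srcu_read_lock while srcu_gen = 4 and gets ticket 4 (epoch 0).
 * A sync then advances srcu_gen to 5, and its xcall transfers the
 * reader's +1 from the local epoch-0 count into srcu_total.  When
 * the reader unlocks, its ticket (4) no longer matches srcu_gen
 * (5), so it takes the slow path and decrements srcu_total, waking
 * the sync if it was the last outstanding reader.
 */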

/*
 * synchronize_srcu_xc(a, b)
 *
 *	Cross-call function for synchronize_srcu: a is the struct
 *	srcu_struct pointer; b is ignored.  Transfer the local count of
 *	srcu readers on this CPU in the inactive epoch to the global
 *	count under the srcu sync lock.
 */
static void
synchronize_srcu_xc(void *a, void *b)
{
	struct srcu_struct *srcu = a;
	struct srcu_cpu *cpu;
	unsigned gen, epoch;
	uint64_t local;

	/* Operate under the sync lock.  Blocks preemption as side effect. */
	mutex_spin_enter(&srcu->srcu_lock);

	gen = srcu->srcu_gen;	/* active generation */
	epoch = 1 ^ (gen & 1);	/* draining epoch */
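	/* For example, gen = 2 gives active epoch 0, draining epoch 1. */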

	/* Transfer the local count to the global count. */
	cpu = percpu_getref(srcu->srcu_percpu);
	local = cpu->src_count[epoch];
	srcu->srcu_total += local;
	cpu->src_count[epoch] -= local;	/* i.e., cpu->src_count[epoch] = 0 */
	KASSERT(cpu->src_count[epoch] == 0);
	percpu_putref(srcu->srcu_percpu);

	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	/* Start a sync, and advance the active generation. */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_sync != NULL)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	KASSERT(srcu->srcu_total == 0);
	srcu->srcu_sync = curlwp;
	srcu->srcu_gen++;
	mutex_spin_exit(&srcu->srcu_lock);

	/*
	 * Wait for all CPUs to witness the change to the active
	 * generation, and collect their local counts in the draining
	 * epoch into the global count.
	 */
	xc_wait(xc_broadcast(0, synchronize_srcu_xc, srcu, NULL));

	/*
	 * Wait for the global count of users in the draining epoch to
	 * drain to zero.
	 */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_total != 0)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	srcu->srcu_sync = NULL;
	cv_broadcast(&srcu->srcu_cv);
	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu_expedited(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.  Try to get an answer
 *	faster than synchronize_srcu, at the cost of more activity
 *	triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu_expedited(struct srcu_struct *srcu)
{

	synchronize_srcu(srcu);
}