/*	$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_srcu.c,v 1.4 2021/12/19 11:49:11 riastradh Exp $");

/*
 * SRCU: Sleepable RCU
 *
 *	(This is not exactly SRCU as Linux implements it; it is my
 *	approximation of the semantics I think we need.)
 *
 *	For each srcu context, representing a related set of read
 *	sections, on each CPU we store two counts of numbers of
 *	readers in two epochs: active readers and draining readers.
 *
 *	All new srcu read sections get counted in the active epoch.
 *	When there's no synchronize_srcu in progress, the draining
 *	epoch has zero readers.  When a thread calls synchronize_srcu
 *	(concurrent calls are serialized internally under srcu_lock),
 *	it swaps the sense of the epochs, issues an xcall to collect a
 *	global count of the number of readers in the now-draining
 *	epoch, and waits for the remainder to complete.
 *
 *	This is basically NetBSD localcount(9), but without the
 *	restriction that the caller of localcount_drain must guarantee
 *	no new readers -- srcu uses two counts per CPU instead of one
 *	like localcount(9), and synchronize_srcu just waits for all
 *	existing readers to drain while new ones count toward a new
 *	epoch.
 */

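/*
 *	Intended usage pattern (an illustrative sketch, not part of
 *	this file; frob_srcu, frobp, and free_frob are hypothetical):
 *
 *	A reader, which may sleep inside its read section:
 *
 *		int ticket;
 *
 *		ticket = srcu_read_lock(&frob_srcu);
 *		frob = atomic_load_consume(&frobp);
 *		...use frob...
 *		srcu_read_unlock(&frob_srcu, ticket);
 *
 *	A writer replacing the frob and freeing the old one:
 *
 *		oldfrob = frobp;
 *		atomic_store_release(&frobp, newfrob);
 *		synchronize_srcu(&frob_srcu);
 *		free_frob(oldfrob);
 */
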
#include <sys/types.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <linux/srcu.h>

struct srcu_cpu {
	int64_t	src_count[2];
};

/*
 * _init_srcu_struct(srcu, name)
 *
 *	Initialize the srcu state with the specified name.  Caller must
 *	call cleanup_srcu_struct when done.
 *
 *	name should be no longer than 8 characters; longer will be
 *	truncated.
 *
 *	May sleep.
 */
void
_init_srcu_struct(struct srcu_struct *srcu, const char *name)
{

	ASSERT_SLEEPABLE();

	srcu->srcu_percpu = percpu_alloc(sizeof(struct srcu_cpu));
	mutex_init(&srcu->srcu_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&srcu->srcu_cv, name);
	srcu->srcu_sync = NULL;
	srcu->srcu_total = 0;
	srcu->srcu_gen = 0;
}

/*
 * cleanup_srcu_struct(srcu)
 *
 *	Finalize an srcu state, which must not be in use right now.  If
 *	any srcu read sections might be active, caller must wait for
 *	them to complete with synchronize_srcu.
 *
 *	May sleep.
 */
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	KASSERTMSG((srcu->srcu_sync == NULL),
	    "%s in lwp %p while synchronize_srcu running in lwp %p",
	    __func__, curlwp, srcu->srcu_sync);
	cv_destroy(&srcu->srcu_cv);
	mutex_destroy(&srcu->srcu_lock);
	percpu_free(srcu->srcu_percpu, sizeof(struct srcu_cpu));
}

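/*
 *	A typical lifecycle (a hypothetical sketch; frob_srcu and the
 *	name are illustrative):
 *
 *		static struct srcu_struct frob_srcu;
 *
 *		_init_srcu_struct(&frob_srcu, "frobsrcu");
 *		...
 *		synchronize_srcu(&frob_srcu);
 *		cleanup_srcu_struct(&frob_srcu);
 *
 *	with the synchronize_srcu needed only if read sections might
 *	still be active at teardown.
 */
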
/*
 * srcu_adjust(srcu, gen, delta)
 *
 *	Internal subroutine: Add delta to the local CPU's count of
 *	readers in the generation gen.
 *
 *	Never sleeps.
 */
static void
srcu_adjust(struct srcu_struct *srcu, unsigned gen, int delta)
{
	struct srcu_cpu *cpu;
	unsigned epoch = gen & 1; /* active epoch */

	cpu = percpu_getref(srcu->srcu_percpu);
	cpu->src_count[epoch] += delta;
	percpu_putref(srcu->srcu_percpu);
}

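/*
 *	For instance (illustrative): while srcu_gen is 4, readers count
 *	in src_count[0]; once synchronize_srcu advances srcu_gen to 5,
 *	new readers count in src_count[1] while src_count[0] drains.
 */
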
/*
 * srcu_read_lock(srcu)
 *
 *	Enter an srcu read section and return a ticket for it.  Any
 *	subsequent synchronize_srcu will wait until this thread calls
 *	srcu_read_unlock(srcu, ticket).
 *
 *	Never sleeps.
 */
int
srcu_read_lock(struct srcu_struct *srcu)
{
	unsigned gen;

	/*
	 * Prevent xcall while we fetch the generation and adjust the
	 * count.
	 */
	kpreempt_disable();
	gen = srcu->srcu_gen;
	srcu_adjust(srcu, gen, +1);
	kpreempt_enable();

	/*
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	return gen;
}

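/*
 *	Read sections may nest, since each lock/unlock pair adjusts its
 *	own generation's count independently; each srcu_read_lock must
 *	be paired with an unlock passing that lock's own ticket (a
 *	sketch; frob_srcu is hypothetical):
 *
 *		t0 = srcu_read_lock(&frob_srcu);
 *		t1 = srcu_read_lock(&frob_srcu);
 *		...
 *		srcu_read_unlock(&frob_srcu, t1);
 *		srcu_read_unlock(&frob_srcu, t0);
 */
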
/*
 * srcu_read_unlock(srcu, ticket)
 *
 *	Exit an srcu read section started with srcu_read_lock returning
 *	ticket.  If there is a pending synchronize_srcu and we might be
 *	the last reader, notify it.
 *
 *	Never sleeps.
 */
void
srcu_read_unlock(struct srcu_struct *srcu, int ticket)
{
	unsigned gen = ticket;

	/*
	 * All side effects have completed on this CPU before we
	 * disable kpreemption.
	 *
	 * No stronger, inter-CPU memory barrier is needed: if there is
	 * a concurrent synchronize_srcu, it will issue an xcall that
	 * functions as a stronger memory barrier.
	 */

	/*
	 * Prevent xcall while we determine whether we need to notify a
	 * sync and decrement the count in our generation.
	 */
	kpreempt_disable();
	if (__predict_true(gen == srcu->srcu_gen)) {
		/*
		 * Fast path: just decrement the local count.  If a
		 * sync has begun and incremented gen after we observed
		 * it, it will issue an xcall that will run after this
		 * kpreempt_disable section to collect our local count.
		 */
		srcu_adjust(srcu, gen, -1);
	} else {
		/*
		 * Slow path: decrement the total count, and if it goes
		 * to zero, notify the sync in progress.  The xcall may
		 * have already run, or it may have yet to run; since
		 * we can't tell which, we must contribute to the
		 * global count, not to our local count.
		 */
		mutex_spin_enter(&srcu->srcu_lock);
		KASSERT(srcu->srcu_sync != NULL);
		if (--srcu->srcu_total == 0)
			cv_broadcast(&srcu->srcu_cv);
		mutex_spin_exit(&srcu->srcu_lock);
	}
	kpreempt_enable();
}

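/*
 *	One concrete slow-path interleaving (illustrative only):
 *
 *		reader on CPU0:	ticket = srcu_read_lock(srcu), gen = 4
 *		sync:		srcu_gen = 5; xcall moves each CPU's
 *				src_count[0] into srcu_total; waits
 *		reader on CPU0:	srcu_read_unlock(srcu, 4) sees 4 != 5,
 *				so it decrements srcu_total and wakes
 *				the sync when the total reaches zero
 */
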
/*
 * synchronize_srcu_xc(a, b)
 *
 *	Cross-call function for synchronize_srcu: a is the struct srcu_struct
 *	pointer; b is ignored.  Transfer the local count of srcu
 *	readers on this CPU in the inactive epoch to the global count
 *	under the srcu sync lock.
 */
static void
synchronize_srcu_xc(void *a, void *b)
{
	struct srcu_struct *srcu = a;
	struct srcu_cpu *cpu;
	unsigned gen, epoch;
	uint64_t local;

	/* Operate under the sync lock.  Blocks preemption as side effect.  */
	mutex_spin_enter(&srcu->srcu_lock);

	gen = srcu->srcu_gen;	/* active generation */
	epoch = 1 ^ (gen & 1);	/* draining epoch */

	/* Transfer the local count to the global count.  */
	cpu = percpu_getref(srcu->srcu_percpu);
	local = cpu->src_count[epoch];
	srcu->srcu_total += local;
	cpu->src_count[epoch] -= local; /* i.e., cpu->src_count[epoch] = 0 */
	KASSERT(cpu->src_count[epoch] == 0);
	percpu_putref(srcu->srcu_percpu);

	mutex_spin_exit(&srcu->srcu_lock);
}

/*
 * synchronize_srcu(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu(struct srcu_struct *srcu)
{

	ASSERT_SLEEPABLE();

	/* Start a sync, and advance the active generation.  */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_sync != NULL)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	KASSERT(srcu->srcu_total == 0);
	srcu->srcu_sync = curlwp;
	srcu->srcu_gen++;
	mutex_spin_exit(&srcu->srcu_lock);

	/*
	 * Wait for all CPUs to witness the change to the active
	 * generation, and collect their local counts in the draining
	 * epoch into the global count.
	 */
	xc_wait(xc_broadcast(0, synchronize_srcu_xc, srcu, NULL));

	/*
	 * Wait for the global count of users in the draining epoch to
	 * drain to zero.
	 */
	mutex_spin_enter(&srcu->srcu_lock);
	while (srcu->srcu_total != 0)
		cv_wait(&srcu->srcu_cv, &srcu->srcu_lock);
	srcu->srcu_sync = NULL;
	cv_broadcast(&srcu->srcu_cv);
	mutex_spin_exit(&srcu->srcu_lock);
}

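/*
 *	For example (illustrative): a reader that called srcu_read_lock
 *	before synchronize_srcu began is waited for; a reader that
 *	enters afterward counts toward the new generation and is not.
 */
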
/*
 * synchronize_srcu_expedited(srcu)
 *
 *	Wait for all srcu readers on all CPUs that may have begun
 *	before synchronize_srcu to complete.  Try to get an answer
 *	faster than synchronize_srcu, at the cost of more activity
 *	triggered on other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_srcu_expedited(struct srcu_struct *srcu)
{

	synchronize_srcu(srcu);
}