xref: /openbsd-src/sys/kern/kern_smr.c (revision cf31dfdee0cd9bc598be108ae94f2bdefce0a488)
/*	$OpenBSD: kern_smr.c,v 1.17 2024/07/08 14:46:47 mpi Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18f2396460Svisa 
19f2396460Svisa #include <sys/param.h>
20f2396460Svisa #include <sys/systm.h>
21f2396460Svisa #include <sys/kthread.h>
22f2396460Svisa #include <sys/mutex.h>
23f2396460Svisa #include <sys/percpu.h>
24f2396460Svisa #include <sys/proc.h>
25f2396460Svisa #include <sys/smr.h>
26f2396460Svisa #include <sys/time.h>
277720a192Sclaudio #include <sys/tracepoint.h>
285c8bc909Svisa #include <sys/witness.h>
29f2396460Svisa 
30f2396460Svisa #include <machine/cpu.h>
31f2396460Svisa 
32f2396460Svisa #define SMR_PAUSE	100		/* pause between rounds in msec */
33f2396460Svisa 
34f2396460Svisa void	smr_dispatch(struct schedstate_percpu *);
35f2396460Svisa void	smr_grace_wait(void);
36f2396460Svisa void	smr_thread(void *);
37f2396460Svisa void	smr_wakeup(void *);
38f2396460Svisa 
39f2396460Svisa struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
40f2396460Svisa struct smr_entry_list	smr_deferred;
41f2396460Svisa struct timeout		smr_wakeup_tmo;
42f2396460Svisa unsigned int		smr_expedite;
43f2396460Svisa unsigned int		smr_ndeferred;
4483695439Svisa unsigned char		smr_grace_period;
45f2396460Svisa 
#ifdef WITNESS
/*
 * Lock object and lock type handed to the WITNESS_*() macros in this
 * file.  The object is flagged sleepable and uses the rwlock class.
 * It is "locked" in smr_thread() around the callback invocations and
 * order-checked in smr_barrier_impl().
 */
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif
57aa45e4b6Svisa 
58f2396460Svisa static inline int
smr_cpu_is_idle(struct cpu_info * ci)59f2396460Svisa smr_cpu_is_idle(struct cpu_info *ci)
60f2396460Svisa {
61f2396460Svisa 	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
62f2396460Svisa }
63f2396460Svisa 
64f2396460Svisa void
smr_startup(void)65f2396460Svisa smr_startup(void)
66f2396460Svisa {
67f2396460Svisa 	SIMPLEQ_INIT(&smr_deferred);
68aa45e4b6Svisa 	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
691ab6845cSvisa 	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
70f2396460Svisa }
71f2396460Svisa 
72f2396460Svisa void
smr_startup_thread(void)731ab6845cSvisa smr_startup_thread(void)
74f2396460Svisa {
75f2396460Svisa 	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
76f2396460Svisa 		panic("could not create smr thread");
77f2396460Svisa }
78f2396460Svisa 
79f2396460Svisa struct timeval smr_logintvl = { 300, 0 };
80f2396460Svisa 
81f2396460Svisa void
smr_thread(void * arg)82f2396460Svisa smr_thread(void *arg)
83f2396460Svisa {
84f2396460Svisa 	struct timeval elapsed, end, loglast, start;
85f2396460Svisa 	struct smr_entry_list deferred;
86f2396460Svisa 	struct smr_entry *smr;
876bbcc068Svisa 	unsigned long count;
88f2396460Svisa 
89f2396460Svisa 	KERNEL_ASSERT_LOCKED();
90f2396460Svisa 	KERNEL_UNLOCK();
91f2396460Svisa 
92f2396460Svisa 	memset(&loglast, 0, sizeof(loglast));
93f2396460Svisa 	SIMPLEQ_INIT(&deferred);
94f2396460Svisa 
95f2396460Svisa 	for (;;) {
96f2396460Svisa 		mtx_enter(&smr_lock);
97f2396460Svisa 		if (smr_ndeferred == 0) {
98f2396460Svisa 			while (smr_ndeferred == 0)
9982fff5faSjsg 				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
10082fff5faSjsg 				    "bored", INFSLP);
101f2396460Svisa 		} else {
102f2396460Svisa 			if (smr_expedite == 0)
1034bc97b15Scheloha 				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
1044bc97b15Scheloha 				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
105f2396460Svisa 		}
106f2396460Svisa 
107f2396460Svisa 		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
108f2396460Svisa 		smr_ndeferred = 0;
109f2396460Svisa 		smr_expedite = 0;
110f2396460Svisa 		mtx_leave(&smr_lock);
111f2396460Svisa 
112f2396460Svisa 		getmicrouptime(&start);
113f2396460Svisa 
114f2396460Svisa 		smr_grace_wait();
115f2396460Svisa 
116aa45e4b6Svisa 		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
117aa45e4b6Svisa 		WITNESS_LOCK(&smr_lock_obj, 0);
118aa45e4b6Svisa 
1197720a192Sclaudio 		count = 0;
120f2396460Svisa 		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
121f2396460Svisa 			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
1227720a192Sclaudio 			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
123f2396460Svisa 			smr->smr_func(smr->smr_arg);
1247720a192Sclaudio 			count++;
125f2396460Svisa 		}
126f2396460Svisa 
127aa45e4b6Svisa 		WITNESS_UNLOCK(&smr_lock_obj, 0);
128aa45e4b6Svisa 
129f2396460Svisa 		getmicrouptime(&end);
130f2396460Svisa 		timersub(&end, &start, &elapsed);
1315c8bc909Svisa 		if (elapsed.tv_sec >= 2 &&
1325c8bc909Svisa 		    ratecheck(&loglast, &smr_logintvl)) {
1335c8bc909Svisa 			printf("smr: dispatch took %ld.%06lds\n",
1345c8bc909Svisa 			    (long)elapsed.tv_sec,
1355c8bc909Svisa 			    (long)elapsed.tv_usec);
1365c8bc909Svisa 		}
1375c8bc909Svisa 		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
138f2396460Svisa 	}
139f2396460Svisa }
140f2396460Svisa 
/*
 * Announce next grace period and wait until all CPUs have entered it
 * by crossing quiescent state.
 *
 * Without MULTIPROCESSOR this function does nothing.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	/* Advance the global grace period; the unsigned char may wrap. */
	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	/* Record the new grace period for the current CPU right away. */
	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		/* Skip CPUs that have already recorded the new period. */
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * NOTE(review): sched_peg_curproc() is defined elsewhere;
		 * presumably it moves the calling thread onto ci, so that
		 * ci has recorded the new period by the time it returns
		 * -- confirm.  The KASSERT() checks exactly that.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	sched_unpeg_curproc();
#endif /* MULTIPROCESSOR */
}
169f2396460Svisa 
170f2396460Svisa void
smr_wakeup(void * arg)171f2396460Svisa smr_wakeup(void *arg)
172f2396460Svisa {
1737720a192Sclaudio 	TRACEPOINT(smr, wakeup, NULL);
174f2396460Svisa 	wakeup(&smr_ndeferred);
175f2396460Svisa }
176f2396460Svisa 
/*
 * Mark entry into an SMR read-side section.
 * Only DIAGNOSTIC kernels do any work here: they increment the
 * current CPU's spc_smrdepth counter.
 */
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}
186f2396460Svisa 
/*
 * Mark exit from an SMR read-side section.
 * Only DIAGNOSTIC kernels do any work here: they assert that the
 * current CPU's spc_smrdepth counter is positive and decrement it.
 */
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
197f2396460Svisa 
198f2396460Svisa /*
199f2396460Svisa  * Move SMR entries from the local queue to the system-wide queue.
200f2396460Svisa  */
201f2396460Svisa void
smr_dispatch(struct schedstate_percpu * spc)202f2396460Svisa smr_dispatch(struct schedstate_percpu *spc)
203f2396460Svisa {
204f2396460Svisa 	int expedite = 0, wake = 0;
205f2396460Svisa 
206f2396460Svisa 	mtx_enter(&smr_lock);
207f2396460Svisa 	if (smr_ndeferred == 0)
208f2396460Svisa 		wake = 1;
209f2396460Svisa 	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
210f2396460Svisa 	smr_ndeferred += spc->spc_ndeferred;
211f2396460Svisa 	spc->spc_ndeferred = 0;
212f2396460Svisa 	smr_expedite |= spc->spc_smrexpedite;
213f2396460Svisa 	spc->spc_smrexpedite = 0;
214f2396460Svisa 	expedite = smr_expedite;
215f2396460Svisa 	mtx_leave(&smr_lock);
216f2396460Svisa 
217f2396460Svisa 	if (expedite)
2187720a192Sclaudio 		smr_wakeup(NULL);
219f2396460Svisa 	else if (wake)
220f2396460Svisa 		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
221f2396460Svisa }
222f2396460Svisa 
223f2396460Svisa /*
224f2396460Svisa  * Signal that the current CPU is in quiescent state.
225f2396460Svisa  */
226f2396460Svisa void
smr_idle(void)227f2396460Svisa smr_idle(void)
228f2396460Svisa {
229f2396460Svisa 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
23083695439Svisa 	unsigned char smrgp;
231f2396460Svisa 
232f2396460Svisa 	SMR_ASSERT_NONCRITICAL();
233f2396460Svisa 
234f2396460Svisa 	if (spc->spc_ndeferred > 0)
235f2396460Svisa 		smr_dispatch(spc);
23683695439Svisa 
23783695439Svisa 	/*
23883695439Svisa 	 * Update this CPU's view of the system's grace period.
23983695439Svisa 	 * The update must become visible after any preceding reads
24083695439Svisa 	 * of SMR-protected data.
24183695439Svisa 	 */
24283695439Svisa 	smrgp = READ_ONCE(smr_grace_period);
24383695439Svisa 	if (__predict_false(spc->spc_smrgp != smrgp)) {
24483695439Svisa 		membar_exit();
24583695439Svisa 		WRITE_ONCE(spc->spc_smrgp, smrgp);
24683695439Svisa 	}
247f2396460Svisa }
248f2396460Svisa 
249f2396460Svisa void
smr_call_impl(struct smr_entry * smr,void (* func)(void *),void * arg,int expedite)250f2396460Svisa smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
251f2396460Svisa     int expedite)
252f2396460Svisa {
253f2396460Svisa 	struct cpu_info *ci = curcpu();
254f2396460Svisa 	struct schedstate_percpu *spc = &ci->ci_schedstate;
255f2396460Svisa 	int s;
256f2396460Svisa 
257f2396460Svisa 	KASSERT(smr->smr_func == NULL);
258f2396460Svisa 
259f2396460Svisa 	smr->smr_func = func;
260f2396460Svisa 	smr->smr_arg = arg;
261f2396460Svisa 
262f2396460Svisa 	s = splhigh();
263f2396460Svisa 	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
264f2396460Svisa 	spc->spc_ndeferred++;
265f2396460Svisa 	spc->spc_smrexpedite |= expedite;
266f2396460Svisa 	splx(s);
2677720a192Sclaudio 	TRACEPOINT(smr, call, func, arg, expedite);
268f2396460Svisa 
269f2396460Svisa 	/*
270f2396460Svisa 	 * If this call was made from an interrupt context that
271f2396460Svisa 	 * preempted idle state, dispatch the local queue to the shared
272f2396460Svisa 	 * queue immediately.
273f2396460Svisa 	 * The entries would linger in the local queue long if the CPU
274f2396460Svisa 	 * went to sleep without calling smr_idle().
275f2396460Svisa 	 */
276f2396460Svisa 	if (smr_cpu_is_idle(ci))
277f2396460Svisa 		smr_dispatch(spc);
278f2396460Svisa }
279f2396460Svisa 
/*
 * Callback queued by smr_barrier_impl(): signal the condition that
 * the barrier caller is waiting on.
 *
 * arg points to the struct cond to signal.
 */
void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}
287f2396460Svisa 
288f2396460Svisa void
smr_barrier_impl(int expedite)289f2396460Svisa smr_barrier_impl(int expedite)
290f2396460Svisa {
291f2396460Svisa 	struct cond c = COND_INITIALIZER();
292f2396460Svisa 	struct smr_entry smr;
293f2396460Svisa 
294f2396460Svisa 	if (panicstr != NULL || db_active)
295f2396460Svisa 		return;
296f2396460Svisa 
297aa45e4b6Svisa 	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
298aa45e4b6Svisa 
2997720a192Sclaudio 	TRACEPOINT(smr, barrier_enter, expedite);
300f2396460Svisa 	smr_init(&smr);
301f2396460Svisa 	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
302f2396460Svisa 	cond_wait(&c, "smrbar");
3037720a192Sclaudio 	TRACEPOINT(smr, barrier_exit, expedite);
304f2396460Svisa }
305