1*cf31dfdeSmpi /* $OpenBSD: kern_smr.c,v 1.17 2024/07/08 14:46:47 mpi Exp $ */
2f2396460Svisa
3f2396460Svisa /*
483695439Svisa * Copyright (c) 2019-2020 Visa Hankala
5f2396460Svisa *
6f2396460Svisa * Permission to use, copy, modify, and distribute this software for any
7f2396460Svisa * purpose with or without fee is hereby granted, provided that the above
8f2396460Svisa * copyright notice and this permission notice appear in all copies.
9f2396460Svisa *
10f2396460Svisa * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11f2396460Svisa * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12f2396460Svisa * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13f2396460Svisa * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14f2396460Svisa * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15f2396460Svisa * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16f2396460Svisa * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17f2396460Svisa */
18f2396460Svisa
19f2396460Svisa #include <sys/param.h>
20f2396460Svisa #include <sys/systm.h>
21f2396460Svisa #include <sys/kthread.h>
22f2396460Svisa #include <sys/mutex.h>
23f2396460Svisa #include <sys/percpu.h>
24f2396460Svisa #include <sys/proc.h>
25f2396460Svisa #include <sys/smr.h>
26f2396460Svisa #include <sys/time.h>
277720a192Sclaudio #include <sys/tracepoint.h>
285c8bc909Svisa #include <sys/witness.h>
29f2396460Svisa
30f2396460Svisa #include <machine/cpu.h>
31f2396460Svisa
#define SMR_PAUSE 100 /* pause between rounds in msec */

/* Internal helpers, implemented below. */
void smr_dispatch(struct schedstate_percpu *);
void smr_grace_wait(void);
void smr_thread(void *);
void smr_wakeup(void *);

/* Global SMR state; the mutable fields below are protected by smr_lock. */
struct mutex smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list smr_deferred;	/* system-wide queue of deferred entries */
struct timeout smr_wakeup_tmo;		/* delayed wakeup of the smr thread */
unsigned int smr_expedite;		/* nonzero: skip the inter-round pause */
unsigned int smr_ndeferred;		/* number of entries on smr_deferred */
unsigned char smr_grace_period;		/* latest announced grace period (wraps) */
45f2396460Svisa
#ifdef WITNESS
/*
 * Pseudo lock object used to make witness track ordering between
 * SMR barriers and real locks (see WITNESS_CHECKORDER uses below).
 */
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif
57aa45e4b6Svisa
58f2396460Svisa static inline int
smr_cpu_is_idle(struct cpu_info * ci)59f2396460Svisa smr_cpu_is_idle(struct cpu_info *ci)
60f2396460Svisa {
61f2396460Svisa return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
62f2396460Svisa }
63f2396460Svisa
64f2396460Svisa void
smr_startup(void)65f2396460Svisa smr_startup(void)
66f2396460Svisa {
67f2396460Svisa SIMPLEQ_INIT(&smr_deferred);
68aa45e4b6Svisa WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
691ab6845cSvisa timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
70f2396460Svisa }
71f2396460Svisa
72f2396460Svisa void
smr_startup_thread(void)731ab6845cSvisa smr_startup_thread(void)
74f2396460Svisa {
75f2396460Svisa if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
76f2396460Svisa panic("could not create smr thread");
77f2396460Svisa }
78f2396460Svisa
79f2396460Svisa struct timeval smr_logintvl = { 300, 0 };
80f2396460Svisa
/*
 * System thread that repeatedly collects deferred SMR entries, waits
 * out a grace period, and then invokes the queued callbacks.
 */
void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;
	unsigned long count;

	/* The thread starts with the kernel lock held; release it for good. */
	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			/* No work: sleep until an entry gets dispatched. */
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			/*
			 * Work is pending.  Unless an expedited round was
			 * requested, pause so more entries can accumulate
			 * into this batch (sleep may end early on wakeup).
			 */
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		/* Take the whole shared queue for local processing. */
		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		/* Wait until every running CPU has crossed a quiescent state. */
		smr_grace_wait();

		/* Let witness check lock ordering around the callbacks. */
		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		/* Grace period over: run and count each deferred callback. */
		count = 0;
		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
			smr->smr_func(smr->smr_arg);
			count++;
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		/* Log (rate-limited) if the round took unusually long. */
		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 2 &&
		    ratecheck(&loglast, &smr_logintvl)) {
			printf("smr: dispatch took %ld.%06lds\n",
			    (long)elapsed.tv_sec,
			    (long)elapsed.tv_usec);
		}
		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
	}
}
140f2396460Svisa
/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by crossing a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	/* Publish the next grace period number. */
	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	/* The local CPU updates its own view directly. */
	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		/* Skip CPUs that are not running. */
		if (!CPU_IS_RUNNING(ci))
			continue;
		/* CPU has already observed the new period; no need to wait. */
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * Migrate onto ci and let its scheduler run this thread;
		 * by then ci must have entered the new grace period,
		 * which the assertion below verifies.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	sched_unpeg_curproc();
#endif /* MULTIPROCESSOR */
}
169f2396460Svisa
/*
 * Wake the smr thread (used directly and as the smr_wakeup_tmo handler).
 */
void
smr_wakeup(void *arg)
{
	TRACEPOINT(smr, wakeup, NULL);
	wakeup(&smr_ndeferred);
}
176f2396460Svisa
/*
 * Enter an SMR read-side critical section.  With DIAGNOSTIC, track the
 * nesting depth on the current CPU so leave/enter imbalance is caught.
 */
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	curcpu()->ci_schedstate.spc_smrdepth++;
#endif
}
186f2396460Svisa
/*
 * Leave an SMR read-side critical section.  With DIAGNOSTIC, assert that
 * a matching smr_read_enter() preceded this call.
 */
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *sched = &curcpu()->ci_schedstate;

	KASSERT(sched->spc_smrdepth > 0);
	sched->spc_smrdepth--;
#endif
}
197f2396460Svisa
198f2396460Svisa /*
199f2396460Svisa * Move SMR entries from the local queue to the system-wide queue.
200f2396460Svisa */
201f2396460Svisa void
smr_dispatch(struct schedstate_percpu * spc)202f2396460Svisa smr_dispatch(struct schedstate_percpu *spc)
203f2396460Svisa {
204f2396460Svisa int expedite = 0, wake = 0;
205f2396460Svisa
206f2396460Svisa mtx_enter(&smr_lock);
207f2396460Svisa if (smr_ndeferred == 0)
208f2396460Svisa wake = 1;
209f2396460Svisa SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
210f2396460Svisa smr_ndeferred += spc->spc_ndeferred;
211f2396460Svisa spc->spc_ndeferred = 0;
212f2396460Svisa smr_expedite |= spc->spc_smrexpedite;
213f2396460Svisa spc->spc_smrexpedite = 0;
214f2396460Svisa expedite = smr_expedite;
215f2396460Svisa mtx_leave(&smr_lock);
216f2396460Svisa
217f2396460Svisa if (expedite)
2187720a192Sclaudio smr_wakeup(NULL);
219f2396460Svisa else if (wake)
220f2396460Svisa timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
221f2396460Svisa }
222f2396460Svisa
/*
 * Signal that the current CPU is in quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	/* Must not be called from inside an SMR read section. */
	SMR_ASSERT_NONCRITICAL();

	/* Flush any locally queued entries to the shared queue first. */
	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		/* Release barrier: order prior reads before the store. */
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}
248f2396460Svisa
/*
 * Queue a callback to run after the next grace period.  The entry must
 * be unused (smr_func == NULL, as set by smr_init()).  With expedite,
 * the smr thread is asked to skip its inter-round pause.
 */
void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	/* Block interrupts while the per-CPU queue is updated. */
	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);
	TRACEPOINT(smr, call, func, arg, expedite);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would linger in the local queue long if the CPU
	 * went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}
279f2396460Svisa
/*
 * SMR callback used by smr_barrier_impl(): releases the waiting thread.
 */
void
smr_barrier_func(void *arg)
{
	cond_signal((struct cond *)arg);
}
287f2396460Svisa
288f2396460Svisa void
smr_barrier_impl(int expedite)289f2396460Svisa smr_barrier_impl(int expedite)
290f2396460Svisa {
291f2396460Svisa struct cond c = COND_INITIALIZER();
292f2396460Svisa struct smr_entry smr;
293f2396460Svisa
294f2396460Svisa if (panicstr != NULL || db_active)
295f2396460Svisa return;
296f2396460Svisa
297aa45e4b6Svisa WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
298aa45e4b6Svisa
2997720a192Sclaudio TRACEPOINT(smr, barrier_enter, expedite);
300f2396460Svisa smr_init(&smr);
301f2396460Svisa smr_call_impl(&smr, smr_barrier_func, &c, expedite);
302f2396460Svisa cond_wait(&c, "smrbar");
3037720a192Sclaudio TRACEPOINT(smr, barrier_exit, expedite);
304f2396460Svisa }
305