/*	$OpenBSD: kern_smr.c,v 1.12 2021/07/06 09:34:07 kettenis Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

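/*
 * Illustrative usage sketch (hypothetical consumer code, not part of this
 * file): readers bracket lockless accesses with smr_read_enter() and
 * smr_read_leave(), and writers defer freeing with smr_call(9) until a
 * grace period has passed.
 *
 *	struct foo *f;
 *
 *	smr_read_enter();
 *	f = SMR_PTR_GET(&foo_ptr);
 *	if (f != NULL)
 *		use(f);			(must not sleep inside the section)
 *	smr_read_leave();
 *
 * After unlinking `old' under the writer lock, and provided old->foo_smr
 * was set up with smr_init():
 *
 *	smr_call(&old->foo_smr, foo_free, old);
 */
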
void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

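/*
 * System-wide state: entries dispatched from the per-CPU queues accumulate
 * on smr_deferred, protected by smr_lock, until the smr thread processes
 * them after the next grace period.
 */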
struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;
unsigned int		smr_ndeferred;
unsigned char		smr_grace_period;

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

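/* Rate-limit the slow-dispatch warning to one message per 300 seconds. */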
struct timeval smr_logintvl = { 300, 0 };

void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
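		/*
		 * Wait for deferred entries. If work is already queued and
		 * no expedited request is pending, pause for SMR_PAUSE msec
		 * so that further entries can accumulate into the batch.
		 */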
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

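		/*
		 * Wait until every CPU has passed through a quiescent state,
		 * then run the callbacks that were deferred before the grace
		 * period began.
		 */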
		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			smr->smr_func(smr->smr_arg);
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 5 &&
		    ratecheck(&loglast, &smr_logintvl))
			printf("smr: dispatch took %ld seconds\n",
			    (long)elapsed.tv_sec);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by passing through a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

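	/*
	 * Visit each CPU that has not yet observed the new grace period.
	 * Pegging this thread onto such a CPU forces a context switch
	 * there, which acts as the quiescent state that updates the
	 * CPU's spc_smrgp.
	 */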
	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}

void
smr_wakeup(void *arg)
{
	wakeup(&smr_ndeferred);
}

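/*
 * Read-side critical sections must not sleep. With DIAGNOSTIC, the per-CPU
 * nesting depth is tracked so that SMR_ASSERT_CRITICAL() and
 * SMR_ASSERT_NONCRITICAL() can catch misuse.
 */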
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}

/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

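	/*
	 * Wake the smr thread right away if expedited processing was
	 * requested. Otherwise, if the system-wide queue was empty,
	 * arm the timeout so the new batch gets processed after the
	 * usual SMR_PAUSE delay.
	 */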
	if (expedite)
		wakeup(&smr_ndeferred);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

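	/* Push any locally queued entries to the system-wide queue. */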
	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}

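/*
 * Queue func(arg) to run once a grace period has elapsed, i.e. once no
 * reader can still hold a reference obtained before this call. The entry
 * goes on the current CPU's local queue; an expedited request asks the
 * smr thread to skip the batching pause. This backs smr_call(9).
 */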
void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);

	/*
	 * If this call was made from an interrupt context that
	 * preempted the idle state, dispatch the local queue to the
	 * shared queue immediately.
	 * The entries would otherwise linger in the local queue for a
	 * long time if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}

void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

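/*
 * Wait until a grace period has elapsed: queue a callback that signals a
 * condition variable and sleep until that callback has run. This backs
 * smr_barrier(9).
 */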
void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
}
294