xref: /openbsd-src/sys/kern/kern_smr.c (revision d89ec533011f513df1010f142a111086a0785f09)
1 /*	$OpenBSD: kern_smr.c,v 1.15 2021/11/24 13:17:37 visa Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2020 Visa Hankala
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kernel.h>
22 #include <sys/kthread.h>
23 #include <sys/mutex.h>
24 #include <sys/percpu.h>
25 #include <sys/proc.h>
26 #include <sys/smr.h>
27 #include <sys/time.h>
28 #include <sys/tracepoint.h>
29 #include <sys/witness.h>
30 
31 #include <machine/cpu.h>
32 
33 #define SMR_PAUSE	100		/* pause between rounds in msec */
34 
35 void	smr_dispatch(struct schedstate_percpu *);
36 void	smr_grace_wait(void);
37 void	smr_thread(void *);
38 void	smr_wakeup(void *);
39 
struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;	/* [smr_lock] system-wide entry queue */
struct timeout		smr_wakeup_tmo;	/* delayed wakeup of the smr thread */
unsigned int		smr_expedite;	/* [smr_lock] expedited round requested */
unsigned int		smr_ndeferred;	/* [smr_lock] entries on smr_deferred */
unsigned char		smr_grace_period; /* accessed with READ/WRITE_ONCE() */
46 
#ifdef WITNESS
static const char smr_lock_name[] = "smr";
/*
 * Pseudo-lock for witness: smr_thread() "holds" it around the callback
 * phase and smr_barrier_impl() checks order against it, so witness can
 * catch lock-order problems involving SMR callbacks and barriers.
 */
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif
58 
59 static inline int
60 smr_cpu_is_idle(struct cpu_info *ci)
61 {
62 	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
63 }
64 
/*
 * One-time initialization of the SMR machinery.  Sets up the shared
 * queue, the witness pseudo-lock and the wakeup timeout; the worker
 * thread is created separately by smr_startup_thread().
 */
void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}
72 
73 void
74 smr_startup_thread(void)
75 {
76 	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
77 		panic("could not create smr thread");
78 }
79 
/* Rate limit for the slow-dispatch warning: at most once per 300 s. */
struct timeval smr_logintvl = { 300, 0 };
81 
/*
 * Main loop of the SMR worker thread.
 *
 * Each round: wait for deferred entries to accumulate, take the whole
 * system-wide queue, wait out a grace period, then run the callbacks.
 * Runs without the kernel lock.
 */
void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;
	unsigned long count;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			/* No work; sleep until smr_dispatch() supplies some. */
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			/*
			 * Work is pending.  Unless expedited processing has
			 * been requested, pause briefly so more entries can
			 * batch up; an expedite wakeup ends the pause early.
			 */
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		/* Take the entire shared queue for local processing. */
		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		/* Callbacks run under the witness pseudo-lock. */
		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		count = 0;
		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
			smr->smr_func(smr->smr_arg);
			count++;
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		/* Warn, rate-limited, if the round took unusually long. */
		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 2 &&
		    ratecheck(&loglast, &smr_logintvl)) {
			printf("smr: dispatch took %ld.%06lds\n",
			    (long)elapsed.tv_sec,
			    (long)elapsed.tv_usec);
		}
		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
	}
}
141 
142 /*
143  * Announce next grace period and wait until all CPUs have entered it
144  * by crossing quiescent state.
145  */
146 void
147 smr_grace_wait(void)
148 {
149 #ifdef MULTIPROCESSOR
150 	CPU_INFO_ITERATOR cii;
151 	struct cpu_info *ci;
152 	unsigned char smrgp;
153 
154 	smrgp = READ_ONCE(smr_grace_period) + 1;
155 	WRITE_ONCE(smr_grace_period, smrgp);
156 
157 	curcpu()->ci_schedstate.spc_smrgp = smrgp;
158 
159 	CPU_INFO_FOREACH(cii, ci) {
160 		if (!CPU_IS_RUNNING(ci))
161 			continue;
162 		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
163 			continue;
164 		sched_peg_curproc(ci);
165 		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
166 	}
167 	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
168 #endif /* MULTIPROCESSOR */
169 }
170 
/*
 * Wake the smr thread to begin a dispatch round.  Invoked directly by
 * smr_dispatch() for expedited requests, or via smr_wakeup_tmo after
 * SMR_PAUSE msec otherwise.
 */
void
smr_wakeup(void *arg)
{
	TRACEPOINT(smr, wakeup, NULL);
	wakeup(&smr_ndeferred);
}
177 
/*
 * Enter an SMR read-side critical section.
 * The nesting depth is tracked only for DIAGNOSTIC assertions.
 */
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	curcpu()->ci_schedstate.spc_smrdepth++;
#endif
}
187 
/*
 * Leave an SMR read-side critical section.
 * Panics under DIAGNOSTIC if the section was never entered.
 */
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	KASSERT(curcpu()->ci_schedstate.spc_smrdepth > 0);
	curcpu()->ci_schedstate.spc_smrdepth--;
#endif
}
198 
199 /*
200  * Move SMR entries from the local queue to the system-wide queue.
201  */
202 void
203 smr_dispatch(struct schedstate_percpu *spc)
204 {
205 	int expedite = 0, wake = 0;
206 
207 	mtx_enter(&smr_lock);
208 	if (smr_ndeferred == 0)
209 		wake = 1;
210 	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
211 	smr_ndeferred += spc->spc_ndeferred;
212 	spc->spc_ndeferred = 0;
213 	smr_expedite |= spc->spc_smrexpedite;
214 	spc->spc_smrexpedite = 0;
215 	expedite = smr_expedite;
216 	mtx_leave(&smr_lock);
217 
218 	if (expedite)
219 		smr_wakeup(NULL);
220 	else if (wake)
221 		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
222 }
223 
224 /*
225  * Signal that the current CPU is in quiescent state.
226  */
227 void
228 smr_idle(void)
229 {
230 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
231 	unsigned char smrgp;
232 
233 	SMR_ASSERT_NONCRITICAL();
234 
235 	if (spc->spc_ndeferred > 0)
236 		smr_dispatch(spc);
237 
238 	/*
239 	 * Update this CPU's view of the system's grace period.
240 	 * The update must become visible after any preceding reads
241 	 * of SMR-protected data.
242 	 */
243 	smrgp = READ_ONCE(smr_grace_period);
244 	if (__predict_false(spc->spc_smrgp != smrgp)) {
245 		membar_exit();
246 		WRITE_ONCE(spc->spc_smrgp, smrgp);
247 	}
248 }
249 
/*
 * Schedule func(arg) to run after the next grace period.
 *
 * The entry must not already be pending (smr_func is asserted NULL).
 * It is queued on the local CPU at splhigh(), which keeps the per-CPU
 * queue consistent against interrupts on this CPU.
 */
void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);
	TRACEPOINT(smr, call, func, arg, expedite);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would linger in the local queue long if the CPU
	 * went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}
280 
/*
 * SMR callback used by smr_barrier_impl(): release the waiting thread.
 * arg points to the condition variable on the waiter's stack.
 */
void
smr_barrier_func(void *arg)
{
	cond_signal((struct cond *)arg);
}
288 
/*
 * Block until a full grace period has elapsed: queue a sentinel SMR
 * call and sleep until the smr thread invokes it.  Must be callable
 * from process context since it sleeps; becomes a no-op during panic
 * or while ddb is active.
 */
void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	/* This sleeps awaiting callbacks; let witness check lock order. */
	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	TRACEPOINT(smr, barrier_enter, expedite);
	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
	TRACEPOINT(smr, barrier_exit, expedite);
}
306