/*	$OpenBSD: kern_smr.c,v 1.9 2020/12/25 12:49:31 visa Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;	/* entries awaiting a grace period */
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;	/* expedited processing requested */
unsigned int		smr_ndeferred;	/* number of entries on smr_deferred */
unsigned char		smr_grace_period; /* current period, written by smr_thread */

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

struct timeval smr_logintvl = { 300, 0 };	/* log at most every 5 minutes */

void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			/* Wait until deferred entries arrive. */
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			/*
			 * Let the queue accumulate for a while before
			 * the next round, unless expedited processing
			 * has been requested.
			 */
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			smr->smr_func(smr->smr_arg);
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 5 &&
		    ratecheck(&loglast, &smr_logintvl))
			printf("smr: dispatch took %ld seconds\n",
			    (long)elapsed.tv_sec);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered
 * it by passing through a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * Visit the CPU by pegging this thread onto it.
		 * The resulting context switch takes the CPU through
		 * a quiescent state, updating its view of the grace
		 * period.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}
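
/*
 * Illustrative timeline (a sketch, not part of the original source)
 * of how smr_grace_wait() synchronizes with a reader on another CPU.
 * Because a read-side critical section neither sleeps nor is preempted
 * in the kernel, a context switch on cpu1 implies that earlier readers
 * on cpu1 have finished:
 *
 *	smr_thread on cpu0		reader on cpu1
 *	------------------		--------------
 *	smr_grace_period = N+1		smr_read_enter()
 *					... reads SMR-protected data ...
 *					smr_read_leave()
 *	sched_peg_curproc(cpu1)
 *					context switch to pegged thread,
 *					spc_smrgp becomes N+1
 *	every CPU has reached N+1,
 *	deferred callbacks may run
 */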

void
smr_wakeup(void *arg)
{
	wakeup(&smr_ndeferred);
}

void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
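
/*
 * Illustrative sketch (not part of the original source): a read-side
 * critical section.  The list, element type and field names below are
 * hypothetical; the SMR list macros are declared in <sys/smr.h>.  The
 * section must not sleep, and on DIAGNOSTIC kernels the spc_smrdepth
 * counter above catches unbalanced enter/leave pairs.
 *
 *	struct foo *f;
 *
 *	smr_read_enter();
 *	SMR_SLIST_FOREACH(f, &foo_list, f_link) {
 *		if (f->f_key == key)
 *			break;
 *	}
 *	smr_read_leave();
 */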

/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	if (expedite)
		wakeup(&smr_ndeferred);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must not become visible before the preceding
	 * reads of SMR-protected data have completed; membar_exit()
	 * provides that ordering.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}
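
/*
 * Illustrative sketch (not part of the original source): smr_idle()
 * is intended to be called from the idle loop before the CPU halts,
 * so that an idle CPU both flushes its local queue and acknowledges
 * the current grace period.  Roughly:
 *
 *	if (cpu_is_idle(curcpu())) {
 *		smr_idle();
 *		cpu_idle_cycle();
 *	}
 */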

void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);

	/*
	 * If this call was made from an interrupt context that
	 * preempted the idle state, dispatch the local queue to the
	 * shared queue immediately.  Otherwise the entries could
	 * linger in the local queue for a long time if the CPU went
	 * to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}
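
/*
 * Illustrative sketch (not part of the original source): typical
 * asynchronous reclamation through smr_call(), the wrapper that
 * invokes smr_call_impl() with expedite set to zero.  The structure,
 * list and pool names are hypothetical:
 *
 *	void
 *	foo_reclaim(void *arg)
 *	{
 *		pool_put(&foo_pool, arg);
 *	}
 *
 *	void
 *	foo_remove(struct foo *f)
 *	{
 *		SMR_SLIST_REMOVE_LOCKED(&foo_list, f, foo, f_link);
 *		smr_init(&f->f_smr);
 *		smr_call(&f->f_smr, foo_reclaim, f);
 *	}
 */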

void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
}
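
/*
 * Illustrative sketch (not part of the original source): smr_barrier()
 * and smr_flush() are the wrappers that call smr_barrier_impl() without
 * and with expedite, respectively.  A writer can unlink an element,
 * sleep until no reader can still hold a reference, and then free it
 * synchronously.  The names below are hypothetical:
 *
 *	SMR_SLIST_REMOVE_LOCKED(&foo_list, f, foo, f_link);
 *	smr_barrier();
 *	pool_put(&foo_pool, f);
 */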