/*	$NetBSD: kern_sleepq.c,v 1.21 2008/02/14 14:26:57 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.21 2008/02/14 14:26:57 ad Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

#include <uvm/uvm_extern.h>

int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t	sleeptab;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		mutex_init(&st->st_queues[i].st_mutex, MUTEX_DEFAULT,
		    IPL_SCHED);
		sleepq_init(sq, &st->st_queues[i].st_mutex);
	}
}
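
/*
 * Illustrative sketch (not part of the original file): each wait channel
 * address hashes to one of the SLEEPTAB_HASH_SIZE buckets above.  Assuming
 * a hash macro of the shape found in <sys/sleepq.h>, a bare lookup is
 * roughly:
 *
 *	sleepq_t *
 *	example_pick_queue(sleeptab_t *st, wchan_t wchan)
 *	{
 *		u_int hash = SLEEPTAB_HASH(wchan);	// macro name assumed
 *
 *		return &st->st_queues[hash].st_queue;	// queue not locked
 *	}
 *
 * Real callers use sleeptab_lookup(), which also takes the bucket's mutex.
 */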

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq, kmutex_t *mtx)
{

	sq->sq_waiters = 0;
	sq->sq_mutex = mtx;
	TAILQ_INIT(&sq->sq_queue);
}

/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.  Return non-zero if
 *	the LWP is swapped out; if so the caller needs to awaken the swapper
 *	to bring the LWP into memory.
 */
int
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;
	pri_t pri;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(sq->sq_waiters > 0);

	sq->sq_waiters--;
	TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);

#ifdef DIAGNOSTIC
	if (sq->sq_waiters == 0)
		KASSERT(TAILQ_FIRST(&sq->sq_queue) == NULL);
	else
		KASSERT(TAILQ_FIRST(&sq->sq_queue) != NULL);
#endif

	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been suspended.  Let whoever
	 * is holding it stopped set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/*
	 * Call the scheduler's wake-up handler.  It might change the CPU
	 * for this thread.
	 */
	sched_wakeup(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	if ((l->l_flag & LW_INMEM) != 0) {
		sched_enqueue(l, false);
		pri = lwp_eprio(l);
		/* XXX This test is not good enough! */
		if (pri > spc->spc_curpriority) {
			cpu_need_resched(ci,
			    (pri >= PRI_KERNEL ? RESCHED_IMMED : 0));
		}
		spc_unlock(ci);
		return 0;
	}
	spc_unlock(ci);
	return 1;
}
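
/*
 * Illustrative caller (a sketch, not from the original file): the return
 * value feeds the swapper, exactly as sleepq_wake() and sleepq_unsleep()
 * below do:
 *
 *	if (sleepq_remove(sq, l))
 *		uvm_kick_scheduler();
 */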

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
	lwp_t *l2;
	const int pri = lwp_eprio(l);

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_FOREACH(l2, &sq->sq_queue, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(&sq->sq_queue, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(&sq->sq_queue, l, l_sleepchain);
}
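
/*
 * For example (illustrative only): on a SOBJ_SLEEPQ_SORTED queue a waiter
 * with effective priority 50 is inserted ahead of waiters at 40 and behind
 * waiters at 60, so the queue runs from highest to lowest priority and
 * equal priorities keep FIFO order among themselves.  sleepq_wake()
 * therefore sees the highest-priority LWP first.
 */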

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(mutex_owned(sq->sq_mutex));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sq->sq_waiters++;
	sleepq_insert(sq, l, sobj);
	sched_slept(l);
}
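
/*
 * Usage sketch (hedged; the wait channel, wmesg and syncobj names below
 * are hypothetical): a sleep/wakeup consumer strings the pieces together
 * roughly the way the condition variable code does:
 *
 *	sleepq_t *sq;
 *	int error;
 *
 *	sq = sleeptab_lookup(&sleeptab, example_wchan);	// queue now locked
 *	sleepq_enter(sq, curlwp);	// lock the LWP, drop the kernel lock
 *	sleepq_enqueue(sq, example_wchan, "example", &example_syncobj);
 *	error = sleepq_block(0, true);	// sleep until awoken or interrupted
 */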

/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch.
 *	sleepq_block() may return early under exceptional conditions, for
 *	example if the LWP's containing process is exiting.
 */
int
sleepq_block(int timo, bool catch)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock */
		lwp_unsleep(l);
	} else {
		if (timo)
			callout_schedule(&l->l_timeout_ch, timo);
		mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we need to
			 * stop it in order to synchronise with other CPUs.
			 */
			if (callout_stop(&l->l_timeout_ch))
				error = EWOULDBLOCK;
		}
	}

	if (catch && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			mutex_enter(&p->p_smutex);
			if ((sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(&p->p_smutex);
		}
	}

	ktrcsw(0, 0);

	KERNEL_LOCK(l->l_biglocks, l);
	return error;
}
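
/*
 * Timeout sketch (illustrative, not from the original file): "timo" is in
 * clock ticks, so callers commonly derive it with mstohz().  A timed wait
 * that distinguishes a timeout from a genuine wakeup might look like:
 *
 *	error = sleepq_block(mstohz(200), true);
 *	if (error == EWOULDBLOCK) {
 *		// The callout fired before any wakeup arrived.
 *	}
 */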

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected)
{
	lwp_t *l, *next;
	int swapin = 0;

	KASSERT(mutex_owned(sq->sq_mutex));

	for (l = TAILQ_FIRST(&sq->sq_queue); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		swapin |= sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	sleepq_unlock(sq);

	/*
	 * If there are newly awakened threads that need to be swapped in,
	 * then kick the swapper into action.
	 */
	if (swapin)
		uvm_kick_scheduler();

	return l;
}
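
/*
 * Example (a sketch; "example_wchan" is hypothetical): "expected" bounds
 * how many LWPs are awoken.  Since sleepq_wake() unlocks the queue before
 * returning, the queue is re-locked (here via sleeptab_lookup()) before
 * each call:
 *
 *	sq = sleeptab_lookup(&sleeptab, example_wchan);
 *	sleepq_wake(sq, example_wchan, 1);		// wake one waiter
 *
 *	sq = sleeptab_lookup(&sleeptab, example_wchan);
 *	sleepq_wake(sq, example_wchan, (u_int)-1);	// wake all waiters
 */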

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	always release it.
 */
void
sleepq_unsleep(lwp_t *l)
{
	sleepq_t *sq = l->l_sleepq;
	int swapin;

	KASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_wchan != NULL);
	KASSERT(l->l_mutex == sq->sq_mutex);

	swapin = sleepq_remove(sq, l);
	sleepq_unlock(sq);

	if (swapin)
		uvm_kick_scheduler();
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on a
 *	sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l);
}

/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(&p->p_smutex));

	/*
	 * If the signal is not marked SA_RESTART, the interrupted system
	 * call should fail with EINTR rather than being restarted.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero, and
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	extern int safepri;
	int s;

	s = splhigh();
	splx(safepri);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}
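
/*
 * Sketch of the intended call pattern (hypothetical caller): a sleep
 * primitive tests sleepq_dontsleep() before committing to a real sleep,
 * roughly:
 *
 *	if (sleepq_dontsleep(curlwp))
 *		return sleepq_abort(mtx, 1);	// drop to safepri, unlock mtx
 */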

/*
 * sleepq_changepri:
 *
 *	Adjust the priority of an LWP residing on a sleepq.  This method
 *	will only alter the user priority; the effective priority is
 *	assumed to have been fixed at the time of insertion into the queue.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_priority = pri;
	if (lwp_eprio(l) != opri) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}

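/*
 * sleepq_lendpri:
 *
 *	Lend a priority to an LWP residing on a sleepq (used for priority
 *	inheritance).  The queue is re-sorted only for priority-sorted
 *	sleep queues (SOBJ_SLEEPQ_SORTED), such as turnstiles.
 */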
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_inheritedprio = pri;

	if (lwp_eprio(l) != opri &&
	    (l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}
480