/*	$NetBSD: kern_sleepq.c,v 1.19 2007/12/05 07:06:53 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.19 2007/12/05 07:06:53 ad Exp $");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

#include <uvm/uvm_extern.h>

int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t	sleeptab;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		mutex_init(&st->st_queues[i].st_mutex, MUTEX_DEFAULT,
		    IPL_SCHED);
		sleepq_init(sq, &st->st_queues[i].st_mutex);
	}
}
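
/*
 * Illustrative sketch only (not part of the original file): callers do
 * not index the table directly.  They go through sleeptab_lookup() in
 * sys/sleepq.h, which hashes the wait channel address to select one
 * bucket and locks it, conceptually along these lines (SLEEPTAB_HASH()
 * stands in here for whatever hash the header actually uses):
 *
 *	sq = &st->st_queues[SLEEPTAB_HASH(wchan)].st_queue;
 *	mutex_spin_enter(sq->sq_mutex);
 *	return sq;
 */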

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq, kmutex_t *mtx)
{

	sq->sq_waiters = 0;
	sq->sq_mutex = mtx;
	TAILQ_INIT(&sq->sq_queue);
}

/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.  Return non-zero if
 *	the LWP is swapped out; if so the caller needs to awaken the swapper
 *	to bring the LWP into memory.
 */
int
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;
	pri_t pri;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(sq->sq_waiters > 0);

	sq->sq_waiters--;
	TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);

#ifdef DIAGNOSTIC
	if (sq->sq_waiters == 0)
		KASSERT(TAILQ_FIRST(&sq->sq_queue) == NULL);
	else
		KASSERT(TAILQ_FIRST(&sq->sq_queue) != NULL);
#endif

	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been stopped or suspended.
	 * Let whoever is holding it in that state set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, &spc->spc_lwplock);
		return 0;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, &spc->spc_lwplock);
		return 0;
	}

	/*
	 * Call the scheduler's wake-up handler; it may change the CPU
	 * on which this thread will run.
	 */
	sched_wakeup(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	if ((l->l_flag & LW_INMEM) != 0) {
		sched_enqueue(l, false);
		pri = lwp_eprio(l);
		/* XXX This test is not good enough! */
		if (pri > spc->spc_curpriority) {
			cpu_need_resched(ci,
			    (pri >= PRI_KERNEL ? RESCHED_IMMED : 0));
		}
		spc_unlock(ci);
		return 0;
	}
	spc_unlock(ci);
	return 1;
}
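
/*
 * Illustrative note (not part of the original file): callers of
 * sleepq_remove() are expected to defer the swapper kick until after
 * the sleep queue lock has been dropped, as sleepq_wake() and
 * sleepq_unsleep() below do:
 *
 *	swapin |= sleepq_remove(sq, l);
 *	...
 *	sleepq_unlock(sq);
 *	if (swapin)
 *		uvm_kick_scheduler();
 */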

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
	lwp_t *l2;
	const int pri = lwp_eprio(l);

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_FOREACH(l2, &sq->sq_queue, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(&sq->sq_queue, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(&sq->sq_queue, l, l_sleepchain);
}
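
/*
 * Worked example (illustrative only): with a sorted queue holding
 * waiters at effective priorities 30, 20 and 10, queued in that order,
 * inserting an LWP at priority 25 places it between the 30 and the 20,
 * since the entry at 20 is the first one with a lower priority.  An LWP
 * that is not higher than any existing waiter falls through to the tail
 * (or to the head, for LIFO sleep queues).
 */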

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(mutex_owned(sq->sq_mutex));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sq->sq_waiters++;
	sleepq_insert(sq, l, sobj);
	sched_slept(l);
}

/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch
 *	away and block.  sleepq_block() may return early under exceptional
 *	conditions, for example if the LWP's containing process is exiting.
 */
int
sleepq_block(int timo, bool catch)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock */
		lwp_unsleep(l);
	} else {
		if (timo)
			callout_schedule(&l->l_timeout_ch, timo);
		mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we need to
			 * stop it in order to synchronise with other CPUs.
			 */
			if (callout_stop(&l->l_timeout_ch))
				error = EWOULDBLOCK;
		}
	}

	if (catch && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			mutex_enter(&p->p_smutex);
			if ((sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(&p->p_smutex);
		}
	}

	ktrcsw(0, 0);

	KERNEL_LOCK(l->l_biglocks, l);
	return error;
}
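
/*
 * Illustrative sketch only (not part of the original file): a minimal
 * blocking wait over the general purpose sleep table might look roughly
 * like the following.  It assumes that the sleeptab_lookup() and
 * sleepq_enter() helpers declared in sys/sleepq.h take the arguments
 * shown, and that sleep_syncobj (from kern_synch.c) is a suitable sync
 * object for an ordinary sleep; treat both as assumptions, not as a
 * definitive recipe.
 *
 *	sleepq_t *sq;
 *	int error;
 *
 *	sq = sleeptab_lookup(&sleeptab, wchan);	(locks the queue)
 *	sleepq_enter(sq, curlwp);		(lends the LWP lock, drops
 *						 the kernel lock)
 *	sleepq_enqueue(sq, wchan, "example", &sleep_syncobj);
 *	error = sleepq_block(timo, true);	(sleeps interruptibly)
 *
 * The corresponding wake-up on another code path would then be:
 *
 *	sq = sleeptab_lookup(&sleeptab, wchan);
 *	sleepq_wake(sq, wchan, 1);		(wakes one LWP, unlocks sq)
 */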

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected)
{
	lwp_t *l, *next;
	int swapin = 0;

	KASSERT(mutex_owned(sq->sq_mutex));

	for (l = TAILQ_FIRST(&sq->sq_queue); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		swapin |= sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	sleepq_unlock(sq);

	/*
	 * If there are newly awakened threads that need to be swapped in,
	 * then kick the swapper into action.
	 */
	if (swapin)
		uvm_kick_scheduler();

	return l;
}
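
/*
 * Illustrative note (not part of the original file): the "expected"
 * argument bounds how many LWPs are awoken.  A caller waking a single
 * waiter passes 1; a broadcast-style caller passes a value at least as
 * large as the number of waiters, for example (u_int)-1, so that the
 * loop above only stops once no LWP sleeping on wchan remains queued.
 */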

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	always release it.
 */
void
sleepq_unsleep(lwp_t *l)
{
	sleepq_t *sq = l->l_sleepq;
	int swapin;

	KASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_wchan != NULL);
	KASSERT(l->l_mutex == sq->sq_mutex);

	swapin = sleepq_remove(sq, l);
	sleepq_unlock(sq);

	if (swapin)
		uvm_kick_scheduler();
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on a
 *	sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l);
}
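
/*
 * Illustrative sketch only (not part of the original file): the callout
 * used above is armed by sleepq_block() via
 * callout_schedule(&l->l_timeout_ch, timo).  For the timeout to land in
 * this handler, the per-LWP callout is assumed to have been bound to it
 * when the LWP was set up, along the lines of:
 *
 *	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
 *
 * The actual initialisation lives in the LWP creation path, not in this
 * file.
 */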

/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(&p->p_smutex));

	/*
	 * If the signal's handler was installed without SA_RESTART,
	 * don't let the syscall restart.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero, and
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	extern int safepri;
	int s;

	s = splhigh();
	splx(safepri);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}
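
/*
 * Illustrative sketch only (not part of the original file): higher level
 * sleep interfaces are expected to use this in tandem with
 * sleepq_dontsleep(), roughly:
 *
 *	if (sleepq_dontsleep(l))
 *		return sleepq_abort(mtx, unlock);
 *
 * so that code which "sleeps" during a panic, or before the scheduler is
 * running, simply spins the IPL down and returns instead of blocking.
 */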

/*
 * sleepq_changepri:
 *
 *	Adjust the user priority of an LWP residing on a sleepq.  If this
 *	changes the LWP's effective priority, the LWP is re-inserted into
 *	the queue so that its position continues to reflect that priority.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_priority = pri;
	if (lwp_eprio(l) != opri) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}

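/*
 * sleepq_lendpri:
 *
 *	Lend an inherited priority to an LWP residing on a sleepq (used
 *	for priority inheritance).  If the effective priority changes and
 *	the queue is sorted, the LWP is re-inserted at its new position.
 */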
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_inheritedprio = pri;

	if (lwp_eprio(l) != opri &&
	    (l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}
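
/*
 * Illustrative sketch only (not part of the original file): the routines
 * above are normally installed as the methods of a syncobj_t rather than
 * called directly.  Assuming the syncobj_t layout used elsewhere in the
 * kernel (compare sleep_syncobj in kern_synch.c), a sorted sleep queue
 * object would look roughly like the following; example_syncobj is a
 * hypothetical name:
 *
 *	syncobj_t example_syncobj = {
 *		SOBJ_SLEEPQ_SORTED,
 *		sleepq_unsleep,
 *		sleepq_changepri,
 *		sleepq_lendpri,
 *		syncobj_noowner,
 *	};
 */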
481