/*	$OpenBSD: kern_synch.c,v 1.116 2014/07/08 17:19:25 deraadt Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);


/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
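
/*
 * Example (values purely illustrative): a wait channel at address
 * 0x12345678 hashes to (0x12345678 >> 8) & (TABLESIZE - 1) ==
 * 0x123456 & 0x7f == 0x56, so that sleeper is queued on slpque[0x56].
 */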

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}


/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration, give
		 * interrupts a chance and then just return; don't run
		 * any other procs or panic below, in case this is the
		 * idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
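
/*
 * Example usage (an illustrative sketch, not code from this file; "sc",
 * "sc_flags" and "MYDEV_READY" are hypothetical driver names): wait up
 * to one second for a condition, allowing signals to interrupt the wait.
 *
 *	while ((sc->sc_flags & MYDEV_READY) == 0) {
 *		error = tsleep(&sc->sc_flags, PRIBIO | PCATCH, "mydev", hz);
 *		if (error != 0)
 *			return (error);
 *	}
 *
 * A non-zero return is EWOULDBLOCK (timeout), or EINTR/ERESTART (signal).
 */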

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex. After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/* XXX - We need to make sure that the mutex doesn't
	 * unblock splsched. This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
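
/*
 * Example usage (an illustrative sketch, not code from this file; the
 * "sc" names are hypothetical): wait for a counter protected by a mutex.
 * msleep() releases sc_mtx while asleep and reacquires it before
 * returning, unless PNORELOCK is given.
 *
 *	mtx_enter(&sc->sc_mtx);
 *	while (sc->sc_count == 0)
 *		msleep(&sc->sc_count, &sc->sc_mtx, PWAIT, "sccnt", 0);
 *	sc->sc_count--;
 *	mtx_leave(&sc->sc_mtx);
 */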

/*
 * Prepare the current process to sleep on the given identifier: record
 * the wait channel and message and put the process on the corresponding
 * sleep queue.  The scheduler lock is taken here and is still held when
 * this function returns; sleep_finish() releases it.
 */
void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

/*
 * Second half of a sleep: if do_sleep is still set, actually block by
 * switching to another process; otherwise take ourselves back off the
 * sleep queue.  Restores the scheduling state and drops the scheduler
 * lock taken in sleep_setup().
 */
void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
}

/*
 * Arm the per-process sleep timeout, if one was requested.
 */
void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

/*
 * Check whether the sleep was ended by the timeout firing; if so return
 * EWOULDBLOCK, otherwise cancel any still-pending timeout and return 0.
 */
int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

/*
 * If PCATCH was requested, mark the process as interruptible by signals
 * and check for pending signals or a single-thread request before
 * actually going to sleep.
 */
void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

/*
 * If the sleep was interruptible, determine whether it was ended by a
 * signal or a single-thread request and return the corresponding error:
 * EINTR if the signal should interrupt the system call, ERESTART if the
 * call should be restarted, or 0 if no signal is pending.
 */
int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
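
/*
 * Example usage (an illustrative sketch matching the tsleep() example
 * above): the producer sets the condition before waking the sleepers,
 * so a process that re-checks the flag after waking sees it set.
 *
 *	sc->sc_flags |= MYDEV_READY;
 *	wakeup(&sc->sc_flags);
 */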

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	yield();
	return (0);
}

int thrsleep_unlock(void *, int);

/*
 * Release the userland lock word passed to __thrsleep(2): either bump a
 * ticket lock (lockflags set) or store an unlocked spinlock value.
 */
int
thrsleep_unlock(void *lock, int lockflags)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;
	uint32_t *ticket = lock;
	uint32_t ticketvalue;
	int error;

	if (!lock)
		return (0);

	if (lockflags) {
		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
			return (error);
		ticketvalue++;
		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
	} else {
		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
	}
	return (error);
}

static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	long long to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
	int lockflags = SCARG(uap, clock_id) & 0x8;

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock, lockflags)))
				return (error);
			return (EWOULDBLOCK);
		}

		/*
		 * Convert the remaining time to ticks, rounding the
		 * nanoseconds up to a whole tick and adding one tick
		 * of slop so we never wake up early.
		 */
		timespecsub(tsp, &now, tsp);
		to_ticks = (long long)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock, lockflags))) {
		goto out;
	}

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);

}
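
/*
 * Example of the userland protocol (an illustrative sketch only; the
 * real consumers are the thread library's synchronization primitives,
 * and _spinlock()/_spinlock_unlock() stand in for whatever spinlock
 * routines the caller uses):
 *
 *	static _atomic_lock_t lk = _ATOMIC_LOCK_UNLOCKED;
 *	volatile int done = 0;
 *
 * Waiter: check the condition under the spinlock and hand the lock to
 * __thrsleep(2), which releases it only after registering the thread on
 * the channel, so the wakeup below cannot be lost.
 *
 *	_spinlock(&lk);
 *	while (!done) {
 *		__thrsleep(&done, CLOCK_MONOTONIC, NULL, &lk, NULL);
 *		_spinlock(&lk);
 *	}
 *	_spinlock_unlock(&lk);
 *
 * Waker: set the condition under the same lock, then wake one waiter.
 *
 *	_spinlock(&lk);
 *	done = 1;
 *	_spinlock_unlock(&lk);
 *	__thrwakeup(&done, 1);
 */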

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}