/*	$OpenBSD: kern_synch.c,v 1.115 2014/03/22 06:05:45 guenther Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);


/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
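
/*
 * Worked example (hypothetical address): an ident of
 * 0xffff800000123456 hashes to ((0xffff800000123456 >> 8) & 127)
 * == 0x34, i.e. bucket 52 of slpque[].
 */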

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}


/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened and EWOULDBLOCK if the timeout expires.  If
 * PCATCH is set and a signal needs to be delivered, ERESTART is returned
 * if the current system call should be restarted if possible, and EINTR
 * is returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
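
/*
 * Illustrative use (sketch only; "sc" and its members are
 * hypothetical): wait up to one second per iteration for a condition,
 * letting signals interrupt the wait.  The condition is re-checked in
 * a loop because a wakeup only makes the process runnable; it does
 * not guarantee the condition still holds:
 *
 *	while (sc->sc_busy) {
 *		error = tsleep(&sc->sc_busy, PRIBIO | PCATCH, "scbusy", hz);
 *		if (error != 0 && error != EWOULDBLOCK)
 *			return (error);
 *	}
 */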

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex.  After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/* XXX - We need to make sure that the mutex doesn't
	 * unblock splsched. This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
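
/*
 * Illustrative use (sketch only; "sc" and its members are
 * hypothetical): sleep on a condition while atomically releasing the
 * mutex that protects it; the mutex is held again on return since
 * PNORELOCK is not passed:
 *
 *	mtx_enter(&sc->sc_mtx);
 *	while (sc->sc_buf == NULL)
 *		msleep(&sc->sc_buf, &sc->sc_mtx, PRIBIO, "scbuf", 0);
 *	use(sc->sc_buf);
 *	mtx_leave(&sc->sc_mtx);
 */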
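
/*
 * Prepare the current process to sleep: record the wait channel,
 * message and priority, and insert the process on the sleep queue
 * bucket selected by LOOKUP(ident).  The scheduler lock is taken
 * here and released again in sleep_finish().
 */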
void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}
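
/*
 * Complete a sleep started by sleep_setup().  If both the setup and
 * the caller still want to sleep, we context-switch away; if the
 * caller has changed its mind (do_sleep == 0) we remove ourselves
 * from the sleep queue.  Either way the scheduler lock taken in
 * sleep_setup() is released.
 */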
void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
}

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
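
/*
 * Illustrative pairing with the tsleep()/msleep() sketches above
 * ("sc" is hypothetical): the producer publishes the condition under
 * its lock, then wakes all sleepers on the same ident:
 *
 *	mtx_enter(&sc->sc_mtx);
 *	sc->sc_buf = buf;
 *	mtx_leave(&sc->sc_mtx);
 *	wakeup(&sc->sc_buf);
 */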

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	yield();
	return (0);
}
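
/*
 * Release the userland lock passed to __thrsleep(2).  Two lock
 * flavours are supported: with lockflags set, "lock" is treated as a
 * ticket lock and its 32-bit value is incremented to hand ownership
 * to the next waiter; otherwise it is a spinlock word that is reset
 * to _ATOMIC_LOCK_UNLOCKED.
 */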
int thrsleep_unlock(void *, int);
int
thrsleep_unlock(void *lock, int lockflags)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;
	uint32_t *ticket = lock;
	uint32_t ticketvalue;
	int error;

	if (!lock)
		return (0);

	if (lockflags) {
		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
			return (error);
		ticketvalue++;
		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
	} else {
		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
	}
	return (error);
}

static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	long long to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
	int lockflags = SCARG(uap, clock_id) & 0x8;

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock, lockflags)))
				return (error);
			return (EWOULDBLOCK);
		}

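		/*
		 * Convert the remaining time to a tick count for
		 * tsleep(), rounding the sub-second part up: "tick" is
		 * microseconds per hardclock tick, so tick * 1000 is
		 * nanoseconds per tick.  E.g. (illustrative) with
		 * hz=100 and tick=10000, a remainder of 1.5s gives
		 * 100 + 50 + 1 = 151 ticks.
		 */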
		timespecsub(tsp, &now, tsp);
		to_ticks = (long long)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock, lockflags))) {
		goto out;
	}

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);
}

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}
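
/*
 * Typical userland pairing (illustrative sketch; the structure and
 * field names are hypothetical): one thread blocks with
 *
 *	__thrsleep(&w->cond, CLOCK_REALTIME, NULL, &w->lock, NULL);
 *
 * and another thread releases exactly one waiter with
 *
 *	__thrwakeup(&w->cond, 1);
 *
 * Setting the 0x8 bit in clock_id makes thrsleep_unlock() treat
 * "lock" as a ticket lock rather than a spinlock word.
 */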
565