/*	$OpenBSD: kern_synch.c,v 1.121 2015/05/12 09:30:35 mikeb Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
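
/*
 * Worked example (illustrative address, not from this file): an ident
 * of 0x80003456 hashes as
 *
 *	(0x80003456 >> 8) & (TABLESIZE - 1) = 0x800034 & 0x7f = 0x34
 *
 * i.e. bucket 52.  Idents within the same 256-byte span share a bucket,
 * which is harmless since wakeup_n() compares the full p_wchan value
 * before waking anything.
 */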

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If priority includes the PCATCH flag, signals
 * are checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then return; don't run any other
		 * procs or panic below, in case this is the idle process
		 * and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
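
/*
 * A minimal usage sketch, not taken from this file ("sc" and "sc_done"
 * are hypothetical): a driver waits on the address of a flag that its
 * interrupt handler sets, re-checking the condition in a loop to guard
 * against wakeups for some other reason.
 *
 *	while (sc->sc_done == 0) {
 *		error = tsleep(&sc->sc_done, PRIBIO | PCATCH, "mydev", hz);
 *		if (error != 0 && error != EWOULDBLOCK)
 *			return (error);
 *	}
 *
 * The interrupt side sets sc->sc_done = 1 and calls wakeup(&sc->sc_done).
 */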

/*
 * Same as tsleep, but a mutex must be provided; once we've entered the
 * sleep queue we drop the mutex, and after sleeping we re-lock it
 * (unless PNORELOCK is set, in which case it is left unlocked).
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then return; don't run any other
		 * procs or panic below, in case this is the idle process
		 * and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/*
	 * XXX - We need to make sure that the mutex doesn't
	 * unblock splsched.  This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl;	/* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
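
/*
 * A minimal sketch of the intended pattern (hypothetical "sc" fields):
 * the condition is protected by the mutex, and msleep() drops it only
 * after the proc is already on the sleep queue, so a wakeup between
 * the check and the sleep cannot be lost.
 *
 *	mtx_enter(&sc->sc_mtx);
 *	while (sc->sc_busy)
 *		msleep(&sc->sc_busy, &sc->sc_mtx, PRIBIO, "scbusy", 0);
 *	sc->sc_busy = 1;
 *	mtx_leave(&sc->sc_mtx);
 *
 * With PNORELOCK in the priority argument the mutex is left unlocked
 * on return instead of being re-taken.
 */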

void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
}

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make at most n of the processes sleeping on the specified identifier
 * runnable; n < 0 means all of them.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
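
/*
 * wakeup() is thus wakeup_n() with n == -1: the count is decremented
 * toward zero for each proc released and -1 never gets there, so every
 * sleeper on the ident is woken.  The n == 1 case releases at most one
 * sleeper, which is how the wakeup_one() macro used further below is
 * defined in <sys/systm.h>.
 */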

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	yield();
	return (0);
}

int thrsleep_unlock(void *, int);
int
thrsleep_unlock(void *lock, int lockflags)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;
	uint32_t *ticket = lock;
	uint32_t ticketvalue;
	int error;

	if (!lock)
		return (0);

	if (lockflags) {
		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
			return (error);
		ticketvalue++;
		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
	} else {
		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
	}
	return (error);
}
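
/*
 * Illustrative summary of the two userland lock styles handled above:
 * with lockflags clear, "lock" points at an _atomic_lock_t spinlock
 * word and releasing it means storing _ATOMIC_LOCK_UNLOCKED into it;
 * with lockflags set, it points at a uint32_t ticket lock and releasing
 * it means bumping the "now serving" value by one so the next waiter
 * may enter.  Both stores go through copyin()/copyout() because the
 * lock word lives in user address space.
 */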

static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	long long to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
	int lockflags = SCARG(uap, clock_id) & 0x8;

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock, lockflags)))
				return (error);
			return (EWOULDBLOCK);
		}

		timespecsub(tsp, &now, tsp);
		to_ticks = (long long)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock, lockflags)))
		goto out;

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);
}
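
/*
 * Worked example of the timeout conversion above (illustrative, with
 * hz = 100 and thus tick = 10000 microseconds): a remaining time of
 * 1.5 seconds gives
 *
 *	to_ticks = 100 * 1 + (500000000 + 9999999) / 10000000 + 1
 *	         = 100 + 50 + 1 = 151 ticks
 *
 * The nanosecond part is rounded up and one extra tick is added so the
 * sleep never expires before the absolute deadline.
 */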

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}
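
/*
 * A hedged userland-side sketch of how this syscall pair is used
 * (modelled on what librthread does; the names are illustrative, not
 * its API): a waiter publishes an ident, has its userland spinlock
 * released via the lock argument, and blocks; the waker stores to the
 * shared state and wakes by the same ident.
 *
 *	(waiter)  __thrsleep(&obj, CLOCK_MONOTONIC, &abstime, &spin, NULL);
 *	(waker)   __thrwakeup(&obj, 1);
 *
 * ident == -1 selects the shared globalsleepaddr channel, and a wakeup
 * that arrives before the sleep simply clears p_thrslpid, so thrsleep()
 * sees 0 and skips the tsleep() entirely.
 */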