/*	$OpenBSD: kern_synch.c,v 1.135 2016/09/13 08:32:44 mpi Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
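
/*
 * For example, a (hypothetical) wait channel address of 0x80123456
 * hashes to bucket (0x80123456 >> 8) & 127 == 0x34 == 52; every ident
 * in the same 256-byte span lands in the same bucket, and wakeup_n()
 * tells the sleepers apart by comparing p_wchan exactly.
 */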

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted, and EINTR is returned if the
 * system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
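
/*
 * Example (illustrative sketch, not compiled): a typical consumer of
 * tsleep() sleeps on the address of the condition it waits for and
 * re-checks that condition in a loop, since a wakeup only means
 * "look again", not "the condition now holds".  The softc type and
 * field names below are hypothetical.
 */
#if 0
int
example_wait_ready(struct example_softc *sc)
{
	int error;

	while (!sc->sc_ready) {
		/* Interruptible sleep with a one-second timeout. */
		error = tsleep(&sc->sc_ready, PWAIT | PCATCH, "exrdy", hz);
		if (error)
			return (error);	/* EWOULDBLOCK, EINTR or ERESTART */
	}
	return (0);
}
#endif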

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex.  After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/*
	 * XXX - We need to make sure that the mutex doesn't
	 * unblock splsched.  This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
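
/*
 * Example (illustrative sketch, not compiled): because msleep() puts
 * the proc on the sleep queue before dropping the mutex, a wakeup sent
 * by a holder of that mutex cannot slip in between the unlock and the
 * sleep.  The queue type and names below are hypothetical.
 */
#if 0
int
example_dequeue(struct example_queue *q, struct example_item **itemp)
{
	int error;

	mtx_enter(&q->q_mtx);
	while (TAILQ_EMPTY(&q->q_items)) {
		/* Sleeps with q_mtx dropped, re-locks it before return. */
		error = msleep(&q->q_items, &q->q_mtx, PWAIT | PCATCH,
		    "exdeq", 0);
		if (error) {
			mtx_leave(&q->q_mtx);
			return (error);
		}
	}
	*itemp = TAILQ_FIRST(&q->q_items);
	TAILQ_REMOVE(&q->q_items, *itemp, i_entry);
	mtx_leave(&q->q_mtx);
	return (0);
}
#endif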

/*
 * Same as tsleep, but if we have a rwlock provided, then once we've
 * entered the sleep queue we drop it.  After sleeping we re-lock.
 */
int
rwsleep(const volatile void *ident, struct rwlock *wl, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	rw_assert_wrlock(wl);

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	rw_exit_write(wl);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0)
		rw_enter_write(wl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}

void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);
}

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement the timeout for tsleep.
 * If the process hasn't been awakened (wchan non-zero),
 * set the timeout flag and undo the sleep.  If the proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
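
/*
 * Example (illustrative sketch, not compiled): the producer side
 * matching the tsleep() consumer sketched above.  Both sides must use
 * the exact same ident (here the hypothetical &sc->sc_ready), and the
 * condition must be updated before the wakeup.
 */
#if 0
void
example_mark_ready(struct example_softc *sc)
{
	sc->sc_ready = 1;
	wakeup(&sc->sc_ready);	/* or wakeup_one() to wake at most one */
}
#endif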

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	struct proc *q;
	int s;

	SCHED_LOCK(s);
	/*
	 * If one of the threads of a multi-threaded process called
	 * sched_yield(2), drop its priority to ensure its siblings
	 * can make some progress.
	 */
	p->p_priority = p->p_usrpri;
	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
		p->p_priority = max(p->p_priority, q->p_priority);
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);

	return (0);
}

int
thrsleep_unlock(void *lock)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;

	if (!lock)
		return 0;

	return copyout(&unlocked, atomiclock, sizeof(unlocked));
}

static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	uint64_t to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id);

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock)))
				return (error);
			return (EWOULDBLOCK);
		}

		timespecsub(tsp, &now, tsp);
		to_ticks = (uint64_t)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
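		/*
		 * Worked example (assuming hz == 100, i.e. tick ==
		 * 10000us): a remaining timeout of 1.5s yields
		 * 100 * 1 + (500000000 + 9999999) / 10000000 + 1 == 151
		 * ticks.  The nanoseconds round up to whole ticks and
		 * the trailing +1 covers the partially elapsed current
		 * tick, so the sleep never ends early.
		 */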
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock)))
		goto out;

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);
}

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}

void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}
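
/*
 * Example (illustrative sketch, not compiled): the intended refcnt
 * life cycle.  The object type and helper below are hypothetical.
 * The destroying thread drops the initial reference taken by
 * refcnt_init() and then sleeps in refcnt_finalize() until every
 * refcnt_take() has been matched by a refcnt_rele_wake().
 */
#if 0
struct example_obj {
	struct refcnt	o_refs;
	/* ... other members ... */
};

void
example_obj_destroy(struct example_obj *o)
{
	/* Drop the creator's reference and wait out all others. */
	refcnt_finalize(&o->o_refs, "exfin");
	free(o, M_DEVBUF, sizeof(*o));
}
#endif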