xref: /openbsd-src/sys/kern/kern_synch.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: kern_synch.c,v 1.133 2016/07/06 15:53:01 tedu Exp $	*/
2 /*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1990, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/signalvar.h>
45 #include <sys/resourcevar.h>
46 #include <sys/sched.h>
47 #include <sys/timeout.h>
48 #include <sys/mount.h>
49 #include <sys/syscallargs.h>
50 #include <sys/pool.h>
51 #include <sys/refcnt.h>
52 #include <sys/atomic.h>
53 #include <ddb/db_output.h>
54 
55 #include <machine/spinlock.h>
56 
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60 
/* Forward declarations for the __thrsleep(2) helpers defined in this file. */
int	thrsleep(struct proc *p, struct sys___thrsleep_args *v);
int	thrsleep_unlock(void *lock, int lockflags);
63 
/*
 * Sleep queue hashing.
 *
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 *
 * The address is converted to an unsigned type before shifting:
 * right-shifting a negative signed value is implementation-defined
 * in C, and addresses with the top bit set would be negative as a
 * signed long.  The selected bucket is the same as with an
 * arithmetic shift, because only bits 8..14 survive the mask.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((unsigned long)(x) >> 8) & (TABLESIZE - 1))
71 TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
72 
73 void
74 sleep_queue_init(void)
75 {
76 	int i;
77 
78 	for (i = 0; i < TABLESIZE; i++)
79 		TAILQ_INIT(&slpque[i]);
80 }
81 
82 
83 /*
84  * During autoconfiguration or after a panic, a sleep will simply
85  * lower the priority briefly to allow interrupts, then return.
86  * The priority to be used (safepri) is machine-dependent, thus this
87  * value is initialized and maintained in the machine-dependent layers.
88  * This priority will typically be 0, or the lowest priority
89  * that is safe for use on the interrupt stack; it can be made
90  * higher to block network software interrupts after panics.
91  */
92 extern int safepri;
93 
94 /*
95  * General sleep call.  Suspends the current process until a wakeup is
96  * performed on the specified identifier.  The process will then be made
97  * runnable with the specified priority.  Sleeps at most timo/hz seconds
98  * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
99  * before and after sleeping, else signals are not checked.  Returns 0 if
100  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
101  * signal needs to be delivered, ERESTART is returned if the current system
102  * call should be restarted if possible, and EINTR is returned if the system
103  * call should be interrupted by the signal (return EINTR).
104  */
105 int
106 tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
107 {
108 	struct sleep_state sls;
109 	int error, error1;
110 #ifdef MULTIPROCESSOR
111 	int hold_count;
112 #endif
113 
114 	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);
115 
116 #ifdef MULTIPROCESSOR
117 	KASSERT(timo || __mp_lock_held(&kernel_lock));
118 #endif
119 
120 #ifdef DDB
121 	if (cold == 2)
122 		db_stack_dump();
123 #endif
124 	if (cold || panicstr) {
125 		int s;
126 		/*
127 		 * After a panic, or during autoconfiguration,
128 		 * just give interrupts a chance, then just return;
129 		 * don't run any other procs or panic below,
130 		 * in case this is the idle process and already asleep.
131 		 */
132 		s = splhigh();
133 		splx(safepri);
134 #ifdef MULTIPROCESSOR
135 		if (__mp_lock_held(&kernel_lock)) {
136 			hold_count = __mp_release_all(&kernel_lock);
137 			__mp_acquire_count(&kernel_lock, hold_count);
138 		}
139 #endif
140 		splx(s);
141 		return (0);
142 	}
143 
144 	sleep_setup(&sls, ident, priority, wmesg);
145 	sleep_setup_timeout(&sls, timo);
146 	sleep_setup_signal(&sls, priority);
147 
148 	sleep_finish(&sls, 1);
149 	error1 = sleep_finish_timeout(&sls);
150 	error = sleep_finish_signal(&sls);
151 
152 	/* Signal errors are higher priority than timeouts. */
153 	if (error == 0 && error1 != 0)
154 		error = error1;
155 
156 	return (error);
157 }
158 
159 /*
160  * Same as tsleep, but if we have a mutex provided, then once we've
161  * entered the sleep queue we drop the mutex. After sleeping we re-lock.
162  */
163 int
164 msleep(const volatile void *ident, struct mutex *mtx, int priority,
165     const char *wmesg, int timo)
166 {
167 	struct sleep_state sls;
168 	int error, error1, spl;
169 #ifdef MULTIPROCESSOR
170 	int hold_count;
171 #endif
172 
173 	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
174 	KASSERT(mtx != NULL);
175 
176 	if (cold || panicstr) {
177 		/*
178 		 * After a panic, or during autoconfiguration,
179 		 * just give interrupts a chance, then just return;
180 		 * don't run any other procs or panic below,
181 		 * in case this is the idle process and already asleep.
182 		 */
183 		spl = MUTEX_OLDIPL(mtx);
184 		MUTEX_OLDIPL(mtx) = safepri;
185 		mtx_leave(mtx);
186 #ifdef MULTIPROCESSOR
187 		if (__mp_lock_held(&kernel_lock)) {
188 			hold_count = __mp_release_all(&kernel_lock);
189 			__mp_acquire_count(&kernel_lock, hold_count);
190 		}
191 #endif
192 		if ((priority & PNORELOCK) == 0) {
193 			mtx_enter(mtx);
194 			MUTEX_OLDIPL(mtx) = spl;
195 		} else
196 			splx(spl);
197 		return (0);
198 	}
199 
200 	sleep_setup(&sls, ident, priority, wmesg);
201 	sleep_setup_timeout(&sls, timo);
202 	sleep_setup_signal(&sls, priority);
203 
204 	/* XXX - We need to make sure that the mutex doesn't
205 	 * unblock splsched. This can be made a bit more
206 	 * correct when the sched_lock is a mutex.
207 	 */
208 	spl = MUTEX_OLDIPL(mtx);
209 	MUTEX_OLDIPL(mtx) = splsched();
210 	mtx_leave(mtx);
211 
212 	sleep_finish(&sls, 1);
213 	error1 = sleep_finish_timeout(&sls);
214 	error = sleep_finish_signal(&sls);
215 
216 	if ((priority & PNORELOCK) == 0) {
217 		mtx_enter(mtx);
218 		MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
219 	} else
220 		splx(spl);
221 
222 	/* Signal errors are higher priority than timeouts. */
223 	if (error == 0 && error1 != 0)
224 		error = error1;
225 
226 	return (error);
227 }
228 
229 void
230 sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
231     const char *wmesg)
232 {
233 	struct proc *p = curproc;
234 
235 #ifdef DIAGNOSTIC
236 	if (p->p_flag & P_CANTSLEEP)
237 		panic("sleep: %s failed insomnia", p->p_comm);
238 	if (ident == NULL)
239 		panic("tsleep: no ident");
240 	if (p->p_stat != SONPROC)
241 		panic("tsleep: not SONPROC");
242 #endif
243 
244 	sls->sls_catch = 0;
245 	sls->sls_do_sleep = 1;
246 	sls->sls_sig = 1;
247 
248 	SCHED_LOCK(sls->sls_s);
249 
250 	p->p_wchan = ident;
251 	p->p_wmesg = wmesg;
252 	p->p_slptime = 0;
253 	p->p_priority = prio & PRIMASK;
254 	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
255 }
256 
257 void
258 sleep_finish(struct sleep_state *sls, int do_sleep)
259 {
260 	struct proc *p = curproc;
261 
262 	if (sls->sls_do_sleep && do_sleep) {
263 		p->p_stat = SSLEEP;
264 		p->p_ru.ru_nvcsw++;
265 		SCHED_ASSERT_LOCKED();
266 		mi_switch();
267 	} else if (!do_sleep) {
268 		unsleep(p);
269 	}
270 
271 #ifdef DIAGNOSTIC
272 	if (p->p_stat != SONPROC)
273 		panic("sleep_finish !SONPROC");
274 #endif
275 
276 	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
277 	SCHED_UNLOCK(sls->sls_s);
278 
279 	/*
280 	 * Even though this belongs to the signal handling part of sleep,
281 	 * we need to clear it before the ktrace.
282 	 */
283 	atomic_clearbits_int(&p->p_flag, P_SINTR);
284 }
285 
286 void
287 sleep_setup_timeout(struct sleep_state *sls, int timo)
288 {
289 	if (timo)
290 		timeout_add(&curproc->p_sleep_to, timo);
291 }
292 
293 int
294 sleep_finish_timeout(struct sleep_state *sls)
295 {
296 	struct proc *p = curproc;
297 
298 	if (p->p_flag & P_TIMEOUT) {
299 		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
300 		return (EWOULDBLOCK);
301 	} else
302 		timeout_del(&p->p_sleep_to);
303 
304 	return (0);
305 }
306 
307 void
308 sleep_setup_signal(struct sleep_state *sls, int prio)
309 {
310 	struct proc *p = curproc;
311 
312 	if ((sls->sls_catch = (prio & PCATCH)) == 0)
313 		return;
314 
315 	/*
316 	 * We put ourselves on the sleep queue and start our timeout
317 	 * before calling CURSIG, as we could stop there, and a wakeup
318 	 * or a SIGCONT (or both) could occur while we were stopped.
319 	 * A SIGCONT would cause us to be marked as SSLEEP
320 	 * without resuming us, thus we must be ready for sleep
321 	 * when CURSIG is called.  If the wakeup happens while we're
322 	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
323 	 */
324 	atomic_setbits_int(&p->p_flag, P_SINTR);
325 	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
326 		if (p->p_wchan)
327 			unsleep(p);
328 		p->p_stat = SONPROC;
329 		sls->sls_do_sleep = 0;
330 	} else if (p->p_wchan == 0) {
331 		sls->sls_catch = 0;
332 		sls->sls_do_sleep = 0;
333 	}
334 }
335 
336 int
337 sleep_finish_signal(struct sleep_state *sls)
338 {
339 	struct proc *p = curproc;
340 	int error;
341 
342 	if (sls->sls_catch != 0) {
343 		if ((error = single_thread_check(p, 1)))
344 			return (error);
345 		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
346 			if (p->p_p->ps_sigacts->ps_sigintr &
347 			    sigmask(sls->sls_sig))
348 				return (EINTR);
349 			return (ERESTART);
350 		}
351 	}
352 
353 	return (0);
354 }
355 
356 /*
357  * Implement timeout for tsleep.
358  * If process hasn't been awakened (wchan non-zero),
359  * set timeout flag and undo the sleep.  If proc
360  * is stopped, just unsleep so it will remain stopped.
361  */
362 void
363 endtsleep(void *arg)
364 {
365 	struct proc *p = arg;
366 	int s;
367 
368 	SCHED_LOCK(s);
369 	if (p->p_wchan) {
370 		if (p->p_stat == SSLEEP)
371 			setrunnable(p);
372 		else
373 			unsleep(p);
374 		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
375 	}
376 	SCHED_UNLOCK(s);
377 }
378 
379 /*
380  * Remove a process from its wait queue
381  */
382 void
383 unsleep(struct proc *p)
384 {
385 	SCHED_ASSERT_LOCKED();
386 
387 	if (p->p_wchan) {
388 		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
389 		p->p_wchan = NULL;
390 	}
391 }
392 
393 /*
394  * Make a number of processes sleeping on the specified identifier runnable.
395  */
396 void
397 wakeup_n(const volatile void *ident, int n)
398 {
399 	struct slpque *qp;
400 	struct proc *p;
401 	struct proc *pnext;
402 	int s;
403 
404 	SCHED_LOCK(s);
405 	qp = &slpque[LOOKUP(ident)];
406 	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
407 		pnext = TAILQ_NEXT(p, p_runq);
408 #ifdef DIAGNOSTIC
409 		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
410 			panic("wakeup: p_stat is %d", (int)p->p_stat);
411 #endif
412 		if (p->p_wchan == ident) {
413 			--n;
414 			p->p_wchan = 0;
415 			TAILQ_REMOVE(qp, p, p_runq);
416 			if (p->p_stat == SSLEEP)
417 				setrunnable(p);
418 		}
419 	}
420 	SCHED_UNLOCK(s);
421 }
422 
/*
 * Make all processes sleeping on the specified identifier runnable.
 * Implemented as wakeup_n() with a count of -1.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
431 
432 int
433 sys_sched_yield(struct proc *p, void *v, register_t *retval)
434 {
435 	struct proc *q;
436 	int s;
437 
438 	SCHED_LOCK(s);
439 	/*
440 	 * If one of the threads of a multi-threaded process called
441 	 * sched_yield(2), drop its priority to ensure its siblings
442 	 * can make some progress.
443 	 */
444 	p->p_priority = p->p_usrpri;
445 	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
446 		p->p_priority = max(p->p_priority, q->p_priority);
447 	p->p_stat = SRUN;
448 	setrunqueue(p);
449 	p->p_ru.ru_nvcsw++;
450 	mi_switch();
451 	SCHED_UNLOCK(s);
452 
453 	return (0);
454 }
455 
456 int
457 thrsleep_unlock(void *lock, int lockflags)
458 {
459 	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
460 	_atomic_lock_t *atomiclock = lock;
461 	uint32_t *ticket = lock;
462 	uint32_t ticketvalue;
463 	int error;
464 
465 	if (!lock)
466 		return (0);
467 
468 	if (lockflags) {
469 		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
470 			return (error);
471 		ticketvalue++;
472 		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
473 	} else {
474 		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
475 	}
476 	return (error);
477 }
478 
479 static int globalsleepaddr;
480 
481 int
482 thrsleep(struct proc *p, struct sys___thrsleep_args *v)
483 {
484 	struct sys___thrsleep_args /* {
485 		syscallarg(const volatile void *) ident;
486 		syscallarg(clockid_t) clock_id;
487 		syscallarg(const struct timespec *) tp;
488 		syscallarg(void *) lock;
489 		syscallarg(const int *) abort;
490 	} */ *uap = v;
491 	long ident = (long)SCARG(uap, ident);
492 	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
493 	void *lock = SCARG(uap, lock);
494 	uint64_t to_ticks = 0;
495 	int abort, error;
496 	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
497 	int lockflags = SCARG(uap, clock_id) & 0x8;
498 
499 	if (ident == 0)
500 		return (EINVAL);
501 	if (tsp != NULL) {
502 		struct timespec now;
503 
504 		if ((error = clock_gettime(p, clock_id, &now)))
505 			return (error);
506 #ifdef KTRACE
507 		if (KTRPOINT(p, KTR_STRUCT))
508 			ktrabstimespec(p, tsp);
509 #endif
510 
511 		if (timespeccmp(tsp, &now, <)) {
512 			/* already passed: still do the unlock */
513 			if ((error = thrsleep_unlock(lock, lockflags)))
514 				return (error);
515 			return (EWOULDBLOCK);
516 		}
517 
518 		timespecsub(tsp, &now, tsp);
519 		to_ticks = (uint64_t)hz * tsp->tv_sec +
520 		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
521 		if (to_ticks > INT_MAX)
522 			to_ticks = INT_MAX;
523 	}
524 
525 	p->p_thrslpid = ident;
526 
527 	if ((error = thrsleep_unlock(lock, lockflags))) {
528 		goto out;
529 	}
530 
531 	if (SCARG(uap, abort) != NULL) {
532 		if ((error = copyin(SCARG(uap, abort), &abort,
533 		    sizeof(abort))) != 0)
534 			goto out;
535 		if (abort) {
536 			error = EINTR;
537 			goto out;
538 		}
539 	}
540 
541 	if (p->p_thrslpid == 0)
542 		error = 0;
543 	else {
544 		void *sleepaddr = &p->p_thrslpid;
545 		if (ident == -1)
546 			sleepaddr = &globalsleepaddr;
547 		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
548 		    (int)to_ticks);
549 	}
550 
551 out:
552 	p->p_thrslpid = 0;
553 
554 	if (error == ERESTART)
555 		error = EINTR;
556 
557 	return (error);
558 
559 }
560 
561 int
562 sys___thrsleep(struct proc *p, void *v, register_t *retval)
563 {
564 	struct sys___thrsleep_args /* {
565 		syscallarg(const volatile void *) ident;
566 		syscallarg(clockid_t) clock_id;
567 		syscallarg(struct timespec *) tp;
568 		syscallarg(void *) lock;
569 		syscallarg(const int *) abort;
570 	} */ *uap = v;
571 	struct timespec ts;
572 	int error;
573 
574 	if (SCARG(uap, tp) != NULL) {
575 		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
576 			*retval = error;
577 			return (0);
578 		}
579 		SCARG(uap, tp) = &ts;
580 	}
581 
582 	*retval = thrsleep(p, uap);
583 	return (0);
584 }
585 
586 int
587 sys___thrwakeup(struct proc *p, void *v, register_t *retval)
588 {
589 	struct sys___thrwakeup_args /* {
590 		syscallarg(const volatile void *) ident;
591 		syscallarg(int) n;
592 	} */ *uap = v;
593 	long ident = (long)SCARG(uap, ident);
594 	int n = SCARG(uap, n);
595 	struct proc *q;
596 	int found = 0;
597 
598 	if (ident == 0)
599 		*retval = EINVAL;
600 	else if (ident == -1)
601 		wakeup(&globalsleepaddr);
602 	else {
603 		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
604 			if (q->p_thrslpid == ident) {
605 				wakeup_one(&q->p_thrslpid);
606 				q->p_thrslpid = 0;
607 				if (++found == n)
608 					break;
609 			}
610 		}
611 		*retval = found ? 0 : ESRCH;
612 	}
613 
614 	return (0);
615 }
616 
617 void
618 refcnt_init(struct refcnt *r)
619 {
620 	r->refs = 1;
621 }
622 
623 void
624 refcnt_take(struct refcnt *r)
625 {
626 #ifdef DIAGNOSTIC
627 	u_int refcnt;
628 
629 	refcnt = atomic_inc_int_nv(&r->refs);
630 	KASSERT(refcnt != 0);
631 #else
632 	atomic_inc_int(&r->refs);
633 #endif
634 }
635 
636 int
637 refcnt_rele(struct refcnt *r)
638 {
639 	u_int refcnt;
640 
641 	refcnt = atomic_dec_int_nv(&r->refs);
642 	KASSERT(refcnt != ~0);
643 
644 	return (refcnt == 0);
645 }
646 
/*
 * Drop one reference and, if it was the last one, wake one process
 * sleeping on the reference count.
 */
void
refcnt_rele_wake(struct refcnt *r)
{
	if (!refcnt_rele(r))
		return;

	wakeup_one(r);
}
653 
654 void
655 refcnt_finalize(struct refcnt *r, const char *wmesg)
656 {
657 	struct sleep_state sls;
658 	u_int refcnt;
659 
660 	refcnt = atomic_dec_int_nv(&r->refs);
661 	while (refcnt) {
662 		sleep_setup(&sls, r, PWAIT, wmesg);
663 		refcnt = r->refs;
664 		sleep_finish(&sls, refcnt);
665 	}
666 }
667