/*	$OpenBSD: kern_synch.c,v 1.144 2018/04/24 16:28:42 pirofti Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <sys/witness.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}
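
/*
 * Worked example (illustrative, not part of the original file): for a
 * hypothetical wait channel address 0x12345678,
 *
 *	LOOKUP(0x12345678) == ((0x12345678 >> 8) & 127)
 *			   == (0x123456 & 0x7f)
 *			   == 0x56
 *
 * so every sleeper on that channel lands on slpque[0x56].  Channels
 * that differ only in their low 8 bits share a bucket; that is
 * harmless, because wakeup_n() below re-checks p_wchan against the
 * ident before waking anything.
 */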

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || _kernel_lock_held());
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (_kernel_lock_held()) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
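
/*
 * Example (illustrative, not part of the original file): the canonical
 * tsleep()/wakeup() pattern.  The softc, flag and wait message below
 * are hypothetical.  The condition is re-checked in a loop because a
 * wakeup only makes the process runnable; it does not guarantee the
 * condition still holds by the time the sleeper runs again.
 *
 *	while ((sc->sc_flags & MYDEV_READY) == 0) {
 *		error = tsleep(&sc->sc_flags, PWAIT | PCATCH,
 *		    "mydevrdy", 0);
 *		if (error)
 *			return (error);
 *	}
 *
 * and on the waking side, after the condition has been set:
 *
 *	sc->sc_flags |= MYDEV_READY;
 *	wakeup(&sc->sc_flags);
 */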

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex.  After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif
	WITNESS_SAVE_DECL(lock_fl);

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (_kernel_lock_held()) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	WITNESS_SAVE(MUTEX_LOCK_OBJECT(mtx), lock_fl);

	/* XXX - We need to make sure that the mutex doesn't
	 * unblock splsched.  This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
		WITNESS_RESTORE(MUTEX_LOCK_OBJECT(mtx), lock_fl);
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}

/*
 * Same as tsleep, but if we have a rwlock provided, then once we've
 * entered the sleep queue we drop it.  After sleeping we re-lock.
 */
int
rwsleep(const volatile void *ident, struct rwlock *wl, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
	WITNESS_SAVE_DECL(lock_fl);

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	rw_assert_wrlock(wl);

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	WITNESS_SAVE(&wl->rwl_lock_obj, lock_fl);

	rw_exit_write(wl);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		rw_enter_write(wl);
		WITNESS_RESTORE(&wl->rwl_lock_obj, lock_fl);
	}

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
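
/*
 * Example (illustrative, not part of the original file): sleeping on a
 * condition protected by a mutex.  Because msleep() releases the mutex
 * only after the process is on the sleep queue, a wakeup issued by a
 * thread holding the mutex cannot be lost.  The names are hypothetical.
 *
 *	mtx_enter(&sc->sc_mtx);
 *	while (sc->sc_busy) {
 *		error = msleep(&sc->sc_busy, &sc->sc_mtx,
 *		    PWAIT | PCATCH, "mybusy", 0);
 *		if (error) {
 *			mtx_leave(&sc->sc_mtx);
 *			return (error);
 *		}
 *	}
 *	sc->sc_busy = 1;
 *	mtx_leave(&sc->sc_mtx);
 *
 * Passing PNORELOCK instead would return with the mutex dropped, which
 * saves a lock round-trip when the caller is done with the protected
 * state anyway.
 */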

void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_p->ps_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);
}

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}
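
/*
 * Sketch (illustrative, not part of the original file) of how the
 * setup/finish halves above compose.  A custom wait primitive that
 * needs neither a timeout nor signal catching can use the bare pair,
 * re-reading its condition between the two calls; passing the fresh
 * value to sleep_finish() means a wakeup that fired after
 * sleep_setup() aborts the sleep instead of being lost:
 *
 *	struct sleep_state sls;
 *	int cond;
 *
 *	cond = sc->sc_cond;
 *	while (cond) {
 *		sleep_setup(&sls, &sc->sc_cond, PWAIT, "mycond");
 *		cond = sc->sc_cond;
 *		sleep_finish(&sls, cond);
 *	}
 *
 * refcnt_finalize() and cond_wait() at the bottom of this file follow
 * exactly this shape.
 */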
405 */ 406 void 407 endtsleep(void *arg) 408 { 409 struct proc *p = arg; 410 int s; 411 412 SCHED_LOCK(s); 413 if (p->p_wchan) { 414 if (p->p_stat == SSLEEP) 415 setrunnable(p); 416 else 417 unsleep(p); 418 atomic_setbits_int(&p->p_flag, P_TIMEOUT); 419 } 420 SCHED_UNLOCK(s); 421 } 422 423 /* 424 * Remove a process from its wait queue 425 */ 426 void 427 unsleep(struct proc *p) 428 { 429 SCHED_ASSERT_LOCKED(); 430 431 if (p->p_wchan) { 432 TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq); 433 p->p_wchan = NULL; 434 } 435 } 436 437 /* 438 * Make a number of processes sleeping on the specified identifier runnable. 439 */ 440 void 441 wakeup_n(const volatile void *ident, int n) 442 { 443 struct slpque *qp; 444 struct proc *p; 445 struct proc *pnext; 446 int s; 447 448 SCHED_LOCK(s); 449 qp = &slpque[LOOKUP(ident)]; 450 for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) { 451 pnext = TAILQ_NEXT(p, p_runq); 452 #ifdef DIAGNOSTIC 453 /* 454 * If the rwlock passed to rwsleep() is contended, the 455 * CPU will end up calling wakeup() between sleep_setup() 456 * and sleep_finish(). 457 */ 458 if (p == curproc) { 459 KASSERT(p->p_stat == SONPROC); 460 continue; 461 } 462 if (p->p_stat != SSLEEP && p->p_stat != SSTOP) 463 panic("wakeup: p_stat is %d", (int)p->p_stat); 464 #endif 465 if (p->p_wchan == ident) { 466 --n; 467 p->p_wchan = 0; 468 TAILQ_REMOVE(qp, p, p_runq); 469 if (p->p_stat == SSLEEP) 470 setrunnable(p); 471 } 472 } 473 SCHED_UNLOCK(s); 474 } 475 476 /* 477 * Make all processes sleeping on the specified identifier runnable. 478 */ 479 void 480 wakeup(const volatile void *chan) 481 { 482 wakeup_n(chan, -1); 483 } 484 485 int 486 sys_sched_yield(struct proc *p, void *v, register_t *retval) 487 { 488 struct proc *q; 489 int s; 490 491 SCHED_LOCK(s); 492 /* 493 * If one of the threads of a multi-threaded process called 494 * sched_yield(2), drop its priority to ensure its siblings 495 * can make some progress. 
496 */ 497 p->p_priority = p->p_usrpri; 498 TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) 499 p->p_priority = max(p->p_priority, q->p_priority); 500 p->p_stat = SRUN; 501 setrunqueue(p); 502 p->p_ru.ru_nvcsw++; 503 mi_switch(); 504 SCHED_UNLOCK(s); 505 506 return (0); 507 } 508 509 int 510 thrsleep_unlock(void *lock) 511 { 512 static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED; 513 _atomic_lock_t *atomiclock = lock; 514 515 if (!lock) 516 return 0; 517 518 return copyout(&unlocked, atomiclock, sizeof(unlocked)); 519 } 520 521 static int globalsleepaddr; 522 523 int 524 thrsleep(struct proc *p, struct sys___thrsleep_args *v) 525 { 526 struct sys___thrsleep_args /* { 527 syscallarg(const volatile void *) ident; 528 syscallarg(clockid_t) clock_id; 529 syscallarg(const struct timespec *) tp; 530 syscallarg(void *) lock; 531 syscallarg(const int *) abort; 532 } */ *uap = v; 533 long ident = (long)SCARG(uap, ident); 534 struct timespec *tsp = (struct timespec *)SCARG(uap, tp); 535 void *lock = SCARG(uap, lock); 536 uint64_t to_ticks = 0; 537 int abort, error; 538 clockid_t clock_id = SCARG(uap, clock_id); 539 540 if (ident == 0) 541 return (EINVAL); 542 if (tsp != NULL) { 543 struct timespec now; 544 545 if ((error = clock_gettime(p, clock_id, &now))) 546 return (error); 547 #ifdef KTRACE 548 if (KTRPOINT(p, KTR_STRUCT)) 549 ktrabstimespec(p, tsp); 550 #endif 551 552 if (timespeccmp(tsp, &now, <)) { 553 /* already passed: still do the unlock */ 554 if ((error = thrsleep_unlock(lock))) 555 return (error); 556 return (EWOULDBLOCK); 557 } 558 559 timespecsub(tsp, &now, tsp); 560 to_ticks = (uint64_t)hz * tsp->tv_sec + 561 (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1; 562 if (to_ticks > INT_MAX) 563 to_ticks = INT_MAX; 564 } 565 566 p->p_thrslpid = ident; 567 568 if ((error = thrsleep_unlock(lock))) 569 goto out; 570 571 if (SCARG(uap, abort) != NULL) { 572 if ((error = copyin(SCARG(uap, abort), &abort, 573 sizeof(abort))) != 0) 574 goto out; 575 if (abort) { 576 error = EINTR; 577 goto out; 578 } 579 } 580 581 if (p->p_thrslpid == 0) 582 error = 0; 583 else { 584 void *sleepaddr = &p->p_thrslpid; 585 if (ident == -1) 586 sleepaddr = &globalsleepaddr; 587 error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep", 588 (int)to_ticks); 589 } 590 591 out: 592 p->p_thrslpid = 0; 593 594 if (error == ERESTART) 595 error = ECANCELED; 596 597 return (error); 598 599 } 600 601 int 602 sys___thrsleep(struct proc *p, void *v, register_t *retval) 603 { 604 struct sys___thrsleep_args /* { 605 syscallarg(const volatile void *) ident; 606 syscallarg(clockid_t) clock_id; 607 syscallarg(struct timespec *) tp; 608 syscallarg(void *) lock; 609 syscallarg(const int *) abort; 610 } */ *uap = v; 611 struct timespec ts; 612 int error; 613 614 if (SCARG(uap, tp) != NULL) { 615 if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) { 616 *retval = error; 617 return 0; 618 } 619 if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) { 620 *retval = EINVAL; 621 return 0; 622 } 623 SCARG(uap, tp) = &ts; 624 } 625 626 *retval = thrsleep(p, uap); 627 return 0; 628 } 629 630 int 631 sys___thrwakeup(struct proc *p, void *v, register_t *retval) 632 { 633 struct sys___thrwakeup_args /* { 634 syscallarg(const volatile void *) ident; 635 syscallarg(int) n; 636 } */ *uap = v; 637 long ident = (long)SCARG(uap, ident); 638 int n = SCARG(uap, n); 639 struct proc *q; 640 int found = 0; 641 642 if (ident == 0) 643 *retval = EINVAL; 644 else if (ident == -1) 645 wakeup(&globalsleepaddr); 646 else { 647 TAILQ_FOREACH(q, 

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}

void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}

void
cond_init(struct cond *c)
{
	c->c_wait = 1;
}

void
cond_signal(struct cond *c)
{
	c->c_wait = 0;

	wakeup_one(c);
}

void
cond_wait(struct cond *c, const char *wmesg)
{
	struct sleep_state sls;
	int wait;

	wait = c->c_wait;
	while (wait) {
		sleep_setup(&sls, c, PWAIT, wmesg);
		wait = c->c_wait;
		sleep_finish(&sls, wait);
	}
}
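
/*
 * Example (illustrative, not part of the original file): the intended
 * life cycle of the refcnt helpers above.  The object, pool and wait
 * message names are hypothetical.
 *
 *	struct foo *f;
 *
 *	f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	refcnt_init(&f->f_refcnt);
 *
 * Each additional user takes and releases its own reference:
 *
 *	refcnt_take(&f->f_refcnt);
 *	...
 *	refcnt_rele_wake(&f->f_refcnt);
 *
 * The destructor drops the initial reference and sleeps until all
 * other holders are gone, after which the object can be freed safely:
 *
 *	refcnt_finalize(&f->f_refcnt, "foofin");
 *	pool_put(&foo_pool, f);
 */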