/*	$NetBSD: kern_sleepq.c,v 1.37 2009/10/21 21:12:06 rmind Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.37 2009/10/21 21:12:06 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

#include <uvm/uvm_extern.h>

#include "opt_sa.h"

int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t	sleeptab;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		st->st_queues[i].st_mutex =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		sleepq_init(sq);
	}
}

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq)
{

	TAILQ_INIT(sq);
}
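
/*
 * Illustrative sketch (not part of the original file): the sleep table
 * above pairs each hash bucket with a sleep queue and a spin mutex.
 * Callers such as sleeptab_lookup()/sleepq_enter() in <sys/sleepq.h>
 * hash the wait channel address to pick a bucket and take that bucket's
 * mutex before touching the queue.  The hash below is a stand-in for
 * the real SLEEPTAB_HASH() macro; only the overall shape is suggested
 * here, not the exact details.
 */
#if 0	/* example only, assumes a power-of-two SLEEPTAB_HASH_SIZE */
static sleepq_t *
example_sleeptab_lookup(sleeptab_t *st, wchan_t wchan, kmutex_t **mp)
{
	u_int hash;

	/* Hypothetical hash: drop low-order bits, mask into the table. */
	hash = ((uintptr_t)wchan >> 8) & (SLEEPTAB_HASH_SIZE - 1);

	/* Lock the bucket and hand back its queue and mutex. */
	*mp = st->st_queues[hash].st_mutex;
	mutex_spin_enter(*mp);
	return &st->st_queues[hash].st_queue;
}
#endif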

/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.
 */
void
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, NULL));

	TAILQ_REMOVE(sq, l, l_sleepchain);
	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been suspended.  Let whoever
	 * is holding it stopped set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, spc->spc_lwplock);
		return;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_pflag & LP_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, spc->spc_lwplock);
		return;
	}

	/* Update the sleep time delta and call the scheduler's wake-up handler. */
	l->l_slpticksum += (hardclock_ticks - l->l_slpticks);
	sched_wakeup(l);

	/* Look for a CPU to wake up on. */
	l->l_cpu = sched_takecpu(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
#ifdef KERN_SA
	if (l->l_proc->p_sa != NULL)
		sa_awaken(l);
#endif /* KERN_SA */
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	sched_enqueue(l, false);
	spc_unlock(ci);
}

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
	lwp_t *l2;
	const int pri = lwp_eprio(l);

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_FOREACH(l2, sq, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(sq, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(sq, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(lwp_locked(l, NULL));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sleepq_insert(sq, l, sobj);

	/* Record the time at which the thread went to sleep. */
	l->l_slpticks = hardclock_ticks;
	sched_slept(l);
}

/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch.
 *	sleepq_block() may return early under exceptional conditions, for
 *	example if the LWP's containing process is exiting.
 */
int
sleepq_block(int timo, bool catch)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;
	int biglocks = l->l_biglocks;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock */
		lwp_unsleep(l, true);
	} else {
		if (timo)
			callout_schedule(&l->l_timeout_ch, timo);

#ifdef KERN_SA
		if (((l->l_flag & LW_SA) != 0) && (~l->l_pflag & LP_SA_NOBLOCK))
			sa_switch(l);
		else
#endif
			mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we
			 * need to stop it in order to synchronise with
			 * other CPUs.
			 */
			if (callout_halt(&l->l_timeout_ch, NULL))
				error = EWOULDBLOCK;
		}
	}

	if (catch && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			/*
			 * Acquiring p_lock may cause us to recurse
			 * through the sleep path and back into this
			 * routine, but is safe because LWPs sleeping
			 * on locks are non-interruptible.  We will
			 * not recurse again.
			 */
			mutex_enter(p->p_lock);
			if ((sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(p->p_lock);
		}
	}

	ktrcsw(0, 0);
	if (__predict_false(biglocks != 0)) {
		KERNEL_LOCK(biglocks, NULL);
	}
	return error;
}

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp)
{
	lwp_t *l, *next;

	KASSERT(mutex_owned(mp));

	for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		KASSERT(l->l_mutex == mp);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	mutex_spin_exit(mp);
	return l;
}

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	release it if "cleanup" is true.
 */
void
sleepq_unsleep(lwp_t *l, bool cleanup)
{
	sleepq_t *sq = l->l_sleepq;
	kmutex_t *mp = l->l_mutex;

	KASSERT(lwp_locked(l, mp));
	KASSERT(l->l_wchan != NULL);

	sleepq_remove(sq, l);
	if (cleanup) {
		mutex_spin_exit(mp);
	}
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on a
 *	sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l, true);
}
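
/*
 * Illustrative sketch (not part of the original file): sleepq_block()
 * arms the per-LWP callout l_timeout_ch with callout_schedule(), and
 * the callout(9) layer later invokes sleepq_timeout() with the LWP as
 * its argument.  The binding of handler to callout is not done in this
 * file; it happens during LWP setup and would look roughly like the
 * lines below, using only documented callout(9) calls.
 */
#if 0	/* example only; the real setup lives in the LWP creation path */
static void
example_sleepq_callout_setup(lwp_t *l)
{

	/* Bind sleepq_timeout() to the per-LWP timeout callout. */
	callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);

	/*
	 * A timed sleep is then armed by sleepq_block() via
	 * callout_schedule(&l->l_timeout_ch, timo) and expires through
	 * sleepq_timeout() -> lwp_unsleep().
	 */
}
#endif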

/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(p->p_lock));

	/*
	 * If this sleep was canceled, don't let the syscall restart.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero, and
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	extern int safepri;
	int s;

	s = splhigh();
	splx(safepri);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}

/*
 * sleepq_changepri:
 *
 *	Adjust the priority of an LWP residing on a sleepq.  This method
 *	only alters the user priority; if the effective priority changes
 *	and the queue is sorted, the LWP is re-inserted at its new position.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, NULL));

	opri = lwp_eprio(l);
	l->l_priority = pri;

	if (lwp_eprio(l) == opri) {
		return;
	}
	if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) {
		return;
	}

	/*
	 * Don't let the sleep queue become empty, even briefly.
	 * cv_signal() and cv_broadcast() inspect it without the
	 * sleep queue lock held and need to see a non-empty queue
	 * head if there are waiters.
	 */
	if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) {
		return;
	}
	TAILQ_REMOVE(sq, l, l_sleepchain);
	sleepq_insert(sq, l, l->l_syncobj);
}

/*
 * sleepq_lendpri:
 *
 *	Lend a priority to an LWP residing on a sleepq.  Like
 *	sleepq_changepri(), but alters the inherited (lent) priority
 *	rather than the user priority.
 */
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, NULL));

	opri = lwp_eprio(l);
	l->l_inheritedprio = pri;

	if (lwp_eprio(l) == opri) {
		return;
	}
	if ((l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) == 0) {
		return;
	}

	/*
	 * Don't let the sleep queue become empty, even briefly.
	 * cv_signal() and cv_broadcast() inspect it without the
	 * sleep queue lock held and need to see a non-empty queue
	 * head if there are waiters.
	 */
	if (TAILQ_FIRST(sq) == l && TAILQ_NEXT(l, l_sleepchain) == NULL) {
		return;
	}
	TAILQ_REMOVE(sq, l, l_sleepchain);
	sleepq_insert(sq, l, l->l_syncobj);
}
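
/*
 * Illustrative sketch (not part of the original file): how a simple
 * wait/wake primitive typically drives the interfaces above.  The
 * sleeper looks up the queue for its wait channel, parks itself with
 * sleepq_enqueue() and switches away in sleepq_block(); the waker
 * looks up the same queue and calls sleepq_wake().  sleeptab_lookup(),
 * sleepq_enter() and sleep_syncobj are assumed to have the interfaces
 * suggested by the sleepq_enqueue() comment and <sys/sleepq.h>; the
 * wait channel "example_wchan" is purely hypothetical.
 */
#if 0	/* example only */
static int
example_wait(wchan_t example_wchan, int timo)
{
	sleepq_t *sq;
	kmutex_t *mp;

	/* Hash the wait channel to a queue and lock that queue. */
	sq = sleeptab_lookup(&sleeptab, example_wchan, &mp);

	/* Lock curlwp against the queue and drop the kernel lock. */
	sleepq_enter(sq, curlwp, mp);

	/* Mark curlwp asleep on the channel and insert it into the queue. */
	sleepq_enqueue(sq, example_wchan, "example", &sleep_syncobj);

	/* Switch away; sleep interruptibly, with an optional timeout. */
	return sleepq_block(timo, true);
}

static void
example_wake_all(wchan_t example_wchan)
{
	sleepq_t *sq;
	kmutex_t *mp;

	/* Find and lock the queue, then wake every LWP on the channel. */
	sq = sleeptab_lookup(&sleeptab, example_wchan, &mp);
	sleepq_wake(sq, example_wchan, (u_int)-1, mp);
}
#endif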