/*	$NetBSD: kern_sleepq.c,v 1.29 2008/05/19 12:48:54 rmind Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.29 2008/05/19 12:48:54 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

#include <uvm/uvm_extern.h>

int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t	sleeptab;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		mutex_init(&st->st_queues[i].st_mutex, MUTEX_DEFAULT,
		    IPL_SCHED);
		sleepq_init(sq, &st->st_queues[i].st_mutex);
	}
}

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq, kmutex_t *mtx)
{

	sq->sq_waiters = 0;
	sq->sq_mutex = mtx;
	TAILQ_INIT(&sq->sq_queue);
}
/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.  Return non-zero if
 *	the LWP is swapped out; if so the caller needs to awaken the swapper
 *	to bring the LWP into memory.
 */
int
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(sq->sq_waiters > 0);

	sq->sq_waiters--;
	TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);

#ifdef DIAGNOSTIC
	if (sq->sq_waiters == 0)
		KASSERT(TAILQ_FIRST(&sq->sq_queue) == NULL);
	else
		KASSERT(TAILQ_FIRST(&sq->sq_queue) != NULL);
#endif

	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been suspended.  Let whoever
	 * holds it stopped set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/* Update the sleep time delta and call the scheduler's wake-up handler. */
	l->l_slpticksum += (hardclock_ticks - l->l_slpticks);
	sched_wakeup(l);

	/* Look for a CPU to wake up on. */
	l->l_cpu = sched_takecpu(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	if ((l->l_flag & LW_INMEM) != 0) {
		sched_enqueue(l, false);
		spc_unlock(ci);
		return 0;
	}
	spc_unlock(ci);
	return 1;
}

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
	lwp_t *l2;
	const int pri = lwp_eprio(l);

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_FOREACH(l2, &sq->sq_queue, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(&sq->sq_queue, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(&sq->sq_queue, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sq->sq_waiters++;
	sleepq_insert(sq, l, sobj);

	/* Record the time at which the LWP went to sleep. */
	l->l_slpticks = hardclock_ticks;
	sched_slept(l);
}
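/*
 * Example (not part of the original file): a minimal sketch of the
 * sleep-side protocol alluded to in the sleepq_enqueue() comment above.
 * The exact sleeptab_lookup()/sleepq_enter() signatures and the
 * sleep_syncobj object are assumptions drawn from the callers this file
 * mentions; see sys/sleepq.h and the real callers for the authoritative
 * interface.  Kept under #if 0 so it is never compiled.
 */
#if 0
static int
example_sleep(wchan_t wchan, int timo)
{
	sleepq_t *sq;

	sq = sleeptab_lookup(&sleeptab, wchan);	/* hash wchan, lock the queue */
	sleepq_enter(sq, curlwp);		/* lock the LWP, drop the kernel lock */
	sleepq_enqueue(sq, wchan, "example", &sleep_syncobj);
	return sleepq_block(timo, true);	/* switch away; may return EINTR etc. */
}
#endif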
/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch.
 *	sleepq_block() may return early under exceptional conditions, for
 *	example if the LWP's containing process is exiting.
 */
int
sleepq_block(int timo, bool catch)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l, true);
	} else {
		if (timo)
			callout_schedule(&l->l_timeout_ch, timo);
		mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we need
			 * to stop it in order to synchronise with other CPUs.
			 */
			if (callout_halt(&l->l_timeout_ch, NULL))
				error = EWOULDBLOCK;
		}
	}

	if (catch && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			mutex_enter(p->p_lock);
			if ((sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(p->p_lock);
		}
	}

	ktrcsw(0, 0);

	KERNEL_LOCK(l->l_biglocks, l);
	return error;
}

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected)
{
	lwp_t *l, *next;
	int swapin = 0;

	KASSERT(mutex_owned(sq->sq_mutex));

	for (l = TAILQ_FIRST(&sq->sq_queue); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		KASSERT(l->l_mutex == sq->sq_mutex);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		swapin |= sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	sleepq_unlock(sq);

	/*
	 * If there are newly awakened threads that need to be swapped in,
	 * then kick the swapper into action.
	 */
	if (swapin)
		uvm_kick_scheduler();

	return l;
}

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	always release it.
 */
u_int
sleepq_unsleep(lwp_t *l, bool cleanup)
{
	sleepq_t *sq = l->l_sleepq;
	int swapin;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(l->l_wchan != NULL);

	swapin = sleepq_remove(sq, l);

	if (cleanup) {
		sleepq_unlock(sq);
		if (swapin)
			uvm_kick_scheduler();
	}

	return swapin;
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on a
 *	sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l, true);
}
/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(p->p_lock));

	/*
	 * If this sleep was canceled, don't let the syscall restart.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero, and
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	extern int safepri;
	int s;

	s = splhigh();
	splx(safepri);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}

/*
 * sleepq_changepri:
 *
 *	Adjust the priority of an LWP residing on a sleepq.  This method
 *	will only alter the user priority; the effective priority is
 *	assumed to have been fixed at the time of insertion into the queue.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_priority = pri;
	if (lwp_eprio(l) != opri) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}

/*
 * sleepq_lendpri:
 *
 *	Lend a priority to an LWP residing on a sleepq.  If the effective
 *	priority changes and the queue is priority-sorted, re-insert the
 *	LWP at its new position.
 */
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_inheritedprio = pri;

	if (lwp_eprio(l) != opri &&
	    (l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}
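/*
 * Example (not part of the original file): a minimal wake-side sketch,
 * assuming the same sleeptab_lookup() interface as in the sleep-side
 * example earlier in this file.  sleepq_wake() unlocks the queue before
 * returning.  Kept under #if 0 so it is never compiled.
 */
#if 0
static void
example_wakeup(wchan_t wchan)
{
	sleepq_t *sq;

	sq = sleeptab_lookup(&sleeptab, wchan);		/* hash wchan, lock the queue */
	(void)sleepq_wake(sq, wchan, (u_int)-1);	/* wake every LWP on wchan */
}
#endif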