/*	$NetBSD: kern_sleepq.c,v 1.28 2008/04/28 20:24:03 martin Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.28 2008/04/28 20:24:03 martin Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>

#include <uvm/uvm_extern.h>

int	sleepq_sigtoerror(lwp_t *, int);

/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t	sleeptab;

/*
 * sleeptab_init:
 *
 *	Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
	sleepq_t *sq;
	int i;

	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
		sq = &st->st_queues[i].st_queue;
		mutex_init(&st->st_queues[i].st_mutex, MUTEX_DEFAULT,
		    IPL_SCHED);
		sleepq_init(sq, &st->st_queues[i].st_mutex);
	}
}

/*
 * sleepq_init:
 *
 *	Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq, kmutex_t *mtx)
{

	sq->sq_waiters = 0;
	sq->sq_mutex = mtx;
	TAILQ_INIT(&sq->sq_queue);
}

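/*
 * Illustrative sketch only (not part of the interfaces defined in this
 * file): how a wait channel maps to a bucket of the general sleep table.
 * This assumes the SLEEPTAB_HASH() macro and the st_queues[] layout from
 * <sys/sleepq.h>; the real lookup is performed by sleeptab_lookup(),
 * which is also expected to return with the queue's spin mutex held.
 *
 *	sleepq_t *
 *	example_lookup(sleeptab_t *st, wchan_t wchan)
 *	{
 *		sleepq_t *sq = &st->st_queues[SLEEPTAB_HASH(wchan)].st_queue;
 *
 *		mutex_spin_enter(sq->sq_mutex);
 *		return sq;
 *	}
 */
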
/*
 * sleepq_remove:
 *
 *	Remove an LWP from a sleep queue and wake it up.  Return non-zero if
 *	the LWP is swapped out; if so the caller needs to awaken the swapper
 *	to bring the LWP into memory.
 */
int
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
	struct schedstate_percpu *spc;
	struct cpu_info *ci;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(sq->sq_waiters > 0);

	sq->sq_waiters--;
	TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);

#ifdef DIAGNOSTIC
	if (sq->sq_waiters == 0)
		KASSERT(TAILQ_FIRST(&sq->sq_queue) == NULL);
	else
		KASSERT(TAILQ_FIRST(&sq->sq_queue) != NULL);
#endif

	l->l_syncobj = &sched_syncobj;
	l->l_wchan = NULL;
	l->l_sleepq = NULL;
	l->l_flag &= ~LW_SINTR;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * If not sleeping, the LWP must have been suspended.  Let whoever
	 * holds it stopped set it running again.
	 */
	if (l->l_stat != LSSLEEP) {
		KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_setlock(l, spc->spc_lwplock);
		return 0;
	}

	/*
	 * Call the wake-up handler of the scheduler.  It might change
	 * the CPU for this thread.
	 */
	sched_wakeup(l);
	ci = l->l_cpu;
	spc = &ci->ci_schedstate;

	/*
	 * Set it running.
	 */
	spc_lock(ci);
	lwp_setlock(l, spc->spc_mutex);
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;
	if ((l->l_flag & LW_INMEM) != 0) {
		sched_enqueue(l, false);
		spc_unlock(ci);
		return 0;
	}
	spc_unlock(ci);
	return 1;
}

/*
 * sleepq_insert:
 *
 *	Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
	lwp_t *l2;
	const int pri = lwp_eprio(l);

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_FOREACH(l2, &sq->sq_queue, l_sleepchain) {
			if (lwp_eprio(l2) < pri) {
				TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
				return;
			}
		}
	}

	if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
		TAILQ_INSERT_HEAD(&sq->sq_queue, l, l_sleepchain);
	else
		TAILQ_INSERT_TAIL(&sq->sq_queue, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 *	Enter an LWP into the sleep queue and prepare for sleep.  The sleep
 *	queue must already be locked, and any interlock (such as the kernel
 *	lock) must have been released (see sleeptab_lookup(), sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, wchan_t wchan, const char *wmesg, syncobj_t *sobj)
{
	lwp_t *l = curlwp;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(l->l_stat == LSONPROC);
	KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

	l->l_syncobj = sobj;
	l->l_wchan = wchan;
	l->l_sleepq = sq;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_stat = LSSLEEP;
	l->l_sleeperr = 0;

	sq->sq_waiters++;
	sleepq_insert(sq, l, sobj);
	sched_slept(l);
}

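/*
 * Illustrative sketch only: the sequence a higher-level primitive (for
 * example, a condition variable) typically follows around
 * sleepq_enqueue() and sleepq_block().  sleeptab_lookup() and
 * sleepq_enter() are the helpers referenced in the comment above; the
 * wait channel, wait message and syncobj shown here are hypothetical.
 *
 *	sq = sleeptab_lookup(&sleeptab, wchan);
 *	sleepq_enter(sq, l);		// lock sq, lend its mutex to the LWP
 *	sleepq_enqueue(sq, wchan, "example", &example_syncobj);
 *	error = sleepq_block(timo, true);
 *	// on return, the LWP and the sleep queue are unlocked
 */
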
/*
 * sleepq_block:
 *
 *	After any intermediate step such as releasing an interlock, switch.
 *	sleepq_block() may return early under exceptional conditions, for
 *	example if the LWP's containing process is exiting.
 */
int
sleepq_block(int timo, bool catch)
{
	int error = 0, sig;
	struct proc *p;
	lwp_t *l = curlwp;
	bool early = false;

	ktrcsw(1, 0);

	/*
	 * If sleeping interruptibly, check for pending signals, exits or
	 * core dump events.
	 */
	if (catch) {
		l->l_flag |= LW_SINTR;
		if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
			l->l_flag &= ~LW_CANCELLED;
			error = EINTR;
			early = true;
		} else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
			early = true;
	}

	if (early) {
		/* lwp_unsleep() will release the lock */
		lwp_unsleep(l, true);
	} else {
		if (timo)
			callout_schedule(&l->l_timeout_ch, timo);
		mi_switch(l);

		/* The LWP and sleep queue are now unlocked. */
		if (timo) {
			/*
			 * Even if the callout appears to have fired, we
			 * need to stop it in order to synchronise with
			 * other CPUs.
			 */
			if (callout_halt(&l->l_timeout_ch, NULL))
				error = EWOULDBLOCK;
		}
	}

	if (catch && error == 0) {
		p = l->l_proc;
		if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
			error = EINTR;
		else if ((l->l_flag & LW_PENDSIG) != 0) {
			mutex_enter(p->p_lock);
			if ((sig = issignal(l)) != 0)
				error = sleepq_sigtoerror(l, sig);
			mutex_exit(p->p_lock);
		}
	}

	ktrcsw(0, 0);

	KERNEL_LOCK(l->l_biglocks, l);
	return error;
}

/*
 * sleepq_wake:
 *
 *	Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected)
{
	lwp_t *l, *next;
	int swapin = 0;

	KASSERT(mutex_owned(sq->sq_mutex));

	for (l = TAILQ_FIRST(&sq->sq_queue); l != NULL; l = next) {
		KASSERT(l->l_sleepq == sq);
		KASSERT(l->l_mutex == sq->sq_mutex);
		next = TAILQ_NEXT(l, l_sleepchain);
		if (l->l_wchan != wchan)
			continue;
		swapin |= sleepq_remove(sq, l);
		if (--expected == 0)
			break;
	}

	sleepq_unlock(sq);

	/*
	 * If there are newly awakened threads that need to be swapped in,
	 * then kick the swapper into action.
	 */
	if (swapin)
		uvm_kick_scheduler();

	return l;
}

/*
 * sleepq_unsleep:
 *
 *	Remove an LWP from its sleep queue and set it runnable again.
 *	sleepq_unsleep() is called with the LWP's mutex held, and will
 *	release it if 'cleanup' is true.
 */
u_int
sleepq_unsleep(lwp_t *l, bool cleanup)
{
	sleepq_t *sq = l->l_sleepq;
	int swapin;

	KASSERT(lwp_locked(l, sq->sq_mutex));
	KASSERT(l->l_wchan != NULL);

	swapin = sleepq_remove(sq, l);

	if (cleanup) {
		sleepq_unlock(sq);
		if (swapin)
			uvm_kick_scheduler();
	}

	return swapin;
}

/*
 * sleepq_timeout:
 *
 *	Entered via the callout(9) subsystem to time out an LWP that is on
 *	a sleep queue.
 */
void
sleepq_timeout(void *arg)
{
	lwp_t *l = arg;

	/*
	 * Lock the LWP.  Assuming it's still on the sleep queue, its
	 * current mutex will also be the sleep queue mutex.
	 */
	lwp_lock(l);

	if (l->l_wchan == NULL) {
		/* Somebody beat us to it. */
		lwp_unlock(l);
		return;
	}

	lwp_unsleep(l, true);
}

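/*
 * Illustrative sketch only: how the per-LWP callout that fires
 * sleepq_timeout() would typically be prepared when an LWP is set up,
 * using the standard callout(9) interfaces.  The flag and the placement
 * of this code are assumptions, not something defined by this file.
 *
 *	callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
 *	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
 *
 * With that in place, sleepq_block() only has to arm the callout via
 * callout_schedule(&l->l_timeout_ch, timo), as above.
 */
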
/*
 * sleepq_sigtoerror:
 *
 *	Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
	struct proc *p = l->l_proc;
	int error;

	KASSERT(mutex_owned(p->p_lock));

	/*
	 * If this sleep was canceled, don't let the syscall restart.
	 */
	if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
		error = EINTR;
	else
		error = ERESTART;

	return error;
}

/*
 * sleepq_abort:
 *
 *	After a panic or during autoconfiguration, lower the interrupt
 *	priority level to give pending interrupts a chance to run, and
 *	then return.  Called if sleepq_dontsleep() returns non-zero, and
 *	always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
	extern int safepri;
	int s;

	s = splhigh();
	splx(safepri);
	splx(s);
	if (mtx != NULL && unlock != 0)
		mutex_exit(mtx);

	return 0;
}

/*
 * sleepq_changepri:
 *
 *	Adjust the priority of an LWP residing on a sleepq.  This method
 *	only alters the user priority; if the resulting effective priority
 *	differs, the LWP is re-inserted into the queue.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_priority = pri;
	if (lwp_eprio(l) != opri) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}

/*
 * sleepq_lendpri:
 *
 *	Lend a new priority to an LWP residing on a sleepq.  If the
 *	effective priority changes and the queue is priority sorted,
 *	the LWP is re-inserted at its new position.
 */
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
	sleepq_t *sq = l->l_sleepq;
	pri_t opri;

	KASSERT(lwp_locked(l, sq->sq_mutex));

	opri = lwp_eprio(l);
	l->l_inheritedprio = pri;

	if (lwp_eprio(l) != opri &&
	    (l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
		TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
		sleepq_insert(sq, l, l->l_syncobj);
	}
}

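/*
 * Illustrative sketch only: how a priority-inheritance path such as a
 * turnstile would reach sleepq_changepri()/sleepq_lendpri().  The
 * dispatch through the LWP's current syncobj and the lwp_lendpri()
 * wrapper are named here as assumptions about the surrounding kernel,
 * not as definitions made by this file.
 *
 *	lwp_lock(l);
 *	lwp_lendpri(l, pri);	// dispatches to sleepq_lendpri() while asleep
 *	lwp_unlock(l);
 */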