/*	$NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad Exp $");

#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int	lbolt;			/* once a second sleep address */
int	rrticks;		/* number of hardclock ticks per roundrobin() */

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	XXX_SCHED_LOCK		simple_lock(&sched_lock)
#define	XXX_SCHED_UNLOCK	simple_unlock(&sched_lock)
#else
#define	XXX_SCHED_LOCK		/* nothing */
#define	XXX_SCHED_UNLOCK	/* nothing */
#endif

/*
 * Sleep queues.
 *
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define	SLPQUE_TABLESIZE	128
#define	SLPQUE_LOOKUP(x)	(((u_long)(x) >> 8) & (SLPQUE_TABLESIZE - 1))

#define	SLPQUE(ident)	(&sched_slpque[SLPQUE_LOOKUP(ident)])

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];		/* run queues */
volatile uint32_t sched_whichqs;		/* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];	/* sleep queues */

struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;

void schedcpu(void *);
void updatepri(struct lwp *);
void endtsleep(void *);

inline void sa_awaken(struct lwp *);
inline void awaken(struct lwp *);

struct callout schedcpu_ch = CALLOUT_INITIALIZER_SETFUNC(schedcpu, NULL);
static unsigned int schedcpu_ticks;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
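 *
 * For example, with the common hz value of 100 this makes rrticks equal
 * to 10, so roundrobin() runs once every 10 hardclock ticks, i.e. every
 * 100ms as stated above.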
 */
/* ARGSUSED */
void
roundrobin(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	spc->spc_rrticks = rrticks;

	if (curlwp != NULL) {
		if (spc->spc_flags & SPCF_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			spc->spc_flags |= SPCF_SHOULDYIELD;
		} else
			spc->spc_flags |= SPCF_SEENRR;
	}
	need_resched(curcpu());
}

#define	PPQ		(128 / RUNQUE_NQS)	/* priorities per queue */
#define	NICE_WEIGHT	2			/* priorities per nice level */

#define	ESTCPU_SHIFT	11
#define	ESTCPU_MAX	((NICE_WEIGHT * PRIO_MAX - PPQ) << ESTCPU_SHIFT)
#define	ESTCPULIM(e)	min((e), ESTCPU_MAX)

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav:	1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))

static fixpt_t
decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
{

	if (estcpu == 0) {
		return 0;
	}

#if !defined(_LP64)
	/* avoid 64-bit arithmetic */
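	/*
	 * Note on the guard below: p_estcpu is kept within ESTCPU_MAX by
	 * ESTCPULIM(), so whenever loadfac <= FIXPT_MAX / ESTCPU_MAX the
	 * product estcpu * loadfac cannot exceed FIXPT_MAX and the fixpt_t
	 * multiplication cannot overflow; only for larger load factors do
	 * we fall back to the 64-bit computation further down.
	 */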
#define	FIXPT_MAX ((fixpt_t)((UINTMAX_C(1) << sizeof(fixpt_t) * CHAR_BIT) - 1))
	if (__predict_true(loadfac <= FIXPT_MAX / ESTCPU_MAX)) {
		return estcpu * loadfac / (loadfac + FSCALE);
	}
#endif /* !defined(_LP64) */

	return (uint64_t)estcpu * loadfac / (loadfac + FSCALE);
}

/*
 * For all load averages >= 1 and max p_estcpu of (255 << ESTCPU_SHIFT),
 * sleeping for at least seven times the loadfactor will decay p_estcpu to
 * less than (1 << ESTCPU_SHIFT).
 *
 * Note that our ESTCPU_MAX is actually much smaller than
 * (255 << ESTCPU_SHIFT).
 */
static fixpt_t
decay_cpu_batch(fixpt_t loadfac, fixpt_t estcpu, unsigned int n)
{

	if ((n << FSHIFT) >= 7 * loadfac) {
		return 0;
	}

	while (estcpu != 0 && n > 1) {
		estcpu = decay_cpu(loadfac, estcpu);
		n--;
	}

	return estcpu;
}

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct lwp *l;
	struct proc *p;
	int s, minslp;
	int clkhz;

	schedcpu_ticks++;

	proclist_lock_read();
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
		}
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (minslp > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
		splx(s);	/* Done with the process CPU ticks update */
		SCHED_LOCK(s);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l->l_slptime > 1)
				continue;
			resetpriority(l);
			if (l->l_priority >= PUSER) {
				if (l->l_stat == LSRUN &&
				    (l->l_flag & L_INMEM) &&
				    (l->l_priority / PPQ) != (l->l_usrpri / PPQ)) {
					remrunqueue(l);
					l->l_priority = l->l_usrpri;
					setrunqueue(l);
				} else
					l->l_priority = l->l_usrpri;
			}
		}
		SCHED_UNLOCK(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	callout_schedule(&schedcpu_ch, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 */
void
updatepri(struct lwp *l)
{
	struct proc *p = l->l_proc;
	fixpt_t loadfac;

	SCHED_ASSERT_LOCKED();
	KASSERT(l->l_slptime > 1);

	loadfac = loadfactor(averunnable.ldavg[0]);

	l->l_slptime--;		/* the first time was done in schedcpu */
	/* XXX NJWLWP */
	p->p_estcpu = decay_cpu_batch(loadfac, p->p_estcpu, l->l_slptime);
	resetpriority(l);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If the priority includes the PCATCH flag, signals
 * are checked before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened, or EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the sched_lock is acquired.  The
 * interlock will be re-locked before returning to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(volatile const void *ident, int priority, const char *wmesg, int timo,
    volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	struct proc *p = l ? l->l_proc : NULL;
	struct slpque *qp;
	struct sadata_upcall *sau;
	int sig, s;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;
	int exiterr = (priority & PNOEXITERR) == 0;

	/*
	 * XXXSMP
	 * This is probably bogus.  Figure out what the right
	 * thing to do here really is.
	 * Note that not sleeping if ltsleep is called with curlwp == NULL
	 * in the shutdown case is disgusting but partly necessary given
	 * how shutdown (barely) works.
	 */
	if (cold || (doing_shutdown && (panicstr || (l == NULL)))) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}

	KASSERT(p != NULL);
	LOCK_ASSERT(interlock == NULL || simple_lock_held(interlock));

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(l, 1, 0);
#endif

	/*
	 * XXX We need to allocate the sadata_upcall structure here,
	 * XXX since we can't sleep while waiting for memory inside
	 * XXX sa_upcall().  It would be nice if we could safely
	 * XXX allocate the sadata_upcall structure on the stack, here.
	 */
	if (l->l_flag & L_SA) {
		sau = sadata_upcall_alloc(0);
	} else {
		sau = NULL;
	}

	SCHED_LOCK(s);

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("ltsleep: ident == NULL");
	if (l->l_stat != LSONPROC)
		panic("ltsleep: l_stat %d != LSONPROC", l->l_stat);
	if (l->l_back != NULL)
		panic("ltsleep: l_back != NULL");
#endif

	l->l_wchan = ident;
	l->l_wmesg = wmesg;
	l->l_slptime = 0;
	l->l_priority = priority & PRIMASK;

	qp = SLPQUE(ident);
	if (qp->sq_head == 0)
		qp->sq_head = l;
	else {
		*qp->sq_tailp = l;
	}
	*(qp->sq_tailp = &l->l_forw) = 0;

	if (timo)
		callout_reset(&l->l_tsleep_ch, timo, endtsleep, l);

	/*
	 * We can now release the interlock; the sched_lock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		XXX_SCHED_UNLOCK;
		l->l_flag |= L_SINTR;
		if (((sig = CURSIG(l)) != 0) ||
		    ((p->p_flag & P_WEXIT) && p->p_nlwps > 1)) {
			XXX_SCHED_LOCK;
			if (l->l_wchan != NULL)
				unsleep(l);
			l->l_stat = LSONPROC;
			SCHED_UNLOCK(s);
			goto resume;
		}
		XXX_SCHED_LOCK;
		if (l->l_wchan == NULL) {
			SCHED_UNLOCK(s);
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	l->l_stat = LSSLEEP;
	p->p_nrlwps--;
	p->p_stats->p_ru.ru_nvcsw++;
	SCHED_ASSERT_LOCKED();
	if (l->l_flag & L_SA)
		sa_switch(l, sau, SA_UPCALL_BLOCKED);
	else
		mi_switch(l, NULL);

#ifdef KERN_SYNCH_BPENDTSLEEP_LABEL
	/*
	 * XXX
	 * The gcc4 optimizer will duplicate this asm statement on some
	 * architectures, which causes a multiple symbol definition error
	 * in gas.  The kernel Makefile is set up to use -fno-reorder-blocks
	 * if this option is set.
	 */
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep\nbpendtsleep:");
#endif
	/*
	 * p->p_nrlwps is incremented by whoever made us runnable again,
	 * either setrunnable() or awaken().
	 */

	SCHED_ASSERT_UNLOCKED();
	splx(s);

 resume:
	KDASSERT(l->l_cpu != NULL);
	KDASSERT(l->l_cpu == curcpu());
	l->l_cpu->ci_schedstate.spc_curpriority = l->l_usrpri;

	l->l_flag &= ~L_SINTR;
	if (l->l_flag & L_TIMEOUT) {
		l->l_flag &= ~(L_TIMEOUT|L_CANCELLED);
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(l, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		callout_stop(&l->l_tsleep_ch);

	if (catch) {
		const int cancelled = l->l_flag & L_CANCELLED;
		l->l_flag &= ~L_CANCELLED;
		if (sig != 0 || (sig = CURSIG(l)) != 0 || cancelled) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(l, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			/*
			 * If this sleep was canceled, don't let the syscall
			 * restart.
			 */
			if (cancelled ||
			    (SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
				return (EINTR);
			return (ERESTART);
		}
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(l, 0, 0);
#endif
	if (relock && interlock != NULL)
		simple_lock(interlock);

	/*
	 * XXXNJW this is very much a kluge.
	 * revisit. a better way of preventing looping/hanging syscalls like
	 * wait4() and _lwp_wait() from wedging an exiting process
	 * would be preferred.
	 */
	if (catch && ((p->p_flag & P_WEXIT) && p->p_nlwps > 1 && exiterr))
		return (EINTR);
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct lwp *l;
	int s;

	l = (struct lwp *)arg;
	SCHED_LOCK(s);
	if (l->l_wchan) {
		if (l->l_stat == LSSLEEP)
			setrunnable(l);
		else
			unsleep(l);
		l->l_flag |= L_TIMEOUT;
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct lwp *l)
{
	struct slpque *qp;
	struct lwp **hp;

	SCHED_ASSERT_LOCKED();

	if (l->l_wchan) {
		hp = &(qp = SLPQUE(l->l_wchan))->sq_head;
		while (*hp != l)
			hp = &(*hp)->l_forw;
		*hp = l->l_forw;
		if (qp->sq_tailp == &l->l_forw)
			qp->sq_tailp = hp;
		l->l_wchan = 0;
	}
}

inline void
sa_awaken(struct lwp *l)
{

	SCHED_ASSERT_LOCKED();

	if (l == l->l_savp->savp_lwp && l->l_flag & L_SA_YIELD)
		l->l_flag &= ~L_SA_IDLE;
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
inline void
awaken(struct lwp *l)
{

	SCHED_ASSERT_LOCKED();

	if (l->l_proc->p_sa)
		sa_awaken(l);

	if (l->l_slptime > 1)
		updatepri(l);
	l->l_slptime = 0;
	l->l_stat = LSRUN;
	l->l_proc->p_nrlwps++;
	/*
	 * Since curpriority is a user priority, p->p_priority
	 * is always better than curpriority on the last CPU on
	 * which it ran.
	 *
	 * XXXSMP See affinity comment in resched_proc().
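	 *
	 * If the LWP's image is resident (L_INMEM), it goes straight onto
	 * a run queue and its last CPU is asked to reschedule; otherwise
	 * proc0 (the swapper) is woken so the LWP can be brought back
	 * into memory first.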
	 */
	if (l->l_flag & L_INMEM) {
		setrunqueue(l);
		KASSERT(l->l_cpu != NULL);
		need_resched(l->l_cpu);
	} else
		sched_wakeup(&proc0);
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
sched_unlock_idle(void)
{

	simple_unlock(&sched_lock);
}

void
sched_lock_idle(void)
{

	simple_lock(&sched_lock);
}
#endif /* MULTIPROCESSOR || LOCKDEBUG */

/*
 * Make all processes sleeping on the specified identifier runnable.
 */

void
wakeup(volatile const void *ident)
{
	int s;

	SCHED_ASSERT_UNLOCKED();

	SCHED_LOCK(s);
	sched_wakeup(ident);
	SCHED_UNLOCK(s);
}

void
sched_wakeup(volatile const void *ident)
{
	struct slpque *qp;
	struct lwp *l, **q;

	SCHED_ASSERT_LOCKED();

	qp = SLPQUE(ident);
 restart:
	for (q = &qp->sq_head; (l = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (l->l_back || (l->l_stat != LSSLEEP &&
		    l->l_stat != LSSTOP && l->l_stat != LSSUSPENDED))
			panic("wakeup");
#endif
		if (l->l_wchan == ident) {
			l->l_wchan = 0;
			*q = l->l_forw;
			if (qp->sq_tailp == &l->l_forw)
				qp->sq_tailp = q;
			if (l->l_stat == LSSLEEP) {
				awaken(l);
				goto restart;
			}
		} else
			q = &l->l_forw;
	}
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(volatile const void *ident)
{
	struct slpque *qp;
	struct lwp *l, **q;
	struct lwp *best_sleepp, **best_sleepq;
	struct lwp *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	SCHED_LOCK(s);

	qp = SLPQUE(ident);

	for (q = &qp->sq_head; (l = *q) != NULL; q = &l->l_forw) {
#ifdef DIAGNOSTIC
		if (l->l_back || (l->l_stat != LSSLEEP &&
		    l->l_stat != LSSTOP && l->l_stat != LSSUSPENDED))
			panic("wakeup_one");
#endif
		if (l->l_wchan == ident) {
			if (l->l_stat == LSSLEEP) {
				if (best_sleepp == NULL ||
				    l->l_priority < best_sleepp->l_priority) {
					best_sleepp = l;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    l->l_priority < best_stopp->l_priority) {
					best_stopp = l;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		l = best_sleepp;
		q = best_sleepq;
	} else {
		l = best_stopp;
		q = best_stopq;
	}

	if (l != NULL) {
		l->l_wchan = NULL;
		*q = l->l_forw;
		if (qp->sq_tailp == &l->l_forw)
			qp->sq_tailp = q;
		if (l->l_stat == LSSLEEP)
			awaken(l);
	}
	SCHED_UNLOCK(s);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (e.g. sched_yield(2) in compat
 * code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;
	int s;

	SCHED_LOCK(s);
	l->l_priority = l->l_usrpri;
	l->l_stat = LSRUN;
	setrunqueue(l);
	l->l_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch(l, NULL);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 * The 'more' ("more work to do") argument is boolean.  Returning to userspace
 * preempt() calls pass 0.  "Voluntary" preemptions in e.g. uiomove() pass 1.
 * This will be used to indicate to the SA subsystem that the LWP is
 * not yet finished in the kernel.
 */

void
preempt(int more)
{
	struct lwp *l = curlwp;
	int r, s;

	SCHED_LOCK(s);
	l->l_priority = l->l_usrpri;
	l->l_stat = LSRUN;
	setrunqueue(l);
	l->l_proc->p_stats->p_ru.ru_nivcsw++;
	r = mi_switch(l, NULL);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
	if ((l->l_flag & L_SA) != 0 && r != 0 && more == 0)
		sa_preempt(l);
}

/*
 * The machine-independent parts of context switch.
 * Must be called at splsched() (no higher!) and with
 * the sched_lock held.
 * Switch to "newl" if non-NULL, otherwise let cpu_switch() choose
 * the next LWP.
 *
 * Returns 1 if another process was actually run.
 */
int
mi_switch(struct lwp *l, struct lwp *newl)
{
	struct schedstate_percpu *spc;
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;
	int hold_count;
	struct proc *p = l->l_proc;
	int retval;

	SCHED_ASSERT_LOCKED();

	/*
	 * Release the kernel_lock, as we are about to yield the CPU.
	 * The scheduler lock is still held until cpu_switch()
	 * selects a new process and removes it from the run queue.
	 */
	hold_count = KERNEL_LOCK_RELEASE_ALL();

	KDASSERT(l->l_cpu != NULL);
	KDASSERT(l->l_cpu == curcpu());

	spc = &l->l_cpu->ci_schedstate;

#ifdef LOCKDEBUG
	spinlock_switchcheck();
	simple_lock_switchcheck();
#endif

	/*
	 * Compute the amount of time during which the current
	 * process was running.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec +
	    (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * If we are using h/w performance counters, save context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(p)) {
		pmc_save_context(p);
	}
#endif

	/*
	 * Switch to the new current process.  When we
	 * run again, we'll return back here.
	 */
	uvmexp.swtch++;
	if (newl == NULL) {
		retval = cpu_switch(l, NULL);
	} else {
		remrunqueue(newl);
		cpu_switchto(l, newl);
		retval = 0;
	}

	/*
	 * If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(p)) {
		pmc_restore_context(p);
	}
#endif

	/*
	 * Make sure that MD code released the scheduler lock before
	 * resuming us.
	 */
	SCHED_ASSERT_UNLOCKED();

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	KDASSERT(l->l_cpu != NULL);
	KDASSERT(l->l_cpu == curcpu());
	microtime(&l->l_cpu->ci_schedstate.spc_runtime);

	/*
	 * Reacquire the kernel_lock now.  We do this after we've
	 * released the scheduler lock to avoid deadlock, and before
	 * we reacquire the interlock.
	 */
	KERNEL_LOCK_ACQUIRE_COUNT(hold_count);

	/*
	 * Check if the process exceeds its CPU resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max) {
			psignal(p, SIGKILL);
		} else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime &&
	    kauth_cred_geteuid(p->p_cred) && p->p_nice == NZERO) {
		SCHED_LOCK(s);
		p->p_nice = autoniceval + NZERO;
		resetpriority(l);
		SCHED_UNLOCK(s);
	}

	return retval;
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	int i;

	for (i = 0; i < RUNQUE_NQS; i++)
		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
		    (struct lwp *)&sched_qs[i];
}

static inline void
resched_proc(struct lwp *l, u_char pri)
{
	struct cpu_info *ci;

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 *
	 * XXXSMP
	 * There is also the issue of locking the other CPU's
	 * sched state, which we currently do not do.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		need_resched(ci);
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;

	SCHED_ASSERT_LOCKED();

	switch (l->l_stat) {
	case 0:
	case LSRUN:
	case LSONPROC:
	case LSZOMB:
	case LSDEAD:
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_sigctx.ps_siglist, p->p_xstat);
			CHECKSIGS(p);
		}
	case LSSLEEP:
		unsleep(l);		/* e.g. when sending signals */
		break;

	case LSIDL:
		break;
	case LSSUSPENDED:
		break;
	}

	if (l->l_proc->p_sa)
		sa_awaken(l);

	l->l_stat = LSRUN;
	p->p_nrlwps++;

	if (l->l_flag & L_INMEM)
		setrunqueue(l);

	if (l->l_slptime > 1)
		updatepri(l);
	l->l_slptime = 0;
	if ((l->l_flag & L_INMEM) == 0)
		sched_wakeup((caddr_t)&proc0);
	else
		resched_proc(l, l->l_priority);
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(struct lwp *l)
{
	unsigned int newpriority;
	struct proc *p = l->l_proc;

	SCHED_ASSERT_LOCKED();

	newpriority = PUSER + (p->p_estcpu >> ESTCPU_SHIFT) +
	    NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	l->l_usrpri = newpriority;
	resched_proc(l, l->l_usrpri);
}

/*
 * Recompute priority for all LWPs in a process.
 */
void
resetprocpriority(struct proc *p)
{
	struct lwp *l;

	LIST_FOREACH(l, &p->p_lwps, l_sibling)
		resetpriority(l);
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The CPU usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The CPU usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

void
schedclock(struct lwp *l)
{
	struct proc *p = l->l_proc;
	int s;

	p->p_estcpu = ESTCPULIM(p->p_estcpu + (1 << ESTCPU_SHIFT));
	SCHED_LOCK(s);
	resetpriority(l);
	SCHED_UNLOCK(s);

	if (l->l_priority >= PUSER)
		l->l_priority = l->l_usrpri;
}

void
suspendsched()
{
	struct lwp *l;
	int s;

	/*
	 * Convert all non-P_SYSTEM LSSLEEP or LSRUN processes to
	 * LSSUSPENDED.
	 */
	proclist_lock_read();
	SCHED_LOCK(s);
	LIST_FOREACH(l, &alllwp, l_list) {
		if ((l->l_proc->p_flag & P_SYSTEM) != 0)
			continue;

		switch (l->l_stat) {
		case LSRUN:
			l->l_proc->p_nrlwps--;
			if ((l->l_flag & L_INMEM) != 0)
				remrunqueue(l);
			/* FALLTHROUGH */
		case LSSLEEP:
			l->l_stat = LSSUSPENDED;
			break;
		case LSONPROC:
			/*
			 * XXX SMP: we need to deal with processes on
			 * other CPUs!
			 */
			break;
		default:
			break;
		}
	}
	SCHED_UNLOCK(s);
	proclist_unlock_read();
}

/*
 * scheduler_fork_hook:
 *
 *	Inherit the parent's scheduler history.
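 *
 *	The child starts with the parent's current p_estcpu; that value and
 *	the current schedcpu tick are also recorded (p_estcpu_inherited,
 *	p_forktime) so that scheduler_wait_hook() below can later charge the
 *	parent only for the estcpu the child accumulated beyond its decayed
 *	inherited share.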
 */
void
scheduler_fork_hook(struct proc *parent, struct proc *child)
{

	child->p_estcpu = child->p_estcpu_inherited = parent->p_estcpu;
	child->p_forktime = schedcpu_ticks;
}

/*
 * scheduler_wait_hook:
 *
 *	Chargeback parents for the sins of their children.
 */
void
scheduler_wait_hook(struct proc *parent, struct proc *child)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	fixpt_t estcpu;

	/* XXX Only if parent != init?? */

	estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited,
	    schedcpu_ticks - child->p_forktime);
	if (child->p_estcpu > estcpu) {
		parent->p_estcpu =
		    ESTCPULIM(parent->p_estcpu + child->p_estcpu - estcpu);
	}
}

/*
 * Low-level routines to access the run queue.  Optimised assembler
 * routines can override these.
 */

#ifndef __HAVE_MD_RUNQUEUE

/*
 * On some architectures, it's faster to use an MSB ordering for the
 * priorities than the traditional LSB ordering.
 */
#ifdef __HAVE_BIGENDIAN_BITOPS
#define	RQMASK(n)	(0x80000000 >> (n))
#else
#define	RQMASK(n)	(0x00000001 << (n))
#endif

/*
 * The primitives that manipulate the run queues.  whichqs tells which
 * of the 32 queues qs have processes in them.  Setrunqueue puts processes
 * into queues, remrunqueue removes them from queues.  The running process is
 * on no queue, other processes are on a queue related to p->p_priority,
 * divided by 4 actually to shrink the 0-127 range of priorities into the 32
 * available queues.
 */

#ifdef RQDEBUG
static void
checkrunqueue(int whichq, struct lwp *l)
{
	const struct prochd * const rq = &sched_qs[whichq];
	struct lwp *l2;
	int found = 0;
	int die = 0;
	int empty = 1;

	for (l2 = rq->ph_link; l2 != (const void *)rq; l2 = l2->l_forw) {
		if (l2->l_stat != LSRUN) {
			printf("checkrunqueue[%d]: lwp %p state (%d) "
			    " != LSRUN\n", whichq, l2, l2->l_stat);
		}
		if (l2->l_back->l_forw != l2) {
			printf("checkrunqueue[%d]: lwp %p back-qptr (%p) "
			    "corrupt %p\n", whichq, l2, l2->l_back,
			    l2->l_back->l_forw);
			die = 1;
		}
		if (l2->l_forw->l_back != l2) {
			printf("checkrunqueue[%d]: lwp %p forw-qptr (%p) "
			    "corrupt %p\n", whichq, l2, l2->l_forw,
			    l2->l_forw->l_back);
			die = 1;
		}
		if (l2 == l)
			found = 1;
		empty = 0;
	}
	if (empty && (sched_whichqs & RQMASK(whichq)) != 0) {
		printf("checkrunqueue[%d]: bit set for empty run-queue %p\n",
		    whichq, rq);
		die = 1;
	} else if (!empty && (sched_whichqs & RQMASK(whichq)) == 0) {
		printf("checkrunqueue[%d]: bit clear for non-empty "
		    "run-queue %p\n", whichq, rq);
		die = 1;
	}
	if (l != NULL && (sched_whichqs & RQMASK(whichq)) == 0) {
		printf("checkrunqueue[%d]: bit clear for active lwp %p\n",
		    whichq, l);
		die = 1;
	}
	if (l != NULL && empty) {
		printf("checkrunqueue[%d]: empty run-queue %p with "
		    "active lwp %p\n", whichq, rq, l);
		die = 1;
	}
	if (l != NULL && !found) {
		printf("checkrunqueue[%d]: lwp %p not in runqueue %p!",
		    whichq, l, rq);
		die = 1;
	}
	if (die)
		panic("checkrunqueue: inconsistency found");
}
#endif /* RQDEBUG */

void
setrunqueue(struct lwp *l)
{
	struct prochd *rq;
	struct lwp *prev;
	const int whichq = l->l_priority / PPQ;

#ifdef RQDEBUG
	checkrunqueue(whichq, NULL);
#endif
#ifdef DIAGNOSTIC
	if (l->l_back != NULL || l->l_wchan != NULL || l->l_stat != LSRUN)
		panic("setrunqueue");
#endif
	sched_whichqs |= RQMASK(whichq);
	rq = &sched_qs[whichq];
	prev = rq->ph_rlink;
	l->l_forw = (struct lwp *)rq;
	rq->ph_rlink = l;
	prev->l_forw = l;
	l->l_back = prev;
#ifdef RQDEBUG
	checkrunqueue(whichq, l);
#endif
}

void
remrunqueue(struct lwp *l)
{
	struct lwp *prev, *next;
	const int whichq = l->l_priority / PPQ;

#ifdef RQDEBUG
	checkrunqueue(whichq, l);
#endif
#ifdef DIAGNOSTIC
	if (((sched_whichqs & RQMASK(whichq)) == 0))
		panic("remrunqueue: bit %d not set", whichq);
#endif
	prev = l->l_back;
	l->l_back = NULL;
	next = l->l_forw;
	prev->l_forw = next;
	next->l_back = prev;
	if (prev == next)
		sched_whichqs &= ~RQMASK(whichq);
#ifdef RQDEBUG
	checkrunqueue(whichq, NULL);
#endif
}

#undef RQMASK
#endif /* !defined(__HAVE_MD_RUNQUEUE) */
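
#if 0
/*
 * Illustrative sketch only (never compiled): a typical pairing of
 * ltsleep() and wakeup() on a shared identifier, using a simple_lock
 * as the interlock that protects the condition being waited on.  The
 * names example_lock/example_count and the PUSER | PCATCH priority
 * are hypothetical, chosen purely for illustration.
 */
static struct simplelock example_lock = SIMPLELOCK_INITIALIZER;
static int example_count;

static int
example_consume(void)
{
	int error;

	simple_lock(&example_lock);
	while (example_count == 0) {
		/*
		 * ltsleep() releases example_lock once it holds the
		 * scheduler lock, and (since PNORELOCK is not passed)
		 * re-locks it before returning to us.
		 */
		error = ltsleep(&example_count, PUSER | PCATCH, "excount",
		    0, &example_lock);
		if (error != 0) {
			/* e.g. EINTR or ERESTART because of PCATCH */
			simple_unlock(&example_lock);
			return error;
		}
	}
	example_count--;
	simple_unlock(&example_lock);
	return 0;
}

static void
example_produce(void)
{

	simple_lock(&example_lock);
	example_count++;
	simple_unlock(&example_lock);
	wakeup(&example_count);
}
#endif /* 0 */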