/*	$NetBSD: kern_synch.c,v 1.107 2001/11/30 16:21:16 kleink Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.107 2001/11/30 16:21:16 kleink Exp $");

#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int	lbolt;			/* once a second sleep address */
int	rrticks;		/* number of hardclock ticks per roundrobin() */

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];		/* run queues */
__volatile u_int32_t sched_whichqs;		/* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];	/* sleep queues */

struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
#if defined(MULTIPROCESSOR)
struct lock kernel_lock;
#endif

void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

__inline void awaken(struct proc *);

struct callout schedcpu_ch = CALLOUT_INITIALIZER;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 */
/* ARGSUSED */
void
roundrobin(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	spc->spc_rrticks = rrticks;

	if (curproc != NULL) {
		if (spc->spc_flags & SPCF_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			spc->spc_flags |= SPCF_SHOULDYIELD;
		} else
			spc->spc_flags |= SPCF_SEENRR;
	}
	need_resched(curcpu());
}
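
/*
 * Illustrative arithmetic for the comment above roundrobin() (no new
 * mechanism, just the numbers spelled out, assuming the traditional
 * hz of 100): rrticks == hz / 10 == 10, so the SPCF_SEENRR /
 * SPCF_SHOULDYIELD sequence advances once every 10 hardclock ticks,
 * i.e. every 100ms of CPU time on that processor.
 */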

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *         ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ (-2.30/5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
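
/*
 * A minimal illustrative check of the decay math above; decay_demo() is
 * a hypothetical helper, kept out of the build, that just exercises the
 * loadfactor()/decay_cpu() macros.  With a load average of 2.0, b == 4,
 * the decay factor is 4/5 == 0.8, and applying it 5 * loadavg == 10
 * times leaves 0.8**10 =~ 0.107 of the original p_estcpu, i.e. roughly
 * the desired 10%.
 */
#if 0
static u_int
decay_demo(u_int estcpu)
{
	/* A load average of 2.0 is 2 << FSHIFT in fixed point. */
	fixpt_t loadfac = loadfactor((fixpt_t)2 << FSHIFT);
	int i;

	for (i = 0; i < 5 * 2; i++)		/* 5 * loadavg iterations */
		estcpu = decay_cpu(loadfac, estcpu);
	return (estcpu);			/* =~ estcpu / 10 */
}
#endif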

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s, s1;
	unsigned int newcpu;
	int clkhz;

	proclist_lock_read();
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		SCHED_LOCK(s1);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if (p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		SCHED_UNLOCK(s1);
		splx(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(struct proc *p)
{
	unsigned int newcpu;
	fixpt_t loadfac;

	SCHED_ASSERT_LOCKED();

	newcpu = p->p_estcpu;
	loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int)decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If priority includes the PCATCH flag, signals are
 * checked before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the sched_lock is acquired.  The
 * interlock will be locked before returning to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(void *ident, int priority, const char *wmesg, int timo,
    __volatile struct simplelock *interlock)
{
	struct proc *p = curproc;
	struct slpque *qp;
	int sig, s;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

	/*
	 * XXXSMP
	 * This is probably bogus.  Figure out what the right
	 * thing to do here really is.
	 * Note that not sleeping if ltsleep is called with curproc == NULL
	 * in the shutdown case is disgusting but partly necessary given
	 * how shutdown (barely) works.
	 */
	if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}

	KASSERT(p != NULL);
	LOCK_ASSERT(interlock == NULL || simple_lock_held(interlock));

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	SCHED_LOCK(s);

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("ltsleep: ident == NULL");
	if (p->p_stat != SONPROC)
		panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
	if (p->p_back != NULL)
		panic("ltsleep: p_back != NULL");
#endif

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;

	qp = SLPQUE(ident);
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;

	if (timo)
		callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);

	/*
	 * We can now release the interlock; the sched_lock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan != NULL)
				unsleep(p);
			p->p_stat = SONPROC;
			SCHED_UNLOCK(s);
			goto resume;
		}
		if (p->p_wchan == NULL) {
			catch = 0;
			SCHED_UNLOCK(s);
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;

	SCHED_ASSERT_LOCKED();
	mi_switch(p);

#if	defined(DDB) && !defined(GPROF)
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep ; bpendtsleep:");
#endif

	SCHED_ASSERT_UNLOCKED();
	splx(s);

 resume:
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;

	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		callout_stop(&p->p_tsleep_ch);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (relock && interlock != NULL)
			simple_lock(interlock);
		if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (relock && interlock != NULL)
		simple_lock(interlock);
	return (0);
}
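
/*
 * A minimal usage sketch of the ltsleep()/wakeup() pairing described in
 * the comment above ltsleep().  Not compiled in; example_lock,
 * example_flag, example_wait() and example_post() are hypothetical names,
 * and PWAIT/"example" are merely an illustrative priority and wait
 * message.  The waiter re-checks its condition in a loop because a
 * wakeup does not guarantee the condition still holds by the time the
 * process runs again.
 */
#if 0
struct simplelock example_lock = SIMPLELOCK_INITIALIZER;
int example_flag;

int
example_wait(void)
{
	int error = 0;

	simple_lock(&example_lock);
	while (example_flag == 0) {
		/*
		 * Sleep interruptibly; the interlock is dropped while
		 * we are asleep and re-taken before ltsleep() returns.
		 */
		error = ltsleep(&example_flag, PWAIT | PCATCH, "example",
		    0, &example_lock);
		if (error != 0)
			break;
	}
	simple_unlock(&example_lock);
	return (error);
}

void
example_post(void)
{

	simple_lock(&example_lock);
	example_flag = 1;
	simple_unlock(&example_lock);
	wakeup(&example_flag);
}
#endif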

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	struct slpque *qp;
	struct proc **hp;

	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	p->p_stat = SRUN;

	/*
	 * Since curpriority is a user priority, p->p_priority
	 * is always better than curpriority.
	 */
	if (p->p_flag & P_INMEM) {
		setrunqueue(p);
		KASSERT(p->p_cpu != NULL);
		need_resched(p->p_cpu);
	} else
		sched_wakeup(&proc0);
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
sched_unlock_idle(void)
{

	simple_unlock(&sched_lock);
}

void
sched_lock_idle(void)
{

	simple_lock(&sched_lock);
}
#endif /* MULTIPROCESSOR || LOCKDEBUG */

/*
 * Make all processes sleeping on the specified identifier runnable.
 */

void
wakeup(void *ident)
{
	int s;

	SCHED_ASSERT_UNLOCKED();

	SCHED_LOCK(s);
	sched_wakeup(ident);
	SCHED_UNLOCK(s);
}

void
sched_wakeup(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;

	SCHED_ASSERT_LOCKED();

	qp = SLPQUE(ident);
 restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				awaken(p);
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;
	struct proc *best_sleepp, **best_sleepq;
	struct proc *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	SCHED_LOCK(s);

	qp = SLPQUE(ident);

	for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup_one");
#endif
		if (p->p_wchan == ident) {
			if (p->p_stat == SSLEEP) {
				if (best_sleepp == NULL ||
				    p->p_priority < best_sleepp->p_priority) {
					best_sleepp = p;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    p->p_priority < best_stopp->p_priority) {
					best_stopp = p;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		p = best_sleepp;
		q = best_sleepq;
	} else {
		p = best_stopp;
		q = best_stopq;
	}

	if (p != NULL) {
		p->p_wchan = NULL;
		*q = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = q;
		if (p->p_stat == SSLEEP)
			awaken(p);
	}
	SCHED_UNLOCK(s);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(struct proc *newp)
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * The machine independent parts of context switch.
 * Must be called at splsched() (no higher!) and with
 * the sched_lock held.
 */
void
mi_switch(struct proc *p)
{
	struct schedstate_percpu *spc;
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;
#if defined(MULTIPROCESSOR)
	int hold_count;
#endif

	SCHED_ASSERT_LOCKED();

#if defined(MULTIPROCESSOR)
	/*
	 * Release the kernel_lock, as we are about to yield the CPU.
	 * The scheduler lock is still held until cpu_switch()
	 * selects a new process and removes it from the run queue.
	 */
	if (p->p_flag & P_BIGLOCK)
		hold_count = spinlock_release_all(&kernel_lock);
#endif

	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());

	spc = &p->p_cpu->ci_schedstate;

#if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
	spinlock_switchcheck();
#endif
#ifdef LOCKDEBUG
	simple_lock_switchcheck();
#endif

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		/*
		 * XXXSMP: we're inside the scheduler lock perimeter;
		 * use sched_psignal.
		 */
		if (s >= rlim->rlim_max)
			sched_psignal(p, SIGKILL);
		else {
			sched_psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
	    p->p_nice == NZERO) {
		p->p_nice = autoniceval + NZERO;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	/*
	 * Pick a new current process and switch to it.  When we
	 * run again, we'll return here.
	 */
	uvmexp.swtch++;
	cpu_switch(p);

	/*
	 * Make sure that MD code released the scheduler lock before
	 * resuming us.
	 */
	SCHED_ASSERT_UNLOCKED();

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	microtime(&p->p_cpu->ci_schedstate.spc_runtime);

#if defined(MULTIPROCESSOR)
	/*
	 * Reacquire the kernel_lock now.  We do this after we've
	 * released the scheduler lock to avoid deadlock, and before
	 * we reacquire the interlock.
	 */
	if (p->p_flag & P_BIGLOCK)
		spinlock_acquire_count(&kernel_lock, hold_count);
#endif
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	int i;

	for (i = 0; i < RUNQUE_NQS; i++)
		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
		    (struct proc *)&sched_qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SONPROC:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the
		 * debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_sigctx.ps_siglist, p->p_xstat);
			CHECKSIGS(p);
		}
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		sched_wakeup((caddr_t)&proc0);
	else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * This is not exactly right.  Since p->p_cpu persists
		 * across a context switch, this gives us some sort
		 * of processor affinity.  But we need to figure out
		 * at what point it's better to reschedule on a different
		 * CPU than the last one.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(struct proc *p)
{
	unsigned int newpriority;

	SCHED_ASSERT_LOCKED();

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * Same applies as in setrunnable() above.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in
 * resetpriority() above) will compute a different value each time p_estcpu
 * increases.  This can cause a switch, but unless the priority crosses a
 * PPQ boundary the actual queue will not change.  The cpu usage estimator
 * ramps up quite quickly when the process is running (linearly), and decays
 * away exponentially, at a rate which is proportionally slower when the
 * system is busy.  The basic principle is that the system will 90% forget
 * that the process used a lot of CPU time in 5 * loadav seconds.  This
 * causes the system to favor processes which haven't run much recently,
 * and to round-robin among other processes.
 */
void
schedclock(struct proc *p)
{
	int s;

	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);

	SCHED_LOCK(s);
	resetpriority(p);
	SCHED_UNLOCK(s);

	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
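
/*
 * Illustrative reading of the priority formula in resetpriority() (no new
 * mechanism, just the arithmetic spelled out): with a neutral nice value
 * (p_nice == NZERO) the nice term vanishes and p_usrpri is simply
 * PUSER + p_estcpu, clipped to MAXPRI.  Since run queues are PPQ
 * priorities wide, each additional PPQ of accumulated p_estcpu moves the
 * process one run queue further back in the selection order, which is how
 * the scheduler comes to favor processes that have not run much recently.
 */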

void
suspendsched()
{
	struct proc *p;
	int s;

	/*
	 * Convert all non-P_SYSTEM SSLEEP or SRUN processes to SSTOP.
	 */
	proclist_lock_read();
	SCHED_LOCK(s);
	for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
		if ((p->p_flag & P_SYSTEM) != 0)
			continue;
		switch (p->p_stat) {
		case SRUN:
			if ((p->p_flag & P_INMEM) != 0)
				remrunqueue(p);
			/* FALLTHROUGH */
		case SSLEEP:
			p->p_stat = SSTOP;
			break;
		case SONPROC:
			/*
			 * XXX SMP: we need to deal with processes on
			 * other CPUs!
			 */
			break;
		default:
			break;
		}
	}
	SCHED_UNLOCK(s);
	proclist_unlock_read();
}