/*	$OpenBSD: kern_synch.c,v 1.54 2004/01/26 01:27:02 deraadt Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

int	whichqs;		/* Bit mask summary of non-empty Q's. */
struct	prochd qs[NQS];

void scheduler_start(void);

void roundrobin(void *);
void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

void
scheduler_start()
{
	static struct timeout roundrobin_to;
	static struct timeout schedcpu_to;

	/*
	 * We avoid polluting the global namespace by keeping the scheduler
	 * timeouts static in this function.
	 * We set up the timeouts here and kick roundrobin and schedcpu once
	 * to make them do their job.
	 */

	timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

	roundrobin(&roundrobin_to);
	schedcpu(&schedcpu_to);
}
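
/*
 * Illustrative sketch (not compiled into the kernel): the self-arming
 * timeout idiom scheduler_start() uses above.  Passing the timeout
 * structure itself as the callback argument lets each handler re-add
 * itself, so one timeout_set() plus a single manual kick produces a
 * periodic callback.  The names "example_to", "example_tick" and
 * "example_start" are hypothetical.
 */
#if 0
static struct timeout example_to;

void
example_tick(void *arg)
{
	struct timeout *to = (struct timeout *)arg;

	/* ... periodic work goes here ... */

	timeout_add(to, hz);		/* re-arm one second from now */
}

void
example_start(void)
{
	timeout_set(&example_to, example_tick, &example_to);
	example_tick(&example_to);	/* kick once; it re-arms itself */
}
#endif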

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct proc *p = curproc;
	int s;

	if (p != NULL) {
		s = splstatclock();
		if (p->p_schedflags & PSCHED_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			p->p_schedflags |= PSCHED_SHOULDYIELD;
		} else {
			p->p_schedflags |= PSCHED_SEENRR;
		}
		splx(s);
	}
	need_resched();
	timeout_add(to, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
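
/*
 * Worked example of the decay above, assuming the usual FSHIFT of 11,
 * i.e. FSCALE == 2048 (check param.h; the numbers shift with FSHIFT).
 * With a load average of 1.0, averunnable.ldavg[0] == 2048, loadfactor()
 * yields 4096, and
 *
 *	decay_cpu(4096, cpu) == (4096 * cpu) / (4096 + 2048) == cpu * 2 / 3
 *
 * which is b / (b + 1) == 2/3 for b == 2 * loadavg == 2.  Applying it
 * 5 * loadavg == 5 times leaves (2/3)^5 ~= 0.13 of the original
 * p_estcpu, close to the 10% residue the derivation above aims for.
 */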

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int phz;

	/*
	 * If we have a statistics clock, use that to calculate CPU
	 * time, otherwise revert to using the profiling clock (which,
	 * in turn, defaults to hz if there is no separate profiling
	 * clock available)
	 */
	phz = stathz ? stathz : profhz;
	KASSERT(phz);

	for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (phz == 100)?
		    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
		    100 * (((fixpt_t) p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / phz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout_add(to, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}
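
/*
 * Worked example of the p_pctcpu decay that schedcpu() performs above:
 * ccpu is exp(-1/20) in fixed point, so each second
 *
 *	p_pctcpu *= exp(-1/20)
 *
 * and a process that stops running retains
 *
 *	exp(-1/20)^60 == exp(-3) ~= 0.0498
 *
 * of its %CPU after 60 seconds; about 95% of the old value has been
 * forgotten, which is the "95% in 60 seconds" figure quoted above.
 */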

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;
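
/*
 * Worked example of the hash above: wait channels are kernel addresses,
 * so for a hypothetical ident of 0xd0a1b2c0,
 *
 *	LOOKUP(0xd0a1b2c0) == (0xd0a1b2c0 >> 8) & 127
 *			   == 0xd0a1b2 & 0x7f
 *			   == 0x32
 *
 * placing the sleeper on slpque[50].  Discarding the low eight bits
 * avoids indexing on bits that are mostly zero: identifiers are at
 * least 4-byte aligned, and often aligned to larger powers of two,
 * so the low bits carry little information.
 */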

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the
 * system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock (XXX) is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
	void *ident;
	int priority, timo;
	const char *wmesg;
	volatile struct simplelock *interlock;
{
	struct proc *p = curproc;
	struct slpque *qp;
	int s, sig;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout_add(&p->p_sleep_to, timo);
	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep\nbpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (interlock != NULL && relock)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		timeout_del(&p->p_sleep_to);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (interlock != NULL && relock)
			simple_lock(interlock);
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (interlock != NULL && relock)
		simple_lock(interlock);
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}
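
/*
 * Illustrative sketch (not compiled into the kernel): the usual way a
 * caller pairs ltsleep() above with wakeup() around shared state.  The
 * "softc" structure, its fields, the wait message and the function
 * names are all hypothetical.
 */
#if 0
struct softc {
	struct simplelock sc_slock;
	int sc_busy;
};

int
example_acquire(struct softc *sc)
{
	int error;

	simple_lock(&sc->sc_slock);
	while (sc->sc_busy) {
		/*
		 * ltsleep() drops sc_slock once we are safely on the
		 * sleep queue and, since PNORELOCK is not set, retakes
		 * it before returning.  PCATCH lets a signal interrupt
		 * the wait.
		 */
		error = ltsleep(&sc->sc_busy, PWAIT | PCATCH, "exwait",
		    0, &sc->sc_slock);
		if (error) {
			simple_unlock(&sc->sc_slock);
			return (error);
		}
	}
	sc->sc_busy = 1;
	simple_unlock(&sc->sc_slock);
	return (0);
}

void
example_release(struct softc *sc)
{
	simple_lock(&sc->sc_slock);
	sc->sc_busy = 0;
	wakeup(&sc->sc_busy);		/* rouse the ltsleep()ers above */
	simple_unlock(&sc->sc_slock);
}
#endif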

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendsleep\nbpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(ident, n)
	void *ident;
	int n;
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;

				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */

				if ((p->p_flag & P_INMEM) != 0) {
					setrunqueue(p);
					need_resched();
				} else {
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */

				if (n != 0)
					goto restart;
				else
					break;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

void
wakeup(chan)
	void *chan;
{
	wakeup_n(chan, -1);
}
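
/*
 * Note on wakeup_n() above: wakeup() passes n == -1, which never
 * decrements to zero, so every process sleeping on the identifier is
 * made runnable.  A caller handing exactly one unit of a resource to
 * a waiter can instead wake at most one sleeping process, e.g. (with
 * a hypothetical identifier):
 *
 *	wakeup_n(&resource, 1);
 */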

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
	struct proc *p = curproc;
	int s;

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
	struct proc *newp;
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	splx(s);
}

/*
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct proc *p = curproc;	/* XXX */
	struct rlimit *rlim;
	struct timeval tv;

	splassert(IPL_STATCLOCK);

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	if (timercmp(&tv, &runtime, <)) {
#if 0
		printf("time is not monotonic! "
		    "tv=%ld.%06ld, runtime=%ld.%06ld\n",
		    tv.tv_sec, tv.tv_usec, runtime.tv_sec, runtime.tv_usec);
#endif
	} else {
		timersub(&tv, &runtime, &tv);
		timeradd(&p->p_rtime, &tv, &p->p_rtime);
	}

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) {
		if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) {
			psignal(p, SIGKILL);
		} else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
			p->p_siglist |= sigmask(p->p_xstat);
		/* FALLTHROUGH */
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;
	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}
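
/*
 * Worked example of the computation in resetpriority() above, assuming
 * the historical constants PUSER == 50, NICE_WEIGHT == 2 and NZERO == 20
 * (check param.h and sched.h for the values actually in effect): a
 * process at the default nice (p_nice == NZERO) with p_estcpu == 36 gets
 *
 *	newpriority = 50 + 36 + 2 * (20 - 20) == 86
 *
 * while the same process reniced by +10 gets 50 + 36 + 2 * 10 == 106,
 * clipped to MAXPRI.  Only when the new priority crosses a PPQ boundary
 * does the process move to a different run queue.
 */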

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

void
schedclock(p)
	struct proc *p;
{
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	char *mode;
	int doingzomb = 0;
	struct proc *p, *pp;

	if (modif[0] == 0)
		modif[0] = 'n';		/* default == normal mode */

	mode = "mawn";
	while (*mode && *mode != modif[0])
		mode++;
	if (*mode == 0 || *mode == 'm') {
		db_printf("usage: show all procs [/a] [/n] [/w]\n");
		db_printf("\t/a == show process address info\n");
		db_printf("\t/n == show normal process info [default]\n");
		db_printf("\t/w == show process wait/emul info\n");
		return;
	}

	p = LIST_FIRST(&allproc);

	switch (*mode) {

	case 'a':
		db_printf("   PID  %-10s  %18s  %18s  %18s\n",
		    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
		break;
	case 'n':
		db_printf("   PID  %5s  %5s  %5s  S  %10s  %-9s  %-16s\n",
		    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
		break;
	case 'w':
		db_printf("   PID  %-16s  %-8s  %18s  %s\n",
		    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
		break;
	}

	while (p != 0) {
		pp = p->p_pptr;
		if (p->p_stat) {

			db_printf("%c%5d  ", p == curproc ? '*' : ' ',
			    p->p_pid);

			switch (*mode) {

			case 'a':
				db_printf("%-10.10s  %18p  %18p  %18p\n",
				    p->p_comm, p, p->p_addr, p->p_vmspace);
				break;

			case 'n':
				db_printf("%5d  %5d  %5d  %d  %#10x  "
				    "%-9.9s  %-16s\n",
				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "", p->p_comm);
				break;

			case 'w':
				db_printf("%-16s  %-8s  %18p  %s\n", p->p_comm,
				    p->p_emul->e_name, p->p_wchan,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "");
				break;

			}
		}
		p = LIST_NEXT(p, p_list);
		if (p == 0 && doingzomb == 0) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
		}
	}
}
#endif