/*	$OpenBSD: kern_synch.c,v 1.48 2003/06/02 23:28:06 millert Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

int	whichqs;		/* Bit mask summary of non-empty Q's. */
struct	prochd qs[NQS];

void scheduler_start(void);

void roundrobin(void *);
void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

void
scheduler_start()
{
	static struct timeout roundrobin_to;
	static struct timeout schedcpu_to;

	/*
	 * We avoid polluting the global namespace by keeping the scheduler
	 * timeouts static in this function.
	 * We set up the timeouts here and kick roundrobin and schedcpu once
	 * to make them do their job.
	 */

	timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

	roundrobin(&roundrobin_to);
	schedcpu(&schedcpu_to);
}

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct proc *p = curproc;
	int s;

	if (p != NULL) {
		s = splstatclock();
		if (p->p_schedflags & PSCHED_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			p->p_schedflags |= PSCHED_SHOULDYIELD;
		} else {
			p->p_schedflags |= PSCHED_SEENRR;
		}
		splx(s);
	}
	need_resched();
	timeout_add(to, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
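
/*
 * Illustrative sketch (not part of the original file): a minimal userland
 * rendition of the decay computation above, assuming the usual FSHIFT of 11
 * (FSCALE == 2048) from <sys/param.h>. With a load average of 1.0 the load
 * factor is 2*FSCALE, so every pass through decay_cpu() multiplies p_estcpu
 * by 2/3; after five passes about 13% of the original value remains, and the
 * table above says roughly 5.68 passes are needed to reach the 10% target.
 * The X-prefixed names below are local copies made up for this sketch.
 */
#if 0
#include <stdio.h>

#define	XFSHIFT			11
#define	XFSCALE			(1 << XFSHIFT)
#define	xloadfactor(loadav)	(2 * (loadav))
#define	xdecay_cpu(loadfac, cpu) \
	(((loadfac) * (cpu)) / ((loadfac) + XFSCALE))

int
main(void)
{
	unsigned int estcpu = 255;			/* max p_estcpu */
	long loadfac = xloadfactor(1 * XFSCALE);	/* loadav == 1.0 */
	int sec;

	for (sec = 1; sec <= 5; sec++) {
		estcpu = xdecay_cpu(loadfac, estcpu);
		printf("after %d sec: estcpu = %u\n", sec, estcpu);
	}
	return (0);
}
#endif
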
/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int phz;

	/*
	 * If we have a statistics clock, use that to calculate CPU
	 * time, otherwise revert to using the profiling clock (which,
	 * in turn, defaults to hz if there is no separate profiling
	 * clock available)
	 */
	phz = stathz ? stathz : profhz;
	KASSERT(phz);

	for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping). We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (phz == 100)?
		    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
		    100 * (((fixpt_t) p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / phz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout_add(to, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2. Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];
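
/*
 * Illustrative sketch (not part of the original file) of how wait-channel
 * addresses map onto slpque buckets: LOOKUP() drops the low 8 bits and keeps
 * the next 7, so two channels less than 256 bytes apart usually share a
 * bucket while channels exactly 256 bytes apart land in neighbouring ones.
 * Sharing a bucket is harmless; wakeup_n() still compares p_wchan exactly.
 * The addresses below are arbitrary example values.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	/* same bucket: the two addresses differ only in the low 8 bits */
	printf("%ld %ld\n",
	    LOOKUP((void *)0xd0a01200), LOOKUP((void *)0xd0a01280));
	/* adjacent buckets: the addresses differ by exactly 0x100 */
	printf("%ld %ld\n",
	    LOOKUP((void *)0xd0a01200), LOOKUP((void *)0xd0a01300));
	return (0);
}
#endif
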
/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call. Suspends the current process until a wakeup is
 * performed on the specified identifier. The process will then be made
 * runnable with the specified priority. Sleeps at most timo/hz seconds
 * (0 means no timeout). If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked. Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 *
 * The interlock is held until the scheduler_slock (XXX) is held. The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
	void *ident;
	int priority, timo;
	const char *wmesg;
	volatile struct simplelock *interlock;
{
	struct proc *p = curproc;
	struct slpque *qp;
	int s, sig;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout_add(&p->p_sleep_to, timo);
	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called. If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep\nbpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (interlock != NULL && relock)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		timeout_del(&p->p_sleep_to);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (interlock != NULL && relock)
			simple_lock(interlock);
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (interlock != NULL && relock)
		simple_lock(interlock);
	return (0);
}
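
/*
 * Illustrative sketch (not part of the original file) of the usual calling
 * pattern for the interface above, using tsleep(), the traditional wrapper
 * that passes a NULL interlock to ltsleep(). The caller re-checks its
 * condition in a loop after every wakeup and handles the EWOULDBLOCK,
 * EINTR and ERESTART returns described in the comment before ltsleep().
 * "example_softc", "sc_flags", EX_READY, example_wait() and example_ready()
 * are made-up names for this sketch; splhigh() stands in for whatever spl
 * level actually protects the flag.
 */
#if 0
struct example_softc {
	int	sc_flags;
#define	EX_READY	0x01
};

int
example_wait(struct example_softc *sc)
{
	int error, s;

	s = splhigh();
	while ((sc->sc_flags & EX_READY) == 0) {
		/* sleep at most five seconds, allow signals to interrupt */
		error = tsleep(sc, PWAIT | PCATCH, "exwait", 5 * hz);
		if (error) {		/* EWOULDBLOCK, EINTR or ERESTART */
			splx(s);
			return (error);
		}
	}
	splx(s);
	return (0);
}

void
example_ready(struct example_softc *sc)
{
	sc->sc_flags |= EX_READY;
	wakeup(sc);		/* wake every sleeper using sc as ident */
}
#endif
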
/*
 * Implement timeout for tsleep.
 * If the process hasn't been awakened (wchan non-zero),
 * set the timeout flag and undo the sleep. If the proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	mi_switch();
#ifdef DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendsleep\nbpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(ident, n)
	void *ident;
	int n;
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;

				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */

				if ((p->p_flag & P_INMEM) != 0) {
					setrunqueue(p);
					need_resched();
				} else {
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */

				if (n != 0)
					goto restart;
				else
					break;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

void
wakeup(chan)
	void *chan;
{
	wakeup_n(chan, -1);
}

/*
 * General yield call. Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
	struct proc *p = curproc;
	int s;

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	splx(s);
}

/*
 * General preemption call. Puts the current process back on its run queue
 * and performs an involuntary context switch. If a process is supplied,
 * we switch to that process. Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
	struct proc *newp;
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	splx(s);
}


/*
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct proc *p = curproc;	/* XXX */
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;

	splassert(IPL_STATCLOCK);

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
			p->p_siglist |= sigmask(p->p_xstat);
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;
	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}

/*
 * We adjust the priority of the current process. The priority of a process
 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
 * is increased here. The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases. This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change. The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy. The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds. This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

void
schedclock(p)
	struct proc *p;
{
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
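
/*
 * Illustrative sketch (not part of the original file): the user-priority
 * arithmetic performed by resetpriority(), with the traditional values
 * PUSER == 50, NICE_WEIGHT == 2, NZERO == 20 and MAXPRI == 127 assumed for
 * the X-prefixed copies below (the authoritative definitions live in
 * <sys/param.h> and <sys/sched.h>). Each schedclock() call adds one to
 * p_estcpu, so a compute-bound process drifts toward MAXPRI while an
 * interactive one, whose p_estcpu keeps decaying, stays near PUSER.
 */
#if 0
#include <stdio.h>

#define	XPUSER		50
#define	XNICE_WEIGHT	2
#define	XNZERO		20
#define	XMAXPRI		127

static unsigned int
example_usrpri(unsigned int estcpu, int p_nice)
{
	unsigned int pri;

	pri = XPUSER + estcpu + XNICE_WEIGHT * (p_nice - XNZERO);
	return (pri < XMAXPRI ? pri : XMAXPRI);
}

int
main(void)
{
	/* freshly decayed interactive process vs. a long-running CPU hog */
	printf("nice  0, estcpu   0: %u\n", example_usrpri(0, XNZERO));
	printf("nice  0, estcpu  40: %u\n", example_usrpri(40, XNZERO));
	printf("nice 10, estcpu  40: %u\n", example_usrpri(40, XNZERO + 10));
	printf("nice  0, estcpu 255: %u\n", example_usrpri(255, XNZERO));
	return (0);
}
#endif
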
#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	char *mode;
	int doingzomb = 0;
	struct proc *p, *pp;

	if (modif[0] == 0)
		modif[0] = 'n';			/* default == normal mode */

	mode = "mawn";
	while (*mode && *mode != modif[0])
		mode++;
	if (*mode == 0 || *mode == 'm') {
		db_printf("usage: show all procs [/a] [/n] [/w]\n");
		db_printf("\t/a == show process address info\n");
		db_printf("\t/n == show normal process info [default]\n");
		db_printf("\t/w == show process wait/emul info\n");
		return;
	}

	p = LIST_FIRST(&allproc);

	switch (*mode) {

	case 'a':
		db_printf(" PID %-10s %18s %18s %18s\n",
		    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
		break;
	case 'n':
		db_printf(" PID %5s %5s %5s S %10s %-9s %-16s\n",
		    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
		break;
	case 'w':
		db_printf(" PID %-16s %-8s %18s %s\n",
		    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
		break;
	}

	while (p != 0) {
		pp = p->p_pptr;
		if (p->p_stat) {

			db_printf("%c%5d ", p == curproc ? '*' : ' ',
			    p->p_pid);

			switch (*mode) {

			case 'a':
				db_printf("%-10.10s %18p %18p %18p\n",
				    p->p_comm, p, p->p_addr, p->p_vmspace);
				break;

			case 'n':
				db_printf("%5d %5d %5d %d %#10x "
				    "%-9.9s %-16s\n",
				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "", p->p_comm);
				break;

			case 'w':
				db_printf("%-16s %-8s %18p %s\n", p->p_comm,
				    p->p_emul->e_name, p->p_wchan,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "");
				break;

			}
		}
		p = LIST_NEXT(p, p_list);
		if (p == 0 && doingzomb == 0) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
		}
	}
}
#endif