/*	$OpenBSD: kern_synch.c,v 1.40 2001/11/11 22:30:56 art Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

void scheduler_start __P((void));

void roundrobin __P((void *));
void schedcpu __P((void *));
void updatepri __P((struct proc *));
void endtsleep __P((void *));

void
scheduler_start()
{
	static struct timeout roundrobin_to;
	static struct timeout schedcpu_to;

	/*
	 * We avoid polluting the global namespace by keeping the scheduler
	 * timeouts static in this function.
	 * We set up the timeouts here and kick roundrobin and schedcpu once
	 * to make them do their job.
	 */

	timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

	roundrobin(&roundrobin_to);
	schedcpu(&schedcpu_to);
}
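
/*
 * Each handler re-arms its own timeout before returning, so the single
 * kick from scheduler_start() keeps it running periodically forever.
 * A minimal sketch of the same idiom (the names here are illustrative,
 * not part of this file):
 *
 *	void
 *	tick(void *arg)
 *	{
 *		struct timeout *to = (struct timeout *)arg;
 *
 *		... periodic work ...
 *		timeout_add(to, hz);	(re-arm: run again in one second)
 *	}
 *
 *	timeout_set(&tick_to, tick, &tick_to);
 *	tick(&tick_to);			(prime it once, as above)
 */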

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct proc *p = curproc;
	int s;

	if (p != NULL) {
		s = splstatclock();
		if (p->p_schedflags & PSCHED_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			p->p_schedflags |= PSCHED_SHOULDYIELD;
		} else {
			p->p_schedflags |= PSCHED_SEENRR;
		}
		splx(s);
	}
	need_resched();
	timeout_add(to, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *	    exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *	    therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *	    ln(1+x) = x - x**2/2 + x**3/3 - ...	    -1 < x < 1
 *	    therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	    solving for factor,
 *	    ln(factor) =~ (-2.30/5*loadav), or
 *	    factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *		exp(-1/b) =~ (b-1)/b =~ b/(b+1).		QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	    solving for power,
 *	    power*ln(b/(b+1)) =~ -2.30, or
 *	    power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */
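
/*
 * A quick numeric check of the proof above: with loadav == 2 we get
 * b == 4 and decay == 4/5.  Decaying for 5 * loadav == 10 seconds gives
 * (4/5)**10 =~ 0.107, i.e. roughly the desired factor of .1; solving
 * exactly, ln(.1)/ln(4/5) =~ 10.32, which is the power listed for
 * loadav 2 in the table above.
 */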

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int phz;

	/*
	 * If we have a statistics clock, use that to calculate CPU
	 * time, otherwise revert to using the profiling clock (which,
	 * in turn, defaults to hz if there is no separate profiling
	 * clock available)
	 */
	phz = stathz ? stathz : profhz;
	KASSERT(phz);

	for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (phz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / phz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout_add(to, hz);
}
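
/*
 * As a sanity check of the constants: schedcpu() above runs once per
 * second (it re-arms itself with timeout_add(to, hz)) and multiplies
 * p_pctcpu by ccpu == exp(-1/20) on each pass, so after 60 seconds an
 * idle process retains exp(-60/20) == exp(-3), about 5%, of its old
 * percentage.  That is the advertised 95% decay in 60 seconds.
 */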

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];
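
/*
 * For example, a wait channel address of 0xd0345678 hashes to slot
 * ((0xd0345678 >> 8) & 127) == (0xd03456 & 0x7f) == 0x56 == 86.
 * Distinct channels can share a slot, which is why wakeup_n() below
 * matches each queued process's p_wchan against the ident rather than
 * waking the whole queue.
 */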

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock (XXX) is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
	void *ident;
	int priority, timo;
	const char *wmesg;
	volatile struct simplelock *interlock;
{
	struct proc *p = curproc;
	struct slpque *qp;
	int s, sig;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout_add(&p->p_sleep_to, timo);
	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep ; bpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (interlock != NULL && relock)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		timeout_del(&p->p_sleep_to);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (interlock != NULL && relock)
			simple_lock(interlock);
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (interlock != NULL && relock)
		simple_lock(interlock);
	return (0);
}
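
/*
 * The usual interlocked-sleep pattern built on ltsleep() looks like the
 * sketch below; sc_lock, sc_flags and SC_READY are hypothetical driver
 * fields, shown only to illustrate the interface:
 *
 *	simple_lock(&sc->sc_lock);
 *	while ((sc->sc_flags & SC_READY) == 0) {
 *		error = ltsleep(&sc->sc_flags, PZERO | PCATCH, "scrdy",
 *		    0, &sc->sc_lock);
 *		if (error)
 *			break;		(EINTR/ERESTART on a signal)
 *	}
 *	(the interlock is held again here, since PNORELOCK wasn't passed)
 *	simple_unlock(&sc->sc_lock);
 *
 * Because the interlock is dropped only after the process is on the
 * sleep queue, a wakeup issued by another context holding sc_lock
 * cannot be lost between the flag test and the sleep.
 */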

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendsleep ; bpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make processes sleeping on the specified identifier runnable:
 * at most n of them, or all of them if n is negative.
 */
void
wakeup_n(ident, n)
	void *ident;
	int n;
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;

				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */

				if ((p->p_flag & P_INMEM) != 0) {
					setrunqueue(p);
					need_resched();
				} else {
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */

				if (n != 0)
					goto restart;
				else
					break;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

void
wakeup(chan)
	void *chan;
{
	wakeup_n(chan, -1);
}
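
/*
 * wakeup() passes n == -1, which can never be decremented to zero, so
 * every process sleeping on the channel is released.  A positive count
 * makes wakeup_n() a wake-limited primitive: a caller that has made
 * exactly one resource available might, for instance, use
 *
 *	wakeup_n(&sc->sc_freelist, 1);
 *
 * (sc_freelist being a hypothetical wait channel) to release a single
 * waiter and leave the rest queued.
 */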

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
	struct proc *p = curproc;
	int s;

	p->p_priority = p->p_usrpri;
	s = splstatclock();
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
	struct proc *newp;
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	p->p_priority = p->p_usrpri;
	s = splstatclock();
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	splx(s);
}

/*
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	register long s, u;
	struct timeval tv;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
		p->p_nice = NZERO + 4;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}
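
/*
 * The microsecond arithmetic in mi_switch() above is carry/borrow on a
 * base-1000000 digit.  For example, with an accumulated p_rtime of
 * 3.100000 and a run from runtime 10.800000 to tv 11.100000, we get
 * u == 100000 + (100000 - 800000) == -600000 and s == 3 + (11 - 10) == 4;
 * the u < 0 branch then borrows one second, leaving 3.400000, i.e. the
 * old total plus the 0.3s slice just consumed.
 */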

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
			p->p_siglist |= sigmask(p->p_xstat);
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;
	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */
void
schedclock(p)
	struct proc *p;
{
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
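
/*
 * To put numbers on the priority formula in resetpriority() above (using
 * PUSER 50 and NZERO 20, and assuming a NICE_WEIGHT of 2 purely for
 * illustration): a process with p_estcpu 24 and the default nice of 20
 * gets priority 50 + 24 + 2 * 0 == 74, while reniced to 25 it gets
 * 50 + 24 + 2 * 5 == 84, a numerically larger and therefore worse
 * priority.  min() then clamps the result to MAXPRI.
 */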

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	char *mode;
	int doingzomb = 0;
	struct proc *p, *pp;

	if (modif[0] == 0)
		modif[0] = 'n';			/* default == normal mode */

	mode = "mawn";
	while (*mode && *mode != modif[0])
		mode++;
	if (*mode == 0 || *mode == 'm') {
		db_printf("usage: show all procs [/a] [/n] [/w]\n");
		db_printf("\t/a == show process address info\n");
		db_printf("\t/n == show normal process info [default]\n");
		db_printf("\t/w == show process wait/emul info\n");
		return;
	}

	p = LIST_FIRST(&allproc);

	switch (*mode) {

	case 'a':
		db_printf(" PID %-10s %18s %18s %18s\n",
		    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
		break;
	case 'n':
		db_printf(" PID %5s %5s %5s S %10s %-9s %-16s\n",
		    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
		break;
	case 'w':
		db_printf(" PID %-16s %-8s %18s %s\n",
		    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
		break;
	}

	while (p != 0) {
		pp = p->p_pptr;
		if (p->p_stat) {

			db_printf("%c%5d ", p == curproc ? '*' : ' ',
			    p->p_pid);

			switch (*mode) {

			case 'a':
				db_printf("%-10.10s %18p %18p %18p\n",
				    p->p_comm, p, p->p_addr, p->p_vmspace);
				break;

			case 'n':
				db_printf("%5d %5d %5d %d %#10x "
				    "%-9.9s %-16s\n",
				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "", p->p_comm);
				break;

			case 'w':
				db_printf("%-16s %-8s %18p %s\n", p->p_comm,
				    p->p_emul->e_name, p->p_wchan,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "");
				break;

			}
		}
		p = LIST_NEXT(p, p_list);
		if (p == 0 && doingzomb == 0) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
		}
	}
}
#endif