/*	$OpenBSD: kern_synch.c,v 1.37 2001/08/07 22:57:15 art Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

void scheduler_start __P((void));

void roundrobin __P((void *));
void schedcpu __P((void *));
void updatepri __P((struct proc *));
void endtsleep __P((void *));

void
scheduler_start()
{
        static struct timeout roundrobin_to;
        static struct timeout schedcpu_to;

        /*
         * We avoid polluting the global namespace by keeping the scheduler
         * timeouts static in this function.
         * We set up the timeouts here and kick roundrobin and schedcpu once
         * to make them do their job.
         */

        timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
        timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

        roundrobin(&roundrobin_to);
        schedcpu(&schedcpu_to);
}
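
/*
 * The pattern above is a self-rearming timeout: each handler is passed a
 * pointer to its own struct timeout so that it can re-add itself after
 * each run.  A minimal sketch of the same idiom (the handler name and
 * period below are hypothetical, for illustration only):
 *
 *	static struct timeout example_to;
 *
 *	void
 *	example_tick(arg)
 *		void *arg;
 *	{
 *		struct timeout *to = (struct timeout *)arg;
 *
 *		... do periodic work ...
 *		timeout_add(to, hz);	... run again in one second ...
 *	}
 *
 *	timeout_set(&example_to, example_tick, &example_to);
 *	example_tick(&example_to);	... prime the cycle ...
 */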

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
        void *arg;
{
        struct timeout *to = (struct timeout *)arg;
        struct proc *p = curproc;
        int s;

        if (p != NULL) {
                s = splstatclock();
                if (p->p_schedflags & PSCHED_SEENRR) {
                        /*
                         * The process has already been through a roundrobin
                         * without switching and may be hogging the CPU.
                         * Indicate that the process should yield.
                         */
                        p->p_schedflags |= PSCHED_SHOULDYIELD;
                } else {
                        p->p_schedflags |= PSCHED_SEENRR;
                }
                splx(s);
        }
        need_resched();
        timeout_add(to, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *	     exp(x) = x**0/0! + x**1/1! + x**2/2! + ... .
 *	     therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *	     ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *	     therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11
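
/*
 * A worked example of decay_cpu() in fixed point, following directly from
 * the derivation above: with a load average of 1.0, ldavg[0] == FSCALE, so
 *
 *	loadfac = 2 * FSCALE
 *	decay_cpu(loadfac, cpu) = (2 * FSCALE * cpu) / (3 * FSCALE)
 *	                        = cpu * 2 / 3
 *
 * which is exactly decay = b/(b+1) with b = 2 * loadavg = 2.  Applied
 * 5.68 times (the power for loadav 1 in the table above),
 * (2/3) ** 5.68 =~ 0.1, i.e. 90% of p_estcpu is forgotten.
 */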

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(arg)
        void *arg;
{
        struct timeout *to = (struct timeout *)arg;
        fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
        struct proc *p;
        int s;
        unsigned int newcpu;
        int phz;

        /*
         * If we have a statistics clock, use that to calculate CPU
         * time, otherwise revert to using the profiling clock (which,
         * in turn, defaults to hz if there is no separate profiling
         * clock available).
         */
        phz = stathz ? stathz : profhz;
        KASSERT(phz);

        for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
                /*
                 * Increment time in/out of memory and sleep time
                 * (if sleeping).  We ignore overflow; with 16-bit int's
                 * (remember them?) overflow takes 45 days.
                 */
                p->p_swtime++;
                if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
                        p->p_slptime++;
                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
                /*
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (p->p_slptime > 1)
                        continue;
                s = splstatclock();	/* prevent state changes */
                /*
                 * p_pctcpu is only for ps.
                 */
#if	(FSHIFT >= CCPU_SHIFT)
                p->p_pctcpu += (phz == 100)?
                    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                    100 * (((fixpt_t) p->p_cpticks)
                        << (FSHIFT - CCPU_SHIFT)) / phz;
#else
                p->p_pctcpu += ((FSCALE - ccpu) *
                    (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
                p->p_cpticks = 0;
                newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
                p->p_estcpu = newcpu;
                resetpriority(p);
                if (p->p_priority >= PUSER) {
                        if ((p != curproc) &&
                            p->p_stat == SRUN &&
                            (p->p_flag & P_INMEM) &&
                            (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
                                remrunqueue(p);
                                p->p_priority = p->p_usrpri;
                                setrunqueue(p);
                        } else
                                p->p_priority = p->p_usrpri;
                }
                splx(s);
        }
        uvm_meter();
        wakeup((caddr_t)&lbolt);
        timeout_add(to, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
        register struct proc *p;
{
        register unsigned int newcpu = p->p_estcpu;
        register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

        if (p->p_slptime > 5 * loadfac)
                p->p_estcpu = 0;
        else {
                p->p_slptime--;	/* the first time was done in schedcpu */
                while (newcpu && --p->p_slptime)
                        newcpu = (int) decay_cpu(loadfac, newcpu);
                p->p_estcpu = newcpu;
        }
        resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
        struct proc *sq_head;
        struct proc **sq_tailp;
} slpque[TABLESIZE];
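
/*
 * A worked example of the hash (the address below is made up for
 * illustration): for a wait channel at, say, ident == 0xd0123456,
 *
 *	(long)0xd0123456 >> 8	== ...d01234
 *	...d01234 & (128 - 1)	== 0x34 == 52
 *
 * so that channel sleeps on slpque[52].  Dropping the low 8 bits first
 * keeps small, identically-aligned kernel objects from all hashing into
 * the same few buckets.
 */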

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock (XXX) is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
        void *ident;
        int priority, timo;
        char *wmesg;
        volatile struct simplelock *interlock;
{
        struct proc *p = curproc;
        struct slpque *qp;
        int s, sig;
        int catch = priority & PCATCH;
        int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 1, 0);
#endif
        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                if (interlock != NULL && relock == 0)
                        simple_unlock(interlock);
                return (0);
        }
#ifdef DIAGNOSTIC
        if (ident == NULL || p->p_stat != SRUN || p->p_back)
                panic("tsleep");
#endif
        p->p_wchan = ident;
        p->p_wmesg = wmesg;
        p->p_slptime = 0;
        p->p_priority = priority & PRIMASK;
        qp = &slpque[LOOKUP(ident)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_forw) = 0;
        if (timo)
                timeout_add(&p->p_sleep_to, timo);
        /*
         * We can now release the interlock; the scheduler_slock
         * is held, so a thread can't get in to do wakeup() before
         * we do the switch.
         *
         * XXX We leave the code block here, after inserting ourselves
         * on the sleep queue, because we might want a more clever
         * data structure for the sleep queues at some point.
         */
        if (interlock != NULL)
                simple_unlock(interlock);

        /*
         * We put ourselves on the sleep queue and start our timeout
         * before calling CURSIG, as we could stop there, and a wakeup
         * or a SIGCONT (or both) could occur while we were stopped.
         * A SIGCONT would cause us to be marked as SSLEEP
         * without resuming us, thus we must be ready for sleep
         * when CURSIG is called.  If the wakeup happens while we're
         * stopped, p->p_wchan will be 0 upon return from CURSIG.
         */
        if (catch) {
                p->p_flag |= P_SINTR;
                if ((sig = CURSIG(p)) != 0) {
                        if (p->p_wchan)
                                unsleep(p);
                        p->p_stat = SRUN;
                        goto resume;
                }
                if (p->p_wchan == 0) {
                        catch = 0;
                        goto resume;
                }
        } else
                sig = 0;
        p->p_stat = SSLEEP;
        p->p_stats->p_ru.ru_nvcsw++;
        mi_switch();
#ifdef	DDB
        /* handy breakpoint location after process "wakes" */
        __asm(".globl bpendtsleep ; bpendtsleep:");
#endif
resume:
        curpriority = p->p_usrpri;
        splx(s);
        p->p_flag &= ~P_SINTR;
        if (p->p_flag & P_TIMEOUT) {
                p->p_flag &= ~P_TIMEOUT;
                if (sig == 0) {
#ifdef KTRACE
                        if (KTRPOINT(p, KTR_CSW))
                                ktrcsw(p, 0, 0);
#endif
                        if (interlock != NULL && relock)
                                simple_lock(interlock);
                        return (EWOULDBLOCK);
                }
        } else if (timo)
                timeout_del(&p->p_sleep_to);
        if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
                if (KTRPOINT(p, KTR_CSW))
                        ktrcsw(p, 0, 0);
#endif
                if (interlock != NULL && relock)
                        simple_lock(interlock);
                if (p->p_sigacts->ps_sigintr & sigmask(sig))
                        return (EINTR);
                return (ERESTART);
        }
#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 0, 0);
#endif
        if (interlock != NULL && relock)
                simple_lock(interlock);
        return (0);
}
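
/*
 * Illustrative caller pattern (sc, sc_busy and sc_slock are hypothetical
 * names, sketched here only to show how the interlock argument is meant
 * to be used): the caller tests its condition under the interlock, and
 * ltsleep() drops the interlock around the switch and, by default,
 * re-takes it before returning.
 *
 *	int error;
 *
 *	simple_lock(&sc->sc_slock);
 *	while (sc->sc_busy) {
 *		error = ltsleep(&sc->sc_busy, PZERO | PCATCH, "scbusy", 0,
 *		    &sc->sc_slock);
 *		if (error) {
 *			simple_unlock(&sc->sc_slock);
 *			return (error);
 *		}
 *	}
 *	sc->sc_busy = 1;
 *	simple_unlock(&sc->sc_slock);
 */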

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
        void *arg;
{
        struct proc *p;
        int s;

        p = (struct proc *)arg;
        s = splhigh();
        if (p->p_wchan) {
                if (p->p_stat == SSLEEP)
                        setrunnable(p);
                else
                        unsleep(p);
                p->p_flag |= P_TIMEOUT;
        }
        splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
        void *ident;
        int priority;
{
        register struct proc *p = curproc;
        register struct slpque *qp;
        register int s;

#ifdef DIAGNOSTIC
        if (priority > PZERO) {
                printf("sleep called with priority %d > PZERO, wchan: %p\n",
                    priority, ident);
                panic("old sleep");
        }
#endif
        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                splx(safepri);
                splx(s);
                return;
        }
#ifdef DIAGNOSTIC
        if (ident == NULL || p->p_stat != SRUN || p->p_back)
                panic("sleep");
#endif
        p->p_wchan = ident;
        p->p_wmesg = NULL;
        p->p_slptime = 0;
        p->p_priority = priority;
        qp = &slpque[LOOKUP(ident)];
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_forw) = 0;
        p->p_stat = SSLEEP;
        p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 1, 0);
#endif
        mi_switch();
#ifdef	DDB
        /* handy breakpoint location after process "wakes" */
        __asm(".globl bpendsleep ; bpendsleep:");
#endif
#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 0, 0);
#endif
        curpriority = p->p_usrpri;
        splx(s);
}
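
/*
 * A classic use of the sleep interfaces together with the lbolt channel
 * declared above (sketch for illustration): since schedcpu() does a
 * wakeup(&lbolt) once per second, a caller that wants a cheap "roughly
 * one second" pause without setting up its own timeout can sleep on it:
 *
 *	sleep((caddr_t)&lbolt, PZERO);	... resumes within a second ...
 */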

/*
 * Remove a process from its wait queue.
 */
void
unsleep(p)
        register struct proc *p;
{
        register struct slpque *qp;
        register struct proc **hp;
        int s;

        s = splhigh();
        if (p->p_wchan) {
                hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
                while (*hp != p)
                        hp = &(*hp)->p_forw;
                *hp = p->p_forw;
                if (qp->sq_tailp == &p->p_forw)
                        qp->sq_tailp = hp;
                p->p_wchan = 0;
        }
        splx(s);
}

/*
 * Make at most n processes sleeping on the specified identifier runnable;
 * n < 0 means all of them (see wakeup() below).
 */
void
wakeup_n(ident, n)
        void *ident;
        int n;
{
        struct slpque *qp;
        struct proc *p, **q;
        int s;

        s = splhigh();
        qp = &slpque[LOOKUP(ident)];
restart:
        for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
                if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
                        panic("wakeup");
#endif
                if (p->p_wchan == ident) {
                        --n;
                        p->p_wchan = 0;
                        *q = p->p_forw;
                        if (qp->sq_tailp == &p->p_forw)
                                qp->sq_tailp = q;
                        if (p->p_stat == SSLEEP) {
                                /* OPTIMIZED EXPANSION OF setrunnable(p); */
                                if (p->p_slptime > 1)
                                        updatepri(p);
                                p->p_slptime = 0;
                                p->p_stat = SRUN;

                                /*
                                 * Since curpriority is a user priority,
                                 * p->p_priority is always better than
                                 * curpriority.
                                 */

                                if ((p->p_flag & P_INMEM) != 0) {
                                        setrunqueue(p);
                                        need_resched();
                                } else {
                                        wakeup((caddr_t)&proc0);
                                }
                                /* END INLINE EXPANSION */

                                if (n != 0)
                                        goto restart;
                                else
                                        break;
                        }
                } else
                        q = &p->p_forw;
        }
        splx(s);
}

void
wakeup(chan)
        void *chan;
{
        wakeup_n(chan, -1);
}
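
/*
 * The wakeup side of the hypothetical sc_busy example sketched after
 * ltsleep() above: the waker changes the condition under the same
 * interlock, then calls wakeup() on the identical channel address.
 *
 *	simple_lock(&sc->sc_slock);
 *	sc->sc_busy = 0;
 *	wakeup(&sc->sc_busy);
 *	simple_unlock(&sc->sc_slock);
 *
 * wakeup_n(&sc->sc_busy, 1) would instead rouse a single sleeper, which
 * avoids a thundering herd when only one waiter can make progress.
 */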

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
        struct proc *p = curproc;
        int s;

        p->p_priority = p->p_usrpri;
        s = splstatclock();
        setrunqueue(p);
        p->p_stats->p_ru.ru_nvcsw++;
        mi_switch();
        splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
        struct proc *newp;
{
        struct proc *p = curproc;
        int s;

        /*
         * XXX Switching to a specific process is not supported yet.
         */
        if (newp != NULL)
                panic("preempt: cpu_preempt not yet implemented");

        p->p_priority = p->p_usrpri;
        s = splstatclock();
        setrunqueue(p);
        p->p_stats->p_ru.ru_nivcsw++;
        mi_switch();
        splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
        register struct proc *p = curproc;	/* XXX */
        register struct rlimit *rlim;
        register long s, u;
        struct timeval tv;

        /*
         * Compute the amount of time during which the current
         * process was running, and add that to its total so far.
         */
        microtime(&tv);
        u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
        s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
        if (u < 0) {
                u += 1000000;
                s--;
        } else if (u >= 1000000) {
                u -= 1000000;
                s++;
        }
        p->p_rtime.tv_usec = u;
        p->p_rtime.tv_sec = s;

        /*
         * Check if the process exceeds its cpu resource allocation.
         * If over max, kill it.  In any case, if it has run for more
         * than 10 minutes, reduce priority to give others a chance.
         */
        rlim = &p->p_rlimit[RLIMIT_CPU];
        if (s >= rlim->rlim_cur) {
                if (s >= rlim->rlim_max)
                        psignal(p, SIGKILL);
                else {
                        psignal(p, SIGXCPU);
                        if (rlim->rlim_cur < rlim->rlim_max)
                                rlim->rlim_cur += 5;
                }
        }
        if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
                p->p_nice = NZERO + 4;
                resetpriority(p);
        }

        /*
         * Process is about to yield the CPU; clear the appropriate
         * scheduling flags.
         */
        p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

        /*
         * Pick a new current process and record its start time.
         */
        uvmexp.swtch++;
        cpu_switch(p);
        microtime(&runtime);
}
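
/*
 * A worked example of the timeval normalization above (the numbers are
 * made up for illustration): if the accumulated p_rtime is
 * { 2 sec, 900000 usec } and the current slice (tv - runtime) adds
 * 300000 usec, then
 *
 *	u = 900000 + 300000 = 1200000	(>= 1000000)
 *	u -= 1000000  ->  u = 200000, s++
 *
 * giving { 3 sec, 200000 usec }.  The symmetric u < 0 branch borrows a
 * second when tv.tv_usec is smaller than runtime.tv_usec.
 */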

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
        register int i;

        for (i = 0; i < NQS; i++)
                qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
        register struct proc *p;
{
        register int s;

        s = splhigh();
        switch (p->p_stat) {
        case 0:
        case SRUN:
        case SZOMB:
        case SDEAD:
        default:
                panic("setrunnable");
        case SSTOP:
                /*
                 * If we're being traced (possibly because someone attached us
                 * while we were stopped), check for a signal from the debugger.
                 */
                if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
                        p->p_siglist |= sigmask(p->p_xstat);
        case SSLEEP:
                unsleep(p);		/* e.g. when sending signals */
                break;
        case SIDL:
                break;
        }
        p->p_stat = SRUN;
        if (p->p_flag & P_INMEM)
                setrunqueue(p);
        splx(s);
        if (p->p_slptime > 1)
                updatepri(p);
        p->p_slptime = 0;
        if ((p->p_flag & P_INMEM) == 0)
                wakeup((caddr_t)&proc0);
        else if (p->p_priority < curpriority)
                need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
        register struct proc *p;
{
        register unsigned int newpriority;

        newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
        newpriority = min(newpriority, MAXPRI);
        p->p_usrpri = newpriority;
        if (newpriority < curpriority)
                need_resched();
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */
void
schedclock(p)
        struct proc *p;
{
        p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
        resetpriority(p);
        if (p->p_priority >= PUSER)
                p->p_priority = p->p_usrpri;
}
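
/*
 * A worked example of the priority formula in resetpriority(), assuming
 * the usual historical constants (PUSER == 50, NICE_WEIGHT == 2,
 * NZERO == 20; check param.h and sched.h for the real values on a given
 * system): an un-niced process (p_nice == 20) that has accumulated
 * p_estcpu == 12 gets
 *
 *	newpriority = 50 + 12 + 2 * (20 - 20) = 62
 *
 * while the same process reniced by +4 gets 50 + 12 + 2 * 4 = 70.
 * Larger numbers are worse priorities, so both CPU use and positive nice
 * push a process toward the back of the run queues.
 */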

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
        db_expr_t addr;
        int haddr;
        db_expr_t count;
        char *modif;
{
        char *mode;
        int doingzomb = 0;
        struct proc *p, *pp;

        if (modif[0] == 0)
                modif[0] = 'n';		/* default == normal mode */

        mode = "mawn";
        while (*mode && *mode != modif[0])
                mode++;
        if (*mode == 0 || *mode == 'm') {
                db_printf("usage: show all procs [/a] [/n] [/w]\n");
                db_printf("\t/a == show process address info\n");
                db_printf("\t/n == show normal process info [default]\n");
                db_printf("\t/w == show process wait/emul info\n");
                return;
        }

        p = LIST_FIRST(&allproc);

        switch (*mode) {

        case 'a':
                db_printf(" PID %-10s %18s %18s %18s\n",
                    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
                break;
        case 'n':
                db_printf(" PID %5s %5s %5s S %10s %-9s %-16s\n",
                    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
                break;
        case 'w':
                db_printf(" PID %-16s %-8s %18s %s\n",
                    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
                break;
        }

        while (p != 0) {
                pp = p->p_pptr;
                if (p->p_stat) {

                        db_printf("%c%5d ", p == curproc ? '*' : ' ',
                            p->p_pid);

                        switch (*mode) {

                        case 'a':
                                db_printf("%-10.10s %18p %18p %18p\n",
                                    p->p_comm, p, p->p_addr, p->p_vmspace);
                                break;

                        case 'n':
                                db_printf("%5d %5d %5d %d %#10x "
                                    "%-9.9s %-16s\n",
                                    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
                                    p->p_cred->p_ruid, p->p_stat, p->p_flag,
                                    (p->p_wchan && p->p_wmesg) ?
                                        p->p_wmesg : "", p->p_comm);
                                break;

                        case 'w':
                                db_printf("%-16s %-8s %18p %s\n", p->p_comm,
                                    p->p_emul->e_name, p->p_wchan,
                                    (p->p_wchan && p->p_wmesg) ?
                                        p->p_wmesg : "");
                                break;

                        }
                }
                p = LIST_NEXT(p, p_list);
                if (p == 0 && doingzomb == 0) {
                        doingzomb = 1;
                        p = LIST_FIRST(&zombproc);
                }
        }
}
#endif