1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)kern_synch.c 7.10 (Berkeley) 04/03/90 7 */ 8 9 #include "machine/pte.h" 10 #include "machine/psl.h" 11 #include "machine/mtpr.h" 12 13 #include "param.h" 14 #include "systm.h" 15 #include "user.h" 16 #include "proc.h" 17 #include "vm.h" 18 #include "kernel.h" 19 #include "buf.h" 20 #include "tsleep.h" 21 22 /* 23 * Force switch among equal priority processes every 100ms. 24 */ 25 roundrobin() 26 { 27 28 runrun++; 29 aston(); 30 timeout(roundrobin, (caddr_t)0, hz / 10); 31 } 32 33 /* 34 * constants for digital decay and forget 35 * 90% of (p_cpu) usage in 5*loadav time 36 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 37 * Note that, as ps(1) mentions, this can let percentages 38 * total over 100% (I've seen 137.9% for 3 processes). 39 * 40 * Note that hardclock updates p_cpu and p_cpticks independently. 41 * 42 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 43 * That is, the system wants to compute a value of decay such 44 * that the following for loop: 45 * for (i = 0; i < (5 * loadavg); i++) 46 * p_cpu *= decay; 47 * will compute 48 * p_cpu *= 0.1; 49 * for all values of loadavg: 50 * 51 * Mathematically this loop can be expressed by saying: 52 * decay ** (5 * loadavg) ~= .1 53 * 54 * The system computes decay as: 55 * decay = (2 * loadavg) / (2 * loadavg + 1) 56 * 57 * We wish to prove that the system's computation of decay 58 * will always fulfill the equation: 59 * decay ** (5 * loadavg) ~= .1 60 * 61 * If we compute b as: 62 * b = 2 * loadavg 63 * then 64 * decay = b / (b + 1) 65 * 66 * We now need to prove two things: 67 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 68 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 69 * 70 * Facts: 71 * For x close to zero, exp(x) =~ 1 + x, since 72 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 73 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 74 * For x close to zero, ln(1+x) =~ x, since 75 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 76 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 77 * ln(.1) =~ -2.30 78 * 79 * Proof of (1): 80 * Solve (factor)**(power) =~ .1 given power (5*loadav): 81 * solving for factor, 82 * ln(factor) =~ (-2.30/5*loadav), or 83 * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) = 84 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 85 * 86 * Proof of (2): 87 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 88 * solving for power, 89 * power*ln(b/(b+1)) =~ -2.30, or 90 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 91 * 92 * Actual power values for the implemented algorithm are as follows: 93 * loadav: 1 2 3 4 94 * power: 5.68 10.32 14.94 19.55 95 */ 96 97 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 98 #define get_b(loadav) (2 * (loadav)) 99 #define get_pcpu(b, cpu) (((b) * ((cpu) & 0377)) / ((b) + FSCALE)) 100 101 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 102 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 103 104 /* 105 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 106 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 107 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 108 * 109 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 110 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 111 * 112 * If you dont want to bother with the faster/more-accurate formula, you 113 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 114 * (more general) method of calculating the %age of CPU used by a process. 115 */ 116 #define CCPU_SHIFT 11 117 118 /* 119 * Recompute process priorities, once a second 120 */ 121 schedcpu() 122 { 123 register fixpt_t b = get_b(averunnable[0]); 124 register struct proc *p; 125 register int s, a; 126 127 wakeup((caddr_t)&lbolt); 128 for (p = allproc; p != NULL; p = p->p_nxt) { 129 if (p->p_time != 127) 130 p->p_time++; 131 if (p->p_stat==SSLEEP || p->p_stat==SSTOP) 132 if (p->p_slptime != 127) 133 p->p_slptime++; 134 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 135 /* 136 * If the process has slept the entire second, 137 * stop recalculating its priority until it wakes up. 138 */ 139 if (p->p_slptime > 1) 140 continue; 141 /* 142 * p_pctcpu is only for ps. 143 */ 144 #if (FSHIFT >= CCPU_SHIFT) 145 p->p_pctcpu += (hz == 100)? 146 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 147 100 * (((fixpt_t) p->p_cpticks) 148 << (FSHIFT - CCPU_SHIFT)) / hz; 149 #else 150 p->p_pctcpu += ((FSCALE - ccpu) * 151 (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 152 #endif 153 p->p_cpticks = 0; 154 a = (int) get_pcpu(b, p->p_cpu) + p->p_nice; 155 if (a < 0) 156 a = 0; 157 if (a > 255) 158 a = 255; 159 p->p_cpu = a; 160 (void) setpri(p); 161 s = splhigh(); /* prevent state changes */ 162 if (p->p_pri >= PUSER) { 163 #define PPQ (128 / NQS) 164 if ((p != u.u_procp || noproc) && 165 p->p_stat == SRUN && 166 (p->p_flag & SLOAD) && 167 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 168 remrq(p); 169 p->p_pri = p->p_usrpri; 170 setrq(p); 171 } else 172 p->p_pri = p->p_usrpri; 173 } 174 splx(s); 175 } 176 vmmeter(); 177 if (runin!=0) { 178 runin = 0; 179 wakeup((caddr_t)&runin); 180 } 181 if (bclnlist != NULL) 182 wakeup((caddr_t)&proc[2]); 183 timeout(schedcpu, (caddr_t)0, hz); 184 } 185 186 /* 187 * Recalculate the priority of a process after it has slept for a while. 188 */ 189 updatepri(p) 190 register struct proc *p; 191 { 192 register int a = p->p_cpu & 0377; 193 register fixpt_t b = get_b(averunnable[0]); 194 195 p->p_slptime--; /* the first time was done in schedcpu */ 196 while (a && --p->p_slptime) 197 a = (int) get_pcpu(b, a) /* + p->p_nice */; 198 p->p_slptime = 0; 199 if (a < 0) 200 a = 0; 201 if (a > 255) 202 a = 255; 203 p->p_cpu = a; 204 (void) setpri(p); 205 } 206 207 #define SQSIZE 0100 /* Must be power of 2 */ 208 #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 209 struct slpque { 210 struct proc *sq_head; 211 struct proc **sq_tailp; 212 } slpque[SQSIZE]; 213 214 /* 215 * XXX - redo comments once interface is set 216 * 217 * Give up the processor till a wakeup occurs 218 * on chan, at which time the process 219 * enters the scheduling queue at priority pri. 220 * The most important effect of pri is that when 221 * pri<=PZERO a signal cannot disturb the sleep; 222 * if pri>PZERO signals will be processed. 223 * Callers of this routine must be prepared for 224 * premature return, and check that the reason for 225 * sleeping has gone away. 226 */ 227 228 /* 229 * interruptable sleep with longjmp processing. 230 * TEMPORARY UNTIL ALL CALLERS ARE TAUGHT TO UNWIND 231 */ 232 tsleep(chan, pri, wmesg, timeout) 233 caddr_t chan; 234 int pri; 235 char *wmesg; 236 int timeout; 237 { 238 if (pri <= PZERO) 239 panic("tsleep: pri <= PZERO"); 240 if (isleep(chan, pri, wmesg, timeout) == EINTR) 241 longjmp(&u.u_qsave); 242 } 243 244 /* 245 * Interruptable sleep. 246 * Sleeps on chan for time of at most timo/hz seconds (0 means no timeout). 247 * Returns 0 if awakened, EINTR if a signal needs to be delivered, 248 * or EWOULDBLOCK if the timeout expires. 249 */ 250 isleep(chan, pri, wmesg, timo) 251 caddr_t chan; 252 int pri; 253 char *wmesg; 254 int timo; 255 { 256 register struct proc *rp; 257 register struct slpque *qp; 258 register s; 259 extern int cold; 260 int endtsleep(); 261 262 rp = u.u_procp; 263 s = splhigh(); 264 if (cold || panicstr) { 265 /* 266 * After a panic, or during autoconfiguration, 267 * just give interrupts a chance, then just return; 268 * don't run any other procs or panic below, 269 * in case this is the idle process and already asleep. 270 * The splnet should be spl0 if the network was being used 271 * by the filesystem, but for now avoid network interrupts 272 * that might cause another panic. 273 */ 274 (void) splnet(); 275 splx(s); 276 return (0); 277 } 278 #ifdef DIAGNOSTIC 279 if (chan==0 || rp->p_stat != SRUN || rp->p_rlink) 280 panic("isleep"); 281 #endif 282 rp->p_wchan = chan; 283 rp->p_wmesg = wmesg; 284 rp->p_slptime = 0; 285 rp->p_pri = pri; 286 qp = &slpque[HASH(chan)]; 287 if (qp->sq_head == 0) 288 qp->sq_head = rp; 289 else 290 *qp->sq_tailp = rp; 291 *(qp->sq_tailp = &rp->p_link) = 0; 292 /* 293 * If we stop in issig(), wakeup may already have happened 294 * when we return (rp->p_wchan will then be 0). 295 */ 296 if (CURSIG(rp)) { 297 if (rp->p_wchan) 298 unsleep(rp); 299 rp->p_stat = SRUN; 300 splx(s); 301 return (EINTR); 302 } 303 if (rp->p_wchan == 0) { 304 splx(s); 305 return (0); 306 } 307 rp->p_stat = SSLEEP; 308 if (timo) 309 timeout(endtsleep, (caddr_t)rp, timo); 310 (void) spl0(); 311 u.u_ru.ru_nvcsw++; 312 swtch(); 313 curpri = rp->p_usrpri; 314 splx(s); 315 if (rp->p_flag & STIMO) { 316 rp->p_flag &= ~STIMO; 317 return (EWOULDBLOCK); 318 } 319 if (timo) 320 untimeout(endtsleep, (caddr_t)rp); 321 if (CURSIG(rp)) 322 return (EINTR); 323 return (0); 324 } 325 326 /* 327 * Implement timeout for tsleep. 328 * If process hasn't been awakened (wchan non-zero), 329 * set timeout flag and undo the sleep. If proc 330 * is stopped, just unsleep so it will remain stopped. 331 */ 332 endtsleep(p) 333 register struct proc *p; 334 { 335 int s = splhigh(); 336 337 if (p->p_wchan) { 338 if (p->p_stat == SSLEEP) 339 setrun(p); 340 else 341 unsleep(p); 342 p->p_flag |= STIMO; 343 } 344 splx(s); 345 } 346 347 int sleepdebug = 1; /* XXX */ 348 349 sleep(chan, pri) 350 caddr_t chan; 351 int pri; 352 { 353 register struct proc *rp; 354 register struct slpque *qp; 355 register s; 356 extern int cold; 357 358 rp = u.u_procp; 359 s = splhigh(); 360 if (cold || panicstr) { 361 /* 362 * After a panic, or during autoconfiguration, 363 * just give interrupts a chance, then just return; 364 * don't run any other procs or panic below, 365 * in case this is the idle process and already asleep. 366 * The splnet should be spl0 if the network was being used 367 * by the filesystem, but for now avoid network interrupts 368 * that might cause another panic. 369 */ 370 (void) splnet(); 371 splx(s); 372 return; 373 } 374 #ifdef DIAGNOSTIC 375 if (chan==0 || rp->p_stat != SRUN || rp->p_rlink) 376 panic("sleep"); 377 #endif 378 rp->p_wchan = chan; 379 rp->p_wmesg = NULL; 380 rp->p_slptime = 0; 381 rp->p_pri = pri; 382 qp = &slpque[HASH(chan)]; 383 if (qp->sq_head == 0) 384 qp->sq_head = rp; 385 else 386 *qp->sq_tailp = rp; 387 *(qp->sq_tailp = &rp->p_link) = 0; 388 if (pri > PZERO) { 389 if (sleepdebug) 390 printf("sleep called with pri > PZERO, wchan: %x\n", 391 chan); 392 /* 393 * If we stop in issig(), wakeup may already have happened 394 * when we return (rp->p_wchan will then be 0). 395 */ 396 if (CURSIG(rp)) { 397 if (rp->p_wchan) 398 unsleep(rp); 399 rp->p_stat = SRUN; 400 (void) spl0(); 401 goto psig; 402 } 403 if (rp->p_wchan == 0) 404 goto out; 405 rp->p_stat = SSLEEP; 406 (void) spl0(); 407 u.u_ru.ru_nvcsw++; 408 swtch(); 409 if (CURSIG(rp)) 410 goto psig; 411 } else { 412 rp->p_stat = SSLEEP; 413 (void) spl0(); 414 u.u_ru.ru_nvcsw++; 415 swtch(); 416 } 417 curpri = rp->p_usrpri; 418 out: 419 splx(s); 420 return; 421 422 /* 423 * If priority was low (>PZERO) and 424 * there has been a signal, execute non-local goto through 425 * u.u_qsave, aborting the system call in progress (see trap.c) 426 */ 427 psig: 428 longjmp(&u.u_qsave); 429 /*NOTREACHED*/ 430 } 431 432 /* 433 * Remove a process from its wait queue 434 */ 435 unsleep(p) 436 register struct proc *p; 437 { 438 register struct slpque *qp; 439 register struct proc **hp; 440 int s; 441 442 s = splhigh(); 443 if (p->p_wchan) { 444 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 445 while (*hp != p) 446 hp = &(*hp)->p_link; 447 *hp = p->p_link; 448 if (qp->sq_tailp == &p->p_link) 449 qp->sq_tailp = hp; 450 p->p_wchan = 0; 451 } 452 splx(s); 453 } 454 455 /* 456 * Wake up all processes sleeping on chan. 457 */ 458 wakeup(chan) 459 register caddr_t chan; 460 { 461 register struct slpque *qp; 462 register struct proc *p, **q; 463 int s; 464 465 s = splhigh(); 466 qp = &slpque[HASH(chan)]; 467 restart: 468 for (q = &qp->sq_head; p = *q; ) { 469 #ifdef DIAGNOSTIC 470 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 471 panic("wakeup"); 472 #endif 473 if (p->p_wchan==chan) { 474 p->p_wchan = 0; 475 *q = p->p_link; 476 if (qp->sq_tailp == &p->p_link) 477 qp->sq_tailp = q; 478 if (p->p_stat == SSLEEP) { 479 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 480 if (p->p_slptime > 1) 481 updatepri(p); 482 p->p_stat = SRUN; 483 if (p->p_flag & SLOAD) 484 setrq(p); 485 /* 486 * Since curpri is a usrpri, 487 * p->p_pri is always better than curpri. 488 */ 489 runrun++; 490 aston(); 491 if ((p->p_flag&SLOAD) == 0) { 492 if (runout != 0) { 493 runout = 0; 494 wakeup((caddr_t)&runout); 495 } 496 wantin++; 497 } 498 /* END INLINE EXPANSION */ 499 goto restart; 500 } 501 } else 502 q = &p->p_link; 503 } 504 splx(s); 505 } 506 507 /* 508 * Initialize the (doubly-linked) run queues 509 * to be empty. 510 */ 511 rqinit() 512 { 513 register int i; 514 515 for (i = 0; i < NQS; i++) 516 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 517 } 518 519 /* 520 * Set the process running; 521 * arrange for it to be swapped in if necessary. 522 */ 523 setrun(p) 524 register struct proc *p; 525 { 526 register int s; 527 528 s = splhigh(); 529 switch (p->p_stat) { 530 531 case 0: 532 case SWAIT: 533 case SRUN: 534 case SZOMB: 535 default: 536 panic("setrun"); 537 538 case SSTOP: 539 case SSLEEP: 540 unsleep(p); /* e.g. when sending signals */ 541 break; 542 543 case SIDL: 544 break; 545 } 546 p->p_stat = SRUN; 547 if (p->p_flag & SLOAD) 548 setrq(p); 549 splx(s); 550 if (p->p_slptime > 1) 551 updatepri(p); 552 if (p->p_pri < curpri) { 553 runrun++; 554 aston(); 555 } 556 if ((p->p_flag&SLOAD) == 0) { 557 if (runout != 0) { 558 runout = 0; 559 wakeup((caddr_t)&runout); 560 } 561 wantin++; 562 } 563 } 564 565 /* 566 * Set user priority. 567 * The rescheduling flag (runrun) 568 * is set if the priority is better 569 * than the currently running process. 570 */ 571 setpri(pp) 572 register struct proc *pp; 573 { 574 register int p; 575 576 p = (pp->p_cpu & 0377)/4; 577 p += PUSER + 2 * pp->p_nice; 578 if (pp->p_rssize > pp->p_maxrss && freemem < desfree) 579 p += 2*4; /* effectively, nice(4) */ 580 if (p > 127) 581 p = 127; 582 if (p < curpri) { 583 runrun++; 584 aston(); 585 } 586 pp->p_usrpri = p; 587 return (p); 588 } 589