1 /*- 2 * Copyright (c) 1982, 1986, 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 * 8 * @(#)kern_synch.c 8.5 (Berkeley) 09/23/93 9 */ 10 11 #include <sys/param.h> 12 #include <sys/systm.h> 13 #include <sys/proc.h> 14 #include <sys/kernel.h> 15 #include <sys/buf.h> 16 #include <sys/signalvar.h> 17 #include <sys/resourcevar.h> 18 #include <sys/vmmeter.h> 19 #ifdef KTRACE 20 #include <sys/ktrace.h> 21 #endif 22 23 #include <machine/cpu.h> 24 25 u_char curpriority; /* usrpri of curproc */ 26 int lbolt; /* once a second sleep address */ 27 28 /* 29 * Force switch among equal priority processes every 100ms. 30 */ 31 /* ARGSUSED */ 32 void 33 roundrobin(arg) 34 void *arg; 35 { 36 37 need_resched(); 38 timeout(roundrobin, NULL, hz / 10); 39 } 40 41 /* 42 * Constants for digital decay and forget: 43 * 90% of (p_estcpu) usage in 5 * loadav time 44 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 45 * Note that, as ps(1) mentions, this can let percentages 46 * total over 100% (I've seen 137.9% for 3 processes). 47 * 48 * Note that hardclock updates p_estcpu and p_cpticks independently. 49 * 50 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. 51 * That is, the system wants to compute a value of decay such 52 * that the following for loop: 53 * for (i = 0; i < (5 * loadavg); i++) 54 * p_estcpu *= decay; 55 * will compute 56 * p_estcpu *= 0.1; 57 * for all values of loadavg: 58 * 59 * Mathematically this loop can be expressed by saying: 60 * decay ** (5 * loadavg) ~= .1 61 * 62 * The system computes decay as: 63 * decay = (2 * loadavg) / (2 * loadavg + 1) 64 * 65 * We wish to prove that the system's computation of decay 66 * will always fulfill the equation: 67 * decay ** (5 * loadavg) ~= .1 68 * 69 * If we compute b as: 70 * b = 2 * loadavg 71 * then 72 * decay = b / (b + 1) 73 * 74 * We now need to prove two things: 75 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 76 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 77 * 78 * Facts: 79 * For x close to zero, exp(x) =~ 1 + x, since 80 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 81 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 82 * For x close to zero, ln(1+x) =~ x, since 83 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 84 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 85 * ln(.1) =~ -2.30 86 * 87 * Proof of (1): 88 * Solve (factor)**(power) =~ .1 given power (5*loadav): 89 * solving for factor, 90 * ln(factor) =~ (-2.30/5*loadav), or 91 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 92 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 93 * 94 * Proof of (2): 95 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 96 * solving for power, 97 * power*ln(b/(b+1)) =~ -2.30, or 98 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 99 * 100 * Actual power values for the implemented algorithm are as follows: 101 * loadav: 1 2 3 4 102 * power: 5.68 10.32 14.94 19.55 103 */ 104 105 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 106 #define loadfactor(loadav) (2 * (loadav)) 107 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 108 109 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 110 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 111 112 /* 113 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 114 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 115 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 116 * 117 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 118 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 119 * 120 * If you dont want to bother with the faster/more-accurate formula, you 121 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 122 * (more general) method of calculating the %age of CPU used by a process. 123 */ 124 #define CCPU_SHIFT 11 125 126 /* 127 * Recompute process priorities, every hz ticks. 128 */ 129 /* ARGSUSED */ 130 void 131 schedcpu(arg) 132 void *arg; 133 { 134 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 135 register struct proc *p; 136 register int s; 137 register unsigned int newcpu; 138 139 wakeup((caddr_t)&lbolt); 140 for (p = (struct proc *)allproc; p != NULL; p = p->p_next) { 141 /* 142 * Increment time in/out of memory and sleep time 143 * (if sleeping). We ignore overflow; with 16-bit int's 144 * (remember them?) overflow takes 45 days. 145 */ 146 p->p_swtime++; 147 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 148 p->p_slptime++; 149 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 150 /* 151 * If the process has slept the entire second, 152 * stop recalculating its priority until it wakes up. 153 */ 154 if (p->p_slptime > 1) 155 continue; 156 s = splstatclock(); /* prevent state changes */ 157 /* 158 * p_pctcpu is only for ps. 159 */ 160 #if (FSHIFT >= CCPU_SHIFT) 161 p->p_pctcpu += (hz == 100)? 162 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 163 100 * (((fixpt_t) p->p_cpticks) 164 << (FSHIFT - CCPU_SHIFT)) / hz; 165 #else 166 p->p_pctcpu += ((FSCALE - ccpu) * 167 (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 168 #endif 169 p->p_cpticks = 0; 170 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice; 171 p->p_estcpu = min(newcpu, UCHAR_MAX); 172 resetpriority(p); 173 if (p->p_priority >= PUSER) { 174 #define PPQ (128 / NQS) /* priorities per queue */ 175 if ((p != curproc) && 176 p->p_stat == SRUN && 177 (p->p_flag & P_INMEM) && 178 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { 179 remrq(p); 180 p->p_priority = p->p_usrpri; 181 setrunqueue(p); 182 } else 183 p->p_priority = p->p_usrpri; 184 } 185 splx(s); 186 } 187 vmmeter(); 188 if (bclnlist != NULL) 189 wakeup((caddr_t)pageproc); 190 timeout(schedcpu, (void *)0, hz); 191 } 192 193 /* 194 * Recalculate the priority of a process after it has slept for a while. 195 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 196 * least six times the loadfactor will decay p_estcpu to zero. 197 */ 198 void 199 updatepri(p) 200 register struct proc *p; 201 { 202 register unsigned int newcpu = p->p_estcpu; 203 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 204 205 if (p->p_slptime > 5 * loadfac) 206 p->p_estcpu = 0; 207 else { 208 p->p_slptime--; /* the first time was done in schedcpu */ 209 while (newcpu && --p->p_slptime) 210 newcpu = (int) decay_cpu(loadfac, newcpu); 211 p->p_estcpu = min(newcpu, UCHAR_MAX); 212 } 213 resetpriority(p); 214 } 215 216 /* 217 * We're only looking at 7 bits of the address; everything is 218 * aligned to 4, lots of things are aligned to greater powers 219 * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 220 */ 221 #define TABLESIZE 128 222 #define LOOKUP(x) (((int)(x) >> 8) & (TABLESIZE - 1)) 223 struct slpque { 224 struct proc *sq_head; 225 struct proc **sq_tailp; 226 } slpque[TABLESIZE]; 227 228 /* 229 * During autoconfiguration or after a panic, a sleep will simply 230 * lower the priority briefly to allow interrupts, then return. 231 * The priority to be used (safepri) is machine-dependent, thus this 232 * value is initialized and maintained in the machine-dependent layers. 233 * This priority will typically be 0, or the lowest priority 234 * that is safe for use on the interrupt stack; it can be made 235 * higher to block network software interrupts after panics. 236 */ 237 int safepri; 238 239 /* 240 * General sleep call. Suspends the current process until a wakeup is 241 * performed on the specified identifier. The process will then be made 242 * runnable with the specified priority. Sleeps at most timo/hz seconds 243 * (0 means no timeout). If pri includes PCATCH flag, signals are checked 244 * before and after sleeping, else signals are not checked. Returns 0 if 245 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 246 * signal needs to be delivered, ERESTART is returned if the current system 247 * call should be restarted if possible, and EINTR is returned if the system 248 * call should be interrupted by the signal (return EINTR). 249 */ 250 int 251 tsleep(ident, priority, wmesg, timo) 252 void *ident; 253 int priority, timo; 254 char *wmesg; 255 { 256 register struct proc *p = curproc; 257 register struct slpque *qp; 258 register s; 259 int sig, catch = priority & PCATCH; 260 extern int cold; 261 void endtsleep __P((void *)); 262 263 #ifdef KTRACE 264 if (KTRPOINT(p, KTR_CSW)) 265 ktrcsw(p->p_tracep, 1, 0); 266 #endif 267 s = splhigh(); 268 if (cold || panicstr) { 269 /* 270 * After a panic, or during autoconfiguration, 271 * just give interrupts a chance, then just return; 272 * don't run any other procs or panic below, 273 * in case this is the idle process and already asleep. 274 */ 275 splx(safepri); 276 splx(s); 277 return (0); 278 } 279 #ifdef DIAGNOSTIC 280 if (ident == NULL || p->p_stat != SRUN || p->p_back) 281 panic("tsleep"); 282 #endif 283 p->p_wchan = ident; 284 p->p_wmesg = wmesg; 285 p->p_slptime = 0; 286 p->p_priority = priority & PRIMASK; 287 qp = &slpque[LOOKUP(ident)]; 288 if (qp->sq_head == 0) 289 qp->sq_head = p; 290 else 291 *qp->sq_tailp = p; 292 *(qp->sq_tailp = &p->p_forw) = 0; 293 if (timo) 294 timeout(endtsleep, (void *)p, timo); 295 /* 296 * We put ourselves on the sleep queue and start our timeout 297 * before calling CURSIG, as we could stop there, and a wakeup 298 * or a SIGCONT (or both) could occur while we were stopped. 299 * A SIGCONT would cause us to be marked as SSLEEP 300 * without resuming us, thus we must be ready for sleep 301 * when CURSIG is called. If the wakeup happens while we're 302 * stopped, p->p_wchan will be 0 upon return from CURSIG. 303 */ 304 if (catch) { 305 p->p_flag |= P_SINTR; 306 if (sig = CURSIG(p)) { 307 if (p->p_wchan) 308 unsleep(p); 309 p->p_stat = SRUN; 310 goto resume; 311 } 312 if (p->p_wchan == 0) { 313 catch = 0; 314 goto resume; 315 } 316 } else 317 sig = 0; 318 p->p_stat = SSLEEP; 319 p->p_stats->p_ru.ru_nvcsw++; 320 mi_switch(); 321 resume: 322 curpriority = p->p_usrpri; 323 splx(s); 324 p->p_flag &= ~P_SINTR; 325 if (p->p_flag & P_TIMEOUT) { 326 p->p_flag &= ~P_TIMEOUT; 327 if (sig == 0) { 328 #ifdef KTRACE 329 if (KTRPOINT(p, KTR_CSW)) 330 ktrcsw(p->p_tracep, 0, 0); 331 #endif 332 return (EWOULDBLOCK); 333 } 334 } else if (timo) 335 untimeout(endtsleep, (void *)p); 336 if (catch && (sig != 0 || (sig = CURSIG(p)))) { 337 #ifdef KTRACE 338 if (KTRPOINT(p, KTR_CSW)) 339 ktrcsw(p->p_tracep, 0, 0); 340 #endif 341 if (p->p_sigacts->ps_sigintr & sigmask(sig)) 342 return (EINTR); 343 return (ERESTART); 344 } 345 #ifdef KTRACE 346 if (KTRPOINT(p, KTR_CSW)) 347 ktrcsw(p->p_tracep, 0, 0); 348 #endif 349 return (0); 350 } 351 352 /* 353 * Implement timeout for tsleep. 354 * If process hasn't been awakened (wchan non-zero), 355 * set timeout flag and undo the sleep. If proc 356 * is stopped, just unsleep so it will remain stopped. 357 */ 358 void 359 endtsleep(arg) 360 void *arg; 361 { 362 register struct proc *p; 363 int s; 364 365 p = (struct proc *)arg; 366 s = splhigh(); 367 if (p->p_wchan) { 368 if (p->p_stat == SSLEEP) 369 setrunnable(p); 370 else 371 unsleep(p); 372 p->p_flag |= P_TIMEOUT; 373 } 374 splx(s); 375 } 376 377 /* 378 * Short-term, non-interruptable sleep. 379 */ 380 void 381 sleep(ident, priority) 382 void *ident; 383 int priority; 384 { 385 register struct proc *p = curproc; 386 register struct slpque *qp; 387 register s; 388 extern int cold; 389 390 #ifdef DIAGNOSTIC 391 if (priority > PZERO) { 392 printf("sleep called with priority %d > PZERO, wchan: %x\n", 393 priority, ident); 394 panic("old sleep"); 395 } 396 #endif 397 s = splhigh(); 398 if (cold || panicstr) { 399 /* 400 * After a panic, or during autoconfiguration, 401 * just give interrupts a chance, then just return; 402 * don't run any other procs or panic below, 403 * in case this is the idle process and already asleep. 404 */ 405 splx(safepri); 406 splx(s); 407 return; 408 } 409 #ifdef DIAGNOSTIC 410 if (ident == NULL || p->p_stat != SRUN || p->p_back) 411 panic("sleep"); 412 #endif 413 p->p_wchan = ident; 414 p->p_wmesg = NULL; 415 p->p_slptime = 0; 416 p->p_priority = priority; 417 qp = &slpque[LOOKUP(ident)]; 418 if (qp->sq_head == 0) 419 qp->sq_head = p; 420 else 421 *qp->sq_tailp = p; 422 *(qp->sq_tailp = &p->p_forw) = 0; 423 p->p_stat = SSLEEP; 424 p->p_stats->p_ru.ru_nvcsw++; 425 #ifdef KTRACE 426 if (KTRPOINT(p, KTR_CSW)) 427 ktrcsw(p->p_tracep, 1, 0); 428 #endif 429 mi_switch(); 430 #ifdef KTRACE 431 if (KTRPOINT(p, KTR_CSW)) 432 ktrcsw(p->p_tracep, 0, 0); 433 #endif 434 curpriority = p->p_usrpri; 435 splx(s); 436 } 437 438 /* 439 * Remove a process from its wait queue 440 */ 441 void 442 unsleep(p) 443 register struct proc *p; 444 { 445 register struct slpque *qp; 446 register struct proc **hp; 447 int s; 448 449 s = splhigh(); 450 if (p->p_wchan) { 451 hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head; 452 while (*hp != p) 453 hp = &(*hp)->p_forw; 454 *hp = p->p_forw; 455 if (qp->sq_tailp == &p->p_forw) 456 qp->sq_tailp = hp; 457 p->p_wchan = 0; 458 } 459 splx(s); 460 } 461 462 /* 463 * Make all processes sleeping on the specified identifier runnable. 464 */ 465 void 466 wakeup(ident) 467 register void *ident; 468 { 469 register struct slpque *qp; 470 register struct proc *p, **q; 471 int s; 472 473 s = splhigh(); 474 qp = &slpque[LOOKUP(ident)]; 475 restart: 476 for (q = &qp->sq_head; p = *q; ) { 477 #ifdef DIAGNOSTIC 478 if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP) 479 panic("wakeup"); 480 #endif 481 if (p->p_wchan == ident) { 482 p->p_wchan = 0; 483 *q = p->p_forw; 484 if (qp->sq_tailp == &p->p_forw) 485 qp->sq_tailp = q; 486 if (p->p_stat == SSLEEP) { 487 /* OPTIMIZED EXPANSION OF setrunnable(p); */ 488 if (p->p_slptime > 1) 489 updatepri(p); 490 p->p_slptime = 0; 491 p->p_stat = SRUN; 492 if (p->p_flag & P_INMEM) 493 setrunqueue(p); 494 /* 495 * Since curpriority is a user priority, 496 * p->p_priority is always better than 497 * curpriority. 498 */ 499 if ((p->p_flag & P_INMEM) == 0) 500 wakeup((caddr_t)&proc0); 501 else 502 need_resched(); 503 /* END INLINE EXPANSION */ 504 goto restart; 505 } 506 } else 507 q = &p->p_forw; 508 } 509 splx(s); 510 } 511 512 /* 513 * The machine independent parts of mi_switch(). 514 * Must be called at splstatclock() or higher. 515 */ 516 void 517 mi_switch() 518 { 519 register struct proc *p = curproc; /* XXX */ 520 register struct rlimit *rlim; 521 register long s, u; 522 struct timeval tv; 523 524 /* 525 * Compute the amount of time during which the current 526 * process was running, and add that to its total so far. 527 */ 528 microtime(&tv); 529 u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec); 530 s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec); 531 if (u < 0) { 532 u += 1000000; 533 s--; 534 } else if (u >= 1000000) { 535 u -= 1000000; 536 s++; 537 } 538 p->p_rtime.tv_usec = u; 539 p->p_rtime.tv_sec = s; 540 541 /* 542 * Check if the process exceeds its cpu resource allocation. 543 * If over max, kill it. In any case, if it has run for more 544 * than 10 minutes, reduce priority to give others a chance. 545 */ 546 rlim = &p->p_rlimit[RLIMIT_CPU]; 547 if (s >= rlim->rlim_cur) { 548 if (s >= rlim->rlim_max) 549 psignal(p, SIGKILL); 550 else { 551 psignal(p, SIGXCPU); 552 if (rlim->rlim_cur < rlim->rlim_max) 553 rlim->rlim_cur += 5; 554 } 555 } 556 if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) { 557 p->p_nice = NZERO + 4; 558 resetpriority(p); 559 } 560 561 /* 562 * Pick a new current process and record its start time. 563 */ 564 cnt.v_swtch++; 565 cpu_switch(p); 566 microtime(&runtime); 567 } 568 569 /* 570 * Initialize the (doubly-linked) run queues 571 * to be empty. 572 */ 573 rqinit() 574 { 575 register int i; 576 577 for (i = 0; i < NQS; i++) 578 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 579 } 580 581 /* 582 * Change process state to be runnable, 583 * placing it on the run queue if it is in memory, 584 * and awakening the swapper if it isn't in memory. 585 */ 586 void 587 setrunnable(p) 588 register struct proc *p; 589 { 590 register int s; 591 592 s = splhigh(); 593 switch (p->p_stat) { 594 case 0: 595 case SRUN: 596 case SZOMB: 597 default: 598 panic("setrunnable"); 599 case SSTOP: 600 case SSLEEP: 601 unsleep(p); /* e.g. when sending signals */ 602 break; 603 604 case SIDL: 605 break; 606 } 607 p->p_stat = SRUN; 608 if (p->p_flag & P_INMEM) 609 setrunqueue(p); 610 splx(s); 611 if (p->p_slptime > 1) 612 updatepri(p); 613 p->p_slptime = 0; 614 if ((p->p_flag & P_INMEM) == 0) 615 wakeup((caddr_t)&proc0); 616 else if (p->p_priority < curpriority) 617 need_resched(); 618 } 619 620 /* 621 * Compute the priority of a process when running in user mode. 622 * Arrange to reschedule if the resulting priority is better 623 * than that of the current process. 624 */ 625 void 626 resetpriority(p) 627 register struct proc *p; 628 { 629 register unsigned int newpriority; 630 631 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; 632 newpriority = min(newpriority, MAXPRI); 633 p->p_usrpri = newpriority; 634 if (newpriority < curpriority) 635 need_resched(); 636 } 637