/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_synch.c	8.2 (Berkeley) 09/05/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

/*
 * Force switch among equal priority processes every 100ms.
 * Requests a reschedule and then re-arms itself via timeout()
 * to fire again hz/10 ticks from now.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{

	need_resched();
	timeout(roundrobin, NULL, hz / 10);
}

/*
 * constants for digital decay and forget
 *	90% of (p_cpu) usage in 5*loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_cpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_cpu *= decay;
 * will compute
 * 	p_cpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *	therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *	therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	    solving for factor,
 *	    ln(factor) =~ (-2.30/5*loadav), or
 *	    factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	    solving for power,
 *	    power*ln(b/(b+1)) =~ -2.30, or
 *	    power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.
 *							QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you dont want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 * Walks the whole process list, ages each process's scheduling
 * statistics, and requeues a runnable process whose priority moved
 * to a different run queue.  Re-arms itself via timeout() each second.
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;

	/* anyone sleeping on lbolt gets a once-a-second wakeup here */
	wakeup((caddr_t)&lbolt);
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_time++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* decay accumulated cpu usage and fold in nice, clamped to a u_char */
		newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice;
		p->p_cpu = min(newcpu, UCHAR_MAX);
		resetpriority(p);
		if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			/*
			 * Only dequeue/requeue when the new priority lands
			 * in a different run queue; otherwise just store it.
			 */
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & SLOAD) &&
			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_pri = p->p_usrpri;
				setrq(p);
			} else
				p->p_pri = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	if (bclnlist != NULL)
		wakeup((caddr_t)pageproc);
	/* re-arm: run again one second from now */
	timeout(schedcpu, (void *)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
 * six times the loadfactor will decay p_cpu to zero.
197 */ 198 void 199 updatepri(p) 200 register struct proc *p; 201 { 202 register unsigned int newcpu = p->p_cpu; 203 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 204 205 if (p->p_slptime > 5 * loadfac) 206 p->p_cpu = 0; 207 else { 208 p->p_slptime--; /* the first time was done in schedcpu */ 209 while (newcpu && --p->p_slptime) 210 newcpu = (int) decay_cpu(loadfac, newcpu); 211 p->p_cpu = min(newcpu, UCHAR_MAX); 212 } 213 resetpriority(p); 214 } 215 216 #define SQSIZE 0100 /* Must be power of 2 */ 217 #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 218 struct slpque { 219 struct proc *sq_head; 220 struct proc **sq_tailp; 221 } slpque[SQSIZE]; 222 223 /* 224 * During autoconfiguration or after a panic, a sleep will simply 225 * lower the priority briefly to allow interrupts, then return. 226 * The priority to be used (safepri) is machine-dependent, thus this 227 * value is initialized and maintained in the machine-dependent layers. 228 * This priority will typically be 0, or the lowest priority 229 * that is safe for use on the interrupt stack; it can be made 230 * higher to block network software interrupts after panics. 231 */ 232 int safepri; 233 234 /* 235 * General sleep call. Suspends the current process until a wakeup is 236 * performed on the specified identifier. The process will then be made 237 * runnable with the specified priority. Sleeps at most timo/hz seconds 238 * (0 means no timeout). If pri includes PCATCH flag, signals are checked 239 * before and after sleeping, else signals are not checked. Returns 0 if 240 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 241 * signal needs to be delivered, ERESTART is returned if the current system 242 * call should be restarted if possible, and EINTR is returned if the system 243 * call should be interrupted by the signal (return EINTR). 
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	char *wmesg;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register s;
	int sig, catch = priority & PCATCH;
	extern int cold;
	void endtsleep __P((void *));

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_rlink)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_pri = priority & PRIMASK;
	/* append ourselves to the tail of this channel's hashed sleep queue */
	qp = &slpque[HASH(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_link) = 0;
	if (timo)
		timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= SSINTR;
		if (sig = CURSIG(p)) {
			/* signal already pending: undo the sleep, don't block */
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			/* wakeup arrived while stopped in CURSIG */
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	swtch();
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~SSINTR;
	if (p->p_flag & STIMO) {
		/* endtsleep fired: timeout, unless a caught signal takes precedence */
		p->p_flag &= ~STIMO;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		/* EINTR or ERESTART depending on the signal's sigintr setting */
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrun(p);
		else
			unsleep(p);
		p->p_flag |= STIMO;
	}
	splx(s);
}

/*
 * Short-term, non-interruptable sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register s;
	extern int cold;

#ifdef DIAGNOSTIC
	/* this legacy entry point must not be used for interruptible sleeps */
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %x\n",
		priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_rlink)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_pri = priority;
	/* append ourselves to the tail of this channel's hashed sleep queue */
	qp = &slpque[HASH(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_link) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	swtch();
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		/* walk the hash chain to the link pointing at p, then unlink */
		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_link;
		*hp = p->p_link;
		/* if p was the tail, the tail pointer moves back to its predecessor */
		if (qp->sq_tailp == &p->p_link)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[HASH(ident)];
restart:
	for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
		if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			/* unlink p from the sleep queue */
			p->p_wchan = 0;
			*q = p->p_link;
			if (qp->sq_tailp == &p->p_link)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & SLOAD)
					setrq(p);
				/*
				 * Since curpriority is a user priority,
				 * p->p_pri is always better than curpriority.
				 */
				if ((p->p_flag&SLOAD) == 0)
					wakeup((caddr_t)&proc0);
				else
					need_resched();
				/* END INLINE EXPANSION */
				/* updatepri et al. may have run; rescan from head */
				goto restart;
			}
		} else
			q = &p->p_link;
	}
	splx(s);
}

/*
 * The machine independent parts of swtch().
 * Must be called at splstatclock() or higher.
 */
void
swtch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	register long s, u;
	struct timeval tv;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	/* normalize the microsecond field into [0, 1000000) */
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			/* raise the soft limit so SIGXCPU repeats every 5 secs */
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
		p->p_nice = NZERO + 4;
		resetpriority(p);
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	cpu_swtch(p);
	microtime(&runtime);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
rqinit()
{
	register int i;

	/* each queue header points at itself when empty */
	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrun(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {

	case 0:
	case SWAIT:
	case SRUN:
	case SZOMB:
	default:
		panic("setrun");

	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & SLOAD)
		setrq(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	/* swapped out: wake the swapper (proc0); else preempt if higher priority */
	if ((p->p_flag&SLOAD) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_pri < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
621 */ 622 void 623 resetpriority(p) 624 register struct proc *p; 625 { 626 register unsigned int newpriority; 627 628 newpriority = PUSER + p->p_cpu / 4 + 2 * p->p_nice; 629 newpriority = min(newpriority, MAXPRI); 630 p->p_usrpri = newpriority; 631 if (newpriority < curpriority) 632 need_resched(); 633 } 634