/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *      @(#)kern_synch.c        7.9 (Berkeley) 05/29/89
 */

#include "machine/pte.h"
#include "machine/psl.h"
#include "machine/mtpr.h"

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "vm.h"
#include "kernel.h"
#include "buf.h"

/*
 * Force a switch among equal priority processes every 100ms.
 */
roundrobin()
{

        runrun++;
        aston();
        timeout(roundrobin, (caddr_t)0, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *      90% of (p_cpu) usage in 5*loadav time
 *      95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 * Note that, as ps(1) mentions, this can let percentages
 * total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_cpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *      for (i = 0; i < (5 * loadavg); i++)
 *              p_cpu *= decay;
 * will compute
 *      p_cpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 *      decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *      decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *      decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *      b = 2 * loadavg
 * then
 *      decay = b / (b + 1)
 *
 * We now need to prove two things:
 *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *      For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = x**0/0! + x**1/1! + x**2/2! + ...;
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *      For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     (-1 < x < 1);
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *      ln(.1) =~ -2.30
 *
 * Proof of (1):
 *      Solve (factor)**(power) =~ .1 given power (5*loadav):
 *      solving for factor,
 *              ln(factor) =~ -2.30/(5*loadav), or
 *              factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *                  exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *      Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *      solving for power,
 *              power*ln(b/(b+1)) =~ -2.30, or
 *              power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav:  1       2       3       4
 *      power:   5.68    10.32   14.94   19.55
 */
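
/*
 * Illustrative check of the derivation above.  This is not kernel code;
 * it is a user-level sketch, kept under "notdef", that solves
 * (b/(b+1)) ** power = .1 exactly for power with b = 2*loadav, and should
 * reproduce the table of power values listed in the comment.
 */
#ifdef notdef
#include <math.h>
#include <stdio.h>

main()
{
        double loadav, b, power;

        for (loadav = 1; loadav <= 4; loadav++) {
                b = 2 * loadav;
                /* solve (b/(b+1)) ** power = .1 for power */
                power = log(0.1) / log(b / (b + 1));
                printf("loadav %.0f: power %.2f (5*loadav = %.0f)\n",
                    loadav, power, 5 * loadav);
        }
}
#endif /* notdef */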

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define get_b(loadav)           (2 * (loadav))
#define get_pcpu(b, cpu)        (((b) * ((cpu) & 0377)) / ((b) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1), which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define CCPU_SHIFT      11
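
/*
 * Illustrative sketch (user-level, kept under "notdef", not part of the
 * kernel) of the two p_pctcpu forms used in schedcpu() below, for
 * hz == 100.  EX_FSHIFT and EX_FSCALE are stand-ins assumed only for this
 * example; the real FSHIFT and FSCALE come from "param.h".  For the tick
 * counts printed, the fast form and the general form should agree.
 */
#ifdef notdef
#include <stdio.h>

#define EX_FSHIFT       11                      /* assumed; see param.h */
#define EX_FSCALE       (1 << EX_FSHIFT)

main()
{
        long excpu = 0.95122942450071400909 * EX_FSCALE; /* exp(-1/20) */
        long fast, general;
        int hz = 100, cpticks;

        for (cpticks = 0; cpticks <= hz; cpticks += 25) {
                /* fast form, used when FSHIFT >= CCPU_SHIFT and hz == 100 */
                fast = ((long)cpticks) << (EX_FSHIFT - CCPU_SHIFT);
                /* general form */
                general = ((EX_FSCALE - excpu) *
                    (cpticks * EX_FSCALE / hz)) >> EX_FSHIFT;
                printf("cpticks %3d: fast %3ld general %3ld\n",
                    cpticks, fast, general);
        }
}
#endif /* notdef */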

/*
 * Recompute process priorities, once a second.
 */
schedcpu()
{
        register fixpt_t b = get_b(averunnable[0]);
        register struct proc *p;
        register int s, a;

        wakeup((caddr_t)&lbolt);
        for (p = allproc; p != NULL; p = p->p_nxt) {
                if (p->p_time != 127)
                        p->p_time++;
                if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
                        if (p->p_slptime != 127)
                                p->p_slptime++;
                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
                /*
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (p->p_slptime > 1)
                        continue;
                /*
                 * p_pctcpu is only for ps.
                 */
#if     (FSHIFT >= CCPU_SHIFT)
                p->p_pctcpu += (hz == 100)?
                        ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                        100 * (((fixpt_t) p->p_cpticks)
                                << (FSHIFT - CCPU_SHIFT)) / hz;
#else
                p->p_pctcpu += ((FSCALE - ccpu) *
                        (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
                p->p_cpticks = 0;
                a = (int) get_pcpu(b, p->p_cpu) + p->p_nice;
                if (a < 0)
                        a = 0;
                if (a > 255)
                        a = 255;
                p->p_cpu = a;
                (void) setpri(p);
                s = splhigh();  /* prevent state changes */
                if (p->p_pri >= PUSER) {
#define PPQ     (128 / NQS)
                        if ((p != u.u_procp || noproc) &&
                            p->p_stat == SRUN &&
                            (p->p_flag & SLOAD) &&
                            (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
                                remrq(p);
                                p->p_pri = p->p_usrpri;
                                setrq(p);
                        } else
                                p->p_pri = p->p_usrpri;
                }
                splx(s);
        }
        vmmeter();
        if (runin != 0) {
                runin = 0;
                wakeup((caddr_t)&runin);
        }
        if (bclnlist != NULL)
                wakeup((caddr_t)&proc[2]);
        timeout(schedcpu, (caddr_t)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 */
updatepri(p)
        register struct proc *p;
{
        register int a = p->p_cpu & 0377;
        register fixpt_t b = get_b(averunnable[0]);

        p->p_slptime--;         /* the first time was done in schedcpu */
        while (a && --p->p_slptime)
                a = (int) get_pcpu(b, a) /* + p->p_nice */;
        p->p_slptime = 0;
        if (a < 0)
                a = 0;
        if (a > 255)
                a = 255;
        p->p_cpu = a;
        (void) setpri(p);
}
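
/*
 * Worked example of the decay filter used above, assuming a load average
 * of 1.0 (so averunnable[0] == FSCALE):
 *      b = get_b(FSCALE) = 2*FSCALE
 *      get_pcpu(b, cpu) = (2*FSCALE * cpu) / (3*FSCALE) ~= (2/3) * cpu
 * One second of decay therefore leaves about 2/3 of p_cpu, and five
 * seconds (5 * loadav) leave (2/3)**5 ~= 0.13 of it, which is the intended
 * "forget 90% in 5*loadav seconds" to within the approximations made in
 * the derivation at the top of this file.  updatepri() above applies the
 * same filter roughly once for each second the process stayed asleep.
 */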

#define SQSIZE 0100     /* Must be power of 2 */
#define HASH(x) (( (int) x >> 5) & (SQSIZE-1))
struct slpque {
        struct proc *sq_head;
        struct proc **sq_tailp;
} slpque[SQSIZE];

/*
 * Give up the processor till a wakeup occurs
 * on chan, at which time the process
 * enters the scheduling queue at priority pri.
 * The most important effect of pri is that when
 * pri<=PZERO a signal cannot disturb the sleep;
 * if pri>PZERO signals will be processed.
 * Callers of this routine must be prepared for
 * premature return, and check that the reason for
 * sleeping has gone away.
 */
sleep(chan, pri)
        caddr_t chan;
        int pri;
{
        register struct proc *rp;
        register struct slpque *qp;
        register s;
        extern int cold;

        rp = u.u_procp;
        s = splhigh();
        if (cold || panicstr) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 * The splnet should be spl0 if the network was being used
                 * by the filesystem, but for now avoid network interrupts
                 * that might cause another panic.
                 */
                (void) splnet();
                splx(s);
                return;
        }
        if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
                panic("sleep");
        rp->p_wchan = chan;
        rp->p_slptime = 0;
        rp->p_pri = pri;
        qp = &slpque[HASH(chan)];
        if (qp->sq_head == 0)
                qp->sq_head = rp;
        else
                *qp->sq_tailp = rp;
        *(qp->sq_tailp = &rp->p_link) = 0;
        if (pri > PZERO) {
                /*
                 * If we stop in issig(), wakeup may already have happened
                 * when we return (rp->p_wchan will then be 0).
                 */
                if (ISSIG(rp)) {
                        if (rp->p_wchan)
                                unsleep(rp);
                        rp->p_stat = SRUN;
                        (void) spl0();
                        goto psig;
                }
                if (rp->p_wchan == 0)
                        goto out;
                rp->p_stat = SSLEEP;
                (void) spl0();
                u.u_ru.ru_nvcsw++;
                swtch();
                if (ISSIG(rp))
                        goto psig;
        } else {
                rp->p_stat = SSLEEP;
                (void) spl0();
                u.u_ru.ru_nvcsw++;
                swtch();
        }
        curpri = rp->p_usrpri;
out:
        splx(s);
        return;

        /*
         * If priority was low (>PZERO) and
         * there has been a signal, execute a non-local goto through
         * u.u_qsave, aborting the system call in progress (see trap.c).
         */
psig:
        longjmp(&u.u_qsave);
        /*NOTREACHED*/
}
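
/*
 * The premature-return caveat above means callers re-test their condition
 * in a loop.  The fragment below is an illustrative sketch only (kept
 * under "notdef"); struct ex_resource, the ex_ routines, and the choice
 * of sleep priority are made up for the example, but the shape matches
 * the usual sleep/wakeup locking pattern.
 */
#ifdef notdef
struct ex_resource {
        int     ex_busy;                /* resource is in use */
        int     ex_wanted;              /* someone is sleeping on it */
};

ex_acquire(rp)
        register struct ex_resource *rp;
{

        while (rp->ex_busy) {
                rp->ex_wanted = 1;
                /* < PZERO: don't let signals interrupt the wait */
                sleep((caddr_t)rp, PZERO - 1);
        }
        rp->ex_busy = 1;
}

ex_release(rp)
        register struct ex_resource *rp;
{

        rp->ex_busy = 0;
        if (rp->ex_wanted) {
                rp->ex_wanted = 0;
                wakeup((caddr_t)rp);
        }
}
#endif /* notdef */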

/*
 * Remove a process from its wait queue.
 */
unsleep(p)
        register struct proc *p;
{
        register struct slpque *qp;
        register struct proc **hp;
        int s;

        s = splhigh();
        if (p->p_wchan) {
                hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
                while (*hp != p)
                        hp = &(*hp)->p_link;
                *hp = p->p_link;
                if (qp->sq_tailp == &p->p_link)
                        qp->sq_tailp = hp;
                p->p_wchan = 0;
        }
        splx(s);
}

/*
 * Wake up all processes sleeping on chan.
 */
wakeup(chan)
        register caddr_t chan;
{
        register struct slpque *qp;
        register struct proc *p, **q;
        int s;

        s = splhigh();
        qp = &slpque[HASH(chan)];
restart:
        for (q = &qp->sq_head; p = *q; ) {
                if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
                        panic("wakeup");
                if (p->p_wchan==chan) {
                        p->p_wchan = 0;
                        *q = p->p_link;
                        if (qp->sq_tailp == &p->p_link)
                                qp->sq_tailp = q;
                        if (p->p_stat == SSLEEP) {
                                /* OPTIMIZED INLINE EXPANSION OF setrun(p) */
                                if (p->p_slptime > 1)
                                        updatepri(p);
                                p->p_stat = SRUN;
                                if (p->p_flag & SLOAD)
                                        setrq(p);
                                /*
                                 * Since curpri is a usrpri,
                                 * p->p_pri is always better than curpri.
                                 */
                                runrun++;
                                aston();
                                if ((p->p_flag&SLOAD) == 0) {
                                        if (runout != 0) {
                                                runout = 0;
                                                wakeup((caddr_t)&runout);
                                        }
                                        wantin++;
                                }
                                /* END INLINE EXPANSION */
                                goto restart;
                        }
                } else
                        q = &p->p_link;
        }
        splx(s);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
rqinit()
{
        register int i;

        for (i = 0; i < NQS; i++)
                qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}
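
/*
 * Each of the NQS run queues initialized above is a circular,
 * doubly-linked list whose header acts as a sentinel, so an empty queue
 * points at itself.  The real setrq() and remrq() are supplied by
 * machine-dependent code; the sketch below (kept under "notdef", with
 * made-up ex_ names) shows the pointer manipulation an enqueue and a
 * dequeue amount to.  It treats the queue header as if it were a process,
 * which assumes p_link and p_rlink are the first members of struct proc
 * (the same pun behind the cast in rqinit() above).
 */
#ifdef notdef
ex_setrq(p)
        register struct proc *p;
{
        register int i = p->p_pri / PPQ;        /* which of the NQS queues */

        /* link p in at the tail, just ahead of the header sentinel */
        p->p_link = (struct proc *)&qs[i];
        p->p_rlink = qs[i].ph_rlink;
        p->p_rlink->p_link = p;
        qs[i].ph_rlink = p;
}

ex_remrq(p)
        register struct proc *p;
{

        /* unlink p from whatever run queue it is on */
        p->p_rlink->p_link = p->p_link;
        p->p_link->p_rlink = p->p_rlink;
}
#endif /* notdef */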

/*
 * Set the process running;
 * arrange for it to be swapped in if necessary.
 */
setrun(p)
        register struct proc *p;
{
        register int s;

        s = splhigh();
        switch (p->p_stat) {

        case 0:
        case SWAIT:
        case SRUN:
        case SZOMB:
        default:
                panic("setrun");

        case SSTOP:
        case SSLEEP:
                unsleep(p);             /* e.g. when sending signals */
                break;

        case SIDL:
                break;
        }
        p->p_stat = SRUN;
        if (p->p_flag & SLOAD)
                setrq(p);
        splx(s);
        if (p->p_slptime > 1)
                updatepri(p);
        if (p->p_pri < curpri) {
                runrun++;
                aston();
        }
        if ((p->p_flag&SLOAD) == 0) {
                if (runout != 0) {
                        runout = 0;
                        wakeup((caddr_t)&runout);
                }
                wantin++;
        }
}

/*
 * Set user priority.
 * The rescheduling flag (runrun)
 * is set if the priority is better
 * than that of the currently running process.
 */
setpri(pp)
        register struct proc *pp;
{
        register int p;

        p = (pp->p_cpu & 0377)/4;
        p += PUSER + 2 * pp->p_nice;
        if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
                p += 2*4;       /* effectively, nice(4) */
        if (p > 127)
                p = 127;
        if (p < curpri) {
                runrun++;
                aston();
        }
        pp->p_usrpri = p;
        return (p);
}
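
/*
 * Worked example of the calculation above, assuming PUSER is 50 (see
 * "param.h"): a process with p_cpu == 80 and p_nice == 4 gets
 *      p = 80/4 + 50 + 2*4 = 78
 * a numerically larger, and therefore weaker, user priority.  Heavy
 * recent CPU use and a positive nice value both push the result toward
 * the 127 ceiling, and the memory-pressure test above can add the
 * equivalent of nice(4) on top of that.
 */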