123376Smckusick /* 229096Smckusick * Copyright (c) 1982, 1986 Regents of the University of California. 323376Smckusick * All rights reserved. The Berkeley software License Agreement 423376Smckusick * specifies the terms and conditions for redistribution. 523376Smckusick * 6*37495Smckusick * @(#)kern_synch.c 7.7 (Berkeley) 04/25/89 723376Smckusick */ 833Sbill 9*37495Smckusick #include "machine/pte.h" 10*37495Smckusick #include "machine/psl.h" 11*37495Smckusick #include "machine/mtpr.h" 129756Ssam 1317093Sbloom #include "param.h" 1417093Sbloom #include "systm.h" 1517093Sbloom #include "dir.h" 1617093Sbloom #include "user.h" 1717093Sbloom #include "proc.h" 1817093Sbloom #include "vm.h" 1917093Sbloom #include "kernel.h" 2017093Sbloom #include "buf.h" 219756Ssam 228102Sroot /* 238102Sroot * Force switch among equal priority processes every 100ms. 248102Sroot */ 258102Sroot roundrobin() 268102Sroot { 278102Sroot 288102Sroot runrun++; 298102Sroot aston(); 308624Sroot timeout(roundrobin, (caddr_t)0, hz / 10); 318102Sroot } 328102Sroot 3332908Smckusick /* 3432908Smckusick * constants for digital decay and forget 3532908Smckusick * 90% of (p_cpu) usage in 5*loadav time 3632908Smckusick * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 3732908Smckusick * Note that, as ps(1) mentions, this can let percentages 3832908Smckusick * total over 100% (I've seen 137.9% for 3 processes). 3932908Smckusick * 4032908Smckusick * Note that hardclock updates p_cpu and p_cpticks independently. 4132908Smckusick * 4232908Smckusick * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 4332908Smckusick * That is, the system wants to compute a value of decay such 4432908Smckusick * that the following for loop: 4532908Smckusick * for (i = 0; i < (5 * loadavg); i++) 4632908Smckusick * p_cpu *= decay; 4732908Smckusick * will compute 4832908Smckusick * p_cpu *= 0.1; 4932908Smckusick * for all values of loadavg: 5032908Smckusick * 5132908Smckusick * Mathematically this loop can be expressed by saying: 5232908Smckusick * decay ** (5 * loadavg) ~= .1 5332908Smckusick * 5432908Smckusick * The system computes decay as: 5532908Smckusick * decay = (2 * loadavg) / (2 * loadavg + 1) 5632908Smckusick * 5732908Smckusick * We wish to prove that the system's computation of decay 5832908Smckusick * will always fulfill the equation: 5932908Smckusick * decay ** (5 * loadavg) ~= .1 6032908Smckusick * 6132908Smckusick * If we compute b as: 6232908Smckusick * b = 2 * loadavg 6332908Smckusick * then 6432908Smckusick * decay = b / (b + 1) 6532908Smckusick * 6632908Smckusick * We now need to prove two things: 6732908Smckusick * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 6832908Smckusick * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 6932908Smckusick * 7032908Smckusick * Facts: 7132908Smckusick * For x close to zero, exp(x) =~ 1 + x, since 7232908Smckusick * exp(x) = 0! + x**1/1! + x**2/2! + ... . 7332908Smckusick * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 7432908Smckusick * For x close to zero, ln(1+x) =~ x, since 7532908Smckusick * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 7632908Smckusick * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 7732908Smckusick * ln(.1) =~ -2.30 7832908Smckusick * 7932908Smckusick * Proof of (1): 8032908Smckusick * Solve (factor)**(power) =~ .1 given power (5*loadav): 8132908Smckusick * solving for factor, 8232908Smckusick * ln(factor) =~ (-2.30/5*loadav), or 8332908Smckusick * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) = 8432908Smckusick * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 8532908Smckusick * 8632908Smckusick * Proof of (2): 8732908Smckusick * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 8832908Smckusick * solving for power, 8932908Smckusick * power*ln(b/(b+1)) =~ -2.30, or 9032908Smckusick * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 9132908Smckusick * 9232908Smckusick * Actual power values for the implemented algorithm are as follows: 9332908Smckusick * loadav: 1 2 3 4 9432908Smckusick * power: 5.68 10.32 14.94 19.55 9532908Smckusick */ 9617541Skarels #define filter(loadav) ((2 * (loadav)) / (2 * (loadav) + 1)) 9717541Skarels 988102Sroot double ccpu = 0.95122942450071400909; /* exp(-1/20) */ 998102Sroot 1008102Sroot /* 1018102Sroot * Recompute process priorities, once a second 1028102Sroot */ 1038102Sroot schedcpu() 1048102Sroot { 10516795Skarels register double ccpu1 = (1.0 - ccpu) / (double)hz; 1068102Sroot register struct proc *p; 1078102Sroot register int s, a; 10817541Skarels float scale = filter(avenrun[0]); 1098102Sroot 1108102Sroot wakeup((caddr_t)&lbolt); 11116532Skarels for (p = allproc; p != NULL; p = p->p_nxt) { 1128102Sroot if (p->p_time != 127) 1138102Sroot p->p_time++; 1148102Sroot if (p->p_stat==SSLEEP || p->p_stat==SSTOP) 1158102Sroot if (p->p_slptime != 127) 1168102Sroot p->p_slptime++; 11717541Skarels /* 11817541Skarels * If the process has slept the entire second, 11917541Skarels * stop recalculating its priority until it wakes up. 12017541Skarels */ 12117541Skarels if (p->p_slptime > 1) { 12217541Skarels p->p_pctcpu *= ccpu; 12317541Skarels continue; 12417541Skarels } 12517541Skarels /* 12617541Skarels * p_pctcpu is only for ps. 12717541Skarels */ 12817541Skarels p->p_pctcpu = ccpu * p->p_pctcpu + ccpu1 * p->p_cpticks; 1298102Sroot p->p_cpticks = 0; 13017541Skarels a = (int) (scale * (p->p_cpu & 0377)) + p->p_nice; 1318102Sroot if (a < 0) 1328102Sroot a = 0; 1338102Sroot if (a > 255) 1348102Sroot a = 255; 1358102Sroot p->p_cpu = a; 1368102Sroot (void) setpri(p); 13717541Skarels s = splhigh(); /* prevent state changes */ 1388102Sroot if (p->p_pri >= PUSER) { 13916795Skarels #define PPQ (128 / NQS) 1408102Sroot if ((p != u.u_procp || noproc) && 1418102Sroot p->p_stat == SRUN && 1428102Sroot (p->p_flag & SLOAD) && 14316795Skarels (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 1448102Sroot remrq(p); 1458102Sroot p->p_pri = p->p_usrpri; 1468102Sroot setrq(p); 1478102Sroot } else 1488102Sroot p->p_pri = p->p_usrpri; 1498102Sroot } 1508102Sroot splx(s); 1518102Sroot } 1528102Sroot vmmeter(); 1538102Sroot if (runin!=0) { 1548102Sroot runin = 0; 1558102Sroot wakeup((caddr_t)&runin); 1568102Sroot } 1578102Sroot if (bclnlist != NULL) 1588102Sroot wakeup((caddr_t)&proc[2]); 1598624Sroot timeout(schedcpu, (caddr_t)0, hz); 1608102Sroot } 1618102Sroot 16217541Skarels /* 16317541Skarels * Recalculate the priority of a process after it has slept for a while. 16417541Skarels */ 16517541Skarels updatepri(p) 16617541Skarels register struct proc *p; 16717541Skarels { 16817541Skarels register int a = p->p_cpu & 0377; 16917541Skarels float scale = filter(avenrun[0]); 17017541Skarels 17117541Skarels p->p_slptime--; /* the first time was done in schedcpu */ 17217541Skarels while (a && --p->p_slptime) 17317541Skarels a = (int) (scale * a) /* + p->p_nice */; 17430232Skarels p->p_slptime = 0; 17517541Skarels if (a < 0) 17617541Skarels a = 0; 17717541Skarels if (a > 255) 17817541Skarels a = 255; 17917541Skarels p->p_cpu = a; 18017541Skarels (void) setpri(p); 18117541Skarels } 18217541Skarels 18333Sbill #define SQSIZE 0100 /* Must be power of 2 */ 18433Sbill #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 18521099Smckusick struct slpque { 18621099Smckusick struct proc *sq_head; 18721099Smckusick struct proc **sq_tailp; 18821099Smckusick } slpque[SQSIZE]; 18933Sbill 19033Sbill /* 19133Sbill * Give up the processor till a wakeup occurs 19233Sbill * on chan, at which time the process 19333Sbill * enters the scheduling queue at priority pri. 19433Sbill * The most important effect of pri is that when 19533Sbill * pri<=PZERO a signal cannot disturb the sleep; 19633Sbill * if pri>PZERO signals will be processed. 19733Sbill * Callers of this routine must be prepared for 19833Sbill * premature return, and check that the reason for 19933Sbill * sleeping has gone away. 20033Sbill */ 20133Sbill sleep(chan, pri) 2028033Sroot caddr_t chan; 2038033Sroot int pri; 20433Sbill { 20521099Smckusick register struct proc *rp; 20621099Smckusick register struct slpque *qp; 207207Sbill register s; 20830532Skarels extern int cold; 20933Sbill 21033Sbill rp = u.u_procp; 21117541Skarels s = splhigh(); 21230532Skarels if (cold || panicstr) { 21318363Skarels /* 21430532Skarels * After a panic, or during autoconfiguration, 21530532Skarels * just give interrupts a chance, then just return; 21630532Skarels * don't run any other procs or panic below, 21730532Skarels * in case this is the idle process and already asleep. 21818363Skarels * The splnet should be spl0 if the network was being used 21918363Skarels * by the filesystem, but for now avoid network interrupts 22018363Skarels * that might cause another panic. 22118363Skarels */ 22218363Skarels (void) splnet(); 22318363Skarels splx(s); 22418363Skarels return; 22518363Skarels } 22618363Skarels if (chan==0 || rp->p_stat != SRUN || rp->p_rlink) 22733Sbill panic("sleep"); 22833Sbill rp->p_wchan = chan; 22933Sbill rp->p_slptime = 0; 23033Sbill rp->p_pri = pri; 23121099Smckusick qp = &slpque[HASH(chan)]; 23221099Smckusick if (qp->sq_head == 0) 23321099Smckusick qp->sq_head = rp; 23421099Smckusick else 23521099Smckusick *qp->sq_tailp = rp; 23621099Smckusick *(qp->sq_tailp = &rp->p_link) = 0; 2374826Swnj if (pri > PZERO) { 23821763Skarels /* 23921763Skarels * If we stop in issig(), wakeup may already have happened 24021763Skarels * when we return (rp->p_wchan will then be 0). 24121763Skarels */ 2424826Swnj if (ISSIG(rp)) { 243187Sbill if (rp->p_wchan) 244187Sbill unsleep(rp); 24533Sbill rp->p_stat = SRUN; 246131Sbill (void) spl0(); 24733Sbill goto psig; 24833Sbill } 249187Sbill if (rp->p_wchan == 0) 250187Sbill goto out; 251187Sbill rp->p_stat = SSLEEP; 252131Sbill (void) spl0(); 2538033Sroot u.u_ru.ru_nvcsw++; 25433Sbill swtch(); 2554826Swnj if (ISSIG(rp)) 25633Sbill goto psig; 25733Sbill } else { 258207Sbill rp->p_stat = SSLEEP; 259131Sbill (void) spl0(); 2608033Sroot u.u_ru.ru_nvcsw++; 26133Sbill swtch(); 26233Sbill } 26316795Skarels curpri = rp->p_usrpri; 264187Sbill out: 26533Sbill splx(s); 26633Sbill return; 26733Sbill 26833Sbill /* 26933Sbill * If priority was low (>PZERO) and 2704826Swnj * there has been a signal, execute non-local goto through 2718113Sroot * u.u_qsave, aborting the system call in progress (see trap.c) 27233Sbill */ 27333Sbill psig: 2748113Sroot longjmp(&u.u_qsave); 27533Sbill /*NOTREACHED*/ 27633Sbill } 27733Sbill 27833Sbill /* 279181Sbill * Remove a process from its wait queue 280181Sbill */ 281181Sbill unsleep(p) 2824826Swnj register struct proc *p; 283181Sbill { 28421099Smckusick register struct slpque *qp; 285181Sbill register struct proc **hp; 28621099Smckusick int s; 287181Sbill 28817541Skarels s = splhigh(); 289181Sbill if (p->p_wchan) { 29021099Smckusick hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 291181Sbill while (*hp != p) 292181Sbill hp = &(*hp)->p_link; 293181Sbill *hp = p->p_link; 29421099Smckusick if (qp->sq_tailp == &p->p_link) 29521099Smckusick qp->sq_tailp = hp; 296181Sbill p->p_wchan = 0; 297181Sbill } 298181Sbill splx(s); 299181Sbill } 300181Sbill 301181Sbill /* 30233Sbill * Wake up all processes sleeping on chan. 30333Sbill */ 30433Sbill wakeup(chan) 3054826Swnj register caddr_t chan; 30633Sbill { 30721099Smckusick register struct slpque *qp; 30821099Smckusick register struct proc *p, **q; 30933Sbill int s; 31033Sbill 31117541Skarels s = splhigh(); 31221099Smckusick qp = &slpque[HASH(chan)]; 31333Sbill restart: 31421099Smckusick for (q = &qp->sq_head; p = *q; ) { 315181Sbill if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 31633Sbill panic("wakeup"); 317207Sbill if (p->p_wchan==chan) { 31833Sbill p->p_wchan = 0; 319187Sbill *q = p->p_link; 32021099Smckusick if (qp->sq_tailp == &p->p_link) 32121099Smckusick qp->sq_tailp = q; 322181Sbill if (p->p_stat == SSLEEP) { 323181Sbill /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 32421763Skarels if (p->p_slptime > 1) 32521763Skarels updatepri(p); 326181Sbill p->p_stat = SRUN; 3272702Swnj if (p->p_flag & SLOAD) 328181Sbill setrq(p); 32916795Skarels /* 33016795Skarels * Since curpri is a usrpri, 33116795Skarels * p->p_pri is always better than curpri. 33216795Skarels */ 33316795Skarels runrun++; 33416795Skarels aston(); 3353545Swnj if ((p->p_flag&SLOAD) == 0) { 3363545Swnj if (runout != 0) { 3373545Swnj runout = 0; 3383545Swnj wakeup((caddr_t)&runout); 3393545Swnj } 3403545Swnj wantin++; 341181Sbill } 342181Sbill /* END INLINE EXPANSION */ 343187Sbill goto restart; 34433Sbill } 345187Sbill } else 346187Sbill q = &p->p_link; 34733Sbill } 34833Sbill splx(s); 34933Sbill } 35033Sbill 35133Sbill /* 35233Sbill * Initialize the (doubly-linked) run queues 35333Sbill * to be empty. 35433Sbill */ 35533Sbill rqinit() 35633Sbill { 35733Sbill register int i; 35833Sbill 35933Sbill for (i = 0; i < NQS; i++) 36033Sbill qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 36133Sbill } 36233Sbill 36333Sbill /* 36433Sbill * Set the process running; 36533Sbill * arrange for it to be swapped in if necessary. 36633Sbill */ 36733Sbill setrun(p) 3684826Swnj register struct proc *p; 36933Sbill { 3704826Swnj register int s; 37133Sbill 37217541Skarels s = splhigh(); 37333Sbill switch (p->p_stat) { 37433Sbill 37533Sbill case 0: 37633Sbill case SWAIT: 37733Sbill case SRUN: 37833Sbill case SZOMB: 37933Sbill default: 38033Sbill panic("setrun"); 38133Sbill 382207Sbill case SSTOP: 38333Sbill case SSLEEP: 384181Sbill unsleep(p); /* e.g. when sending signals */ 38533Sbill break; 38633Sbill 38733Sbill case SIDL: 38833Sbill break; 38933Sbill } 39033Sbill p->p_stat = SRUN; 39133Sbill if (p->p_flag & SLOAD) 39233Sbill setrq(p); 39333Sbill splx(s); 39430232Skarels if (p->p_slptime > 1) 39530232Skarels updatepri(p); 3964826Swnj if (p->p_pri < curpri) { 39733Sbill runrun++; 3982443Swnj aston(); 3992443Swnj } 4003545Swnj if ((p->p_flag&SLOAD) == 0) { 4014826Swnj if (runout != 0) { 4023545Swnj runout = 0; 4033545Swnj wakeup((caddr_t)&runout); 4043545Swnj } 4053545Swnj wantin++; 40633Sbill } 40733Sbill } 40833Sbill 40933Sbill /* 41033Sbill * Set user priority. 41133Sbill * The rescheduling flag (runrun) 41233Sbill * is set if the priority is better 41333Sbill * than the currently running process. 41433Sbill */ 41533Sbill setpri(pp) 4164826Swnj register struct proc *pp; 41733Sbill { 4184826Swnj register int p; 41933Sbill 4203875Swnj p = (pp->p_cpu & 0377)/4; 42117541Skarels p += PUSER + 2 * pp->p_nice; 4223530Swnj if (pp->p_rssize > pp->p_maxrss && freemem < desfree) 4233530Swnj p += 2*4; /* effectively, nice(4) */ 4244826Swnj if (p > 127) 42533Sbill p = 127; 4264826Swnj if (p < curpri) { 42733Sbill runrun++; 4282453Swnj aston(); 4292453Swnj } 43033Sbill pp->p_usrpri = p; 4314826Swnj return (p); 43233Sbill } 433