123376Smckusick /* 2*40711Skarels * Copyright (c) 1982, 1986, 1990 Regents of the University of California. 323376Smckusick * All rights reserved. The Berkeley software License Agreement 423376Smckusick * specifies the terms and conditions for redistribution. 523376Smckusick * 6*40711Skarels * @(#)kern_synch.c 7.11 (Berkeley) 04/03/90 723376Smckusick */ 833Sbill 937495Smckusick #include "machine/pte.h" 1037495Smckusick #include "machine/psl.h" 1137495Smckusick #include "machine/mtpr.h" 129756Ssam 1317093Sbloom #include "param.h" 1417093Sbloom #include "systm.h" 1517093Sbloom #include "user.h" 1617093Sbloom #include "proc.h" 1717093Sbloom #include "vm.h" 1817093Sbloom #include "kernel.h" 1917093Sbloom #include "buf.h" 209756Ssam 218102Sroot /* 228102Sroot * Force switch among equal priority processes every 100ms. 238102Sroot */ 248102Sroot roundrobin() 258102Sroot { 268102Sroot 278102Sroot runrun++; 288102Sroot aston(); 298624Sroot timeout(roundrobin, (caddr_t)0, hz / 10); 308102Sroot } 318102Sroot 3232908Smckusick /* 3332908Smckusick * constants for digital decay and forget 3432908Smckusick * 90% of (p_cpu) usage in 5*loadav time 3532908Smckusick * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 3632908Smckusick * Note that, as ps(1) mentions, this can let percentages 3732908Smckusick * total over 100% (I've seen 137.9% for 3 processes). 3832908Smckusick * 3932908Smckusick * Note that hardclock updates p_cpu and p_cpticks independently. 4032908Smckusick * 4132908Smckusick * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 4232908Smckusick * That is, the system wants to compute a value of decay such 4332908Smckusick * that the following for loop: 4432908Smckusick * for (i = 0; i < (5 * loadavg); i++) 4532908Smckusick * p_cpu *= decay; 4632908Smckusick * will compute 4732908Smckusick * p_cpu *= 0.1; 4832908Smckusick * for all values of loadavg: 4932908Smckusick * 5032908Smckusick * Mathematically this loop can be expressed by saying: 5132908Smckusick * decay ** (5 * loadavg) ~= .1 5232908Smckusick * 5332908Smckusick * The system computes decay as: 5432908Smckusick * decay = (2 * loadavg) / (2 * loadavg + 1) 5532908Smckusick * 5632908Smckusick * We wish to prove that the system's computation of decay 5732908Smckusick * will always fulfill the equation: 5832908Smckusick * decay ** (5 * loadavg) ~= .1 5932908Smckusick * 6032908Smckusick * If we compute b as: 6132908Smckusick * b = 2 * loadavg 6232908Smckusick * then 6332908Smckusick * decay = b / (b + 1) 6432908Smckusick * 6532908Smckusick * We now need to prove two things: 6632908Smckusick * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 6732908Smckusick * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 6832908Smckusick * 6932908Smckusick * Facts: 7032908Smckusick * For x close to zero, exp(x) =~ 1 + x, since 7132908Smckusick * exp(x) = 0! + x**1/1! + x**2/2! + ... . 7232908Smckusick * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 7332908Smckusick * For x close to zero, ln(1+x) =~ x, since 7432908Smckusick * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 7532908Smckusick * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 7632908Smckusick * ln(.1) =~ -2.30 7732908Smckusick * 7832908Smckusick * Proof of (1): 7932908Smckusick * Solve (factor)**(power) =~ .1 given power (5*loadav): 8032908Smckusick * solving for factor, 8132908Smckusick * ln(factor) =~ (-2.30/5*loadav), or 8232908Smckusick * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) = 8332908Smckusick * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 8432908Smckusick * 8532908Smckusick * Proof of (2): 8632908Smckusick * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 8732908Smckusick * solving for power, 8832908Smckusick * power*ln(b/(b+1)) =~ -2.30, or 8932908Smckusick * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 9032908Smckusick * 9132908Smckusick * Actual power values for the implemented algorithm are as follows: 9232908Smckusick * loadav: 1 2 3 4 9332908Smckusick * power: 5.68 10.32 14.94 19.55 9432908Smckusick */ 9517541Skarels 9638164Smckusick /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 9738164Smckusick #define get_b(loadav) (2 * (loadav)) 9838164Smckusick #define get_pcpu(b, cpu) (((b) * ((cpu) & 0377)) / ((b) + FSCALE)) 998102Sroot 10038164Smckusick /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 10138164Smckusick fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 10238164Smckusick 1038102Sroot /* 10438164Smckusick * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 10538164Smckusick * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 10638164Smckusick * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 10738164Smckusick * 10838164Smckusick * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 10938164Smckusick * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 11038164Smckusick * 11138164Smckusick * If you dont want to bother with the faster/more-accurate formula, you 11238164Smckusick * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 11338164Smckusick * (more general) method of calculating the %age of CPU used by a process. 11438164Smckusick */ 11538164Smckusick #define CCPU_SHIFT 11 11638164Smckusick 11738164Smckusick /* 1188102Sroot * Recompute process priorities, once a second 1198102Sroot */ 1208102Sroot schedcpu() 1218102Sroot { 12238164Smckusick register fixpt_t b = get_b(averunnable[0]); 1238102Sroot register struct proc *p; 1248102Sroot register int s, a; 1258102Sroot 1268102Sroot wakeup((caddr_t)&lbolt); 12716532Skarels for (p = allproc; p != NULL; p = p->p_nxt) { 1288102Sroot if (p->p_time != 127) 1298102Sroot p->p_time++; 1308102Sroot if (p->p_stat==SSLEEP || p->p_stat==SSTOP) 1318102Sroot if (p->p_slptime != 127) 1328102Sroot p->p_slptime++; 13338164Smckusick p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 13417541Skarels /* 13517541Skarels * If the process has slept the entire second, 13617541Skarels * stop recalculating its priority until it wakes up. 13717541Skarels */ 13838164Smckusick if (p->p_slptime > 1) 13917541Skarels continue; 14017541Skarels /* 14117541Skarels * p_pctcpu is only for ps. 14217541Skarels */ 14338164Smckusick #if (FSHIFT >= CCPU_SHIFT) 14438164Smckusick p->p_pctcpu += (hz == 100)? 14538164Smckusick ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 14638164Smckusick 100 * (((fixpt_t) p->p_cpticks) 14738164Smckusick << (FSHIFT - CCPU_SHIFT)) / hz; 14838164Smckusick #else 14938164Smckusick p->p_pctcpu += ((FSCALE - ccpu) * 15038164Smckusick (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 15138164Smckusick #endif 1528102Sroot p->p_cpticks = 0; 15338164Smckusick a = (int) get_pcpu(b, p->p_cpu) + p->p_nice; 1548102Sroot if (a < 0) 1558102Sroot a = 0; 1568102Sroot if (a > 255) 1578102Sroot a = 255; 1588102Sroot p->p_cpu = a; 1598102Sroot (void) setpri(p); 16017541Skarels s = splhigh(); /* prevent state changes */ 1618102Sroot if (p->p_pri >= PUSER) { 16216795Skarels #define PPQ (128 / NQS) 1638102Sroot if ((p != u.u_procp || noproc) && 1648102Sroot p->p_stat == SRUN && 1658102Sroot (p->p_flag & SLOAD) && 16616795Skarels (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 1678102Sroot remrq(p); 1688102Sroot p->p_pri = p->p_usrpri; 1698102Sroot setrq(p); 1708102Sroot } else 1718102Sroot p->p_pri = p->p_usrpri; 1728102Sroot } 1738102Sroot splx(s); 1748102Sroot } 1758102Sroot vmmeter(); 1768102Sroot if (runin!=0) { 1778102Sroot runin = 0; 1788102Sroot wakeup((caddr_t)&runin); 1798102Sroot } 1808102Sroot if (bclnlist != NULL) 1818102Sroot wakeup((caddr_t)&proc[2]); 1828624Sroot timeout(schedcpu, (caddr_t)0, hz); 1838102Sroot } 1848102Sroot 18517541Skarels /* 18617541Skarels * Recalculate the priority of a process after it has slept for a while. 18717541Skarels */ 18817541Skarels updatepri(p) 18917541Skarels register struct proc *p; 19017541Skarels { 19117541Skarels register int a = p->p_cpu & 0377; 19238164Smckusick register fixpt_t b = get_b(averunnable[0]); 19317541Skarels 19417541Skarels p->p_slptime--; /* the first time was done in schedcpu */ 19517541Skarels while (a && --p->p_slptime) 19638164Smckusick a = (int) get_pcpu(b, a) /* + p->p_nice */; 19730232Skarels p->p_slptime = 0; 19817541Skarels if (a < 0) 19917541Skarels a = 0; 20017541Skarels if (a > 255) 20117541Skarels a = 255; 20217541Skarels p->p_cpu = a; 20317541Skarels (void) setpri(p); 20417541Skarels } 20517541Skarels 20633Sbill #define SQSIZE 0100 /* Must be power of 2 */ 20733Sbill #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 20821099Smckusick struct slpque { 20921099Smckusick struct proc *sq_head; 21021099Smckusick struct proc **sq_tailp; 21121099Smckusick } slpque[SQSIZE]; 21233Sbill 21333Sbill /* 214*40711Skarels * General sleep call. 215*40711Skarels * Suspends current process until a wakeup is made on chan. 216*40711Skarels * The process will then be made runnable with priority pri. 217*40711Skarels * Sleeps at most timo/hz seconds (0 means no timeout). 218*40711Skarels * If pri includes PCATCH flag, signals are checked 219*40711Skarels * before and after sleeping, else signals are not checked. 220*40711Skarels * Returns 0 if awakened, EWOULDBLOCK if the timeout expires. 221*40711Skarels * If PCATCH is set and a signal needs to be delivered, 222*40711Skarels * ERESTART is returned if the current system call should be restarted 223*40711Skarels * if possible, and EINTR is returned if the system call should 224*40711Skarels * be interrupted by the signal (return EINTR). 22533Sbill */ 226*40711Skarels tsleep(chan, pri, wmesg, timo) 22740710Smarc caddr_t chan; 22840710Smarc int pri; 22940710Smarc char *wmesg; 23040710Smarc int timo; 23140710Smarc { 23240710Smarc register struct proc *rp; 23340710Smarc register struct slpque *qp; 23440710Smarc register s; 235*40711Skarels int sig, catch = pri & PCATCH; 23640710Smarc extern int cold; 23740710Smarc int endtsleep(); 23840710Smarc 23940710Smarc rp = u.u_procp; 24040710Smarc s = splhigh(); 24140710Smarc if (cold || panicstr) { 24240710Smarc /* 24340710Smarc * After a panic, or during autoconfiguration, 24440710Smarc * just give interrupts a chance, then just return; 24540710Smarc * don't run any other procs or panic below, 24640710Smarc * in case this is the idle process and already asleep. 24740710Smarc */ 248*40711Skarels (void) spl0(); 24940710Smarc splx(s); 25040710Smarc return (0); 25140710Smarc } 25240710Smarc #ifdef DIAGNOSTIC 253*40711Skarels if (chan == 0 || rp->p_stat != SRUN || rp->p_rlink) 254*40711Skarels panic("tsleep"); 25540710Smarc #endif 25640710Smarc rp->p_wchan = chan; 25740710Smarc rp->p_wmesg = wmesg; 25840710Smarc rp->p_slptime = 0; 259*40711Skarels rp->p_pri = pri & PRIMASK; 26040710Smarc qp = &slpque[HASH(chan)]; 26140710Smarc if (qp->sq_head == 0) 26240710Smarc qp->sq_head = rp; 26340710Smarc else 26440710Smarc *qp->sq_tailp = rp; 26540710Smarc *(qp->sq_tailp = &rp->p_link) = 0; 26640710Smarc /* 267*40711Skarels * If we stop in CURSIG/issig(), wakeup may already 268*40711Skarels * have happened when we return. 269*40711Skarels * rp->p_wchan will then be 0. 27040710Smarc */ 271*40711Skarels if (catch) { 272*40711Skarels if (sig = CURSIG(rp)) { 273*40711Skarels if (rp->p_wchan) 274*40711Skarels unsleep(rp); 275*40711Skarels rp->p_stat = SRUN; 276*40711Skarels splx(s); 277*40711Skarels if (u.u_sigintr & sigmask(sig)) 278*40711Skarels return (EINTR); 279*40711Skarels return (ERESTART); 280*40711Skarels } 281*40711Skarels if (rp->p_wchan == 0) { 282*40711Skarels splx(s); 283*40711Skarels return (0); 284*40711Skarels } 285*40711Skarels rp->p_flag |= SSINTR; 28640710Smarc } 28740710Smarc rp->p_stat = SSLEEP; 28840710Smarc if (timo) 28940710Smarc timeout(endtsleep, (caddr_t)rp, timo); 29040710Smarc (void) spl0(); 29140710Smarc u.u_ru.ru_nvcsw++; 29240710Smarc swtch(); 29340710Smarc curpri = rp->p_usrpri; 29440710Smarc splx(s); 295*40711Skarels rp->p_flag &= ~SSINTR; 29640710Smarc if (rp->p_flag & STIMO) { 29740710Smarc rp->p_flag &= ~STIMO; 29840710Smarc return (EWOULDBLOCK); 29940710Smarc } 30040710Smarc if (timo) 30140710Smarc untimeout(endtsleep, (caddr_t)rp); 302*40711Skarels if (catch && (sig = CURSIG(rp))) { 303*40711Skarels if (u.u_sigintr & sigmask(sig)) 304*40711Skarels return (EINTR); 305*40711Skarels return (ERESTART); 306*40711Skarels } 30740710Smarc return (0); 30840710Smarc } 30940710Smarc 31040710Smarc /* 31140710Smarc * Implement timeout for tsleep. 31240710Smarc * If process hasn't been awakened (wchan non-zero), 31340710Smarc * set timeout flag and undo the sleep. If proc 31440710Smarc * is stopped, just unsleep so it will remain stopped. 31540710Smarc */ 31640710Smarc endtsleep(p) 31740710Smarc register struct proc *p; 31840710Smarc { 31940710Smarc int s = splhigh(); 32040710Smarc 32140710Smarc if (p->p_wchan) { 32240710Smarc if (p->p_stat == SSLEEP) 32340710Smarc setrun(p); 32440710Smarc else 32540710Smarc unsleep(p); 32640710Smarc p->p_flag |= STIMO; 32740710Smarc } 32840710Smarc splx(s); 32940710Smarc } 33040710Smarc 331*40711Skarels /* 332*40711Skarels * Short-term, non-interruptable sleep. 333*40711Skarels */ 33433Sbill sleep(chan, pri) 3358033Sroot caddr_t chan; 3368033Sroot int pri; 33733Sbill { 33821099Smckusick register struct proc *rp; 33921099Smckusick register struct slpque *qp; 340207Sbill register s; 34130532Skarels extern int cold; 34233Sbill 343*40711Skarels #ifdef DIAGNOSTIC 344*40711Skarels if (pri > PZERO) { 345*40711Skarels printf("sleep called with pri %d > PZERO, wchan: %x\n", 346*40711Skarels pri, chan); 347*40711Skarels panic("old sleep"); 348*40711Skarels } 349*40711Skarels #endif 35033Sbill rp = u.u_procp; 35117541Skarels s = splhigh(); 35230532Skarels if (cold || panicstr) { 35318363Skarels /* 35430532Skarels * After a panic, or during autoconfiguration, 35530532Skarels * just give interrupts a chance, then just return; 35630532Skarels * don't run any other procs or panic below, 35730532Skarels * in case this is the idle process and already asleep. 35818363Skarels */ 359*40711Skarels (void) spl0(); 36018363Skarels splx(s); 36118363Skarels return; 36218363Skarels } 36340710Smarc #ifdef DIAGNOSTIC 36418363Skarels if (chan==0 || rp->p_stat != SRUN || rp->p_rlink) 36533Sbill panic("sleep"); 36640710Smarc #endif 36733Sbill rp->p_wchan = chan; 36840710Smarc rp->p_wmesg = NULL; 36933Sbill rp->p_slptime = 0; 37033Sbill rp->p_pri = pri; 37121099Smckusick qp = &slpque[HASH(chan)]; 37221099Smckusick if (qp->sq_head == 0) 37321099Smckusick qp->sq_head = rp; 37421099Smckusick else 37521099Smckusick *qp->sq_tailp = rp; 37621099Smckusick *(qp->sq_tailp = &rp->p_link) = 0; 377*40711Skarels rp->p_stat = SSLEEP; 378*40711Skarels (void) spl0(); 379*40711Skarels u.u_ru.ru_nvcsw++; 380*40711Skarels swtch(); 38116795Skarels curpri = rp->p_usrpri; 38233Sbill splx(s); 38333Sbill } 38433Sbill 38533Sbill /* 386181Sbill * Remove a process from its wait queue 387181Sbill */ 388181Sbill unsleep(p) 3894826Swnj register struct proc *p; 390181Sbill { 39121099Smckusick register struct slpque *qp; 392181Sbill register struct proc **hp; 39321099Smckusick int s; 394181Sbill 39517541Skarels s = splhigh(); 396181Sbill if (p->p_wchan) { 39721099Smckusick hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 398181Sbill while (*hp != p) 399181Sbill hp = &(*hp)->p_link; 400181Sbill *hp = p->p_link; 40121099Smckusick if (qp->sq_tailp == &p->p_link) 40221099Smckusick qp->sq_tailp = hp; 403181Sbill p->p_wchan = 0; 404181Sbill } 405181Sbill splx(s); 406181Sbill } 407181Sbill 408181Sbill /* 40933Sbill * Wake up all processes sleeping on chan. 41033Sbill */ 41133Sbill wakeup(chan) 4124826Swnj register caddr_t chan; 41333Sbill { 41421099Smckusick register struct slpque *qp; 41521099Smckusick register struct proc *p, **q; 41633Sbill int s; 41733Sbill 41817541Skarels s = splhigh(); 41921099Smckusick qp = &slpque[HASH(chan)]; 42033Sbill restart: 42121099Smckusick for (q = &qp->sq_head; p = *q; ) { 42240710Smarc #ifdef DIAGNOSTIC 423181Sbill if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 42433Sbill panic("wakeup"); 42540710Smarc #endif 426207Sbill if (p->p_wchan==chan) { 42733Sbill p->p_wchan = 0; 428187Sbill *q = p->p_link; 42921099Smckusick if (qp->sq_tailp == &p->p_link) 43021099Smckusick qp->sq_tailp = q; 431181Sbill if (p->p_stat == SSLEEP) { 432181Sbill /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 43321763Skarels if (p->p_slptime > 1) 43421763Skarels updatepri(p); 435181Sbill p->p_stat = SRUN; 4362702Swnj if (p->p_flag & SLOAD) 437181Sbill setrq(p); 43816795Skarels /* 43916795Skarels * Since curpri is a usrpri, 44016795Skarels * p->p_pri is always better than curpri. 44116795Skarels */ 44216795Skarels runrun++; 44316795Skarels aston(); 4443545Swnj if ((p->p_flag&SLOAD) == 0) { 4453545Swnj if (runout != 0) { 4463545Swnj runout = 0; 4473545Swnj wakeup((caddr_t)&runout); 4483545Swnj } 4493545Swnj wantin++; 450181Sbill } 451181Sbill /* END INLINE EXPANSION */ 452187Sbill goto restart; 45333Sbill } 454187Sbill } else 455187Sbill q = &p->p_link; 45633Sbill } 45733Sbill splx(s); 45833Sbill } 45933Sbill 46033Sbill /* 46133Sbill * Initialize the (doubly-linked) run queues 46233Sbill * to be empty. 46333Sbill */ 46433Sbill rqinit() 46533Sbill { 46633Sbill register int i; 46733Sbill 46833Sbill for (i = 0; i < NQS; i++) 46933Sbill qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 47033Sbill } 47133Sbill 47233Sbill /* 47333Sbill * Set the process running; 47433Sbill * arrange for it to be swapped in if necessary. 47533Sbill */ 47633Sbill setrun(p) 4774826Swnj register struct proc *p; 47833Sbill { 4794826Swnj register int s; 48033Sbill 48117541Skarels s = splhigh(); 48233Sbill switch (p->p_stat) { 48333Sbill 48433Sbill case 0: 48533Sbill case SWAIT: 48633Sbill case SRUN: 48733Sbill case SZOMB: 48833Sbill default: 48933Sbill panic("setrun"); 49033Sbill 491207Sbill case SSTOP: 49233Sbill case SSLEEP: 493181Sbill unsleep(p); /* e.g. when sending signals */ 49433Sbill break; 49533Sbill 49633Sbill case SIDL: 49733Sbill break; 49833Sbill } 49933Sbill p->p_stat = SRUN; 50033Sbill if (p->p_flag & SLOAD) 50133Sbill setrq(p); 50233Sbill splx(s); 50330232Skarels if (p->p_slptime > 1) 50430232Skarels updatepri(p); 5054826Swnj if (p->p_pri < curpri) { 50633Sbill runrun++; 5072443Swnj aston(); 5082443Swnj } 5093545Swnj if ((p->p_flag&SLOAD) == 0) { 5104826Swnj if (runout != 0) { 5113545Swnj runout = 0; 5123545Swnj wakeup((caddr_t)&runout); 5133545Swnj } 5143545Swnj wantin++; 51533Sbill } 51633Sbill } 51733Sbill 51833Sbill /* 51933Sbill * Set user priority. 52033Sbill * The rescheduling flag (runrun) 52133Sbill * is set if the priority is better 52233Sbill * than the currently running process. 52333Sbill */ 52433Sbill setpri(pp) 5254826Swnj register struct proc *pp; 52633Sbill { 5274826Swnj register int p; 52833Sbill 5293875Swnj p = (pp->p_cpu & 0377)/4; 53017541Skarels p += PUSER + 2 * pp->p_nice; 5313530Swnj if (pp->p_rssize > pp->p_maxrss && freemem < desfree) 5323530Swnj p += 2*4; /* effectively, nice(4) */ 5334826Swnj if (p > 127) 53433Sbill p = 127; 5354826Swnj if (p < curpri) { 53633Sbill runrun++; 5372453Swnj aston(); 5382453Swnj } 53933Sbill pp->p_usrpri = p; 5404826Swnj return (p); 54133Sbill } 542