xref: /csrg-svn/sys/kern/kern_synch.c (revision 32908)
123376Smckusick /*
229096Smckusick  * Copyright (c) 1982, 1986 Regents of the University of California.
323376Smckusick  * All rights reserved.  The Berkeley software License Agreement
423376Smckusick  * specifies the terms and conditions for redistribution.
523376Smckusick  *
6*32908Smckusick  *	@(#)kern_synch.c	7.6 (Berkeley) 12/11/87
723376Smckusick  */
833Sbill 
99756Ssam #include "../machine/pte.h"
1029946Skarels #include "../machine/psl.h"
1129946Skarels #include "../machine/mtpr.h"
129756Ssam 
1317093Sbloom #include "param.h"
1417093Sbloom #include "systm.h"
1517093Sbloom #include "dir.h"
1617093Sbloom #include "user.h"
1717093Sbloom #include "proc.h"
1817093Sbloom #include "vm.h"
1917093Sbloom #include "kernel.h"
2017093Sbloom #include "buf.h"
219756Ssam 
228102Sroot /*
238102Sroot  * Force switch among equal priority processes every 100ms.
248102Sroot  */
258102Sroot roundrobin()
268102Sroot {
278102Sroot 
288102Sroot 	runrun++;
298102Sroot 	aston();
308624Sroot 	timeout(roundrobin, (caddr_t)0, hz / 10);
318102Sroot }
328102Sroot 
33*32908Smckusick /*
34*32908Smckusick  * constants for digital decay and forget
35*32908Smckusick  *	90% of (p_cpu) usage in 5*loadav time
36*32908Smckusick  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
37*32908Smckusick  *          Note that, as ps(1) mentions, this can let percentages
38*32908Smckusick  *          total over 100% (I've seen 137.9% for 3 processes).
39*32908Smckusick  *
40*32908Smckusick  * Note that hardclock updates p_cpu and p_cpticks independently.
41*32908Smckusick  *
42*32908Smckusick  * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
43*32908Smckusick  * That is, the system wants to compute a value of decay such
44*32908Smckusick  * that the following for loop:
45*32908Smckusick  * 	for (i = 0; i < (5 * loadavg); i++)
46*32908Smckusick  * 		p_cpu *= decay;
47*32908Smckusick  * will compute
48*32908Smckusick  * 	p_cpu *= 0.1;
49*32908Smckusick  * for all values of loadavg:
50*32908Smckusick  *
51*32908Smckusick  * Mathematically this loop can be expressed by saying:
52*32908Smckusick  * 	decay ** (5 * loadavg) ~= .1
53*32908Smckusick  *
54*32908Smckusick  * The system computes decay as:
55*32908Smckusick  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
56*32908Smckusick  *
57*32908Smckusick  * We wish to prove that the system's computation of decay
58*32908Smckusick  * will always fulfill the equation:
59*32908Smckusick  * 	decay ** (5 * loadavg) ~= .1
60*32908Smckusick  *
61*32908Smckusick  * If we compute b as:
62*32908Smckusick  * 	b = 2 * loadavg
63*32908Smckusick  * then
64*32908Smckusick  * 	decay = b / (b + 1)
65*32908Smckusick  *
66*32908Smckusick  * We now need to prove two things:
67*32908Smckusick  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
68*32908Smckusick  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
69*32908Smckusick  *
70*32908Smckusick  * Facts:
71*32908Smckusick  *         For x close to zero, exp(x) =~ 1 + x, since
72*32908Smckusick  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
73*32908Smckusick  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
74*32908Smckusick  *         For x close to zero, ln(1+x) =~ x, since
75*32908Smckusick  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
76*32908Smckusick  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
77*32908Smckusick  *         ln(.1) =~ -2.30
78*32908Smckusick  *
79*32908Smckusick  * Proof of (1):
80*32908Smckusick  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
81*32908Smckusick  *	solving for factor,
82*32908Smckusick  *      ln(factor) =~ (-2.30/5*loadav), or
83*32908Smckusick  *      factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) =
84*32908Smckusick  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
85*32908Smckusick  *
86*32908Smckusick  * Proof of (2):
87*32908Smckusick  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
88*32908Smckusick  *	solving for power,
89*32908Smckusick  *      power*ln(b/(b+1)) =~ -2.30, or
90*32908Smckusick  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
91*32908Smckusick  *
92*32908Smckusick  * Actual power values for the implemented algorithm are as follows:
93*32908Smckusick  *      loadav: 1       2       3       4
94*32908Smckusick  *      power:  5.68    10.32   14.94   19.55
95*32908Smckusick  */
9617541Skarels #define	filter(loadav) ((2 * (loadav)) / (2 * (loadav) + 1))
9717541Skarels 
988102Sroot double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */
998102Sroot 
1008102Sroot /*
1018102Sroot  * Recompute process priorities, once a second
1028102Sroot  */
1038102Sroot schedcpu()
1048102Sroot {
10516795Skarels 	register double ccpu1 = (1.0 - ccpu) / (double)hz;
1068102Sroot 	register struct proc *p;
1078102Sroot 	register int s, a;
10817541Skarels 	float scale = filter(avenrun[0]);
1098102Sroot 
1108102Sroot 	wakeup((caddr_t)&lbolt);
11116532Skarels 	for (p = allproc; p != NULL; p = p->p_nxt) {
1128102Sroot 		if (p->p_time != 127)
1138102Sroot 			p->p_time++;
1148102Sroot 		if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
1158102Sroot 			if (p->p_slptime != 127)
1168102Sroot 				p->p_slptime++;
11717541Skarels 		/*
11817541Skarels 		 * If the process has slept the entire second,
11917541Skarels 		 * stop recalculating its priority until it wakes up.
12017541Skarels 		 */
12117541Skarels 		if (p->p_slptime > 1) {
12217541Skarels 			p->p_pctcpu *= ccpu;
12317541Skarels 			continue;
12417541Skarels 		}
12517541Skarels 		/*
12617541Skarels 		 * p_pctcpu is only for ps.
12717541Skarels 		 */
12817541Skarels 		p->p_pctcpu = ccpu * p->p_pctcpu + ccpu1 * p->p_cpticks;
1298102Sroot 		p->p_cpticks = 0;
13017541Skarels 		a = (int) (scale * (p->p_cpu & 0377)) + p->p_nice;
1318102Sroot 		if (a < 0)
1328102Sroot 			a = 0;
1338102Sroot 		if (a > 255)
1348102Sroot 			a = 255;
1358102Sroot 		p->p_cpu = a;
1368102Sroot 		(void) setpri(p);
13717541Skarels 		s = splhigh();	/* prevent state changes */
1388102Sroot 		if (p->p_pri >= PUSER) {
13916795Skarels #define	PPQ	(128 / NQS)
1408102Sroot 			if ((p != u.u_procp || noproc) &&
1418102Sroot 			    p->p_stat == SRUN &&
1428102Sroot 			    (p->p_flag & SLOAD) &&
14316795Skarels 			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
1448102Sroot 				remrq(p);
1458102Sroot 				p->p_pri = p->p_usrpri;
1468102Sroot 				setrq(p);
1478102Sroot 			} else
1488102Sroot 				p->p_pri = p->p_usrpri;
1498102Sroot 		}
1508102Sroot 		splx(s);
1518102Sroot 	}
1528102Sroot 	vmmeter();
1538102Sroot 	if (runin!=0) {
1548102Sroot 		runin = 0;
1558102Sroot 		wakeup((caddr_t)&runin);
1568102Sroot 	}
1578102Sroot 	if (bclnlist != NULL)
1588102Sroot 		wakeup((caddr_t)&proc[2]);
1598624Sroot 	timeout(schedcpu, (caddr_t)0, hz);
1608102Sroot }
1618102Sroot 
16217541Skarels /*
16317541Skarels  * Recalculate the priority of a process after it has slept for a while.
16417541Skarels  */
16517541Skarels updatepri(p)
16617541Skarels 	register struct proc *p;
16717541Skarels {
16817541Skarels 	register int a = p->p_cpu & 0377;
16917541Skarels 	float scale = filter(avenrun[0]);
17017541Skarels 
17117541Skarels 	p->p_slptime--;		/* the first time was done in schedcpu */
17217541Skarels 	while (a && --p->p_slptime)
17317541Skarels 		a = (int) (scale * a) /* + p->p_nice */;
17430232Skarels 	p->p_slptime = 0;
17517541Skarels 	if (a < 0)
17617541Skarels 		a = 0;
17717541Skarels 	if (a > 255)
17817541Skarels 		a = 255;
17917541Skarels 	p->p_cpu = a;
18017541Skarels 	(void) setpri(p);
18117541Skarels }
18217541Skarels 
18333Sbill #define SQSIZE 0100	/* Must be power of 2 */
18433Sbill #define HASH(x)	(( (int) x >> 5) & (SQSIZE-1))
18521099Smckusick struct slpque {
18621099Smckusick 	struct proc *sq_head;
18721099Smckusick 	struct proc **sq_tailp;
18821099Smckusick } slpque[SQSIZE];
18933Sbill 
19033Sbill /*
19133Sbill  * Give up the processor till a wakeup occurs
19233Sbill  * on chan, at which time the process
19333Sbill  * enters the scheduling queue at priority pri.
19433Sbill  * The most important effect of pri is that when
19533Sbill  * pri<=PZERO a signal cannot disturb the sleep;
19633Sbill  * if pri>PZERO signals will be processed.
19733Sbill  * Callers of this routine must be prepared for
19833Sbill  * premature return, and check that the reason for
19933Sbill  * sleeping has gone away.
20033Sbill  */
20133Sbill sleep(chan, pri)
2028033Sroot 	caddr_t chan;
2038033Sroot 	int pri;
20433Sbill {
20521099Smckusick 	register struct proc *rp;
20621099Smckusick 	register struct slpque *qp;
207207Sbill 	register s;
20830532Skarels 	extern int cold;
20933Sbill 
21033Sbill 	rp = u.u_procp;
21117541Skarels 	s = splhigh();
21230532Skarels 	if (cold || panicstr) {
21318363Skarels 		/*
21430532Skarels 		 * After a panic, or during autoconfiguration,
21530532Skarels 		 * just give interrupts a chance, then just return;
21630532Skarels 		 * don't run any other procs or panic below,
21730532Skarels 		 * in case this is the idle process and already asleep.
21818363Skarels 		 * The splnet should be spl0 if the network was being used
21918363Skarels 		 * by the filesystem, but for now avoid network interrupts
22018363Skarels 		 * that might cause another panic.
22118363Skarels 		 */
22218363Skarels 		(void) splnet();
22318363Skarels 		splx(s);
22418363Skarels 		return;
22518363Skarels 	}
22618363Skarels 	if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
22733Sbill 		panic("sleep");
22833Sbill 	rp->p_wchan = chan;
22933Sbill 	rp->p_slptime = 0;
23033Sbill 	rp->p_pri = pri;
23121099Smckusick 	qp = &slpque[HASH(chan)];
23221099Smckusick 	if (qp->sq_head == 0)
23321099Smckusick 		qp->sq_head = rp;
23421099Smckusick 	else
23521099Smckusick 		*qp->sq_tailp = rp;
23621099Smckusick 	*(qp->sq_tailp = &rp->p_link) = 0;
2374826Swnj 	if (pri > PZERO) {
23821763Skarels 		/*
23921763Skarels 		 * If we stop in issig(), wakeup may already have happened
24021763Skarels 		 * when we return (rp->p_wchan will then be 0).
24121763Skarels 		 */
2424826Swnj 		if (ISSIG(rp)) {
243187Sbill 			if (rp->p_wchan)
244187Sbill 				unsleep(rp);
24533Sbill 			rp->p_stat = SRUN;
246131Sbill 			(void) spl0();
24733Sbill 			goto psig;
24833Sbill 		}
249187Sbill 		if (rp->p_wchan == 0)
250187Sbill 			goto out;
251187Sbill 		rp->p_stat = SSLEEP;
252131Sbill 		(void) spl0();
2538033Sroot 		u.u_ru.ru_nvcsw++;
25433Sbill 		swtch();
2554826Swnj 		if (ISSIG(rp))
25633Sbill 			goto psig;
25733Sbill 	} else {
258207Sbill 		rp->p_stat = SSLEEP;
259131Sbill 		(void) spl0();
2608033Sroot 		u.u_ru.ru_nvcsw++;
26133Sbill 		swtch();
26233Sbill 	}
26316795Skarels 	curpri = rp->p_usrpri;
264187Sbill out:
26533Sbill 	splx(s);
26633Sbill 	return;
26733Sbill 
26833Sbill 	/*
26933Sbill 	 * If priority was low (>PZERO) and
2704826Swnj 	 * there has been a signal, execute non-local goto through
2718113Sroot 	 * u.u_qsave, aborting the system call in progress (see trap.c)
27233Sbill 	 */
27333Sbill psig:
2748113Sroot 	longjmp(&u.u_qsave);
27533Sbill 	/*NOTREACHED*/
27633Sbill }
27733Sbill 
27833Sbill /*
279181Sbill  * Remove a process from its wait queue
280181Sbill  */
281181Sbill unsleep(p)
2824826Swnj 	register struct proc *p;
283181Sbill {
28421099Smckusick 	register struct slpque *qp;
285181Sbill 	register struct proc **hp;
28621099Smckusick 	int s;
287181Sbill 
28817541Skarels 	s = splhigh();
289181Sbill 	if (p->p_wchan) {
29021099Smckusick 		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
291181Sbill 		while (*hp != p)
292181Sbill 			hp = &(*hp)->p_link;
293181Sbill 		*hp = p->p_link;
29421099Smckusick 		if (qp->sq_tailp == &p->p_link)
29521099Smckusick 			qp->sq_tailp = hp;
296181Sbill 		p->p_wchan = 0;
297181Sbill 	}
298181Sbill 	splx(s);
299181Sbill }
300181Sbill 
301181Sbill /*
30233Sbill  * Wake up all processes sleeping on chan.
30333Sbill  */
30433Sbill wakeup(chan)
3054826Swnj 	register caddr_t chan;
30633Sbill {
30721099Smckusick 	register struct slpque *qp;
30821099Smckusick 	register struct proc *p, **q;
30933Sbill 	int s;
31033Sbill 
31117541Skarels 	s = splhigh();
31221099Smckusick 	qp = &slpque[HASH(chan)];
31333Sbill restart:
31421099Smckusick 	for (q = &qp->sq_head; p = *q; ) {
315181Sbill 		if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
31633Sbill 			panic("wakeup");
317207Sbill 		if (p->p_wchan==chan) {
31833Sbill 			p->p_wchan = 0;
319187Sbill 			*q = p->p_link;
32021099Smckusick 			if (qp->sq_tailp == &p->p_link)
32121099Smckusick 				qp->sq_tailp = q;
322181Sbill 			if (p->p_stat == SSLEEP) {
323181Sbill 				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
32421763Skarels 				if (p->p_slptime > 1)
32521763Skarels 					updatepri(p);
326181Sbill 				p->p_stat = SRUN;
3272702Swnj 				if (p->p_flag & SLOAD)
328181Sbill 					setrq(p);
32916795Skarels 				/*
33016795Skarels 				 * Since curpri is a usrpri,
33116795Skarels 				 * p->p_pri is always better than curpri.
33216795Skarels 				 */
33316795Skarels 				runrun++;
33416795Skarels 				aston();
3353545Swnj 				if ((p->p_flag&SLOAD) == 0) {
3363545Swnj 					if (runout != 0) {
3373545Swnj 						runout = 0;
3383545Swnj 						wakeup((caddr_t)&runout);
3393545Swnj 					}
3403545Swnj 					wantin++;
341181Sbill 				}
342181Sbill 				/* END INLINE EXPANSION */
343187Sbill 				goto restart;
34433Sbill 			}
345187Sbill 		} else
346187Sbill 			q = &p->p_link;
34733Sbill 	}
34833Sbill 	splx(s);
34933Sbill }
35033Sbill 
35133Sbill /*
35233Sbill  * Initialize the (doubly-linked) run queues
35333Sbill  * to be empty.
35433Sbill  */
35533Sbill rqinit()
35633Sbill {
35733Sbill 	register int i;
35833Sbill 
35933Sbill 	for (i = 0; i < NQS; i++)
36033Sbill 		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
36133Sbill }
36233Sbill 
36333Sbill /*
36433Sbill  * Set the process running;
36533Sbill  * arrange for it to be swapped in if necessary.
36633Sbill  */
36733Sbill setrun(p)
3684826Swnj 	register struct proc *p;
36933Sbill {
3704826Swnj 	register int s;
37133Sbill 
37217541Skarels 	s = splhigh();
37333Sbill 	switch (p->p_stat) {
37433Sbill 
37533Sbill 	case 0:
37633Sbill 	case SWAIT:
37733Sbill 	case SRUN:
37833Sbill 	case SZOMB:
37933Sbill 	default:
38033Sbill 		panic("setrun");
38133Sbill 
382207Sbill 	case SSTOP:
38333Sbill 	case SSLEEP:
384181Sbill 		unsleep(p);		/* e.g. when sending signals */
38533Sbill 		break;
38633Sbill 
38733Sbill 	case SIDL:
38833Sbill 		break;
38933Sbill 	}
39033Sbill 	p->p_stat = SRUN;
39133Sbill 	if (p->p_flag & SLOAD)
39233Sbill 		setrq(p);
39333Sbill 	splx(s);
39430232Skarels 	if (p->p_slptime > 1)
39530232Skarels 		updatepri(p);
3964826Swnj 	if (p->p_pri < curpri) {
39733Sbill 		runrun++;
3982443Swnj 		aston();
3992443Swnj 	}
4003545Swnj 	if ((p->p_flag&SLOAD) == 0) {
4014826Swnj 		if (runout != 0) {
4023545Swnj 			runout = 0;
4033545Swnj 			wakeup((caddr_t)&runout);
4043545Swnj 		}
4053545Swnj 		wantin++;
40633Sbill 	}
40733Sbill }
40833Sbill 
40933Sbill /*
41033Sbill  * Set user priority.
41133Sbill  * The rescheduling flag (runrun)
41233Sbill  * is set if the priority is better
41333Sbill  * than the currently running process.
41433Sbill  */
41533Sbill setpri(pp)
4164826Swnj 	register struct proc *pp;
41733Sbill {
4184826Swnj 	register int p;
41933Sbill 
4203875Swnj 	p = (pp->p_cpu & 0377)/4;
42117541Skarels 	p += PUSER + 2 * pp->p_nice;
4223530Swnj 	if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
4233530Swnj 		p += 2*4;	/* effectively, nice(4) */
4244826Swnj 	if (p > 127)
42533Sbill 		p = 127;
4264826Swnj 	if (p < curpri) {
42733Sbill 		runrun++;
4282453Swnj 		aston();
4292453Swnj 	}
43033Sbill 	pp->p_usrpri = p;
4314826Swnj 	return (p);
43233Sbill }
433