xref: /csrg-svn/sys/kern/kern_clock.c (revision 5247)
/*	kern_clock.c	4.28	81/12/12	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading the clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying the priority of the current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain the date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax, softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * up to (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * up to (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;
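
/*
 * A worked example of the arithmetic, assuming hz = 100 and the
 * usual protosw.h rates PR_FASTHZ = 5 and PR_SLOWHZ = 2: softclock()
 * runs pffasttimo() when protofast reaches 100/5 = 20 ticks (every
 * 200 ms) and pfslowtimo() when protoslow reaches 100/2 = 50 ticks
 * (every 500 ms), resetting each counter as its timeout fires.
 */
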
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

	/*
	 * update callout times
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
		int k = ((int)pc & 0x7fffffff) / 8;

		if (k < 20000)
			kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if (++pp->p_cpu == 0)
			pp->p_cpu--;
		if (pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	++protoslow;
	++protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		b/(b+1), where b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and so forgot all previous
 * activity in about 20 seconds.  Under heavy load, that algorithm
 * degenerated to round-robin with poor response time.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];
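
/*
 * A sketch of the arithmetic: with avenrun[0] = 1 the filter is
 * ave(p_cpu, 2) = (2*p_cpu)/3, and (2/3)^n drops below 0.1 at
 * n ~= 6, matching the "loadav 1: 5 seconds" entry above; with
 * avenrun[0] = 5 the factor is 10/11 and about 24 iterations are
 * needed, matching the 24 second entry.
 */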

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */
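
/*
 * In concrete terms: a process that accumulates no ticks has its
 * p_pctcpu multiplied by exp(-1/20) once per second (see the
 * lightning bolt code below), so its %cpu halves in roughly
 * 20*ln(2), about 14 seconds.
 */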

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts,
	 * resetting each counter when its timeout fires.
	 */
	if (protofast >= hz / PR_FASTHZ) {
		protofast = 0;
		pffasttimo();
	}
	if (protoslow >= hz / PR_SLOWHZ) {
		protoslow = 0;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, it prevents
		 * this code from running at the raised priority
		 * left over from the clock interrupt.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If process has clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now have computed new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
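/*
 * For example, three pending callouts due 5, 8 and 12 ticks from
 * now are kept with differential c_time values 5, 3 and 4, so
 * hardclock() need only decrement the first undelivered entry.
 * A new entry for tick 8 is inserted before the existing tick-8
 * entry (the insertion loop below stops at c_time >= t), which
 * then carries a c_time of 0.
 */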
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
498