/*	kern_clock.c	4.22	81/06/11	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying priority of current process
 *	arranging for soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

	/*
	 * update callout times
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;
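	/*
	 * (Callout times are stored as deltas, so decrementing the
	 * first entry still counting down ages the entire list;
	 * entries already at or below zero have expired and simply
	 * await softclock() to run them off.)
	 */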

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if(u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
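		/*
		 * (p_cpu is kept in a character, cf. the 0 to 255
		 * range noted in softclock() below; if the increment
		 * wraps to zero, the decrement pins it back at 255.)
		 */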
		if(++pp->p_cpu == 0)
			pp->p_cpu--;
		if(pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;
#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm which
 * decayed the priorities by a constant, and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];
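
/*
 * (Worked example, for illustration: each second ave() scales the
 * old estimate by b/(b+1), with b = nrscale*avenrun[0].  At loadav 1,
 * b = 2 and the factor is 2/3, so (2/3)^5 ~ 0.13, hence the 5 second
 * entry in the table above; at loadav 5, b = 10, factor 10/11, and
 * (10/11)^24 ~ 0.10.)
 */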

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */
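/*
 * (Thus the p_pctcpu of an idle process decays to 1/e of its value
 * in 20 seconds, and is 90% forgotten after 20*ln(10), about 46
 * seconds.)
 */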

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0)
				break;
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NBK > 0
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, this code cannot run,
		 * since the saved priority still reflects the level
		 * raised when the clock interrupt occurred.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If process has clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
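			/*
			 * (For instance, with p_cpu = 100, loadav 1
			 * and nrscale = 2, ave(100, 2) = 200/3 = 66;
			 * numbers are hypothetical, for illustration.)
			 */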
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now have computed new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
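
/*
 * (Example usage, hypothetical names and values: timeout(fun,
 * (caddr_t)arg, hz/2) runs fun(arg) about half a second from now.
 * With hz = 60, queueing timeouts of 30, 50 and 90 ticks stores
 * deltas 30, 20 and 40 on calltodo, so hardclock() need only
 * decrement the head of the list.)
 */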