/*	kern_clock.c	4.30	81/12/19	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading the clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying the priority of the current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain the date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax, softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * up to (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * up to (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * Reprime the clock.
	 */
	clkreld();

	/*
	 * Update callout times.
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain per-process cpu statistics.
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}

	/*
	 * Charge the tick to the appropriate cpu state
	 * and update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
		int k = ((int)pc & 0x7fffffff) / 8;

		if (k < 20000)
			kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;

	/*
	 * Adjust the priority of the current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if (++pp->p_cpu == 0)
			pp->p_cpu--;
		if (pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}

	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	--protoslow;
	--protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif

	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}
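
/*
 * Illustrative sketch, not part of the original source: the callout
 * list stores each c_time as a difference from the entry before it,
 * so callouts due in 5, 8, 8 and 10 ticks are kept as 5, 3, 0, 2.
 * Decrementing the first positive entry, as hardclock() does above,
 * therefore ages every pending callout at once.  The hypothetical
 * routine below repeats that walk in isolation.
 */
#ifdef notdef
callout_tick(head)
	struct callout *head;
{
	register struct callout *p1;

	/* skip entries that are already due (c_time <= 0) */
	for (p1 = head->c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;	/* one decrement ages the whole list */
}
#endif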

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		b/(b+1), where b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;	/* exp(-1/20) */
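
/*
 * Worked example (not in the original source) of how the table above
 * follows from the filter: with a 1 minute load average of 1,
 * b = nrscale*avenrun[0] = 2, so each second ave() leaves
 * p_cpu * 2/3, and (2/3)^5 is about 0.13, i.e. roughly 90% of the
 * usage estimate is forgotten in 5 seconds.  At loadav 10, b = 20,
 * and (20/21)^47 is about 0.10, giving the 47 second figure.
 */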

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!).
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run the paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0)
		vmpago();

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts.
	 */
	if (protofast <= 0) {
		protofast = hz / PR_FASTHZ;
		pffasttimo();
	}
	if (protoslow <= 0) {
		protoslow = hz / PR_SLOWHZ;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, this check prevents
		 * the code below from running while the priority
		 * is still raised from the clock interrupt.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);
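
		/*
		 * Usage sketch (hypothetical, not from this file): a
		 * driver wanting to poll roughly once a second can
		 * sleep on lbolt and be awakened by the wakeup above:
		 *
		 *	while (!device_ready())
		 *		sleep((caddr_t)&lbolt, PZERO);
		 *
		 * where device_ready() is an assumed driver predicate
		 * and PZERO the conventional sleep priority.
		 */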

		/*
		 * Recompute process priorities and process
		 * expiring sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If the process has a clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If the process is blocked, increment its computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update the digital filter estimate of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average, giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now we have computed the new process priority
			 * in pp->p_usrpri.  Carefully change pp->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}
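
		/*
		 * Worked example (not in the original source) of the
		 * p_pctcpu filter above: a loaded process on the cpu
		 * for half of every second feeds in 0.5 each pass, so
		 * p_pctcpu converges to 0.5.  Since ccpu is exp(-1/20),
		 * the estimate forgets 90% of old history in 20*ln(10),
		 * about 46 seconds.
		 */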

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin != 0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if the process has now been
		 * running for more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}

	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each entry
 * is the number of clock ticks beyond
 * the entry before it.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstrt arg");
	/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
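
/*
 * Usage sketch (hypothetical, not from this file): to have a driver
 * routine dzscan() called one second from now:
 *
 *	timeout(dzscan, (caddr_t)0, hz);
 *
 * Insertion preserves the difference encoding.  Adding a timeout of
 * 8 ticks to a list with entries due at 5 and 10 (stored as 5, 5)
 * walks past the first entry subtracting its delta (8-5 = 3), links
 * the new entry with c_time 3, and reduces the follower to
 * 10-8 = 2, leaving the deltas 5, 3, 2.
 */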