xref: /csrg-svn/sys/kern/kern_clock.c (revision 65142)
149594Sbostic /*-
263170Sbostic  * Copyright (c) 1982, 1986, 1991, 1993
363170Sbostic  *	The Regents of the University of California.  All rights reserved.
423366Smckusick  *
549594Sbostic  * %sccs.include.redist.c%
649594Sbostic  *
7*65142Storek  *	@(#)kern_clock.c	8.4 (Berkeley) 12/14/93
823366Smckusick  */
99Sbill 
1056517Sbostic #include <sys/param.h>
1156517Sbostic #include <sys/systm.h>
1256517Sbostic #include <sys/dkstat.h>
1356517Sbostic #include <sys/callout.h>
1456517Sbostic #include <sys/kernel.h>
1556517Sbostic #include <sys/proc.h>
1656517Sbostic #include <sys/resourcevar.h>
179Sbill 
1856517Sbostic #include <machine/cpu.h>
1935406Skarels 
2010291Smckusick #ifdef GPROF
2156517Sbostic #include <sys/gmon.h>
2210291Smckusick #endif
2310291Smckusick 
248124Sroot /*
258124Sroot  * Clock handling routines.
268124Sroot  *
2754791Storek  * This code is written to operate with two timers that run independently of
2854791Storek  * each other.  The main clock, running hz times per second, is used to keep
2954791Storek  * track of real time.  The second timer handles kernel and user profiling,
3054791Storek  * and does resource use estimation.  If the second timer is programmable,
3154791Storek  * it is randomized to avoid aliasing between the two clocks.  For example,
3254791Storek  * the randomization prevents an adversary from always giving up the cpu
3354791Storek  * just before its quantum expires.  Otherwise, it would never accumulate
3454791Storek  * cpu ticks.  The mean frequency of the second timer is stathz.
3554791Storek  *
3654791Storek  * If no second timer exists, stathz will be zero; in this case we drive
3754791Storek  * profiling and statistics off the main clock.  This WILL NOT be accurate;
3854791Storek  * do not do it unless absolutely necessary.
3954791Storek  *
4054791Storek  * The statistics clock may (or may not) be run at a higher rate while
4154791Storek  * profiling.  This profile clock runs at profhz.  We require that profhz
4254791Storek  * be an integral multiple of stathz.
4354791Storek  *
4454791Storek  * If the statistics clock is running fast, it must be divided by the ratio
4554791Storek  * profhz/stathz for statistics.  (For profiling, every tick counts.)
468124Sroot  */
471559Sbill 
488124Sroot /*
498124Sroot  * TODO:
5012747Ssam  *	allocate more timeout table slots when table overflows.
518124Sroot  */
5226265Skarels 
/*
 * Bump a timeval by a small number of usec's.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and composes safely with if/else; the previous bare
 * compound-statement form left a stray-semicolon / dangling-else
 * hazard at call sites.  Assumes at most one carry into tv_sec is
 * needed, i.e. the caller's usec increment is < 1000000.
 */
#define BUMPTIME(t, usec) do { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
} while (0)
6617007Smckusick 
int	stathz;			/* statistics clock rate; 0 if no separate clock */
int	profhz;			/* profiling clock rate; defaulted in initclocks() */
int	profprocs;		/* count of processes with P_PROFIL set */
int	ticks;			/* hardclock() ticks, incremented every hz */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;		/* time of day; advanced by hardclock() */
volatile struct	timeval mono_time;	/* second timeval, also advanced by hardclock() */
7654791Storek 
778124Sroot /*
7854791Storek  * Initialize clock frequencies and start both clocks running.
798124Sroot  */
8054791Storek void
8154791Storek initclocks()
8254791Storek {
8354791Storek 	register int i;
8454791Storek 
8554791Storek 	/*
8654791Storek 	 * Set divisors to 1 (normal case) and let the machine-specific
8754791Storek 	 * code do its bit.
8854791Storek 	 */
8954791Storek 	psdiv = pscnt = 1;
9054791Storek 	cpu_initclocks();
9154791Storek 
9254791Storek 	/*
9354791Storek 	 * Compute profhz/stathz, and fix profhz if needed.
9454791Storek 	 */
9554791Storek 	i = stathz ? stathz : hz;
9654791Storek 	if (profhz == 0)
9754791Storek 		profhz = i;
9854791Storek 	psratio = profhz / i;
9954791Storek }
10054791Storek 
/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;	/* per-tick adjtime() correction; see adjtime() */
	extern long timedelta;	/* total outstanding adjtime() correction */

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		/* First still-positive delta: nothing beyond here is due. */
		if (--p1->c_time > 0)
			break;
		needsoft = 1;	/* at least one entry is due; run softclock() */
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL counts down only when the tick landed in
		 * user mode; ITIMER_PROF counts down on every tick here.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		/* Fold a little of the adjtime() skew into this tick. */
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
1862442Swnj 
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 *
 * Each expired entry (c_time <= 0) is unlinked and returned to the
 * free list while the queue is protected at splhigh; its handler is
 * then invoked with the priority level restored, so user callbacks
 * do not run with the clock blocked.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		/* Copy out func/arg before the entry is recycled. */
		func = c->c_func;
		arg = c->c_arg;
		c->c_next = callfree;
		callfree = c;
		splx(s);		/* drop to entry priority for the callback */
		(*func)(arg);
		(void) splhigh();	/* re-protect the queue */
	}
	splx(s);
}
2139Sbill 
/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that no identification
 *	value is returned from timeout, rather, the original arguments
 *	to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	/* A non-positive delay means "as soon as possible": one tick. */
	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;	/* successor's delta now relative to us */

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}
2707305Ssam 
27154791Storek void
27264428Sbostic untimeout(ftn, arg)
27364428Sbostic 	void (*ftn) __P((void *));
27454791Storek 	void *arg;
2757305Ssam {
27664428Sbostic 	register struct callout *p, *t;
2777305Ssam 	register int s;
2787305Ssam 
27926265Skarels 	s = splhigh();
28064428Sbostic 	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
28164428Sbostic 		if (t->c_func == ftn && t->c_arg == arg) {
28264428Sbostic 			/* Increment next entry's tick count. */
28364428Sbostic 			if (t->c_next && t->c_time > 0)
28464428Sbostic 				t->c_next->c_time += t->c_time;
28564428Sbostic 
28664428Sbostic 			/* Move entry from callout queue to callfree queue. */
28764428Sbostic 			p->c_next = t->c_next;
28864428Sbostic 			t->c_next = callfree;
28964428Sbostic 			callfree = t;
2907305Ssam 			break;
2917305Ssam 		}
2927305Ssam 	splx(s);
2937305Ssam }
2948112Sroot 
2958124Sroot /*
29664428Sbostic  * Compute number of hz until specified time.  Used to
29764428Sbostic  * compute third argument to timeout() from an absolute time.
2988124Sroot  */
29954791Storek int
3008112Sroot hzto(tv)
3018112Sroot 	struct timeval *tv;
3028112Sroot {
30354791Storek 	register long ticks, sec;
30454791Storek 	int s;
3058112Sroot 
3068124Sroot 	/*
3078124Sroot 	 * If number of milliseconds will fit in 32 bit arithmetic,
3088124Sroot 	 * then compute number of milliseconds to time and scale to
3098124Sroot 	 * ticks.  Otherwise just compute number of hz in time, rounding
3108124Sroot 	 * times greater than representible to maximum value.
3118124Sroot 	 *
3128124Sroot 	 * Delta times less than 25 days can be computed ``exactly''.
3138124Sroot 	 * Maximum value for any timeout in 10ms ticks is 250 days.
3148124Sroot 	 */
31554791Storek 	s = splhigh();
3168124Sroot 	sec = tv->tv_sec - time.tv_sec;
3178124Sroot 	if (sec <= 0x7fffffff / 1000 - 1000)
3188124Sroot 		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
3198124Sroot 			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
3208124Sroot 	else if (sec <= 0x7fffffff / hz)
3218124Sroot 		ticks = sec * hz;
3228124Sroot 	else
3238124Sroot 		ticks = 0x7fffffff;
3248112Sroot 	splx(s);
3258112Sroot 	return (ticks);
3268112Sroot }
32752668Smckusick 
32852668Smckusick /*
32954791Storek  * Start profiling on a process.
33054791Storek  *
33154791Storek  * Kernel profiling passes proc0 which never exits and hence
33254791Storek  * keeps the profile clock running constantly.
33354791Storek  */
33454791Storek void
33554791Storek startprofclock(p)
33654791Storek 	register struct proc *p;
33754791Storek {
33854791Storek 	int s;
33954791Storek 
34064574Sbostic 	if ((p->p_flag & P_PROFIL) == 0) {
34164574Sbostic 		p->p_flag |= P_PROFIL;
34254791Storek 		if (++profprocs == 1 && stathz != 0) {
34354791Storek 			s = splstatclock();
34454791Storek 			psdiv = pscnt = psratio;
34554791Storek 			setstatclockrate(profhz);
34654791Storek 			splx(s);
34754791Storek 		}
34854791Storek 	}
34954791Storek }
35054791Storek 
35154791Storek /*
35254791Storek  * Stop profiling on a process.
35354791Storek  */
35454791Storek void
35554791Storek stopprofclock(p)
35654791Storek 	register struct proc *p;
35754791Storek {
35854791Storek 	int s;
35954791Storek 
36064574Sbostic 	if (p->p_flag & P_PROFIL) {
36164574Sbostic 		p->p_flag &= ~P_PROFIL;
36254791Storek 		if (--profprocs == 0 && stathz != 0) {
36354791Storek 			s = splstatclock();
36454791Storek 			psdiv = pscnt = 1;
36554791Storek 			setstatclockrate(stathz);
36654791Storek 			splx(s);
36754791Storek 		}
36854791Storek 	}
36954791Storek }
37054791Storek 
int	dk_ndrive = DK_NDRIVE;	/* # of drives tracked by the dk_time loop in statclock() */
37254791Storek 
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		/* Every tick is a profile sample for a profiled process... */
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		/* ...but statistics are gathered only every psdiv-th tick. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			/* Bucket the sampled PC into the kernel histogram. */
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;	/* reload the prof => stat divider */

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu at its maximum rather than wrapping to 0. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}
47954791Storek 
48054791Storek /*
48152668Smckusick  * Return information about system clocks.
48252668Smckusick  */
48357840Smckusick sysctl_clockrate(where, sizep)
48452668Smckusick 	register char *where;
48558464Sbostic 	size_t *sizep;
48652668Smckusick {
48757840Smckusick 	struct clockinfo clkinfo;
48852668Smckusick 
48952668Smckusick 	/*
49057840Smckusick 	 * Construct clockinfo structure.
49152668Smckusick 	 */
49257840Smckusick 	clkinfo.hz = hz;
49357840Smckusick 	clkinfo.tick = tick;
49457840Smckusick 	clkinfo.profhz = profhz;
49557840Smckusick 	clkinfo.stathz = stathz ? stathz : hz;
49657840Smckusick 	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
49752668Smckusick }
498