/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 01/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
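
/*
 * Illustrative numbers (hypothetical, not mandated by this file): with
 * stathz = 128 and profhz = 1024, psratio is 8, so while any process is
 * being profiled the statistics code runs on every eighth statclock()
 * tick; otherwise both divisors are 1 and every tick gathers statistics.
 */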

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
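
/*
 * For example, hardclock() below advances the time-of-day with
 * BUMPTIME(&time, delta); any microsecond overflow is carried into
 * tv_sec by the macro.
 */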

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
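	/*
	 * Example (illustrative): events due in 2, 5, 5, and 9 ticks are
	 * queued with c_time deltas of 2, 3, 0, and 4; decrementing only
	 * the head entry ages every event in the queue.
	 */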
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
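	/*
	 * Illustrative numbers (hypothetical): with tick = 10000 usec and
	 * tickdelta = 5, a 1000-usec adjtime() correction is applied 5 usec
	 * per tick over 200 ticks, i.e., 2 seconds at hz = 100.
	 */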
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that no identification
 *	value is returned from timeout, rather, the original arguments
 *	to timeout are used to identify entries for untimeout.
 */
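
/*
 * Illustrative usage (hypothetical caller): timeout(fn, arg, hz) arranges
 * for fn(arg) to be called from softclock() roughly one second from now;
 * untimeout(fn, arg) cancels it, matching on both function and argument.
 */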
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
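	/*
	 * Example (illustrative): inserting a 7-tick event into a queue of
	 * deltas 2, 3, 4 (events due at 2, 5, 9) consumes 2 and then 3 from
	 * ticks, stores the new entry with c_time 2 after the second entry,
	 * and shrinks the following delta from 4 to 2.
	 */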
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
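	/*
	 * Example (illustrative, assuming tick = 10000 usec, i.e. hz = 100):
	 * a tv 3.5 seconds in the future takes the millisecond path below
	 * and yields 3500 ms / 10 ms-per-tick = 350 ticks.
	 */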
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
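	/*
	 * Concretely: each statclock tick charged to a running process adds
	 * one to p_estcpu, so resetpriority() below is re-run every fourth
	 * tick; if the increment wraps to zero, it is backed off so the
	 * estimator pins at its maximum value.
	 */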
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
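
/*
 * Illustrative userland counterpart (hypothetical snippet, not part of
 * this file): the structure constructed above is what sysctl(2) returns
 * for kern.clockrate:
 *
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *
 *	if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
 *		printf("hz=%d tick=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.tick, ci.stathz, ci.profhz);
 */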