149594Sbostic /*-
263170Sbostic * Copyright (c) 1982, 1986, 1991, 1993
363170Sbostic * The Regents of the University of California. All rights reserved.
4*65771Sbostic * (c) UNIX System Laboratories, Inc.
5*65771Sbostic * All or some portions of this file are derived from material licensed
6*65771Sbostic * to the University of California by American Telephone and Telegraph
7*65771Sbostic * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8*65771Sbostic * the permission of UNIX System Laboratories, Inc.
923366Smckusick *
1049594Sbostic * %sccs.include.redist.c%
1149594Sbostic *
12*65771Sbostic * @(#)kern_clock.c 8.5 (Berkeley) 01/21/94
1323366Smckusick */
149Sbill
1556517Sbostic #include <sys/param.h>
1656517Sbostic #include <sys/systm.h>
1756517Sbostic #include <sys/dkstat.h>
1856517Sbostic #include <sys/callout.h>
1956517Sbostic #include <sys/kernel.h>
2056517Sbostic #include <sys/proc.h>
2156517Sbostic #include <sys/resourcevar.h>
229Sbill
2356517Sbostic #include <machine/cpu.h>
2435406Skarels
2510291Smckusick #ifdef GPROF
2656517Sbostic #include <sys/gmon.h>
2710291Smckusick #endif
2810291Smckusick
298124Sroot /*
308124Sroot * Clock handling routines.
318124Sroot *
3254791Storek * This code is written to operate with two timers that run independently of
3354791Storek * each other. The main clock, running hz times per second, is used to keep
3454791Storek * track of real time. The second timer handles kernel and user profiling,
3554791Storek * and does resource use estimation. If the second timer is programmable,
3654791Storek * it is randomized to avoid aliasing between the two clocks. For example,
3754791Storek * the randomization prevents an adversary from always giving up the cpu
3854791Storek * just before its quantum expires. Otherwise, it would never accumulate
3954791Storek * cpu ticks. The mean frequency of the second timer is stathz.
4054791Storek *
4154791Storek * If no second timer exists, stathz will be zero; in this case we drive
4254791Storek * profiling and statistics off the main clock. This WILL NOT be accurate;
4354791Storek * do not do it unless absolutely necessary.
4454791Storek *
4554791Storek * The statistics clock may (or may not) be run at a higher rate while
4654791Storek * profiling. This profile clock runs at profhz. We require that profhz
4754791Storek * be an integral multiple of stathz.
4854791Storek *
4954791Storek * If the statistics clock is running fast, it must be divided by the ratio
5054791Storek * profhz/stathz for statistics. (For profiling, every tick counts.)
518124Sroot */
521559Sbill
538124Sroot /*
548124Sroot * TODO:
5512747Ssam * allocate more timeout table slots when table overflows.
568124Sroot */
5726265Skarels
/*
 * Bump a timeval by a small number of usec's.
 *
 * NOTE(review): performs at most one carry into tv_sec, so this assumes
 * the usec argument plus the current tv_usec stays below 2000000 —
 * i.e. usec is less than one second.  hardclock() passes ``delta''
 * (tick, possibly adjusted by tickdelta), which satisfies this.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;			/* statistics clock frequency; 0 if none */
int	profhz;			/* profiling clock frequency */
int	profprocs;		/* number of processes being profiled */
int	ticks;			/* hardclock invocations since boot */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;		/* wall-clock time-of-day */
volatile struct	timeval mono_time;	/* monotonic time, also bumped by hardclock */
8154791Storek
828124Sroot /*
8354791Storek * Initialize clock frequencies and start both clocks running.
848124Sroot */
8554791Storek void
initclocks()8654791Storek initclocks()
8754791Storek {
8854791Storek register int i;
8954791Storek
9054791Storek /*
9154791Storek * Set divisors to 1 (normal case) and let the machine-specific
9254791Storek * code do its bit.
9354791Storek */
9454791Storek psdiv = pscnt = 1;
9554791Storek cpu_initclocks();
9654791Storek
9754791Storek /*
9854791Storek * Compute profhz/stathz, and fix profhz if needed.
9954791Storek */
10054791Storek i = stathz ? stathz : hz;
10154791Storek if (profhz == 0)
10254791Storek profhz = i;
10354791Storek psratio = profhz / i;
10454791Storek }
10554791Storek
/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;		/* per-tick adjustment from adjtime() */
	extern long timedelta;		/* total outstanding adjtime() skew */

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		/* This entry is now due; a soft interrupt must run it. */
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL is charged only when the tick interrupted
		 * user mode; ITIMER_PROF is charged unconditionally.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
1912442Swnj
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		/*
		 * Copy out the handler and argument, then unlink the
		 * expired entry onto the free list BEFORE dropping the
		 * ipl, so the queue is consistent while the handler runs.
		 */
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		/* Run the handler at the ipl this routine was entered at. */
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}
2189Sbill
/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;	/* NOTE: shadows the global ``ticks'' counter */
{
	register struct callout *new, *p, *t;
	register int s;

	/* Never schedule for the past/now; round up to one tick. */
	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}
2757305Ssam
27654791Storek void
27764428Sbostic untimeout(ftn, arg)
27864428Sbostic void (*ftn) __P((void *));
27954791Storek void *arg;
2807305Ssam {
28164428Sbostic register struct callout *p, *t;
2827305Ssam register int s;
2837305Ssam
28426265Skarels s = splhigh();
28564428Sbostic for (p = &calltodo; (t = p->c_next) != NULL; p = t)
28664428Sbostic if (t->c_func == ftn && t->c_arg == arg) {
28764428Sbostic /* Increment next entry's tick count. */
28864428Sbostic if (t->c_next && t->c_time > 0)
28964428Sbostic t->c_next->c_time += t->c_time;
29064428Sbostic
29164428Sbostic /* Move entry from callout queue to callfree queue. */
29264428Sbostic p->c_next = t->c_next;
29364428Sbostic t->c_next = callfree;
29464428Sbostic callfree = t;
2957305Ssam break;
2967305Ssam }
2977305Ssam splx(s);
2987305Ssam }
2998112Sroot
/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 *
 * NOTE(review): the millisecond path divides by (tick / 1000), which
 * assumes tick >= 1000 usec (i.e. hz <= 1000) — confirm for faster
 * clocks.  Also, a tv already in the past yields a zero or negative
 * result in the first branch; timeout() itself rounds such values up
 * to one tick.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();		/* read ``time'' atomically w.r.t. hardclock */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
33252668Smckusick
33352668Smckusick /*
33454791Storek * Start profiling on a process.
33554791Storek *
33654791Storek * Kernel profiling passes proc0 which never exits and hence
33754791Storek * keeps the profile clock running constantly.
33854791Storek */
33954791Storek void
startprofclock(p)34054791Storek startprofclock(p)
34154791Storek register struct proc *p;
34254791Storek {
34354791Storek int s;
34454791Storek
34564574Sbostic if ((p->p_flag & P_PROFIL) == 0) {
34664574Sbostic p->p_flag |= P_PROFIL;
34754791Storek if (++profprocs == 1 && stathz != 0) {
34854791Storek s = splstatclock();
34954791Storek psdiv = pscnt = psratio;
35054791Storek setstatclockrate(profhz);
35154791Storek splx(s);
35254791Storek }
35354791Storek }
35454791Storek }
35554791Storek
35654791Storek /*
35754791Storek * Stop profiling on a process.
35854791Storek */
35954791Storek void
stopprofclock(p)36054791Storek stopprofclock(p)
36154791Storek register struct proc *p;
36254791Storek {
36354791Storek int s;
36454791Storek
36564574Sbostic if (p->p_flag & P_PROFIL) {
36664574Sbostic p->p_flag &= ~P_PROFIL;
36754791Storek if (--profprocs == 0 && stathz != 0) {
36854791Storek s = splstatclock();
36954791Storek psdiv = pscnt = 1;
37054791Storek setstatclockrate(stathz);
37154791Storek splx(s);
37254791Storek }
37354791Storek }
37454791Storek }
37554791Storek
/* Number of ``drives'' whose busy time is tallied by statclock(). */
int	dk_ndrive = DK_NDRIVE;
37754791Storek
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 *
 * Profile samples are taken on every interrupt; the pscnt divider
 * (reloaded from psdiv below) reduces the statistics work to the
 * stathz rate when the clock has been sped up to profhz.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		/* Only every psdiv'th tick counts for statistics. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		/* Only every psdiv'th tick counts for statistics. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Reload the prof => stat divider for the next round. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu at its maximum instead of wrapping. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		/* Recompute priority on every fourth estimator increment. */
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}
48454791Storek
48554791Storek /*
48652668Smckusick * Return information about system clocks.
48752668Smckusick */
sysctl_clockrate(where,sizep)48857840Smckusick sysctl_clockrate(where, sizep)
48952668Smckusick register char *where;
49058464Sbostic size_t *sizep;
49152668Smckusick {
49257840Smckusick struct clockinfo clkinfo;
49352668Smckusick
49452668Smckusick /*
49557840Smckusick * Construct clockinfo structure.
49652668Smckusick */
49757840Smckusick clkinfo.hz = hz;
49857840Smckusick clkinfo.tick = tick;
49957840Smckusick clkinfo.profhz = profhz;
50057840Smckusick clkinfo.stathz = stathz ? stathz : hz;
50157840Smckusick return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
50252668Smckusick }
503