149594Sbostic /*- 263170Sbostic * Copyright (c) 1982, 1986, 1991, 1993 363170Sbostic * The Regents of the University of California. All rights reserved. 423366Smckusick * 549594Sbostic * %sccs.include.redist.c% 649594Sbostic * 7*65142Storek * @(#)kern_clock.c 8.4 (Berkeley) 12/14/93 823366Smckusick */ 99Sbill 1056517Sbostic #include <sys/param.h> 1156517Sbostic #include <sys/systm.h> 1256517Sbostic #include <sys/dkstat.h> 1356517Sbostic #include <sys/callout.h> 1456517Sbostic #include <sys/kernel.h> 1556517Sbostic #include <sys/proc.h> 1656517Sbostic #include <sys/resourcevar.h> 179Sbill 1856517Sbostic #include <machine/cpu.h> 1935406Skarels 2010291Smckusick #ifdef GPROF 2156517Sbostic #include <sys/gmon.h> 2210291Smckusick #endif 2310291Smckusick 248124Sroot /* 258124Sroot * Clock handling routines. 268124Sroot * 2754791Storek * This code is written to operate with two timers that run independently of 2854791Storek * each other. The main clock, running hz times per second, is used to keep 2954791Storek * track of real time. The second timer handles kernel and user profiling, 3054791Storek * and does resource use estimation. If the second timer is programmable, 3154791Storek * it is randomized to avoid aliasing between the two clocks. For example, 3254791Storek * the randomization prevents an adversary from always giving up the cpu 3354791Storek * just before its quantum expires. Otherwise, it would never accumulate 3454791Storek * cpu ticks. The mean frequency of the second timer is stathz. 3554791Storek * 3654791Storek * If no second timer exists, stathz will be zero; in this case we drive 3754791Storek * profiling and statistics off the main clock. This WILL NOT be accurate; 3854791Storek * do not do it unless absolutely necessary. 3954791Storek * 4054791Storek * The statistics clock may (or may not) be run at a higher rate while 4154791Storek * profiling. This profile clock runs at profhz. We require that profhz 4254791Storek * be an integral multiple of stathz. 4354791Storek * 4454791Storek * If the statistics clock is running fast, it must be divided by the ratio 4554791Storek * profhz/stathz for statistics. (For profiling, every tick counts.) 468124Sroot */ 471559Sbill 488124Sroot /* 498124Sroot * TODO: 5012747Ssam * allocate more timeout table slots when table overflows. 518124Sroot */ 5226265Skarels 5317007Smckusick /* 5417007Smckusick * Bump a timeval by a small number of usec's. 5517007Smckusick */ 5617007Smckusick #define BUMPTIME(t, usec) { \ 5754791Storek register volatile struct timeval *tp = (t); \ 5854791Storek register long us; \ 5917007Smckusick \ 6054791Storek tp->tv_usec = us = tp->tv_usec + (usec); \ 6154791Storek if (us >= 1000000) { \ 6254791Storek tp->tv_usec = us - 1000000; \ 6317007Smckusick tp->tv_sec++; \ 6417007Smckusick } \ 6517007Smckusick } 6617007Smckusick 6754124Smckusick int stathz; 6853011Ssklower int profhz; 6954138Smckusick int profprocs; 7056338Ssklower int ticks; 7156317Shibler static int psdiv, pscnt; /* prof => stat divider */ 7256855Storek int psratio; /* ratio: prof / stat */ 7354791Storek 7454791Storek volatile struct timeval time; 7554791Storek volatile struct timeval mono_time; 7654791Storek 778124Sroot /* 7854791Storek * Initialize clock frequencies and start both clocks running. 798124Sroot */ 8054791Storek void 8154791Storek initclocks() 8254791Storek { 8354791Storek register int i; 8454791Storek 8554791Storek /* 8654791Storek * Set divisors to 1 (normal case) and let the machine-specific 8754791Storek * code do its bit. 8854791Storek */ 8954791Storek psdiv = pscnt = 1; 9054791Storek cpu_initclocks(); 9154791Storek 9254791Storek /* 9354791Storek * Compute profhz/stathz, and fix profhz if needed. 9454791Storek */ 9554791Storek i = stathz ? stathz : hz; 9654791Storek if (profhz == 0) 9754791Storek profhz = i; 9854791Storek psratio = profhz / i; 9954791Storek } 10054791Storek 10154791Storek /* 10254791Storek * The real-time timer, interrupting hz times per second. 10354791Storek */ 10454791Storek void 10544774Swilliam hardclock(frame) 10654791Storek register struct clockframe *frame; 1079Sbill { 1082768Swnj register struct callout *p1; 10954791Storek register struct proc *p; 11055294Storek register int delta, needsoft; 11128947Skarels extern int tickdelta; 11228947Skarels extern long timedelta; 1139Sbill 1148124Sroot /* 1158124Sroot * Update real-time timeout queue. 1168124Sroot * At front of queue are some number of events which are ``due''. 1178124Sroot * The time to these is <= 0 and if negative represents the 1188124Sroot * number of ticks which have passed since it was supposed to happen. 1198124Sroot * The rest of the q elements (times > 0) are events yet to happen, 1208124Sroot * where the time for each is given as a delta from the previous. 1218124Sroot * Decrementing just the first of these serves to decrement the time 1228124Sroot * to all events. 1238124Sroot */ 12454791Storek needsoft = 0; 12554791Storek for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { 12612747Ssam if (--p1->c_time > 0) 12712747Ssam break; 12816172Skarels needsoft = 1; 12912747Ssam if (p1->c_time == 0) 13012747Ssam break; 13112747Ssam } 132138Sbill 13354791Storek p = curproc; 13454791Storek if (p) { 13554791Storek register struct pstats *pstats; 13654791Storek 1378124Sroot /* 13854791Storek * Run current process's virtual and profile time, as needed. 1398124Sroot */ 14054791Storek pstats = p->p_stats; 14154791Storek if (CLKF_USERMODE(frame) && 14254791Storek timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 14347546Skarels itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 14440674Smarc psignal(p, SIGVTALRM); 14547546Skarels if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 14647546Skarels itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 14740674Smarc psignal(p, SIGPROF); 1489Sbill } 1498124Sroot 1508124Sroot /* 15154791Storek * If no separate statistics clock is available, run it from here. 15211392Ssam */ 15354124Smckusick if (stathz == 0) 15454791Storek statclock(frame); 15511392Ssam 15611392Ssam /* 15755294Storek * Increment the time-of-day. The increment is just ``tick'' unless 15855294Storek * we are still adjusting the clock; see adjtime(). 1598124Sroot */ 16056338Ssklower ticks++; 16155294Storek if (timedelta == 0) 16255294Storek delta = tick; 16355294Storek else { 16455294Storek delta = tick + tickdelta; 16555294Storek timedelta -= tickdelta; 16617356Skarels } 16755294Storek BUMPTIME(&time, delta); 16855294Storek BUMPTIME(&mono_time, delta); 16954791Storek 17054791Storek /* 17154791Storek * Process callouts at a very low cpu priority, so we don't keep the 17254791Storek * relatively high clock interrupt priority any longer than necessary. 17354791Storek */ 17416525Skarels if (needsoft) { 17554791Storek if (CLKF_BASEPRI(frame)) { 17616525Skarels /* 17716525Skarels * Save the overhead of a software interrupt; 17816525Skarels * it will happen as soon as we return, so do it now. 17916525Skarels */ 18054791Storek (void)splsoftclock(); 18154791Storek softclock(); 18216525Skarels } else 18316525Skarels setsoftclock(); 18416525Skarels } 1852442Swnj } 1862442Swnj 1878124Sroot /* 18854791Storek * Software (low priority) clock interrupt. 1898124Sroot * Run periodic events from timeout queue. 1908124Sroot */ 1912609Swnj /*ARGSUSED*/ 19254791Storek void 19354791Storek softclock() 1942442Swnj { 19554791Storek register struct callout *c; 19654791Storek register void *arg; 19754791Storek register void (*func) __P((void *)); 19854791Storek register int s; 1992442Swnj 20054791Storek s = splhigh(); 20154791Storek while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { 20254791Storek func = c->c_func; 20354791Storek arg = c->c_arg; 20454791Storek calltodo.c_next = c->c_next; 20554791Storek c->c_next = callfree; 20654791Storek callfree = c; 2079157Ssam splx(s); 20854791Storek (*func)(arg); 20954791Storek (void) splhigh(); 2102442Swnj } 21154791Storek splx(s); 2129Sbill } 2139Sbill 2149Sbill /* 21564428Sbostic * timeout -- 21664428Sbostic * Execute a function after a specified length of time. 21764428Sbostic * 21864428Sbostic * untimeout -- 21964428Sbostic * Cancel previous timeout function call. 22064428Sbostic * 22164428Sbostic * See AT&T BCI Driver Reference Manual for specification. This 22264428Sbostic * implementation differs from that one in that no identification 22364428Sbostic * value is returned from timeout, rather, the original arguments 22464428Sbostic * to timeout are used to identify entries for untimeout. 22512747Ssam */ 22654791Storek void 22764428Sbostic timeout(ftn, arg, ticks) 22864428Sbostic void (*ftn) __P((void *)); 22954791Storek void *arg; 23064428Sbostic register int ticks; 2319Sbill { 23264428Sbostic register struct callout *new, *p, *t; 23354791Storek register int s; 2349Sbill 23564428Sbostic if (ticks <= 0) 23664428Sbostic ticks = 1; 23764428Sbostic 23864428Sbostic /* Lock out the clock. */ 23954791Storek s = splhigh(); 24064428Sbostic 24164428Sbostic /* Fill in the next free callout structure. */ 24264428Sbostic if (callfree == NULL) 24364428Sbostic panic("timeout table full"); 24464428Sbostic new = callfree; 24564428Sbostic callfree = new->c_next; 24664428Sbostic new->c_arg = arg; 24764428Sbostic new->c_func = ftn; 24864428Sbostic 24964428Sbostic /* 25064428Sbostic * The time for each event is stored as a difference from the time 25164428Sbostic * of the previous event on the queue. Walk the queue, correcting 25264428Sbostic * the ticks argument for queue entries passed. Correct the ticks 25364428Sbostic * value for the queue entry immediately after the insertion point 254*65142Storek * as well. Watch out for negative c_time values; these represent 255*65142Storek * overdue events. 25664428Sbostic */ 25764428Sbostic for (p = &calltodo; 25864428Sbostic (t = p->c_next) != NULL && ticks > t->c_time; p = t) 259*65142Storek if (t->c_time > 0) 260*65142Storek ticks -= t->c_time; 26164428Sbostic new->c_time = ticks; 26264428Sbostic if (t != NULL) 26364428Sbostic t->c_time -= ticks; 26464428Sbostic 26564428Sbostic /* Insert the new entry into the queue. */ 26664428Sbostic p->c_next = new; 26764428Sbostic new->c_next = t; 2689Sbill splx(s); 2699Sbill } 2707305Ssam 27154791Storek void 27264428Sbostic untimeout(ftn, arg) 27364428Sbostic void (*ftn) __P((void *)); 27454791Storek void *arg; 2757305Ssam { 27664428Sbostic register struct callout *p, *t; 2777305Ssam register int s; 2787305Ssam 27926265Skarels s = splhigh(); 28064428Sbostic for (p = &calltodo; (t = p->c_next) != NULL; p = t) 28164428Sbostic if (t->c_func == ftn && t->c_arg == arg) { 28264428Sbostic /* Increment next entry's tick count. */ 28364428Sbostic if (t->c_next && t->c_time > 0) 28464428Sbostic t->c_next->c_time += t->c_time; 28564428Sbostic 28664428Sbostic /* Move entry from callout queue to callfree queue. */ 28764428Sbostic p->c_next = t->c_next; 28864428Sbostic t->c_next = callfree; 28964428Sbostic callfree = t; 2907305Ssam break; 2917305Ssam } 2927305Ssam splx(s); 2937305Ssam } 2948112Sroot 2958124Sroot /* 29664428Sbostic * Compute number of hz until specified time. Used to 29764428Sbostic * compute third argument to timeout() from an absolute time. 2988124Sroot */ 29954791Storek int 3008112Sroot hzto(tv) 3018112Sroot struct timeval *tv; 3028112Sroot { 30354791Storek register long ticks, sec; 30454791Storek int s; 3058112Sroot 3068124Sroot /* 3078124Sroot * If number of milliseconds will fit in 32 bit arithmetic, 3088124Sroot * then compute number of milliseconds to time and scale to 3098124Sroot * ticks. Otherwise just compute number of hz in time, rounding 3108124Sroot * times greater than representible to maximum value. 3118124Sroot * 3128124Sroot * Delta times less than 25 days can be computed ``exactly''. 3138124Sroot * Maximum value for any timeout in 10ms ticks is 250 days. 3148124Sroot */ 31554791Storek s = splhigh(); 3168124Sroot sec = tv->tv_sec - time.tv_sec; 3178124Sroot if (sec <= 0x7fffffff / 1000 - 1000) 3188124Sroot ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 3198124Sroot (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 3208124Sroot else if (sec <= 0x7fffffff / hz) 3218124Sroot ticks = sec * hz; 3228124Sroot else 3238124Sroot ticks = 0x7fffffff; 3248112Sroot splx(s); 3258112Sroot return (ticks); 3268112Sroot } 32752668Smckusick 32852668Smckusick /* 32954791Storek * Start profiling on a process. 33054791Storek * 33154791Storek * Kernel profiling passes proc0 which never exits and hence 33254791Storek * keeps the profile clock running constantly. 33354791Storek */ 33454791Storek void 33554791Storek startprofclock(p) 33654791Storek register struct proc *p; 33754791Storek { 33854791Storek int s; 33954791Storek 34064574Sbostic if ((p->p_flag & P_PROFIL) == 0) { 34164574Sbostic p->p_flag |= P_PROFIL; 34254791Storek if (++profprocs == 1 && stathz != 0) { 34354791Storek s = splstatclock(); 34454791Storek psdiv = pscnt = psratio; 34554791Storek setstatclockrate(profhz); 34654791Storek splx(s); 34754791Storek } 34854791Storek } 34954791Storek } 35054791Storek 35154791Storek /* 35254791Storek * Stop profiling on a process. 35354791Storek */ 35454791Storek void 35554791Storek stopprofclock(p) 35654791Storek register struct proc *p; 35754791Storek { 35854791Storek int s; 35954791Storek 36064574Sbostic if (p->p_flag & P_PROFIL) { 36164574Sbostic p->p_flag &= ~P_PROFIL; 36254791Storek if (--profprocs == 0 && stathz != 0) { 36354791Storek s = splstatclock(); 36454791Storek psdiv = pscnt = 1; 36554791Storek setstatclockrate(stathz); 36654791Storek splx(s); 36754791Storek } 36854791Storek } 36954791Storek } 37054791Storek 37154791Storek int dk_ndrive = DK_NDRIVE; 37254791Storek 37354791Storek /* 37454791Storek * Statistics clock. Grab profile sample, and if divider reaches 0, 37554791Storek * do process and kernel statistics. 37654791Storek */ 37754791Storek void 37854791Storek statclock(frame) 37954791Storek register struct clockframe *frame; 38054791Storek { 38154791Storek #ifdef GPROF 38254791Storek register struct gmonparam *g; 38354791Storek #endif 38454791Storek register struct proc *p; 38554791Storek register int i; 38654791Storek 38754791Storek if (CLKF_USERMODE(frame)) { 38854791Storek p = curproc; 38964574Sbostic if (p->p_flag & P_PROFIL) 39054791Storek addupc_intr(p, CLKF_PC(frame), 1); 39154791Storek if (--pscnt > 0) 39254791Storek return; 39354791Storek /* 39454791Storek * Came from user mode; CPU was in user state. 39554791Storek * If this process is being profiled record the tick. 39654791Storek */ 39754791Storek p->p_uticks++; 39854791Storek if (p->p_nice > NZERO) 39954791Storek cp_time[CP_NICE]++; 40054791Storek else 40154791Storek cp_time[CP_USER]++; 40254791Storek } else { 40354791Storek #ifdef GPROF 40454791Storek /* 40554791Storek * Kernel statistics are just like addupc_intr, only easier. 40654791Storek */ 40754791Storek g = &_gmonparam; 40854791Storek if (g->state == GMON_PROF_ON) { 40954791Storek i = CLKF_PC(frame) - g->lowpc; 41059204Smckusick if (i < g->textsize) { 41159204Smckusick i /= HISTFRACTION * sizeof(*g->kcount); 41259204Smckusick g->kcount[i]++; 41359204Smckusick } 41454791Storek } 41554791Storek #endif 41654791Storek if (--pscnt > 0) 41754791Storek return; 41854791Storek /* 41954791Storek * Came from kernel mode, so we were: 42054791Storek * - handling an interrupt, 42154791Storek * - doing syscall or trap work on behalf of the current 42254791Storek * user process, or 42354791Storek * - spinning in the idle loop. 42454791Storek * Whichever it is, charge the time as appropriate. 42554791Storek * Note that we charge interrupts to the current process, 42654791Storek * regardless of whether they are ``for'' that process, 42754791Storek * so that we know how much of its real time was spent 42854791Storek * in ``non-process'' (i.e., interrupt) work. 42954791Storek */ 43054791Storek p = curproc; 43154791Storek if (CLKF_INTR(frame)) { 43254791Storek if (p != NULL) 43354791Storek p->p_iticks++; 43454791Storek cp_time[CP_INTR]++; 43554791Storek } else if (p != NULL) { 43654791Storek p->p_sticks++; 43754791Storek cp_time[CP_SYS]++; 43854791Storek } else 43954791Storek cp_time[CP_IDLE]++; 44054791Storek } 44154791Storek pscnt = psdiv; 44254791Storek 44354791Storek /* 44454791Storek * We maintain statistics shown by user-level statistics 44554791Storek * programs: the amount of time in each cpu state, and 44654791Storek * the amount of time each of DK_NDRIVE ``drives'' is busy. 44754791Storek * 44854791Storek * XXX should either run linked list of drives, or (better) 44954791Storek * grab timestamps in the start & done code. 45054791Storek */ 45154791Storek for (i = 0; i < DK_NDRIVE; i++) 45254791Storek if (dk_busy & (1 << i)) 45354791Storek dk_time[i]++; 45454791Storek 45554791Storek /* 45664574Sbostic * We adjust the priority of the current process. The priority of 45764574Sbostic * a process gets worse as it accumulates CPU time. The cpu usage 45864574Sbostic * estimator (p_estcpu) is increased here. The formula for computing 45964574Sbostic * priorities (in kern_synch.c) will compute a different value each 46064574Sbostic * time p_estcpu increases by 4. The cpu usage estimator ramps up 46164574Sbostic * quite quickly when the process is running (linearly), and decays 46264574Sbostic * away exponentially, at a rate which is proportionally slower when 46364574Sbostic * the system is busy. The basic principal is that the system will 46464574Sbostic * 90% forget that the process used a lot of CPU time in 5 * loadav 46564574Sbostic * seconds. This causes the system to favor processes which haven't 46664574Sbostic * run much recently, and to round-robin among other processes. 46754791Storek */ 46854791Storek if (p != NULL) { 46954791Storek p->p_cpticks++; 47064574Sbostic if (++p->p_estcpu == 0) 47164574Sbostic p->p_estcpu--; 47264574Sbostic if ((p->p_estcpu & 3) == 0) { 47364428Sbostic resetpriority(p); 47464574Sbostic if (p->p_priority >= PUSER) 47564574Sbostic p->p_priority = p->p_usrpri; 47654791Storek } 47754791Storek } 47854791Storek } 47954791Storek 48054791Storek /* 48152668Smckusick * Return information about system clocks. 48252668Smckusick */ 48357840Smckusick sysctl_clockrate(where, sizep) 48452668Smckusick register char *where; 48558464Sbostic size_t *sizep; 48652668Smckusick { 48757840Smckusick struct clockinfo clkinfo; 48852668Smckusick 48952668Smckusick /* 49057840Smckusick * Construct clockinfo structure. 49152668Smckusick */ 49257840Smckusick clkinfo.hz = hz; 49357840Smckusick clkinfo.tick = tick; 49457840Smckusick clkinfo.profhz = profhz; 49557840Smckusick clkinfo.stathz = stathz ? stathz : hz; 49657840Smckusick return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 49752668Smckusick } 498