/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.23 (Berkeley) 07/10/92
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gmon.h"
extern u_short *kcount;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
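#ifdef notdef
/*
 * Worked example of BUMPTIME (an illustrative sketch only, never
 * compiled): since the increment is always less than one second, at
 * most a single carry into tv_sec is needed, which the macro's one
 * if-test provides.
 */
static void
example_bumptime()
{
	struct timeval tv;

	tv.tv_sec = 5;
	tv.tv_usec = 999990;
	BUMPTIME(&tv, 20);		/* tv is now { 6, 10 }: one carry */
}
#endif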
int	stathz;
int	profhz;
int	profprocs;
static int psratio, psdiv, pscnt;	/* prof => stat divider */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.
	 */
	if (timedelta == 0) {
		BUMPTIME(&time, tick);
		BUMPTIME(&mono_time, tick);
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta);
	}

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
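/*
 * Illustrative sketch (assumed example values, not from a running
 * system) of the delta-encoded callout queue that hardclock() above
 * decrements: three callouts due 3, 5, and 10 ticks from now are
 * stored as
 *
 *	calltodo -> [c_time 3] -> [c_time 2] -> [c_time 5] -> NULL
 *
 * Decrementing only the head advances the whole queue; once the head
 * reaches zero (or goes negative), softclock() is scheduled to run
 * the expired entries.
 */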
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
void
timeout(func, arg, t)
	void (*func) __P((void *));
	void *arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s;

	s = splhigh();
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	void (*func) __P((void *));
	void *arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
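#ifdef notdef
/*
 * Hypothetical usage sketch (the example_* names are invented for
 * illustration, not part of the kernel): a driver arms a one-second
 * watchdog with timeout() and cancels it with an untimeout() call
 * naming the same function/argument pair.
 */
static void example_watchdog __P((void *));

static void
example_watchdog(arg)
	void *arg;
{

	/* ... check the (hypothetical) device, then rearm ... */
	timeout(example_watchdog, arg, hz);	/* again in hz ticks = 1 sec */
}

static void
example_detach(arg)
	void *arg;
{

	untimeout(example_watchdog, arg);	/* cancel pending watchdog */
}
#endif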
/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If the number of milliseconds will fit in 32 bit arithmetic,
	 * compute the number of milliseconds to the target time and
	 * scale to ticks.  Otherwise just compute the number of hz in
	 * the time, rounding times greater than representable to the
	 * maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & SPROFIL) == 0) {
		p->p_flag |= SPROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & SPROFIL) {
		p->p_flag &= ~SPROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}
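/*
 * Example of the divider arithmetic above (rates assumed for
 * illustration): with stathz = 128 and profhz = 1024, psratio is 8.
 * While any process is being profiled, the clock runs at profhz and
 * statclock() counts pscnt down from psdiv = 8, so statistics are
 * still gathered at the 128 Hz rate even though profile samples
 * arrive on every tick.
 */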
int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * Charge the tick to the process and to user (or nice)
		 * time.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize)
				kcount[i / (HISTFRACTION * sizeof(*kcount))]++;
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run a linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}
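/*
 * Worked example of the estimator above (stathz = 128 and a load
 * average of 1 assumed for illustration): a compute-bound process
 * gains 128 p_cpu increments per second, so setpri() re-evaluates
 * its priority about 32 times a second, while the decay in
 * kern_synch.c takes roughly 5 seconds (5*loadav) to forget 90% of
 * that accumulated usage.
 */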
/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.tick = tick;
	clockinfo.profhz = profhz;
	clockinfo.stathz = stathz ? stathz : hz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}
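#ifdef notdef
/*
 * Illustrative sketch only (the name is hypothetical, not part of
 * the kernel): given a struct clockinfo filled in as above, convert
 * a count of hz ticks to microseconds.  tick is the length of one
 * hz interrupt in usec, so hz * tick spans one second.
 */
static long
example_ticks_to_usec(ci, nticks)
	struct clockinfo *ci;
	long nticks;
{

	return (nticks * ci->tick);
}
#endif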