/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.28 (Berkeley) 11/16/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
extern u_short *kcount;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
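
/*
 * Illustration (not compiled): the macro above performs a single carry,
 * so it assumes the increment is less than one second.  For example,
 * starting from tv_sec = 100, tv_usec = 999999,
 *
 *	BUMPTIME(&tv, 2);	=> tv_sec = 101, tv_usec = 1
 *
 * An increment of 1000000 usec or more would leave tv_usec out of range.
 */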

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct timeval time;
volatile struct timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the queue elements (times > 0) are events yet to
	 * happen, where the time for each is given as a delta from the
	 * previous.  Decrementing just the first of these serves to
	 * decrement the time to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
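
/*
 * Illustration (not compiled): the callout queue stores each c_time as a
 * delta from its predecessor.  Events due in 2, 5, 5, and 9 ticks are
 * kept as
 *
 *	calltodo -> 2 -> 3 -> 0 -> 4
 *
 * so hardclock() normally decrements only the first element (plus any
 * that have already counted down through zero); entries with c_time <= 0
 * are ``due'' and are reaped from the front by softclock() below.
 */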

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
void
timeout(func, arg, t)
	void (*func) __P((void *));
	void *arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s;

	s = splhigh();
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	void (*func) __P((void *));
	void *arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
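
/*
 * Sketch of typical use (xx_timer and sc are hypothetical driver names):
 * a driver that wants xx_timer(sc) run one second from now arranges it
 * with
 *
 *	timeout(xx_timer, (void *)sc, hz);
 *
 * and cancels a still-pending call, e.g. on detach, with
 *
 *	untimeout(xx_timer, (void *)sc);
 *
 * A periodic timer is built by calling timeout() again from within
 * xx_timer() itself.
 */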

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 * (For example, with hz = 100, a time 2.5 seconds away yields
	 * (2000 ms + 500 ms) / 10 ms = 250 ticks.)
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & SPROFIL) == 0) {
		p->p_flag |= SPROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & SPROFIL) {
		p->p_flag &= ~SPROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;
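
/*
 * Illustration (the numbers are hypothetical; rates are machine-chosen):
 * with stathz = 128 and profhz = 1024, initclocks() computes psratio = 8.
 * While any process is being profiled, startprofclock() sets
 * psdiv = pscnt = 8 and runs the clock at profhz, so statclock() below
 * takes a profiling sample on every tick but charges statistics only on
 * every 8th tick.  stopprofclock() restores the 1:1 case at stathz.
 */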

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize)
				kcount[i / (HISTFRACTION * sizeof(*kcount))]++;
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}
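
/*
 * Sketch of the decay, for reference (the authoritative filter lives in
 * schedcpu() in kern_synch.c): roughly once a second each process's
 * estimator is scaled by a load-dependent factor, approximately
 *
 *	p_cpu = ((2 * loadav) / (2 * loadav + 1)) * p_cpu + p_nice;
 *
 * which is what makes p_cpu decay to about 10% of its value in
 * 5 * loadav seconds, as described above.
 */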

/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.tick = tick;
	clockinfo.profhz = profhz;
	clockinfo.stathz = stathz ? stathz : hz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}
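
/*
 * Sketch of user-level access (illustrative only; assumes the
 * getkerninfo(2) interface and the KINFO_CLOCKRATE request code from
 * <sys/kinfo.h> that dispatch to the routine above):
 *
 *	struct clockinfo ci;
 *	int size = sizeof(ci);
 *
 *	if (getkerninfo(KINFO_CLOCKRATE, (char *)&ci, &size, 0) != -1)
 *		printf("hz = %d, stathz = %d\n", ci.hz, ci.stathz);
 */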