/*
 * Copyright (c) 1982, 1986, 1991 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.12 (Berkeley) 03/17/91
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz); however, the
 * statistics will then be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics-gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats = p->p_stats;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
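	 * For example, events due in 5, 8, and 10 ticks are queued with
	 * delta times 5, 3, and 2; a single decrement of the first entry
	 * thus ages all three events at once.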
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
			    p->p_rlimit[RLIMIT_CPU].rlim_max)
				p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased
		 * here, and the formula for computing priorities (in
		 * kern_synch.c) will compute a different value each time
		 * p_cpu increases by 4.  The cpu usage estimator ramps up
		 * quite quickly when the process is running (linearly),
		 * and decays away exponentially, at a rate which is
		 * proportionally slower when the system is busy.  The basic
		 * principle is that the system will 90% forget that a
		 * process used a lot of CPU time in 5*loadav seconds.
		 * This causes the system to favor processes which haven't
		 * run much recently, and to round-robin among other
		 * processes.
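		 * (For reference, the once-per-second decay applied in
		 * kern_synch.c is roughly of the form
		 *	p_cpu = ((2 * loadav) / (2 * loadav + 1)) * p_cpu + p_nice,
		 * which yields the 90%-in-5*loadav-seconds behavior noted
		 * above; see that file for the authoritative derivation.)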
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known,
	 * then we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running,
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
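		 * For example, a tick taken at elevated IPL in a disk
		 * driver while no process is running counts as system
		 * time, while a tick taken at base priority with no
		 * process running counts as idle time.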
		 */
		cpstate = CP_SYS;
		if (noproc && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped in user mode and profiling, give the process
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce its priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
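 * If the removed entry has a successor, its remaining delta time is
 * credited to that successor, so the due times of all later entries
 * in the queue are preserved.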
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap;
	int *retval;
{
	register struct uprof *upp = &p->p_stats->p_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}
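/*
 * Usage sketch (editorial addition, not part of the original source):
 * a driver typically arms a one-second watchdog with timeout() and
 * rearms it from the handler itself; untimeout() cancels a pending
 * entry.  The names xxwatch and xx_sc below are hypothetical.
 *
 *	extern int xxwatch();
 *
 *	timeout(xxwatch, (caddr_t)xx_sc, hz);	(arm: fires in hz ticks)
 *	untimeout(xxwatch, (caddr_t)xx_sc);	(cancel before it fires)
 *
 * A handler that wants to run once per second simply calls
 * timeout(xxwatch, sc, hz) again before returning.
 */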