/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.20 (Berkeley) 06/20/92
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine stathz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == stathz); however, the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
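
/*
 * Editorial worked example (not part of the original source): applying
 * BUMPTIME(&t, 10000) to t = { 1, 995000 } first yields tv_usec == 1005000,
 * which the carry then reduces to t = { 2, 5000 }.  The single subtraction
 * suffices only because the increment is assumed to be much less than
 * one second.
 */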

int	ticks;
int	stathz;
int	profhz;
struct	timeval time;
struct	timeval mono_time;

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats;
	register int s;
	int needsoft = 0;
	time_t secs;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At the front of the queue are some number of events which are
	 * ``due''.  The time to these is <= 0 and if negative represents
	 * the number of ticks which have passed since they were supposed
	 * to happen.  The rest of the q elements (times > 0) are events
	 * yet to happen, where the time for each is given as a delta from
	 * the previous.  Decrementing just the first of these serves to
	 * decrement the time to all events.
	 */
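	/*
	 * Editorial example: three timeouts due in 3, 5, and 9 ticks are
	 * queued with c_time deltas of 3, 2, and 4.  Decrementing the
	 * head entry's 3 to 2 moves all three events one tick closer.
	 */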
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the user time
		 * counter, and run the process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * for the entire last tick.
	 */
	if (p) {
		secs = p->p_utime.tv_sec + p->p_stime.tv_sec + 1;
		if (secs > p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			if (secs > p->p_rlimit[RLIMIT_CPU].rlim_max)
				psignal(p, SIGKILL);
			else {
				psignal(p, SIGXCPU);
				if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
				    p->p_rlimit[RLIMIT_CPU].rlim_max)
					p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
			}
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is incremented
		 * here and the formula for computing priorities (in
		 * kern_synch.c) will compute a different value each time
		 * p_cpu increases by 4.  The cpu usage estimator ramps up
		 * quite quickly when the process is running (linearly), and
		 * decays away exponentially, at a rate which is
		 * proportionally slower when the system is busy.  The basic
		 * principle is that the system will 90% forget that a
		 * process used a lot of CPU time in 5*loadav seconds.  This
		 * causes the system to favor processes which haven't run
		 * much recently, and to round-robin among other processes.
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (stathz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	ticks++;
	if (timedelta == 0) {
		BUMPTIME(&time, tick);
		BUMPTIME(&mono_time, tick);
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta);
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}
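
/*
 * Editorial worked example of the timedelta slewing above, assuming
 * hz = 100 (tick = 10000 usec) and a hypothetical tickdelta of 5:
 * an adjtime(2)-style adjustment of timedelta = -50 usec is amortized
 * over ten clock interrupts, each advancing time by 9995 usec instead
 * of 10000, so the clock slews smoothly rather than stepping.
 */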

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If we trapped from user mode and the process is profiling,
	 * give it a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce its priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
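
#ifdef notdef
/*
 * Illustrative sketch only, not part of the original source: a typical
 * driver-style use of the callout interface above.  The watchdog
 * function xxwatch() and its argument are hypothetical.  A pending
 * call is scheduled with timeout() and, should the awaited event
 * arrive first, cancelled again with untimeout().
 */
int	xxwatch();

xxstart(xp)
	caddr_t xp;
{

	/* call xxwatch(xp) two seconds (2*hz ticks) from now */
	timeout(xxwatch, xp, 2 * hz);
}

xxdone(xp)
	caddr_t xp;
{

	/* the event arrived in time; remove the pending call */
	untimeout(xxwatch, xp);
}
#endif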

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If the number of milliseconds will fit in 32 bit arithmetic,
	 * then compute the number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute the number of hz in time,
	 * rounding times greater than representable to the maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.stathz = stathz;
	clockinfo.tick = tick;
	clockinfo.profhz = profhz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}
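
#ifdef notdef
/*
 * Illustrative sketch only, not part of the original source: converting
 * an absolute time to the tick count that timeout() expects, using
 * hzto() as the comment above that function suggests.  The wakeat()
 * function is hypothetical.
 */
int	wakeat();

examplewakeat(tvp)
	struct timeval *tvp;
{

	timeout(wakeat, (caddr_t)0, hzto(tvp));
}
#endif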