/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.21 (Berkeley) 06/20/92
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine stathz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == stathz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

int	ticks;
int	stathz;
int	profhz;
int	profprocs;
struct	timeval time;
struct	timeval mono_time;

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats;
	register int s;
	int needsoft = 0;
	time_t secs;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
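	/*
	 * Illustrative example (not part of the original source): events
	 * due in 3, 5, and 10 ticks are stored with c_time deltas of
	 * 3, 2, and 5, so aging the whole queue costs a single decrement
	 * of the head element regardless of queue length.
	 */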
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (p) {
		secs = p->p_utime.tv_sec + p->p_stime.tv_sec + 1;
		if (secs > p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			if (secs > p->p_rlimit[RLIMIT_CPU].rlim_max)
				psignal(p, SIGKILL);
			else {
				psignal(p, SIGXCPU);
				if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
				    p->p_rlimit[RLIMIT_CPU].rlim_max)
					p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
			}
		}
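		/*
		 * Illustrative note (not in the original source): with a
		 * soft limit (rlim_cur) of 10 seconds and a hard limit
		 * (rlim_max) of 20, the process receives SIGXCPU at
		 * roughly 10, 15, and 20 seconds of cpu time as rlim_cur
		 * is bumped by 5 each time, and SIGKILL once the hard
		 * limit itself is exceeded.
		 */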
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time the p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (stathz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	ticks++;
	if (timedelta == 0) {
		BUMPTIME(&time, tick);
		BUMPTIME(&mono_time, tick);
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta);
	}
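	/*
	 * Illustrative example (values hypothetical, not from the
	 * original source): if adjtime() has left timedelta at -30000
	 * usec with tickdelta of 5 usec, each hardclock advances the
	 * clock by tick - 5 usec, so the 30000 usec correction is
	 * slewed out over 6000 ticks rather than stepping the time
	 * backwards.
	 */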
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}
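
/*
 * Design note (not part of the original source): softclock raises to
 * splhigh only while unlinking an expired entry from the queue, then
 * drops back before invoking the handler, so handlers run at
 * software-clock priority and may safely call timeout() to
 * reschedule themselves.
 */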

/*
 * Notification of start of profiling clock.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
startprofclock(p)
	struct proc *p;
{

	if (p->p_flag & SPROFIL)
		return;
	profprocs++;
	p->p_flag |= SPROFIL;
#ifdef PROFTIMER
	initprofclock(profprocs);
#else
	profhz = hz;
#endif
}

/*
 * Notification of stopping of profile clock.
 */
stopprofclock(p)
	struct proc *p;
{

	if ((p->p_flag & SPROFIL) == 0)
		return;
	profprocs--;
	p->p_flag &= ~SPROFIL;
#ifdef PROFTIMER
	initprofclock(profprocs);
#endif
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
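
/*
 * Usage sketch (hypothetical names, not from this file): a driver
 * watchdog might arm itself with
 *
 *	timeout(xxwatch, (caddr_t)xxsc, hz);
 *
 * to have xxwatch(xxsc) called in one second, and cancel a pending
 * call with
 *
 *	untimeout(xxwatch, (caddr_t)xxsc);
 *
 * Handlers are invoked as (*func)(arg, a) from softclock above.
 */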

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.stathz = stathz;
	clockinfo.tick = tick;
#ifdef PROFTIMER
	initprofclock(2);
#else
	profhz = hz;
#endif
	clockinfo.profhz = profhz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}