/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.10 (Berkeley) 06/30/90
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#include "machine/reg.h"
#include "machine/psl.h"

#if defined(vax) || defined(tahoe)
#include "machine/mtpr.h"
#include "machine/clock.h"
#endif
#if defined(hp300)
#include "machine/mtpr.h"
#endif
#ifdef i386
#include "machine/frame.h"
#include "machine/segments.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e. hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
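
/*
 * A worked example (illustrative values only): with *t == { 1, 995000 }
 * and usec == 10000, BUMPTIME leaves { 2, 5000 }; the microsecond
 * overflow is carried into tv_sec.  A single carry test suffices
 * because the increment is always much less than one second.
 */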

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifndef i386
hardclock(pc, ps)
	caddr_t pc;
	int ps;
#else
hardclock(frame)
	struct intrframe frame;
#define pc frame.if_eip
#endif
{
	register struct callout *p1;
	register struct proc *p = u.u_procp;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
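	/*
	 * For example (illustrative): timeouts due at absolute ticks
	 * 3, 5, 5 and 10 would be queued with c_time deltas of
	 * 3, 2, 0 and 5; decrementing only the head's delta below
	 * ages all four events at once.
	 */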
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
#ifdef i386
	if (ISPL(frame.if_cs) == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
		s = p->p_rssize;
		u.u_ru.ru_idrss += s;
#ifdef notdef
		u.u_ru.ru_isrss += 0;		/* XXX (haven't got this) */
#endif
		if (p->p_textp) {
			register int xrss = p->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
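	/*
	 * For example (assuming hz == 100 purely for illustration):
	 * a compute-bound process gains 100 p_cpu units per second,
	 * so the setpri() recomputation above runs 25 times a second
	 * for it, once per 4 ticks charged.
	 */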

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
#ifdef i386
		gatherstats(pc, ISPL(frame.if_cs), frame.if_ppl);
#else
		gatherstats(pc, ps);
#endif

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
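	/*
	 * For example (illustrative values): with tick == 10000 and
	 * tickdelta == 5, a pending timedelta of -40 usec is worked
	 * off over 8 ticks, each advancing time by only 9995 usec,
	 * slewing the clock smoothly instead of stepping it.
	 */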
	if (needsoft) {
#ifdef i386
		if (frame.if_ppl == 0) {
#else
		if (BASEPRI(ps)) {
#endif
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
#ifdef i386
			softclock(frame);
#else
			softclock(pc, ps);
#endif
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
#ifdef i386
#undef pc
gatherstats(pc, ps, ppl)
#else
gatherstats(pc, ps)
#endif
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
#ifdef i386
	if (ps == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
#if defined(i386)
		if (noproc && ps == 0)
#else
		if (noproc && BASEPRI(ps))
#endif
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef i386
softclock(frame)
	struct intrframe frame;
#define pc frame.if_eip
#else
softclock(pc, ps)
	caddr_t pc;
	int ps;
#endif
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
#ifdef i386
	if (ISPL(frame.if_cs) == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
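
/*
 * A minimal usage sketch (hypothetical driver names, not from this
 * file): to have xxpoll() run one second from now, and to cancel it
 * if the device is detached first:
 *
 *	timeout(xxpoll, (caddr_t)sc, hz);
 *	...
 *	untimeout(xxpoll, (caddr_t)sc);
 *
 * A periodic poll is obtained by having xxpoll() re-arm itself with
 * another timeout() call before it returns.
 */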

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap;
	int *retval;
{
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}
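
/*
 * A user-level sketch (hypothetical names; see profil(2)): a process
 * might request pc-sampling with
 *
 *	short buf[8192];
 *	profil(buf, sizeof (buf), offset, scale);
 *
 * After this, each clock tick taken in user mode has softclock()
 * above post SOWEUPC, and the saved pc is folded into the buffer on
 * the way back to user mode.  Since that path is gated on a non-zero
 * pr_scale, a call with pcscale == 0 turns sampling off.
 */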