1*23366Smckusick /* 2*23366Smckusick * Copyright (c) 1982 Regents of the University of California. 3*23366Smckusick * All rights reserved. The Berkeley software License Agreement 4*23366Smckusick * specifies the terms and conditions for redistribution. 5*23366Smckusick * 6*23366Smckusick * @(#)kern_clock.c 6.13 (Berkeley) 06/08/85 7*23366Smckusick */ 89Sbill 99751Ssam #include "../machine/reg.h" 109751Ssam #include "../machine/psl.h" 119751Ssam 1217088Sbloom #include "param.h" 1317088Sbloom #include "systm.h" 1417088Sbloom #include "dk.h" 1517088Sbloom #include "callout.h" 1617088Sbloom #include "dir.h" 1717088Sbloom #include "user.h" 1817088Sbloom #include "kernel.h" 1917088Sbloom #include "proc.h" 2017088Sbloom #include "vm.h" 2117088Sbloom #include "text.h" 229Sbill 239751Ssam #ifdef vax 249751Ssam #include "../vax/mtpr.h" 259751Ssam #endif 269751Ssam 2710291Smckusick #ifdef GPROF 2817088Sbloom #include "gprof.h" 2910291Smckusick #endif 3010291Smckusick 318124Sroot /* 328124Sroot * Clock handling routines. 338124Sroot * 3411392Ssam * This code is written to operate with two timers which run 3511392Ssam * independently of each other. The main clock, running at hz 3611392Ssam * times per second, is used to do scheduling and timeout calculations. 3711392Ssam * The second timer does resource utilization estimation statistically 3811392Ssam * based on the state of the machine phz times a second. Both functions 3911392Ssam * can be performed by a single clock (ie hz == phz), however the 4011392Ssam * statistics will be much more prone to errors. Ideally a machine 4111392Ssam * would have separate clocks measuring time spent in user state, system 4211392Ssam * state, interrupt state, and idle state. These clocks would allow a non- 4311392Ssam * approximate measure of resource utilization. 448124Sroot */ 451559Sbill 468124Sroot /* 478124Sroot * TODO: 4812747Ssam * time of day, system/user timing, timeouts, profiling on separate timers 4912747Ssam * allocate more timeout table slots when table overflows. 508124Sroot */ 5117007Smckusick #ifdef notdef 5217007Smckusick /* 5317007Smckusick * Bump a timeval by a small number of usec's. 5417007Smckusick */ 5517007Smckusick bumptime(tp, usec) 5617007Smckusick register struct timeval *tp; 5717007Smckusick int usec; 5817007Smckusick { 599Sbill 6017007Smckusick tp->tv_usec += usec; 6117007Smckusick if (tp->tv_usec >= 1000000) { 6217007Smckusick tp->tv_usec -= 1000000; 6317007Smckusick tp->tv_sec++; 6417007Smckusick } 6517007Smckusick } 6617007Smckusick #endif notdef 6717007Smckusick #define BUMPTIME(t, usec) { \ 6817007Smckusick register struct timeval *tp = (t); \ 6917007Smckusick \ 7017007Smckusick tp->tv_usec += (usec); \ 7117007Smckusick if (tp->tv_usec >= 1000000) { \ 7217007Smckusick tp->tv_usec -= 1000000; \ 7317007Smckusick tp->tv_sec++; \ 7417007Smckusick } \ 7517007Smckusick } 7617007Smckusick 778124Sroot /* 7811392Ssam * The hz hardware interval timer. 7911392Ssam * We update the events relating to real time. 8011392Ssam * If this timer is also being used to gather statistics, 8111392Ssam * we run through the statistics gathering routine as well. 828124Sroot */ 832609Swnj /*ARGSUSED*/ 842442Swnj hardclock(pc, ps) 852450Swnj caddr_t pc; 868944Sroot int ps; 879Sbill { 882768Swnj register struct callout *p1; 898097Sroot register struct proc *p; 902442Swnj register int s, cpstate; 9116172Skarels int needsoft = 0; 9217356Skarels extern int adjtimedelta, tickadj; 939Sbill 948124Sroot /* 958124Sroot * Update real-time timeout queue. 968124Sroot * At front of queue are some number of events which are ``due''. 978124Sroot * The time to these is <= 0 and if negative represents the 988124Sroot * number of ticks which have passed since it was supposed to happen. 998124Sroot * The rest of the q elements (times > 0) are events yet to happen, 1008124Sroot * where the time for each is given as a delta from the previous. 1018124Sroot * Decrementing just the first of these serves to decrement the time 1028124Sroot * to all events. 1038124Sroot */ 10412747Ssam p1 = calltodo.c_next; 10512747Ssam while (p1) { 10612747Ssam if (--p1->c_time > 0) 10712747Ssam break; 10816172Skarels needsoft = 1; 10912747Ssam if (p1->c_time == 0) 11012747Ssam break; 11112747Ssam p1 = p1->c_next; 11212747Ssam } 113138Sbill 1148124Sroot /* 1158124Sroot * Charge the time out based on the mode the cpu is in. 1168124Sroot * Here again we fudge for the lack of proper interval timers 1178124Sroot * assuming that the current state has been around at least 1188124Sroot * one tick. 1198124Sroot */ 1209Sbill if (USERMODE(ps)) { 12116172Skarels if (u.u_prof.pr_scale) 12216172Skarels needsoft = 1; 1238124Sroot /* 1248124Sroot * CPU was in user state. Increment 1258124Sroot * user time counter, and process process-virtual time 1269604Ssam * interval timer. 1278124Sroot */ 12817007Smckusick BUMPTIME(&u.u_ru.ru_utime, tick); 1298097Sroot if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) && 1308097Sroot itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0) 1318097Sroot psignal(u.u_procp, SIGVTALRM); 1328028Sroot if (u.u_procp->p_nice > NZERO) 133305Sbill cpstate = CP_NICE; 134305Sbill else 135305Sbill cpstate = CP_USER; 1369Sbill } else { 1378124Sroot /* 1388124Sroot * CPU was in system state. If profiling kernel 1398124Sroot * increment a counter. If no process is running 1408124Sroot * then this is a system tick if we were running 1418124Sroot * at a non-zero IPL (in a driver). If a process is running, 1428124Sroot * then we charge it with system time even if we were 1438124Sroot * at a non-zero IPL, since the system often runs 1448124Sroot * this way during processing of system calls. 1458124Sroot * This is approximate, but the lack of true interval 1468124Sroot * timers makes doing anything else difficult. 1478124Sroot */ 148305Sbill cpstate = CP_SYS; 1497315Ssam if (noproc) { 1508944Sroot if (BASEPRI(ps)) 1517315Ssam cpstate = CP_IDLE; 1528028Sroot } else { 15317007Smckusick BUMPTIME(&u.u_ru.ru_stime, tick); 1548028Sroot } 1559Sbill } 1568097Sroot 1578124Sroot /* 15810388Ssam * If the cpu is currently scheduled to a process, then 15910388Ssam * charge it with resource utilization for a tick, updating 16010388Ssam * statistics which run in (user+system) virtual time, 16110388Ssam * such as the cpu time limit and profiling timers. 16210388Ssam * This assumes that the current process has been running 16310388Ssam * the entire last tick. 16410388Ssam */ 16518585Skarels if (noproc == 0) { 16610388Ssam if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) > 16710388Ssam u.u_rlimit[RLIMIT_CPU].rlim_cur) { 16810388Ssam psignal(u.u_procp, SIGXCPU); 16910388Ssam if (u.u_rlimit[RLIMIT_CPU].rlim_cur < 17010388Ssam u.u_rlimit[RLIMIT_CPU].rlim_max) 17110388Ssam u.u_rlimit[RLIMIT_CPU].rlim_cur += 5; 17210388Ssam } 17310388Ssam if (timerisset(&u.u_timer[ITIMER_PROF].it_value) && 17410388Ssam itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0) 17510388Ssam psignal(u.u_procp, SIGPROF); 17610388Ssam s = u.u_procp->p_rssize; 17710388Ssam u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0; /* XXX */ 17810388Ssam if (u.u_procp->p_textp) { 17910388Ssam register int xrss = u.u_procp->p_textp->x_rssize; 18010388Ssam 18110388Ssam s += xrss; 18210388Ssam u.u_ru.ru_ixrss += xrss; 18310388Ssam } 18410388Ssam if (s > u.u_ru.ru_maxrss) 18510388Ssam u.u_ru.ru_maxrss = s; 18610388Ssam } 18710388Ssam 18810388Ssam /* 1898124Sroot * We adjust the priority of the current process. 1908124Sroot * The priority of a process gets worse as it accumulates 1918124Sroot * CPU time. The cpu usage estimator (p_cpu) is increased here 1928124Sroot * and the formula for computing priorities (in kern_synch.c) 1938124Sroot * will compute a different value each time the p_cpu increases 1948124Sroot * by 4. The cpu usage estimator ramps up quite quickly when 1958124Sroot * the process is running (linearly), and decays away exponentially, 1968124Sroot * at a rate which is proportionally slower when the system is 1978124Sroot * busy. The basic principal is that the system will 90% forget 1988124Sroot * that a process used a lot of CPU time in 5*loadav seconds. 1998124Sroot * This causes the system to favor processes which haven't run 2008124Sroot * much recently, and to round-robin among other processes. 2018124Sroot */ 2029Sbill if (!noproc) { 2038097Sroot p = u.u_procp; 2048097Sroot p->p_cpticks++; 2058097Sroot if (++p->p_cpu == 0) 2068097Sroot p->p_cpu--; 2078124Sroot if ((p->p_cpu&3) == 0) { 2088097Sroot (void) setpri(p); 2098097Sroot if (p->p_pri >= PUSER) 2108097Sroot p->p_pri = p->p_usrpri; 2119Sbill } 2129Sbill } 2138124Sroot 2148124Sroot /* 21511392Ssam * If the alternate clock has not made itself known then 21611392Ssam * we must gather the statistics. 21711392Ssam */ 21811392Ssam if (phz == 0) 21911392Ssam gatherstats(pc, ps); 22011392Ssam 22111392Ssam /* 2228124Sroot * Increment the time-of-day, and schedule 2238124Sroot * processing of the callouts at a very low cpu priority, 2248124Sroot * so we don't keep the relatively high clock interrupt 2258124Sroot * priority any longer than necessary. 2268124Sroot */ 22717356Skarels if (adjtimedelta == 0) 22817356Skarels BUMPTIME(&time, tick) 22917356Skarels else { 23017356Skarels register delta; 23117356Skarels 23217356Skarels if (adjtimedelta < 0) { 23317356Skarels delta = tick - tickadj; 23417356Skarels adjtimedelta += tickadj; 23517356Skarels } else { 23617356Skarels delta = tick + tickadj; 23717356Skarels adjtimedelta -= tickadj; 23817356Skarels } 23917356Skarels BUMPTIME(&time, delta); 24017356Skarels } 24116525Skarels if (needsoft) { 24216525Skarels if (BASEPRI(ps)) { 24316525Skarels /* 24416525Skarels * Save the overhead of a software interrupt; 24516525Skarels * it will happen as soon as we return, so do it now. 24616525Skarels */ 24716525Skarels (void) splsoftclock(); 24816525Skarels softclock(pc, ps); 24916525Skarels } else 25016525Skarels setsoftclock(); 25116525Skarels } 2522442Swnj } 2532442Swnj 25415191Ssam int dk_ndrive = DK_NDRIVE; 2558124Sroot /* 25611392Ssam * Gather statistics on resource utilization. 25711392Ssam * 25811392Ssam * We make a gross assumption: that the system has been in the 25911392Ssam * state it is in (user state, kernel state, interrupt state, 26011392Ssam * or idle state) for the entire last time interval, and 26111392Ssam * update statistics accordingly. 26211392Ssam */ 26312747Ssam /*ARGSUSED*/ 26411392Ssam gatherstats(pc, ps) 26511392Ssam caddr_t pc; 26611392Ssam int ps; 26711392Ssam { 26811392Ssam int cpstate, s; 26911392Ssam 27011392Ssam /* 27111392Ssam * Determine what state the cpu is in. 27211392Ssam */ 27311392Ssam if (USERMODE(ps)) { 27411392Ssam /* 27511392Ssam * CPU was in user state. 27611392Ssam */ 27711392Ssam if (u.u_procp->p_nice > NZERO) 27811392Ssam cpstate = CP_NICE; 27911392Ssam else 28011392Ssam cpstate = CP_USER; 28111392Ssam } else { 28211392Ssam /* 28311392Ssam * CPU was in system state. If profiling kernel 28411392Ssam * increment a counter. 28511392Ssam */ 28611392Ssam cpstate = CP_SYS; 28711392Ssam if (noproc && BASEPRI(ps)) 28811392Ssam cpstate = CP_IDLE; 28911392Ssam #ifdef GPROF 29011392Ssam s = pc - s_lowpc; 29111392Ssam if (profiling < 2 && s < s_textsize) 29211392Ssam kcount[s / (HISTFRACTION * sizeof (*kcount))]++; 29311392Ssam #endif 29411392Ssam } 29511392Ssam /* 29611392Ssam * We maintain statistics shown by user-level statistics 29711392Ssam * programs: the amount of time in each cpu state, and 29811392Ssam * the amount of time each of DK_NDRIVE ``drives'' is busy. 29911392Ssam */ 30011392Ssam cp_time[cpstate]++; 30111392Ssam for (s = 0; s < DK_NDRIVE; s++) 30211392Ssam if (dk_busy&(1<<s)) 30311392Ssam dk_time[s]++; 30411392Ssam } 30511392Ssam 30611392Ssam /* 3078124Sroot * Software priority level clock interrupt. 3088124Sroot * Run periodic events from timeout queue. 3098124Sroot */ 3102609Swnj /*ARGSUSED*/ 3112442Swnj softclock(pc, ps) 3122450Swnj caddr_t pc; 3138944Sroot int ps; 3142442Swnj { 3152442Swnj 3168097Sroot for (;;) { 3178124Sroot register struct callout *p1; 3188124Sroot register caddr_t arg; 3198124Sroot register int (*func)(); 3208124Sroot register int a, s; 3218124Sroot 3228097Sroot s = spl7(); 3238097Sroot if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { 3248097Sroot splx(s); 3258097Sroot break; 3262442Swnj } 3278124Sroot arg = p1->c_arg; func = p1->c_func; a = p1->c_time; 3288097Sroot calltodo.c_next = p1->c_next; 3298097Sroot p1->c_next = callfree; 3308097Sroot callfree = p1; 3319157Ssam splx(s); 3328112Sroot (*func)(arg, a); 3332442Swnj } 3349604Ssam /* 33513127Ssam * If trapped user-mode and profiling, give it 33613127Ssam * a profiling tick. 3379604Ssam */ 33813127Ssam if (USERMODE(ps)) { 33913127Ssam register struct proc *p = u.u_procp; 34013127Ssam 34113127Ssam if (u.u_prof.pr_scale) { 34213127Ssam p->p_flag |= SOWEUPC; 34313127Ssam aston(); 34413127Ssam } 34513127Ssam /* 34613127Ssam * Check to see if process has accumulated 34713127Ssam * more than 10 minutes of user time. If so 34813127Ssam * reduce priority to give others a chance. 34913127Ssam */ 35013127Ssam if (p->p_uid && p->p_nice == NZERO && 35113127Ssam u.u_ru.ru_utime.tv_sec > 10 * 60) { 35213127Ssam p->p_nice = NZERO+4; 35313127Ssam (void) setpri(p); 35413127Ssam p->p_pri = p->p_usrpri; 35513127Ssam } 3569604Ssam } 3579Sbill } 3589Sbill 3599Sbill /* 36012747Ssam * Arrange that (*fun)(arg) is called in t/hz seconds. 36112747Ssam */ 36212747Ssam timeout(fun, arg, t) 3632450Swnj int (*fun)(); 3642450Swnj caddr_t arg; 36512747Ssam register int t; 3669Sbill { 3673542Swnj register struct callout *p1, *p2, *pnew; 36812747Ssam register int s = spl7(); 3699Sbill 37018282Smckusick if (t <= 0) 37112747Ssam t = 1; 3723542Swnj pnew = callfree; 3733542Swnj if (pnew == NULL) 3743542Swnj panic("timeout table overflow"); 3753542Swnj callfree = pnew->c_next; 3763542Swnj pnew->c_arg = arg; 3773542Swnj pnew->c_func = fun; 3783542Swnj for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) 3799742Ssam if (p2->c_time > 0) 3809742Ssam t -= p2->c_time; 3813542Swnj p1->c_next = pnew; 3823542Swnj pnew->c_next = p2; 3833542Swnj pnew->c_time = t; 3843542Swnj if (p2) 3853542Swnj p2->c_time -= t; 3869Sbill splx(s); 3879Sbill } 3887305Ssam 3897305Ssam /* 3907305Ssam * untimeout is called to remove a function timeout call 3917305Ssam * from the callout structure. 3927305Ssam */ 3938097Sroot untimeout(fun, arg) 3947305Ssam int (*fun)(); 3957305Ssam caddr_t arg; 3967305Ssam { 3977305Ssam register struct callout *p1, *p2; 3987305Ssam register int s; 3997305Ssam 4007305Ssam s = spl7(); 4017305Ssam for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { 4027305Ssam if (p2->c_func == fun && p2->c_arg == arg) { 4038112Sroot if (p2->c_next && p2->c_time > 0) 4047305Ssam p2->c_next->c_time += p2->c_time; 4057305Ssam p1->c_next = p2->c_next; 4067305Ssam p2->c_next = callfree; 4077305Ssam callfree = p2; 4087305Ssam break; 4097305Ssam } 4107305Ssam } 4117305Ssam splx(s); 4127305Ssam } 4138112Sroot 4148124Sroot /* 4158124Sroot * Compute number of hz until specified time. 4168124Sroot * Used to compute third argument to timeout() from an 4178124Sroot * absolute time. 4188124Sroot */ 4198112Sroot hzto(tv) 4208112Sroot struct timeval *tv; 4218112Sroot { 4228124Sroot register long ticks; 4238124Sroot register long sec; 4248112Sroot int s = spl7(); 4258112Sroot 4268124Sroot /* 4278124Sroot * If number of milliseconds will fit in 32 bit arithmetic, 4288124Sroot * then compute number of milliseconds to time and scale to 4298124Sroot * ticks. Otherwise just compute number of hz in time, rounding 4308124Sroot * times greater than representible to maximum value. 4318124Sroot * 4328124Sroot * Delta times less than 25 days can be computed ``exactly''. 4338124Sroot * Maximum value for any timeout in 10ms ticks is 250 days. 4348124Sroot */ 4358124Sroot sec = tv->tv_sec - time.tv_sec; 4368124Sroot if (sec <= 0x7fffffff / 1000 - 1000) 4378124Sroot ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 4388124Sroot (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 4398124Sroot else if (sec <= 0x7fffffff / hz) 4408124Sroot ticks = sec * hz; 4418124Sroot else 4428124Sroot ticks = 0x7fffffff; 4438112Sroot splx(s); 4448112Sroot return (ticks); 4458112Sroot } 44612747Ssam 44712747Ssam profil() 44812747Ssam { 44912747Ssam register struct a { 45012747Ssam short *bufbase; 45112747Ssam unsigned bufsize; 45212747Ssam unsigned pcoffset; 45312747Ssam unsigned pcscale; 45412747Ssam } *uap = (struct a *)u.u_ap; 45512747Ssam register struct uprof *upp = &u.u_prof; 45612747Ssam 45712747Ssam upp->pr_base = uap->bufbase; 45812747Ssam upp->pr_size = uap->bufsize; 45912747Ssam upp->pr_off = uap->pcoffset; 46012747Ssam upp->pr_scale = uap->pcscale; 46112747Ssam } 46212747Ssam 46312747Ssam opause() 46412747Ssam { 46512747Ssam 46612747Ssam for (;;) 46712747Ssam sleep((caddr_t)&u, PSLEP); 46812747Ssam } 469