/*	kern_clock.c	6.10	84/11/14	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "param.h"
#include "systm.h"
#include "dk.h"
#include "callout.h"
#include "dir.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow an
 * exact, rather than approximate, measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */
#ifdef notdef
/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}
#endif /* notdef */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
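
/*
 * Note that BUMPTIME expands to a brace-enclosed block with no
 * trailing semicolon, so it may legally appear semicolon-less as
 * the consequent of an if that has an else, as in hardclock() below:
 *
 *	if (adjtimedelta == 0)
 *		BUMPTIME(&time, tick)
 *	else
 *		...
 *
 * (a `};' before the else would be a syntax error).  A macro is used
 * in place of the function above presumably to avoid procedure call
 * overhead at clock interrupt priority, where this runs hz times a
 * second.
 */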

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;
	int needsoft = 0;
	extern int adjtimedelta, tickadj;

	/*
	 * Update real-time timeout queue.
	 * At the front of the queue are some number of events which
	 * are ``due''.  The time to these is <= 0 and if negative
	 * represents the number of ticks which have passed since they
	 * were supposed to happen.
	 * The rest of the queue elements (times > 0) are events yet to
	 * happen, where the time for each is given as a delta from the
	 * previous.  Decrementing just the first of these serves to
	 * decrement the time to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}
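
	/*
	 * For example, events due in 4, 7, 7 and 12 ticks (illustrative
	 * values) are queued with c_time deltas 4, 3, 0, 5.  On each
	 * tick the loop above drives entries that are already due
	 * further negative, recording how late they are, and counts
	 * down the first entry still in the future; needsoft records
	 * that softclock() has expired entries to process.
	 */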

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual
		 * time interval timer.
		 */
		BUMPTIME(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			BUMPTIME(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known,
	 * then we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day (skewing the tick by tickadj if
	 * an adjtime() correction is in progress), and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (adjtimedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register int delta;

		if (adjtimedelta < 0) {
			delta = tick - tickadj;
			adjtimedelta += tickadj;
		} else {
			delta = tick + tickadj;
			adjtimedelta -= tickadj;
		}
		BUMPTIME(&time, delta);
	}
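
	/*
	 * The skew above consumes adjtimedelta (set by adjtime())
	 * at tickadj microseconds per tick by making each tick appear
	 * tickadj usec longer or shorter.  As a hypothetical example,
	 * with hz = 100 (tick = 10000 usec) and tickadj = 10, a
	 * 1000000 usec (1 second) adjustment is spread over 100000
	 * ticks, i.e. about 1000 seconds of real time.
	 */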
	if (needsoft) {
		if (BASEPRI(ps)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(pc, ps);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
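
/*
 * The cp_time and dk_time arrays accumulated above are read out of
 * kernel memory by user-level status programs in the style of
 * vmstat(1) and iostat(1).  Gathering them here at phz, independently
 * of the hz clock when an alternate timer is available, helps keep
 * the samples uncorrelated with activity driven by the main clock
 * (cf. the comments at the top of this file).
 */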

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped from user mode and profiling, give the
	 * process a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce its priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t == 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
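
/*
 * A typical use, with hypothetical names: a driver schedules a
 * watchdog routine to run half a second from now and cancels it if
 * the operation completes first:
 *
 *	timeout(xxwatch, (caddr_t)xp, hz/2);
 *	...
 *	untimeout(xxwatch, (caddr_t)xp);
 *
 * The (fun, arg) pair identifies the entry to untimeout(), so the
 * same function may be pending against several distinct arguments.
 */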

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''
	 * (0x7fffffff milliseconds is just under 25 days); the maximum
	 * value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}