/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.16 (Berkeley) 05/09/91
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
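	/*
	 * For example, events due 3, 5, 5, and 10 ticks from now would
	 * be queued with c_time deltas of 3, 2, 0, and 5; decrementing
	 * the first c_time thus ages every queued event at once.
	 * (The numbers are purely illustrative.)
	 */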
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (p) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
			    p->p_rlimit[RLIMIT_CPU].rlim_max)
				p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time the p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
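		/*
		 * Numerically: kern_synch.c decays p_cpu once per second
		 * by roughly (2*loadav)/(2*loadav + 1), so after 5*loadav
		 * seconds about e**-2.5 (under 10%) of the old value
		 * remains, matching the ``90% forget'' rule above.
		 */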
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
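/*
 * For example, a callout set up with timeout(tfn, (caddr_t)targ, hz)
 * is cancelled by untimeout(tfn, (caddr_t)targ); both func and arg
 * must match the values given to timeout().  (tfn and targ are
 * illustrative names only.)
 */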
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			/* Credit the unexpired delta to the successor. */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
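/*
 * For example, to run a (hypothetical) handler fn(arg) at the absolute
 * time held in a struct timeval tv, one would schedule:
 *
 *	timeout(fn, arg, hzto(&tv));
 */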