/*	kern_clock.c	4.54	83/05/27	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

#ifdef KGCLOCK
extern	int phz;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e. hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifdef vax
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
hardclock(regs)
	struct regs regs;
{
#define	ps	regs.r_sr
#define	pc	(caddr_t)regs.r_pc
#endif
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;
	int needsoft = 0;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}
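
	/*
	 * For example (illustrative numbers only):  events due 3, 5 and
	 * 9 ticks from now sit on the queue with c_time deltas of 3, 2
	 * and 4.  Decrementing the leading delta ages all three events
	 * at once; entries whose c_time has reached zero or gone
	 * negative (a count of ticks overdue) are dispatched by
	 * softclock() below.
	 */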
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
#ifdef sun
		u.u_ar0 = &regs.r_r0;	/* aston needs ar0 */
#endif
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
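
	/*
	 * To make the ``90% forget in 5*loadav seconds'' figure above
	 * concrete (a sketch; the decay itself is applied in
	 * kern_synch.c, not here):  p_cpu grows by one per tick and is
	 * pinned at the top of its range by the wraparound test above,
	 * so at hz = 100 a compute-bound process saturates an 8-bit
	 * estimator in about 2.5 seconds.  Taking the once-per-second
	 * decay as roughly p_cpu *= 2*loadav/(2*loadav+1), after
	 * 5*loadav seconds about exp(-2.5), i.e. 8%, of the estimate
	 * remains -- 90% has been forgotten.
	 */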
	/*
	 * If this is the only timer then we have to use it to
	 * gather statistics.
	 */
#ifndef KGCLOCK
	gatherstats(pc, ps);
#else
	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);
#endif

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	if (needsoft)
		setsoftclock();
}
#ifdef sun
#undef	pc
#undef	ps
#endif

/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel,
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
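
/*
 * A sketch of how these counters are consumed (assumed usage, not part
 * of this file):  a user-level program in the style of vmstat(1) or
 * iostat(1) samples cp_time through /dev/kmem, sleeps an interval,
 * samples again, and reports each cpu state as a percentage of the
 * total delta:
 *
 *	pct[i] = 100 * (new[i] - old[i]) / (newtotal - oldtotal);
 */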
/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef vax
softclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
softclock()
{
#define	pc	(caddr_t)u.u_ar0[PC]
#define	ps	u.u_ar0[PS]
#endif

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped in user-mode, give the process a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t == 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
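
/*
 * Typical use of the pair (a hypothetical driver fragment; xxwatch and
 * xx_softc are illustrative names):  arm a one-second watchdog with
 * timeout(), and cancel it with untimeout() below, which matches the
 * entry on the same (fun, arg) pair:
 *
 *	timeout(xxwatch, (caddr_t)&xx_softc[unit], hz);
 *	...
 *	untimeout(xxwatch, (caddr_t)&xx_softc[unit]);
 */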
/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
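
/*
 * Checking the bounds claimed in hzto() above (arithmetic only):
 * 0x7fffffff/1000 - 1000 is about 2,146,483 seconds, or roughly
 * 24.8 days, the ``less than 25 days'' exact range; 0x7fffffff ticks
 * at 10ms each is about 248 days, the ``250 days'' ceiling.  A
 * typical caller converts an absolute time into a timeout argument:
 *
 *	timeout(fun, arg, hzto(&tv));	(tv holding a future timeval)
 */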