/*	kern_clock.c	4.52	83/03/03	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"
#ifdef MUSH
#include "../h/quota.h"
#include "../h/share.h"
#endif

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

#ifdef KGCLOCK
extern int phz;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e. hz == phz), however the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	* Keep more accurate statistics by simulating good interval timers.
 *	* Use the time-of-day clock on the VAX to keep more accurate time
 *	  than is possible by repeated use of the interval timer.
 *	* Allocate more timeout table slots when table overflows.
 *	* Get all resource allocation to use second timer.
 */

/* bump a timeval by a small number of usec's */
#define	bumptime(tp, usec) \
	(tp)->tv_usec += usec; \
	if ((tp)->tv_usec >= 1000000) { \
		(tp)->tv_usec -= 1000000; \
		(tp)->tv_sec++; \
	}
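
/*
 * Illustrative sketch, not kernel code: the carry performed by the
 * bumptime() macro above.  Adding tick microseconds per clock
 * interrupt eventually pushes tv_usec past one million, at which
 * point a second is carried.  The value of tick used below (10000
 * usec, i.e. a 100 hz clock) is an assumption for the demonstration;
 * extract the fragment and compile it standalone to experiment.
 */
#ifdef notdef
#include <stdio.h>
#include <sys/time.h>

main()
{
	struct timeval t;
	int tick = 10000;	/* assumed: usec per tick at 100 hz */
	int i;

	t.tv_sec = 0;
	t.tv_usec = 990000;
	for (i = 0; i < 3; i++) {
		t.tv_usec += tick;		/* bumptime() body */
		if (t.tv_usec >= 1000000) {
			t.tv_usec -= 1000000;	/* carry into seconds */
			t.tv_sec++;
		}
		printf("%ld sec %ld usec\n",
		    (long)t.tv_sec, (long)t.tv_usec);
	}
	exit(0);
}
#endif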

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifdef vax
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
hardclock(regs)
	struct regs regs;
{
	int ps = regs.r_sr;
	caddr_t pc = (caddr_t)regs.r_pc;
#endif
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;

#ifdef sun
	if (USERMODE(ps))		/* aston needs ar0 */
		u.u_ar0 = &regs.r_r0;
#endif
	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.  (A standalone illustration of this delta
	 * encoding follows timeout(), below.)
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		--p1->c_time;
	if (p1)
		--p1->c_time;

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * (A numeric check of the 90% figure follows this function.)
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
#ifdef MUSH
		p->p_quota->q_cost += (p->p_nice > NZERO ?
		    (shconsts.sc_tic * ((2*NZERO)-p->p_nice)) / NZERO :
		    shconsts.sc_tic) * (((int)avenrun[0]+2)/3);
#endif
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If this is the only timer then we have to use it to
	 * gather statistics.
	 */
#ifndef KGCLOCK
	gatherstats(pc, ps);
#else
	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);
#endif

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	setsoftclock();
}
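
/*
 * Illustrative sketch, not kernel code: checking the ``90% forget
 * in 5*loadav seconds'' claim in the comment above.  kern_synch.c
 * decays p_cpu once per second by a factor of the form
 * (2*loadav)/(2*loadav + 1); treat that exact form as an assumption
 * borrowed from the companion file.  After 5*loadav decays the
 * remaining fraction is about 13% at a load average of 1 and tends
 * to e**-2.5, roughly 8%, as the load grows, so the 90% figure is
 * a fair approximation.  Compile standalone to verify.
 */
#ifdef notdef
#include <stdio.h>

main()
{
	double load, est;
	int t, n;

	for (load = 1.0; load <= 4.0; load += 1.0) {
		est = 1.0;
		n = (int)(5 * load);
		for (t = 0; t < n; t++)
			est *= (2 * load) / (2 * load + 1);
		printf("loadav %.0f: %4.1f%% remains after %d seconds\n",
		    load, 100 * est, n);
	}
	exit(0);
}
#endif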

/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
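
/*
 * Illustrative sketch, not kernel code: how a user-level program
 * turns the cp_time[] counters kept by gatherstats() into the
 * percentages shown by utilities such as vmstat and iostat.  Real
 * tools sample the counters twice (e.g. through /dev/kmem) and
 * difference them; the deltas below are made-up sample data, and
 * the index order (CP_USER, CP_NICE, CP_SYS, CP_IDLE) is assumed
 * from dk.h.
 */
#ifdef notdef
#include <stdio.h>

main()
{
	/* made-up sampled deltas of cp_time[]: user, nice, sys, idle */
	long delta[4];
	long total;
	int i;

	delta[0] = 620; delta[1] = 40; delta[2] = 200; delta[3] = 140;
	total = 0;
	for (i = 0; i < 4; i++)
		total += delta[i];
	for (i = 0; i < 4; i++)
		printf("state %d: %ld%%\n", i, 100 * delta[i] / total);
	exit(0);
}
#endif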

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef vax
softclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
softclock()
{
	int ps = u.u_ar0[PS];
	caddr_t pc = (caddr_t)u.u_ar0[PC];
#endif

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Arrange that (*fun)(arg) is called in tim/hz seconds.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
	int tim;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
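
/*
 * Illustrative sketch, not kernel code: the delta encoding shared
 * by hardclock() and timeout() above.  Events due at absolute
 * ticks 3, 5, and 5 are stored as deltas 3, 2, 0, so aging the
 * queue means decrementing only the leading entries.  The struct
 * and names below are simplified stand-ins for struct callout
 * (no dummy list head, and due entries are left in place to go
 * negative rather than being reaped by softclock()); extract and
 * compile standalone to experiment.
 */
#ifdef notdef
#include <stdio.h>

struct demo_callout {
	int	c_time;			/* delta from previous entry */
	struct	demo_callout *c_next;
};

main()
{
	struct demo_callout c1, c2, c3;
	register struct demo_callout *p;
	int tick;

	/* absolute times 3, 5, 5 become deltas 3, 2, 0 */
	c1.c_time = 3; c1.c_next = &c2;
	c2.c_time = 2; c2.c_next = &c3;
	c3.c_time = 0; c3.c_next = 0;
	for (tick = 1; tick <= 5; tick++) {
		/* the same aging step hardclock() performs */
		for (p = &c1; p && p->c_time <= 0; p = p->c_next)
			--p->c_time;
		if (p)
			--p->c_time;
		printf("after tick %d: deltas %d %d %d\n",
		    tick, c1.c_time, c2.c_time, c3.c_time);
	}
	exit(0);
}
#endif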

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If the number of milliseconds will fit in 32 bit arithmetic,
	 * compute the number of milliseconds to the time and scale it
	 * to ticks.  Otherwise just compute the number of hz in the time,
	 * rounding times greater than representable to the maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
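
/*
 * Illustrative sketch, not compiled: a typical pairing of hzto()
 * with timeout() to arrange a callback at an absolute time.  The
 * names expire() and example() are hypothetical; timeout(), hzto(),
 * wakeup(), and the global time are the kernel interfaces defined
 * or used in this file.  hzto() converts the absolute timeval into
 * the relative tick count timeout() expects, clamped to 0x7fffffff
 * for times too far ahead to represent.
 */
#ifdef notdef
expire(arg, ticks)
	caddr_t arg;
	int ticks;
{
	/* runs from softclock(); ticks is the (<= 0) residual c_time */
	wakeup(arg);
}

example()
{
	struct timeval when;

	when = time;
	when.tv_sec += 30;		/* fire 30 seconds from now */
	timeout(expire, (caddr_t)0, hzto(&when));
}
#endif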