/*	kern_clock.c	6.3	83/10/08	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.  (A sketch of this delta encoding follows
	 * timeout() below.)
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 * (A sketch of the decay follows this routine.)
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	setsoftclock();
}
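
#ifdef notdef
/*
 * A minimal sketch of the decay behavior described above, assuming
 * the estimator is scaled once per second by a smoothing factor of
 * (2*loadav)/(2*loadav+1); the factor itself is an assumption here,
 * chosen because it matches the ``90% forgotten in 5*loadav seconds''
 * rule: after 5*loadav scalings p_cpu drops to roughly e**-2.5
 * (about 10%) of its old value.  Not kernel code; decay_demo is a
 * hypothetical name.
 */
double
decay_demo(p_cpu, loadav, seconds)
	double p_cpu, loadav;
	int seconds;
{
	double factor = (2 * loadav) / (2 * loadav + 1);

	while (seconds-- > 0)
		p_cpu *= factor;	/* one decay step per second */
	return (p_cpu);
}
#endif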
int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 * (A sketch of how these counts are used follows this routine.)
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
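
#ifdef notdef
/*
 * A minimal sketch of how the sampled counts are consumed, assuming
 * a user-level program in the style of vmstat or iostat.  Each
 * cp_time[] entry counts the phz-rate samples that found the cpu in
 * that state, so a state's share of all samples estimates its share
 * of real time.  Not kernel code; cp_percent is a hypothetical name.
 */
long
cp_percent(state, counts)
	int state;
	long counts[CPUSTATES];
{
	register int i;
	long total = 0;

	for (i = 0; i < CPUSTATES; i++)
		total += counts[i];		/* all samples taken */
	if (total == 0)
		return (0);
	return (counts[state] * 100 / total);	/* estimated % of time */
}
#endif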
/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If we trapped from user mode and the process is being
	 * profiled, give it a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce its priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t == 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
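
#ifdef notdef
/*
 * A minimal sketch of the differential encoding used by the callout
 * queue, referenced from hardclock() above.  Events due 10, 14 and
 * 19 ticks from now are stored as the deltas 10, 4 and 5, so that
 * hardclock ages every pending event by decrementing only the first
 * entry.  The hypothetical routine below turns a sorted array of
 * absolute tick counts into such deltas.  Not kernel code.
 */
delta_encode(times, n)
	register int *times;
	register int n;
{
	register int i;

	for (i = n - 1; i > 0; i--)
		times[i] -= times[i - 1];	/* relative to previous event */
}
#endif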
/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If the number of milliseconds will fit in 32 bit arithmetic,
	 * then compute the number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute the number of hz in time,
	 * rounding times greater than representable to the maximum value.
	 * (A worked example follows at the end of this file.)
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
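
#ifdef notdef
/*
 * A minimal sketch of the hzto() scaling above, assuming hz == 100
 * so that tick == 10000 usec.  A time 2.5 seconds in the future
 * takes the millisecond path:
 *
 *	(2500 ms) / (tick/1000 == 10 ms) == 250 ticks
 *
 * That path applies while the delta in seconds is at most
 * 0x7fffffff/1000 - 1000 (roughly 25 days); larger deltas use the
 * whole-second path, and anything beyond 0x7fffffff ticks is
 * clamped.  Not kernel code; hzto_demo is a hypothetical name and
 * takes the deltas directly instead of reading the current time.
 */
long
hzto_demo(dsec, dusec)
	long dsec, dusec;
{

	if (dsec <= 0x7fffffff / 1000 - 1000)		/* < ~25 days */
		return ((dsec * 1000 + dusec / 1000) / (tick / 1000));
	else if (dsec <= 0x7fffffff / hz)		/* whole seconds */
		return (dsec * hz);
	return (0x7fffffff);				/* clamp */
}
#endif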