/*	kern_clock.c	4.56	83/06/14	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifdef vax
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
hardclock(regs)
	struct regs regs;
{
#define	ps	regs.r_sr
#define	pc	(caddr_t)regs.r_pc
#endif
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;
	int needsoft = 0;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
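	/*
	 * For example (illustrative values): events due in 3, 5, and
	 * 10 ticks are queued with c_time deltas of 3, 2, and 5.  One
	 * tick later only the head's delta has been decremented,
	 * giving 2, 2, 5, i.e. the events are now 2, 4, and 9 ticks
	 * away.
	 */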
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
#ifdef sun
		u.u_ar0 = &regs.r_r0;	/* aston needs ar0 */
#endif
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
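	/*
	 * To see where 5*loadav comes from, note that the decay
	 * applied once per second in kern_synch.c is of the form
	 *	p_cpu = (2*loadav / (2*loadav + 1)) * p_cpu
	 * so after t seconds at a constant load L the estimator has
	 * been scaled by (2L/(2L+1))^t.  Setting this to 0.1 and
	 * using ln(1 + 1/2L) ~= 1/2L gives t ~= 2L*ln(10), about
	 * 4.6*loadav, i.e. roughly 5*loadav seconds.
	 */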
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	if (needsoft)
		setsoftclock();
}
#ifdef sun
#undef pc
#undef ps
#endif

/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
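
/*
 * A user-level program can turn these counters into utilization
 * percentages by differencing successive samples; for instance the
 * user-time fraction over an interval is roughly
 *	delta(cp_time[CP_USER]) / sum of delta(cp_time[]) over all states
 * since each sample increments exactly one state's counter.
 */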

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef vax
softclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
softclock()
{
#define	pc	(caddr_t)u.u_ar0[PC]
#define	ps	u.u_ar0[PS]
#endif

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
#ifdef vax
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
#endif
	}
}

/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t == 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
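
/*
 * Continuing the illustration at the top of hardclock(): inserting
 * t == 7 into a queue with deltas 3, 2, 5 (events at 3, 5, 10) walks
 * past the first two entries, reducing t to 2; the new entry is then
 * linked before the third with c_time 2, and the third's delta is
 * cut from 5 to 3, leaving deltas 3, 2, 2, 3 for events at 3, 5, 7,
 * 10.
 */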

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
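
/*
 * Usage sketch (illustrative only; xx_poll and xx are hypothetical
 * driver names, not part of this file):
 *
 *	timeout(xx_poll, (caddr_t)xx, hz);	    xx_poll(xx) in 1 second
 *	untimeout(xx_poll, (caddr_t)xx);	    cancel while still pending
 *	timeout(xx_poll, (caddr_t)xx, hzto(&tv));   run at absolute time tv
 *
 * With the 10ms ticks assumed by hzto()'s comment (hz == 100,
 * tick == 10000 usec), a tv 2.5 seconds past the current time yields
 * hzto(&tv) == (2000ms + 500ms) / 10ms == 250 ticks.
 */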