/*	kern_clock.c	4.33	82/07/13	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"
#include "ps.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading the clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying the priority of the current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain the date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax, softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef GPROF
extern	int profiling;
extern	char *s_lowpc;
extern	u_long s_textsize;
extern	u_short *kcount;
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * down from (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * down from (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;
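/*
 * Illustrative sketch, kept out of the build by the #ifdef: one
 * simulated second of clock ticks, showing that the countdown above
 * fires pffasttimo() PR_FASTHZ times and pfslowtimo() PR_SLOWHZ times
 * per second.  The hz and PR_*HZ values below are assumptions made
 * for the demo, not taken from this file.
 */
#ifdef notdef
#include <stdio.h>

#define	DEMO_HZ		60	/* assumed clock rate */
#define	DEMO_PR_FASTHZ	5	/* assumed fast timeout rate */
#define	DEMO_PR_SLOWHZ	2	/* assumed slow timeout rate */

int
main()
{
	int tick, fastcalls = 0, slowcalls = 0;
	int pfast = DEMO_HZ / DEMO_PR_FASTHZ;
	int pslow = DEMO_HZ / DEMO_PR_SLOWHZ;

	for (tick = 0; tick < DEMO_HZ; tick++) {
		/* hardclock() decrements; softclock() tests and reloads */
		if (--pfast <= 0) {
			pfast = DEMO_HZ / DEMO_PR_FASTHZ;
			fastcalls++;		/* stands in for pffasttimo() */
		}
		if (--pslow <= 0) {
			pslow = DEMO_HZ / DEMO_PR_SLOWHZ;
			slowcalls++;		/* stands in for pfslowtimo() */
		}
	}
	printf("fast %d, slow %d calls in one second\n", fastcalls, slowcalls);
	return (0);
}
#endif /* notdef */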
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

#if NPS > 0
	/*
	 * sync refresh of picture system
	 */
	psextsync(pc, ps);
#endif

	/*
	 * update callout times
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef GPROF
		int k = pc - s_lowpc;
		if (profiling < 2 && k < s_textsize)
			kcount[k / sizeof (*kcount)]++;
#endif
		cpstate = CP_SYS;
		if (noproc) {
			if ((ps&PSL_IPL) != 0)
				cpstate = CP_IDLE;
		} else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if (++pp->p_cpu == 0)
			pp->p_cpu--;
		if (pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	--protoslow; --protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}
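/*
 * Illustrative sketch, kept out of the build by the #ifdef: the
 * cp_time[] counters bumped in hardclock() above amount to sampled
 * cpu accounting.  Each tick is charged whole to one state, and the
 * counts later reduce to percentages.  The tick counts below are
 * made up for the demo.
 */
#ifdef notdef
#include <stdio.h>

int
main()
{
	/* pretend one minute at hz = 60 was sampled into four states */
	static long cp[4] = { 1800, 360, 720, 720 };
	static char *name[4] = { "user", "nice", "sys", "idle" };
	long total = 0;
	int i;

	for (i = 0; i < 4; i++)
		total += cp[i];
	for (i = 0; i < 4; i++)		/* prints 50%, 10%, 20%, 20% */
		printf("%s\t%ld%%\n", name[i], cp[i] * 100 / total);
	return (0);
}
#endif /* notdef */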
/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		b/(b+1),  b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;	/* exp(-1/20) */
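/*
 * Illustrative sketch, kept out of the build by the #ifdef: deriving
 * the "90% forgotten" table above.  One application of ave() per
 * second scales p_cpu by b/(b+1) with b = nrscale*avenrun[0], so
 * usage falls to 10% after about log(10)/log((b+1)/b) seconds;
 * truncating reproduces 5, 24, 47 and 93 for load averages
 * 1, 5, 10 and 20.
 */
#ifdef notdef
#include <stdio.h>
#include <math.h>

int
main()
{
	static int loadav[4] = { 1, 5, 10, 20 };
	int i;

	for (i = 0; i < 4; i++) {
		double b = 2.0 * loadav[i];		/* nrscale == 2 */
		double secs = log(10.0) / log((b + 1.0) / b);

		printf("loadav %2d\t%2d seconds\n", loadav[i], (int)secs);
	}
	return (0);
}
#endif /* notdef */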
/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts.
	 */
	if (protofast <= 0) {
		protofast = hz / PR_FASTHZ;
		pffasttimo();
	}
	if (protoslow <= 0) {
		protoslow = hz / PR_SLOWHZ;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, it prevents
		 * this code from running when the priority
		 * was raised when the clock interrupt occurred.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		s = spl6();
		time += lbolt / hz; lbolt %= hz;
		splx(s);
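		/*
		 * Worked example (illustrative; hz = 60 assumed): if
		 * softclock was held off so long that lbolt reached
		 * 210, the lines above add 210/60 = 3 seconds to time
		 * and leave lbolt = 30, instead of running the
		 * once-a-second work three extra times.
		 */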
		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If the process has a clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If the process is blocked, increment its computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
					(1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average, giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
				pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now we have computed the new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * check to see if the process has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}
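/*
 * Illustrative sketch, kept out of the build by the #ifdef: the
 * p_pctcpu filter in softclock() above.  With ccpu = exp(-1/20), a
 * process running flat out (p_cpticks == hz every second) sees
 * p_pctcpu approach 1.0 as 1 - exp(-k/20): about 63% after 20
 * seconds, 95% after a minute.
 */
#ifdef notdef
#include <stdio.h>

int
main()
{
	double ccpu = 0.95122942450071400909;	/* exp(-1/20) */
	double pctcpu = 0.0;
	int k;

	for (k = 1; k <= 60; k++) {
		/* p_pctcpu = ccpu*p_pctcpu + (1-ccpu)*(p_cpticks/hz) */
		pctcpu = ccpu * pctcpu + (1.0 - ccpu) * 1.0;
		if (k == 20 || k == 60)
			printf("after %2d seconds: %.0f%%\n", k, pctcpu * 100.0);
	}
	return (0);
}
#endif /* notdef */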
/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
	/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * Untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
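/*
 * Illustrative sketch, kept out of the build by the #ifdef: the delta
 * encoding timeout() maintains above.  Requests due in 5, 8, 8 and 12
 * ticks (made-up values, arriving already in sorted order) are stored
 * as c_time deltas 5, 3, 0 and 4, which is why hardclock() can age
 * every pending timeout by decrementing just the first nonzero entry.
 */
#ifdef notdef
#include <stdio.h>

int
main()
{
	static int request[4] = { 5, 8, 8, 12 };
	int delta[4];
	int i, sofar = 0;

	for (i = 0; i < 4; i++) {
		delta[i] = request[i] - sofar;	/* timeout(): t -= p2->c_time */
		sofar = request[i];
		printf("due in %2d ticks -> c_time %d\n", request[i], delta[i]);
	}
	return (0);
}
#endif /* notdef */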