/*      kern_clock.c    4.34    82/07/21        */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"
#include "../h/socket.h"
#include "../net/if.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"
#include "ps.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *      reloading clock
 *      decrementing time to callouts
 *      recording cpu time usage
 *      modifying priority of current process
 *      arranging for soft clock interrupt
 *      kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *      implement callouts
 *      maintain date
 *      lightning bolt wakeup (every second)
 *      alarm clock signals
 *      jab the scheduler
 *
 * On the vax softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef GPROF
extern  int profiling;
extern  char *s_lowpc;
extern  u_long s_textsize;
extern  u_short *kcount;
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts: it counts
 * down (hz/PR_SLOWHZ) ticks, then causes a pfslowtimo().
 * Protofast is likewise, for fast protocol timeouts: it counts
 * down (hz/PR_FASTHZ) ticks, then causes a pffasttimo().
 * Ifnetslow is the same, for interface watchdog timers: it counts
 * down (hz/IFNET_SLOWHZ) ticks, then causes an if_slowtimo().
 */
int     protoslow;
int     protofast;
int     ifnetslow;
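/*
 * A worked example, assuming hz = 100 and the protosw.h values
 * PR_FASTHZ = 5 and PR_SLOWHZ = 2: softclock() reloads protofast
 * with 100/5 = 20 and protoslow with 100/2 = 50, so pffasttimo()
 * runs every 200 ms and pfslowtimo() every 500 ms.  Ifnetslow
 * works the same way with IFNET_SLOWHZ from ../net/if.h.
 */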
/*ARGSUSED*/
hardclock(pc, ps)
        caddr_t pc;
{
        register struct callout *p1;
        register struct proc *pp;
        register int s, cpstate;

        /*
         * reprime clock
         */
        clkreld();

#if NPS > 0
        /*
         * sync refresh of picture system
         */
        psextsync(pc, ps);
#endif

        /*
         * update callout times
         */
        for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
                ;
        if (p1)
                p1->c_time--;

        /*
         * Maintain iostat and per-process cpu statistics
         */
        if (!noproc) {
                s = u.u_procp->p_rssize;
                u.u_vm.vm_idsrss += s;
                if (u.u_procp->p_textp) {
                        register int xrss = u.u_procp->p_textp->x_rssize;

                        s += xrss;
                        u.u_vm.vm_ixrss += xrss;
                }
                if (s > u.u_vm.vm_maxrss)
                        u.u_vm.vm_maxrss = s;
                if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
                        psignal(u.u_procp, SIGXCPU);
                        if (u.u_limit[LIM_CPU] < INFINITY - 5)
                                u.u_limit[LIM_CPU] += 5;
                }
        }
        /*
         * Update iostat information.
         */
        if (USERMODE(ps)) {
                u.u_vm.vm_utime++;
                if (u.u_procp->p_nice > NZERO)
                        cpstate = CP_NICE;
                else
                        cpstate = CP_USER;
        } else {
#ifdef GPROF
                int k = pc - s_lowpc;
                if (profiling < 2 && k < s_textsize)
                        kcount[k / sizeof (*kcount)]++;
#endif
                cpstate = CP_SYS;
                if (noproc) {
                        if ((ps&PSL_IPL) != 0)
                                cpstate = CP_IDLE;
                } else
                        u.u_vm.vm_stime++;
        }
        cp_time[cpstate]++;
        for (s = 0; s < DK_NDRIVE; s++)
                if (dk_busy&(1<<s))
                        dk_time[s]++;
        /*
         * Adjust priority of current process.
         */
        if (!noproc) {
                pp = u.u_procp;
                pp->p_cpticks++;
                if (++pp->p_cpu == 0)
                        pp->p_cpu--;
                if (pp->p_cpu % 4 == 0) {
                        (void) setpri(pp);
                        if (pp->p_pri >= PUSER)
                                pp->p_pri = pp->p_usrpri;
                }
        }
        /*
         * Time moves on.
         */
        ++lbolt;

        /*
         * Time moves on for protocols.
         */
        --protoslow; --protofast; --ifnetslow;

#if VAX780
        /*
         * On 780's, implement a fast UBA watcher,
         * to make sure uba's don't get stuck.
         */
        if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
                unhang();
#endif
        /*
         * Schedule a software interrupt for the rest
         * of clock activities.
         */
        setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * scale the previous cpu usage estimate by
 *              b/(b+1),        where b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *      loadav 1         5 seconds
 *      loadav 5        24 seconds
 *      loadav 10       47 seconds
 *      loadav 20       93 seconds
 * This is a great improvement on the previous algorithm which
 * decayed the priorities by a constant, and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define ave(a,b) ((int)(((int)(a*b))/(b+1)))
int     nrscale = 2;
double  avenrun[];
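/*
 * A worked example: ave(a, b) computes a*b/(b+1), so with a load
 * average of 1, b = nrscale*avenrun[0] = 2 and p_cpu is scaled by
 * 2/3 each second; after 5 seconds (2/3)^5 = 32/243, about 13%,
 * of the original usage remains, matching the "loadav 1" line in
 * the table above.  For a load average of 5, b = 10 and the factor
 * is 10/11; (10/11)^24 is about 0.10, giving the 24 second figure.
 */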
/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double  ccpu = 0.95122942450071400909;          /* exp(-1/20) */

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
        caddr_t pc;
{
        register struct callout *p1;
        register struct proc *pp;
        register int a, s;
        caddr_t arg;
        int (*func)();

        /*
         * Perform callouts (but not after panics!)
         */
        if (panicstr == 0) {
                for (;;) {
                        s = spl7();
                        if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
                                splx(s);
                                break;
                        }
                        calltodo.c_next = p1->c_next;
                        arg = p1->c_arg;
                        func = p1->c_func;
                        p1->c_next = callfree;
                        callfree = p1;
                        (void) splx(s);
                        (*func)(arg);
                }
        }

        /*
         * Drain silos.
         */
#if NDH > 0
        s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
        s = spl5(); dztimer(); splx(s);
#endif

        /*
         * If idling and processes are waiting to swap in,
         * check on them.
         */
        if (noproc && runin) {
                runin = 0;
                wakeup((caddr_t)&runin);
        }

        /*
         * Run paging daemon every 1/4 sec.
         */
        if (lbolt % (hz/4) == 0) {
                vmpago();
        }

        /*
         * Reschedule every 1/10 sec.
         */
        if (lbolt % (hz/10) == 0) {
                runrun++;
                aston();
        }
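        /*
         * A worked example, assuming hz = 100 (and hz a multiple
         * of 20, so both divisions above are exact): vmpago() runs
         * every 25 ticks (1/4 second) and a reschedule is posted
         * every 10 ticks (1/10 second).
         */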
        /*
         * Run network slow and fast timeouts.
         */
        if (protofast <= 0) {
                protofast = hz / PR_FASTHZ;
                pffasttimo();
        }
        if (protoslow <= 0) {
                protoslow = hz / PR_SLOWHZ;
                pfslowtimo();
        }
        if (ifnetslow <= 0) {
                ifnetslow = hz / IFNET_SLOWHZ;
                if_slowtimo();
        }

        /*
         * Lightning bolt every second:
         *      sleep timeouts
         *      process priority recomputation
         *      process %cpu averaging
         *      virtual memory metering
         *      kick swapper if processes want in
         */
        if (lbolt >= hz) {
                /*
                 * This doesn't mean much on VAX since we run at
                 * software interrupt time... if hardclock()
                 * calls softclock() directly, it prevents
                 * this code from running when the priority
                 * was raised when the clock interrupt occurred.
                 */
                if (BASEPRI(ps))
                        return;

                /*
                 * If we didn't run a few times because of
                 * long blockage at high ipl, we don't
                 * really want to run this code several times,
                 * so squish out all multiples of hz here.
                 */
                s = spl6();
                time += lbolt / hz; lbolt %= hz;
                splx(s);

                /*
                 * Wakeup lightning bolt sleepers.
                 * Processes sleep on lbolt to wait
                 * for short amounts of time (e.g. 1 second).
                 */
                wakeup((caddr_t)&lbolt);

                /*
                 * Recompute process priorities and process clock
                 * timers; these serve sleep() system calls as well
                 * as internal sleeps with timeouts (the tsleep()
                 * kernel routine).
                 */
                for (pp = proc; pp < procNPROC; pp++)
                if (pp->p_stat && pp->p_stat!=SZOMB) {
                        /*
                         * Increase resident time, to max of 127 seconds
                         * (it is kept in a character).  For
                         * loaded processes this is time in core; for
                         * swapped processes, this is time on drum.
                         */
                        if (pp->p_time != 127)
                                pp->p_time++;
                        /*
                         * If process has clock counting down, and it
                         * expires, set it running (if this is a tsleep()),
                         * or give it a SIGALRM (if the user process
                         * is using alarm signals).
                         */
                        if (pp->p_clktim && --pp->p_clktim == 0)
                                if (pp->p_flag & STIMO) {
                                        s = spl6();
                                        switch (pp->p_stat) {

                                        case SSLEEP:
                                                setrun(pp);
                                                break;

                                        case SSTOP:
                                                unsleep(pp);
                                                break;
                                        }
                                        pp->p_flag &= ~STIMO;
                                        splx(s);
                                } else
                                        psignal(pp, SIGALRM);
                        /*
                         * If process is blocked, increment computed
                         * time blocked.  This is used in swap scheduling.
                         */
                        if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
                                if (pp->p_slptime != 127)
                                        pp->p_slptime++;
                        /*
                         * Update digital filter estimation of process
                         * cpu utilization for loaded processes.
                         */
                        if (pp->p_flag&SLOAD)
                                pp->p_pctcpu = ccpu * pp->p_pctcpu +
                                    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
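                        /*
                         * A worked example: since ccpu = exp(-1/20),
                         * a process which stops consuming cpu sees
                         * p_pctcpu decay by exp(-t/20) after t
                         * seconds: to about 37% of its old value
                         * after 20 seconds, and to about 10% after
                         * 46 seconds (20*ln(10)).
                         */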
                        /*
                         * Recompute process priority.  The number p_cpu
                         * is a weighted estimate of cpu time consumed.
                         * A process which consumes cpu time has this
                         * increase regularly.  We here decrease it by
                         * a fraction based on load average giving a digital
                         * decay filter which damps out in about 5 seconds
                         * when seconds are measured in time expanded by the
                         * load average.
                         *
                         * If a process is niced, then the nice directly
                         * affects the new priority.  The final priority
                         * is in the range 0 to 255, to fit in a character.
                         */
                        pp->p_cpticks = 0;
                        a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
                             pp->p_nice - NZERO;
                        if (a < 0)
                                a = 0;
                        if (a > 255)
                                a = 255;
                        pp->p_cpu = a;
                        (void) setpri(pp);
                        /*
                         * Now have computed new process priority
                         * in p->p_usrpri.  Carefully change p->p_pri.
                         * A process is on a run queue associated with
                         * this priority, so we must block out process
                         * state changes during the transition.
                         */
                        s = spl6();
                        if (pp->p_pri >= PUSER) {
                                if ((pp != u.u_procp || noproc) &&
                                    pp->p_stat == SRUN &&
                                    (pp->p_flag & SLOAD) &&
                                    pp->p_pri != pp->p_usrpri) {
                                        remrq(pp);
                                        pp->p_pri = pp->p_usrpri;
                                        setrq(pp);
                                } else
                                        pp->p_pri = pp->p_usrpri;
                        }
                        splx(s);
                }

                /*
                 * Perform virtual memory metering.
                 */
                vmmeter();

                /*
                 * If the swap process is trying to bring
                 * a process in, have it look again to see
                 * if it is possible now.
                 */
                if (runin!=0) {
                        runin = 0;
                        wakeup((caddr_t)&runin);
                }

                /*
                 * If there are pages that have been cleaned,
                 * jolt the pageout daemon to process them.
                 * We do this here so that these pages will be
                 * freed if there is an abundance of memory and the
                 * daemon would not be awakened otherwise.
                 */
                if (bclnlist != NULL)
                        wakeup((caddr_t)&proc[2]);

                /*
                 * If the trap occurred from usermode,
                 * then check to see if it has now been
                 * running more than 10 minutes of user time
                 * and should thus run with reduced priority
                 * to give other processes a chance.
                 */
                if (USERMODE(ps)) {
                        pp = u.u_procp;
                        if (pp->p_uid && pp->p_nice == NZERO &&
                            u.u_vm.vm_utime > 600 * hz)
                                pp->p_nice = NZERO+4;
                        (void) setpri(pp);
                        pp->p_pri = pp->p_usrpri;
                }
        }
        /*
         * If trapped user-mode, give it a profiling tick.
         */
        if (USERMODE(ps) && u.u_prof.pr_scale) {
                u.u_procp->p_flag |= SOWEUPC;
                aston();
        }
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
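/*
 * A worked example of the differential times: if timeouts are
 * pending 5, 8 and 12 ticks from now, the c_time fields on the
 * calltodo list hold 5, 3 and 4.  Hardclock() need only decrement
 * the first positive entry, and softclock() dispatches every
 * entry whose time has reached zero.
 */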
timeout(fun, arg, tim)
        int (*fun)();
        caddr_t arg;
{
        register struct callout *p1, *p2, *pnew;
        register int t;
        int s;

/* DEBUGGING CODE */
        int ttrstrt();

        if (fun == ttrstrt && arg == 0)
                panic("timeout ttrstr arg");
/* END DEBUGGING CODE */
        t = tim;
        s = spl7();
        pnew = callfree;
        if (pnew == NULL)
                panic("timeout table overflow");
        callfree = pnew->c_next;
        pnew->c_arg = arg;
        pnew->c_func = fun;
        for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
                t -= p2->c_time;
        p1->c_next = pnew;
        pnew->c_next = p2;
        pnew->c_time = t;
        if (p2)
                p2->c_time -= t;
        splx(s);
}

/*
 * Untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
        int (*fun)();
        caddr_t arg;
{
        register struct callout *p1, *p2;
        register int s;

        s = spl7();
        for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
                if (p2->c_func == fun && p2->c_arg == arg) {
                        if (p2->c_next)
                                p2->c_next->c_time += p2->c_time;
                        p1->c_next = p2->c_next;
                        p2->c_next = callfree;
                        callfree = p2;
                        break;
                }
        }
        splx(s);
}
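/*
 * A hypothetical usage sketch (the names xxpoll, xxattach and
 * xxclose are invented for illustration, not part of the system):
 */
#ifdef notdef
int     xxpoll();

xxattach()
{

        /* call xxpoll((caddr_t)0) hz ticks (one second) from now */
        timeout(xxpoll, (caddr_t)0, hz);
}

xxclose()
{

        /* remove the pending call; matches on both function and argument */
        untimeout(xxpoll, (caddr_t)0);
}
#endif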