/*	kern_clock.c	4.30	81/12/19	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading the clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying the priority of the current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain the date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax, softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * up to (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * up to (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * Reprime the clock.
	 */
	clkreld();

	/*
	 * Update callout times.
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain per-process cpu statistics.
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}

	/*
	 * Charge the tick to the appropriate cpu state
	 * and update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
		int k = ((int)pc & 0x7fffffff) / 8;

		if (k < 20000)
			kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;

	/*
	 * Adjust the priority of the current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if (++pp->p_cpu == 0)
			pp->p_cpu--;
		if (pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}

	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	--protoslow;
	--protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif

	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}
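
/*
 * Illustrative sketch, not part of the original source: the callout
 * list stores each c_time as a difference from the entry before it,
 * so callouts due in 5, 8, 8 and 10 ticks are kept as 5, 3, 0, 2.
 * Decrementing the first positive entry, as hardclock() does above,
 * therefore ages every pending callout at once.  The hypothetical
 * routine below repeats that walk in isolation.
 */
#ifdef notdef
callout_tick(head)
	struct callout *head;
{
	register struct callout *p1;

	/* skip entries that are already due (c_time <= 0) */
	for (p1 = head->c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;	/* one decrement ages the whole list */
}
#endif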

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		b/(b+1), where b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;	/* exp(-1/20) */
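
/*
 * Worked example (not in the original source) of how the table above
 * follows from the filter: with a 1 minute load average of 1,
 * b = nrscale*avenrun[0] = 2, so each second ave() leaves
 * p_cpu * 2/3, and (2/3)^5 is about 0.13, i.e. roughly 90% of the
 * usage estimate is forgotten in 5 seconds.  At loadav 10, b = 20,
 * and (20/21)^47 is about 0.10, giving the 47 second figure.
 */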

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!).
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run the paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0)
		vmpago();

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts.
	 */
	if (protofast <= 0) {
		protofast = hz / PR_FASTHZ;
		pffasttimo();
	}
	if (protoslow <= 0) {
		protoslow = hz / PR_SLOWHZ;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, this check prevents
		 * the code below from running while the priority
		 * is still raised from the clock interrupt.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);
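
		/*
		 * Usage sketch (hypothetical, not from this file): a
		 * driver wanting to poll roughly once a second can
		 * sleep on lbolt and be awakened by the wakeup above:
		 *
		 *	while (!device_ready())
		 *		sleep((caddr_t)&lbolt, PZERO);
		 *
		 * where device_ready() is an assumed driver predicate
		 * and PZERO the conventional sleep priority.
		 */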

		/*
		 * Recompute process priorities and process
		 * expiring sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If the process has a clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If the process is blocked, increment its computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update the digital filter estimate of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average, giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now we have computed the new process priority
			 * in pp->p_usrpri.  Carefully change pp->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}
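
		/*
		 * Worked example (not in the original source) of the
		 * p_pctcpu filter above: a loaded process on the cpu
		 * for half of every second feeds in 0.5 each pass, so
		 * p_pctcpu converges to 0.5.  Since ccpu is exp(-1/20),
		 * the estimate forgets 90% of old history in 20*ln(10),
		 * about 46 seconds.
		 */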

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin != 0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if the process has now been
		 * running for more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}

	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each entry
 * is the number of clock ticks beyond
 * the entry before it.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstrt arg");
	/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
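
/*
 * Usage sketch (hypothetical, not from this file): to have a driver
 * routine dzscan() called one second from now:
 *
 *	timeout(dzscan, (caddr_t)0, hz);
 *
 * Insertion preserves the difference encoding.  Adding a timeout of
 * 8 ticks to a list with entries due at 5 and 10 (stored as 5, 5)
 * walks past the first entry subtracting its delta (8-5 = 3), links
 * the new entry with c_time 3, and reduces the follower to
 * 10-8 = 2, leaving the deltas 5, 3, 2.
 */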