/*	kern_clock.c	4.31	82/06/26	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"
#include "ps.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying priority of current process
 *	arranging for soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * up to (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * up to (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

#if NPS > 0
	/*
	 * sync refresh of picture system
	 */
	psextsync(pc, ps);
#endif

	/*
	 * update callout times
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;
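
	/*
	 * Added note, not in the original source: entries whose c_time
	 * is already zero or negative have expired and merely await
	 * softclock(), so they are skipped; decrementing the first
	 * entry that is still pending ages all later entries too,
	 * thanks to the delta encoding.  E.g. with deltas 0, 0, 3 at
	 * the head of calltodo, two callouts are ripe and the third
	 * fires 3 ticks from now.
	 */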

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
		int k = ((int)pc & 0x7fffffff) / 8;

		if (k < 20000)
			kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if (++pp->p_cpu == 0)
			pp->p_cpu--;
		if (pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	--protoslow; --protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * scale the previous cpu usage estimate by
 *		b/(b+1),  where b = nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and so forgot all previous
 * activity in about 20 seconds; under heavy load it degenerated
 * to round-robin with poor response time.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];
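
/*
 * Worked example, added for illustration (not in the original source):
 * with a 1 minute load average of 1, b = nrscale*avenrun[0] = 2, so
 * ave() scales p_cpu by b/(b+1) = 2/3 each second; (2/3)^5 ~= 0.13,
 * i.e. about 90% of the usage estimate is forgotten in 5 seconds, as
 * the table above says.  At loadav 10, b = 20 and the per-second
 * factor 20/21 takes about 47 seconds to decay to 10%.
 */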

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panics!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts.
	 */
	if (protofast <= 0) {
		protofast = hz / PR_FASTHZ;
		pffasttimo();
	}
	if (protoslow <= 0) {
		protoslow = hz / PR_SLOWHZ;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... but if hardclock()
		 * calls softclock() directly, this check keeps the
		 * once-a-second work from running while the priority
		 * is still raised from the clock interrupt.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;
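
		/*
		 * Worked example, added for illustration (not in the
		 * original source): if softclock was held off long
		 * enough that lbolt reached 3*hz + 5, time advances by
		 * 3 seconds in one step and lbolt keeps the 5 tick
		 * remainder, instead of this block running 3 times.
		 */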

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character).  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If process has clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
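			/*
			 * Added arithmetic note (not in the original
			 * source): with ccpu = exp(-1/20), about 5% of
			 * the estimate is replaced each second, so a
			 * process that ran for all of the last second
			 * moves 5% of the way from its old p_pctcpu
			 * toward 1.0, while an idle process decays
			 * toward 0 with a 20 second time constant
			 * (0.95^20 ~= 0.36 of the estimate remains).
			 */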
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average, giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * We have now computed the new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin != 0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstrt arg");
	/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * Untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
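
#ifdef notdef
/*
 * Usage sketch, added for illustration and not part of the original
 * source; the xx names are hypothetical.  A driver arms a 2 second
 * watchdog with timeout() and disarms it with untimeout() once the
 * awaited interrupt arrives.  Internally, arming callouts 5, 8 and
 * 10 ticks out stores deltas 5, 3, 2 on calltodo; cancelling the
 * middle one folds its delta into its successor, leaving 5, 5.
 */
int	xxwatch();
caddr_t	xxsoftc;

xxstart()
{

	timeout(xxwatch, xxsoftc, 2*hz);	/* xxwatch(xxsoftc) in 2 seconds */
}

xxintr()
{

	untimeout(xxwatch, xxsoftc);	/* got the interrupt; cancel watchdog */
}
#endif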