/*	kern_clock.c	4.27	81/11/20	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying priority of current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax, softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

	/*
	 * update callout times
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if(u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
		int k = ((int)pc & 0x7fffffff) / 8;
		if (k < 20000)
			kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if(++pp->p_cpu == 0)
			pp->p_cpu--;
		if(pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;
#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm, which
 * decayed the priorities by a constant and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;	/* exp(-1/20) */
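
/*
 * For illustration only (these numbers are not used by the code):
 * ave(a, b) scales a by b/(b+1), and here b = nrscale*avenrun[0].
 * With the default nrscale of 2 and a load average of 5, each second
 *	p_cpu <- ave(p_cpu, 10) = (10*p_cpu)/11
 * so 90% of the old usage is forgotten after roughly
 *	ln(10)/ln(11/10) ~= 24 seconds,
 * which is where the table above comes from.
 */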

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panic's!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, it prevents
		 * this code from running when the priority
		 * was raised when the clock interrupt occurred.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If the process has a clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
					(1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average, giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
				pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now have computed new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}
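
/*
 * For illustration only: since ccpu is exp(-1/20), a loaded process
 * that stops accumulating ticks has its p_pctcpu estimate decay as
 * exp(-t/20) over t seconds, so 90% of the old value is forgotten
 * after about 20*ln(10), i.e. roughly 46 seconds.
 */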

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
	/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
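
/*
 * Illustration of the differential times, with made-up numbers:
 * three pending timeouts due in 5, 8 and 12 ticks are kept as
 *	calltodo -> (c_time 5) -> (c_time 3) -> (c_time 4)
 * so hardclock() only decrements the first unexpired entry, and
 * softclock() fires entries as their c_time reaches zero.  A typical
 * use (the names here are hypothetical) would be
 *	timeout(drainsilo, (caddr_t)dz, hz/10);
 * to have drainsilo(dz) called about a tenth of a second from now.
 */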