/*	kern_clock.c	4.55	83/05/30	*/

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/kernel.h"
#include "../h/proc.h"
#include "../h/vm.h"
#include "../h/text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "../h/gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
#ifdef vax
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
hardclock(regs)
	struct regs regs;
{
#define	ps	regs.r_sr
#define	pc	(caddr_t)regs.r_pc
#endif
	register struct callout *p1;
	register struct proc *p;
	register int s, cpstate;
	int needsoft = 0;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
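	/*
	 * A concrete illustration of the delta encoding: timeouts due
	 * in 5, 8, and 8 ticks are kept as the delta list 5, 3, 0.
	 * Each hardclock() decrements only the head; after 5 ticks the
	 * head reaches 0, the first event is due, and the remaining
	 * deltas 3, 0 still give the later events' distances from it.
	 */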
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
#ifdef sun
		u.u_ar0 = &regs.r_r0;	/* aston needs ar0 */
#endif
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual
		 * time interval timer.
		 */
		bumptime(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc) {
			if (BASEPRI(ps))
				cpstate = CP_IDLE;
		} else {
			bumptime(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0 && cpstate != CP_IDLE) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	bumptime(&time, tick);
	if (needsoft)
		setsoftclock();
}
#ifdef sun
#undef	pc
#undef	ps
#endif
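/*
 * A worked example of the decay claim in hardclock() above.  The
 * exact filter lives in kern_synch.c; assume here it has the form
 * p_cpu = p_cpu * (2*loadav) / (2*loadav + 1), applied once per
 * second.  With loadav == 1 each second retains 2/3 of p_cpu, and
 * (2/3)^5 is about 0.13, so roughly 90% of accumulated usage is
 * forgotten after 5*loadav == 5 seconds, as the comment claims.
 */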
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel,
		 * increment a counter.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef vax
softclock(pc, ps)
	caddr_t pc;
	int ps;
{
#endif
#ifdef sun
softclock()
{
#define	pc	(caddr_t)u.u_ar0[PC]
#define	ps	u.u_ar0[PS]
#endif

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t == 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
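/*
 * Illustrative sketch only: a typical use of timeout/untimeout is a
 * driver watchdog.  The xx names and the message are hypothetical,
 * not part of any real driver.
 */
#ifdef notdef
int	xxwatch();

xxstart(dev)
	caddr_t dev;
{

	/* expect an interrupt within 2 seconds */
	timeout(xxwatch, dev, 2*hz);
}

xxintr(dev)
	caddr_t dev;
{

	/* device answered; disarm the pending watchdog */
	untimeout(xxwatch, dev);
}

xxwatch(dev)
	caddr_t dev;
{

	printf("xx: lost interrupt\n");
}
#endif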
/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
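/*
 * Illustrative sketch of hzto() usage: converting an absolute time
 * into the relative tick count that timeout() expects.  The names
 * wakeme and alarmat are hypothetical.
 */
#ifdef notdef
int	wakeme();

alarmat(tv, arg)
	struct timeval *tv;
	caddr_t arg;
{

	/* call wakeme(arg) when the time of day reaches *tv */
	timeout(wakeme, arg, hzto(tv));
}
#endif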