/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.5 (Berkeley) 05/05/89
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#include "machine/reg.h"
#include "machine/psl.h"

#if defined(vax) || defined(tahoe)
#include "machine/mtpr.h"
#include "machine/clock.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz); however, the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
\
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
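
#ifdef notdef
/*
 * Hedged sketch (not compiled) of BUMPTIME's carry behavior.
 * Starting from 0.999900 seconds, bumping by one 10ms tick
 * (10000 usec) pushes tv_usec to 1009900, so the macro subtracts
 * 1000000 and carries into tv_sec, leaving 1.009900.  A single
 * carry test suffices only because each bump is well under one
 * second.  extv and exbump are hypothetical names.
 */
struct timeval extv = { 0, 999900 };

exbump()
{

	BUMPTIME(&extv, 10000);		/* extv is now { 1, 9900 } */
}
#endif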

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the queue elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&u.u_ru.ru_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s;
#ifdef notdef
		u.u_ru.ru_isrss += 0;		/* XXX (haven't got this) */
#endif
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (BASEPRI(ps)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(pc, ps);
		} else
			setsoftclock();
	}
}
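
#ifdef notdef
/*
 * Hedged sketch (not compiled) of the delta encoding in the
 * callout queue that hardclock() decrements above.  Timeouts due
 * at absolute ticks 3, 5 and 5 are stored as deltas from their
 * predecessors:
 *
 *	calltodo -> [c_time 3] -> [c_time 2] -> [c_time 0] -> 0
 *
 * so only the head need be decremented each tick; a zero delta
 * fires on the same tick as its predecessor.  Summing the deltas
 * recovers the absolute times.  exdump is a hypothetical name.
 */
exdump()
{
	register struct callout *p1;
	register int abstick = 0;

	for (p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		abstick += p1->c_time;
		printf("callout due in %d ticks\n", abstick);
	}
}
#endif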

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/*
	 * Walk the delta-encoded queue, converting t from ticks-from-now
	 * to a delta from the preceding entry as we go; overdue entries
	 * (times <= 0) contribute nothing to the running sum.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			/*
			 * Give this entry's remaining delta to its
			 * successor so later timeouts still fire on time.
			 */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
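
#ifdef notdef
/*
 * Hedged usage sketch (not compiled): schedule a hypothetical
 * routine exwakeup() half a second from now, then cancel it.
 * The third argument to timeout() is in ticks, so hz/2 is half
 * a second at any tick rate; untimeout() matches on the
 * (function, argument) pair.
 */
int	exwakeup();

exdemo(arg)
	caddr_t arg;
{

	timeout(exwakeup, arg, hz/2);
	untimeout(exwakeup, arg);	/* exwakeup never runs */
}
#endif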

/*
 * Compute number of hz until specified time.
 * Used to compute the third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * The profil system call: record the user's profiling buffer,
 * pc offset and scale in the per-process u. area; hardclock()
 * and softclock() consult pr_scale to decide whether to deliver
 * profiling ticks.
 */
profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}
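
#ifdef notdef
/*
 * Hedged usage sketch (not compiled): convert an absolute timeval
 * into the tick count timeout() expects.  With hz == 100 (tick ==
 * 10000 usec), a time 2.5 seconds in the future yields
 * (2*1000 + 500000/1000) / (10000/1000) == 2500/10 == 250 ticks.
 * exfire() and exat() are hypothetical names.
 */
int	exfire();

exat(tvp)
	struct timeval *tvp;
{

	timeout(exfire, (caddr_t)0, hzto(tvp));
}
#endif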