/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.9 (Berkeley) 06/28/90
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#include "machine/reg.h"
#include "machine/psl.h"

#if defined(vax) || defined(tahoe)
#include "machine/mtpr.h"
#include "machine/clock.h"
#endif
#if defined(hp300)
#include "machine/mtpr.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run
 * independently of each other.  The main clock, running hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer estimates resource utilization statistically,
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), but the
 * statistics will then be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow an
 * exact measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usecs (less than one second,
 * so a single conditional carry into tv_sec suffices).
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}
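/*
 * A minimal userland sketch (not kernel code, hence under notdef) of
 * how BUMPTIME carries microseconds into seconds.  The local struct
 * timeval and main() here are illustrative only; since the bump is
 * always less than one second, the single conditional carry above is
 * enough.
 */
#ifdef notdef
#include <stdio.h>

struct timeval {
	long	tv_sec;
	long	tv_usec;
};

int
main()
{
	struct timeval tv;

	tv.tv_sec = 0;
	tv.tv_usec = 999900;
	BUMPTIME(&tv, 200);	/* 0.999900s + 200us carries to 1.000100s */
	printf("%ld sec, %ld usec\n", tv.tv_sec, tv.tv_usec);
	return (0);
}
#endif /* notdef */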
/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics-gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p = u.u_procp;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At the front of the queue are some number of events which are
	 * ``due''.  The time to these is <= 0 and if negative represents
	 * the number of ticks which have passed since each was supposed
	 * to happen.  The rest of the queue elements (times > 0) are
	 * events yet to happen, where the time for each is given as a
	 * delta from the previous.  Decrementing just the first of these
	 * serves to decrement the time to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the tick to the current process based on the mode
	 * the cpu is in.  Here again we fudge for the lack of proper
	 * interval timers, assuming that the current state has been
	 * around for at least one tick.
	 */
	if (USERMODE(ps)) {
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual
		 * time interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * for the entire last tick.
	 */
	if (noproc == 0) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
		s = p->p_rssize;
		u.u_ru.ru_idrss += s;
#ifdef notdef
		u.u_ru.ru_isrss += 0;		/* XXX (haven't got this) */
#endif
		if (p->p_textp) {
			register int xrss = p->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is incremented here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time within 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;	/* saturate rather than wrap to 0 */
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known,
	 * then we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)	/* no semicolon: BUMPTIME is a block */
	else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (BASEPRI(ps)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(pc, ps);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
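/*
 * An illustrative userland sketch (again under notdef, not kernel
 * code) of how hardclock amortizes a pending adjtime()-style
 * correction: each tick is stretched or shrunk by tickdelta usec
 * until timedelta is consumed.  The variable names mirror the
 * kernel's; the sample values, loop, and printf are ours, and the
 * sketch assumes timedelta is a multiple of tickdelta so the loop
 * terminates.
 */
#ifdef notdef
#include <stdio.h>

int
main()
{
	int tick = 10000;	/* usec per tick, i.e., hz = 100 */
	int tickdelta = 5;	/* slew applied per tick, usec */
	long timedelta = -20;	/* pending correction, usec */
	int delta;

	while (timedelta != 0) {
		if (timedelta < 0) {
			delta = tick - tickdelta;	/* shortened tick */
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;	/* lengthened tick */
			timedelta -= tickdelta;
		}
		printf("bump time by %d usec, %ld usec left\n",
		    delta, timedelta);
	}
	return (0);
}
#endif /* notdef */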
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running,
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If we trapped from user mode and are profiling, give the
	 * process a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce its priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/*
	 * Walk the delta list, consuming each positive delta from t,
	 * until the insertion point is found.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;	/* successor's delta now relative to pnew */
	splx(s);
}
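/*
 * A self-contained userland sketch of the callout delta list, using
 * the same representation as calltodo above: each entry's c_time is
 * relative to its predecessor, so a single decrement of the head ages
 * the whole list.  Everything here is illustrative, not kernel code:
 * the names timeout_sketch and tick_sketch are ours, tick_sketch
 * folds hardclock's decrement and softclock's dispatch into one
 * routine for brevity, and overdue (non-positive) head entries and
 * free-list overflow are not handled as the kernel does.
 */
#ifdef notdef
#include <stdio.h>

struct callout {
	int	c_time;			/* ticks after predecessor fires */
	char	*c_arg;
	struct	callout *c_next;
};

static struct callout calltodo;		/* list head, as in the kernel */
static struct callout slots[8], *callfree;

static void
timeout_sketch(arg, t)
	char *arg;
	int t;
{
	struct callout *p1, *p2, *pnew;

	pnew = callfree;
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	/* walk until the accumulated deltas would pass t */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;	/* keep successor's delta correct */
}

static void
tick_sketch()
{
	struct callout *p1;

	if ((p1 = calltodo.c_next) == 0)
		return;
	p1->c_time--;			/* one decrement ages the whole list */
	while ((p1 = calltodo.c_next) && p1->c_time <= 0) {
		printf("fire %s\n", p1->c_arg);
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
	}
}

int
main()
{
	int i;

	for (i = 0; i < 7; i++)		/* build the free list */
		slots[i].c_next = &slots[i + 1];
	callfree = &slots[0];

	timeout_sketch("a", 3);
	timeout_sketch("b", 5);
	timeout_sketch("c", 3);		/* a tie becomes a zero delta */
	for (i = 1; i <= 6; i++) {
		printf("tick %d\n", i);
		tick_sketch();		/* c and a fire at 3, b at 5 */
	}
	return (0);
}
#endif /* notdef */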
/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			/* give the removed entry's delta to its successor */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute the number of ticks (units of 1/hz seconds) until the
 * specified time.  Used to compute the third argument to timeout()
 * from an absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If the number of milliseconds will fit in 32-bit arithmetic,
	 * then compute the number of milliseconds to the target time
	 * and scale to ticks.  Otherwise just compute the number of hz
	 * in the time, rounding times greater than representable
	 * to the maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * The maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Implement the profil system call: record the parameters that
 * control user pc sampling in the per-process profile structure.
 */
/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap;
	int *retval;
{
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}
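/*
 * An illustrative userland sketch of the hzto() conversion above,
 * assuming hz = 100 and tick = 10000 usec; the name hzto_sketch and
 * the local "now" (standing in for the kernel's global time) are
 * ours.  It shows the exact millisecond path and the clamping of
 * out-of-range deltas.
 */
#ifdef notdef
#include <stdio.h>

struct timeval {
	long	tv_sec;
	long	tv_usec;
};

static long
hzto_sketch(tv, now)
	struct timeval *tv, *now;
{
	long ticks, sec;
	int hz = 100, tick = 10000;

	sec = tv->tv_sec - now->tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		/* exact: scale the millisecond delta to ticks */
		ticks = ((tv->tv_sec - now->tv_sec) * 1000 +
		    (tv->tv_usec - now->tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;	/* whole seconds only */
	else
		ticks = 0x7fffffff;	/* clamp to the maximum timeout */
	return (ticks);
}

int
main()
{
	struct timeval now, then;

	now.tv_sec = 1000;
	now.tv_usec = 250000;
	then = now;
	then.tv_sec += 2;		/* 2 seconds from now */
	printf("%ld ticks\n", hzto_sketch(&then, &now));	/* 200 */
	return (0);
}
#endif /* notdef */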