/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.2 (Berkeley) 11/03/86
 */

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "dir.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#if defined(vax) || defined(tahoe)
#include "../machine/mtpr.h"
#include "../machine/clock.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}
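
	/*
	 * For example, if three events are due 3, 5, and 9 ticks from
	 * now, the queue holds c_time values 3, 2, and 4, so the single
	 * decrement of the first entry above ages all three.  Entries
	 * already due may go negative, counting the ticks past their
	 * scheduled time until softclock() runs them.
	 */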

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&u.u_ru.ru_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s;
#ifdef notdef
		u.u_ru.ru_isrss += 0;		/* XXX (haven't got this) */
#endif
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;	/* saturate rather than wrap to 0 */
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
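
	/*
	 * Rough arithmetic on the decay claim above: kern_synch.c
	 * decays p_cpu about once per second by a factor of roughly
	 * (2*loadav)/(2*loadav+1).  With a load average of 1 that
	 * leaves (2/3)^5, about 0.13, of the estimate after 5 seconds;
	 * hence the rule of thumb that 90% is forgotten in 5*loadav
	 * seconds.
	 */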

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (BASEPRI(ps)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(pc, ps);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If we trapped from user mode and are profiling,
	 * give the process a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/* find the insertion point, converting t to a delta as we go */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			/* fold the removed entry's delta into its successor */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
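
/*
 * For example (dfn and dp here are placeholders, not kernel symbols):
 * a driver wanting dfn(dp) called about one second from now would issue
 *
 *	timeout(dfn, (caddr_t)dp, hz);
 *
 * and may cancel the request, any time before it fires, with
 *
 *	untimeout(dfn, (caddr_t)dp);
 */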

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

#ifdef COMPAT
opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
#endif
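
/*
 * Worked example for hzto() above, assuming hz == 100 (tick == 10000):
 * a tv 60 seconds in the future takes the first branch and yields
 * (60*1000)/(tick/1000) == 6000 ticks, so
 *
 *	timeout(dfn, (caddr_t)dp, hzto(&tv));
 *
 * (dfn, dp and tv again being placeholders) runs dfn a minute from now.
 */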