/*
 * Copyright (c) 1982 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	6.14 (Berkeley) 09/04/85
 */

#include "../machine/reg.h"
#include "../machine/psl.h"

#include "param.h"
#include "systm.h"
#include "dk.h"
#include "callout.h"
#include "dir.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "text.h"

#ifdef vax
#include "../vax/mtpr.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run
 * independently of each other.  The main clock, running hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically,
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == phz), however the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */
#ifdef notdef
/*
 * Bump a timeval by a small number of usec's.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	tp->tv_usec += usec;
	if (tp->tv_usec >= 1000000) {
		tp->tv_usec -= 1000000;
		tp->tv_sec++;
	}
}
#endif notdef
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
	int ps;
{
	register struct callout *p1;
	register struct proc *p;
	register int s;
	int needsoft = 0;
	extern int adjtimedelta, tickadj;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since each was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (USERMODE(ps)) {
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the
		 * user time counter, and process the process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&u.u_ru.ru_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(u.u_procp, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (! noproc) {
			BUMPTIME(&u.u_ru.ru_stime, tick);
		}
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(u.u_procp, SIGPROF);
		s = u.u_procp->p_rssize;
		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_ru.ru_ixrss += xrss;
		}
		if (s > u.u_ru.ru_maxrss)
			u.u_ru.ru_maxrss = s;
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principle is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p = u.u_procp;
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(pc, ps);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (adjtimedelta == 0)
		BUMPTIME(&time, tick)	/* no semicolon: BUMPTIME is a block */
	else {
		register int delta;

		if (adjtimedelta < 0) {
			delta = tick - tickadj;
			adjtimedelta += tickadj;
		} else {
			delta = tick + tickadj;
			adjtimedelta -= tickadj;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (BASEPRI(ps)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(pc, ps);
		} else
			setsoftclock();
	}
}
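/*
 * An illustrative worked example of the differential c_time encoding
 * described above (a sketch, not original to this file).  Suppose three
 * timeouts are scheduled 4, 7, and 9 ticks in the future.  The queue
 * stores each time as a delta from its predecessor:
 *
 *	calltodo -> [c_time 4] -> [c_time 3] -> [c_time 2]
 *
 * Each hardclock() need only decrement the head; after 4 ticks it
 * reaches zero, needsoft is set, and softclock() dequeues and calls it.
 * The second entry's delta of 3 then counts down the remaining
 * 7 - 4 = 3 ticks, and so on.  Entries at the front with c_time <= 0
 * keep being decremented, recording how many ticks overdue they are.
 */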
int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
/*ARGSUSED*/
gatherstats(pc, ps)
	caddr_t pc;
	int ps;
{
	int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (USERMODE(ps)) {
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel,
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && BASEPRI(ps))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
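/*
 * Illustrative sketch of how the cp_time counters are consumed (not
 * part of the original source; cp_percent and its arguments are
 * hypothetical).  User-level programs in the style of vmstat read
 * cp_time out of kernel memory twice and difference the two samples
 * to get a percentage for each cpu state:
 */
#ifdef notdef
cp_percent(old, new, pct)
	long old[CPUSTATES], new[CPUSTATES];	/* two cp_time samples */
	long pct[CPUSTATES];			/* per-state percentages */
{
	register int i;
	long total = 0;

	for (i = 0; i < CPUSTATES; i++)
		total += new[i] - old[i];
	if (total == 0)
		total = 1;	/* no ticks elapsed; avoid divide by zero */
	for (i = 0; i < CPUSTATES; i++)
		pct[i] = (new[i] - old[i]) * 100 / total;
}
#endif notdef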
/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
	int ps;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = spl7();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If we trapped from user mode and are profiling, give the
	 * process a profiling tick.
	 */
	if (USERMODE(ps)) {
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    u.u_ru.ru_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = spl7();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = spl7();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
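/*
 * Illustrative use of timeout() and untimeout() (a sketch, not part of
 * the original source; the xx_* driver routines are hypothetical).
 * Callouts are one-shot, so a periodic watchdog rearms itself; it is
 * cancelled by naming the same function/argument pair:
 */
#ifdef notdef
xx_attach(unit)
	caddr_t unit;
{

	timeout(xx_watch, unit, hz);	/* first poll in one second */
}

xx_watch(unit)
	caddr_t unit;
{

	/* ... poll the (hypothetical) device here ... */
	timeout(xx_watch, unit, hz);	/* rearm for the next second */
}

xx_detach(unit)
	caddr_t unit;
{

	untimeout(xx_watch, unit);	/* matched on both fun and arg */
}
#endif notdef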
/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = spl7();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

profil()
{
	register struct a {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap = (struct a *)u.u_ap;
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
}

opause()
{

	for (;;)
		sleep((caddr_t)&u, PSLEP);
}
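/*
 * Illustrative use of hzto() (a sketch, not part of the original
 * source; xx_set_deadline and xx_timer are hypothetical).  Given an
 * absolute struct timeval, hzto() yields the tick count timeout()
 * expects: with hz == 100 (10ms ticks), a deadline 2.5 seconds past
 * `time' comes back as roughly 250 ticks.
 */
#ifdef notdef
xx_set_deadline(tvp)
	struct timeval *tvp;	/* absolute time, same epoch as `time' */
{

	timeout(xx_timer, (caddr_t)0, hzto(tvp));
}
#endif notdef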