/*	$NetBSD: kern_clock.c,v 1.22 1995/03/03 01:24:03 cgd Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
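/*
 * Illustrative sketch (not compiled into the kernel): BUMPTIME performs
 * at most one carry from tv_usec into tv_sec, so the increment must be
 * small, i.e. less than one second.  The function name below is
 * hypothetical, for illustration only.
 */
#ifdef notdef
static void
bumptime_example()
{
	volatile struct timeval tv;

	tv.tv_sec = 10;
	tv.tv_usec = 999990;
	BUMPTIME(&tv, 20);	/* carries once: tv is now { 11, 10 } */
}
#endif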
int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
static int tickfixcnt;			/* number of ticks since last fix */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' would not divide the second evenly into
	 * milliseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
	ticks++;
	delta = tick;
	if (tickfix) {
		tickfixcnt++;
		if (tickfixcnt > tickfixinterval) {
			delta += tickfix;
			tickfixcnt = 0;
		}
	}
	if (timedelta != 0) {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
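/*
 * Illustrative sketch (not compiled into the kernel): the callout queue
 * stores each expiry as a delta from its predecessor, which is why the
 * loop in hardclock() above only needs to decrement the head of the
 * queue.  Events due in 2, 5 and 10 ticks end up queued with c_time
 * values 2, 3 and 5.  The function names below are hypothetical.
 */
#ifdef notdef
static void example_tick __P((void *));

static void
example_tick(arg)
	void *arg;
{
}

static void
calltodo_example()
{
	timeout(example_tick, (void *)1, 2);	/* queue deltas: 2 */
	timeout(example_tick, (void *)2, 5);	/* queue deltas: 2, 3 */
	timeout(example_tick, (void *)3, 10);	/* queue deltas: 2, 3, 5 */
}
#endif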
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}
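/*
 * Illustrative sketch (not compiled into the kernel): the usual driver
 * pattern is to rearm the timeout from inside the handler for periodic
 * work, and to cancel with the same function/argument pair that was
 * passed to timeout(), since no identifier is returned.  The names
 * below are hypothetical.
 */
#ifdef notdef
static void example_poll __P((void *));

static void
example_poll(arg)
	void *arg;
{
	/* ... poll the device ... */
	timeout(example_poll, arg, hz);		/* rearm: run again in 1s */
}

static void
example_attach(sc)
	void *sc;
{
	timeout(example_poll, sc, hz);		/* first poll in 1 second */
}

static void
example_detach(sc)
	void *sc;
{
	untimeout(example_poll, sc);		/* must match ftn and arg */
}
#endif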
/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of microseconds will fit in 32 bit arithmetic,
	 * then compute number of microseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.  (We must
	 * compute in microseconds, because hz can be greater than 1000,
	 * and thus tick can be less than one millisecond).
	 *
	 * Delta times less than 14 hours can be computed ``exactly''.
	 * (Note that if hz would yield a non-integral number of us per
	 * tick, i.e. tickfix is nonzero, timeouts can be a tick longer
	 * than they should be.)  Maximum value for any timeout in 10ms
	 * ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000000 - 1)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000000 +
		    (tv->tv_usec - time.tv_usec)) / tick;
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}
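/*
 * Illustrative sketch (not compiled into the kernel): with hypothetical
 * rates of stathz = 128 and profhz = 1024, startprofclock() sets
 * psdiv = psratio = 8, so statclock() above takes a profiling sample on
 * every interrupt but charges statistics only on every 8th, preserving
 * the stathz accounting rate.  The p_estcpu update uses a saturating
 * increment so the estimator sticks at its maximum instead of wrapping
 * to zero; the same idiom in isolation:
 */
#ifdef notdef
static u_int
saturating_incr(v)
	u_int v;
{
	/* as with p_estcpu above: stick at the maximum, don't wrap */
	if (++v == 0)
		v--;
	return (v);
}
#endif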
/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo,
	    sizeof(clkinfo)));
}

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_access.h>
#include <ddb/db_sym.h>

void
db_show_callout(long addr, int haddr, int count, char *modif)
{
	register struct callout *p1;
	register int cum;
	register int s;
	db_expr_t offset;
	char *name;

	db_printf("      cum     ticks      arg func\n");
	s = splhigh();
	for (cum = 0, p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		register int t = p1->c_time;

		if (t > 0)
			cum += t;

		db_find_sym_and_offset((db_addr_t)p1->c_func, &name, &offset);
		if (name == NULL)
			name = "?";

		db_printf("%9d %9d %8x %s (%x)\n",
		    cum, t, p1->c_arg, name, p1->c_func);
	}
	splx(s);
}
#endif
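/*
 * Illustrative sketch (userland, not kernel code): the clockinfo
 * structure constructed above is what a program sees when it queries
 * kern.clockrate through the standard CTL_KERN / KERN_CLOCKRATE MIB
 * names of sysctl(3).  A minimal reader:
 */
#ifdef notdef
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main()
{
	int mib[2];
	struct clockinfo ci;
	size_t len;

	mib[0] = CTL_KERN;
	mib[1] = KERN_CLOCKRATE;
	len = sizeof(ci);
	if (sysctl(mib, 2, &ci, &len, NULL, 0) == -1)
		return (1);
	printf("hz %d stathz %d profhz %d tick %d\n",
	    ci.hz, ci.stathz, ci.profhz, ci.tick);
	return (0);
}
#endif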