/*	$NetBSD: kern_clock.c,v 1.19 1994/06/29 06:32:19 cgd Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
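/*
 * Worked example (illustrative values, not mandated by this file): with
 * stathz = 128 and profhz = 1024, psratio = 1024 / 128 = 8.  While any
 * process is being profiled, startprofclock() below speeds the statistics
 * clock up to profhz and sets psdiv = psratio, so statclock() charges only
 * every 8th tick to the cp_time/dk_time statistics, keeping their
 * effective rate at stathz.
 */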
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
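/*
 * Worked example of the delta-encoded timeout queue (illustrative values):
 * three events due 2, 5 and 5 ticks from now are stored with c_time deltas
 * of 2, 3 and 0.  Because each entry's time is relative to its predecessor,
 * hardclock() only needs to decrement the first not-yet-due entry; once
 * that reaches zero, every leading entry with c_time <= 0 is ripe, and
 * softclock() below runs them all.
 */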
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}
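/*
 * Sketch of typical usage (hypothetical driver code, not part of this
 * file).  Since timeout() returns no handle, the same (function, argument)
 * pair given to timeout() is what identifies the entry to untimeout():
 *
 *	timeout(xxwatchdog, sc, 2 * hz);	-- fire in two seconds
 *	...
 *	untimeout(xxwatchdog, sc);		-- cancel if still pending
 */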
/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
		    (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
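/*
 * Worked example for hzto() (assuming hz = 100, so tick = 10000 usec):
 * a target 2.5 seconds in the future gives (2500 ms) / (10 ms per tick)
 * = 250 ticks via the first branch.  That branch applies whenever the
 * delta in milliseconds fits in 31 bits, i.e. up to roughly 24.8 days;
 * beyond that, the cheaper sec * hz estimate is used instead.
 */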
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}

#ifdef DDB
#include <ddb/db_access.h>
#include <ddb/db_sym.h>

void
db_show_callout(long addr, int haddr, int count, char *modif)
{
	register struct callout *p1;
	register int cum;
	register int s;
	db_expr_t offset;
	char *name;

	db_printf("      cum     ticks      arg  func\n");
	s = splhigh();
	for (cum = 0, p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		register int t = p1->c_time;

		if (t > 0)
			cum += t;

		db_find_sym_and_offset(p1->c_func, &name, &offset);
		if (name == NULL)
			name = "?";

		db_printf("%9d %9d %8x %s (%x)\n",
		    cum, t, p1->c_arg, name, p1->c_func);
	}
	splx(s);
}
#endif