/*	$OpenBSD: kern_clock.c,v 1.81 2013/04/24 17:29:02 matthew Exp $	*/
/*	$NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/timetc.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
        volatile struct timeval *tp = (t); \
        long us; \
 \
        tp->tv_usec = us = tp->tv_usec + (usec); \
        if (us >= 1000000) { \
                tp->tv_usec = us - 1000000; \
                tp->tv_sec++; \
        } \
}

int	stathz;
int	schedhz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

long cp_time[CPUSTATES];

void	*softclock_si;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
        int i;

        softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
        if (softclock_si == NULL)
                panic("initclocks: unable to register softclock intr");

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        psdiv = pscnt = 1;
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;

        /* For very large HZ, ensure that division by 0 does not occur later */
        if (tickadj == 0)
                tickadj = 1;

        inittimecounter();
}
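
/*
 * Worked example of the profhz/stathz relationship described above
 * (illustrative numbers only; nothing in this file requires them): if the
 * machine-dependent code sets stathz = 128 and profhz = 1024, initclocks()
 * computes psratio = 1024 / 128 = 8.  While profiling is active,
 * startprofclock() below sets psdiv = pscnt = psratio, so statclock() is
 * driven at profhz but only charges statistics once every 8 ticks; with
 * profiling off, psdiv is 1 and every statclock() tick is a statistics tick.
 */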

/*
 * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
 * We don't want to send signals with psignal from hardclock because it makes
 * MULTIPROCESSOR locking very complicated.  Instead we use a small trick
 * to send the signals safely and without blocking too many interrupts
 * while doing that (signal handling can be heavy).
 *
 * hardclock detects that the itimer has expired, and schedules a timeout
 * to deliver the signal.  This works because of the following reasons:
 *  - The timeout can be scheduled with a 1 tick time because we're
 *    doing it before the timeout processing in hardclock.  So it will
 *    be scheduled to run as soon as possible.
 *  - The timeout will be run in softclock which will run before we
 *    return to userland and process pending signals.
 *  - If the system is so busy that several VIRTUAL/PROF ticks are
 *    sent before softclock processing, we'll send only one signal.
 *    But if we'd send the signal from hardclock only one signal would
 *    be delivered to the user process.  So userland will only see one
 *    signal anyway.
 */

void
virttimer_trampoline(void *v)
{
        struct process *pr = v;

        psignal(pr->ps_mainproc, SIGVTALRM);
}

void
proftimer_trampoline(void *v)
{
        struct process *pr = v;

        psignal(pr->ps_mainproc, SIGPROF);
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
        struct proc *p;
        struct cpu_info *ci = curcpu();

        p = curproc;
        if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
                struct process *pr = p->p_p;

                /*
                 * Run current process's virtual and profile time, as needed.
                 */
                if (CLKF_USERMODE(frame) &&
                    timerisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick) == 0)
                        timeout_add(&pr->ps_virt_to, 1);
                if (timerisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pr->ps_timer[ITIMER_PROF], tick) == 0)
                        timeout_add(&pr->ps_prof_to, 1);
        }

        /*
         * If no separate statistics clock is available, run it from here.
         */
        if (stathz == 0)
                statclock(frame);

        if (--ci->ci_schedstate.spc_rrticks <= 0)
                roundrobin(ci);

        /*
         * If we are not the primary CPU, we're not allowed to do
         * any more work.
         */
        if (CPU_IS_PRIMARY(ci) == 0)
                return;

        tc_ticktock();

        /*
         * Update real-time timeout queue.
         * Process callouts at a very low cpu priority, so we don't keep the
         * relatively high clock interrupt priority any longer than necessary.
         */
        if (timeout_hardclock_update())
                softintr_schedule(softclock_si);
}
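
/*
 * Illustrative walk through the itimer path above (the hz value is an
 * assumption for the arithmetic, not something this file fixes): with
 * hz = 100, tick is 10000 us, so an ITIMER_VIRTUAL value of 50 ms reaches
 * zero after five user-mode hardclock() ticks.  At that point itimerdecr()
 * returns 0, the one-tick timeout is scheduled, and softclock() runs
 * virttimer_trampoline(), which posts SIGVTALRM before the process next
 * returns to userland.
 */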

/*
 * Compute number of hz until specified time.  Used to
 * compute the second argument to timeout_add() from an absolute time.
 */
int
hzto(const struct timeval *tv)
{
        struct timeval now;
        unsigned long ticks;
        long sec, usec;

        /*
         * If the number of usecs in the whole seconds part of the time
         * difference fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time difference fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time difference down to the maximum
         * representable value.
         *
         * If ints have 32 bits, then the maximum value for any timeout in
         * 10ms ticks is 248 days.
         */
        getmicrotime(&now);
        sec = tv->tv_sec - now.tv_sec;
        usec = tv->tv_usec - now.tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0 || (sec == 0 && usec <= 0)) {
                ticks = 0;
        } else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                    / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                    + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
        if (ticks > INT_MAX)
                ticks = INT_MAX;
        return ((int)ticks);
}

/*
 * Compute number of hz in the specified amount of time.
 */
int
tvtohz(const struct timeval *tv)
{
        unsigned long ticks;
        long sec, usec;

        /*
         * If the number of usecs in the whole seconds part of the time
         * fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time down to the maximum
         * representable value.
         *
         * If ints have 32 bits, then the maximum value for any timeout in
         * 10ms ticks is 248 days.
         */
        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (sec < 0 || (sec == 0 && usec <= 0))
                ticks = 0;
        else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                    / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                    + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
        if (ticks > INT_MAX)
                ticks = INT_MAX;
        return ((int)ticks);
}

int
tstohz(const struct timespec *ts)
{
        struct timeval tv;
        TIMESPEC_TO_TIMEVAL(&tv, ts);

        /* Round up. */
        if ((ts->tv_nsec % 1000) != 0) {
                tv.tv_usec += 1;
                if (tv.tv_usec >= 1000000) {
                        tv.tv_usec -= 1000000;
                        tv.tv_sec += 1;
                }
        }

        return (tvtohz(&tv));
}
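
/*
 * Worked example of the conversion above (hz = 100, i.e. tick = 10000 us,
 * is assumed only for the arithmetic): for a timeval of 0 s / 25000 us,
 * tvtohz() takes the first branch and computes
 * (25000 + 9999) / 10000 + 1 = 4, i.e. the delay is rounded up to whole
 * ticks and one extra tick is added to allow for the current tick to
 * expire.
 */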

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct process *pr)
{
        int s;

        if ((pr->ps_flags & PS_PROFIL) == 0) {
                atomic_setbits_int(&pr->ps_flags, PS_PROFIL);
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct process *pr)
{
        int s;

        if (pr->ps_flags & PS_PROFIL) {
                atomic_clearbits_int(&pr->ps_flags, PS_PROFIL);
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
        }
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
        struct gmonparam *g;
        u_long i;
#endif
        struct cpu_info *ci = curcpu();
        struct schedstate_percpu *spc = &ci->ci_schedstate;
        struct proc *p = curproc;
        struct process *pr;

        /*
         * Notice changes in divisor frequency, and adjust clock
         * frequency accordingly.
         */
        if (spc->spc_psdiv != psdiv) {
                spc->spc_psdiv = psdiv;
                spc->spc_pscnt = psdiv;
                if (psdiv == 1) {
                        setstatclockrate(stathz);
                } else {
                        setstatclockrate(profhz);
                }
        }

        if (CLKF_USERMODE(frame)) {
                pr = p->p_p;
                if (pr->ps_flags & PS_PROFIL)
                        addupc_intr(p, CLKF_PC(frame));
                if (--spc->spc_pscnt > 0)
                        return;
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled record the tick.
                 */
                p->p_uticks++;
                if (pr->ps_nice > NZERO)
                        spc->spc_cp_time[CP_NICE]++;
                else
                        spc->spc_cp_time[CP_USER]++;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = ci->ci_gmon;
                if (g != NULL && g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
#if defined(PROC_PC)
                if (p != NULL && p->p_p->ps_flags & PS_PROFIL)
                        addupc_intr(p, PROC_PC(p));
#endif
                if (--spc->spc_pscnt > 0)
                        return;
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                if (CLKF_INTR(frame)) {
                        if (p != NULL)
                                p->p_iticks++;
                        spc->spc_cp_time[CP_INTR]++;
                } else if (p != NULL && p != spc->spc_idleproc) {
                        p->p_sticks++;
                        spc->spc_cp_time[CP_SYS]++;
                } else
                        spc->spc_cp_time[CP_IDLE]++;
        }
        spc->spc_pscnt = psdiv;

        if (p != NULL) {
                p->p_cpticks++;
                /*
                 * If no schedclock is provided, call it here at ~~12-25 Hz;
                 * ~~16 Hz is best
                 */
                if (schedhz == 0) {
                        if ((++curcpu()->ci_schedstate.spc_schedticks & 3) ==
                            0)
                                schedclock(p);
                }
        }
}
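
/*
 * Illustrative numbers for the schedclock() rate above (the stathz values
 * are assumptions, not requirements): the (spc_schedticks & 3) == 0 test
 * runs schedclock() on every fourth statistics tick, so stathz = 100 gives
 * about 25 Hz and stathz = 64 about 16 Hz, which is the 12-25 Hz range
 * mentioned in the comment.
 */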

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(char *where, size_t *sizep, void *newp)
{
        struct clockinfo clkinfo;

        /*
         * Construct clockinfo structure.
         */
        clkinfo.tick = tick;
        clkinfo.tickadj = tickadj;
        clkinfo.hz = hz;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_rdstruct(where, sizep, newp, &clkinfo, sizeof(clkinfo)));
}
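
/*
 * The struct filled in above is what userland sees through the
 * kern.clockrate sysctl (KERN_CLOCKRATE).  The fragment below is a minimal
 * userland sketch of reading it; it does not belong in the kernel and is
 * kept disabled, and error handling is reduced to the bare minimum.
 */
#if 0
#include <sys/types.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
        struct clockinfo clk;
        size_t len = sizeof(clk);

        /* Ask the kernel for the clockinfo structure built above. */
        if (sysctl(mib, 2, &clk, &len, NULL, 0) == -1)
                return (1);
        printf("hz %d tick %d stathz %d profhz %d\n",
            clk.hz, clk.tick, clk.stathz, clk.profhz);
        return (0);
}
#endif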