/*	$NetBSD: kern_clock.c,v 1.117 2008/01/20 18:09:11 joerg Exp $	*/

/*-
 * Copyright (c) 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.117 2008/01/20 18:09:11 joerg Exp $");

#include "opt_ntp.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/timex.h>
#include <sys/sched.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the CPU
 * just before its quantum expires.  Otherwise, it would never accumulate
 * CPU ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
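
/*
 * Worked example (the rates are illustrative, not required by any port):
 * with stathz = 128 and profhz = 1024, psratio = profhz / stathz = 8.
 * While at least one process is being profiled, the statistics clock is
 * reprogrammed to run at profhz and psdiv is set to psratio, so that
 * statclock() counts only every 8th tick toward the CP_* statistics,
 * while profiling samples are still taken on every tick.
 */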

int	stathz;
int	profhz;
int	profsrc;
int	schedhz;
int	profprocs;
int	hardclock_ticks;
static int hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int psdiv;		/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

static u_int get_intr_timecount(struct timecounter *);

static struct timecounter intr_timecounter = {
	get_intr_timecount,	/* get_timecount */
	0,			/* no poll_pps */
	~0u,			/* counter_mask */
	0,			/* frequency */
	"clockinterrupt",	/* name */
	0,			/* quality - minimum implementation level for a clock */
	NULL,			/* prev */
	NULL,			/* next */
};

static u_int
get_intr_timecount(struct timecounter *tc)
{

	return (u_int)hardclock_ticks;
}

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;
	/*
	 * Provide a minimal default time counter; it runs only at
	 * interrupt (hz) resolution.
	 */
	intr_timecounter.tc_frequency = hz;
	tc_init(&intr_timecounter);
	cpu_initclocks();

	/*
	 * Compute profhz/stathz ratio, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
	if (schedhz == 0) {
		/* 16Hz is best */
		hardscheddiv = hz / 16;
		if (hardscheddiv <= 0)
			panic("hardscheddiv");
	}
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct lwp *l;
	struct proc *p;
	struct cpu_info *ci = curcpu();
	struct ptimer *pt;

	l = ci->ci_data.cpu_onproc;
	if (!CURCPU_IDLE_P()) {
		p = l->l_proc;
		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		if (CLKF_USERMODE(frame) && p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_virtual)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
		if (p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_prof)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	/*
	 * If no separate schedclock is provided, call it here
	 * at about 16 Hz.
	 */
	if (schedhz == 0) {
		if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
			schedclock(l);
			ci->ci_schedstate.spc_schedticks = hardscheddiv;
		}
	}
	if ((--ci->ci_schedstate.spc_ticks) <= 0)
		sched_tick(ci);

#if defined(MULTIPROCESSOR)
	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;
#endif

	hardclock_ticks++;

	tc_ticktock();

	/*
	 * Update real-time timeout queue.  Callouts are processed at a
	 * very low CPU priority, so we don't keep the relatively high
	 * clock interrupt priority any longer than necessary.
	 */
	callout_hardclock();
}
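
/*
 * Worked example of the schedclock divider above (the hz values are
 * illustrative): with hz = 1024, hardscheddiv = 1024 / 16 = 64, so
 * schedclock() runs at exactly 16 Hz; with hz = 100, integer division
 * gives hardscheddiv = 6, or roughly 16.7 Hz.  A port with hz < 16
 * would compute hardscheddiv == 0, hence the panic in initclocks().
 *
 * Sketch (hypothetical, for illustration only; not part of this file)
 * of a machine-dependent cpu_initclocks() for a port with a single
 * fixed-rate timer; md_timer_start() is an invented name.  Leaving
 * stathz and schedhz at zero makes hardclock() drive statclock() and
 * schedclock() itself, as described above:
 *
 *	void
 *	cpu_initclocks(void)
 *	{
 *
 *		md_timer_start(hz);	(program hz interrupts per second)
 *	}
 */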

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	KASSERT(mutex_owned(&p->p_stmutex));

	if ((p->p_stflag & PST_PROFIL) == 0) {
		p->p_stflag |= PST_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (++profprocs == 1 && stathz != 0)
			psdiv = psratio;
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	KASSERT(mutex_owned(&p->p_stmutex));

	if (p->p_stflag & PST_PROFIL) {
		p->p_stflag &= ~PST_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (--profprocs == 0 && stathz != 0)
			psdiv = 1;
	}
}

#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		mutex_spin_enter(&p->p_stmutex);
		if (p->p_stflag & PST_PROFIL)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
	} else {
#ifdef GPROF
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif

void
schedclock(struct lwp *l)
{

	if ((l->l_flag & LW_IDLE) != 0)
		return;

	sched_schedclock(l);
}
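
/*
 * Typical caller sequence for the profiling hooks above (hypothetical
 * sketch; the real callers live elsewhere, e.g. in the profil(2)
 * implementation).  Both routines assert that p_stmutex is held:
 *
 *	mutex_spin_enter(&p->p_stmutex);
 *	startprofclock(p);	-- first profiled process switches
 *				   psdiv to psratio
 *	mutex_spin_exit(&p->p_stmutex);
 *	...
 *	mutex_spin_enter(&p->p_stmutex);
 *	stopprofclock(p);	-- last one switches psdiv back to 1
 *	mutex_spin_exit(&p->p_stmutex);
 *
 * statclock() notices the psdiv change on its next tick and calls
 * setstatclockrate() to reprogram the statistics clock hardware.
 */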

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct proc *p;
	struct lwp *l;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
	l = ci->ci_data.cpu_onproc;
	if ((l->l_flag & LW_IDLE) != 0) {
		/*
		 * Don't account idle lwps as swapper.
		 */
		p = NULL;
	} else {
		p = l->l_proc;
		mutex_spin_enter(&p->p_stmutex);
	}

	if (CLKF_USERMODE(frame)) {
		if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
			addupc_intr(l, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0) {
			mutex_spin_exit(&p->p_stmutex);
			return;
		}

		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p != NULL && profsrc == PROFSRC_CLOCK &&
		    (p->p_stflag & PST_PROFIL)) {
			addupc_intr(l, LWP_PC(l));
		}
#endif
		if (--spc->spc_pscnt > 0) {
			if (p != NULL)
				mutex_spin_exit(&p->p_stmutex);
			return;
		}
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
			if (p != NULL) {
				p->p_iticks++;
			}
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else {
			spc->spc_cp_time[CP_IDLE]++;
		}
	}
	spc->spc_pscnt = psdiv;

	if (p != NULL) {
		++l->l_cpticks;
		mutex_spin_exit(&p->p_stmutex);
	}
}
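
/*
 * Worked trace of the statclock() divider (values illustrative): with
 * psdiv = 8, a user-mode tick first takes a profiling sample if the
 * process has PST_PROFIL set, then decrements spc_pscnt.  On seven of
 * every eight ticks spc_pscnt is still positive and statclock() returns
 * early; on the eighth, p_uticks and CP_USER (or CP_NICE, if p_nice >
 * NZERO) are charged and spc_pscnt is reset to psdiv.  With psdiv = 1
 * (no profiling), every tick is charged.  The per-CPU spc_cp_time[]
 * counters accumulated here are what utilities such as top(1) and
 * vmstat(1) ultimately report.
 */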