1 /* $NetBSD: kern_clock.c,v 1.121 2008/04/22 11:45:28 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * This code is derived from software contributed to The NetBSD Foundation 11 * by Charles M. Hannum. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the NetBSD 24 * Foundation, Inc. and its contributors. 25 * 4. Neither the name of The NetBSD Foundation nor the names of its 26 * contributors may be used to endorse or promote products derived 27 * from this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 31 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 33 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /*- 43 * Copyright (c) 1982, 1986, 1991, 1993 44 * The Regents of the University of California. All rights reserved. 45 * (c) UNIX System Laboratories, Inc. 46 * All or some portions of this file are derived from material licensed 47 * to the University of California by American Telephone and Telegraph 48 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 49 * the permission of UNIX System Laboratories, Inc. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions 53 * are met: 54 * 1. Redistributions of source code must retain the above copyright 55 * notice, this list of conditions and the following disclaimer. 56 * 2. Redistributions in binary form must reproduce the above copyright 57 * notice, this list of conditions and the following disclaimer in the 58 * documentation and/or other materials provided with the distribution. 59 * 3. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 76 */ 77 78 #include <sys/cdefs.h> 79 __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.121 2008/04/22 11:45:28 ad Exp $"); 80 81 #include "opt_ntp.h" 82 #include "opt_multiprocessor.h" 83 #include "opt_perfctrs.h" 84 85 #include <sys/param.h> 86 #include <sys/systm.h> 87 #include <sys/callout.h> 88 #include <sys/kernel.h> 89 #include <sys/proc.h> 90 #include <sys/resourcevar.h> 91 #include <sys/signalvar.h> 92 #include <sys/sysctl.h> 93 #include <sys/timex.h> 94 #include <sys/sched.h> 95 #include <sys/time.h> 96 #include <sys/timetc.h> 97 #include <sys/cpu.h> 98 #include <sys/atomic.h> 99 100 #include <uvm/uvm_extern.h> 101 102 #ifdef GPROF 103 #include <sys/gmon.h> 104 #endif 105 106 /* 107 * Clock handling routines. 108 * 109 * This code is written to operate with two timers that run independently of 110 * each other. The main clock, running hz times per second, is used to keep 111 * track of real time. The second timer handles kernel and user profiling, 112 * and does resource use estimation. If the second timer is programmable, 113 * it is randomized to avoid aliasing between the two clocks. For example, 114 * the randomization prevents an adversary from always giving up the CPU 115 * just before its quantum expires. Otherwise, it would never accumulate 116 * CPU ticks. The mean frequency of the second timer is stathz. 117 * 118 * If no second timer exists, stathz will be zero; in this case we drive 119 * profiling and statistics off the main clock. This WILL NOT be accurate; 120 * do not do it unless absolutely necessary. 121 * 122 * The statistics clock may (or may not) be run at a higher rate while 123 * profiling. This profile clock runs at profhz. We require that profhz 124 * be an integral multiple of stathz. 125 * 126 * If the statistics clock is running fast, it must be divided by the ratio 127 * profhz/stathz for statistics. (For profiling, every tick counts.) 128 */ 129 130 int stathz; 131 int profhz; 132 int profsrc; 133 int schedhz; 134 int profprocs; 135 int hardclock_ticks; 136 static int hardscheddiv; /* hard => sched divider (used if schedhz == 0) */ 137 static int psdiv; /* prof => stat divider */ 138 int psratio; /* ratio: prof / stat */ 139 140 static u_int get_intr_timecount(struct timecounter *); 141 142 static struct timecounter intr_timecounter = { 143 get_intr_timecount, /* get_timecount */ 144 0, /* no poll_pps */ 145 ~0u, /* counter_mask */ 146 0, /* frequency */ 147 "clockinterrupt", /* name */ 148 0, /* quality - minimum implementation level for a clock */ 149 NULL, /* prev */ 150 NULL, /* next */ 151 }; 152 153 static u_int 154 get_intr_timecount(struct timecounter *tc) 155 { 156 157 return (u_int)hardclock_ticks; 158 } 159 160 /* 161 * Initialize clock frequencies and start both clocks running. 162 */ 163 void 164 initclocks(void) 165 { 166 int i; 167 168 /* 169 * Set divisors to 1 (normal case) and let the machine-specific 170 * code do its bit. 171 */ 172 psdiv = 1; 173 /* 174 * provide minimum default time counter 175 * will only run at interrupt resolution 176 */ 177 intr_timecounter.tc_frequency = hz; 178 tc_init(&intr_timecounter); 179 cpu_initclocks(); 180 181 /* 182 * Compute profhz and stathz, fix profhz if needed. 183 */ 184 i = stathz ? stathz : hz; 185 if (profhz == 0) 186 profhz = i; 187 psratio = profhz / i; 188 if (schedhz == 0) { 189 /* 16Hz is best */ 190 hardscheddiv = hz / 16; 191 if (hardscheddiv <= 0) 192 panic("hardscheddiv"); 193 } 194 195 } 196 197 /* 198 * The real-time timer, interrupting hz times per second. 199 */ 200 void 201 hardclock(struct clockframe *frame) 202 { 203 struct lwp *l; 204 struct cpu_info *ci; 205 206 ci = curcpu(); 207 l = ci->ci_data.cpu_onproc; 208 209 timer_tick(l, CLKF_USERMODE(frame)); 210 211 /* 212 * If no separate statistics clock is available, run it from here. 213 */ 214 if (stathz == 0) 215 statclock(frame); 216 /* 217 * If no separate schedclock is provided, call it here 218 * at about 16 Hz. 219 */ 220 if (schedhz == 0) { 221 if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) { 222 schedclock(l); 223 ci->ci_schedstate.spc_schedticks = hardscheddiv; 224 } 225 } 226 if ((--ci->ci_schedstate.spc_ticks) <= 0) 227 sched_tick(ci); 228 229 #if defined(MULTIPROCESSOR) 230 if (CPU_IS_PRIMARY(ci)) 231 #endif 232 { 233 hardclock_ticks++; 234 tc_ticktock(); 235 } 236 237 /* 238 * Update real-time timeout queue. Callouts are processed at a 239 * very low CPU priority, so we don't keep the relatively high 240 * clock interrupt priority any longer than necessary. 241 */ 242 callout_hardclock(); 243 } 244 245 /* 246 * Start profiling on a process. 247 * 248 * Kernel profiling passes proc0 which never exits and hence 249 * keeps the profile clock running constantly. 250 */ 251 void 252 startprofclock(struct proc *p) 253 { 254 255 KASSERT(mutex_owned(&p->p_stmutex)); 256 257 if ((p->p_stflag & PST_PROFIL) == 0) { 258 p->p_stflag |= PST_PROFIL; 259 /* 260 * This is only necessary if using the clock as the 261 * profiling source. 262 */ 263 if (++profprocs == 1 && stathz != 0) 264 psdiv = psratio; 265 } 266 } 267 268 /* 269 * Stop profiling on a process. 270 */ 271 void 272 stopprofclock(struct proc *p) 273 { 274 275 KASSERT(mutex_owned(&p->p_stmutex)); 276 277 if (p->p_stflag & PST_PROFIL) { 278 p->p_stflag &= ~PST_PROFIL; 279 /* 280 * This is only necessary if using the clock as the 281 * profiling source. 282 */ 283 if (--profprocs == 0 && stathz != 0) 284 psdiv = 1; 285 } 286 } 287 288 #if defined(PERFCTRS) 289 /* 290 * Independent profiling "tick" in case we're using a separate 291 * clock or profiling event source. Currently, that's just 292 * performance counters--hence the wrapper. 293 */ 294 void 295 proftick(struct clockframe *frame) 296 { 297 #ifdef GPROF 298 struct gmonparam *g; 299 intptr_t i; 300 #endif 301 struct lwp *l; 302 struct proc *p; 303 304 l = curcpu()->ci_data.cpu_onproc; 305 p = (l ? l->l_proc : NULL); 306 if (CLKF_USERMODE(frame)) { 307 mutex_spin_enter(&p->p_stmutex); 308 if (p->p_stflag & PST_PROFIL) 309 addupc_intr(l, CLKF_PC(frame)); 310 mutex_spin_exit(&p->p_stmutex); 311 } else { 312 #ifdef GPROF 313 g = &_gmonparam; 314 if (g->state == GMON_PROF_ON) { 315 i = CLKF_PC(frame) - g->lowpc; 316 if (i < g->textsize) { 317 i /= HISTFRACTION * sizeof(*g->kcount); 318 g->kcount[i]++; 319 } 320 } 321 #endif 322 #ifdef LWP_PC 323 if (p != NULL && (p->p_stflag & PST_PROFIL) != 0) 324 addupc_intr(l, LWP_PC(l)); 325 #endif 326 } 327 } 328 #endif 329 330 void 331 schedclock(struct lwp *l) 332 { 333 struct cpu_info *ci; 334 335 ci = l->l_cpu; 336 337 /* Accumulate syscall and context switch counts. */ 338 atomic_add_int((unsigned *)&uvmexp.swtch, ci->ci_data.cpu_nswtch); 339 ci->ci_data.cpu_nswtch = 0; 340 atomic_add_int((unsigned *)&uvmexp.syscalls, ci->ci_data.cpu_nsyscall); 341 ci->ci_data.cpu_nsyscall = 0; 342 343 if ((l->l_flag & LW_IDLE) != 0) 344 return; 345 346 sched_schedclock(l); 347 } 348 349 /* 350 * Statistics clock. Grab profile sample, and if divider reaches 0, 351 * do process and kernel statistics. 352 */ 353 void 354 statclock(struct clockframe *frame) 355 { 356 #ifdef GPROF 357 struct gmonparam *g; 358 intptr_t i; 359 #endif 360 struct cpu_info *ci = curcpu(); 361 struct schedstate_percpu *spc = &ci->ci_schedstate; 362 struct proc *p; 363 struct lwp *l; 364 365 /* 366 * Notice changes in divisor frequency, and adjust clock 367 * frequency accordingly. 368 */ 369 if (spc->spc_psdiv != psdiv) { 370 spc->spc_psdiv = psdiv; 371 spc->spc_pscnt = psdiv; 372 if (psdiv == 1) { 373 setstatclockrate(stathz); 374 } else { 375 setstatclockrate(profhz); 376 } 377 } 378 l = ci->ci_data.cpu_onproc; 379 if ((l->l_flag & LW_IDLE) != 0) { 380 /* 381 * don't account idle lwps as swapper. 382 */ 383 p = NULL; 384 } else { 385 p = l->l_proc; 386 mutex_spin_enter(&p->p_stmutex); 387 } 388 389 if (CLKF_USERMODE(frame)) { 390 if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK) 391 addupc_intr(l, CLKF_PC(frame)); 392 if (--spc->spc_pscnt > 0) { 393 mutex_spin_exit(&p->p_stmutex); 394 return; 395 } 396 397 /* 398 * Came from user mode; CPU was in user state. 399 * If this process is being profiled record the tick. 400 */ 401 p->p_uticks++; 402 if (p->p_nice > NZERO) 403 spc->spc_cp_time[CP_NICE]++; 404 else 405 spc->spc_cp_time[CP_USER]++; 406 } else { 407 #ifdef GPROF 408 /* 409 * Kernel statistics are just like addupc_intr, only easier. 410 */ 411 g = &_gmonparam; 412 if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) { 413 i = CLKF_PC(frame) - g->lowpc; 414 if (i < g->textsize) { 415 i /= HISTFRACTION * sizeof(*g->kcount); 416 g->kcount[i]++; 417 } 418 } 419 #endif 420 #ifdef LWP_PC 421 if (p != NULL && profsrc == PROFSRC_CLOCK && 422 (p->p_stflag & PST_PROFIL)) { 423 addupc_intr(l, LWP_PC(l)); 424 } 425 #endif 426 if (--spc->spc_pscnt > 0) { 427 if (p != NULL) 428 mutex_spin_exit(&p->p_stmutex); 429 return; 430 } 431 /* 432 * Came from kernel mode, so we were: 433 * - handling an interrupt, 434 * - doing syscall or trap work on behalf of the current 435 * user process, or 436 * - spinning in the idle loop. 437 * Whichever it is, charge the time as appropriate. 438 * Note that we charge interrupts to the current process, 439 * regardless of whether they are ``for'' that process, 440 * so that we know how much of its real time was spent 441 * in ``non-process'' (i.e., interrupt) work. 442 */ 443 if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) { 444 if (p != NULL) { 445 p->p_iticks++; 446 } 447 spc->spc_cp_time[CP_INTR]++; 448 } else if (p != NULL) { 449 p->p_sticks++; 450 spc->spc_cp_time[CP_SYS]++; 451 } else { 452 spc->spc_cp_time[CP_IDLE]++; 453 } 454 } 455 spc->spc_pscnt = psdiv; 456 457 if (p != NULL) { 458 ++l->l_cpticks; 459 mutex_spin_exit(&p->p_stmutex); 460 } 461 } 462