/*	$NetBSD: kern_clock.c,v 1.83 2003/01/27 22:38:24 pk Exp $	*/

/*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.83 2003/01/27 22:38:24 pk Exp $");

#include "opt_callout.h"
#include "opt_ntp.h"
#include "opt_perfctrs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/timex.h>
#include <sys/sched.h>
#include <sys/time.h>
#ifdef CALLWHEEL_STATS
#include <sys/device.h>
#endif

#include <machine/cpu.h>
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
#include <machine/intr.h>
#endif

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
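
/*
 * For example (illustrative figures, not requirements): with stathz = 128
 * and profhz = 1024, psratio is 8.  While any process is being profiled,
 * the statistics clock is run at profhz, and statclock() below does its
 * statistics work only on every psratio-th tick, so statistics are still
 * gathered at an effective 128 Hz.
 */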

#ifdef NTP	/* NTP phase-locked loop in kernel */
/*
 * Phase/frequency-lock loop (PLL/FLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL/FLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as a result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by an ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error bound
 * growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_state = TIME_OK;	/* clock state */
int time_status = STA_UNSYNC;	/* clock status bits */
long time_offset = 0;		/* time offset (us) */
long time_constant = 0;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = MAXPHASE;	/* maximum error (us) */
long time_esterror = MAXPHASE;	/* estimated error (us) */

/*
 * The following variables establish the state of the PLL/FLL and the
 * residual time and frequency offset of the local clock.  The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started.  Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed from time_phase and time_freq at each
 * seconds rollover.
 *
 * time_reftime is the second's portion of the system time at the last
 * call to ntp_adjtime().  It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = 0;		/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
long time_reftime = 0;		/* time at last adjustment (s) */

#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC).  The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime().  pps_count counts the seconds of the calibration
 * interval, the duration of which is nominally pps_shift in powers of
 * two.
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion (jitter) measured by
 * this filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion (wander) measured
 * by this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time.  Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update.  It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from current nominal offset.  It is used
 * mainly to suppress error bursts due to priority conflicts between the
 * PPS interrupt and timer interrupt.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm.  It's just too complicated for words.
 */
struct timeval pps_time;	/* kernel time at last interval */
long pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
long pps_offset = 0;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */
long pps_ff[] = {0, 0, 0};	/* pps frequency offset median filter */
long pps_freq = 0;		/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
long pps_usec = 0;		/* microsec counter at last interval */
long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
int pps_glitch = 0;		/* pps signal glitch counter */
int pps_count = 0;		/* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
int pps_intcnt = 0;		/* intervals at current duration */

/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 ppm).
 */
long pps_jitcnt = 0;		/* jitter limit exceeded */
long pps_calcnt = 0;		/* calibration intervals */
long pps_errcnt = 0;		/* calibration errors */
long pps_stbcnt = 0;		/* stability limit exceeded */
#endif /* PPS_SYNC */

#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock is configured on the system.
 */
#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;		/* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface.  The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters.  The clock_cpu variable contains
 * the offset between the system clock and the HIGHBALL clock for use in
 * disciplining the kernel time variable.
 */
extern struct timeval clock_offset;	/* Highball clock offset */
long clock_cpu = 0;		/* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */
#endif /* NTP */


/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	volatile struct timeval *tp = (t); \
	long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
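
/*
 * Illustrative use: BUMPTIME(&mono_time, tick) advances mono_time by one
 * clock tick, carrying any tv_usec overflow into tv_sec.  Note the single
 * carry test: the macro assumes the increment is less than one second, so
 * tv_usec can never reach 2000000.
 */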

int	stathz;
int	profhz;
int	profsrc;
int	schedhz;
int	profprocs;
int	softclock_running;		/* 1 => softclock() is running */
static int psdiv;			/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
#ifndef NTP
static int tickfixcnt;			/* accumulated fractional error */
#else
int	fixtick;			/* used by NTP for same */
int	shifthz;
#endif

/*
 * We might want ldd to load both words from time at once.
 * To succeed we need to be quadword aligned.
 * The sparc already does that, and that it has worked so far is a fluke.
 */
volatile struct timeval time __attribute__((__aligned__(__alignof__(quad_t))));
volatile struct timeval mono_time;

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities", and Justin Gibbs's subsequent
 * integration into FreeBSD, modified for NetBSD by Jason R. Thorpe.
 *
 * The original work on the data structures used in this implementation
 * was published by G. Varghese and A. Lauck in the paper "Hashed and
 * Hierarchical Timing Wheels: Data Structures for the Efficient
 * Implementation of a Timer Facility" in the Proceedings of the 11th
 * ACM Annual Symposium on Operating System Principles, Austin, Texas,
 * November 1987.
 */
struct callout_queue *callwheel;
int	callwheelsize, callwheelbits, callwheelmask;
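
/*
 * Illustration of the wheel hashing (figures for example only): with
 * callwheelsize = 1024, callwheelmask is 0x3ff, and a callout expiring
 * at c_time 5000 hashes to bucket 5000 & 0x3ff == 904.  A bucket is
 * revisited every callwheelsize ticks, so entries found in it fire only
 * when their c_time matches the current tick count exactly.
 */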

static struct callout *nextsoftcheck;	/* next callout to be checked */

#ifdef CALLWHEEL_STATS
int	*callwheel_sizes;		/* per-bucket length count */
struct evcnt callwheel_collisions;	/* number of hash collisions */
struct evcnt callwheel_maxlength;	/* length of the longest hash chain */
struct evcnt callwheel_count;		/* # callouts currently */
struct evcnt callwheel_established;	/* # callouts established */
struct evcnt callwheel_fired;		/* # callouts that fired */
struct evcnt callwheel_disestablished;	/* # callouts disestablished */
struct evcnt callwheel_changed;		/* # callouts changed */
struct evcnt callwheel_softclocks;	/* # times softclock() called */
struct evcnt callwheel_softchecks;	/* # checks per softclock() */
struct evcnt callwheel_softempty;	/* # empty buckets seen */
struct evcnt callwheel_hintworked;	/* # times hint saved scan */
#endif /* CALLWHEEL_STATS */

/*
 * This value indicates the number of consecutive callouts that
 * will be checked before we allow interrupts to have a chance
 * again.
 */
#ifndef MAX_SOFTCLOCK_STEPS
#define	MAX_SOFTCLOCK_STEPS	100
#endif

struct simplelock callwheel_slock;

#define	CALLWHEEL_LOCK(s)						\
do {									\
	s = splsched();							\
	simple_lock(&callwheel_slock);					\
} while (/*CONSTCOND*/ 0)

#define	CALLWHEEL_UNLOCK(s)						\
do {									\
	simple_unlock(&callwheel_slock);				\
	splx(s);							\
} while (/*CONSTCOND*/ 0)

static void callout_stop_locked(struct callout *);

/*
 * These are both protected by callwheel_slock.
 * XXX SHOULD BE STATIC!!
 */
u_int64_t hardclock_ticks, softclock_ticks;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
void	softclock(void *);
void	*softclock_si;
#endif

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("initclocks: unable to register softclock intr");
#endif

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz/rrticks, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
	rrticks = hz / 10;

#ifdef NTP
	switch (hz) {
	case 1:
		shifthz = SHIFT_SCALE - 0;
		break;
	case 2:
		shifthz = SHIFT_SCALE - 1;
		break;
	case 4:
		shifthz = SHIFT_SCALE - 2;
		break;
	case 8:
		shifthz = SHIFT_SCALE - 3;
		break;
	case 16:
		shifthz = SHIFT_SCALE - 4;
		break;
	case 32:
		shifthz = SHIFT_SCALE - 5;
		break;
	case 60:
	case 64:
		shifthz = SHIFT_SCALE - 6;
		break;
	case 96:
	case 100:
	case 128:
		shifthz = SHIFT_SCALE - 7;
		break;
	case 256:
		shifthz = SHIFT_SCALE - 8;
		break;
	case 512:
		shifthz = SHIFT_SCALE - 9;
		break;
	case 1000:
	case 1024:
		shifthz = SHIFT_SCALE - 10;
		break;
	case 1200:
	case 2048:
		shifthz = SHIFT_SCALE - 11;
		break;
	case 4096:
		shifthz = SHIFT_SCALE - 12;
		break;
	case 8192:
		shifthz = SHIFT_SCALE - 13;
		break;
	case 16384:
		shifthz = SHIFT_SCALE - 14;
		break;
	case 32768:
		shifthz = SHIFT_SCALE - 15;
		break;
	case 65536:
		shifthz = SHIFT_SCALE - 16;
		break;
	default:
		panic("weird hz");
	}
	if (fixtick == 0) {
		/*
		 * Give MD code a chance to set this to a better
		 * value; but, if it doesn't, we should.
		 */
		fixtick = (1000000 - (hz*tick));
	}
#endif
}
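
/*
 * Worked example of the fixtick computation above (illustrative): with
 * hz = 1024, tick is 1000000 / 1024 = 976 us, so hz * tick accounts for
 * only 999424 us per second and fixtick becomes 576; the NTP code then
 * feeds those 576 us back in over each second (see the time_adj
 * computation in hardclock()).  With hz = 100, tick = 10000 and fixtick
 * is simply 0.
 */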

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct lwp *l;
	struct proc *p;
	int delta;
	extern int tickdelta;
	extern long timedelta;
	struct cpu_info *ci = curcpu();
	struct ptimer *pt;
	int s;
#ifdef NTP
	int time_update;
	int ltemp;
#endif

	l = curlwp;
	if (l) {
		p = l->l_proc;
		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		if (CLKF_USERMODE(frame) && p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_virtual)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
		if (p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_prof)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	if ((--ci->ci_schedstate.spc_rrticks) <= 0)
		roundrobin(ci);

#if defined(MULTIPROCESSOR)
	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;
#endif

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' would not divide the second evenly into
	 * milliseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
	delta = tick;

#ifndef NTP
	if (tickfix) {
		tickfixcnt += tickfix;
		if (tickfixcnt >= tickfixinterval) {
			delta++;
			tickfixcnt -= tickfixinterval;
		}
	}
#endif /* !NTP */
	/* Imprecise 4bsd adjtime() handling */
	if (timedelta != 0) {
		delta += tickdelta;
		timedelta -= tickdelta;
	}

#ifdef notyet
	microset();
#endif

#ifndef NTP
	BUMPTIME(&time, delta);		/* XXX Now done using NTP code below */
#endif
	BUMPTIME(&mono_time, delta);

#ifdef NTP
	time_update = delta;

	/*
	 * Compute the phase adjustment.  If the low-order bits
	 * (time_phase) of the update overflow, bump the high-order bits
	 * (time_update).
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		time_update -= ltemp;
	} else if (time_phase >= FINEUSEC) {
		ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		time_update += ltemp;
	}
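
	/*
	 * Illustration of the scaled arithmetic above (not normative):
	 * time_adj and time_phase are kept in units of 2^-SHIFT_SCALE
	 * microseconds, FINEUSEC being one microsecond in those units.
	 * If time_adj amounts to +1.5 us per tick, then after one tick
	 * time_phase holds 1.5 us scaled; one whole microsecond is carried
	 * into time_update and the 0.5 us remainder is left in time_phase
	 * for the next tick.
	 */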

#ifdef HIGHBALL
	/*
	 * If the HIGHBALL board is installed, we need to adjust the
	 * external clock offset in order to close the hardware feedback
	 * loop.  This will adjust the external clock phase and frequency
	 * in small amounts.  The additional phase noise and frequency
	 * wander this causes should be minimal.  We also need to
	 * discipline the kernel time variable, since the PLL is used to
	 * discipline the external clock.  If the Highball board is not
	 * present, we discipline kernel time with the PLL as usual.  We
	 * assume that the external clock phase adjustment (time_update)
	 * and kernel phase adjustment (clock_cpu) are less than the
	 * value of tick.
	 */
	clock_offset.tv_usec += time_update;
	if (clock_offset.tv_usec >= 1000000) {
		clock_offset.tv_sec++;
		clock_offset.tv_usec -= 1000000;
	}
	if (clock_offset.tv_usec < 0) {
		clock_offset.tv_sec--;
		clock_offset.tv_usec += 1000000;
	}
	time.tv_usec += clock_cpu;
	clock_cpu = 0;
#else
	time.tv_usec += time_update;
#endif /* HIGHBALL */

	/*
	 * On rollover of the second the phase adjustment to be used for
	 * the next second is calculated.  Also, the maximum error is
	 * increased by the tolerance.  If the PPS frequency discipline
	 * code is present, the phase is increased to compensate for the
	 * CPU clock oscillator frequency error.
	 *
	 * On a 32-bit machine and given parameters in the timex.h
	 * header file, the maximum phase adjustment is +-512 ms and
	 * maximum frequency offset is (a tad less than) +-512 ppm.  On a
	 * 64-bit machine, you shouldn't need to ask.
	 */
	if (time.tv_usec >= 1000000) {
		time.tv_usec -= 1000000;
		time.tv_sec++;
		time_maxerror += time_tolerance >> SHIFT_USEC;

		/*
		 * Leap second processing.  If in leap-insert state at
		 * the end of the day, the system clock is set back one
		 * second; if in leap-delete state, the system clock is
		 * set ahead one second.  The microtime() routine or
		 * external clock driver will ensure that reported time
		 * is always monotonic.  The ugly divides should be
		 * replaced.
		 */
		switch (time_state) {
		case TIME_OK:
			if (time_status & STA_INS)
				time_state = TIME_INS;
			else if (time_status & STA_DEL)
				time_state = TIME_DEL;
			break;

		case TIME_INS:
			if (time.tv_sec % 86400 == 0) {
				time.tv_sec--;
				time_state = TIME_OOP;
			}
			break;

		case TIME_DEL:
			if ((time.tv_sec + 1) % 86400 == 0) {
				time.tv_sec++;
				time_state = TIME_WAIT;
			}
			break;

		case TIME_OOP:
			time_state = TIME_WAIT;
			break;

		case TIME_WAIT:
			if (!(time_status & (STA_INS | STA_DEL)))
				time_state = TIME_OK;
			break;
		}
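
		/*
		 * Leap-insert walkthrough (illustrative): with STA_INS
		 * set, the state moves TIME_OK -> TIME_INS; at the end
		 * of the UTC day (time.tv_sec % 86400 == 0) the clock is
		 * stepped back one second and the state becomes TIME_OOP,
		 * so the final second is repeated; the next rollover
		 * moves to TIME_WAIT, and TIME_OK is restored once the
		 * daemon clears STA_INS.
		 */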

		/*
		 * Compute the phase adjustment for the next second.  In
		 * PLL mode, the offset is reduced by a fixed factor
		 * times the time constant.  In FLL mode the offset is
		 * used directly.  In either mode, the maximum phase
		 * adjustment for each second is clamped so as to spread
		 * the adjustment over not more than the number of
		 * seconds between updates.
		 */
		if (time_offset < 0) {
			ltemp = -time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset += ltemp;
			time_adj = -ltemp << (shifthz - SHIFT_UPDATE);
		} else if (time_offset > 0) {
			ltemp = time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset -= ltemp;
			time_adj = ltemp << (shifthz - SHIFT_UPDATE);
		} else
			time_adj = 0;

		/*
		 * Compute the frequency estimate and additional phase
		 * adjustment due to frequency error for the next
		 * second.  When the PPS signal is engaged, gnaw on the
		 * watchdog counter and update the frequency computed by
		 * the pll and the PPS signal.
		 */
#ifdef PPS_SYNC
		pps_valid++;
		if (pps_valid == PPS_VALID) {
			pps_jitter = MAXTIME;
			pps_stabil = MAXFREQ;
			time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			    STA_PPSWANDER | STA_PPSERROR);
		}
		ltemp = time_freq + pps_freq;
#else
		ltemp = time_freq;
#endif /* PPS_SYNC */

		if (ltemp < 0)
			time_adj -= -ltemp >> (SHIFT_USEC - shifthz);
		else
			time_adj += ltemp >> (SHIFT_USEC - shifthz);
		time_adj += (long)fixtick << shifthz;

		/*
		 * When the CPU clock oscillator frequency is not a
		 * power of 2 in Hz, shifthz is only an approximate
		 * scale factor.
		 *
		 * To determine the adjustment, you can do the following:
		 *	bc -q
		 *	scale=24
		 *	obase=2
		 *	idealhz/realhz
		 * where `idealhz' is the next higher power of 2, and `realhz'
		 * is the actual value.  You may need to factor this result
		 * into a sequence of 2 multipliers to get better precision.
		 *
		 * Likewise, the error can be calculated with (e.g. for 100Hz):
		 *	bc -q
		 *	scale=24
		 *	((1+2^-2+2^-5)*(1-2^-10)*realhz-idealhz)/idealhz
		 * (and then multiply by 1000000 to get ppm).
		 */
		switch (hz) {
		case 60:
			/* A factor of 1.000100010001 gives about 15ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 4);
				time_adj -= (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 4);
				time_adj += (time_adj >> 8);
			}
			break;

		case 96:
			/* A factor of 1.0101010101 gives about 244ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2);
				time_adj -= (-time_adj >> 4) + (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 2);
				time_adj += (time_adj >> 4) + (time_adj >> 8);
			}
			break;

		case 100:
			/* A factor of 1.010001111010111 gives about 1ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
				time_adj += (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 2) + (time_adj >> 5);
				time_adj -= (time_adj >> 10);
			}
			break;

		case 1000:
			/* A factor of 1.000001100010100001 gives about 50ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 6) + (-time_adj >> 11);
				time_adj -= (-time_adj >> 7);
			} else {
				time_adj += (time_adj >> 6) + (time_adj >> 11);
				time_adj += (time_adj >> 7);
			}
			break;

		case 1200:
			/* A factor of 1.1011010011100001 gives about 64ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 1) + (-time_adj >> 6);
				time_adj -= (-time_adj >> 3) + (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 1) + (time_adj >> 6);
				time_adj += (time_adj >> 3) + (time_adj >> 10);
			}
			break;
		}

#ifdef EXT_CLOCK
		/*
		 * If an external clock is present, it is necessary to
		 * discipline the kernel time variable anyway, since not
		 * all system components use the microtime() interface.
		 * Here, the time offset between the external clock and
		 * kernel time variable is computed every so often.
		 */
		clock_count++;
		if (clock_count > CLOCK_INTERVAL) {
			clock_count = 0;
			microtime(&clock_ext);
			delta.tv_sec = clock_ext.tv_sec - time.tv_sec;
			delta.tv_usec = clock_ext.tv_usec -
			    time.tv_usec;
			if (delta.tv_usec < 0)
				delta.tv_sec--;
			if (delta.tv_usec >= 500000) {
				delta.tv_usec -= 1000000;
				delta.tv_sec++;
			}
			if (delta.tv_usec < -500000) {
				delta.tv_usec += 1000000;
				delta.tv_sec--;
			}
			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
			    delta.tv_usec > MAXPHASE) ||
			    delta.tv_sec < -1 || (delta.tv_sec == -1 &&
			    delta.tv_usec < -MAXPHASE)) {
				time = clock_ext;
				delta.tv_sec = 0;
				delta.tv_usec = 0;
			}
#ifdef HIGHBALL
			clock_cpu = delta.tv_usec;
#else /* HIGHBALL */
			hardupdate(delta.tv_usec);
#endif /* HIGHBALL */
		}
#endif /* EXT_CLOCK */
	}

#endif /* NTP */

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	CALLWHEEL_LOCK(s);
	hardclock_ticks++;
	if (! TAILQ_EMPTY(&callwheel[hardclock_ticks & callwheelmask].cq_q)) {
		CALLWHEEL_UNLOCK(s);
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do
			 * it now.
			 *
			 * NOTE: If we're at ``base priority'', softclock()
			 * was not already running.
			 */
			spllowersoftclock();
			KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
			softclock(NULL);
			KERNEL_UNLOCK();
		} else {
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
			softintr_schedule(softclock_si);
#else
			setsoftclock();
#endif
		}
		return;
	} else if (softclock_running == 0 &&
		   (softclock_ticks + 1) == hardclock_ticks) {
		softclock_ticks++;
	}
	CALLWHEEL_UNLOCK(s);
}
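
/*
 * A note on the cq_hint optimization used in softclock() below
 * (illustrative figures): each bucket records the earliest c_time of any
 * callout it holds.  With a 1024-entry wheel, a callout due at tick 5120
 * sits in bucket 0 and is encountered each time the wheel position wraps,
 * at ticks 1024, 2048, 3072 and 4096; since softclock_ticks < 5120 on
 * those visits, the hint lets softclock() skip the bucket without walking
 * its queue.
 */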

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock(void *v)
{
	struct callout_queue *bucket;
	struct callout *c;
	void (*func)(void *);
	void *arg;
	int s, idx;
	int steps = 0;

	CALLWHEEL_LOCK(s);

	softclock_running = 1;

#ifdef CALLWHEEL_STATS
	callwheel_softclocks.ev_count++;
#endif

	while (softclock_ticks != hardclock_ticks) {
		softclock_ticks++;
		idx = (int)(softclock_ticks & callwheelmask);
		bucket = &callwheel[idx];
		c = TAILQ_FIRST(&bucket->cq_q);
		if (c == NULL) {
#ifdef CALLWHEEL_STATS
			callwheel_softempty.ev_count++;
#endif
			continue;
		}
		if (softclock_ticks < bucket->cq_hint) {
#ifdef CALLWHEEL_STATS
			callwheel_hintworked.ev_count++;
#endif
			continue;
		}
		bucket->cq_hint = UQUAD_MAX;
		while (c != NULL) {
#ifdef CALLWHEEL_STATS
			callwheel_softchecks.ev_count++;
#endif
			if (c->c_time != softclock_ticks) {
				if (c->c_time < bucket->cq_hint)
					bucket->cq_hint = c->c_time;
				c = TAILQ_NEXT(c, c_link);
				if (++steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					CALLWHEEL_UNLOCK(s);
					CALLWHEEL_LOCK(s);
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				nextsoftcheck = TAILQ_NEXT(c, c_link);
				TAILQ_REMOVE(&bucket->cq_q, c, c_link);
#ifdef CALLWHEEL_STATS
				callwheel_sizes[idx]--;
				callwheel_fired.ev_count++;
				callwheel_count.ev_count--;
#endif
				func = c->c_func;
				arg = c->c_arg;
				c->c_func = NULL;
				c->c_flags &= ~CALLOUT_PENDING;
				CALLWHEEL_UNLOCK(s);
				(*func)(arg);
				CALLWHEEL_LOCK(s);
				steps = 0;
				c = nextsoftcheck;
			}
		}
		if (TAILQ_EMPTY(&bucket->cq_q))
			bucket->cq_hint = UQUAD_MAX;
	}
	nextsoftcheck = NULL;
	softclock_running = 0;
	CALLWHEEL_UNLOCK(s);
}

/*
 * callout_setsize:
 *
 *	Determine how many callwheel buckets are necessary and
 *	set the hash mask.  Called from allocsys().
 */
void
callout_setsize(void)
{

	for (callwheelsize = 1; callwheelsize < ncallout; callwheelsize <<= 1)
		/* loop */ ;
	callwheelmask = callwheelsize - 1;
}
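
/*
 * For example (illustrative): with ncallout = 200 the loop above settles
 * on callwheelsize = 256, the next power of two, so callwheelmask is
 * 0xff and "ticks & callwheelmask" replaces a modulus when hashing
 * callouts into buckets.
 */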

/*
 * callout_startup:
 *
 *	Initialize the callwheel buckets.
 */
void
callout_startup(void)
{
	int i;

	for (i = 0; i < callwheelsize; i++) {
		callwheel[i].cq_hint = UQUAD_MAX;
		TAILQ_INIT(&callwheel[i].cq_q);
	}

	simple_lock_init(&callwheel_slock);

#ifdef CALLWHEEL_STATS
	evcnt_attach_dynamic(&callwheel_collisions, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "collisions");
	evcnt_attach_dynamic(&callwheel_maxlength, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "maxlength");
	evcnt_attach_dynamic(&callwheel_count, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "count");
	evcnt_attach_dynamic(&callwheel_established, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "established");
	evcnt_attach_dynamic(&callwheel_fired, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "fired");
	evcnt_attach_dynamic(&callwheel_disestablished, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "disestablished");
	evcnt_attach_dynamic(&callwheel_changed, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "changed");
	evcnt_attach_dynamic(&callwheel_softclocks, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "softclocks");
	evcnt_attach_dynamic(&callwheel_softempty, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "softempty");
	evcnt_attach_dynamic(&callwheel_hintworked, EVCNT_TYPE_MISC,
	    NULL, "callwheel", "hintworked");
#endif /* CALLWHEEL_STATS */
}

/*
 * callout_init:
 *
 *	Initialize a callout structure so that it can be used
 *	by callout_reset() and callout_stop().
 */
void
callout_init(struct callout *c)
{

	memset(c, 0, sizeof(*c));
}

/*
 * callout_reset:
 *
 *	Establish or change a timeout.
 */
void
callout_reset(struct callout *c, int ticks, void (*func)(void *), void *arg)
{
	struct callout_queue *bucket;
	int s;

	if (ticks <= 0)
		ticks = 1;

	CALLWHEEL_LOCK(s);

	/*
	 * If this callout's timer is already running, cancel it
	 * before we modify it.
	 */
	if (c->c_flags & CALLOUT_PENDING) {
		callout_stop_locked(c);	/* Already locked */
#ifdef CALLWHEEL_STATS
		callwheel_changed.ev_count++;
#endif
	}

	c->c_arg = arg;
	c->c_func = func;
	c->c_flags = CALLOUT_ACTIVE | CALLOUT_PENDING;
	c->c_time = hardclock_ticks + ticks;

	bucket = &callwheel[c->c_time & callwheelmask];

#ifdef CALLWHEEL_STATS
	if (! TAILQ_EMPTY(&bucket->cq_q))
		callwheel_collisions.ev_count++;
#endif

	TAILQ_INSERT_TAIL(&bucket->cq_q, c, c_link);
	if (c->c_time < bucket->cq_hint)
		bucket->cq_hint = c->c_time;

#ifdef CALLWHEEL_STATS
	callwheel_count.ev_count++;
	callwheel_established.ev_count++;
	if (++callwheel_sizes[c->c_time & callwheelmask] >
	    callwheel_maxlength.ev_count)
		callwheel_maxlength.ev_count =
		    callwheel_sizes[c->c_time & callwheelmask];
#endif

	CALLWHEEL_UNLOCK(s);
}

/*
 * callout_stop_locked:
 *
 *	Disestablish a timeout.  Callwheel is locked.
 */
static void
callout_stop_locked(struct callout *c)
{
	struct callout_queue *bucket;

	/*
	 * Don't attempt to delete a callout that's not on the queue.
	 */
	if ((c->c_flags & CALLOUT_PENDING) == 0) {
		c->c_flags &= ~CALLOUT_ACTIVE;
		return;
	}

	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	if (nextsoftcheck == c)
		nextsoftcheck = TAILQ_NEXT(c, c_link);

	bucket = &callwheel[c->c_time & callwheelmask];
	TAILQ_REMOVE(&bucket->cq_q, c, c_link);
	if (TAILQ_EMPTY(&bucket->cq_q))
		bucket->cq_hint = UQUAD_MAX;
#ifdef CALLWHEEL_STATS
	callwheel_count.ev_count--;
	callwheel_disestablished.ev_count++;
	callwheel_sizes[c->c_time & callwheelmask]--;
#endif

	c->c_func = NULL;
}

/*
 * callout_stop:
 *
 *	Disestablish a timeout.  Callwheel is unlocked.  This is
 *	the standard entry point.
 */
void
callout_stop(struct callout *c)
{
	int s;

	CALLWHEEL_LOCK(s);
	callout_stop_locked(c);
	CALLWHEEL_UNLOCK(s);
}

#ifdef CALLWHEEL_STATS
/*
 * callout_showstats:
 *
 *	Display callout statistics.  Call it from DDB.
 */
void
callout_showstats(void)
{
	u_int64_t curticks;
	int s;

	s = splclock();
	curticks = softclock_ticks;
	splx(s);

	printf("Callwheel statistics:\n");
	printf("\tCallouts currently queued: %llu\n",
	    (long long) callwheel_count.ev_count);
	printf("\tCallouts established: %llu\n",
	    (long long) callwheel_established.ev_count);
	printf("\tCallouts disestablished: %llu\n",
	    (long long) callwheel_disestablished.ev_count);
	if (callwheel_changed.ev_count != 0)
		printf("\t\tOf those, %llu were changes\n",
		    (long long) callwheel_changed.ev_count);
	printf("\tCallouts that fired: %llu\n",
	    (long long) callwheel_fired.ev_count);
	printf("\tNumber of buckets: %d\n", callwheelsize);
	printf("\tNumber of hash collisions: %llu\n",
	    (long long) callwheel_collisions.ev_count);
	printf("\tMaximum hash chain length: %llu\n",
	    (long long) callwheel_maxlength.ev_count);
	printf("\tSoftclocks: %llu, Softchecks: %llu\n",
	    (long long) callwheel_softclocks.ev_count,
	    (long long) callwheel_softchecks.ev_count);
	printf("\t\tEmpty buckets seen: %llu\n",
	    (long long) callwheel_softempty.ev_count);
	printf("\t\tTimes hint saved scan: %llu\n",
	    (long long) callwheel_hintworked.ev_count);
}
#endif
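
/*
 * Example use of the interface above (a hypothetical driver sketch;
 * "mydev_softc" and the mydev_* functions are invented names for
 * illustration only):
 */
#if 0
struct mydev_softc {
	struct callout sc_ch;		/* watchdog callout */
};

static void
mydev_timeout(void *arg)
{
	struct mydev_softc *sc = arg;

	/* ... handle the timeout, then re-arm for one second later ... */
	callout_reset(&sc->sc_ch, hz, mydev_timeout, sc);
}

static void
mydev_attach(struct mydev_softc *sc)
{

	callout_init(&sc->sc_ch);
	callout_reset(&sc->sc_ch, hz, mydev_timeout, sc);
}

static void
mydev_detach(struct mydev_softc *sc)
{

	callout_stop(&sc->sc_ch);
}
#endif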

/*
 * Compute the number of ticks until the specified time.  Used to compute
 * the second argument to callout_reset() from an absolute time.
 */
int
hzto(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;
	int s;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case, but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints are 32-bit, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	s = splclock();
	sec = tv->tv_sec - time.tv_sec;
	usec = tv->tv_usec - time.tv_usec;
	splx(s);

	if (usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec <= 0)) {
		/*
		 * Would expire now or in the past.  Return 0 ticks.
		 * This is different from the legacy hzto() interface,
		 * and callers need to check for it.
		 */
		ticks = 0;
	} else if (sec <= (LONG_MAX / 1000000))
		ticks = (((sec * 1000000) + (unsigned long)usec + (tick - 1))
		    / tick) + 1;
	else if (sec <= (LONG_MAX / hz))
		ticks = (sec * hz) +
		    (((unsigned long)usec + (tick - 1)) / tick) + 1;
	else
		ticks = LONG_MAX;

	if (ticks > INT_MAX)
		ticks = INT_MAX;

	return ((int)ticks);
}
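
/*
 * Worked example (illustrative): with hz = 100 (tick = 10000 us) and a
 * target 2.5 s in the future, sec = 2 and usec = 500000, so
 * ticks = ((2 * 1000000 + 500000 + 9999) / 10000) + 1 = 251: the 250
 * whole ticks of the interval, plus one to allow for the current tick
 * to expire.
 */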

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (++profprocs == 1 && stathz != 0)
			psdiv = psratio;
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (--profprocs == 0 && stathz != 0)
			psdiv = 1;
	}
}

#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct proc *p;

	p = curproc;
	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
	} else {
#ifdef GPROF
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef PROC_PC
		if (p && p->p_flag & P_PROFIL)
			addupc_intr(p, PROC_PC(p));
#endif
	}
}
#endif

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l;
	struct proc *p;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
	l = curlwp;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL && profsrc == PROFSRC_CLOCK)
			addupc_intr(p, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p && profsrc == PROFSRC_CLOCK && p->p_flag & P_PROFIL)
			addupc_intr(p, LWP_PC(l));
#endif
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else
			spc->spc_cp_time[CP_IDLE]++;
	}
	spc->spc_pscnt = psdiv;

	if (l != NULL) {
		++p->p_cpticks;
		/*
		 * If no separate schedclock is provided, call it here
		 * at ~~12-25 Hz, ~~16 Hz is best
		 */
		if (schedhz == 0)
			if ((++ci->ci_schedstate.spc_schedticks & 3) == 0)
				schedclock(l);
	}
}
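
/*
 * Divider example (illustrative figures): with stathz = 128 and
 * profhz = 1024, starting profiling sets psdiv = psratio = 8 and the
 * clock above is reprogrammed to profhz.  statclock() then takes a
 * profiling sample on every call, but decrements spc_pscnt and does the
 * CP_*-time and p_*ticks accounting only on every 8th call, keeping the
 * statistics rate at an effective 128 Hz.
 */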

#ifdef NTP	/* NTP phase-locked loop in kernel */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency.  The implementation is of an adaptive-parameter,
 * hybrid phase/frequency-lock loop (PLL/FLL).  The routine computes new
 * time and frequency offset estimates for each call.  If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP).  If the caller's time is far different than the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For uncompensated quartz crystal oscillators and nominal update
 * intervals less than 1024 s, operation should be in phase-lock mode
 * (STA_FLL = 0), where the loop is disciplined to phase.  For update
 * intervals greater than this, operation should be in frequency-lock
 * mode (STA_FLL = 1), where the loop is disciplined to frequency.
 *
 * Note: splclock() is in effect.
 */
void
hardupdate(long offset)
{
	long ltemp, mtemp;

	if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
		return;
	ltemp = offset;
#ifdef PPS_SYNC
	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
		ltemp = pps_offset;
#endif /* PPS_SYNC */

	/*
	 * Scale the phase adjustment and clamp to the operating range.
	 */
	if (ltemp > MAXPHASE)
		time_offset = MAXPHASE << SHIFT_UPDATE;
	else if (ltemp < -MAXPHASE)
		time_offset = -(MAXPHASE << SHIFT_UPDATE);
	else
		time_offset = ltemp << SHIFT_UPDATE;

	/*
	 * Select whether the frequency is to be controlled and in which
	 * mode (PLL or FLL).  Clamp to the operating range.  Ugly
	 * multiply/divide should be replaced someday.
	 */
	if (time_status & STA_FREQHOLD || time_reftime == 0)
		time_reftime = time.tv_sec;
	mtemp = time.tv_sec - time_reftime;
	time_reftime = time.tv_sec;
	if (time_status & STA_FLL) {
		if (mtemp >= MINSEC) {
			ltemp = ((time_offset / mtemp) << (SHIFT_USEC -
			    SHIFT_UPDATE));
			if (ltemp < 0)
				time_freq -= -ltemp >> SHIFT_KH;
			else
				time_freq += ltemp >> SHIFT_KH;
		}
	} else {
		if (mtemp < MAXSEC) {
			ltemp *= mtemp;
			if (ltemp < 0)
				time_freq -= -ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
			else
				time_freq += ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
		}
	}
	if (time_freq > time_tolerance)
		time_freq = time_tolerance;
	else if (time_freq < -time_tolerance)
		time_freq = -time_tolerance;
}
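
/*
 * PLL example (illustrative; the SHIFT_* constants live in timex.h): in
 * PLL mode with time_constant = 0, an offset of +100 us loads time_offset
 * with 100 us scaled by SHIFT_UPDATE, and each second hardclock() feeds
 * back time_offset >> (SHIFT_KG + time_constant) of it, so the residual
 * offset decays exponentially with a time constant of 2^SHIFT_KG seconds.
 */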

#ifdef PPS_SYNC
/*
 * hardpps() - discipline CPU clock oscillator to external PPS signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal.  It measures the PPS phase
 * and leaves it in a handy spot for the hardclock() routine.  It
 * integrates successive PPS phase differences and calculates the
 * frequency offset.  This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware counter
 * value at the on-time PPS signal transition.
 *
 * Note that, on some Unix systems, this routine runs at an interrupt
 * priority level higher than the timer interrupt routine hardclock().
 * Therefore, the variables used are distinct from the hardclock()
 * variables, except for certain exceptions: The PPS frequency pps_freq
 * and phase pps_offset variables are determined by this routine and
 * updated atomically.  The time_tolerance variable can be considered a
 * constant, since it is infrequently changed, and then only when the
 * PPS signal is disabled.  The watchdog counter pps_valid is updated
 * once per second by hardclock() and is atomically cleared in this
 * routine.
 */
void
hardpps(struct timeval *tvp,		/* time at PPS */
	long usec			/* hardware counter at PPS */)
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * An occasional glitch can be produced when the PPS interrupt
	 * occurs in the hardclock() routine before the time variable is
	 * updated.  Here the offset is discarded when the difference
	 * between it and the last one is greater than tick/2, but not
	 * if the interval since the first discard exceeds 30 s.
	 */
	time_status |= STA_PPSSIGNAL;
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
	pps_valid = 0;
	u_usec = -tvp->tv_usec;
	if (u_usec < -500000)
		u_usec += 1000000;
	v_usec = pps_offset - u_usec;
	if (v_usec < 0)
		v_usec = -v_usec;
	if (v_usec > (tick >> 1)) {
		if (pps_glitch > MAXGLITCH) {
			pps_glitch = 0;
			pps_tf[2] = u_usec;
			pps_tf[1] = u_usec;
		} else {
			pps_glitch++;
			u_usec = pps_offset;
		}
	} else
		pps_glitch = 0;

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * time.  The median sample becomes the time offset estimate; the
	 * difference between the other two samples becomes the time
	 * dispersion (jitter) estimate.
	 */
	pps_tf[2] = pps_tf[1];
	pps_tf[1] = pps_tf[0];
	pps_tf[0] = u_usec;
	if (pps_tf[0] > pps_tf[1]) {
		if (pps_tf[1] > pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 0 1 2 */
			v_usec = pps_tf[0] - pps_tf[2];
		} else if (pps_tf[2] > pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 2 0 1 */
			v_usec = pps_tf[2] - pps_tf[1];
		} else {
			pps_offset = pps_tf[2];		/* 0 2 1 */
			v_usec = pps_tf[0] - pps_tf[1];
		}
	} else {
		if (pps_tf[1] < pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 2 1 0 */
			v_usec = pps_tf[2] - pps_tf[0];
		} else if (pps_tf[2] < pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 1 0 2 */
			v_usec = pps_tf[1] - pps_tf[2];
		} else {
			pps_offset = pps_tf[2];		/* 1 2 0 */
			v_usec = pps_tf[1] - pps_tf[0];
		}
	}
	if (v_usec > MAXTIME)
		pps_jitcnt++;
	v_usec = (v_usec << PPS_AVG) - pps_jitter;
	if (v_usec < 0)
		pps_jitter -= -v_usec >> PPS_AVG;
	else
		pps_jitter += v_usec >> PPS_AVG;
	if (pps_jitter > (MAXTIME >> 1))
		time_status |= STA_PPSJITTER;
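
	/*
	 * Median-filter example (illustrative): with samples
	 * pps_tf[0] = 5, pps_tf[1] = 3 and pps_tf[2] = -7, the first
	 * branch above selects pps_offset = 3 (the median) and
	 * v_usec = 5 - (-7) = 12 (the spread of the other two samples)
	 * as the contribution to the jitter estimate.
	 */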

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows.  At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval.  This code
	 * is deliciously complicated by the fact that valid differences
	 * may exceed the value of tick when using long calibration
	 * intervals and small ticks.  Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;
	pps_usec -= pps_freq;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	if (pps_count < (1 << pps_shift))
		return;
	pps_count = 0;
	pps_calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	if (v_usec < 0)
		v_usec = -(-v_usec >> pps_shift);
	else
		v_usec = v_usec >> pps_shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error.  The number of timer ticks during
	 * the interval may vary +-1 tick.  Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation.  If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > time_tolerance || v_usec < -time_tolerance) {
		pps_errcnt++;
		pps_shift = PPS_SHIFT;
		pps_intcnt = 0;
		time_status |= STA_PPSERROR;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * frequency.  The median sample becomes the frequency offset
	 * estimate; the difference between the other two samples
	 * becomes the frequency dispersion (stability) estimate.
	 */
	pps_ff[2] = pps_ff[1];
	pps_ff[1] = pps_ff[0];
	pps_ff[0] = v_usec;
	if (pps_ff[0] > pps_ff[1]) {
		if (pps_ff[1] > pps_ff[2]) {
			u_usec = pps_ff[1];		/* 0 1 2 */
			v_usec = pps_ff[0] - pps_ff[2];
		} else if (pps_ff[2] > pps_ff[0]) {
			u_usec = pps_ff[0];		/* 2 0 1 */
			v_usec = pps_ff[2] - pps_ff[1];
		} else {
			u_usec = pps_ff[2];		/* 0 2 1 */
			v_usec = pps_ff[0] - pps_ff[1];
		}
	} else {
		if (pps_ff[1] < pps_ff[2]) {
			u_usec = pps_ff[1];		/* 2 1 0 */
			v_usec = pps_ff[2] - pps_ff[0];
		} else if (pps_ff[2] < pps_ff[0]) {
			u_usec = pps_ff[0];		/* 1 0 2 */
			v_usec = pps_ff[1] - pps_ff[2];
		} else {
			u_usec = pps_ff[2];		/* 1 2 0 */
			v_usec = pps_ff[1] - pps_ff[0];
		}
	}

	/*
	 * Here the frequency dispersion (stability) is updated.  If it
	 * is less than one-fourth the maximum (MAXFREQ), the frequency
	 * offset is updated as well, but clamped to the tolerance.  It
	 * will be processed later by the hardclock() routine.
	 */
	v_usec = (v_usec >> 1) - pps_stabil;
	if (v_usec < 0)
		pps_stabil -= -v_usec >> PPS_AVG;
	else
		pps_stabil += v_usec >> PPS_AVG;
	if (pps_stabil > MAXFREQ >> 2) {
		pps_stbcnt++;
		time_status |= STA_PPSWANDER;
		return;
	}
	if (time_status & STA_PPSFREQ) {
		if (u_usec < 0) {
			pps_freq -= -u_usec >> PPS_AVG;
			if (pps_freq < -time_tolerance)
				pps_freq = -time_tolerance;
			u_usec = -u_usec;
		} else {
			pps_freq += u_usec >> PPS_AVG;
			if (pps_freq > time_tolerance)
				pps_freq = time_tolerance;
		}
	}

	/*
	 * Here the calibration interval is adjusted.  If the maximum
	 * time difference is greater than tick / 4, reduce the interval
	 * by half.  If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << pps_shift > bigtick >> 2) {
		pps_intcnt = 0;
		if (pps_shift > PPS_SHIFT)
			pps_shift--;
	} else if (pps_intcnt >= 4) {
		pps_intcnt = 0;
		if (pps_shift < PPS_SHIFTMAX)
			pps_shift++;
	} else
		pps_intcnt++;
}
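
/*
 * Interval-adaptation example (illustrative): per the quality-monitor
 * comments near the top of the file, calibration intervals run from 4 s
 * to 256 s, i.e. pps_shift between PPS_SHIFT and PPS_SHIFTMAX (2 and 8
 * for that range).  Four consecutive quiet intervals double the interval
 * (pps_shift++); a single noisy one halves it again (pps_shift--).
 */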
#endif /* PPS_SYNC */
#endif /* NTP */

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(void *where, size_t *sizep)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
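
/*
 * Example consumer (a userland sketch, not kernel code): the structure
 * filled in above is what sysctl(3) returns for kern.clockrate.
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	struct clockinfo ci;
	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
	size_t len = sizeof(ci);

	if (sysctl(mib, 2, &ci, &len, NULL, 0) == -1)
		return (1);
	printf("hz=%d tick=%d tickadj=%d stathz=%d profhz=%d\n",
	    ci.hz, ci.tick, ci.tickadj, ci.stathz, ci.profhz);
	return (0);
}
#endif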