/*	$NetBSD: kern_clock.c,v 1.96 2005/12/11 12:24:29 christos Exp $	*/

/*-
 * Copyright (c) 2000, 2004 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.96 2005/12/11 12:24:29 christos Exp $");

#include "opt_ntp.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/timex.h>
#include <sys/sched.h>
#include <sys/time.h>

#include <machine/cpu.h>
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
#include <machine/intr.h>
#endif

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversarial process from always yielding
 * the CPU just before its quantum expires; without it, such a process
 * would never be charged any CPU ticks.  The mean frequency of the second
 * timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
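
/*
 * For example (illustrative arithmetic, not taken from this file): with
 * stathz = 128 and profhz = 1024, psratio = profhz / stathz = 8.  While
 * a process is being profiled the clock runs at profhz and the
 * statistics code processes only every psratio-th tick, which is
 * exactly what the spc_pscnt countdown in statclock() below implements.
 */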

#ifdef NTP	/* NTP phase-locked loop in kernel */
/*
 * Phase/frequency-lock loop (PLL/FLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL/FLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as a result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by an ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error bound
 * growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_state = TIME_OK;	/* clock state */
int time_status = STA_UNSYNC;	/* clock status bits */
long time_offset = 0;		/* time offset (us) */
long time_constant = 0;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = MAXPHASE;	/* maximum error (us) */
long time_esterror = MAXPHASE;	/* estimated error (us) */
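
/*
 * These variables are manipulated from user space via the
 * ntp_adjtime(2) system call.  A minimal (illustrative; error handling
 * omitted) user-space query might look like:
 *
 *	#include <sys/timex.h>
 *
 *	struct timex tx;
 *	tx.modes = 0;			(read-only query)
 *	int state = ntp_adjtime(&tx);	(returns TIME_OK, TIME_INS, ...)
 *	printf("offset %ld us status %#x\n", tx.offset, tx.status);
 */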

/*
 * The following variables establish the state of the PLL/FLL and the
 * residual time and frequency offset of the local clock. The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started. Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed from time_phase and time_freq at each
 * seconds rollover.
 *
 * time_reftime is the second's portion of the system time at the last
 * call to ntp_adjtime(). It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = 0;		/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
long time_reftime = 0;		/* time at last adjustment (s) */

#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC). The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime(). pps_count counts the seconds of the calibration
 * interval, the duration of which is nominally pps_shift in powers of
 * two.
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion (jitter) measured by
 * this filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion (wander) measured
 * by this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time. Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update. It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from current nominal offset. It is used
 * mainly to suppress error bursts due to priority conflicts between the
 * PPS interrupt and timer interrupt.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm. It's just too complicated for words.
 *
 * pps_kc_hardpps_source contains an arbitrary value that uniquely
 * identifies the currently bound source of the PPS signal, or NULL
 * if no source is bound.
 *
 * pps_kc_hardpps_mode indicates which transitions, if any, of the PPS
 * signal should be reported.
 */
struct timeval pps_time;	/* kernel time at last interval */
long pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
long pps_offset = 0;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */
long pps_ff[] = {0, 0, 0};	/* pps frequency offset median filter */
long pps_freq = 0;		/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
long pps_usec = 0;		/* microsec counter at last interval */
long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
int pps_glitch = 0;		/* pps signal glitch counter */
int pps_count = 0;		/* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
int pps_intcnt = 0;		/* intervals at current duration */
void *pps_kc_hardpps_source = NULL; /* current PPS supplier's identifier */
int pps_kc_hardpps_mode = 0;	/* interesting edges of PPS signal */

/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us).
 */
long pps_jitcnt = 0;		/* jitter limit exceeded */
long pps_calcnt = 0;		/* calibration intervals */
long pps_errcnt = 0;		/* calibration errors */
long pps_stbcnt = 0;		/* stability limit exceeded */
#endif /* PPS_SYNC */

#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock is configured on the system.
 */
#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;		/* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface. The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters. The clock_cpu variable
 * contains the offset between the system clock and the HIGHBALL clock
 * for use in disciplining the kernel time variable.
 */
extern struct timeval clock_offset; /* Highball clock offset */
long clock_cpu = 0;		/* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */
#endif /* NTP */


/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	volatile struct timeval *tp = (t); \
	long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
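
/*
 * Usage sketch (illustrative): advance mono_time by one tick's worth of
 * microseconds, as hardclock() below does:
 *
 *	BUMPTIME(&mono_time, delta);
 *
 * Note that the macro performs at most one carry into tv_sec, so the
 * increment must remain well under one second.
 */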

int	stathz;
int	profhz;
int	profsrc;
int	schedhz;
int	profprocs;
int	hardclock_ticks;
static int statscheddiv; /* stat => sched divider (used if schedhz == 0) */
static int psdiv;			/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
#ifndef NTP
static int tickfixcnt;			/* accumulated fractional error */
#else
int	fixtick;			/* used by NTP for same */
int	shifthz;
#endif

/*
 * We might want ldd to load both words of time at once; for that to
 * succeed, the structure must be quadword aligned.  The sparc already
 * does this, and that it has worked so far is a fluke.
 */
volatile struct	timeval time  __attribute__((__aligned__(__alignof__(quad_t))));
volatile struct	timeval mono_time;

void	*softclock_si;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("initclocks: unable to register softclock intr");
#endif

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz/rrticks, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
	rrticks = hz / 10;
	if (schedhz == 0) {
		/* 16Hz is best */
		statscheddiv = i / 16;
		if (statscheddiv <= 0)
			panic("statscheddiv");
	}

#ifdef NTP
	switch (hz) {
	case 1:
		shifthz = SHIFT_SCALE - 0;
		break;
	case 2:
		shifthz = SHIFT_SCALE - 1;
		break;
	case 4:
		shifthz = SHIFT_SCALE - 2;
		break;
	case 8:
		shifthz = SHIFT_SCALE - 3;
		break;
	case 16:
		shifthz = SHIFT_SCALE - 4;
		break;
	case 32:
		shifthz = SHIFT_SCALE - 5;
		break;
	case 50:
	case 60:
	case 64:
		shifthz = SHIFT_SCALE - 6;
		break;
	case 96:
	case 100:
	case 128:
		shifthz = SHIFT_SCALE - 7;
		break;
	case 256:
		shifthz = SHIFT_SCALE - 8;
		break;
	case 512:
		shifthz = SHIFT_SCALE - 9;
		break;
	case 1000:
	case 1024:
		shifthz = SHIFT_SCALE - 10;
		break;
	case 1200:
	case 2048:
		shifthz = SHIFT_SCALE - 11;
		break;
	case 4096:
		shifthz = SHIFT_SCALE - 12;
		break;
	case 8192:
		shifthz = SHIFT_SCALE - 13;
		break;
	case 16384:
		shifthz = SHIFT_SCALE - 14;
		break;
	case 32768:
		shifthz = SHIFT_SCALE - 15;
		break;
	case 65536:
		shifthz = SHIFT_SCALE - 16;
		break;
	default:
		panic("weird hz");
	}
	if (fixtick == 0) {
		/*
		 * Give MD code a chance to set this to a better
		 * value; but, if it doesn't, we should.
		 */
		fixtick = (1000000 - (hz*tick));
	}
#endif
}
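
/*
 * Worked example (illustrative) of the fixtick computation above: with
 * hz = 60, tick = 1000000 / hz = 16666, so hz * tick = 999960 and
 * fixtick = 1000000 - 999960 = 40.  Those 40 leftover microseconds per
 * second are redistributed by the NTP code in hardclock() below.
 */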

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct lwp *l;
	struct proc *p;
	int delta;
	extern int tickdelta;
	extern long timedelta;
	struct cpu_info *ci = curcpu();
	struct ptimer *pt;
#ifdef NTP
	int time_update;
	int ltemp;
#ifdef EXT_CLOCK
	struct timeval clock_ext;	/* external clock sample */
	struct timeval clock_delta;	/* external vs. kernel time */
#endif /* EXT_CLOCK */
#endif

	l = curlwp;
	if (l) {
		p = l->l_proc;
		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		if (CLKF_USERMODE(frame) && p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_virtual)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
		if (p->p_timers &&
		    (pt = LIST_FIRST(&p->p_timers->pts_prof)) != NULL)
			if (itimerdecr(pt, tick) == 0)
				itimerfire(pt);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	if ((--ci->ci_schedstate.spc_rrticks) <= 0)
		roundrobin(ci);

#if defined(MULTIPROCESSOR)
	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;
#endif

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' would not divide the second evenly into
	 * milliseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
	hardclock_ticks++;
	delta = tick;

#ifndef NTP
	if (tickfix) {
		tickfixcnt += tickfix;
		if (tickfixcnt >= tickfixinterval) {
			delta++;
			tickfixcnt -= tickfixinterval;
		}
	}
#endif /* !NTP */
	/* Imprecise 4bsd adjtime() handling */
	if (timedelta != 0) {
		delta += tickdelta;
		timedelta -= tickdelta;
	}

#ifdef notyet
	microset();
#endif

#ifndef NTP
	BUMPTIME(&time, delta);		/* XXX Now done using NTP code below */
#endif
	BUMPTIME(&mono_time, delta);

#ifdef NTP
	time_update = delta;

	/*
	 * Compute the phase adjustment. If the low-order bits
	 * (time_phase) of the update overflow, bump the high-order bits
	 * (time_update).
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		time_update -= ltemp;
	} else if (time_phase >= FINEUSEC) {
		ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		time_update += ltemp;
	}
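
	/*
	 * Example (illustrative, assuming SHIFT_SCALE is 22 as in
	 * timex.h): if time_phase has accumulated to (3 << SHIFT_SCALE)
	 * + r, then ltemp is 3, three whole microseconds move into
	 * time_update, and the fractional remainder r stays behind in
	 * time_phase.
	 */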

#ifdef HIGHBALL
	/*
	 * If the HIGHBALL board is installed, we need to adjust the
	 * external clock offset in order to close the hardware feedback
	 * loop. This will adjust the external clock phase and frequency
	 * in small amounts. The additional phase noise and frequency
	 * wander this causes should be minimal. We also need to
	 * discipline the kernel time variable, since the PLL is used to
	 * discipline the external clock. If the Highball board is not
	 * present, we discipline kernel time with the PLL as usual. We
	 * assume that the external clock phase adjustment (time_update)
	 * and kernel phase adjustment (clock_cpu) are less than the
	 * value of tick.
	 */
	clock_offset.tv_usec += time_update;
	if (clock_offset.tv_usec >= 1000000) {
		clock_offset.tv_sec++;
		clock_offset.tv_usec -= 1000000;
	}
	if (clock_offset.tv_usec < 0) {
		clock_offset.tv_sec--;
		clock_offset.tv_usec += 1000000;
	}
	time.tv_usec += clock_cpu;
	clock_cpu = 0;
#else
	time.tv_usec += time_update;
#endif /* HIGHBALL */

	/*
	 * On rollover of the second the phase adjustment to be used for
	 * the next second is calculated. Also, the maximum error is
	 * increased by the tolerance. If the PPS frequency discipline
	 * code is present, the phase is increased to compensate for the
	 * CPU clock oscillator frequency error.
	 *
	 * On a 32-bit machine and given parameters in the timex.h
	 * header file, the maximum phase adjustment is +-512 ms and
	 * maximum frequency offset is a tad less than +-512 ppm. On a
	 * 64-bit machine, you shouldn't need to ask.
	 */
	if (time.tv_usec >= 1000000) {
		time.tv_usec -= 1000000;
		time.tv_sec++;
		time_maxerror += time_tolerance >> SHIFT_USEC;

		/*
		 * Leap second processing. If in leap-insert state at
		 * the end of the day, the system clock is set back one
		 * second; if in leap-delete state, the system clock is
		 * set ahead one second. The microtime() routine or
		 * external clock driver will ensure that reported time
		 * is always monotonic. The ugly divides should be
		 * replaced.
		 */
		switch (time_state) {
		case TIME_OK:
			if (time_status & STA_INS)
				time_state = TIME_INS;
			else if (time_status & STA_DEL)
				time_state = TIME_DEL;
			break;

		case TIME_INS:
			if (time.tv_sec % 86400 == 0) {
				time.tv_sec--;
				time_state = TIME_OOP;
			}
			break;

		case TIME_DEL:
			if ((time.tv_sec + 1) % 86400 == 0) {
				time.tv_sec++;
				time_state = TIME_WAIT;
			}
			break;

		case TIME_OOP:
			time_state = TIME_WAIT;
			break;

		case TIME_WAIT:
			if (!(time_status & (STA_INS | STA_DEL)))
				time_state = TIME_OK;
			break;
		}

		/*
		 * Compute the phase adjustment for the next second. In
		 * PLL mode, the offset is reduced by a fixed factor
		 * times the time constant. In FLL mode the offset is
		 * used directly. In either mode, the maximum phase
		 * adjustment for each second is clamped so as to spread
		 * the adjustment over not more than the number of
		 * seconds between updates.
		 */
		if (time_offset < 0) {
			ltemp = -time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset += ltemp;
			time_adj = -ltemp << (shifthz - SHIFT_UPDATE);
		} else if (time_offset > 0) {
			ltemp = time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset -= ltemp;
			time_adj = ltemp << (shifthz - SHIFT_UPDATE);
		} else
			time_adj = 0;

		/*
		 * Compute the frequency estimate and additional phase
		 * adjustment due to frequency error for the next
		 * second. When the PPS signal is engaged, gnaw on the
		 * watchdog counter and update the frequency computed by
		 * the pll and the PPS signal.
		 */
#ifdef PPS_SYNC
		pps_valid++;
		if (pps_valid == PPS_VALID) {
			pps_jitter = MAXTIME;
			pps_stabil = MAXFREQ;
			time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			    STA_PPSWANDER | STA_PPSERROR);
		}
		ltemp = time_freq + pps_freq;
#else
		ltemp = time_freq;
#endif /* PPS_SYNC */

		if (ltemp < 0)
			time_adj -= -ltemp >> (SHIFT_USEC - shifthz);
		else
			time_adj += ltemp >> (SHIFT_USEC - shifthz);
		time_adj += (long)fixtick << shifthz;

		/*
		 * When the CPU clock oscillator frequency is not a
		 * power of 2 in Hz, shifthz is only an approximate
		 * scale factor.
		 *
		 * To determine the adjustment, you can do the following:
		 *   bc -q
		 *   scale=24
		 *   obase=2
		 *   idealhz/realhz
		 * where `idealhz' is the next higher power of 2, and `realhz'
		 * is the actual value.  You may need to factor this result
		 * into a sequence of 2 multipliers to get better precision.
		 *
		 * Likewise, the error can be calculated with (e.g. for 100Hz):
		 *   bc -q
		 *   scale=24
		 *   ((1+2^-2+2^-5)*(1-2^-10)*realhz-idealhz)/idealhz
		 * (and then multiply by 1000000 to get ppm).
		 */
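		/*
		 * Worked example (illustrative) for hz = 100: idealhz is
		 * 128 and 128 / 100 = 1.28.  The factored form used in
		 * the 50/100 case below is
		 * (1 + 2^-2 + 2^-5) * (1 - 2^-10) = 1.28125 * 0.99902...
		 * which is about 1.279999, i.e. roughly 1 ppm of residual
		 * error, matching the comment there.
		 */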
		switch (hz) {
		case 60:
			/* A factor of 1.000100010001 gives about 15ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 4);
				time_adj -= (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 4);
				time_adj += (time_adj >> 8);
			}
			break;

		case 96:
			/* A factor of 1.0101010101 gives about 244ppm error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2);
				time_adj -= (-time_adj >> 4) + (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 2);
				time_adj += (time_adj >> 4) + (time_adj >> 8);
			}
			break;

		case 50:
		case 100:
			/* A factor of 1.010001111010111 gives about 1ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
				time_adj += (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 2) + (time_adj >> 5);
				time_adj -= (time_adj >> 10);
			}
			break;

		case 1000:
			/* A factor of 1.000001100010100001 gives about 50ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 6) + (-time_adj >> 11);
				time_adj -= (-time_adj >> 7);
			} else {
				time_adj += (time_adj >> 6) + (time_adj >> 11);
				time_adj += (time_adj >> 7);
			}
			break;

		case 1200:
			/* A factor of 1.1011010011100001 gives about 64ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 1) + (-time_adj >> 6);
				time_adj -= (-time_adj >> 3) + (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 1) + (time_adj >> 6);
				time_adj += (time_adj >> 3) + (time_adj >> 10);
			}
			break;
		}

#ifdef EXT_CLOCK
		/*
		 * If an external clock is present, it is necessary to
		 * discipline the kernel time variable anyway, since not
		 * all system components use the microtime() interface.
		 * Here, the time offset between the external clock and
		 * kernel time variable is computed every so often.
		 */
		clock_count++;
		if (clock_count > CLOCK_INTERVAL) {
			clock_count = 0;
			microtime(&clock_ext);
			clock_delta.tv_sec = clock_ext.tv_sec - time.tv_sec;
			clock_delta.tv_usec = clock_ext.tv_usec -
			    time.tv_usec;
			if (clock_delta.tv_usec < 0)
				clock_delta.tv_sec--;
			if (clock_delta.tv_usec >= 500000) {
				clock_delta.tv_usec -= 1000000;
				clock_delta.tv_sec++;
			}
			if (clock_delta.tv_usec < -500000) {
				clock_delta.tv_usec += 1000000;
				clock_delta.tv_sec--;
			}
			if (clock_delta.tv_sec > 0 ||
			    (clock_delta.tv_sec == 0 &&
			    clock_delta.tv_usec > MAXPHASE) ||
			    clock_delta.tv_sec < -1 ||
			    (clock_delta.tv_sec == -1 &&
			    clock_delta.tv_usec < -MAXPHASE)) {
				time = clock_ext;
				clock_delta.tv_sec = 0;
				clock_delta.tv_usec = 0;
			}
#ifdef HIGHBALL
			clock_cpu = clock_delta.tv_usec;
#else /* HIGHBALL */
			hardupdate(clock_delta.tv_usec);
#endif /* HIGHBALL */
		}
#endif /* EXT_CLOCK */
	}

#endif /* NTP */

	/*
	 * Update real-time timeout queue.
	 * Process callouts at a very low CPU priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (callout_hardclock()) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do
			 * it now.
			 */
			spllowersoftclock();
			KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
			softclock(NULL);
			KERNEL_UNLOCK();
		} else {
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
			softintr_schedule(softclock_si);
#else
			setsoftclock();
#endif
		}
	}
}

/*
 * Compute number of hz until specified time.  Used to compute second
 * argument to callout_reset() from an absolute time.
 */
int
hzto(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;
	int s;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case, but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints are 32-bit, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	s = splclock();
	sec = tv->tv_sec - time.tv_sec;
	usec = tv->tv_usec - time.tv_usec;
	splx(s);

	if (usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec <= 0)) {
		/*
		 * Would expire now or in the past.  Return 0 ticks.
		 * This is different from the legacy hzto() interface,
		 * and callers need to check for it.
		 */
		ticks = 0;
	} else if (sec <= (LONG_MAX / 1000000))
		ticks = (((sec * 1000000) + (unsigned long)usec + (tick - 1))
		    / tick) + 1;
	else if (sec <= (LONG_MAX / hz))
		ticks = (sec * hz) +
		    (((unsigned long)usec + (tick - 1)) / tick) + 1;
	else
		ticks = LONG_MAX;

	if (ticks > INT_MAX)
		ticks = INT_MAX;

	return ((int)ticks);
}
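
/*
 * Usage sketch (illustrative): arming a callout to fire at an absolute
 * time `when' (a struct timeval on the same timescale as `time'):
 *
 *	callout_reset(&sc->sc_ch, hzto(&when), myhandler, sc);
 *
 * Here `sc', `sc_ch' and `myhandler' are hypothetical.  Note that
 * hzto() returns 0 for times that are now or in the past, which such
 * callers must check for.
 */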

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (++profprocs == 1 && stathz != 0)
			psdiv = psratio;
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/*
		 * This is only necessary if using the clock as the
		 * profiling source.
		 */
		if (--profprocs == 0 && stathz != 0)
			psdiv = 1;
	}
}

#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct proc *p;

	p = curproc;
	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
	} else {
#ifdef GPROF
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef PROC_PC
		if (p && p->p_flag & P_PROFIL)
			addupc_intr(p, PROC_PC(p));
#endif
	}
}
#endif

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l;
	struct proc *p;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
	l = curlwp;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL && profsrc == PROFSRC_CLOCK)
			addupc_intr(p, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p && profsrc == PROFSRC_CLOCK && p->p_flag & P_PROFIL)
			addupc_intr(p, LWP_PC(l));
#endif
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else
			spc->spc_cp_time[CP_IDLE]++;
	}
	spc->spc_pscnt = psdiv;

	if (l != NULL) {
		++p->p_cpticks;
		/*
		 * If no separate schedclock is provided, call it here
		 * at about 16 Hz.
		 */
		if (schedhz == 0)
			if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
				schedclock(l);
				ci->ci_schedstate.spc_schedticks = statscheddiv;
			}
	}
}


#ifdef NTP	/* NTP phase-locked loop in kernel */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency. The implementation is of an adaptive-parameter,
 * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new
 * time and frequency offset estimates for each call. If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP). If the caller's time is far different than the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For uncompensated quartz crystal oscillators and nominal update
 * intervals less than 1024 s, operation should be in phase-lock mode
 * (STA_FLL = 0), where the loop is disciplined to phase. For update
 * intervals greater than this, operation should be in frequency-lock
 * mode (STA_FLL = 1), where the loop is disciplined to frequency.
 *
 * Note: splclock() is in effect.
 */
void
hardupdate(long offset)
{
	long ltemp, mtemp;

	if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
		return;
	ltemp = offset;
#ifdef PPS_SYNC
	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
		ltemp = pps_offset;
#endif /* PPS_SYNC */

	/*
	 * Scale the phase adjustment and clamp to the operating range.
	 */
	if (ltemp > MAXPHASE)
		time_offset = MAXPHASE << SHIFT_UPDATE;
	else if (ltemp < -MAXPHASE)
		time_offset = -(MAXPHASE << SHIFT_UPDATE);
	else
		time_offset = ltemp << SHIFT_UPDATE;

	/*
	 * Select whether the frequency is to be controlled and in which
	 * mode (PLL or FLL). Clamp to the operating range. Ugly
	 * multiply/divide should be replaced someday.
	 */
	if (time_status & STA_FREQHOLD || time_reftime == 0)
		time_reftime = time.tv_sec;
	mtemp = time.tv_sec - time_reftime;
	time_reftime = time.tv_sec;
	if (time_status & STA_FLL) {
		if (mtemp >= MINSEC) {
			ltemp = ((time_offset / mtemp) << (SHIFT_USEC -
			    SHIFT_UPDATE));
			if (ltemp < 0)
				time_freq -= -ltemp >> SHIFT_KH;
			else
				time_freq += ltemp >> SHIFT_KH;
		}
	} else {
		if (mtemp < MAXSEC) {
			ltemp *= mtemp;
			if (ltemp < 0)
				time_freq -= -ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
			else
				time_freq += ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
		}
	}
	if (time_freq > time_tolerance)
		time_freq = time_tolerance;
	else if (time_freq < -time_tolerance)
		time_freq = -time_tolerance;
}
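
/*
 * Worked example (illustrative, assuming SHIFT_UPDATE = 12 and
 * SHIFT_KG = 6 as in the classic kernel PLL): in PLL mode with
 * time_constant = 0, hardupdate(1000) sets time_offset to
 * 1000 << SHIFT_UPDATE, and each seconds rollover in hardclock() then
 * folds in about time_offset >> (SHIFT_KG + time_constant), so the
 * remaining offset decays by roughly 1/64 per second.
 */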

#ifdef PPS_SYNC
/*
 * hardpps() - discipline CPU clock oscillator to external PPS signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal. It measures the PPS phase
 * and leaves it in a handy spot for the hardclock() routine. It
 * integrates successive PPS phase differences and calculates the
 * frequency offset. This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware counter
 * value at the on-time PPS signal transition.
 *
 * Note that, on some Unix systems, this routine runs at an interrupt
 * priority level higher than the timer interrupt routine hardclock().
 * Therefore, the variables used are distinct from the hardclock()
 * variables, with a few exceptions: the PPS frequency pps_freq
 * and phase pps_offset variables are determined by this routine and
 * updated atomically. The time_tolerance variable can be considered a
 * constant, since it is infrequently changed, and then only when the
 * PPS signal is disabled. The watchdog counter pps_valid is updated
 * once per second by hardclock() and is atomically cleared in this
 * routine.
 */
void
hardpps(struct timeval *tvp,		/* time at PPS */
	long usec			/* hardware counter at PPS */)
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * An occasional glitch can be produced when the PPS interrupt
	 * occurs in the hardclock() routine before the time variable is
	 * updated. Here the offset is discarded when the difference
	 * between it and the last one is greater than tick/2, but not
	 * if the interval since the first discard exceeds 30 s.
	 */
	time_status |= STA_PPSSIGNAL;
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
	pps_valid = 0;
	u_usec = -tvp->tv_usec;
	if (u_usec < -500000)
		u_usec += 1000000;
	v_usec = pps_offset - u_usec;
	if (v_usec < 0)
		v_usec = -v_usec;
	if (v_usec > (tick >> 1)) {
		if (pps_glitch > MAXGLITCH) {
			pps_glitch = 0;
			pps_tf[2] = u_usec;
			pps_tf[1] = u_usec;
		} else {
			pps_glitch++;
			u_usec = pps_offset;
		}
	} else
		pps_glitch = 0;

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * time. The median sample becomes the time offset estimate; the
	 * difference between the other two samples becomes the time
	 * dispersion (jitter) estimate.
	 */
	pps_tf[2] = pps_tf[1];
	pps_tf[1] = pps_tf[0];
	pps_tf[0] = u_usec;
	if (pps_tf[0] > pps_tf[1]) {
		if (pps_tf[1] > pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 0 1 2 */
			v_usec = pps_tf[0] - pps_tf[2];
		} else if (pps_tf[2] > pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 2 0 1 */
			v_usec = pps_tf[2] - pps_tf[1];
		} else {
			pps_offset = pps_tf[2];		/* 0 2 1 */
			v_usec = pps_tf[0] - pps_tf[1];
		}
	} else {
		if (pps_tf[1] < pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 2 1 0 */
			v_usec = pps_tf[2] - pps_tf[0];
		} else if (pps_tf[2] < pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 1 0 2 */
			v_usec = pps_tf[1] - pps_tf[2];
		} else {
			pps_offset = pps_tf[2];		/* 1 2 0 */
			v_usec = pps_tf[1] - pps_tf[0];
		}
	}
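
	/*
	 * The open-coded comparisons above are a median-of-three; an
	 * equivalent (illustrative) helper would be:
	 *
	 *	static long
	 *	median3(long a, long b, long c)
	 *	{
	 *		if (a > b) { long t = a; a = b; b = t; }
	 *		if (b > c)
	 *			b = c;
	 *		return (a > b ? a : b);
	 *	}
	 *
	 * with the jitter estimate being the max minus the min of the
	 * three samples.
	 */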
	if (v_usec > MAXTIME)
		pps_jitcnt++;
	v_usec = (v_usec << PPS_AVG) - pps_jitter;
	if (v_usec < 0)
		pps_jitter -= -v_usec >> PPS_AVG;
	else
		pps_jitter += v_usec >> PPS_AVG;
	if (pps_jitter > (MAXTIME >> 1))
		time_status |= STA_PPSJITTER;

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows. At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval. This code
	 * is deliciously complicated by the fact that valid differences
	 * may
	 * exceed the value of tick when using long calibration
	 * intervals and small ticks. Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;
	pps_usec -= pps_freq;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	if (pps_count < (1 << pps_shift))
		return;
	pps_count = 0;
	pps_calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	if (v_usec < 0)
		v_usec = -(-v_usec >> pps_shift);
	else
		v_usec = v_usec >> pps_shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error. The number of timer ticks during
	 * the interval may vary +-1 tick. Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation. If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > time_tolerance || v_usec < -time_tolerance) {
		pps_errcnt++;
		pps_shift = PPS_SHIFT;
		pps_intcnt = 0;
		time_status |= STA_PPSERROR;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * frequency. The median sample becomes the frequency offset
	 * estimate; the difference between the other two samples
	 * becomes the frequency dispersion (stability) estimate.
	 */
	pps_ff[2] = pps_ff[1];
	pps_ff[1] = pps_ff[0];
	pps_ff[0] = v_usec;
	if (pps_ff[0] > pps_ff[1]) {
		if (pps_ff[1] > pps_ff[2]) {
			u_usec = pps_ff[1];		/* 0 1 2 */
			v_usec = pps_ff[0] - pps_ff[2];
		} else if (pps_ff[2] > pps_ff[0]) {
			u_usec = pps_ff[0];		/* 2 0 1 */
			v_usec = pps_ff[2] - pps_ff[1];
		} else {
			u_usec = pps_ff[2];		/* 0 2 1 */
			v_usec = pps_ff[0] - pps_ff[1];
		}
	} else {
		if (pps_ff[1] < pps_ff[2]) {
			u_usec = pps_ff[1];		/* 2 1 0 */
			v_usec = pps_ff[2] - pps_ff[0];
		} else if (pps_ff[2] < pps_ff[0]) {
			u_usec = pps_ff[0];		/* 1 0 2 */
			v_usec = pps_ff[1] - pps_ff[2];
		} else {
			u_usec = pps_ff[2];		/* 1 2 0 */
			v_usec = pps_ff[1] - pps_ff[0];
		}
	}

	/*
	 * Here the frequency dispersion (stability) is updated. If it
	 * is less than one-fourth the maximum (MAXFREQ), the frequency
	 * offset is updated as well, but clamped to the tolerance. It
	 * will be processed later by the hardclock() routine.
	 */
	v_usec = (v_usec >> 1) - pps_stabil;
	if (v_usec < 0)
		pps_stabil -= -v_usec >> PPS_AVG;
	else
		pps_stabil += v_usec >> PPS_AVG;
	if (pps_stabil > MAXFREQ >> 2) {
		pps_stbcnt++;
		time_status |= STA_PPSWANDER;
		return;
	}
	if (time_status & STA_PPSFREQ) {
		if (u_usec < 0) {
			pps_freq -= -u_usec >> PPS_AVG;
			if (pps_freq < -time_tolerance)
				pps_freq = -time_tolerance;
			u_usec = -u_usec;
		} else {
			pps_freq += u_usec >> PPS_AVG;
			if (pps_freq > time_tolerance)
				pps_freq = time_tolerance;
		}
	}

	/*
	 * Here the calibration interval is adjusted. If the maximum
	 * time difference is greater than tick / 4, reduce the interval
	 * by half. If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << pps_shift > bigtick >> 2) {
		pps_intcnt = 0;
		if (pps_shift > PPS_SHIFT)
			pps_shift--;
	} else if (pps_intcnt >= 4) {
		pps_intcnt = 0;
		if (pps_shift < PPS_SHIFTMAX)
			pps_shift++;
	} else
		pps_intcnt++;
}
#endif /* PPS_SYNC */
#endif /* NTP */

/*
 * XXX: Until all md code has it.
 */
struct timespec *
nanotime(struct timespec *ts)
{
	struct timeval tv;

	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, ts);
	return ts;
}
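
/*
 * Usage sketch (illustrative): a caller needing a struct timespec
 * timestamp can do:
 *
 *	struct timespec ts;
 *	nanotime(&ts);
 *
 * On this branch the result has only microsecond resolution, since it
 * is derived from microtime() above.
 */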
1447