xref: /netbsd-src/sys/kern/kern_ntptime.c (revision f983e71d70cfccf7b3de601eb4d998b2d886ede4)
1 /*	$NetBSD: kern_ntptime.c,v 1.48 2008/04/21 12:56:31 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the NetBSD
18  *	Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*-
37  ***********************************************************************
38  *								       *
39  * Copyright (c) David L. Mills 1993-2001			       *
40  *								       *
41  * Permission to use, copy, modify, and distribute this software and   *
42  * its documentation for any purpose and without fee is hereby	       *
43  * granted, provided that the above copyright notice appears in all    *
44  * copies and that both the copyright notice and this permission       *
45  * notice appear in supporting documentation, and that the name	       *
46  * University of Delaware not be used in advertising or publicity      *
47  * pertaining to distribution of the software without specific,	       *
48  * written prior permission. The University of Delaware makes no       *
49  * representations about the suitability this software for any	       *
50  * purpose. It is provided "as is" without express or implied	       *
51  * warranty.							       *
52  *								       *
53  **********************************************************************/
54 
55 /*
56  * Adapted from the original sources for FreeBSD and timecounters by:
57  * Poul-Henning Kamp <phk@FreeBSD.org>.
58  *
59  * The 32bit version of the "LP" macros seems a bit past its "sell by"
60  * date so I have retained only the 64bit version and included it directly
61  * in this file.
62  *
63  * Only minor changes done to interface with the timecounters over in
64  * sys/kern/kern_clock.c.   Some of the comments below may be (even more)
65  * confusing and/or plain wrong in that context.
66  */
67 
68 #include <sys/cdefs.h>
69 /* __FBSDID("$FreeBSD: src/sys/kern/kern_ntptime.c,v 1.59 2005/05/28 14:34:41 rwatson Exp $"); */
70 __KERNEL_RCSID(0, "$NetBSD: kern_ntptime.c,v 1.48 2008/04/21 12:56:31 ad Exp $");
71 
72 #include "opt_ntp.h"
73 #include "opt_compat_netbsd.h"
74 
75 #include <sys/param.h>
76 #include <sys/resourcevar.h>
77 #include <sys/systm.h>
78 #include <sys/kernel.h>
79 #include <sys/proc.h>
80 #include <sys/sysctl.h>
81 #include <sys/timex.h>
82 #include <sys/vnode.h>
83 #include <sys/kauth.h>
84 #include <sys/mount.h>
85 #include <sys/syscallargs.h>
86 #include <sys/cpu.h>
87 
88 #ifdef COMPAT_30
89 #include <compat/sys/timex.h>
90 #endif
91 
/*
 * Single-precision macros for 64-bit machines
 *
 * An l_fp is a signed 64-bit fixed-point quantity: the upper 32 bits
 * hold the integer part and the lower 32 bits hold the fraction.
 *
 * L_ADDHI() and L_LINT() place an integer in the upper half.  The shift
 * is performed on the unsigned representation and converted back, since
 * left-shifting a negative signed value is undefined behaviour in C and
 * both macros are used with negative arguments (e.g. -MAXFREQ).
 *
 * L_RSHIFT() and L_GINT() shift the magnitude and restore the sign, so
 * negative values round toward zero rather than toward minus infinity.
 */
typedef int64_t l_fp;
#define L_ADD(v, u)	((v) += (u))
#define L_SUB(v, u)	((v) -= (u))
#define L_ADDHI(v, a)	((v) += (int64_t)((uint64_t)(a) << 32))
#define L_NEG(v)	((v) = -(v))
#define L_RSHIFT(v, n) \
	do { \
		if ((v) < 0) \
			(v) = -(-(v) >> (n)); \
		else \
			(v) = (v) >> (n); \
	} while (0)
#define L_MPY(v, a)	((v) *= (a))
#define L_CLR(v)	((v) = 0)
#define L_ISNEG(v)	((v) < 0)
#define L_LINT(v, a)	((v) = (int64_t)((uint64_t)(a) << 32))
#define L_GINT(v)	((v) < 0 ? -(-(v) >> 32) : (v) >> 32)
112 
113 #ifdef NTP
114 /*
115  * Generic NTP kernel interface
116  *
117  * These routines constitute the Network Time Protocol (NTP) interfaces
118  * for user and daemon application programs. The ntp_gettime() routine
119  * provides the time, maximum error (synch distance) and estimated error
120  * (dispersion) to client user application programs. The ntp_adjtime()
121  * routine is used by the NTP daemon to adjust the system clock to an
122  * externally derived time. The time offset and related variables set by
123  * this routine are used by other routines in this module to adjust the
124  * phase and frequency of the clock discipline loop which controls the
125  * system clock.
126  *
127  * When the kernel time is reckoned directly in nanoseconds (NTP_NANO
128  * defined), the time at each tick interrupt is derived directly from
129  * the kernel time variable. When the kernel time is reckoned in
130  * microseconds, (NTP_NANO undefined), the time is derived from the
131  * kernel time variable together with a variable representing the
132  * leftover nanoseconds at the last tick interrupt. In either case, the
133  * current nanosecond time is reckoned from these values plus an
134  * interpolated value derived by the clock routines in another
135  * architecture-specific module. The interpolation can use either a
136  * dedicated counter or a processor cycle counter (PCC) implemented in
137  * some architectures.
138  *
139  * Note that all routines must run at priority splclock or higher.
140  */
141 /*
142  * Phase/frequency-lock loop (PLL/FLL) definitions
143  *
144  * The nanosecond clock discipline uses two variable types, time
145  * variables and frequency variables. Both types are represented as 64-
 * bit fixed-point quantities with the binary point between two 32-bit
147  * halves. On a 32-bit machine, each half is represented as a single
148  * word and mathematical operations are done using multiple-precision
149  * arithmetic. On a 64-bit machine, ordinary computer arithmetic is
150  * used.
151  *
152  * A time variable is a signed 64-bit fixed-point number in ns and
153  * fraction. It represents the remaining time offset to be amortized
154  * over succeeding tick interrupts. The maximum time offset is about
155  * 0.5 s and the resolution is about 2.3e-10 ns.
156  *
157  *			1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
158  *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
159  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
160  * |s s s|			 ns				   |
161  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
162  * |			    fraction				   |
163  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
164  *
165  * A frequency variable is a signed 64-bit fixed-point number in ns/s
166  * and fraction. It represents the ns and fraction to be added to the
167  * kernel time variable at each second. The maximum frequency offset is
168  * about +-500000 ns/s and the resolution is about 2.3e-10 ns/s.
169  *
170  *			1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
171  *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
172  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
173  * |s s s s s s s s s s s s s|	          ns/s			   |
174  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
175  * |			    fraction				   |
176  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
177  */
178 /*
179  * The following variables establish the state of the PLL/FLL and the
180  * residual time and frequency offset of the local clock.
181  */
/*
 * Loop gains, expressed as right-shift counts.  SHIFT_PLL is combined
 * with time_constant when the phase offset is amortized each second and
 * when the PLL frequency term is computed; SHIFT_FLL scales the FLL
 * frequency term in hardupdate().
 */
#define SHIFT_PLL	4		/* PLL loop gain (shift) */
#define SHIFT_FLL	2		/* FLL loop gain (shift) */

/*
 * Discipline state.  The routines visible in this file that read or
 * write these variables either take timecounter_lock or assert that it
 * is held; ntp_init() is the exception and is documented as startup
 * code.
 */
/* Leap-second state machine, advanced by ntp_update_second(). */
static int time_state = TIME_OK;	/* clock state */
/* STA_* bits; bits outside STA_RONLY are loaded by ntp_adjtime1(). */
static int time_status = STA_UNSYNC;	/* clock status bits */
/* Set via MOD_TAI; stepped by ntp_update_second() on a leap second. */
static long time_tai;			/* TAI offset (s) */
/* Last offset accepted by hardupdate() or hardpps(), already clamped. */
static long time_monitor;		/* last time offset scaled (ns) */
/* Clamped to [0, MAXTC] when loaded via MOD_TIMECONST. */
static long time_constant;		/* poll interval (shift) (s) */
static long time_precision = 1;		/* clock precision (ns) */
/* Grows by MAXFREQ / 1000 every second until reset via MOD_MAXERROR. */
static long time_maxerror = MAXPHASE / 1000; /* maximum error (us) */
static long time_esterror = MAXPHASE / 1000; /* estimated error (us) */
/* Zero means "no previous update"; see hardupdate(). */
static long time_reftime;		/* time at last adjustment (s) */
/* Remaining phase error; a fraction is moved to time_adj each second. */
static l_fp time_offset;		/* time offset (ns) */
static l_fp time_freq;			/* frequency offset (ns/s) */
196 #endif /* NTP */
197 
/*
 * time_adj is recomputed by ntp_update_second() on every call and
 * handed back to the caller through its adjustment argument.
 */
static l_fp time_adj;			/* tick adjust (ns/s) */
/*
 * Outstanding adjtime(2) correction.  ntp_update_second() consumes at
 * most 5000 us of it per call.  Not static: it is presumably set by the
 * adjtime(2) implementation elsewhere in the kernel.
 */
int64_t time_adjtime;		/* correction from adjtime(2) (usec) */

/*
 * Defined elsewhere; ntp_adjtime1() ORs in 2 whenever a caller passes
 * any mode bit.
 */
extern int time_adjusted;	/* ntp might have changed the system time */
202 
203 #ifdef NTP
204 #ifdef PPS_SYNC
/*
 * The following variables are used when a pulse-per-second (PPS) signal
 * is available and connected via a modem control lead. They establish
 * the engineering parameters of the clock discipline loop when
 * controlled by the PPS signal.
 *
 * PPS_FAVG doubles as the lower bound for pps_shift and pps_shiftmax
 * and as the averaging shift applied to pps_jitter and pps_stabil in
 * hardpps().  PPS_FAVGMAX is the upper bound accepted for pps_shiftmax.
 */
#define PPS_FAVG	2		/* min freq avg interval (s) (shift) */
#define PPS_FAVGDEF	8		/* default freq avg int (s) (shift) */
#define PPS_FAVGMAX	15		/* max freq avg interval (s) (shift) */
#define PPS_PAVG	4		/* phase avg interval (s) (shift) */
#define PPS_VALID	120		/* PPS signal watchdog max (s) */
#define PPS_MAXWANDER	100000		/* max PPS wander (ns/s) */
#define PPS_POPCORN	2		/* popcorn spike threshold (shift) */

/* pps_tf[0] is the newest sample, pps_tf[2] the oldest. */
static struct timespec pps_tf[3];	/* phase median filter */
static l_fp pps_freq;			/* scaled frequency offset (ns/s) */
/* Sum of per-pulse counter deltas over the calibration interval. */
static long pps_fcount;			/* frequency accumulator */
static long pps_jitter;			/* nominal jitter (ns) */
static long pps_stabil;			/* nominal stability (scaled ns/s) */
static long pps_lastsec;		/* time at last calibration (s) */
/*
 * Reloaded with PPS_VALID by hardpps() and decremented once per call
 * of ntp_update_second(); STA_PPSSIGNAL is cleared when it reaches 0.
 */
static int pps_valid;			/* signal watchdog counter */
/* The calibration interval is (1 << pps_shift) seconds. */
static int pps_shift = PPS_FAVG;	/* interval duration (s) (shift) */
static int pps_shiftmax = PPS_FAVGDEF;	/* max interval duration (s) (shift) */
/* Saturates at +-4; hitting a limit lengthens or shortens the interval. */
static int pps_intcnt;			/* wander counter */

/*
 * PPS signal quality monitors
 *
 * Event counters maintained by hardpps() and reported to the caller by
 * ntp_adjtime1().
 */
static long pps_calcnt;			/* calibration intervals */
static long pps_jitcnt;			/* jitter limit exceeded */
static long pps_stbcnt;			/* stability limit exceeded */
static long pps_errcnt;			/* calibration errors */
237 #endif /* PPS_SYNC */
238 /*
239  * End of phase/frequency-lock loop (PLL/FLL) definitions
240  */
241 
/* Local clock update; called from ntp_adjtime1() with timecounter_lock held. */
static void hardupdate(long offset);
243 
244 /*
245  * ntp_gettime() - NTP user application interface
246  */
247 void
248 ntp_gettime(struct ntptimeval *ntv)
249 {
250 
251 	mutex_spin_enter(&timecounter_lock);
252 	nanotime(&ntv->time);
253 	ntv->maxerror = time_maxerror;
254 	ntv->esterror = time_esterror;
255 	ntv->tai = time_tai;
256 	ntv->time_state = time_state;
257 	mutex_spin_exit(&timecounter_lock);
258 }
259 
260 /* ARGSUSED */
261 /*
262  * ntp_adjtime() - NTP daemon application interface
263  */
264 int
265 sys_ntp_adjtime(struct lwp *l, const struct sys_ntp_adjtime_args *uap, register_t *retval)
266 {
267 	/* {
268 		syscallarg(struct timex *) tp;
269 	} */
270 	struct timex ntv;
271 	int error = 0;
272 
273 	error = copyin((void *)SCARG(uap, tp), (void *)&ntv, sizeof(ntv));
274 	if (error != 0)
275 		return (error);
276 
277 	if (ntv.modes != 0 && (error = kauth_authorize_system(l->l_cred,
278 	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_NTPADJTIME, NULL,
279 	    NULL, NULL)) != 0)
280 		return (error);
281 
282 	ntp_adjtime1(&ntv);
283 
284 	error = copyout((void *)&ntv, (void *)SCARG(uap, tp), sizeof(ntv));
285 	if (!error)
286 		*retval = ntp_timestatus();
287 
288 	return error;
289 }
290 
291 void
292 ntp_adjtime1(struct timex *ntv)
293 {
294 	long freq;
295 	int modes;
296 
297 	/*
298 	 * Update selected clock variables - only the superuser can
299 	 * change anything. Note that there is no error checking here on
300 	 * the assumption the superuser should know what it is doing.
301 	 * Note that either the time constant or TAI offset are loaded
302 	 * from the ntv.constant member, depending on the mode bits. If
303 	 * the STA_PLL bit in the status word is cleared, the state and
304 	 * status words are reset to the initial values at boot.
305 	 */
306 	mutex_spin_enter(&timecounter_lock);
307 	modes = ntv->modes;
308 	if (modes != 0)
309 		/* We need to save the system time during shutdown */
310 		time_adjusted |= 2;
311 	if (modes & MOD_MAXERROR)
312 		time_maxerror = ntv->maxerror;
313 	if (modes & MOD_ESTERROR)
314 		time_esterror = ntv->esterror;
315 	if (modes & MOD_STATUS) {
316 		if (time_status & STA_PLL && !(ntv->status & STA_PLL)) {
317 			time_state = TIME_OK;
318 			time_status = STA_UNSYNC;
319 #ifdef PPS_SYNC
320 			pps_shift = PPS_FAVG;
321 #endif /* PPS_SYNC */
322 		}
323 		time_status &= STA_RONLY;
324 		time_status |= ntv->status & ~STA_RONLY;
325 	}
326 	if (modes & MOD_TIMECONST) {
327 		if (ntv->constant < 0)
328 			time_constant = 0;
329 		else if (ntv->constant > MAXTC)
330 			time_constant = MAXTC;
331 		else
332 			time_constant = ntv->constant;
333 	}
334 	if (modes & MOD_TAI) {
335 		if (ntv->constant > 0)	/* XXX zero & negative numbers ? */
336 			time_tai = ntv->constant;
337 	}
338 #ifdef PPS_SYNC
339 	if (modes & MOD_PPSMAX) {
340 		if (ntv->shift < PPS_FAVG)
341 			pps_shiftmax = PPS_FAVG;
342 		else if (ntv->shift > PPS_FAVGMAX)
343 			pps_shiftmax = PPS_FAVGMAX;
344 		else
345 			pps_shiftmax = ntv->shift;
346 	}
347 #endif /* PPS_SYNC */
348 	if (modes & MOD_NANO)
349 		time_status |= STA_NANO;
350 	if (modes & MOD_MICRO)
351 		time_status &= ~STA_NANO;
352 	if (modes & MOD_CLKB)
353 		time_status |= STA_CLK;
354 	if (modes & MOD_CLKA)
355 		time_status &= ~STA_CLK;
356 	if (modes & MOD_FREQUENCY) {
357 		freq = (ntv->freq * 1000LL) >> 16;
358 		if (freq > MAXFREQ)
359 			L_LINT(time_freq, MAXFREQ);
360 		else if (freq < -MAXFREQ)
361 			L_LINT(time_freq, -MAXFREQ);
362 		else {
363 			/*
364 			 * ntv.freq is [PPM * 2^16] = [us/s * 2^16]
365 			 * time_freq is [ns/s * 2^32]
366 			 */
367 			time_freq = ntv->freq * 1000LL * 65536LL;
368 		}
369 #ifdef PPS_SYNC
370 		pps_freq = time_freq;
371 #endif /* PPS_SYNC */
372 	}
373 	if (modes & MOD_OFFSET) {
374 		if (time_status & STA_NANO)
375 			hardupdate(ntv->offset);
376 		else
377 			hardupdate(ntv->offset * 1000);
378 	}
379 
380 	/*
381 	 * Retrieve all clock variables. Note that the TAI offset is
382 	 * returned only by ntp_gettime();
383 	 */
384 	if (time_status & STA_NANO)
385 		ntv->offset = L_GINT(time_offset);
386 	else
387 		ntv->offset = L_GINT(time_offset) / 1000; /* XXX rounding ? */
388 	ntv->freq = L_GINT((time_freq / 1000LL) << 16);
389 	ntv->maxerror = time_maxerror;
390 	ntv->esterror = time_esterror;
391 	ntv->status = time_status;
392 	ntv->constant = time_constant;
393 	if (time_status & STA_NANO)
394 		ntv->precision = time_precision;
395 	else
396 		ntv->precision = time_precision / 1000;
397 	ntv->tolerance = MAXFREQ * SCALE_PPM;
398 #ifdef PPS_SYNC
399 	ntv->shift = pps_shift;
400 	ntv->ppsfreq = L_GINT((pps_freq / 1000LL) << 16);
401 	if (time_status & STA_NANO)
402 		ntv->jitter = pps_jitter;
403 	else
404 		ntv->jitter = pps_jitter / 1000;
405 	ntv->stabil = pps_stabil;
406 	ntv->calcnt = pps_calcnt;
407 	ntv->errcnt = pps_errcnt;
408 	ntv->jitcnt = pps_jitcnt;
409 	ntv->stbcnt = pps_stbcnt;
410 #endif /* PPS_SYNC */
411 	mutex_spin_exit(&timecounter_lock);
412 }
413 #endif /* NTP */
414 
415 /*
416  * second_overflow() - called after ntp_tick_adjust()
417  *
418  * This routine is ordinarily called immediately following the above
419  * routine ntp_tick_adjust(). While these two routines are normally
420  * combined, they are separated here only for the purposes of
421  * simulation.
422  */
423 void
424 ntp_update_second(int64_t *adjustment, time_t *newsec)
425 {
426 	int tickrate;
427 	l_fp ftemp;		/* 32/64-bit temporary */
428 
429 	KASSERT(mutex_owned(&timecounter_lock));
430 
431 #ifdef NTP
432 
433 	/*
434 	 * On rollover of the second both the nanosecond and microsecond
435 	 * clocks are updated and the state machine cranked as
436 	 * necessary. The phase adjustment to be used for the next
437 	 * second is calculated and the maximum error is increased by
438 	 * the tolerance.
439 	 */
440 	time_maxerror += MAXFREQ / 1000;
441 
442 	/*
443 	 * Leap second processing. If in leap-insert state at
444 	 * the end of the day, the system clock is set back one
445 	 * second; if in leap-delete state, the system clock is
446 	 * set ahead one second. The nano_time() routine or
447 	 * external clock driver will insure that reported time
448 	 * is always monotonic.
449 	 */
450 	switch (time_state) {
451 
452 		/*
453 		 * No warning.
454 		 */
455 		case TIME_OK:
456 		if (time_status & STA_INS)
457 			time_state = TIME_INS;
458 		else if (time_status & STA_DEL)
459 			time_state = TIME_DEL;
460 		break;
461 
462 		/*
463 		 * Insert second 23:59:60 following second
464 		 * 23:59:59.
465 		 */
466 		case TIME_INS:
467 		if (!(time_status & STA_INS))
468 			time_state = TIME_OK;
469 		else if ((*newsec) % 86400 == 0) {
470 			(*newsec)--;
471 			time_state = TIME_OOP;
472 			time_tai++;
473 		}
474 		break;
475 
476 		/*
477 		 * Delete second 23:59:59.
478 		 */
479 		case TIME_DEL:
480 		if (!(time_status & STA_DEL))
481 			time_state = TIME_OK;
482 		else if (((*newsec) + 1) % 86400 == 0) {
483 			(*newsec)++;
484 			time_tai--;
485 			time_state = TIME_WAIT;
486 		}
487 		break;
488 
489 		/*
490 		 * Insert second in progress.
491 		 */
492 		case TIME_OOP:
493 			time_state = TIME_WAIT;
494 		break;
495 
496 		/*
497 		 * Wait for status bits to clear.
498 		 */
499 		case TIME_WAIT:
500 		if (!(time_status & (STA_INS | STA_DEL)))
501 			time_state = TIME_OK;
502 	}
503 
504 	/*
505 	 * Compute the total time adjustment for the next second
506 	 * in ns. The offset is reduced by a factor depending on
507 	 * whether the PPS signal is operating. Note that the
508 	 * value is in effect scaled by the clock frequency,
509 	 * since the adjustment is added at each tick interrupt.
510 	 */
511 	ftemp = time_offset;
512 #ifdef PPS_SYNC
513 	/* XXX even if PPS signal dies we should finish adjustment ? */
514 	if (time_status & STA_PPSTIME && time_status &
515 	    STA_PPSSIGNAL)
516 		L_RSHIFT(ftemp, pps_shift);
517 	else
518 		L_RSHIFT(ftemp, SHIFT_PLL + time_constant);
519 #else
520 		L_RSHIFT(ftemp, SHIFT_PLL + time_constant);
521 #endif /* PPS_SYNC */
522 	time_adj = ftemp;
523 	L_SUB(time_offset, ftemp);
524 	L_ADD(time_adj, time_freq);
525 
526 #ifdef PPS_SYNC
527 	if (pps_valid > 0)
528 		pps_valid--;
529 	else
530 		time_status &= ~STA_PPSSIGNAL;
531 #endif /* PPS_SYNC */
532 #else  /* !NTP */
533 	L_CLR(time_adj);
534 #endif /* !NTP */
535 
536 	/*
537 	 * Apply any correction from adjtime(2).  If more than one second
538 	 * off we slew at a rate of 5ms/s (5000 PPM) else 500us/s (500PPM)
539 	 * until the last second is slewed the final < 500 usecs.
540 	 */
541 	if (time_adjtime != 0) {
542 		if (time_adjtime > 1000000)
543 			tickrate = 5000;
544 		else if (time_adjtime < -1000000)
545 			tickrate = -5000;
546 		else if (time_adjtime > 500)
547 			tickrate = 500;
548 		else if (time_adjtime < -500)
549 			tickrate = -500;
550 		else
551 			tickrate = time_adjtime;
552 		time_adjtime -= tickrate;
553 		L_LINT(ftemp, tickrate * 1000);
554 		L_ADD(time_adj, ftemp);
555 	}
556 	*adjustment = time_adj;
557 }
558 
559 /*
560  * ntp_init() - initialize variables and structures
561  *
562  * This routine must be called after the kernel variables hz and tick
563  * are set or changed and before the next tick interrupt. In this
564  * particular implementation, these values are assumed set elsewhere in
565  * the kernel. The design allows the clock frequency and tick interval
566  * to be changed while the system is running. So, this routine should
567  * probably be integrated with the code that does that.
568  */
569 void
570 ntp_init(void)
571 {
572 
573 	/*
574 	 * The following variables are initialized only at startup. Only
575 	 * those structures not cleared by the compiler need to be
576 	 * initialized, and these only in the simulator. In the actual
577 	 * kernel, any nonzero values here will quickly evaporate.
578 	 */
579 	L_CLR(time_adj);
580 #ifdef NTP
581 	L_CLR(time_offset);
582 	L_CLR(time_freq);
583 #ifdef PPS_SYNC
584 	pps_tf[0].tv_sec = pps_tf[0].tv_nsec = 0;
585 	pps_tf[1].tv_sec = pps_tf[1].tv_nsec = 0;
586 	pps_tf[2].tv_sec = pps_tf[2].tv_nsec = 0;
587 	pps_fcount = 0;
588 	L_CLR(pps_freq);
589 #endif /* PPS_SYNC */
590 #endif
591 }
592 
593 #ifdef NTP
594 /*
595  * hardupdate() - local clock update
596  *
597  * This routine is called by ntp_adjtime() to update the local clock
598  * phase and frequency. The implementation is of an adaptive-parameter,
599  * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new
600  * time and frequency offset estimates for each call. If the kernel PPS
601  * discipline code is configured (PPS_SYNC), the PPS signal itself
602  * determines the new time offset, instead of the calling argument.
603  * Presumably, calls to ntp_adjtime() occur only when the caller
604  * believes the local clock is valid within some bound (+-128 ms with
605  * NTP). If the caller's time is far different than the PPS time, an
606  * argument will ensue, and it's not clear who will lose.
607  *
608  * For uncompensated quartz crystal oscillators and nominal update
609  * intervals less than 256 s, operation should be in phase-lock mode,
610  * where the loop is disciplined to phase. For update intervals greater
611  * than 1024 s, operation should be in frequency-lock mode, where the
612  * loop is disciplined to frequency. Between 256 s and 1024 s, the mode
613  * is selected by the STA_MODE status bit.
614  *
615  * Note: splclock() is in effect.
616  */
617 void
618 hardupdate(long offset)
619 {
620 	long mtemp;
621 	l_fp ftemp;
622 
623 	KASSERT(mutex_owned(&timecounter_lock));
624 
625 	/*
626 	 * Select how the phase is to be controlled and from which
627 	 * source. If the PPS signal is present and enabled to
628 	 * discipline the time, the PPS offset is used; otherwise, the
629 	 * argument offset is used.
630 	 */
631 	if (!(time_status & STA_PLL))
632 		return;
633 	if (!(time_status & STA_PPSTIME && time_status &
634 	    STA_PPSSIGNAL)) {
635 		if (offset > MAXPHASE)
636 			time_monitor = MAXPHASE;
637 		else if (offset < -MAXPHASE)
638 			time_monitor = -MAXPHASE;
639 		else
640 			time_monitor = offset;
641 		L_LINT(time_offset, time_monitor);
642 	}
643 
644 	/*
645 	 * Select how the frequency is to be controlled and in which
646 	 * mode (PLL or FLL). If the PPS signal is present and enabled
647 	 * to discipline the frequency, the PPS frequency is used;
648 	 * otherwise, the argument offset is used to compute it.
649 	 */
650 	if (time_status & STA_PPSFREQ && time_status & STA_PPSSIGNAL) {
651 		time_reftime = time_second;
652 		return;
653 	}
654 	if (time_status & STA_FREQHOLD || time_reftime == 0)
655 		time_reftime = time_second;
656 	mtemp = time_second - time_reftime;
657 	L_LINT(ftemp, time_monitor);
658 	L_RSHIFT(ftemp, (SHIFT_PLL + 2 + time_constant) << 1);
659 	L_MPY(ftemp, mtemp);
660 	L_ADD(time_freq, ftemp);
661 	time_status &= ~STA_MODE;
662 	if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp >
663 	    MAXSEC)) {
664 		L_LINT(ftemp, (time_monitor << 4) / mtemp);
665 		L_RSHIFT(ftemp, SHIFT_FLL + 4);
666 		L_ADD(time_freq, ftemp);
667 		time_status |= STA_MODE;
668 	}
669 	time_reftime = time_second;
670 	if (L_GINT(time_freq) > MAXFREQ)
671 		L_LINT(time_freq, MAXFREQ);
672 	else if (L_GINT(time_freq) < -MAXFREQ)
673 		L_LINT(time_freq, -MAXFREQ);
674 }
675 
676 #ifdef PPS_SYNC
677 /*
678  * hardpps() - discipline CPU clock oscillator to external PPS signal
679  *
680  * This routine is called at each PPS interrupt in order to discipline
681  * the CPU clock oscillator to the PPS signal. It measures the PPS phase
682  * and leaves it in a handy spot for the hardclock() routine. It
683  * integrates successive PPS phase differences and calculates the
684  * frequency offset. This is used in hardclock() to discipline the CPU
685  * clock oscillator so that intrinsic frequency error is cancelled out.
686  * The code requires the caller to capture the time and hardware counter
687  * value at the on-time PPS signal transition.
688  *
689  * Note that, on some Unix systems, this routine runs at an interrupt
690  * priority level higher than the timer interrupt routine hardclock().
691  * Therefore, the variables used are distinct from the hardclock()
692  * variables, except for certain exceptions: The PPS frequency pps_freq
693  * and phase pps_offset variables are determined by this routine and
694  * updated atomically. The time_tolerance variable can be considered a
695  * constant, since it is infrequently changed, and then only when the
696  * PPS signal is disabled. The watchdog counter pps_valid is updated
697  * once per second by hardclock() and is atomically cleared in this
698  * routine.
699  */
700 void
701 hardpps(struct timespec *tsp,		/* time at PPS */
702 	long nsec			/* hardware counter at PPS */)
703 {
704 	long u_sec, u_nsec, v_nsec; /* temps */
705 	l_fp ftemp;
706 
707 	KASSERT(mutex_owned(&timecounter_lock));
708 
709 	/*
710 	 * The signal is first processed by a range gate and frequency
711 	 * discriminator. The range gate rejects noise spikes outside
712 	 * the range +-500 us. The frequency discriminator rejects input
713 	 * signals with apparent frequency outside the range 1 +-500
714 	 * PPM. If two hits occur in the same second, we ignore the
715 	 * later hit; if not and a hit occurs outside the range gate,
716 	 * keep the later hit for later comparison, but do not process
717 	 * it.
718 	 */
719 	time_status |= STA_PPSSIGNAL | STA_PPSJITTER;
720 	time_status &= ~(STA_PPSWANDER | STA_PPSERROR);
721 	pps_valid = PPS_VALID;
722 	u_sec = tsp->tv_sec;
723 	u_nsec = tsp->tv_nsec;
724 	if (u_nsec >= (NANOSECOND >> 1)) {
725 		u_nsec -= NANOSECOND;
726 		u_sec++;
727 	}
728 	v_nsec = u_nsec - pps_tf[0].tv_nsec;
729 	if (u_sec == pps_tf[0].tv_sec && v_nsec < NANOSECOND -
730 	    MAXFREQ)
731 		return;
732 	pps_tf[2] = pps_tf[1];
733 	pps_tf[1] = pps_tf[0];
734 	pps_tf[0].tv_sec = u_sec;
735 	pps_tf[0].tv_nsec = u_nsec;
736 
737 	/*
738 	 * Compute the difference between the current and previous
739 	 * counter values. If the difference exceeds 0.5 s, assume it
740 	 * has wrapped around, so correct 1.0 s. If the result exceeds
741 	 * the tick interval, the sample point has crossed a tick
742 	 * boundary during the last second, so correct the tick. Very
743 	 * intricate.
744 	 */
745 	u_nsec = nsec;
746 	if (u_nsec > (NANOSECOND >> 1))
747 		u_nsec -= NANOSECOND;
748 	else if (u_nsec < -(NANOSECOND >> 1))
749 		u_nsec += NANOSECOND;
750 	pps_fcount += u_nsec;
751 	if (v_nsec > MAXFREQ || v_nsec < -MAXFREQ)
752 		return;
753 	time_status &= ~STA_PPSJITTER;
754 
755 	/*
756 	 * A three-stage median filter is used to help denoise the PPS
757 	 * time. The median sample becomes the time offset estimate; the
758 	 * difference between the other two samples becomes the time
759 	 * dispersion (jitter) estimate.
760 	 */
761 	if (pps_tf[0].tv_nsec > pps_tf[1].tv_nsec) {
762 		if (pps_tf[1].tv_nsec > pps_tf[2].tv_nsec) {
763 			v_nsec = pps_tf[1].tv_nsec;	/* 0 1 2 */
764 			u_nsec = pps_tf[0].tv_nsec - pps_tf[2].tv_nsec;
765 		} else if (pps_tf[2].tv_nsec > pps_tf[0].tv_nsec) {
766 			v_nsec = pps_tf[0].tv_nsec;	/* 2 0 1 */
767 			u_nsec = pps_tf[2].tv_nsec - pps_tf[1].tv_nsec;
768 		} else {
769 			v_nsec = pps_tf[2].tv_nsec;	/* 0 2 1 */
770 			u_nsec = pps_tf[0].tv_nsec - pps_tf[1].tv_nsec;
771 		}
772 	} else {
773 		if (pps_tf[1].tv_nsec < pps_tf[2].tv_nsec) {
774 			v_nsec = pps_tf[1].tv_nsec;	/* 2 1 0 */
775 			u_nsec = pps_tf[2].tv_nsec - pps_tf[0].tv_nsec;
776 		} else if (pps_tf[2].tv_nsec < pps_tf[0].tv_nsec) {
777 			v_nsec = pps_tf[0].tv_nsec;	/* 1 0 2 */
778 			u_nsec = pps_tf[1].tv_nsec - pps_tf[2].tv_nsec;
779 		} else {
780 			v_nsec = pps_tf[2].tv_nsec;	/* 1 2 0 */
781 			u_nsec = pps_tf[1].tv_nsec - pps_tf[0].tv_nsec;
782 		}
783 	}
784 
785 	/*
786 	 * Nominal jitter is due to PPS signal noise and interrupt
787 	 * latency. If it exceeds the popcorn threshold, the sample is
788 	 * discarded. otherwise, if so enabled, the time offset is
789 	 * updated. We can tolerate a modest loss of data here without
790 	 * much degrading time accuracy.
791 	 */
792 	if (u_nsec > (pps_jitter << PPS_POPCORN)) {
793 		time_status |= STA_PPSJITTER;
794 		pps_jitcnt++;
795 	} else if (time_status & STA_PPSTIME) {
796 		time_monitor = -v_nsec;
797 		L_LINT(time_offset, time_monitor);
798 	}
799 	pps_jitter += (u_nsec - pps_jitter) >> PPS_FAVG;
800 	u_sec = pps_tf[0].tv_sec - pps_lastsec;
801 	if (u_sec < (1 << pps_shift))
802 		return;
803 
804 	/*
805 	 * At the end of the calibration interval the difference between
806 	 * the first and last counter values becomes the scaled
807 	 * frequency. It will later be divided by the length of the
808 	 * interval to determine the frequency update. If the frequency
809 	 * exceeds a sanity threshold, or if the actual calibration
810 	 * interval is not equal to the expected length, the data are
811 	 * discarded. We can tolerate a modest loss of data here without
812 	 * much degrading frequency accuracy.
813 	 */
814 	pps_calcnt++;
815 	v_nsec = -pps_fcount;
816 	pps_lastsec = pps_tf[0].tv_sec;
817 	pps_fcount = 0;
818 	u_nsec = MAXFREQ << pps_shift;
819 	if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 <<
820 	    pps_shift)) {
821 		time_status |= STA_PPSERROR;
822 		pps_errcnt++;
823 		return;
824 	}
825 
826 	/*
827 	 * Here the raw frequency offset and wander (stability) is
828 	 * calculated. If the wander is less than the wander threshold
829 	 * for four consecutive averaging intervals, the interval is
830 	 * doubled; if it is greater than the threshold for four
831 	 * consecutive intervals, the interval is halved. The scaled
832 	 * frequency offset is converted to frequency offset. The
833 	 * stability metric is calculated as the average of recent
834 	 * frequency changes, but is used only for performance
835 	 * monitoring.
836 	 */
837 	L_LINT(ftemp, v_nsec);
838 	L_RSHIFT(ftemp, pps_shift);
839 	L_SUB(ftemp, pps_freq);
840 	u_nsec = L_GINT(ftemp);
841 	if (u_nsec > PPS_MAXWANDER) {
842 		L_LINT(ftemp, PPS_MAXWANDER);
843 		pps_intcnt--;
844 		time_status |= STA_PPSWANDER;
845 		pps_stbcnt++;
846 	} else if (u_nsec < -PPS_MAXWANDER) {
847 		L_LINT(ftemp, -PPS_MAXWANDER);
848 		pps_intcnt--;
849 		time_status |= STA_PPSWANDER;
850 		pps_stbcnt++;
851 	} else {
852 		pps_intcnt++;
853 	}
854 	if (pps_intcnt >= 4) {
855 		pps_intcnt = 4;
856 		if (pps_shift < pps_shiftmax) {
857 			pps_shift++;
858 			pps_intcnt = 0;
859 		}
860 	} else if (pps_intcnt <= -4 || pps_shift > pps_shiftmax) {
861 		pps_intcnt = -4;
862 		if (pps_shift > PPS_FAVG) {
863 			pps_shift--;
864 			pps_intcnt = 0;
865 		}
866 	}
867 	if (u_nsec < 0)
868 		u_nsec = -u_nsec;
869 	pps_stabil += (u_nsec * SCALE_PPM - pps_stabil) >> PPS_FAVG;
870 
871 	/*
872 	 * The PPS frequency is recalculated and clamped to the maximum
873 	 * MAXFREQ. If enabled, the system clock frequency is updated as
874 	 * well.
875 	 */
876 	L_ADD(pps_freq, ftemp);
877 	u_nsec = L_GINT(pps_freq);
878 	if (u_nsec > MAXFREQ)
879 		L_LINT(pps_freq, MAXFREQ);
880 	else if (u_nsec < -MAXFREQ)
881 		L_LINT(pps_freq, -MAXFREQ);
882 	if (time_status & STA_PPSFREQ)
883 		time_freq = pps_freq;
884 }
885 #endif /* PPS_SYNC */
886 #endif /* NTP */
887 
888 #ifdef NTP
889 int
890 ntp_timestatus(void)
891 {
892 	int rv;
893 
894 	/*
895 	 * Status word error decode. If any of these conditions
896 	 * occur, an error is returned, instead of the status
897 	 * word. Most applications will care only about the fact
898 	 * the system clock may not be trusted, not about the
899 	 * details.
900 	 *
901 	 * Hardware or software error
902 	 */
903 	mutex_spin_enter(&timecounter_lock);
904 	if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) ||
905 
906 	/*
907 	 * PPS signal lost when either time or frequency
908 	 * synchronization requested
909 	 */
910 	    (time_status & (STA_PPSFREQ | STA_PPSTIME) &&
911 	     !(time_status & STA_PPSSIGNAL)) ||
912 
913 	/*
914 	 * PPS jitter exceeded when time synchronization
915 	 * requested
916 	 */
917 	    (time_status & STA_PPSTIME &&
918 	     time_status & STA_PPSJITTER) ||
919 
920 	/*
921 	 * PPS wander exceeded or calibration error when
922 	 * frequency synchronization requested
923 	 */
924 	    (time_status & STA_PPSFREQ &&
925 	     time_status & (STA_PPSWANDER | STA_PPSERROR)))
926 		rv = TIME_ERROR;
927 	else
928 		rv = time_state;
929 	mutex_spin_exit(&timecounter_lock);
930 
931 	return rv;
932 }
933 
934 /*ARGSUSED*/
935 /*
936  * ntp_gettime() - NTP user application interface
937  */
938 int
939 sys___ntp_gettime30(struct lwp *l, const struct sys___ntp_gettime30_args *uap, register_t *retval)
940 {
941 	/* {
942 		syscallarg(struct ntptimeval *) ntvp;
943 	} */
944 	struct ntptimeval ntv;
945 	int error = 0;
946 
947 	if (SCARG(uap, ntvp)) {
948 		ntp_gettime(&ntv);
949 
950 		error = copyout((void *)&ntv, (void *)SCARG(uap, ntvp),
951 				sizeof(ntv));
952 	}
953 	if (!error) {
954 		*retval = ntp_timestatus();
955 	}
956 	return(error);
957 }
958 
959 #ifdef COMPAT_30
960 int
961 compat_30_sys_ntp_gettime(struct lwp *l, const struct compat_30_sys_ntp_gettime_args *uap, register_t *retval)
962 {
963 	/* {
964 		syscallarg(struct ntptimeval30 *) ontvp;
965 	} */
966 	struct ntptimeval ntv;
967 	struct ntptimeval30 ontv;
968 	int error = 0;
969 
970 	if (SCARG(uap, ntvp)) {
971 		ntp_gettime(&ntv);
972 		TIMESPEC_TO_TIMEVAL(&ontv.time, &ntv.time);
973 		ontv.maxerror = ntv.maxerror;
974 		ontv.esterror = ntv.esterror;
975 
976 		error = copyout((void *)&ontv, (void *)SCARG(uap, ntvp),
977 				sizeof(ontv));
978  	}
979 	if (!error)
980 		*retval = ntp_timestatus();
981 
982 	return (error);
983 }
984 #endif
985 
986 /*
987  * return information about kernel precision timekeeping
988  */
989 static int
990 sysctl_kern_ntptime(SYSCTLFN_ARGS)
991 {
992 	struct sysctlnode node;
993 	struct ntptimeval ntv;
994 
995 	ntp_gettime(&ntv);
996 
997 	node = *rnode;
998 	node.sysctl_data = &ntv;
999 	node.sysctl_size = sizeof(ntv);
1000 	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
1001 }
1002 
1003 SYSCTL_SETUP(sysctl_kern_ntptime_setup, "sysctl kern.ntptime node setup")
1004 {
1005 
1006 	sysctl_createv(clog, 0, NULL, NULL,
1007 		       CTLFLAG_PERMANENT,
1008 		       CTLTYPE_NODE, "kern", NULL,
1009 		       NULL, 0, NULL, 0,
1010 		       CTL_KERN, CTL_EOL);
1011 
1012 	sysctl_createv(clog, 0, NULL, NULL,
1013 		       CTLFLAG_PERMANENT,
1014 		       CTLTYPE_STRUCT, "ntptime",
1015 		       SYSCTL_DESCR("Kernel clock values for NTP"),
1016 		       sysctl_kern_ntptime, 0, NULL,
1017 		       sizeof(struct ntptimeval),
1018 		       CTL_KERN, KERN_NTPTIME, CTL_EOL);
1019 }
1020 #else /* !NTP */
1021 /* For some reason, raising SIGSYS (as sys_nosys would) is problematic. */
1022 
1023 int
1024 sys___ntp_gettime30(struct lwp *l, const struct sys___ntp_gettime30_args *uap, register_t *retval)
1025 {
1026 
1027 	return(ENOSYS);
1028 }
1029 
1030 #ifdef COMPAT_30
1031 int
1032 compat_30_sys_ntp_gettime(struct lwp *l, const struct compat_30_sys_ntp_gettime_args *uap, register_t *retval)
1033 {
1034 
1035  	return(ENOSYS);
1036 }
1037 #endif
1038 #endif /* !NTP */
1039