xref: /netbsd-src/sys/kern/kern_tc.c (revision f983e71d70cfccf7b3de601eb4d998b2d886ede4)
1 /* $NetBSD: kern_tc.c,v 1.33 2008/04/21 12:56:31 ad Exp $ */
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the NetBSD
18  *	Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*-
37  * ----------------------------------------------------------------------------
38  * "THE BEER-WARE LICENSE" (Revision 42):
39  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
40  * can do whatever you want with this stuff. If we meet some day, and you think
41  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
42  * ---------------------------------------------------------------------------
43  */
44 
45 #include <sys/cdefs.h>
46 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
47 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.33 2008/04/21 12:56:31 ad Exp $");
48 
49 #include "opt_ntp.h"
50 
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/systm.h>
57 #include <sys/timepps.h>
58 #include <sys/timetc.h>
59 #include <sys/timex.h>
60 #include <sys/evcnt.h>
61 #include <sys/kauth.h>
62 #include <sys/mutex.h>
63 #include <sys/atomic.h>
64 
/*
 * Threshold, in seconds, above which a jump of the clock is treated as
 * a "large step" (such as the one that happens at boot).  It is kept
 * fairly small so that ntp_update_second is still called once per
 * second in the usual "missed a couple of seconds" case, while a huge
 * step does not make tc_windup loop for a very long time.
 */
#define LARGE_STEP	200
72 
73 /*
74  * Implement a dummy timecounter which we can use until we get a real one
75  * in the air.  This allows the console and other early stuff to use
76  * time services.
77  */
78 
79 static u_int
80 dummy_get_timecount(struct timecounter *tc)
81 {
82 	static u_int now;
83 
84 	return (++now);
85 }
86 
87 static struct timecounter dummy_timecounter = {
88 	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
89 };
90 
91 struct timehands {
92 	/* These fields must be initialized by the driver. */
93 	struct timecounter	*th_counter;
94 	int64_t			th_adjustment;
95 	u_int64_t		th_scale;
96 	u_int	 		th_offset_count;
97 	struct bintime		th_offset;
98 	struct timeval		th_microtime;
99 	struct timespec		th_nanotime;
100 	/* Fields not to be copied in tc_windup start with th_generation. */
101 	volatile u_int		th_generation;
102 	struct timehands	*th_next;
103 };
104 
105 static struct timehands th0;
106 static struct timehands th9 = { .th_next = &th0, };
107 static struct timehands th8 = { .th_next = &th9, };
108 static struct timehands th7 = { .th_next = &th8, };
109 static struct timehands th6 = { .th_next = &th7, };
110 static struct timehands th5 = { .th_next = &th6, };
111 static struct timehands th4 = { .th_next = &th5, };
112 static struct timehands th3 = { .th_next = &th4, };
113 static struct timehands th2 = { .th_next = &th3, };
114 static struct timehands th1 = { .th_next = &th2, };
115 static struct timehands th0 = {
116 	.th_counter = &dummy_timecounter,
117 	.th_scale = (uint64_t)-1 / 1000000,
118 	.th_offset = { .sec = 1, .frac = 0 },
119 	.th_generation = 1,
120 	.th_next = &th1,
121 };
122 
123 static struct timehands *volatile timehands = &th0;
124 struct timecounter *timecounter = &dummy_timecounter;
125 static struct timecounter *timecounters = &dummy_timecounter;
126 
127 time_t time_second = 1;
128 time_t time_uptime = 1;
129 
130 static struct bintime timebasebin;
131 
132 static int timestepwarnings;
133 
134 extern kmutex_t time_lock;
135 kmutex_t timecounter_lock;
136 
137 #ifdef __FreeBSD__
138 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
139     &timestepwarnings, 0, "");
140 #endif /* __FreeBSD__ */
141 
142 /*
143  * sysctl helper routine for kern.timercounter.hardware
144  */
145 static int
146 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
147 {
148 	struct sysctlnode node;
149 	int error;
150 	char newname[MAX_TCNAMELEN];
151 	struct timecounter *newtc, *tc;
152 
153 	tc = timecounter;
154 
155 	strlcpy(newname, tc->tc_name, sizeof(newname));
156 
157 	node = *rnode;
158 	node.sysctl_data = newname;
159 	node.sysctl_size = sizeof(newname);
160 
161 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
162 
163 	if (error ||
164 	    newp == NULL ||
165 	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
166 		return error;
167 
168 	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
169 	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
170 	    NULL, NULL)) != 0)
171 		return (error);
172 
173 	if (!cold)
174 		mutex_enter(&time_lock);
175 	error = EINVAL;
176 	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
177 		if (strcmp(newname, newtc->tc_name) != 0)
178 			continue;
179 		/* Warm up new timecounter. */
180 		(void)newtc->tc_get_timecount(newtc);
181 		(void)newtc->tc_get_timecount(newtc);
182 		timecounter = newtc;
183 		error = 0;
184 		break;
185 	}
186 	if (!cold)
187 		mutex_exit(&time_lock);
188 	return error;
189 }
190 
191 static int
192 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
193 {
194 	char buf[MAX_TCNAMELEN+48];
195 	char *where = oldp;
196 	const char *spc;
197 	struct timecounter *tc;
198 	size_t needed, left, slen;
199 	int error;
200 
201 	if (newp != NULL)
202 		return (EPERM);
203 	if (namelen != 0)
204 		return (EINVAL);
205 
206 	spc = "";
207 	error = 0;
208 	needed = 0;
209 	left = *oldlenp;
210 
211 	mutex_enter(&time_lock);
212 	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
213 		if (where == NULL) {
214 			needed += sizeof(buf);  /* be conservative */
215 		} else {
216 			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
217 					" Hz)", spc, tc->tc_name, tc->tc_quality,
218 					tc->tc_frequency);
219 			if (left < slen + 1)
220 				break;
221 			/* XXX use sysctl_copyout? (from sysctl_hw_disknames) */
222 			/* XXX copyout with held lock. */
223 			error = copyout(buf, where, slen + 1);
224 			spc = " ";
225 			where += slen;
226 			needed += slen;
227 			left -= slen;
228 		}
229 	}
230 	mutex_exit(&time_lock);
231 
232 	*oldlenp = needed;
233 	return (error);
234 }
235 
236 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
237 {
238 	const struct sysctlnode *node;
239 
240 	sysctl_createv(clog, 0, NULL, &node,
241 		       CTLFLAG_PERMANENT,
242 		       CTLTYPE_NODE, "timecounter",
243 		       SYSCTL_DESCR("time counter information"),
244 		       NULL, 0, NULL, 0,
245 		       CTL_KERN, CTL_CREATE, CTL_EOL);
246 
247 	if (node != NULL) {
248 		sysctl_createv(clog, 0, NULL, NULL,
249 			       CTLFLAG_PERMANENT,
250 			       CTLTYPE_STRING, "choice",
251 			       SYSCTL_DESCR("available counters"),
252 			       sysctl_kern_timecounter_choice, 0, NULL, 0,
253 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
254 
255 		sysctl_createv(clog, 0, NULL, NULL,
256 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
257 			       CTLTYPE_STRING, "hardware",
258 			       SYSCTL_DESCR("currently active time counter"),
259 			       sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
260 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
261 
262 		sysctl_createv(clog, 0, NULL, NULL,
263 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
264 			       CTLTYPE_INT, "timestepwarnings",
265 			       SYSCTL_DESCR("log time steps"),
266 			       NULL, 0, &timestepwarnings, 0,
267 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
268 	}
269 }
270 
/*
 * Optional call statistics.  With TC_COUNTERS defined, TC_STATS(name)
 * declares and attaches an event counter called n<name>, and
 * TC_COUNT(var) increments it.  Without TC_COUNTERS, TC_COUNT()
 * expands to nothing.
 */
#ifdef TC_COUNTERS
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */
286 
287 static void tc_windup(void);
288 
289 /*
290  * Return the difference between the timehands' counter value now and what
291  * was when we copied it to the timehands' offset_count.
292  */
293 static __inline u_int
294 tc_delta(struct timehands *th)
295 {
296 	struct timecounter *tc;
297 
298 	tc = th->th_counter;
299 	return ((tc->tc_get_timecount(tc) -
300 		 th->th_offset_count) & tc->tc_counter_mask);
301 }
302 
303 /*
304  * Functions for reading the time.  We have to loop until we are sure that
305  * the timehands that we operated on was not updated under our feet.  See
306  * the comment in <sys/timevar.h> for a description of these 12 functions.
307  */
308 
309 void
310 binuptime(struct bintime *bt)
311 {
312 	struct timehands *th;
313 	u_int gen;
314 
315 	TC_COUNT(nbinuptime);
316 	do {
317 		th = timehands;
318 		gen = th->th_generation;
319 		*bt = th->th_offset;
320 		bintime_addx(bt, th->th_scale * tc_delta(th));
321 	} while (gen == 0 || gen != th->th_generation);
322 }
323 
324 void
325 nanouptime(struct timespec *tsp)
326 {
327 	struct bintime bt;
328 
329 	TC_COUNT(nnanouptime);
330 	binuptime(&bt);
331 	bintime2timespec(&bt, tsp);
332 }
333 
334 void
335 microuptime(struct timeval *tvp)
336 {
337 	struct bintime bt;
338 
339 	TC_COUNT(nmicrouptime);
340 	binuptime(&bt);
341 	bintime2timeval(&bt, tvp);
342 }
343 
344 void
345 bintime(struct bintime *bt)
346 {
347 
348 	TC_COUNT(nbintime);
349 	binuptime(bt);
350 	bintime_add(bt, &timebasebin);
351 }
352 
353 void
354 nanotime(struct timespec *tsp)
355 {
356 	struct bintime bt;
357 
358 	TC_COUNT(nnanotime);
359 	bintime(&bt);
360 	bintime2timespec(&bt, tsp);
361 }
362 
363 void
364 microtime(struct timeval *tvp)
365 {
366 	struct bintime bt;
367 
368 	TC_COUNT(nmicrotime);
369 	bintime(&bt);
370 	bintime2timeval(&bt, tvp);
371 }
372 
373 void
374 getbinuptime(struct bintime *bt)
375 {
376 	struct timehands *th;
377 	u_int gen;
378 
379 	TC_COUNT(ngetbinuptime);
380 	do {
381 		th = timehands;
382 		gen = th->th_generation;
383 		*bt = th->th_offset;
384 	} while (gen == 0 || gen != th->th_generation);
385 }
386 
387 void
388 getnanouptime(struct timespec *tsp)
389 {
390 	struct timehands *th;
391 	u_int gen;
392 
393 	TC_COUNT(ngetnanouptime);
394 	do {
395 		th = timehands;
396 		gen = th->th_generation;
397 		bintime2timespec(&th->th_offset, tsp);
398 	} while (gen == 0 || gen != th->th_generation);
399 }
400 
401 void
402 getmicrouptime(struct timeval *tvp)
403 {
404 	struct timehands *th;
405 	u_int gen;
406 
407 	TC_COUNT(ngetmicrouptime);
408 	do {
409 		th = timehands;
410 		gen = th->th_generation;
411 		bintime2timeval(&th->th_offset, tvp);
412 	} while (gen == 0 || gen != th->th_generation);
413 }
414 
415 void
416 getbintime(struct bintime *bt)
417 {
418 	struct timehands *th;
419 	u_int gen;
420 
421 	TC_COUNT(ngetbintime);
422 	do {
423 		th = timehands;
424 		gen = th->th_generation;
425 		*bt = th->th_offset;
426 	} while (gen == 0 || gen != th->th_generation);
427 	bintime_add(bt, &timebasebin);
428 }
429 
430 void
431 getnanotime(struct timespec *tsp)
432 {
433 	struct timehands *th;
434 	u_int gen;
435 
436 	TC_COUNT(ngetnanotime);
437 	do {
438 		th = timehands;
439 		gen = th->th_generation;
440 		*tsp = th->th_nanotime;
441 	} while (gen == 0 || gen != th->th_generation);
442 }
443 
444 void
445 getmicrotime(struct timeval *tvp)
446 {
447 	struct timehands *th;
448 	u_int gen;
449 
450 	TC_COUNT(ngetmicrotime);
451 	do {
452 		th = timehands;
453 		gen = th->th_generation;
454 		*tvp = th->th_microtime;
455 	} while (gen == 0 || gen != th->th_generation);
456 }
457 
458 /*
459  * Initialize a new timecounter and possibly use it.
460  */
461 void
462 tc_init(struct timecounter *tc)
463 {
464 	u_int u;
465 
466 	u = tc->tc_frequency / tc->tc_counter_mask;
467 	/* XXX: We need some margin here, 10% is a guess */
468 	u *= 11;
469 	u /= 10;
470 	if (u > hz && tc->tc_quality >= 0) {
471 		tc->tc_quality = -2000;
472 		aprint_verbose(
473 		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
474 			    tc->tc_name, (uintmax_t)tc->tc_frequency);
475 		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
476 	} else if (tc->tc_quality >= 0 || bootverbose) {
477 		aprint_verbose(
478 		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
479 		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
480 		    tc->tc_quality);
481 	}
482 
483 	mutex_enter(&time_lock);
484 	mutex_spin_enter(&timecounter_lock);
485 	tc->tc_next = timecounters;
486 	timecounters = tc;
487 	/*
488 	 * Never automatically use a timecounter with negative quality.
489 	 * Even though we run on the dummy counter, switching here may be
490 	 * worse since this timecounter may not be monotonous.
491 	 */
492 	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
493 	    (tc->tc_quality == timecounter->tc_quality &&
494 	    tc->tc_frequency > timecounter->tc_frequency))) {
495 		(void)tc->tc_get_timecount(tc);
496 		(void)tc->tc_get_timecount(tc);
497 		timecounter = tc;
498 		tc_windup();
499 	}
500 	mutex_spin_exit(&timecounter_lock);
501 	mutex_exit(&time_lock);
502 }
503 
504 /*
505  * Stop using a timecounter and remove it from the timecounters list.
506  */
507 int
508 tc_detach(struct timecounter *target)
509 {
510 	struct timecounter *best, *tc;
511 	struct timecounter **tcp = NULL;
512 	int rc = 0;
513 
514 	mutex_enter(&time_lock);
515 	for (tcp = &timecounters, tc = timecounters;
516 	     tc != NULL;
517 	     tcp = &tc->tc_next, tc = tc->tc_next) {
518 		if (tc == target)
519 			break;
520 	}
521 	if (tc == NULL) {
522 		rc = ESRCH;
523 		goto out;
524 	}
525 	*tcp = tc->tc_next;
526 
527 	if (timecounter != target)
528 		goto out;
529 
530 	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
531 		if (tc->tc_quality > best->tc_quality)
532 			best = tc;
533 		else if (tc->tc_quality < best->tc_quality)
534 			continue;
535 		else if (tc->tc_frequency > best->tc_frequency)
536 			best = tc;
537 	}
538 	mutex_spin_enter(&timecounter_lock);
539 	(void)best->tc_get_timecount(best);
540 	(void)best->tc_get_timecount(best);
541 	timecounter = best;
542 	tc_windup();
543 	mutex_spin_exit(&timecounter_lock);
544 out:
545 	mutex_exit(&time_lock);
546 	return rc;
547 }
548 
549 /* Report the frequency of the current timecounter. */
550 u_int64_t
551 tc_getfrequency(void)
552 {
553 
554 	return (timehands->th_counter->tc_frequency);
555 }
556 
557 /*
558  * Step our concept of UTC.  This is done by modifying our estimate of
559  * when we booted.
560  */
561 void
562 tc_setclock(struct timespec *ts)
563 {
564 	struct timespec ts2;
565 	struct bintime bt, bt2;
566 
567 	mutex_spin_enter(&timecounter_lock);
568 	TC_COUNT(nsetclock);
569 	binuptime(&bt2);
570 	timespec2bintime(ts, &bt);
571 	bintime_sub(&bt, &bt2);
572 	bintime_add(&bt2, &timebasebin);
573 	timebasebin = bt;
574 	tc_windup();
575 	mutex_spin_exit(&timecounter_lock);
576 
577 	if (timestepwarnings) {
578 		bintime2timespec(&bt2, &ts2);
579 		log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld\n",
580 		    (intmax_t)ts2.tv_sec, ts2.tv_nsec,
581 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
582 	}
583 }
584 
585 /*
586  * Initialize the next struct timehands in the ring and make
587  * it the active timehands.  Along the way we might switch to a different
588  * timecounter and/or do seconds processing in NTP.  Slightly magic.
589  */
590 static void
591 tc_windup(void)
592 {
593 	struct bintime bt;
594 	struct timehands *th, *tho;
595 	u_int64_t scale;
596 	u_int delta, ncount, ogen;
597 	int i, s_update;
598 	time_t t;
599 
600 	KASSERT(mutex_owned(&timecounter_lock));
601 
602 	s_update = 0;
603 
604 	/*
605 	 * Make the next timehands a copy of the current one, but do not
606 	 * overwrite the generation or next pointer.  While we update
607 	 * the contents, the generation must be zero.  Ensure global
608 	 * visibility of the generation before proceeding.
609 	 */
610 	tho = timehands;
611 	th = tho->th_next;
612 	ogen = th->th_generation;
613 	th->th_generation = 0;
614 	membar_producer();
615 	bcopy(tho, th, offsetof(struct timehands, th_generation));
616 
617 	/*
618 	 * Capture a timecounter delta on the current timecounter and if
619 	 * changing timecounters, a counter value from the new timecounter.
620 	 * Update the offset fields accordingly.
621 	 */
622 	delta = tc_delta(th);
623 	if (th->th_counter != timecounter)
624 		ncount = timecounter->tc_get_timecount(timecounter);
625 	else
626 		ncount = 0;
627 	th->th_offset_count += delta;
628 	th->th_offset_count &= th->th_counter->tc_counter_mask;
629 	bintime_addx(&th->th_offset, th->th_scale * delta);
630 
631 	/*
632 	 * Hardware latching timecounters may not generate interrupts on
633 	 * PPS events, so instead we poll them.  There is a finite risk that
634 	 * the hardware might capture a count which is later than the one we
635 	 * got above, and therefore possibly in the next NTP second which might
636 	 * have a different rate than the current NTP second.  It doesn't
637 	 * matter in practice.
638 	 */
639 	if (tho->th_counter->tc_poll_pps)
640 		tho->th_counter->tc_poll_pps(tho->th_counter);
641 
642 	/*
643 	 * Deal with NTP second processing.  The for loop normally
644 	 * iterates at most once, but in extreme situations it might
645 	 * keep NTP sane if timeouts are not run for several seconds.
646 	 * At boot, the time step can be large when the TOD hardware
647 	 * has been read, so on really large steps, we call
648 	 * ntp_update_second only twice.  We need to call it twice in
649 	 * case we missed a leap second.
650 	 * If NTP is not compiled in ntp_update_second still calculates
651 	 * the adjustment resulting from adjtime() calls.
652 	 */
653 	bt = th->th_offset;
654 	bintime_add(&bt, &timebasebin);
655 	i = bt.sec - tho->th_microtime.tv_sec;
656 	if (i > LARGE_STEP)
657 		i = 2;
658 	for (; i > 0; i--) {
659 		t = bt.sec;
660 		ntp_update_second(&th->th_adjustment, &bt.sec);
661 		s_update = 1;
662 		if (bt.sec != t)
663 			timebasebin.sec += bt.sec - t;
664 	}
665 
666 	/* Update the UTC timestamps used by the get*() functions. */
667 	/* XXX shouldn't do this here.  Should force non-`get' versions. */
668 	bintime2timeval(&bt, &th->th_microtime);
669 	bintime2timespec(&bt, &th->th_nanotime);
670 
671 	/* Now is a good time to change timecounters. */
672 	if (th->th_counter != timecounter) {
673 		th->th_counter = timecounter;
674 		th->th_offset_count = ncount;
675 		s_update = 1;
676 	}
677 
678 	/*-
679 	 * Recalculate the scaling factor.  We want the number of 1/2^64
680 	 * fractions of a second per period of the hardware counter, taking
681 	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
682 	 * processing provides us with.
683 	 *
684 	 * The th_adjustment is nanoseconds per second with 32 bit binary
685 	 * fraction and we want 64 bit binary fraction of second:
686 	 *
687 	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
688 	 *
689 	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
690 	 * we can only multiply by about 850 without overflowing, but that
691 	 * leaves suitably precise fractions for multiply before divide.
692 	 *
693 	 * Divide before multiply with a fraction of 2199/512 results in a
694 	 * systematic undercompensation of 10PPM of th_adjustment.  On a
695 	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
696  	 *
697 	 * We happily sacrifice the lowest of the 64 bits of our result
698 	 * to the goddess of code clarity.
699 	 *
700 	 */
701 	if (s_update) {
702 		scale = (u_int64_t)1 << 63;
703 		scale += (th->th_adjustment / 1024) * 2199;
704 		scale /= th->th_counter->tc_frequency;
705 		th->th_scale = scale * 2;
706 	}
707 	/*
708 	 * Now that the struct timehands is again consistent, set the new
709 	 * generation number, making sure to not make it zero.  Ensure
710 	 * changes are globally visible before changing.
711 	 */
712 	if (++ogen == 0)
713 		ogen = 1;
714 	membar_producer();
715 	th->th_generation = ogen;
716 
717 	/*
718 	 * Go live with the new struct timehands.  Ensure changes are
719 	 * globally visible before changing.
720 	 */
721 	time_second = th->th_microtime.tv_sec;
722 	time_uptime = th->th_offset.sec;
723 	membar_producer();
724 	timehands = th;
725 
726 	/*
727 	 * Force users of the old timehand to move on.  This is
728 	 * necessary for MP systems; we need to ensure that the
729 	 * consumers will move away from the old timehand before
730 	 * we begin updating it again when we eventually wrap
731 	 * around.
732 	 */
733 	if (++tho->th_generation == 0)
734 		tho->th_generation = 1;
735 }
736 
737 /*
738  * RFC 2783 PPS-API implementation.
739  */
740 
741 int
742 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
743 {
744 	pps_params_t *app;
745 	pps_info_t *pipi;
746 #ifdef PPS_SYNC
747 	int *epi;
748 #endif
749 
750 	KASSERT(mutex_owned(&timecounter_lock));
751 
752 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */
753 	switch (cmd) {
754 	case PPS_IOC_CREATE:
755 		return (0);
756 	case PPS_IOC_DESTROY:
757 		return (0);
758 	case PPS_IOC_SETPARAMS:
759 		app = (pps_params_t *)data;
760 		if (app->mode & ~pps->ppscap)
761 			return (EINVAL);
762 		pps->ppsparam = *app;
763 		return (0);
764 	case PPS_IOC_GETPARAMS:
765 		app = (pps_params_t *)data;
766 		*app = pps->ppsparam;
767 		app->api_version = PPS_API_VERS_1;
768 		return (0);
769 	case PPS_IOC_GETCAP:
770 		*(int*)data = pps->ppscap;
771 		return (0);
772 	case PPS_IOC_FETCH:
773 		pipi = (pps_info_t *)data;
774 		pps->ppsinfo.current_mode = pps->ppsparam.mode;
775 		*pipi = pps->ppsinfo;
776 		return (0);
777 	case PPS_IOC_KCBIND:
778 #ifdef PPS_SYNC
779 		epi = (int *)data;
780 		/* XXX Only root should be able to do this */
781 		if (*epi & ~pps->ppscap)
782 			return (EINVAL);
783 		pps->kcmode = *epi;
784 		return (0);
785 #else
786 		return (EOPNOTSUPP);
787 #endif
788 	default:
789 		return (EPASSTHROUGH);
790 	}
791 }
792 
793 void
794 pps_init(struct pps_state *pps)
795 {
796 
797 	KASSERT(mutex_owned(&timecounter_lock));
798 
799 	pps->ppscap |= PPS_TSFMT_TSPEC;
800 	if (pps->ppscap & PPS_CAPTUREASSERT)
801 		pps->ppscap |= PPS_OFFSETASSERT;
802 	if (pps->ppscap & PPS_CAPTURECLEAR)
803 		pps->ppscap |= PPS_OFFSETCLEAR;
804 }
805 
806 void
807 pps_capture(struct pps_state *pps)
808 {
809 	struct timehands *th;
810 
811 	KASSERT(mutex_owned(&timecounter_lock));
812 	KASSERT(pps != NULL);
813 
814 	th = timehands;
815 	pps->capgen = th->th_generation;
816 	pps->capth = th;
817 	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
818 	if (pps->capgen != th->th_generation)
819 		pps->capgen = 0;
820 }
821 
822 void
823 pps_event(struct pps_state *pps, int event)
824 {
825 	struct bintime bt;
826 	struct timespec ts, *tsp, *osp;
827 	u_int tcount, *pcount;
828 	int foff, fhard;
829 	pps_seq_t *pseq;
830 
831 	KASSERT(mutex_owned(&timecounter_lock));
832 
833 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */
834 	/* If the timecounter was wound up underneath us, bail out. */
835 	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
836 		return;
837 
838 	/* Things would be easier with arrays. */
839 	if (event == PPS_CAPTUREASSERT) {
840 		tsp = &pps->ppsinfo.assert_timestamp;
841 		osp = &pps->ppsparam.assert_offset;
842 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
843 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
844 		pcount = &pps->ppscount[0];
845 		pseq = &pps->ppsinfo.assert_sequence;
846 	} else {
847 		tsp = &pps->ppsinfo.clear_timestamp;
848 		osp = &pps->ppsparam.clear_offset;
849 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
850 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
851 		pcount = &pps->ppscount[1];
852 		pseq = &pps->ppsinfo.clear_sequence;
853 	}
854 
855 	/*
856 	 * If the timecounter changed, we cannot compare the count values, so
857 	 * we have to drop the rest of the PPS-stuff until the next event.
858 	 */
859 	if (pps->ppstc != pps->capth->th_counter) {
860 		pps->ppstc = pps->capth->th_counter;
861 		*pcount = pps->capcount;
862 		pps->ppscount[2] = pps->capcount;
863 		return;
864 	}
865 
866 	/* Convert the count to a timespec. */
867 	tcount = pps->capcount - pps->capth->th_offset_count;
868 	tcount &= pps->capth->th_counter->tc_counter_mask;
869 	bt = pps->capth->th_offset;
870 	bintime_addx(&bt, pps->capth->th_scale * tcount);
871 	bintime_add(&bt, &timebasebin);
872 	bintime2timespec(&bt, &ts);
873 
874 	/* If the timecounter was wound up underneath us, bail out. */
875 	if (pps->capgen != pps->capth->th_generation)
876 		return;
877 
878 	*pcount = pps->capcount;
879 	(*pseq)++;
880 	*tsp = ts;
881 
882 	if (foff) {
883 		timespecadd(tsp, osp, tsp);
884 		if (tsp->tv_nsec < 0) {
885 			tsp->tv_nsec += 1000000000;
886 			tsp->tv_sec -= 1;
887 		}
888 	}
889 #ifdef PPS_SYNC
890 	if (fhard) {
891 		u_int64_t scale;
892 
893 		/*
894 		 * Feed the NTP PLL/FLL.
895 		 * The FLL wants to know how many (hardware) nanoseconds
896 		 * elapsed since the previous event.
897 		 */
898 		tcount = pps->capcount - pps->ppscount[2];
899 		pps->ppscount[2] = pps->capcount;
900 		tcount &= pps->capth->th_counter->tc_counter_mask;
901 		scale = (u_int64_t)1 << 63;
902 		scale /= pps->capth->th_counter->tc_frequency;
903 		scale *= 2;
904 		bt.sec = 0;
905 		bt.frac = 0;
906 		bintime_addx(&bt, scale * tcount);
907 		bintime2timespec(&bt, &ts);
908 		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
909 	}
910 #endif
911 }
912 
913 /*
914  * Timecounters need to be updated every so often to prevent the hardware
915  * counter from overflowing.  Updating also recalculates the cached values
916  * used by the get*() family of functions, so their precision depends on
917  * the update frequency.
918  */
919 
920 static int tc_tick;
921 
922 void
923 tc_ticktock(void)
924 {
925 	static int count;
926 
927 	if (++count < tc_tick)
928 		return;
929 	count = 0;
930 	mutex_spin_enter(&timecounter_lock);
931 	tc_windup();
932 	mutex_spin_exit(&timecounter_lock);
933 }
934 
935 void
936 inittimecounter(void)
937 {
938 	u_int p;
939 
940 	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_SCHED);
941 
942 	/*
943 	 * Set the initial timeout to
944 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
945 	 * People should probably not use the sysctl to set the timeout
946 	 * to smaller than its inital value, since that value is the
947 	 * smallest reasonable one.  If they want better timestamps they
948 	 * should use the non-"get"* functions.
949 	 */
950 	if (hz > 1000)
951 		tc_tick = (hz + 500) / 1000;
952 	else
953 		tc_tick = 1;
954 	p = (tc_tick * 1000000) / hz;
955 	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
956 	    p / 1000, p % 1000);
957 
958 	/* warm up new timecounter (again) and get rolling. */
959 	(void)timecounter->tc_get_timecount(timecounter);
960 	(void)timecounter->tc_get_timecount(timecounter);
961 }
962