xref: /netbsd-src/sys/kern/kern_tc.c (revision 7c3f385475147b6e1c4753f2bee961630e2dfc40)
1 /* $NetBSD: kern_tc.c,v 1.32 2008/02/10 13:56:17 ad Exp $ */
2 
3 /*-
4  * ----------------------------------------------------------------------------
5  * "THE BEER-WARE LICENSE" (Revision 42):
6  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
7  * can do whatever you want with this stuff. If we meet some day, and you think
8  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
9  * ---------------------------------------------------------------------------
10  */
11 
12 #include <sys/cdefs.h>
13 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
14 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.32 2008/02/10 13:56:17 ad Exp $");
15 
16 #include "opt_ntp.h"
17 
18 #include <sys/param.h>
19 #include <sys/kernel.h>
20 #include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
21 #include <sys/sysctl.h>
22 #include <sys/syslog.h>
23 #include <sys/systm.h>
24 #include <sys/timepps.h>
25 #include <sys/timetc.h>
26 #include <sys/timex.h>
27 #include <sys/evcnt.h>
28 #include <sys/kauth.h>
29 #include <sys/mutex.h>
30 #include <sys/atomic.h>
31 
/*
 * Threshold, in seconds, above which a forward jump of the clock seen
 * by tc_windup() counts as a "large step" -- the kind that happens at
 * boot.  It is deliberately small: in the usual "missed a couple of
 * seconds" case ntp_update_second still gets called once per second
 * that went by, while a bigger jump is capped so that tc_windup()
 * does not keep looping.
 */
#define	LARGE_STEP	200
39 
40 /*
41  * Implement a dummy timecounter which we can use until we get a real one
42  * in the air.  This allows the console and other early stuff to use
43  * time services.
44  */
45 
46 static u_int
47 dummy_get_timecount(struct timecounter *tc)
48 {
49 	static u_int now;
50 
51 	return (++now);
52 }
53 
54 static struct timecounter dummy_timecounter = {
55 	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
56 };
57 
58 struct timehands {
59 	/* These fields must be initialized by the driver. */
60 	struct timecounter	*th_counter;
61 	int64_t			th_adjustment;
62 	u_int64_t		th_scale;
63 	u_int	 		th_offset_count;
64 	struct bintime		th_offset;
65 	struct timeval		th_microtime;
66 	struct timespec		th_nanotime;
67 	/* Fields not to be copied in tc_windup start with th_generation. */
68 	volatile u_int		th_generation;
69 	struct timehands	*th_next;
70 };
71 
72 static struct timehands th0;
73 static struct timehands th9 = { .th_next = &th0, };
74 static struct timehands th8 = { .th_next = &th9, };
75 static struct timehands th7 = { .th_next = &th8, };
76 static struct timehands th6 = { .th_next = &th7, };
77 static struct timehands th5 = { .th_next = &th6, };
78 static struct timehands th4 = { .th_next = &th5, };
79 static struct timehands th3 = { .th_next = &th4, };
80 static struct timehands th2 = { .th_next = &th3, };
81 static struct timehands th1 = { .th_next = &th2, };
82 static struct timehands th0 = {
83 	.th_counter = &dummy_timecounter,
84 	.th_scale = (uint64_t)-1 / 1000000,
85 	.th_offset = { .sec = 1, .frac = 0 },
86 	.th_generation = 1,
87 	.th_next = &th1,
88 };
89 
90 static struct timehands *volatile timehands = &th0;
91 struct timecounter *timecounter = &dummy_timecounter;
92 static struct timecounter *timecounters = &dummy_timecounter;
93 
94 time_t time_second = 1;
95 time_t time_uptime = 1;
96 
97 static struct bintime timebasebin;
98 
99 static int timestepwarnings;
100 
101 extern kmutex_t time_lock;
102 static kmutex_t tc_windup_lock;
103 
104 #ifdef __FreeBSD__
105 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
106     &timestepwarnings, 0, "");
107 #endif /* __FreeBSD__ */
108 
109 /*
110  * sysctl helper routine for kern.timercounter.hardware
111  */
112 static int
113 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
114 {
115 	struct sysctlnode node;
116 	int error;
117 	char newname[MAX_TCNAMELEN];
118 	struct timecounter *newtc, *tc;
119 
120 	tc = timecounter;
121 
122 	strlcpy(newname, tc->tc_name, sizeof(newname));
123 
124 	node = *rnode;
125 	node.sysctl_data = newname;
126 	node.sysctl_size = sizeof(newname);
127 
128 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
129 
130 	if (error ||
131 	    newp == NULL ||
132 	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
133 		return error;
134 
135 	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
136 	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
137 	    NULL, NULL)) != 0)
138 		return (error);
139 
140 	if (!cold)
141 		mutex_enter(&time_lock);
142 	error = EINVAL;
143 	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
144 		if (strcmp(newname, newtc->tc_name) != 0)
145 			continue;
146 		/* Warm up new timecounter. */
147 		(void)newtc->tc_get_timecount(newtc);
148 		(void)newtc->tc_get_timecount(newtc);
149 		timecounter = newtc;
150 		error = 0;
151 		break;
152 	}
153 	if (!cold)
154 		mutex_exit(&time_lock);
155 	return error;
156 }
157 
158 static int
159 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
160 {
161 	char buf[MAX_TCNAMELEN+48];
162 	char *where = oldp;
163 	const char *spc;
164 	struct timecounter *tc;
165 	size_t needed, left, slen;
166 	int error;
167 
168 	if (newp != NULL)
169 		return (EPERM);
170 	if (namelen != 0)
171 		return (EINVAL);
172 
173 	spc = "";
174 	error = 0;
175 	needed = 0;
176 	left = *oldlenp;
177 
178 	mutex_enter(&time_lock);
179 	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
180 		if (where == NULL) {
181 			needed += sizeof(buf);  /* be conservative */
182 		} else {
183 			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
184 					" Hz)", spc, tc->tc_name, tc->tc_quality,
185 					tc->tc_frequency);
186 			if (left < slen + 1)
187 				break;
188 			/* XXX use sysctl_copyout? (from sysctl_hw_disknames) */
189 			/* XXX copyout with held lock. */
190 			error = copyout(buf, where, slen + 1);
191 			spc = " ";
192 			where += slen;
193 			needed += slen;
194 			left -= slen;
195 		}
196 	}
197 	mutex_exit(&time_lock);
198 
199 	*oldlenp = needed;
200 	return (error);
201 }
202 
203 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
204 {
205 	const struct sysctlnode *node;
206 
207 	sysctl_createv(clog, 0, NULL, &node,
208 		       CTLFLAG_PERMANENT,
209 		       CTLTYPE_NODE, "timecounter",
210 		       SYSCTL_DESCR("time counter information"),
211 		       NULL, 0, NULL, 0,
212 		       CTL_KERN, CTL_CREATE, CTL_EOL);
213 
214 	if (node != NULL) {
215 		sysctl_createv(clog, 0, NULL, NULL,
216 			       CTLFLAG_PERMANENT,
217 			       CTLTYPE_STRING, "choice",
218 			       SYSCTL_DESCR("available counters"),
219 			       sysctl_kern_timecounter_choice, 0, NULL, 0,
220 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
221 
222 		sysctl_createv(clog, 0, NULL, NULL,
223 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
224 			       CTLTYPE_STRING, "hardware",
225 			       SYSCTL_DESCR("currently active time counter"),
226 			       sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
227 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
228 
229 		sysctl_createv(clog, 0, NULL, NULL,
230 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
231 			       CTLTYPE_INT, "timestepwarnings",
232 			       SYSCTL_DESCR("log time steps"),
233 			       NULL, 0, &timestepwarnings, 0,
234 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
235 	}
236 }
237 
/*
 * Optional call statistics.  With TC_COUNTERS defined there is one
 * event counter named n<function> per time function, and
 * TC_COUNT(n<function>) increments it.  Without TC_COUNTERS,
 * TC_COUNT() expands to nothing.
 */
#ifdef TC_COUNTERS
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);
TC_STATS(nanouptime);
TC_STATS(microuptime);
TC_STATS(bintime);
TC_STATS(nanotime);
TC_STATS(microtime);
TC_STATS(getbinuptime);
TC_STATS(getnanouptime);
TC_STATS(getmicrouptime);
TC_STATS(getbintime);
TC_STATS(getnanotime);
TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */
253 
254 static void tc_windup(void);
255 
256 /*
257  * Return the difference between the timehands' counter value now and what
258  * was when we copied it to the timehands' offset_count.
259  */
260 static __inline u_int
261 tc_delta(struct timehands *th)
262 {
263 	struct timecounter *tc;
264 
265 	tc = th->th_counter;
266 	return ((tc->tc_get_timecount(tc) -
267 		 th->th_offset_count) & tc->tc_counter_mask);
268 }
269 
270 /*
271  * Functions for reading the time.  We have to loop until we are sure that
272  * the timehands that we operated on was not updated under our feet.  See
273  * the comment in <sys/timevar.h> for a description of these 12 functions.
274  */
275 
276 void
277 binuptime(struct bintime *bt)
278 {
279 	struct timehands *th;
280 	u_int gen;
281 
282 	TC_COUNT(nbinuptime);
283 	do {
284 		th = timehands;
285 		gen = th->th_generation;
286 		*bt = th->th_offset;
287 		bintime_addx(bt, th->th_scale * tc_delta(th));
288 	} while (gen == 0 || gen != th->th_generation);
289 }
290 
291 void
292 nanouptime(struct timespec *tsp)
293 {
294 	struct bintime bt;
295 
296 	TC_COUNT(nnanouptime);
297 	binuptime(&bt);
298 	bintime2timespec(&bt, tsp);
299 }
300 
301 void
302 microuptime(struct timeval *tvp)
303 {
304 	struct bintime bt;
305 
306 	TC_COUNT(nmicrouptime);
307 	binuptime(&bt);
308 	bintime2timeval(&bt, tvp);
309 }
310 
311 void
312 bintime(struct bintime *bt)
313 {
314 
315 	TC_COUNT(nbintime);
316 	binuptime(bt);
317 	bintime_add(bt, &timebasebin);
318 }
319 
320 void
321 nanotime(struct timespec *tsp)
322 {
323 	struct bintime bt;
324 
325 	TC_COUNT(nnanotime);
326 	bintime(&bt);
327 	bintime2timespec(&bt, tsp);
328 }
329 
330 void
331 microtime(struct timeval *tvp)
332 {
333 	struct bintime bt;
334 
335 	TC_COUNT(nmicrotime);
336 	bintime(&bt);
337 	bintime2timeval(&bt, tvp);
338 }
339 
340 void
341 getbinuptime(struct bintime *bt)
342 {
343 	struct timehands *th;
344 	u_int gen;
345 
346 	TC_COUNT(ngetbinuptime);
347 	do {
348 		th = timehands;
349 		gen = th->th_generation;
350 		*bt = th->th_offset;
351 	} while (gen == 0 || gen != th->th_generation);
352 }
353 
354 void
355 getnanouptime(struct timespec *tsp)
356 {
357 	struct timehands *th;
358 	u_int gen;
359 
360 	TC_COUNT(ngetnanouptime);
361 	do {
362 		th = timehands;
363 		gen = th->th_generation;
364 		bintime2timespec(&th->th_offset, tsp);
365 	} while (gen == 0 || gen != th->th_generation);
366 }
367 
368 void
369 getmicrouptime(struct timeval *tvp)
370 {
371 	struct timehands *th;
372 	u_int gen;
373 
374 	TC_COUNT(ngetmicrouptime);
375 	do {
376 		th = timehands;
377 		gen = th->th_generation;
378 		bintime2timeval(&th->th_offset, tvp);
379 	} while (gen == 0 || gen != th->th_generation);
380 }
381 
382 void
383 getbintime(struct bintime *bt)
384 {
385 	struct timehands *th;
386 	u_int gen;
387 
388 	TC_COUNT(ngetbintime);
389 	do {
390 		th = timehands;
391 		gen = th->th_generation;
392 		*bt = th->th_offset;
393 	} while (gen == 0 || gen != th->th_generation);
394 	bintime_add(bt, &timebasebin);
395 }
396 
397 void
398 getnanotime(struct timespec *tsp)
399 {
400 	struct timehands *th;
401 	u_int gen;
402 
403 	TC_COUNT(ngetnanotime);
404 	do {
405 		th = timehands;
406 		gen = th->th_generation;
407 		*tsp = th->th_nanotime;
408 	} while (gen == 0 || gen != th->th_generation);
409 }
410 
411 void
412 getmicrotime(struct timeval *tvp)
413 {
414 	struct timehands *th;
415 	u_int gen;
416 
417 	TC_COUNT(ngetmicrotime);
418 	do {
419 		th = timehands;
420 		gen = th->th_generation;
421 		*tvp = th->th_microtime;
422 	} while (gen == 0 || gen != th->th_generation);
423 }
424 
425 /*
426  * Initialize a new timecounter and possibly use it.
427  */
428 void
429 tc_init(struct timecounter *tc)
430 {
431 	u_int u;
432 
433 	u = tc->tc_frequency / tc->tc_counter_mask;
434 	/* XXX: We need some margin here, 10% is a guess */
435 	u *= 11;
436 	u /= 10;
437 	if (u > hz && tc->tc_quality >= 0) {
438 		tc->tc_quality = -2000;
439 		aprint_verbose(
440 		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
441 			    tc->tc_name, (uintmax_t)tc->tc_frequency);
442 		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
443 	} else if (tc->tc_quality >= 0 || bootverbose) {
444 		aprint_verbose(
445 		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
446 		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
447 		    tc->tc_quality);
448 	}
449 
450 	mutex_enter(&time_lock);
451 	mutex_spin_enter(&tc_windup_lock);
452 	tc->tc_next = timecounters;
453 	timecounters = tc;
454 	/*
455 	 * Never automatically use a timecounter with negative quality.
456 	 * Even though we run on the dummy counter, switching here may be
457 	 * worse since this timecounter may not be monotonous.
458 	 */
459 	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
460 	    (tc->tc_quality == timecounter->tc_quality &&
461 	    tc->tc_frequency > timecounter->tc_frequency))) {
462 		(void)tc->tc_get_timecount(tc);
463 		(void)tc->tc_get_timecount(tc);
464 		timecounter = tc;
465 		tc_windup();
466 	}
467 	mutex_spin_exit(&tc_windup_lock);
468 	mutex_exit(&time_lock);
469 }
470 
471 /*
472  * Stop using a timecounter and remove it from the timecounters list.
473  */
474 int
475 tc_detach(struct timecounter *target)
476 {
477 	struct timecounter *best, *tc;
478 	struct timecounter **tcp = NULL;
479 	int rc = 0;
480 
481 	mutex_enter(&time_lock);
482 	for (tcp = &timecounters, tc = timecounters;
483 	     tc != NULL;
484 	     tcp = &tc->tc_next, tc = tc->tc_next) {
485 		if (tc == target)
486 			break;
487 	}
488 	if (tc == NULL) {
489 		rc = ESRCH;
490 		goto out;
491 	}
492 	*tcp = tc->tc_next;
493 
494 	if (timecounter != target)
495 		goto out;
496 
497 	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
498 		if (tc->tc_quality > best->tc_quality)
499 			best = tc;
500 		else if (tc->tc_quality < best->tc_quality)
501 			continue;
502 		else if (tc->tc_frequency > best->tc_frequency)
503 			best = tc;
504 	}
505 	mutex_spin_enter(&tc_windup_lock);
506 	(void)best->tc_get_timecount(best);
507 	(void)best->tc_get_timecount(best);
508 	timecounter = best;
509 	tc_windup();
510 	mutex_spin_exit(&tc_windup_lock);
511 out:
512 	mutex_exit(&time_lock);
513 	return rc;
514 }
515 
516 /* Report the frequency of the current timecounter. */
517 u_int64_t
518 tc_getfrequency(void)
519 {
520 
521 	return (timehands->th_counter->tc_frequency);
522 }
523 
524 /*
525  * Step our concept of UTC.  This is done by modifying our estimate of
526  * when we booted.
527  */
528 void
529 tc_setclock(struct timespec *ts)
530 {
531 	struct timespec ts2;
532 	struct bintime bt, bt2;
533 
534 	mutex_spin_enter(&tc_windup_lock);
535 	TC_COUNT(nsetclock);
536 	binuptime(&bt2);
537 	timespec2bintime(ts, &bt);
538 	bintime_sub(&bt, &bt2);
539 	bintime_add(&bt2, &timebasebin);
540 	timebasebin = bt;
541 	tc_windup();
542 	mutex_spin_exit(&tc_windup_lock);
543 
544 	if (timestepwarnings) {
545 		bintime2timespec(&bt2, &ts2);
546 		log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld\n",
547 		    (intmax_t)ts2.tv_sec, ts2.tv_nsec,
548 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
549 	}
550 }
551 
552 /*
553  * Initialize the next struct timehands in the ring and make
554  * it the active timehands.  Along the way we might switch to a different
555  * timecounter and/or do seconds processing in NTP.  Slightly magic.
556  */
557 static void
558 tc_windup(void)
559 {
560 	struct bintime bt;
561 	struct timehands *th, *tho;
562 	u_int64_t scale;
563 	u_int delta, ncount, ogen;
564 	int i, s_update;
565 	time_t t;
566 
567 	KASSERT(mutex_owned(&tc_windup_lock));
568 
569 	s_update = 0;
570 
571 	/*
572 	 * Make the next timehands a copy of the current one, but do not
573 	 * overwrite the generation or next pointer.  While we update
574 	 * the contents, the generation must be zero.  Ensure global
575 	 * visibility of the generation before proceeding.
576 	 */
577 	tho = timehands;
578 	th = tho->th_next;
579 	ogen = th->th_generation;
580 	th->th_generation = 0;
581 	membar_producer();
582 	bcopy(tho, th, offsetof(struct timehands, th_generation));
583 
584 	/*
585 	 * Capture a timecounter delta on the current timecounter and if
586 	 * changing timecounters, a counter value from the new timecounter.
587 	 * Update the offset fields accordingly.
588 	 */
589 	delta = tc_delta(th);
590 	if (th->th_counter != timecounter)
591 		ncount = timecounter->tc_get_timecount(timecounter);
592 	else
593 		ncount = 0;
594 	th->th_offset_count += delta;
595 	th->th_offset_count &= th->th_counter->tc_counter_mask;
596 	bintime_addx(&th->th_offset, th->th_scale * delta);
597 
598 	/*
599 	 * Hardware latching timecounters may not generate interrupts on
600 	 * PPS events, so instead we poll them.  There is a finite risk that
601 	 * the hardware might capture a count which is later than the one we
602 	 * got above, and therefore possibly in the next NTP second which might
603 	 * have a different rate than the current NTP second.  It doesn't
604 	 * matter in practice.
605 	 */
606 	if (tho->th_counter->tc_poll_pps)
607 		tho->th_counter->tc_poll_pps(tho->th_counter);
608 
609 	/*
610 	 * Deal with NTP second processing.  The for loop normally
611 	 * iterates at most once, but in extreme situations it might
612 	 * keep NTP sane if timeouts are not run for several seconds.
613 	 * At boot, the time step can be large when the TOD hardware
614 	 * has been read, so on really large steps, we call
615 	 * ntp_update_second only twice.  We need to call it twice in
616 	 * case we missed a leap second.
617 	 * If NTP is not compiled in ntp_update_second still calculates
618 	 * the adjustment resulting from adjtime() calls.
619 	 */
620 	bt = th->th_offset;
621 	bintime_add(&bt, &timebasebin);
622 	i = bt.sec - tho->th_microtime.tv_sec;
623 	if (i > LARGE_STEP)
624 		i = 2;
625 	for (; i > 0; i--) {
626 		t = bt.sec;
627 		ntp_update_second(&th->th_adjustment, &bt.sec);
628 		s_update = 1;
629 		if (bt.sec != t)
630 			timebasebin.sec += bt.sec - t;
631 	}
632 
633 	/* Update the UTC timestamps used by the get*() functions. */
634 	/* XXX shouldn't do this here.  Should force non-`get' versions. */
635 	bintime2timeval(&bt, &th->th_microtime);
636 	bintime2timespec(&bt, &th->th_nanotime);
637 
638 	/* Now is a good time to change timecounters. */
639 	if (th->th_counter != timecounter) {
640 		th->th_counter = timecounter;
641 		th->th_offset_count = ncount;
642 		s_update = 1;
643 	}
644 
645 	/*-
646 	 * Recalculate the scaling factor.  We want the number of 1/2^64
647 	 * fractions of a second per period of the hardware counter, taking
648 	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
649 	 * processing provides us with.
650 	 *
651 	 * The th_adjustment is nanoseconds per second with 32 bit binary
652 	 * fraction and we want 64 bit binary fraction of second:
653 	 *
654 	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
655 	 *
656 	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
657 	 * we can only multiply by about 850 without overflowing, but that
658 	 * leaves suitably precise fractions for multiply before divide.
659 	 *
660 	 * Divide before multiply with a fraction of 2199/512 results in a
661 	 * systematic undercompensation of 10PPM of th_adjustment.  On a
662 	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
663  	 *
664 	 * We happily sacrifice the lowest of the 64 bits of our result
665 	 * to the goddess of code clarity.
666 	 *
667 	 */
668 	if (s_update) {
669 		scale = (u_int64_t)1 << 63;
670 		scale += (th->th_adjustment / 1024) * 2199;
671 		scale /= th->th_counter->tc_frequency;
672 		th->th_scale = scale * 2;
673 	}
674 	/*
675 	 * Now that the struct timehands is again consistent, set the new
676 	 * generation number, making sure to not make it zero.  Ensure
677 	 * changes are globally visible before changing.
678 	 */
679 	if (++ogen == 0)
680 		ogen = 1;
681 	membar_producer();
682 	th->th_generation = ogen;
683 
684 	/*
685 	 * Go live with the new struct timehands.  Ensure changes are
686 	 * globally visible before changing.
687 	 */
688 	time_second = th->th_microtime.tv_sec;
689 	time_uptime = th->th_offset.sec;
690 	membar_producer();
691 	timehands = th;
692 
693 	/*
694 	 * Force users of the old timehand to move on.  This is
695 	 * necessary for MP systems; we need to ensure that the
696 	 * consumers will move away from the old timehand before
697 	 * we begin updating it again when we eventually wrap
698 	 * around.
699 	 */
700 	if (++tho->th_generation == 0)
701 		tho->th_generation = 1;
702 }
703 
704 /*
705  * RFC 2783 PPS-API implementation.
706  */
707 
708 int
709 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
710 {
711 	pps_params_t *app;
712 	pps_info_t *pipi;
713 #ifdef PPS_SYNC
714 	int *epi;
715 #endif
716 
717 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */
718 	switch (cmd) {
719 	case PPS_IOC_CREATE:
720 		return (0);
721 	case PPS_IOC_DESTROY:
722 		return (0);
723 	case PPS_IOC_SETPARAMS:
724 		app = (pps_params_t *)data;
725 		if (app->mode & ~pps->ppscap)
726 			return (EINVAL);
727 		pps->ppsparam = *app;
728 		return (0);
729 	case PPS_IOC_GETPARAMS:
730 		app = (pps_params_t *)data;
731 		*app = pps->ppsparam;
732 		app->api_version = PPS_API_VERS_1;
733 		return (0);
734 	case PPS_IOC_GETCAP:
735 		*(int*)data = pps->ppscap;
736 		return (0);
737 	case PPS_IOC_FETCH:
738 		pipi = (pps_info_t *)data;
739 		pps->ppsinfo.current_mode = pps->ppsparam.mode;
740 		*pipi = pps->ppsinfo;
741 		return (0);
742 	case PPS_IOC_KCBIND:
743 #ifdef PPS_SYNC
744 		epi = (int *)data;
745 		/* XXX Only root should be able to do this */
746 		if (*epi & ~pps->ppscap)
747 			return (EINVAL);
748 		pps->kcmode = *epi;
749 		return (0);
750 #else
751 		return (EOPNOTSUPP);
752 #endif
753 	default:
754 		return (EPASSTHROUGH);
755 	}
756 }
757 
758 void
759 pps_init(struct pps_state *pps)
760 {
761 	pps->ppscap |= PPS_TSFMT_TSPEC;
762 	if (pps->ppscap & PPS_CAPTUREASSERT)
763 		pps->ppscap |= PPS_OFFSETASSERT;
764 	if (pps->ppscap & PPS_CAPTURECLEAR)
765 		pps->ppscap |= PPS_OFFSETCLEAR;
766 }
767 
768 void
769 pps_capture(struct pps_state *pps)
770 {
771 	struct timehands *th;
772 
773 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_capture") */
774 	th = timehands;
775 	pps->capgen = th->th_generation;
776 	pps->capth = th;
777 	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
778 	if (pps->capgen != th->th_generation)
779 		pps->capgen = 0;
780 }
781 
782 void
783 pps_event(struct pps_state *pps, int event)
784 {
785 	struct bintime bt;
786 	struct timespec ts, *tsp, *osp;
787 	u_int tcount, *pcount;
788 	int foff, fhard;
789 	pps_seq_t *pseq;
790 
791 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */
792 	/* If the timecounter was wound up underneath us, bail out. */
793 	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
794 		return;
795 
796 	/* Things would be easier with arrays. */
797 	if (event == PPS_CAPTUREASSERT) {
798 		tsp = &pps->ppsinfo.assert_timestamp;
799 		osp = &pps->ppsparam.assert_offset;
800 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
801 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
802 		pcount = &pps->ppscount[0];
803 		pseq = &pps->ppsinfo.assert_sequence;
804 	} else {
805 		tsp = &pps->ppsinfo.clear_timestamp;
806 		osp = &pps->ppsparam.clear_offset;
807 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
808 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
809 		pcount = &pps->ppscount[1];
810 		pseq = &pps->ppsinfo.clear_sequence;
811 	}
812 
813 	/*
814 	 * If the timecounter changed, we cannot compare the count values, so
815 	 * we have to drop the rest of the PPS-stuff until the next event.
816 	 */
817 	if (pps->ppstc != pps->capth->th_counter) {
818 		pps->ppstc = pps->capth->th_counter;
819 		*pcount = pps->capcount;
820 		pps->ppscount[2] = pps->capcount;
821 		return;
822 	}
823 
824 	/* Convert the count to a timespec. */
825 	tcount = pps->capcount - pps->capth->th_offset_count;
826 	tcount &= pps->capth->th_counter->tc_counter_mask;
827 	bt = pps->capth->th_offset;
828 	bintime_addx(&bt, pps->capth->th_scale * tcount);
829 	bintime_add(&bt, &timebasebin);
830 	bintime2timespec(&bt, &ts);
831 
832 	/* If the timecounter was wound up underneath us, bail out. */
833 	if (pps->capgen != pps->capth->th_generation)
834 		return;
835 
836 	*pcount = pps->capcount;
837 	(*pseq)++;
838 	*tsp = ts;
839 
840 	if (foff) {
841 		timespecadd(tsp, osp, tsp);
842 		if (tsp->tv_nsec < 0) {
843 			tsp->tv_nsec += 1000000000;
844 			tsp->tv_sec -= 1;
845 		}
846 	}
847 #ifdef PPS_SYNC
848 	if (fhard) {
849 		u_int64_t scale;
850 
851 		/*
852 		 * Feed the NTP PLL/FLL.
853 		 * The FLL wants to know how many (hardware) nanoseconds
854 		 * elapsed since the previous event.
855 		 */
856 		tcount = pps->capcount - pps->ppscount[2];
857 		pps->ppscount[2] = pps->capcount;
858 		tcount &= pps->capth->th_counter->tc_counter_mask;
859 		scale = (u_int64_t)1 << 63;
860 		scale /= pps->capth->th_counter->tc_frequency;
861 		scale *= 2;
862 		bt.sec = 0;
863 		bt.frac = 0;
864 		bintime_addx(&bt, scale * tcount);
865 		bintime2timespec(&bt, &ts);
866 		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
867 	}
868 #endif
869 }
870 
871 /*
872  * Timecounters need to be updated every so often to prevent the hardware
873  * counter from overflowing.  Updating also recalculates the cached values
874  * used by the get*() family of functions, so their precision depends on
875  * the update frequency.
876  */
877 
878 static int tc_tick;
879 
880 void
881 tc_ticktock(void)
882 {
883 	static int count;
884 
885 	if (++count < tc_tick)
886 		return;
887 	count = 0;
888 	mutex_spin_enter(&tc_windup_lock);
889 	tc_windup();
890 	mutex_spin_exit(&tc_windup_lock);
891 }
892 
893 void
894 inittimecounter(void)
895 {
896 	u_int p;
897 
898 	mutex_init(&tc_windup_lock, MUTEX_DEFAULT, IPL_SCHED);
899 
900 	/*
901 	 * Set the initial timeout to
902 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
903 	 * People should probably not use the sysctl to set the timeout
904 	 * to smaller than its inital value, since that value is the
905 	 * smallest reasonable one.  If they want better timestamps they
906 	 * should use the non-"get"* functions.
907 	 */
908 	if (hz > 1000)
909 		tc_tick = (hz + 500) / 1000;
910 	else
911 		tc_tick = 1;
912 	p = (tc_tick * 1000000) / hz;
913 	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
914 	    p / 1000, p % 1000);
915 
916 	/* warm up new timecounter (again) and get rolling. */
917 	(void)timecounter->tc_get_timecount(timecounter);
918 	(void)timecounter->tc_get_timecount(timecounter);
919 }
920