xref: /netbsd-src/sys/kern/kern_tc.c (revision 64e8b7a678a421257e0ddaec7df5b429902c2adf)
1*64e8b7a6Sandvar /* $NetBSD: kern_tc.c,v 1.77 2024/05/11 06:34:45 andvar Exp $ */
2a2249ef7Sad 
3a2249ef7Sad /*-
42fc2b080Sad  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5a2249ef7Sad  * All rights reserved.
6a2249ef7Sad  *
72fc2b080Sad  * This code is derived from software contributed to The NetBSD Foundation
82fc2b080Sad  * by Andrew Doran.
92fc2b080Sad  *
10a2249ef7Sad  * Redistribution and use in source and binary forms, with or without
11a2249ef7Sad  * modification, are permitted provided that the following conditions
12a2249ef7Sad  * are met:
13a2249ef7Sad  * 1. Redistributions of source code must retain the above copyright
14a2249ef7Sad  *    notice, this list of conditions and the following disclaimer.
15a2249ef7Sad  * 2. Redistributions in binary form must reproduce the above copyright
16a2249ef7Sad  *    notice, this list of conditions and the following disclaimer in the
17a2249ef7Sad  *    documentation and/or other materials provided with the distribution.
18a2249ef7Sad  *
19a2249ef7Sad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20a2249ef7Sad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21a2249ef7Sad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22a2249ef7Sad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23a2249ef7Sad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24a2249ef7Sad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25a2249ef7Sad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26a2249ef7Sad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27a2249ef7Sad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28a2249ef7Sad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29a2249ef7Sad  * POSSIBILITY OF SUCH DAMAGE.
30a2249ef7Sad  */
31de4337abSkardel 
32769d33c5Ssimonb /*-
33769d33c5Ssimonb  * ----------------------------------------------------------------------------
34769d33c5Ssimonb  * "THE BEER-WARE LICENSE" (Revision 42):
35769d33c5Ssimonb  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
36769d33c5Ssimonb  * can do whatever you want with this stuff. If we meet some day, and you think
37769d33c5Ssimonb  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
38de4337abSkardel  * ---------------------------------------------------------------------------
39769d33c5Ssimonb  */
40769d33c5Ssimonb 
41002ee648Sriastradh /*
42002ee648Sriastradh  * https://papers.freebsd.org/2002/phk-timecounters.files/timecounter.pdf
43002ee648Sriastradh  */
44002ee648Sriastradh 
45769d33c5Ssimonb #include <sys/cdefs.h>
46de4337abSkardel /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
47*64e8b7a6Sandvar __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.77 2024/05/11 06:34:45 andvar Exp $");
48c88fab79Srin 
49c88fab79Srin #ifdef _KERNEL_OPT
50c88fab79Srin #include "opt_ntp.h"
51c88fab79Srin #endif
52769d33c5Ssimonb 
53769d33c5Ssimonb #include <sys/param.h>
545524172fSriastradh 
5591ec786cSsimonb #include <sys/atomic.h>
5691ec786cSsimonb #include <sys/evcnt.h>
5791ec786cSsimonb #include <sys/kauth.h>
58769d33c5Ssimonb #include <sys/kernel.h>
595524172fSriastradh #include <sys/lock.h>
6091ec786cSsimonb #include <sys/mutex.h>
61de4337abSkardel #include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
62769d33c5Ssimonb #include <sys/sysctl.h>
63769d33c5Ssimonb #include <sys/syslog.h>
64769d33c5Ssimonb #include <sys/systm.h>
65769d33c5Ssimonb #include <sys/timepps.h>
66769d33c5Ssimonb #include <sys/timetc.h>
67769d33c5Ssimonb #include <sys/timex.h>
682fc2b080Sad #include <sys/xcall.h>
69de4337abSkardel 
70de4337abSkardel /*
71769d33c5Ssimonb  * A large step happens on boot.  This constant detects such steps.
72769d33c5Ssimonb  * It is relatively small so that ntp_update_second gets called enough
73769d33c5Ssimonb  * in the typical 'missed a couple of seconds' case, but doesn't loop
74769d33c5Ssimonb  * forever when the time step is large.
75769d33c5Ssimonb  */
76769d33c5Ssimonb #define LARGE_STEP	200
77769d33c5Ssimonb 
78769d33c5Ssimonb /*
79769d33c5Ssimonb  * Implement a dummy timecounter which we can use until we get a real one
80769d33c5Ssimonb  * in the air.  This allows the console and other early stuff to use
81769d33c5Ssimonb  * time services.
82769d33c5Ssimonb  */
83769d33c5Ssimonb 
84769d33c5Ssimonb static u_int
dummy_get_timecount(struct timecounter * tc)851a7bc55dSyamt dummy_get_timecount(struct timecounter *tc)
86769d33c5Ssimonb {
87769d33c5Ssimonb 	static u_int now;
88769d33c5Ssimonb 
89e7faa7faSrin 	return ++now;
90769d33c5Ssimonb }
91769d33c5Ssimonb 
/*
 * The dummy timecounter: nominally 1 MHz with a full 32-bit mask,
 * and a quality so poor (-1000000) that it is never selected
 * automatically once a real timecounter has registered.
 */
static struct timecounter dummy_timecounter = {
	.tc_get_timecount	= dummy_get_timecount,
	.tc_counter_mask	= ~0u,
	.tc_frequency		= 1000000,
	.tc_name		= "dummy",
	.tc_quality		= -1000000,
	.tc_priv		= NULL,
};
100769d33c5Ssimonb 
/*
 * A timehands structure holds one snapshot of timecounter state.
 * Lock-free readers check th_generation before and after reading a
 * snapshot and retry if it changed underneath them (see binuptime()
 * and friends below).
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;     /* active timecounter */
	int64_t			th_adjustment;   /* frequency adjustment */
						 /* (NTP/adjtime) */
	uint64_t		th_scale;        /* scale factor (counter */
						 /* tick->time) */
	uint64_t 		th_offset_count; /* offset at last time */
						 /* update (tc_windup()) */
	struct bintime		th_offset;       /* bin (up)time at windup */
	struct timeval		th_microtime;    /* cached microtime */
	struct timespec		th_nanotime;     /* cached nanotime */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;   /* current generation */
	struct timehands	*th_next;        /* next timehand */
};
117769d33c5Ssimonb 
/*
 * Ten timehands arranged in a circular list; the forward declaration
 * of th0 closes the ring.  Presumably tc_windup() publishes updates
 * by advancing "timehands" around this ring -- TODO confirm against
 * tc_windup() later in the file.
 */
static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
/*
 * th0 starts out on the dummy timecounter: scale 2^64/1000000
 * (one microsecond per tick) and one second of uptime, so that
 * time never reads as zero.
 */
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

/* Currently published timehands, active timecounter, and list of all. */
static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;
139769d33c5Ssimonb 
/* used by savecore(8) */
time_t time_second_legacy asm("time_second");

#ifdef __HAVE_ATOMIC64_LOADSTORE
/* Cached seconds, readable with a single relaxed 64-bit atomic load. */
volatile time_t time__second __cacheline_aligned = 1;
volatile time_t time__uptime __cacheline_aligned = 1;
#else
/*
 * No atomic 64-bit loads/stores available: keep each value split into
 * 32-bit halves, updated under an hi == 0xffffffff "in progress"
 * sentinel protocol (see setrealuptime()/getrealtime() below).
 * Initialized to 1 second so time never reads as zero.
 */
static volatile struct {
	uint32_t lo, hi;
} time__uptime32 __cacheline_aligned = {
	.lo = 1,
}, time__second32 __cacheline_aligned = {
	.lo = 1,
};
#endif
155769d33c5Ssimonb 
/*
 * Time base (real time at boot).  gen is even while the value is
 * stable and odd while it is being changed; readers spin and retry
 * (see getbinboottime()).
 */
static struct {
	struct bintime bin;
	volatile unsigned gen;	/* even when stable, odd when changing */
} timebase __cacheline_aligned;

/* If nonzero, log a message whenever the clock is stepped. */
static int timestepwarnings;

kmutex_t timecounter_lock;	/* protects timecounter list and state */
static u_int timecounter_mods;	/* bumped on each list modification */
static volatile int timecounter_removals = 1; /* detach generation count */
static u_int timecounter_bad;	/* presumably counts counters gone bad;
				 * maintained later in the file -- verify */
16785a2e7d9Sad 
1685524172fSriastradh #ifdef __HAVE_ATOMIC64_LOADSTORE
1695524172fSriastradh 
/*
 * setrealuptime(second, uptime):
 *
 *	Update the cached wall-clock and uptime seconds.  With atomic
 *	64-bit stores available, plain relaxed stores suffice; readers
 *	use matching atomic loads.
 */
static inline void
setrealuptime(time_t second, time_t uptime)
{

	time_second_legacy = second;

	atomic_store_relaxed(&time__second, second);
	atomic_store_relaxed(&time__uptime, uptime);
}
1795524172fSriastradh 
1805524172fSriastradh #else
1815524172fSriastradh 
/*
 * setrealuptime(second, uptime):
 *
 *	Update the cached wall-clock and uptime seconds on platforms
 *	without atomic 64-bit stores.  While the halves are being
 *	changed, the hi words hold the sentinel 0xffffffff so readers
 *	(getrealtime()/getuptime()) spin until the update completes.
 *	Caller must hold timecounter_lock.
 */
static inline void
setrealuptime(time_t second, time_t uptime)
{
	uint32_t seclo = second & 0xffffffff, sechi = second >> 32;
	uint32_t uplo = uptime & 0xffffffff, uphi = uptime >> 32;

	KDASSERT(mutex_owned(&timecounter_lock));

	time_second_legacy = second;

	/*
	 * Fast path -- no wraparound, just updating the low bits, so
	 * no need for seqlocked access.
	 */
	if (__predict_true(sechi == time__second32.hi) &&
	    __predict_true(uphi == time__uptime32.hi)) {
		atomic_store_relaxed(&time__second32.lo, seclo);
		atomic_store_relaxed(&time__uptime32.lo, uplo);
		return;
	}

	/* Mark both values as in-flux before touching the low words. */
	atomic_store_relaxed(&time__second32.hi, 0xffffffff);
	atomic_store_relaxed(&time__uptime32.hi, 0xffffffff);
	membar_producer();
	atomic_store_relaxed(&time__second32.lo, seclo);
	atomic_store_relaxed(&time__uptime32.lo, uplo);
	membar_producer();
	/* Publish the new hi words, completing the update. */
	atomic_store_relaxed(&time__second32.hi, sechi);
	atomic_store_relaxed(&time__uptime32.hi, uphi);
}
2125524172fSriastradh 
/*
 * getrealtime():
 *
 *	Return the cached wall-clock seconds.  Readers spin while an
 *	update is in progress (hi == 0xffffffff sentinel) and retry if
 *	the hi word changed while the lo word was being read.
 */
time_t
getrealtime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__second32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__second32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__second32.hi));

	return ((time_t)hi << 32) | lo;
}
2325524172fSriastradh 
/*
 * getuptime():
 *
 *	Return the cached uptime seconds, using the same sentinel and
 *	retry protocol as getrealtime() above.
 */
time_t
getuptime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__uptime32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__uptime32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__uptime32.hi));

	return ((time_t)hi << 32) | lo;
}
2525524172fSriastradh 
2535524172fSriastradh time_t
getboottime(void)2545524172fSriastradh getboottime(void)
2555524172fSriastradh {
2565524172fSriastradh 
2575524172fSriastradh 	return getrealtime() - getuptime();
2585524172fSriastradh }
2595524172fSriastradh 
/*
 * getuptime32():
 *
 *	Cheap 32-bit view of the uptime: just the low word, which
 *	wraps after 2^32 seconds.
 */
uint32_t
getuptime32(void)
{

	return atomic_load_relaxed(&time__uptime32.lo);
}
2665524172fSriastradh 
2675524172fSriastradh #endif	/* !defined(__HAVE_ATOMIC64_LOADSTORE) */
2685524172fSriastradh 
/*
 * sysctl helper routine for kern.timercounter.hardware: report the
 * name of the active timecounter, and on write switch to the named
 * one if it is registered.
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	/* Report the current counter's name. */
	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Done unless a new, different name was written. */
	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	/* Changing the hardware timecounter requires privilege. */
	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return error;

	/* Find the requested counter on the list and switch to it. */
	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}
317de4337abSkardel 
/*
 * sysctl helper routine for kern.timecounter.choice: emit a space-
 * separated list of all registered timecounters with their quality
 * and frequency.
 */
static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error, mods;

	/* Read-only node: no writes, no extra name components. */
	if (newp != NULL)
		return EPERM;
	if (namelen != 0)
		return EINVAL;

	mutex_spin_enter(&timecounter_lock);
 retry:
	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;
	where = oldp;
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			needed += sizeof(buf);  /* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
					" Hz)", spc, tc->tc_name, tc->tc_quality,
					tc->tc_frequency);
			if (left < slen + 1)
				break;
			/*
			 * Drop the spin lock around copyout(); if the
			 * list was modified meanwhile (timecounter_mods
			 * bumped), start over from the top.
			 */
		 	mods = timecounter_mods;
			mutex_spin_exit(&timecounter_lock);
			error = copyout(buf, where, slen + 1);
			mutex_spin_enter(&timecounter_lock);
			if (mods != timecounter_mods) {
				goto retry;
			}
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_spin_exit(&timecounter_lock);

	*oldlenp = needed;
	return error;
}
367de4337abSkardel 
/*
 * Create the kern.timecounter sysctl subtree: "choice" (read-only
 * list of available counters), "hardware" (read-write selection of
 * the active counter), and "timestepwarnings" (log time steps).
 */
SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, &node,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "timecounter",
		       SYSCTL_DESCR("time counter information"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Only attach the leaves if the parent node was created. */
	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT,
			       CTLTYPE_STRING, "choice",
			       SYSCTL_DESCR("available counters"),
			       sysctl_kern_timecounter_choice, 0, NULL, 0,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			       CTLTYPE_STRING, "hardware",
			       SYSCTL_DESCR("currently active time counter"),
			       sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			       CTLTYPE_INT, "timestepwarnings",
			       SYSCTL_DESCR("log time steps"),
			       NULL, 0, &timestepwarnings, 0,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}
402de4337abSkardel 
#ifdef TC_COUNTERS
/*
 * Per-function event counters: TC_STATS(foo) declares and attaches a
 * static evcnt named "nfoo"; TC_COUNT(nfoo) bumps it.  Compiled away
 * entirely when TC_COUNTERS is not defined.
 */
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */
418769d33c5Ssimonb 
419769d33c5Ssimonb static void tc_windup(void);
420769d33c5Ssimonb 
421769d33c5Ssimonb /*
422769d33c5Ssimonb  * Return the difference between the timehands' counter value now and what
423769d33c5Ssimonb  * was when we copied it to the timehands' offset_count.
424769d33c5Ssimonb  */
4252903e6d8Suebayasi static inline u_int
tc_delta(struct timehands * th)426769d33c5Ssimonb tc_delta(struct timehands *th)
427769d33c5Ssimonb {
428769d33c5Ssimonb 	struct timecounter *tc;
429769d33c5Ssimonb 
430769d33c5Ssimonb 	tc = th->th_counter;
431e7faa7faSrin 	return (tc->tc_get_timecount(tc) -
432e7faa7faSrin 		 th->th_offset_count) & tc->tc_counter_mask;
433769d33c5Ssimonb }
434769d33c5Ssimonb 
435769d33c5Ssimonb /*
436769d33c5Ssimonb  * Functions for reading the time.  We have to loop until we are sure that
437769d33c5Ssimonb  * the timehands that we operated on was not updated under our feet.  See
438e7d1e5e9Ssimonb  * the comment in <sys/timevar.h> for a description of these 12 functions.
439769d33c5Ssimonb  */
440769d33c5Ssimonb 
/*
 * binuptime(bt):
 *
 *	Set *bt to the time elapsed since boot with full binary-
 *	fraction precision, reading the hardware counter for the
 *	sub-windup delta.  Lock-free; loops until a consistent
 *	timehands snapshot is obtained.
 */
void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 *
	 * XXX If the author of the above comment knows how to make it
	 * safe to avoid memory barriers around the access to
	 * th->th_generation, I'm all ears.
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	/* Record the removal generation only at the outermost entry. */
	if (__predict_true(lgen == 0)) {
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	/* Seqlock-style read: retry if the timehands changed mid-read. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	/* Restore the caller's (possibly zero) generation. */
	__insn_barrier();
	l->l_tcgen = lgen;
}
495769d33c5Ssimonb 
496769d33c5Ssimonb void
nanouptime(struct timespec * tsp)497769d33c5Ssimonb nanouptime(struct timespec *tsp)
498769d33c5Ssimonb {
499769d33c5Ssimonb 	struct bintime bt;
500769d33c5Ssimonb 
50159c1cd16Sad 	TC_COUNT(nnanouptime);
502769d33c5Ssimonb 	binuptime(&bt);
503769d33c5Ssimonb 	bintime2timespec(&bt, tsp);
504769d33c5Ssimonb }
505769d33c5Ssimonb 
506769d33c5Ssimonb void
microuptime(struct timeval * tvp)507769d33c5Ssimonb microuptime(struct timeval *tvp)
508769d33c5Ssimonb {
509769d33c5Ssimonb 	struct bintime bt;
510769d33c5Ssimonb 
51159c1cd16Sad 	TC_COUNT(nmicrouptime);
512769d33c5Ssimonb 	binuptime(&bt);
513769d33c5Ssimonb 	bintime2timeval(&bt, tvp);
514769d33c5Ssimonb }
515769d33c5Ssimonb 
516769d33c5Ssimonb void
bintime(struct bintime * bt)517769d33c5Ssimonb bintime(struct bintime *bt)
518769d33c5Ssimonb {
519f9c3bb07Sriastradh 	struct bintime boottime;
520769d33c5Ssimonb 
52159c1cd16Sad 	TC_COUNT(nbintime);
522769d33c5Ssimonb 	binuptime(bt);
523f9c3bb07Sriastradh 	getbinboottime(&boottime);
524f9c3bb07Sriastradh 	bintime_add(bt, &boottime);
525769d33c5Ssimonb }
526769d33c5Ssimonb 
527769d33c5Ssimonb void
nanotime(struct timespec * tsp)528769d33c5Ssimonb nanotime(struct timespec *tsp)
529769d33c5Ssimonb {
530769d33c5Ssimonb 	struct bintime bt;
531769d33c5Ssimonb 
53259c1cd16Sad 	TC_COUNT(nnanotime);
533769d33c5Ssimonb 	bintime(&bt);
534769d33c5Ssimonb 	bintime2timespec(&bt, tsp);
535769d33c5Ssimonb }
536769d33c5Ssimonb 
537769d33c5Ssimonb void
microtime(struct timeval * tvp)538769d33c5Ssimonb microtime(struct timeval *tvp)
539769d33c5Ssimonb {
540769d33c5Ssimonb 	struct bintime bt;
541769d33c5Ssimonb 
54259c1cd16Sad 	TC_COUNT(nmicrotime);
543769d33c5Ssimonb 	bintime(&bt);
544769d33c5Ssimonb 	bintime2timeval(&bt, tvp);
545769d33c5Ssimonb }
546769d33c5Ssimonb 
/*
 * getbinuptime(bt):
 *
 *	Set *bt to the cached uptime from the current timehands,
 *	without reading the hardware counter -- cheaper than
 *	binuptime() but only as fresh as the last windup.
 */
void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
562769d33c5Ssimonb 
/*
 * getnanouptime(tsp):
 *
 *	Set *tsp to the cached uptime in nanosecond resolution,
 *	without reading the hardware counter.
 */
void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timespec(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
578769d33c5Ssimonb 
/*
 * getmicrouptime(tvp):
 *
 *	Set *tvp to the cached uptime in microsecond resolution,
 *	without reading the hardware counter.
 */
void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timeval(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
594769d33c5Ssimonb 
/*
 * getbintime(bt):
 *
 *	Set *bt to the cached wall-clock time: the cached uptime from
 *	the current timehands plus the boot time.
 */
void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	struct bintime boottime;
	u_int gen;

	TC_COUNT(ngetbintime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
	getbinboottime(&boottime);
	bintime_add(bt, &boottime);
}
613769d33c5Ssimonb 
/*
 * dogetnanotime(tsp):
 *
 *	Common body for getnanotime() and dtrace_getnanotime(): copy
 *	out the cached nanotime snapshot from the current timehands.
 */
static inline void
dogetnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
629769d33c5Ssimonb 
/*
 * getnanotime(tsp):
 *
 *	Set *tsp to the cached wall-clock time in nanosecond
 *	resolution, without reading the hardware counter.
 */
void
getnanotime(struct timespec *tsp)
{
	dogetnanotime(tsp);
}
63620bf3061Schs 
/* Separate entry point so dtrace can instrument it independently. */
void dtrace_getnanotime(struct timespec *tsp);

/*
 * dtrace_getnanotime(tsp):
 *
 *	Same as getnanotime(): cached wall-clock time in nanosecond
 *	resolution.
 */
void
dtrace_getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}
64520bf3061Schs 
/*
 * getmicrotime(tvp):
 *
 *	Set *tvp to the cached wall-clock time in microsecond
 *	resolution, without reading the hardware counter.
 */
void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	/* Retry until a stable timehands generation is observed. */
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
661769d33c5Ssimonb 
662d6c967bbSthorpej void
getnanoboottime(struct timespec * tsp)663d6c967bbSthorpej getnanoboottime(struct timespec *tsp)
664d6c967bbSthorpej {
665d6c967bbSthorpej 	struct bintime bt;
666d6c967bbSthorpej 
667d6c967bbSthorpej 	getbinboottime(&bt);
668d6c967bbSthorpej 	bintime2timespec(&bt, tsp);
669d6c967bbSthorpej }
670d6c967bbSthorpej 
671d6c967bbSthorpej void
getmicroboottime(struct timeval * tvp)672d6c967bbSthorpej getmicroboottime(struct timeval *tvp)
673d6c967bbSthorpej {
674d6c967bbSthorpej 	struct bintime bt;
675d6c967bbSthorpej 
676d6c967bbSthorpej 	getbinboottime(&bt);
677d6c967bbSthorpej 	bintime2timeval(&bt, tvp);
678d6c967bbSthorpej }
679d6c967bbSthorpej 
/*
 * getbinboottime(basep):
 *
 *	Set *basep to the time at boot (the time base) with binary-
 *	fraction precision.  Lock-free: spins while the timebase
 *	generation is odd (an update in progress) and retries if it
 *	changed during the read.
 */
void
getbinboottime(struct bintime *basep)
{
	struct bintime base;
	unsigned gen;

	do {
		/* Spin until the timebase isn't changing.  */
		while ((gen = atomic_load_relaxed(&timebase.gen)) & 1)
			SPINLOCK_BACKOFF_HOOK;

		/* Read out a snapshot of the timebase.  */
		membar_consumer();
		base = timebase.bin;
		membar_consumer();

		/* Restart if it changed while we were reading.  */
	} while (gen != atomic_load_relaxed(&timebase.gen));

	*basep = base;
}
701d6c967bbSthorpej 
/*
 * Initialize a new timecounter and possibly use it.
 *
 * Registers the counter on the global list and, if its quality (or
 * equal quality but higher frequency) beats the active counter, makes
 * it the running timecounter.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	KASSERTMSG(tc->tc_next == NULL, "timecounter %s already initialised",
	    tc->tc_name);

	/*
	 * u is an estimate of the minimum hz needed to sample the
	 * counter before it wraps (frequency / counter range).
	 */
	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		/* Counter wraps faster than we tick: demote it hard. */
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	/* Link onto the global list and bump the modification count. */
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonous.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		/* Read twice before use (tc_pick() does the same). */
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}
749769d33c5Ssimonb 
750a72fb242Sdyoung /*
751541e4662Sad  * Pick a new timecounter due to the existing counter going bad.
752a72fb242Sdyoung  */
753541e4662Sad static void
tc_pick(void)754541e4662Sad tc_pick(void)
755a72fb242Sdyoung {
756a72fb242Sdyoung 	struct timecounter *best, *tc;
757a72fb242Sdyoung 
75892e16844Sriastradh 	KASSERT(mutex_owned(&timecounter_lock));
759a72fb242Sdyoung 
760a72fb242Sdyoung 	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
761a72fb242Sdyoung 		if (tc->tc_quality > best->tc_quality)
762a72fb242Sdyoung 			best = tc;
763a72fb242Sdyoung 		else if (tc->tc_quality < best->tc_quality)
764a72fb242Sdyoung 			continue;
765a72fb242Sdyoung 		else if (tc->tc_frequency > best->tc_frequency)
766a72fb242Sdyoung 			best = tc;
767a72fb242Sdyoung 	}
768a72fb242Sdyoung 	(void)best->tc_get_timecount(best);
769a72fb242Sdyoung 	(void)best->tc_get_timecount(best);
770a72fb242Sdyoung 	timecounter = best;
771541e4662Sad }
772541e4662Sad 
/*
 * A timecounter has gone bad, arrange to pick a new one at the next
 * clock tick.
 */
void
tc_gonebad(struct timecounter *tc)
{

	/* Demote it so tc_pick() will not select it again. */
	tc->tc_quality = -100;
	/* Publish the quality change before signalling the bad counter. */
	membar_producer();
	atomic_inc_uint(&timecounter_bad);
}
785541e4662Sad 
/*
 * Stop using a timecounter and remove it from the timecounters list.
 *
 * Returns 0 on success, or ESRCH if `target' is not registered.  Does
 * not return until no LWP in the system can still be referencing the
 * old timecounter state.
 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *tc;
	struct timecounter **tcp = NULL;
	int removals;
	lwp_t *l;

	/* First, find the timecounter. */
	mutex_spin_enter(&timecounter_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		mutex_spin_exit(&timecounter_lock);
		return ESRCH;
	}

	/* And now, remove it. */
	*tcp = tc->tc_next;
	if (timecounter == target) {
		/* It was the active counter: choose a replacement now. */
		tc_pick();
		tc_windup();
	}
	timecounter_mods++;
	removals = timecounter_removals++;
	mutex_spin_exit(&timecounter_lock);

	/*
	 * We now have to determine if any threads in the system are still
	 * making use of this timecounter.
	 *
	 * We issue a broadcast cross call to elide memory ordering issues,
	 * then scan all LWPs in the system looking at each's timecounter
	 * generation number.  We need to see a value of zero (not actively
	 * using a timecounter) or a value greater than our removal value.
	 *
	 * We may race with threads that read `timecounter_removals' and
	 * and then get preempted before updating `l_tcgen'.  This is not
	 * a problem, since it means that these threads have not yet started
	 * accessing timecounter state.  All we do need is one clean
	 * snapshot of the system where every thread appears not to be using
	 * old timecounter state.
	 */
	for (;;) {
		xc_barrier(0);

		mutex_enter(&proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
				/*
				 * Not using timecounter or old timecounter
				 * state at time of our xcall or later.
				 */
				continue;
			}
			break;
		}
		mutex_exit(&proc_lock);

		/*
		 * If the timecounter is still in use, wait at least 10ms
		 * before retrying.
		 */
		if (l == NULL) {
			/* Clean snapshot: nobody references it any more. */
			break;
		}
		(void)kpause("tcdetach", false, mstohz(10), NULL);
	}

	tc->tc_next = NULL;
	return 0;
}
865a72fb242Sdyoung 
866769d33c5Ssimonb /* Report the frequency of the current timecounter. */
8679b7a1c49Srin uint64_t
tc_getfrequency(void)868769d33c5Ssimonb tc_getfrequency(void)
869769d33c5Ssimonb {
870769d33c5Ssimonb 
87169dc4427Sriastradh 	return atomic_load_consume(&timehands)->th_counter->tc_frequency;
872769d33c5Ssimonb }
873769d33c5Ssimonb 
/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	/* bt = new boot time = requested UTC - current uptime (bt2). */
	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	/* bt2 becomes the old UTC, kept for the log message below. */
	bintime_add(&bt2, &timebase.bin);
	/*
	 * Writer side of the timebase seqlock: odd generation marks the
	 * update in progress for lock-free readers (getbinboottime()).
	 */
	timebase.gen |= 1;	/* change in progress */
	membar_producer();
	timebase.bin = bt;
	membar_producer();
	timebase.gen++;		/* commit change */
	tc_windup();
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO,
		    "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
906769d33c5Ssimonb 
/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	/* Set when th_scale must be recomputed at the bottom. */
	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebase.bin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		if (bt.sec != t) {
			/*
			 * NTP moved the second boundary: fold the change
			 * into the boot-time estimate under the timebase
			 * seqlock (odd generation = update in progress).
			 */
			timebase.gen |= 1;	/* change in progress */
			membar_producer();
			timebase.bin.sec += bt.sec - t;
			membar_producer();
			timebase.gen++;		/* commit change */
		}
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);
	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
 	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	if (s_update) {
		scale = (uint64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	setrealuptime(th->th_microtime.tv_sec, th->th_offset.sec);
	atomic_store_release(&timehands, th);

	/*
	 * Force users of the old timehand to move on.  This is
	 * necessary for MP systems; we need to ensure that the
	 * consumers will move away from the old timehand before
	 * we begin updating it again when we eventually wrap
	 * around.
	 */
	if (++tho->th_generation == 0)
		tho->th_generation = 1;
}
1059769d33c5Ssimonb 
1060769d33c5Ssimonb /*
1061769d33c5Ssimonb  * RFC 2783 PPS-API implementation.
1062769d33c5Ssimonb  */
1063769d33c5Ssimonb 
1064769d33c5Ssimonb int
pps_ioctl(u_long cmd,void * data,struct pps_state * pps)106553524e44Schristos pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
1066769d33c5Ssimonb {
1067769d33c5Ssimonb 	pps_params_t *app;
1068de4337abSkardel 	pps_info_t *pipi;
1069769d33c5Ssimonb #ifdef PPS_SYNC
1070de4337abSkardel 	int *epi;
1071769d33c5Ssimonb #endif
1072769d33c5Ssimonb 
1073a2249ef7Sad 	KASSERT(mutex_owned(&timecounter_lock));
1074a2249ef7Sad 
1075effe57d3Skardel 	KASSERT(pps != NULL);
1076effe57d3Skardel 
1077769d33c5Ssimonb 	switch (cmd) {
1078769d33c5Ssimonb 	case PPS_IOC_CREATE:
1079e7faa7faSrin 		return 0;
1080769d33c5Ssimonb 	case PPS_IOC_DESTROY:
1081e7faa7faSrin 		return 0;
1082769d33c5Ssimonb 	case PPS_IOC_SETPARAMS:
1083769d33c5Ssimonb 		app = (pps_params_t *)data;
1084769d33c5Ssimonb 		if (app->mode & ~pps->ppscap)
1085e7faa7faSrin 			return EINVAL;
1086769d33c5Ssimonb 		pps->ppsparam = *app;
1087e7faa7faSrin 		return 0;
1088769d33c5Ssimonb 	case PPS_IOC_GETPARAMS:
1089769d33c5Ssimonb 		app = (pps_params_t *)data;
1090769d33c5Ssimonb 		*app = pps->ppsparam;
1091769d33c5Ssimonb 		app->api_version = PPS_API_VERS_1;
1092e7faa7faSrin 		return 0;
1093769d33c5Ssimonb 	case PPS_IOC_GETCAP:
1094769d33c5Ssimonb 		*(int*)data = pps->ppscap;
1095e7faa7faSrin 		return 0;
1096769d33c5Ssimonb 	case PPS_IOC_FETCH:
1097de4337abSkardel 		pipi = (pps_info_t *)data;
1098769d33c5Ssimonb 		pps->ppsinfo.current_mode = pps->ppsparam.mode;
1099de4337abSkardel 		*pipi = pps->ppsinfo;
1100e7faa7faSrin 		return 0;
1101769d33c5Ssimonb 	case PPS_IOC_KCBIND:
1102769d33c5Ssimonb #ifdef PPS_SYNC
1103de4337abSkardel 		epi = (int *)data;
1104769d33c5Ssimonb 		/* XXX Only root should be able to do this */
1105de4337abSkardel 		if (*epi & ~pps->ppscap)
1106e7faa7faSrin 			return EINVAL;
1107de4337abSkardel 		pps->kcmode = *epi;
1108e7faa7faSrin 		return 0;
1109769d33c5Ssimonb #else
1110e7faa7faSrin 		return EOPNOTSUPP;
1111769d33c5Ssimonb #endif
1112769d33c5Ssimonb 	default:
1113e7faa7faSrin 		return EPASSTHROUGH;
1114769d33c5Ssimonb 	}
1115769d33c5Ssimonb }
1116769d33c5Ssimonb 
1117769d33c5Ssimonb void
pps_init(struct pps_state * pps)1118769d33c5Ssimonb pps_init(struct pps_state *pps)
1119769d33c5Ssimonb {
1120a2249ef7Sad 
1121a2249ef7Sad 	KASSERT(mutex_owned(&timecounter_lock));
1122a2249ef7Sad 
1123769d33c5Ssimonb 	pps->ppscap |= PPS_TSFMT_TSPEC;
1124769d33c5Ssimonb 	if (pps->ppscap & PPS_CAPTUREASSERT)
1125769d33c5Ssimonb 		pps->ppscap |= PPS_OFFSETASSERT;
1126769d33c5Ssimonb 	if (pps->ppscap & PPS_CAPTURECLEAR)
1127769d33c5Ssimonb 		pps->ppscap |= PPS_OFFSETCLEAR;
1128769d33c5Ssimonb }
1129769d33c5Ssimonb 
/*
 * Capture a timestamp in the PPS structure.
 *
 * Snapshots the current timehands, its generation, and the raw counter
 * value; pps_event()/pps_ref_event() convert the count to a time later.
 */
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(mutex_owned(&timecounter_lock));
	KASSERT(pps != NULL);

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = (uint64_t)tc_delta(th) + th->th_offset_count;
	/* If the timehands was wound up under us, invalidate the capture. */
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}
1148769d33c5Ssimonb 
#ifdef PPS_DEBUG
/*
 * PPS debug log bitmask: 0x1 enables event/drop tracing in
 * pps_ref_event(), 0x2 enables reference/latency timestamp logging.
 */
int ppsdebug = 0;
#endif
1152effe57d3Skardel 
/*
 * process a pps_capture()ed event
 *
 * Thin wrapper: feeds the previously captured edge to pps_ref_event()
 * in PPS mode (no external reference timestamp).
 */
void
pps_event(struct pps_state *pps, int event)
{
	pps_ref_event(pps, event, NULL, PPS_REFEVNT_PPS|PPS_REFEVNT_CAPTURE);
}
1161effe57d3Skardel 
1162effe57d3Skardel /*
1163effe57d3Skardel  * extended pps api /  kernel pll/fll entry point
1164effe57d3Skardel  *
1165effe57d3Skardel  * feed reference time stamps to PPS engine
1166effe57d3Skardel  *
1167effe57d3Skardel  * will simulate a PPS event and feed
1168effe57d3Skardel  * the NTP PLL/FLL if requested.
1169effe57d3Skardel  *
1170effe57d3Skardel  * the ref time stamps should be roughly once
1171effe57d3Skardel  * a second but do not need to be exactly in phase
1172effe57d3Skardel  * with the UTC second but should be close to it.
1173effe57d3Skardel  * this relaxation of requirements allows callout
1174effe57d3Skardel  * driven timestamping mechanisms to feed to pps
1175effe57d3Skardel  * capture/kernel pll logic.
1176effe57d3Skardel  *
1177effe57d3Skardel  * calling pattern is:
1178effe57d3Skardel  *  pps_capture() (for PPS_REFEVNT_{CAPTURE|CAPCUR})
1179effe57d3Skardel  *  read timestamp from reference source
1180effe57d3Skardel  *  pps_ref_event()
1181effe57d3Skardel  *
1182effe57d3Skardel  * supported refmodes:
1183effe57d3Skardel  *  PPS_REFEVNT_CAPTURE
1184effe57d3Skardel  *    use system timestamp of pps_capture()
1185effe57d3Skardel  *  PPS_REFEVNT_CURRENT
1186effe57d3Skardel  *    use system timestamp of this call
1187effe57d3Skardel  *  PPS_REFEVNT_CAPCUR
1188effe57d3Skardel  *    use average of read capture and current system time stamp
1189effe57d3Skardel  *  PPS_REFEVNT_PPS
1190effe57d3Skardel  *    assume timestamp on second mark - ref_ts is ignored
1191effe57d3Skardel  *
1192effe57d3Skardel  */
1193effe57d3Skardel 
1194effe57d3Skardel void
pps_ref_event(struct pps_state * pps,int event,struct bintime * ref_ts,int refmode)1195effe57d3Skardel pps_ref_event(struct pps_state *pps,
1196effe57d3Skardel 	      int event,
1197effe57d3Skardel 	      struct bintime *ref_ts,
1198effe57d3Skardel 	      int refmode
1199effe57d3Skardel 	)
1200effe57d3Skardel {
1201effe57d3Skardel 	struct bintime bt;	/* current time */
1202effe57d3Skardel 	struct bintime btd;	/* time difference */
1203effe57d3Skardel 	struct bintime bt_ref;	/* reference time */
1204769d33c5Ssimonb 	struct timespec ts, *tsp, *osp;
1205effe57d3Skardel 	struct timehands *th;
12069b7a1c49Srin 	uint64_t tcount, acount, dcount, *pcount;
1207083fcd5dSmartin 	int foff, gen;
1208083fcd5dSmartin #ifdef PPS_SYNC
1209083fcd5dSmartin 	int fhard;
1210083fcd5dSmartin #endif
1211769d33c5Ssimonb 	pps_seq_t *pseq;
1212769d33c5Ssimonb 
1213a2249ef7Sad 	KASSERT(mutex_owned(&timecounter_lock));
1214a2249ef7Sad 
1215effe57d3Skardel 	KASSERT(pps != NULL);
1216769d33c5Ssimonb 
1217effe57d3Skardel         /* pick up current time stamp if needed */
1218effe57d3Skardel 	if (refmode & (PPS_REFEVNT_CURRENT|PPS_REFEVNT_CAPCUR)) {
1219effe57d3Skardel 		/* pick up current time stamp */
1220effe57d3Skardel 		th = timehands;
1221effe57d3Skardel 		gen = th->th_generation;
12229b7a1c49Srin 		tcount = (uint64_t)tc_delta(th) + th->th_offset_count;
1223effe57d3Skardel 		if (gen != th->th_generation)
1224effe57d3Skardel 			gen = 0;
1225effe57d3Skardel 
1226effe57d3Skardel 		/* If the timecounter was wound up underneath us, bail out. */
1227effe57d3Skardel 		if (pps->capgen == 0 ||
1228effe57d3Skardel 		    pps->capgen != pps->capth->th_generation ||
1229effe57d3Skardel 		    gen == 0 ||
1230effe57d3Skardel 		    gen != pps->capgen) {
1231effe57d3Skardel #ifdef PPS_DEBUG
1232effe57d3Skardel 			if (ppsdebug & 0x1) {
1233effe57d3Skardel 				log(LOG_DEBUG,
1234effe57d3Skardel 				    "pps_ref_event(pps=%p, event=%d, ...): DROP (wind-up)\n",
1235effe57d3Skardel 				    pps, event);
1236effe57d3Skardel 			}
1237effe57d3Skardel #endif
1238effe57d3Skardel 			return;
1239effe57d3Skardel 		}
1240effe57d3Skardel 	} else {
1241effe57d3Skardel 		tcount = 0;	/* keep GCC happy */
1242effe57d3Skardel 	}
1243effe57d3Skardel 
1244effe57d3Skardel #ifdef PPS_DEBUG
1245effe57d3Skardel 	if (ppsdebug & 0x1) {
1246effe57d3Skardel 		struct timespec tmsp;
1247effe57d3Skardel 
1248effe57d3Skardel 		if (ref_ts == NULL) {
1249effe57d3Skardel 			tmsp.tv_sec = 0;
1250effe57d3Skardel 			tmsp.tv_nsec = 0;
1251effe57d3Skardel 		} else {
1252effe57d3Skardel 			bintime2timespec(ref_ts, &tmsp);
1253effe57d3Skardel 		}
1254effe57d3Skardel 
1255effe57d3Skardel 		log(LOG_DEBUG,
1256effe57d3Skardel 		    "pps_ref_event(pps=%p, event=%d, ref_ts=%"PRIi64
1257effe57d3Skardel 		    ".%09"PRIi32", refmode=0x%1x)\n",
1258effe57d3Skardel 		    pps, event, tmsp.tv_sec, (int32_t)tmsp.tv_nsec, refmode);
1259effe57d3Skardel 	}
1260effe57d3Skardel #endif
1261effe57d3Skardel 
1262effe57d3Skardel 	/* setup correct event references */
1263769d33c5Ssimonb 	if (event == PPS_CAPTUREASSERT) {
1264769d33c5Ssimonb 		tsp = &pps->ppsinfo.assert_timestamp;
1265769d33c5Ssimonb 		osp = &pps->ppsparam.assert_offset;
1266769d33c5Ssimonb 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
1267083fcd5dSmartin #ifdef PPS_SYNC
1268769d33c5Ssimonb 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
1269083fcd5dSmartin #endif
1270769d33c5Ssimonb 		pcount = &pps->ppscount[0];
1271769d33c5Ssimonb 		pseq = &pps->ppsinfo.assert_sequence;
1272769d33c5Ssimonb 	} else {
1273769d33c5Ssimonb 		tsp = &pps->ppsinfo.clear_timestamp;
1274769d33c5Ssimonb 		osp = &pps->ppsparam.clear_offset;
1275769d33c5Ssimonb 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
1276083fcd5dSmartin #ifdef PPS_SYNC
1277769d33c5Ssimonb 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
1278083fcd5dSmartin #endif
1279769d33c5Ssimonb 		pcount = &pps->ppscount[1];
1280769d33c5Ssimonb 		pseq = &pps->ppsinfo.clear_sequence;
1281769d33c5Ssimonb 	}
1282769d33c5Ssimonb 
1283effe57d3Skardel 	/* determine system time stamp according to refmode */
1284effe57d3Skardel 	dcount = 0;		/* keep GCC happy */
1285effe57d3Skardel 	switch (refmode & PPS_REFEVNT_RMASK) {
1286effe57d3Skardel 	case PPS_REFEVNT_CAPTURE:
1287effe57d3Skardel 		acount = pps->capcount;	/* use capture timestamp */
1288effe57d3Skardel 		break;
1289effe57d3Skardel 
1290effe57d3Skardel 	case PPS_REFEVNT_CURRENT:
1291effe57d3Skardel 		acount = tcount; /* use current timestamp */
1292effe57d3Skardel 		break;
1293effe57d3Skardel 
1294effe57d3Skardel 	case PPS_REFEVNT_CAPCUR:
1295effe57d3Skardel 		/*
1296effe57d3Skardel 		 * calculate counter value between pps_capture() and
1297effe57d3Skardel 		 * pps_ref_event()
1298effe57d3Skardel 		 */
1299effe57d3Skardel 		dcount = tcount - pps->capcount;
1300effe57d3Skardel 		acount = (dcount / 2) + pps->capcount;
1301effe57d3Skardel 		break;
1302effe57d3Skardel 
1303effe57d3Skardel 	default:		/* ignore call error silently */
1304effe57d3Skardel 		return;
1305effe57d3Skardel 	}
1306effe57d3Skardel 
1307769d33c5Ssimonb 	/*
1308769d33c5Ssimonb 	 * If the timecounter changed, we cannot compare the count values, so
1309769d33c5Ssimonb 	 * we have to drop the rest of the PPS-stuff until the next event.
1310769d33c5Ssimonb 	 */
1311769d33c5Ssimonb 	if (pps->ppstc != pps->capth->th_counter) {
1312769d33c5Ssimonb 		pps->ppstc = pps->capth->th_counter;
1313effe57d3Skardel 		pps->capcount = acount;
1314effe57d3Skardel 		*pcount = acount;
1315effe57d3Skardel 		pps->ppscount[2] = acount;
1316effe57d3Skardel #ifdef PPS_DEBUG
1317effe57d3Skardel 		if (ppsdebug & 0x1) {
1318effe57d3Skardel 			log(LOG_DEBUG,
1319effe57d3Skardel 			    "pps_ref_event(pps=%p, event=%d, ...): DROP (time-counter change)\n",
1320effe57d3Skardel 			    pps, event);
1321effe57d3Skardel 		}
1322effe57d3Skardel #endif
1323769d33c5Ssimonb 		return;
1324769d33c5Ssimonb 	}
1325769d33c5Ssimonb 
1326effe57d3Skardel 	pps->capcount = acount;
1327effe57d3Skardel 
1328effe57d3Skardel 	/* Convert the count to a bintime. */
1329769d33c5Ssimonb 	bt = pps->capth->th_offset;
1330effe57d3Skardel 	bintime_addx(&bt, pps->capth->th_scale * (acount - pps->capth->th_offset_count));
1331f9c3bb07Sriastradh 	bintime_add(&bt, &timebase.bin);
1332effe57d3Skardel 
1333effe57d3Skardel 	if ((refmode & PPS_REFEVNT_PPS) == 0) {
1334effe57d3Skardel 		/* determine difference to reference time stamp */
1335effe57d3Skardel 		bt_ref = *ref_ts;
1336effe57d3Skardel 
1337effe57d3Skardel 		btd = bt;
1338effe57d3Skardel 		bintime_sub(&btd, &bt_ref);
1339effe57d3Skardel 
1340effe57d3Skardel 		/*
1341effe57d3Skardel 		 * simulate a PPS timestamp by dropping the fraction
1342effe57d3Skardel 		 * and applying the offset
1343effe57d3Skardel 		 */
1344effe57d3Skardel 		if (bt.frac >= (uint64_t)1<<63)	/* skip to nearest second */
1345effe57d3Skardel 			bt.sec++;
1346effe57d3Skardel 		bt.frac = 0;
1347effe57d3Skardel 		bintime_add(&bt, &btd);
1348effe57d3Skardel 	} else {
1349effe57d3Skardel 		/*
1350effe57d3Skardel 		 * create ref_ts from current time -
1351effe57d3Skardel 		 * we are supposed to be called on
1352effe57d3Skardel 		 * the second mark
1353effe57d3Skardel 		 */
1354effe57d3Skardel 		bt_ref = bt;
1355effe57d3Skardel 		if (bt_ref.frac >= (uint64_t)1<<63)	/* skip to nearest second */
1356effe57d3Skardel 			bt_ref.sec++;
1357effe57d3Skardel 		bt_ref.frac = 0;
1358effe57d3Skardel 	}
1359effe57d3Skardel 
1360effe57d3Skardel 	/* convert bintime to timestamp */
1361769d33c5Ssimonb 	bintime2timespec(&bt, &ts);
1362769d33c5Ssimonb 
1363769d33c5Ssimonb 	/* If the timecounter was wound up underneath us, bail out. */
1364769d33c5Ssimonb 	if (pps->capgen != pps->capth->th_generation)
1365769d33c5Ssimonb 		return;
1366769d33c5Ssimonb 
1367effe57d3Skardel 	/* store time stamp */
1368769d33c5Ssimonb 	*pcount = pps->capcount;
1369769d33c5Ssimonb 	(*pseq)++;
1370769d33c5Ssimonb 	*tsp = ts;
1371769d33c5Ssimonb 
1372effe57d3Skardel 	/* add offset correction */
1373769d33c5Ssimonb 	if (foff) {
1374de4337abSkardel 		timespecadd(tsp, osp, tsp);
1375769d33c5Ssimonb 		if (tsp->tv_nsec < 0) {
1376769d33c5Ssimonb 			tsp->tv_nsec += 1000000000;
1377769d33c5Ssimonb 			tsp->tv_sec -= 1;
1378769d33c5Ssimonb 		}
1379769d33c5Ssimonb 	}
1380effe57d3Skardel 
1381effe57d3Skardel #ifdef PPS_DEBUG
1382effe57d3Skardel 	if (ppsdebug & 0x2) {
1383effe57d3Skardel 		struct timespec ts2;
1384effe57d3Skardel 		struct timespec ts3;
1385effe57d3Skardel 
1386effe57d3Skardel 		bintime2timespec(&bt_ref, &ts2);
1387effe57d3Skardel 
1388effe57d3Skardel 		bt.sec = 0;
1389effe57d3Skardel 		bt.frac = 0;
1390effe57d3Skardel 
1391effe57d3Skardel 		if (refmode & PPS_REFEVNT_CAPCUR) {
1392effe57d3Skardel 			    bintime_addx(&bt, pps->capth->th_scale * dcount);
1393effe57d3Skardel 		}
1394effe57d3Skardel 		bintime2timespec(&bt, &ts3);
1395effe57d3Skardel 
1396effe57d3Skardel 		log(LOG_DEBUG, "ref_ts=%"PRIi64".%09"PRIi32
1397effe57d3Skardel 		    ", ts=%"PRIi64".%09"PRIi32", read latency=%"PRIi64" ns\n",
1398effe57d3Skardel 		    ts2.tv_sec, (int32_t)ts2.tv_nsec,
1399effe57d3Skardel 		    tsp->tv_sec, (int32_t)tsp->tv_nsec,
1400effe57d3Skardel 		    timespec2ns(&ts3));
1401effe57d3Skardel 	}
1402effe57d3Skardel #endif
1403effe57d3Skardel 
1404769d33c5Ssimonb #ifdef PPS_SYNC
1405769d33c5Ssimonb 	if (fhard) {
1406effe57d3Skardel 		uint64_t scale;
1407effe57d3Skardel 		uint64_t div;
1408769d33c5Ssimonb 
1409769d33c5Ssimonb 		/*
1410769d33c5Ssimonb 		 * Feed the NTP PLL/FLL.
1411769d33c5Ssimonb 		 * The FLL wants to know how many (hardware) nanoseconds
1412effe57d3Skardel 		 * elapsed since the previous event (mod 1 second) thus
1413effe57d3Skardel 		 * we are actually looking at the frequency difference scaled
1414effe57d3Skardel 		 * in nsec.
1415effe57d3Skardel 		 * As the counter time stamps are not truly at 1Hz
1416effe57d3Skardel 		 * we need to scale the count by the elapsed
1417effe57d3Skardel 		 * reference time.
1418effe57d3Skardel 		 * valid sampling interval: [0.5..2[ sec
1419769d33c5Ssimonb 		 */
1420effe57d3Skardel 
1421effe57d3Skardel 		/* calculate elapsed raw count */
1422769d33c5Ssimonb 		tcount = pps->capcount - pps->ppscount[2];
1423769d33c5Ssimonb 		pps->ppscount[2] = pps->capcount;
1424769d33c5Ssimonb 		tcount &= pps->capth->th_counter->tc_counter_mask;
1425effe57d3Skardel 
1426effe57d3Skardel 		/* calculate elapsed ref time */
1427effe57d3Skardel 		btd = bt_ref;
1428effe57d3Skardel 		bintime_sub(&btd, &pps->ref_time);
1429effe57d3Skardel 		pps->ref_time = bt_ref;
1430effe57d3Skardel 
1431effe57d3Skardel 		/* check that we stay below 2 sec */
1432effe57d3Skardel 		if (btd.sec < 0 || btd.sec > 1)
1433effe57d3Skardel 			return;
1434effe57d3Skardel 
1435effe57d3Skardel 		/* we want at least 0.5 sec between samples */
1436effe57d3Skardel 		if (btd.sec == 0 && btd.frac < (uint64_t)1<<63)
1437effe57d3Skardel 			return;
1438effe57d3Skardel 
1439effe57d3Skardel 		/*
1440effe57d3Skardel 		 * calculate cycles per period by multiplying
1441effe57d3Skardel 		 * the frequency with the elapsed period
1442effe57d3Skardel 		 * we pick a fraction of 30 bits
1443effe57d3Skardel 		 * ~1ns resolution for elapsed time
1444effe57d3Skardel 		 */
1445effe57d3Skardel 		div   = (uint64_t)btd.sec << 30;
1446effe57d3Skardel 		div  |= (btd.frac >> 34) & (((uint64_t)1 << 30) - 1);
1447effe57d3Skardel 		div  *= pps->capth->th_counter->tc_frequency;
1448effe57d3Skardel 		div >>= 30;
1449effe57d3Skardel 
1450effe57d3Skardel 		if (div == 0)	/* safeguard */
1451effe57d3Skardel 			return;
1452effe57d3Skardel 
1453effe57d3Skardel 		scale = (uint64_t)1 << 63;
1454effe57d3Skardel 		scale /= div;
1455769d33c5Ssimonb 		scale *= 2;
1456effe57d3Skardel 
1457769d33c5Ssimonb 		bt.sec = 0;
1458769d33c5Ssimonb 		bt.frac = 0;
1459769d33c5Ssimonb 		bintime_addx(&bt, scale * tcount);
1460769d33c5Ssimonb 		bintime2timespec(&bt, &ts);
1461effe57d3Skardel 
1462effe57d3Skardel #ifdef PPS_DEBUG
1463effe57d3Skardel 		if (ppsdebug & 0x4) {
1464effe57d3Skardel 			struct timespec ts2;
1465effe57d3Skardel 			int64_t df;
1466effe57d3Skardel 
1467effe57d3Skardel 			bintime2timespec(&bt_ref, &ts2);
1468effe57d3Skardel 			df = timespec2ns(&ts);
1469effe57d3Skardel 			if (df > 500000000)
1470effe57d3Skardel 				df -= 1000000000;
1471effe57d3Skardel 			log(LOG_DEBUG, "hardpps: ref_ts=%"PRIi64
1472effe57d3Skardel 			    ".%09"PRIi32", ts=%"PRIi64".%09"PRIi32
1473effe57d3Skardel 			    ", freqdiff=%"PRIi64" ns/s\n",
1474effe57d3Skardel 			    ts2.tv_sec, (int32_t)ts2.tv_nsec,
1475effe57d3Skardel 			    tsp->tv_sec, (int32_t)tsp->tv_nsec,
1476effe57d3Skardel 			    df);
1477effe57d3Skardel 		}
1478effe57d3Skardel #endif
1479effe57d3Skardel 
1480effe57d3Skardel 		hardpps(tsp, timespec2ns(&ts));
1481769d33c5Ssimonb 	}
1482769d33c5Ssimonb #endif
1483769d33c5Ssimonb }
1484769d33c5Ssimonb 
1485769d33c5Ssimonb /*
1486769d33c5Ssimonb  * Timecounters need to be updated every so often to prevent the hardware
1487769d33c5Ssimonb  * counter from overflowing.  Updating also recalculates the cached values
1488769d33c5Ssimonb  * used by the get*() family of functions, so their precision depends on
1489769d33c5Ssimonb  * the update frequency.
1490769d33c5Ssimonb  */
1491769d33c5Ssimonb 
1492769d33c5Ssimonb static int tc_tick;
1493769d33c5Ssimonb 
1494769d33c5Ssimonb void
tc_ticktock(void)1495769d33c5Ssimonb tc_ticktock(void)
1496769d33c5Ssimonb {
1497769d33c5Ssimonb 	static int count;
1498769d33c5Ssimonb 
1499769d33c5Ssimonb 	if (++count < tc_tick)
1500769d33c5Ssimonb 		return;
1501769d33c5Ssimonb 	count = 0;
1502a2249ef7Sad 	mutex_spin_enter(&timecounter_lock);
1503599e5d1cSrin 	if (__predict_false(timecounter_bad != 0)) {
1504541e4662Sad 		/* An existing timecounter has gone bad, pick a new one. */
1505541e4662Sad 		(void)atomic_swap_uint(&timecounter_bad, 0);
1506541e4662Sad 		if (timecounter->tc_quality < 0) {
1507541e4662Sad 			tc_pick();
1508541e4662Sad 		}
1509541e4662Sad 	}
1510769d33c5Ssimonb 	tc_windup();
1511a2249ef7Sad 	mutex_spin_exit(&timecounter_lock);
1512769d33c5Ssimonb }
1513769d33c5Ssimonb 
1514de4337abSkardel void
inittimecounter(void)1515de4337abSkardel inittimecounter(void)
1516769d33c5Ssimonb {
1517769d33c5Ssimonb 	u_int p;
1518769d33c5Ssimonb 
1519ccab995bSkardel 	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);
152063baaccbSad 
1521769d33c5Ssimonb 	/*
1522769d33c5Ssimonb 	 * Set the initial timeout to
1523769d33c5Ssimonb 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
1524769d33c5Ssimonb 	 * People should probably not use the sysctl to set the timeout
1525a5effc3cSmsaitoh 	 * to smaller than its initial value, since that value is the
1526769d33c5Ssimonb 	 * smallest reasonable one.  If they want better timestamps they
1527769d33c5Ssimonb 	 * should use the non-"get"* functions.
1528769d33c5Ssimonb 	 */
1529769d33c5Ssimonb 	if (hz > 1000)
1530769d33c5Ssimonb 		tc_tick = (hz + 500) / 1000;
1531769d33c5Ssimonb 	else
1532769d33c5Ssimonb 		tc_tick = 1;
1533769d33c5Ssimonb 	p = (tc_tick * 1000000) / hz;
1534b07ec3fcSad 	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
1535b07ec3fcSad 	    p / 1000, p % 1000);
1536769d33c5Ssimonb 
1537769d33c5Ssimonb 	/* warm up new timecounter (again) and get rolling. */
1538769d33c5Ssimonb 	(void)timecounter->tc_get_timecount(timecounter);
1539769d33c5Ssimonb 	(void)timecounter->tc_get_timecount(timecounter);
1540769d33c5Ssimonb }
1541