/* $OpenBSD: kern_tc.c,v 1.69 2020/09/16 00:00:40 cheloha Exp $ */

/*
 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * If we meet some day, and you think this stuff is worth it, you
 * can buy me a beer in return.
Poul-Henning Kamp 223751347eStholo */ 233751347eStholo 243751347eStholo #include <sys/param.h> 25a701e5dfSbluhm #include <sys/atomic.h> 263751347eStholo #include <sys/kernel.h> 271d8de610Scheloha #include <sys/mutex.h> 28af3eeb45Scheloha #include <sys/rwlock.h> 2974106511Scheloha #include <sys/stdint.h> 307f58a11fSjsg #include <sys/timeout.h> 313751347eStholo #include <sys/sysctl.h> 323751347eStholo #include <sys/syslog.h> 333751347eStholo #include <sys/systm.h> 343751347eStholo #include <sys/timetc.h> 358334e679Scheloha #include <sys/queue.h> 363751347eStholo #include <sys/malloc.h> 373751347eStholo 385a8003fbSgrange u_int dummy_get_timecount(struct timecounter *); 395a8003fbSgrange 403751347eStholo int sysctl_tc_hardware(void *, size_t *, void *, size_t); 413751347eStholo int sysctl_tc_choice(void *, size_t *, void *, size_t); 423751347eStholo 433751347eStholo /* 443751347eStholo * Implement a dummy timecounter which we can use until we get a real one 453751347eStholo * in the air. This allows the console and other early stuff to use 463751347eStholo * time services. 473751347eStholo */ 483751347eStholo 495a8003fbSgrange u_int 503751347eStholo dummy_get_timecount(struct timecounter *tc) 513751347eStholo { 523751347eStholo static u_int now; 533751347eStholo 54e62bad27Scheloha return atomic_inc_int_nv(&now); 553751347eStholo } 563751347eStholo 573751347eStholo static struct timecounter dummy_timecounter = { 58d82e6535Spirofti dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0 593751347eStholo }; 603751347eStholo 61af3eeb45Scheloha /* 62af3eeb45Scheloha * Locks used to protect struct members, global variables in this file: 63af3eeb45Scheloha * I immutable after initialization 64b609c616Santon * T tc_lock 65b609c616Santon * W windup_mtx 66af3eeb45Scheloha */ 67af3eeb45Scheloha 683751347eStholo struct timehands { 693751347eStholo /* These fields must be initialized by the driver. 
*/ 70b609c616Santon struct timecounter *th_counter; /* [W] */ 71b609c616Santon int64_t th_adjtimedelta; /* [T,W] */ 72fecf25f8Scheloha struct bintime th_next_ntp_update; /* [T,W] */ 73b609c616Santon int64_t th_adjustment; /* [W] */ 74b609c616Santon u_int64_t th_scale; /* [W] */ 75b609c616Santon u_int th_offset_count; /* [W] */ 76b609c616Santon struct bintime th_boottime; /* [T,W] */ 77b609c616Santon struct bintime th_offset; /* [W] */ 78b609c616Santon struct bintime th_naptime; /* [W] */ 79b609c616Santon struct timeval th_microtime; /* [W] */ 80b609c616Santon struct timespec th_nanotime; /* [W] */ 813751347eStholo /* Fields not to be copied in tc_windup start with th_generation. */ 82b609c616Santon volatile u_int th_generation; /* [W] */ 83af3eeb45Scheloha struct timehands *th_next; /* [I] */ 843751347eStholo }; 853751347eStholo 866ab36b32Srobert static struct timehands th0; 8774106511Scheloha static struct timehands th1 = { 8874106511Scheloha .th_next = &th0 8974106511Scheloha }; 903751347eStholo static struct timehands th0 = { 9174106511Scheloha .th_counter = &dummy_timecounter, 9274106511Scheloha .th_scale = UINT64_MAX / 1000000, 9374106511Scheloha .th_offset = { .sec = 1, .frac = 0 }, 9474106511Scheloha .th_generation = 1, 9574106511Scheloha .th_next = &th1 963751347eStholo }; 973751347eStholo 98af3eeb45Scheloha struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock"); 99af3eeb45Scheloha 1001d8de610Scheloha /* 1011d8de610Scheloha * tc_windup() must be called before leaving this mutex. 1021d8de610Scheloha */ 103ceab5aefScheloha struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK); 1041d8de610Scheloha 105b609c616Santon static struct timehands *volatile timehands = &th0; /* [W] */ 106b609c616Santon struct timecounter *timecounter = &dummy_timecounter; /* [T] */ 1078334e679Scheloha static SLIST_HEAD(, timecounter) tc_list = SLIST_HEAD_INITIALIZER(tc_list); 1083751347eStholo 1090d88cff5Scheloha /* 1100d88cff5Scheloha * These are updated from tc_windup(). 
They are useful when 1110d88cff5Scheloha * examining kernel core dumps. 1120d88cff5Scheloha */ 113*97c55bccScheloha volatile time_t naptime = 0; 114ead574e1Sart volatile time_t time_second = 1; 115ead574e1Sart volatile time_t time_uptime = 0; 1163751347eStholo 1173751347eStholo static int timestepwarnings; 1183751347eStholo 119c54148e4Scheloha void ntp_update_second(struct timehands *); 120c54148e4Scheloha void tc_windup(struct bintime *, struct bintime *, int64_t *); 1213751347eStholo 1223751347eStholo /* 1233751347eStholo * Return the difference between the timehands' counter value now and what 1243751347eStholo * was when we copied it to the timehands' offset_count. 1253751347eStholo */ 1263751347eStholo static __inline u_int 1273751347eStholo tc_delta(struct timehands *th) 1283751347eStholo { 1293751347eStholo struct timecounter *tc; 1303751347eStholo 1313751347eStholo tc = th->th_counter; 1323751347eStholo return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 1333751347eStholo tc->tc_counter_mask); 1343751347eStholo } 1353751347eStholo 1363751347eStholo /* 1373751347eStholo * Functions for reading the time. We have to loop until we are sure that 1383751347eStholo * the timehands that we operated on was not updated under our feet. See 139fa5a0c50Scheloha * the comment in <sys/time.h> for a description of these functions. 
1403751347eStholo */ 1413751347eStholo 1423751347eStholo void 143fa5a0c50Scheloha binboottime(struct bintime *bt) 144fa5a0c50Scheloha { 145fa5a0c50Scheloha struct timehands *th; 146fa5a0c50Scheloha u_int gen; 147fa5a0c50Scheloha 148fa5a0c50Scheloha do { 149fa5a0c50Scheloha th = timehands; 150fa5a0c50Scheloha gen = th->th_generation; 151fa5a0c50Scheloha membar_consumer(); 152fa5a0c50Scheloha *bt = th->th_boottime; 153fa5a0c50Scheloha membar_consumer(); 154fa5a0c50Scheloha } while (gen == 0 || gen != th->th_generation); 155fa5a0c50Scheloha } 156fa5a0c50Scheloha 157fa5a0c50Scheloha void 158fa5a0c50Scheloha microboottime(struct timeval *tvp) 159fa5a0c50Scheloha { 160fa5a0c50Scheloha struct bintime bt; 161fa5a0c50Scheloha 162fa5a0c50Scheloha binboottime(&bt); 16375b45b05Scheloha BINTIME_TO_TIMEVAL(&bt, tvp); 164fa5a0c50Scheloha } 165fa5a0c50Scheloha 166fa5a0c50Scheloha void 16702f434f1Scheloha nanoboottime(struct timespec *tsp) 16802f434f1Scheloha { 16902f434f1Scheloha struct bintime bt; 17002f434f1Scheloha 17102f434f1Scheloha binboottime(&bt); 17202f434f1Scheloha BINTIME_TO_TIMESPEC(&bt, tsp); 17302f434f1Scheloha } 17402f434f1Scheloha 17502f434f1Scheloha void 1763751347eStholo binuptime(struct bintime *bt) 1773751347eStholo { 1783751347eStholo struct timehands *th; 1793751347eStholo u_int gen; 1803751347eStholo 1813751347eStholo do { 1823751347eStholo th = timehands; 1833751347eStholo gen = th->th_generation; 184a701e5dfSbluhm membar_consumer(); 1853751347eStholo *bt = th->th_offset; 18675b45b05Scheloha bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); 187a701e5dfSbluhm membar_consumer(); 1883751347eStholo } while (gen == 0 || gen != th->th_generation); 1893751347eStholo } 1903751347eStholo 1913751347eStholo void 1923751347eStholo nanouptime(struct timespec *tsp) 1933751347eStholo { 1943751347eStholo struct bintime bt; 1953751347eStholo 1963751347eStholo binuptime(&bt); 19775b45b05Scheloha BINTIME_TO_TIMESPEC(&bt, tsp); 1983751347eStholo } 1993751347eStholo 
2003751347eStholo void 2013751347eStholo microuptime(struct timeval *tvp) 2023751347eStholo { 2033751347eStholo struct bintime bt; 2043751347eStholo 2053751347eStholo binuptime(&bt); 20675b45b05Scheloha BINTIME_TO_TIMEVAL(&bt, tvp); 2073751347eStholo } 2083751347eStholo 2098dca5d44Scheloha time_t 2108dca5d44Scheloha getuptime(void) 2118dca5d44Scheloha { 2128dca5d44Scheloha #if defined(__LP64__) 2138dca5d44Scheloha return time_uptime; /* atomic */ 2148dca5d44Scheloha #else 2158dca5d44Scheloha time_t now; 2168dca5d44Scheloha struct timehands *th; 2178dca5d44Scheloha u_int gen; 2188dca5d44Scheloha 2198dca5d44Scheloha do { 2208dca5d44Scheloha th = timehands; 2218dca5d44Scheloha gen = th->th_generation; 2228dca5d44Scheloha membar_consumer(); 2238dca5d44Scheloha now = th->th_offset.sec; 2248dca5d44Scheloha membar_consumer(); 2258dca5d44Scheloha } while (gen == 0 || gen != th->th_generation); 2268dca5d44Scheloha 2278dca5d44Scheloha return now; 2288dca5d44Scheloha #endif 2298dca5d44Scheloha } 2308dca5d44Scheloha 2313751347eStholo void 23287ba7848Scheloha binruntime(struct bintime *bt) 23387ba7848Scheloha { 23487ba7848Scheloha struct timehands *th; 23587ba7848Scheloha u_int gen; 23687ba7848Scheloha 23787ba7848Scheloha do { 23887ba7848Scheloha th = timehands; 23987ba7848Scheloha gen = th->th_generation; 24087ba7848Scheloha membar_consumer(); 24187ba7848Scheloha bintimeaddfrac(&th->th_offset, th->th_scale * tc_delta(th), bt); 24287ba7848Scheloha bintimesub(bt, &th->th_naptime, bt); 24387ba7848Scheloha membar_consumer(); 24487ba7848Scheloha } while (gen == 0 || gen != th->th_generation); 24587ba7848Scheloha } 24687ba7848Scheloha 24787ba7848Scheloha void 24887ba7848Scheloha nanoruntime(struct timespec *ts) 24987ba7848Scheloha { 25087ba7848Scheloha struct bintime bt; 25187ba7848Scheloha 25287ba7848Scheloha binruntime(&bt); 25387ba7848Scheloha BINTIME_TO_TIMESPEC(&bt, ts); 25487ba7848Scheloha } 25587ba7848Scheloha 25687ba7848Scheloha void 2573751347eStholo bintime(struct bintime 
*bt) 2583751347eStholo { 259fa5a0c50Scheloha struct timehands *th; 260fa5a0c50Scheloha u_int gen; 2613751347eStholo 262fa5a0c50Scheloha do { 263fa5a0c50Scheloha th = timehands; 264fa5a0c50Scheloha gen = th->th_generation; 265fa5a0c50Scheloha membar_consumer(); 266fa5a0c50Scheloha *bt = th->th_offset; 26775b45b05Scheloha bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); 26875b45b05Scheloha bintimeadd(bt, &th->th_boottime, bt); 269fa5a0c50Scheloha membar_consumer(); 270fa5a0c50Scheloha } while (gen == 0 || gen != th->th_generation); 2713751347eStholo } 2723751347eStholo 2733751347eStholo void 2743751347eStholo nanotime(struct timespec *tsp) 2753751347eStholo { 2763751347eStholo struct bintime bt; 2773751347eStholo 2783751347eStholo bintime(&bt); 27975b45b05Scheloha BINTIME_TO_TIMESPEC(&bt, tsp); 2803751347eStholo } 2813751347eStholo 2823751347eStholo void 2833751347eStholo microtime(struct timeval *tvp) 2843751347eStholo { 2853751347eStholo struct bintime bt; 2863751347eStholo 2873751347eStholo bintime(&bt); 28875b45b05Scheloha BINTIME_TO_TIMEVAL(&bt, tvp); 2893751347eStholo } 2903751347eStholo 2918dca5d44Scheloha time_t 2928dca5d44Scheloha gettime(void) 2938dca5d44Scheloha { 2948dca5d44Scheloha #if defined(__LP64__) 2958dca5d44Scheloha return time_second; /* atomic */ 2968dca5d44Scheloha #else 2978dca5d44Scheloha time_t now; 2988dca5d44Scheloha struct timehands *th; 2998dca5d44Scheloha u_int gen; 3008dca5d44Scheloha 3018dca5d44Scheloha do { 3028dca5d44Scheloha th = timehands; 3038dca5d44Scheloha gen = th->th_generation; 3048dca5d44Scheloha membar_consumer(); 3058dca5d44Scheloha now = th->th_microtime.tv_sec; 3068dca5d44Scheloha membar_consumer(); 3078dca5d44Scheloha } while (gen == 0 || gen != th->th_generation); 3088dca5d44Scheloha 3098dca5d44Scheloha return now; 3108dca5d44Scheloha #endif 3118dca5d44Scheloha } 3128dca5d44Scheloha 3133751347eStholo void 3143751347eStholo getnanouptime(struct timespec *tsp) 3153751347eStholo { 3163751347eStholo struct timehands 
*th; 3173751347eStholo u_int gen; 3183751347eStholo 3193751347eStholo do { 3203751347eStholo th = timehands; 3213751347eStholo gen = th->th_generation; 322a701e5dfSbluhm membar_consumer(); 32375b45b05Scheloha BINTIME_TO_TIMESPEC(&th->th_offset, tsp); 324a701e5dfSbluhm membar_consumer(); 3253751347eStholo } while (gen == 0 || gen != th->th_generation); 3263751347eStholo } 3273751347eStholo 3283751347eStholo void 3293751347eStholo getmicrouptime(struct timeval *tvp) 3303751347eStholo { 3313751347eStholo struct timehands *th; 3323751347eStholo u_int gen; 3333751347eStholo 3343751347eStholo do { 3353751347eStholo th = timehands; 3363751347eStholo gen = th->th_generation; 337a701e5dfSbluhm membar_consumer(); 33875b45b05Scheloha BINTIME_TO_TIMEVAL(&th->th_offset, tvp); 339a701e5dfSbluhm membar_consumer(); 3403751347eStholo } while (gen == 0 || gen != th->th_generation); 3413751347eStholo } 3423751347eStholo 3433751347eStholo void 3443751347eStholo getnanotime(struct timespec *tsp) 3453751347eStholo { 3463751347eStholo struct timehands *th; 3473751347eStholo u_int gen; 3483751347eStholo 3493751347eStholo do { 3503751347eStholo th = timehands; 3513751347eStholo gen = th->th_generation; 352a701e5dfSbluhm membar_consumer(); 3533751347eStholo *tsp = th->th_nanotime; 354a701e5dfSbluhm membar_consumer(); 3553751347eStholo } while (gen == 0 || gen != th->th_generation); 3563751347eStholo } 3573751347eStholo 3583751347eStholo void 3593751347eStholo getmicrotime(struct timeval *tvp) 3603751347eStholo { 3613751347eStholo struct timehands *th; 3623751347eStholo u_int gen; 3633751347eStholo 3643751347eStholo do { 3653751347eStholo th = timehands; 3663751347eStholo gen = th->th_generation; 367a701e5dfSbluhm membar_consumer(); 3683751347eStholo *tvp = th->th_microtime; 369a701e5dfSbluhm membar_consumer(); 3703751347eStholo } while (gen == 0 || gen != th->th_generation); 3713751347eStholo } 3723751347eStholo 3733751347eStholo /* 3743751347eStholo * Initialize a new timecounter and 
possibly use it. 3753751347eStholo */ 3763751347eStholo void 3773751347eStholo tc_init(struct timecounter *tc) 3783751347eStholo { 379875f2e32Scheloha u_int64_t tmp; 3805a8003fbSgrange u_int u; 3813751347eStholo 3823751347eStholo u = tc->tc_frequency / tc->tc_counter_mask; 3833751347eStholo /* XXX: We need some margin here, 10% is a guess */ 3843751347eStholo u *= 11; 3853751347eStholo u /= 10; 3863751347eStholo if (tc->tc_quality >= 0) { 3873751347eStholo if (u > hz) { 3883751347eStholo tc->tc_quality = -2000; 389ead574e1Sart printf("Timecounter \"%s\" frequency %lu Hz", 390ead574e1Sart tc->tc_name, (unsigned long)tc->tc_frequency); 3913751347eStholo printf(" -- Insufficient hz, needs at least %u\n", u); 3923751347eStholo } 3933751347eStholo } 3943751347eStholo 395875f2e32Scheloha /* Determine the counter's precision. */ 396875f2e32Scheloha for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1) 397875f2e32Scheloha continue; 398875f2e32Scheloha tc->tc_precision = tmp; 399875f2e32Scheloha 4008334e679Scheloha SLIST_INSERT_HEAD(&tc_list, tc, tc_next); 4018334e679Scheloha 4023751347eStholo /* 4033751347eStholo * Never automatically use a timecounter with negative quality. 4043751347eStholo * Even though we run on the dummy counter, switching here may be 4051b6d31a4Sguenther * worse since this timecounter may not be monotonic. 4063751347eStholo */ 4073751347eStholo if (tc->tc_quality < 0) 4083751347eStholo return; 4093751347eStholo if (tc->tc_quality < timecounter->tc_quality) 4103751347eStholo return; 4113751347eStholo if (tc->tc_quality == timecounter->tc_quality && 4123751347eStholo tc->tc_frequency < timecounter->tc_frequency) 4133751347eStholo return; 4143751347eStholo (void)tc->tc_get_timecount(tc); 4159e9abf5bSjasper enqueue_randomness(tc->tc_get_timecount(tc)); 4169d4264a7Sderaadt 4173751347eStholo timecounter = tc; 4183751347eStholo } 4193751347eStholo 4203751347eStholo /* Report the frequency of the current timecounter. 
*/ 4213751347eStholo u_int64_t 4223751347eStholo tc_getfrequency(void) 4233751347eStholo { 4243751347eStholo return (timehands->th_counter->tc_frequency); 4253751347eStholo } 4263751347eStholo 427875f2e32Scheloha /* Report the precision of the current timecounter. */ 428875f2e32Scheloha u_int64_t 429875f2e32Scheloha tc_getprecision(void) 430875f2e32Scheloha { 431875f2e32Scheloha return (timehands->th_counter->tc_precision); 432875f2e32Scheloha } 433875f2e32Scheloha 4343751347eStholo /* 4351b6d31a4Sguenther * Step our concept of UTC, aka the realtime clock. 4361b6d31a4Sguenther * This is done by modifying our estimate of when we booted. 437c54148e4Scheloha * 438c54148e4Scheloha * Any ongoing adjustment is meaningless after a clock jump, 439c54148e4Scheloha * so we zero adjtimedelta here as well. 4403751347eStholo */ 4413751347eStholo void 44224421defSguenther tc_setrealtimeclock(const struct timespec *ts) 4433751347eStholo { 4441fb8cdb7Scheloha struct bintime boottime, old_utc, uptime, utc; 4451fb8cdb7Scheloha struct timespec tmp; 446c54148e4Scheloha int64_t zero = 0; 4473751347eStholo 4481fb8cdb7Scheloha TIMESPEC_TO_BINTIME(ts, &utc); 4491fb8cdb7Scheloha 450af3eeb45Scheloha rw_enter_write(&tc_lock); 451ceab5aefScheloha mtx_enter(&windup_mtx); 4523c2e3f4bScheloha 4531fb8cdb7Scheloha binuptime(&uptime); 4541fb8cdb7Scheloha bintimesub(&utc, &uptime, &boottime); 4551fb8cdb7Scheloha bintimeadd(&timehands->th_boottime, &uptime, &old_utc); 4563751347eStholo /* XXX fiddle all the little crinkly bits around the fiords... 
*/ 4571fb8cdb7Scheloha tc_windup(&boottime, NULL, &zero); 4581fb8cdb7Scheloha 459ceab5aefScheloha mtx_leave(&windup_mtx); 460af3eeb45Scheloha rw_exit_write(&tc_lock); 4611d8de610Scheloha 4621d8de610Scheloha enqueue_randomness(ts->tv_sec); 4631d8de610Scheloha 4643751347eStholo if (timestepwarnings) { 4651fb8cdb7Scheloha BINTIME_TO_TIMESPEC(&old_utc, &tmp); 4667952239bSguenther log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n", 4671fb8cdb7Scheloha (long long)tmp.tv_sec, tmp.tv_nsec, 4687952239bSguenther (long long)ts->tv_sec, ts->tv_nsec); 4693751347eStholo } 4703751347eStholo } 4713751347eStholo 4723751347eStholo /* 4731b6d31a4Sguenther * Step the monotonic and realtime clocks, triggering any timeouts that 4741b6d31a4Sguenther * should have occurred across the interval. 4751b6d31a4Sguenther */ 4761b6d31a4Sguenther void 47724421defSguenther tc_setclock(const struct timespec *ts) 4781b6d31a4Sguenther { 479*97c55bccScheloha struct bintime new_naptime, old_naptime, uptime, utc; 4801fb8cdb7Scheloha struct timespec tmp; 481fa5a0c50Scheloha static int first = 1; 4821b6d31a4Sguenther #ifndef SMALL_KERNEL 4836e581dd8Sderaadt struct bintime elapsed; 4841b6d31a4Sguenther long long adj_ticks; 4851b6d31a4Sguenther #endif 4861b6d31a4Sguenther 4871b6d31a4Sguenther /* 4881b6d31a4Sguenther * When we're called for the first time, during boot when 489fa5a0c50Scheloha * the root partition is mounted, we need to set boottime. 
4901b6d31a4Sguenther */ 491fa5a0c50Scheloha if (first) { 4921b6d31a4Sguenther tc_setrealtimeclock(ts); 493fa5a0c50Scheloha first = 0; 4941b6d31a4Sguenther return; 4951b6d31a4Sguenther } 4961b6d31a4Sguenther 4979e9abf5bSjasper enqueue_randomness(ts->tv_sec); 4981b6d31a4Sguenther 4991fb8cdb7Scheloha TIMESPEC_TO_BINTIME(ts, &utc); 5001fb8cdb7Scheloha 501ceab5aefScheloha mtx_enter(&windup_mtx); 5021fb8cdb7Scheloha 5031fb8cdb7Scheloha bintimesub(&utc, &timehands->th_boottime, &uptime); 50487ba7848Scheloha old_naptime = timehands->th_naptime; 505e98df54aScheloha /* XXX fiddle all the little crinkly bits around the fiords... */ 5061fb8cdb7Scheloha tc_windup(NULL, &uptime, NULL); 507*97c55bccScheloha new_naptime = timehands->th_naptime; 5081fb8cdb7Scheloha 509ceab5aefScheloha mtx_leave(&windup_mtx); 510e98df54aScheloha 511*97c55bccScheloha if (bintimecmp(&old_naptime, &new_naptime, ==)) { 5121fb8cdb7Scheloha BINTIME_TO_TIMESPEC(&uptime, &tmp); 513e12a049bScheloha printf("%s: cannot rewind uptime to %lld.%09ld\n", 5141fb8cdb7Scheloha __func__, (long long)tmp.tv_sec, tmp.tv_nsec); 515e12a049bScheloha } 516e12a049bScheloha 5171b6d31a4Sguenther #ifndef SMALL_KERNEL 5181b6d31a4Sguenther /* convert the bintime to ticks */ 519*97c55bccScheloha bintimesub(&new_naptime, &old_naptime, &elapsed); 5201fb8cdb7Scheloha adj_ticks = (uint64_t)hz * elapsed.sec + 5211fb8cdb7Scheloha (((uint64_t)1000000 * (uint32_t)(elapsed.frac >> 32)) >> 32) / tick; 5221b6d31a4Sguenther if (adj_ticks > 0) { 5231b6d31a4Sguenther if (adj_ticks > INT_MAX) 5241b6d31a4Sguenther adj_ticks = INT_MAX; 525a40acd8aScheloha timeout_adjust_ticks(adj_ticks); 5261b6d31a4Sguenther } 5271b6d31a4Sguenther #endif 5281b6d31a4Sguenther } 5291b6d31a4Sguenther 530d82e6535Spirofti void 531d82e6535Spirofti tc_update_timekeep(void) 532d82e6535Spirofti { 533d82e6535Spirofti static struct timecounter *last_tc = NULL; 534d82e6535Spirofti struct timehands *th; 535d82e6535Spirofti 53604cecb01Scheloha MUTEX_ASSERT_LOCKED(&windup_mtx); 
53704cecb01Scheloha 538d82e6535Spirofti if (timekeep == NULL) 539d82e6535Spirofti return; 540d82e6535Spirofti 541d82e6535Spirofti th = timehands; 542d82e6535Spirofti timekeep->tk_generation = 0; 543d82e6535Spirofti membar_producer(); 544d82e6535Spirofti timekeep->tk_scale = th->th_scale; 545d82e6535Spirofti timekeep->tk_offset_count = th->th_offset_count; 546d82e6535Spirofti timekeep->tk_offset = th->th_offset; 547d82e6535Spirofti timekeep->tk_naptime = th->th_naptime; 548d82e6535Spirofti timekeep->tk_boottime = th->th_boottime; 549d82e6535Spirofti if (last_tc != th->th_counter) { 550d82e6535Spirofti timekeep->tk_counter_mask = th->th_counter->tc_counter_mask; 551d82e6535Spirofti timekeep->tk_user = th->th_counter->tc_user; 552d82e6535Spirofti last_tc = th->th_counter; 553d82e6535Spirofti } 554d82e6535Spirofti membar_producer(); 555d82e6535Spirofti timekeep->tk_generation = th->th_generation; 556d82e6535Spirofti 557d82e6535Spirofti return; 558d82e6535Spirofti } 559d82e6535Spirofti 5601b6d31a4Sguenther /* 5613751347eStholo * Initialize the next struct timehands in the ring and make 5623751347eStholo * it the active timehands. Along the way we might switch to a different 5633751347eStholo * timecounter and/or do seconds processing in NTP. Slightly magic. 
5643751347eStholo */ 5655a8003fbSgrange void 566c54148e4Scheloha tc_windup(struct bintime *new_boottime, struct bintime *new_offset, 567c54148e4Scheloha int64_t *new_adjtimedelta) 5683751347eStholo { 5693751347eStholo struct bintime bt; 570827d5adbScheloha struct timecounter *active_tc; 5713751347eStholo struct timehands *th, *tho; 5723751347eStholo u_int64_t scale; 5733751347eStholo u_int delta, ncount, ogen; 5743751347eStholo 575af3eeb45Scheloha if (new_boottime != NULL || new_adjtimedelta != NULL) 576af3eeb45Scheloha rw_assert_wrlock(&tc_lock); 577ceab5aefScheloha MUTEX_ASSERT_LOCKED(&windup_mtx); 5781d8de610Scheloha 579827d5adbScheloha active_tc = timecounter; 580827d5adbScheloha 5813751347eStholo /* 5823751347eStholo * Make the next timehands a copy of the current one, but do not 5833751347eStholo * overwrite the generation or next pointer. While we update 5843751347eStholo * the contents, the generation must be zero. 5853751347eStholo */ 5863751347eStholo tho = timehands; 58763cc33c4Sgkoehler ogen = tho->th_generation; 5883751347eStholo th = tho->th_next; 5893751347eStholo th->th_generation = 0; 590a701e5dfSbluhm membar_producer(); 5912955d5bcStedu memcpy(th, tho, offsetof(struct timehands, th_generation)); 5923751347eStholo 5933751347eStholo /* 5943751347eStholo * Capture a timecounter delta on the current timecounter and if 5953751347eStholo * changing timecounters, a counter value from the new timecounter. 5963751347eStholo * Update the offset fields accordingly. 
5973751347eStholo */ 5983751347eStholo delta = tc_delta(th); 599827d5adbScheloha if (th->th_counter != active_tc) 600827d5adbScheloha ncount = active_tc->tc_get_timecount(active_tc); 6013751347eStholo else 6023751347eStholo ncount = 0; 6033751347eStholo th->th_offset_count += delta; 6043751347eStholo th->th_offset_count &= th->th_counter->tc_counter_mask; 60575b45b05Scheloha bintimeaddfrac(&th->th_offset, th->th_scale * delta, &th->th_offset); 6063751347eStholo 60787ba7848Scheloha /* 60887ba7848Scheloha * Ignore new offsets that predate the current offset. 60987ba7848Scheloha * If changing the offset, first increase the naptime 61087ba7848Scheloha * accordingly. 61187ba7848Scheloha */ 61287ba7848Scheloha if (new_offset != NULL && bintimecmp(&th->th_offset, new_offset, <)) { 61387ba7848Scheloha bintimesub(new_offset, &th->th_offset, &bt); 61487ba7848Scheloha bintimeadd(&th->th_naptime, &bt, &th->th_naptime); 615*97c55bccScheloha naptime = th->th_naptime.sec; 61687ba7848Scheloha th->th_offset = *new_offset; 61787ba7848Scheloha } 61887ba7848Scheloha 6193751347eStholo #ifdef notyet 6203751347eStholo /* 6213751347eStholo * Hardware latching timecounters may not generate interrupts on 6223751347eStholo * PPS events, so instead we poll them. There is a finite risk that 6233751347eStholo * the hardware might capture a count which is later than the one we 6243751347eStholo * got above, and therefore possibly in the next NTP second which might 6253751347eStholo * have a different rate than the current NTP second. It doesn't 6263751347eStholo * matter in practice. 6273751347eStholo */ 6283751347eStholo if (tho->th_counter->tc_poll_pps) 6293751347eStholo tho->th_counter->tc_poll_pps(tho->th_counter); 6303751347eStholo #endif 6313751347eStholo 6323751347eStholo /* 633c54148e4Scheloha * If changing the boot time or clock adjustment, do so before 634c54148e4Scheloha * NTP processing. 
6357c21e1f3Scheloha */ 636c54148e4Scheloha if (new_boottime != NULL) 6377c21e1f3Scheloha th->th_boottime = *new_boottime; 638fecf25f8Scheloha if (new_adjtimedelta != NULL) { 639c54148e4Scheloha th->th_adjtimedelta = *new_adjtimedelta; 640fecf25f8Scheloha /* Reset the NTP update period. */ 641fecf25f8Scheloha bintimesub(&th->th_offset, &th->th_naptime, 642fecf25f8Scheloha &th->th_next_ntp_update); 643fecf25f8Scheloha } 6447c21e1f3Scheloha 6457c21e1f3Scheloha /* 646fecf25f8Scheloha * Deal with NTP second processing. The while-loop normally 6473751347eStholo * iterates at most once, but in extreme situations it might 648fecf25f8Scheloha * keep NTP sane if tc_windup() is not run for several seconds. 6493751347eStholo */ 650fecf25f8Scheloha bintimesub(&th->th_offset, &th->th_naptime, &bt); 651fecf25f8Scheloha while (bintimecmp(&th->th_next_ntp_update, &bt, <=)) { 652c54148e4Scheloha ntp_update_second(th); 653fecf25f8Scheloha th->th_next_ntp_update.sec++; 654fecf25f8Scheloha } 6553751347eStholo 6563751347eStholo /* Update the UTC timestamps used by the get*() functions. */ 657fecf25f8Scheloha bintimeadd(&th->th_boottime, &th->th_offset, &bt); 65875b45b05Scheloha BINTIME_TO_TIMEVAL(&bt, &th->th_microtime); 65975b45b05Scheloha BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime); 6603751347eStholo 6613751347eStholo /* Now is a good time to change timecounters. */ 662827d5adbScheloha if (th->th_counter != active_tc) { 663827d5adbScheloha th->th_counter = active_tc; 6643751347eStholo th->th_offset_count = ncount; 6653751347eStholo } 6663751347eStholo 6673751347eStholo /*- 6683751347eStholo * Recalculate the scaling factor. We want the number of 1/2^64 6693751347eStholo * fractions of a second per period of the hardware counter, taking 6703751347eStholo * into account the th_adjustment factor which the NTP PLL/adjtime(2) 6713751347eStholo * processing provides us with. 
6723751347eStholo * 6733751347eStholo * The th_adjustment is nanoseconds per second with 32 bit binary 6743751347eStholo * fraction and we want 64 bit binary fraction of second: 6753751347eStholo * 6763751347eStholo * x = a * 2^32 / 10^9 = a * 4.294967296 6773751347eStholo * 6783751347eStholo * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 6793751347eStholo * we can only multiply by about 850 without overflowing, but that 6803751347eStholo * leaves suitably precise fractions for multiply before divide. 6813751347eStholo * 6823751347eStholo * Divide before multiply with a fraction of 2199/512 results in a 6833751347eStholo * systematic undercompensation of 10PPM of th_adjustment. On a 6843751347eStholo * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 6853751347eStholo * 6863751347eStholo * We happily sacrifice the lowest of the 64 bits of our result 6873751347eStholo * to the goddess of code clarity. 6883751347eStholo * 6893751347eStholo */ 6903751347eStholo scale = (u_int64_t)1 << 63; 6919098a9c7Scheloha scale += \ 6929098a9c7Scheloha ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199; 6933751347eStholo scale /= th->th_counter->tc_frequency; 6943751347eStholo th->th_scale = scale * 2; 6953751347eStholo 6963751347eStholo /* 6973751347eStholo * Now that the struct timehands is again consistent, set the new 6983751347eStholo * generation number, making sure to not make it zero. 6993751347eStholo */ 7003751347eStholo if (++ogen == 0) 7013751347eStholo ogen = 1; 702a701e5dfSbluhm membar_producer(); 7033751347eStholo th->th_generation = ogen; 7043751347eStholo 7053751347eStholo /* Go live with the new struct timehands. 
*/ 7063751347eStholo time_second = th->th_microtime.tv_sec; 7073751347eStholo time_uptime = th->th_offset.sec; 708a701e5dfSbluhm membar_producer(); 7093751347eStholo timehands = th; 710d82e6535Spirofti 711d82e6535Spirofti tc_update_timekeep(); 7123751347eStholo } 7133751347eStholo 7143751347eStholo /* Report or change the active timecounter hardware. */ 7153751347eStholo int 7163751347eStholo sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 7173751347eStholo { 7183751347eStholo char newname[32]; 7193751347eStholo struct timecounter *newtc, *tc; 7203751347eStholo int error; 7213751347eStholo 7223751347eStholo tc = timecounter; 7233751347eStholo strlcpy(newname, tc->tc_name, sizeof(newname)); 7243751347eStholo 7253751347eStholo error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 7263751347eStholo if (error != 0 || strcmp(newname, tc->tc_name) == 0) 7273751347eStholo return (error); 7288334e679Scheloha SLIST_FOREACH(newtc, &tc_list, tc_next) { 7293751347eStholo if (strcmp(newname, newtc->tc_name) != 0) 7303751347eStholo continue; 7313751347eStholo 7323751347eStholo /* Warm up new timecounter. */ 7333751347eStholo (void)newtc->tc_get_timecount(newtc); 7343751347eStholo (void)newtc->tc_get_timecount(newtc); 7353751347eStholo 736af3eeb45Scheloha rw_enter_write(&tc_lock); 7373751347eStholo timecounter = newtc; 738af3eeb45Scheloha rw_exit_write(&tc_lock); 739af3eeb45Scheloha 7403751347eStholo return (0); 7413751347eStholo } 7423751347eStholo return (EINVAL); 7433751347eStholo } 7443751347eStholo 7453751347eStholo /* Report or change the active timecounter hardware. 
*/ 7463751347eStholo int 7473751347eStholo sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 7483751347eStholo { 7493751347eStholo char buf[32], *spc, *choices; 7503751347eStholo struct timecounter *tc; 7510816f330Sderaadt int error, maxlen; 7523751347eStholo 7538334e679Scheloha if (SLIST_EMPTY(&tc_list)) 75408e05d41Scheloha return (sysctl_rdstring(oldp, oldlenp, newp, "")); 75508e05d41Scheloha 7563751347eStholo spc = ""; 7573751347eStholo maxlen = 0; 7588334e679Scheloha SLIST_FOREACH(tc, &tc_list, tc_next) 7593751347eStholo maxlen += sizeof(buf); 7603751347eStholo choices = malloc(maxlen, M_TEMP, M_WAITOK); 7613751347eStholo *choices = '\0'; 7628334e679Scheloha SLIST_FOREACH(tc, &tc_list, tc_next) { 7630816f330Sderaadt snprintf(buf, sizeof(buf), "%s%s(%d)", 7643751347eStholo spc, tc->tc_name, tc->tc_quality); 7653751347eStholo spc = " "; 7663751347eStholo strlcat(choices, buf, maxlen); 7673751347eStholo } 7683751347eStholo error = sysctl_rdstring(oldp, oldlenp, newp, choices); 769fc62de09Stedu free(choices, M_TEMP, maxlen); 7703751347eStholo return (error); 7713751347eStholo } 7723751347eStholo 7733751347eStholo /* 7743751347eStholo * Timecounters need to be updated every so often to prevent the hardware 7753751347eStholo * counter from overflowing. Updating also recalculates the cached values 7763751347eStholo * used by the get*() family of functions, so their precision depends on 7773751347eStholo * the update frequency. 
7783751347eStholo */ 7793751347eStholo static int tc_tick; 7803751347eStholo 7813751347eStholo void 7823751347eStholo tc_ticktock(void) 7833751347eStholo { 7843751347eStholo static int count; 7853751347eStholo 7863751347eStholo if (++count < tc_tick) 7873751347eStholo return; 788ceab5aefScheloha if (!mtx_enter_try(&windup_mtx)) 7891d8de610Scheloha return; 7903751347eStholo count = 0; 791c54148e4Scheloha tc_windup(NULL, NULL, NULL); 792ceab5aefScheloha mtx_leave(&windup_mtx); 7933751347eStholo } 7943751347eStholo 7953751347eStholo void 7963751347eStholo inittimecounter(void) 7973751347eStholo { 7984c33e0a9Sderaadt #ifdef DEBUG 7993751347eStholo u_int p; 8004c33e0a9Sderaadt #endif 8013751347eStholo 8023751347eStholo /* 8033751347eStholo * Set the initial timeout to 8043751347eStholo * max(1, <approx. number of hardclock ticks in a millisecond>). 8053751347eStholo * People should probably not use the sysctl to set the timeout 806a3c911baSschwarze * to smaller than its initial value, since that value is the 8073751347eStholo * smallest reasonable one. If they want better timestamps they 8083751347eStholo * should use the non-"get"* functions. 8093751347eStholo */ 8103751347eStholo if (hz > 1000) 8113751347eStholo tc_tick = (hz + 500) / 1000; 8123751347eStholo else 8133751347eStholo tc_tick = 1; 814ead574e1Sart #ifdef DEBUG 8154c33e0a9Sderaadt p = (tc_tick * 1000000) / hz; 8163751347eStholo printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 817ead574e1Sart #endif 8183751347eStholo 8193751347eStholo /* warm up new timecounter (again) and get rolling. */ 8203751347eStholo (void)timecounter->tc_get_timecount(timecounter); 8213751347eStholo (void)timecounter->tc_get_timecount(timecounter); 8223751347eStholo } 8233751347eStholo 8243751347eStholo /* 8253751347eStholo * Return timecounter-related information. 
8263751347eStholo */ 8273751347eStholo int 8283751347eStholo sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 8293751347eStholo void *newp, size_t newlen) 8303751347eStholo { 8313751347eStholo if (namelen != 1) 8323751347eStholo return (ENOTDIR); 8333751347eStholo 8343751347eStholo switch (name[0]) { 8353751347eStholo case KERN_TIMECOUNTER_TICK: 8363751347eStholo return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 8373751347eStholo case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 8383751347eStholo return (sysctl_int(oldp, oldlenp, newp, newlen, 8393751347eStholo ×tepwarnings)); 8403751347eStholo case KERN_TIMECOUNTER_HARDWARE: 8413751347eStholo return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 8423751347eStholo case KERN_TIMECOUNTER_CHOICE: 8433751347eStholo return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 8443751347eStholo default: 8453751347eStholo return (EOPNOTSUPP); 8463751347eStholo } 8473751347eStholo /* NOTREACHED */ 8483751347eStholo } 8493751347eStholo 850c54148e4Scheloha /* 8519098a9c7Scheloha * Skew the timehands according to any adjtime(2) adjustment. 
852c54148e4Scheloha */ 8533751347eStholo void 854c54148e4Scheloha ntp_update_second(struct timehands *th) 8553751347eStholo { 856a1745eadSkettenis int64_t adj; 8573751347eStholo 858c54148e4Scheloha MUTEX_ASSERT_LOCKED(&windup_mtx); 859c54148e4Scheloha 860c54148e4Scheloha if (th->th_adjtimedelta > 0) 861c54148e4Scheloha adj = MIN(5000, th->th_adjtimedelta); 862a1745eadSkettenis else 863c54148e4Scheloha adj = MAX(-5000, th->th_adjtimedelta); 864c54148e4Scheloha th->th_adjtimedelta -= adj; 865c54148e4Scheloha th->th_adjustment = (adj * 1000) << 32; 86617f73788Sotto } 86717f73788Sotto 868af3eeb45Scheloha void 86917f73788Sotto tc_adjfreq(int64_t *old, int64_t *new) 87017f73788Sotto { 87117f73788Sotto if (old != NULL) { 872af3eeb45Scheloha rw_assert_anylock(&tc_lock); 87317f73788Sotto *old = timecounter->tc_freq_adj; 87417f73788Sotto } 87517f73788Sotto if (new != NULL) { 876af3eeb45Scheloha rw_assert_wrlock(&tc_lock); 877af3eeb45Scheloha mtx_enter(&windup_mtx); 87817f73788Sotto timecounter->tc_freq_adj = *new; 879af3eeb45Scheloha tc_windup(NULL, NULL, NULL); 880af3eeb45Scheloha mtx_leave(&windup_mtx); 88117f73788Sotto } 8823751347eStholo } 8833c2e3f4bScheloha 8843c2e3f4bScheloha void 8853c2e3f4bScheloha tc_adjtime(int64_t *old, int64_t *new) 8863c2e3f4bScheloha { 887c54148e4Scheloha struct timehands *th; 888c54148e4Scheloha u_int gen; 889c54148e4Scheloha 890c54148e4Scheloha if (old != NULL) { 891c54148e4Scheloha do { 892c54148e4Scheloha th = timehands; 893c54148e4Scheloha gen = th->th_generation; 894c54148e4Scheloha membar_consumer(); 895c54148e4Scheloha *old = th->th_adjtimedelta; 896c54148e4Scheloha membar_consumer(); 897c54148e4Scheloha } while (gen == 0 || gen != th->th_generation); 898c54148e4Scheloha } 899c54148e4Scheloha if (new != NULL) { 900af3eeb45Scheloha rw_assert_wrlock(&tc_lock); 901c54148e4Scheloha mtx_enter(&windup_mtx); 902c54148e4Scheloha tc_windup(NULL, NULL, new); 903c54148e4Scheloha mtx_leave(&windup_mtx); 904c54148e4Scheloha } 9053c2e3f4bScheloha } 
906