/* $NetBSD: kern_tc.c,v 1.47 2017/06/09 01:16:33 chs Exp $ */

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ---------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.47 2017/06/09 01:16:33 chs Exp $");

#ifdef _KERNEL_OPT
#include "opt_ntp.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/evcnt.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/xcall.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
};
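/*
 * Purely for illustration (not part of this file): a hardware driver that
 * exposes a free-running counter would typically fill in a struct
 * timecounter like the dummy above and register it with tc_init().  The
 * device name, frequency and read routine below are hypothetical; only
 * fields actually used by this file are shown.
 *
 *	static u_int
 *	examplecnt_get_timecount(struct timecounter *tc)
 *	{
 *		return read_hardware_counter_register();   hypothetical read
 *	}
 *
 *	static struct timecounter examplecnt_timecounter = {
 *		.tc_get_timecount = examplecnt_get_timecount,
 *		.tc_counter_mask  = 0xffffffffu,            32-bit counter
 *		.tc_frequency     = 25000000,               25 MHz, made up
 *		.tc_name          = "examplecnt",
 *		.tc_quality       = 100,
 *	};
 *
 *	tc_init(&examplecnt_timecounter);
 */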
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;	 /* active timecounter */
	int64_t		th_adjustment;	 /* frequency adjustment (NTP/adjtime) */
	u_int64_t	th_scale;	 /* scale factor (counter tick->time) */
	u_int64_t	th_offset_count; /* offset at last time update (tc_windup()) */
	struct bintime	th_offset;	 /* bin (up)time at windup */
	struct timeval	th_microtime;	 /* cached microtime */
	struct timespec	th_nanotime;	 /* cached nanotime */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int	th_generation;	 /* current generation */
	struct timehands *th_next;	 /* next timehand */
};

static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

volatile time_t time_second = 1;
volatile time_t time_uptime = 1;

static struct bintime timebasebin;

static int timestepwarnings;

kmutex_t timecounter_lock;
static u_int timecounter_mods;
static volatile int timecounter_removals = 1;
static u_int timecounter_bad;

#ifdef __FreeBSD__
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "");
#endif /* __FreeBSD__ */

/*
 * sysctl helper routine for kern.timecounter.hardware
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return (error);

	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}

static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error, mods;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	mutex_spin_enter(&timecounter_lock);
 retry:
	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;
	where = oldp;
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			needed += sizeof(buf);	/* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
			    " Hz)", spc, tc->tc_name, tc->tc_quality,
			    tc->tc_frequency);
			if (left < slen + 1)
				break;
			mods = timecounter_mods;
			mutex_spin_exit(&timecounter_lock);
			error = copyout(buf, where, slen + 1);
			mutex_spin_enter(&timecounter_lock);
			if (mods != timecounter_mods) {
				goto retry;
			}
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_spin_exit(&timecounter_lock);

	*oldlenp = needed;
	return (error);
}

SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "timecounter",
	    SYSCTL_DESCR("time counter information"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRING, "choice",
		    SYSCTL_DESCR("available counters"),
		    sysctl_kern_timecounter_choice, 0, NULL, 0,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_STRING, "hardware",
		    SYSCTL_DESCR("currently active time counter"),
		    sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "timestepwarnings",
		    SYSCTL_DESCR("log time steps"),
		    NULL, 0, &timestepwarnings, 0,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}
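/*
 * The nodes created above are normally exercised from userland with
 * sysctl(8).  The exact counter name depends on the machine, so "TSC"
 * below is only an example:
 *
 *	$ sysctl kern.timecounter.choice
 *	$ sysctl -w kern.timecounter.hardware=TSC
 *	$ sysctl -w kern.timecounter.timestepwarnings=1
 */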
#ifdef TC_COUNTERS
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */

static void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) -
	    th->th_offset_count) & tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/timevar.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	if (__predict_true(lgen == 0)) {
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);

	__insn_barrier();
	l->l_tcgen = lgen;
}
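/*
 * Rough arithmetic behind the multiply above (the counter frequency is
 * only an example): tc_windup() keeps th_scale close to 2^64 divided by
 * tc_frequency, i.e. the number of 2^-64 second fractions per counter
 * tick.  For a 1 MHz counter, th_scale is about 2^64 / 10^6, so a delta
 * of 1000 ticks adds roughly 2^64 / 1000 fractions to bt, which is one
 * millisecond.
 */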
void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanouptime);
	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrouptime);
	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	TC_COUNT(nbintime);
	binuptime(bt);
	bintime_add(bt, &timebasebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanotime);
	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrotime);
	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbintime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &timebasebin);
}

static inline void
dogetnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void dtrace_getnanotime(struct timespec *tsp);

void
dtrace_getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}

/*
 * Pick a new timecounter due to the existing counter going bad.
 */
static void
tc_pick(void)
{
	struct timecounter *best, *tc;

	KASSERT(mutex_owned(&timecounter_lock));

	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc->tc_quality > best->tc_quality)
			best = tc;
		else if (tc->tc_quality < best->tc_quality)
			continue;
		else if (tc->tc_frequency > best->tc_frequency)
			best = tc;
	}
	(void)best->tc_get_timecount(best);
	(void)best->tc_get_timecount(best);
	timecounter = best;
}
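/*
 * Worked example of the overflow-margin test in tc_init(), with made-up
 * numbers: a 1 MHz counter with a 16-bit mask (0xffff) wraps after about
 * 65.5 ms, so u = 10^6 / 65535, times 11/10, is roughly 16.  tc_windup()
 * must therefore run at least ~16 times per second (hz >= 16) or wraps
 * could be missed, and such a counter is demoted to quality -2000.
 */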
588 */ 589 void 590 tc_gonebad(struct timecounter *tc) 591 { 592 593 tc->tc_quality = -100; 594 membar_producer(); 595 atomic_inc_uint(&timecounter_bad); 596 } 597 598 /* 599 * Stop using a timecounter and remove it from the timecounters list. 600 */ 601 int 602 tc_detach(struct timecounter *target) 603 { 604 struct timecounter *tc; 605 struct timecounter **tcp = NULL; 606 int removals; 607 uint64_t where; 608 lwp_t *l; 609 610 /* First, find the timecounter. */ 611 mutex_spin_enter(&timecounter_lock); 612 for (tcp = &timecounters, tc = timecounters; 613 tc != NULL; 614 tcp = &tc->tc_next, tc = tc->tc_next) { 615 if (tc == target) 616 break; 617 } 618 if (tc == NULL) { 619 mutex_spin_exit(&timecounter_lock); 620 return ESRCH; 621 } 622 623 /* And now, remove it. */ 624 *tcp = tc->tc_next; 625 if (timecounter == target) { 626 tc_pick(); 627 tc_windup(); 628 } 629 timecounter_mods++; 630 removals = timecounter_removals++; 631 mutex_spin_exit(&timecounter_lock); 632 633 /* 634 * We now have to determine if any threads in the system are still 635 * making use of this timecounter. 636 * 637 * We issue a broadcast cross call to elide memory ordering issues, 638 * then scan all LWPs in the system looking at each's timecounter 639 * generation number. We need to see a value of zero (not actively 640 * using a timecounter) or a value greater than our removal value. 641 * 642 * We may race with threads that read `timecounter_removals' and 643 * and then get preempted before updating `l_tcgen'. This is not 644 * a problem, since it means that these threads have not yet started 645 * accessing timecounter state. All we do need is one clean 646 * snapshot of the system where every thread appears not to be using 647 * old timecounter state. 648 */ 649 for (;;) { 650 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 651 xc_wait(where); 652 653 mutex_enter(proc_lock); 654 LIST_FOREACH(l, &alllwp, l_list) { 655 if (l->l_tcgen == 0 || l->l_tcgen > removals) { 656 /* 657 * Not using timecounter or old timecounter 658 * state at time of our xcall or later. 659 */ 660 continue; 661 } 662 break; 663 } 664 mutex_exit(proc_lock); 665 666 /* 667 * If the timecounter is still in use, wait at least 10ms 668 * before retrying. 669 */ 670 if (l == NULL) { 671 return 0; 672 } 673 (void)kpause("tcdetach", false, mstohz(10), NULL); 674 } 675 } 676 677 /* Report the frequency of the current timecounter. */ 678 u_int64_t 679 tc_getfrequency(void) 680 { 681 682 return (timehands->th_counter->tc_frequency); 683 } 684 685 /* 686 * Step our concept of UTC. This is done by modifying our estimate of 687 * when we booted. 688 */ 689 void 690 tc_setclock(const struct timespec *ts) 691 { 692 struct timespec ts2; 693 struct bintime bt, bt2; 694 695 mutex_spin_enter(&timecounter_lock); 696 TC_COUNT(nsetclock); 697 binuptime(&bt2); 698 timespec2bintime(ts, &bt); 699 bintime_sub(&bt, &bt2); 700 bintime_add(&bt2, &timebasebin); 701 timebasebin = bt; 702 tc_windup(); 703 mutex_spin_exit(&timecounter_lock); 704 705 if (timestepwarnings) { 706 bintime2timespec(&bt2, &ts2); 707 log(LOG_INFO, 708 "Time stepped from %lld.%09ld to %lld.%09ld\n", 709 (long long)ts2.tv_sec, ts2.tv_nsec, 710 (long long)ts->tv_sec, ts->tv_nsec); 711 } 712 } 713 714 /* 715 * Initialize the next struct timehands in the ring and make 716 * it the active timehands. Along the way we might switch to a different 717 * timecounter and/or do seconds processing in NTP. Slightly magic. 
718 */ 719 static void 720 tc_windup(void) 721 { 722 struct bintime bt; 723 struct timehands *th, *tho; 724 u_int64_t scale; 725 u_int delta, ncount, ogen; 726 int i, s_update; 727 time_t t; 728 729 KASSERT(mutex_owned(&timecounter_lock)); 730 731 s_update = 0; 732 733 /* 734 * Make the next timehands a copy of the current one, but do not 735 * overwrite the generation or next pointer. While we update 736 * the contents, the generation must be zero. Ensure global 737 * visibility of the generation before proceeding. 738 */ 739 tho = timehands; 740 th = tho->th_next; 741 ogen = th->th_generation; 742 th->th_generation = 0; 743 membar_producer(); 744 bcopy(tho, th, offsetof(struct timehands, th_generation)); 745 746 /* 747 * Capture a timecounter delta on the current timecounter and if 748 * changing timecounters, a counter value from the new timecounter. 749 * Update the offset fields accordingly. 750 */ 751 delta = tc_delta(th); 752 if (th->th_counter != timecounter) 753 ncount = timecounter->tc_get_timecount(timecounter); 754 else 755 ncount = 0; 756 th->th_offset_count += delta; 757 bintime_addx(&th->th_offset, th->th_scale * delta); 758 759 /* 760 * Hardware latching timecounters may not generate interrupts on 761 * PPS events, so instead we poll them. There is a finite risk that 762 * the hardware might capture a count which is later than the one we 763 * got above, and therefore possibly in the next NTP second which might 764 * have a different rate than the current NTP second. It doesn't 765 * matter in practice. 766 */ 767 if (tho->th_counter->tc_poll_pps) 768 tho->th_counter->tc_poll_pps(tho->th_counter); 769 770 /* 771 * Deal with NTP second processing. The for loop normally 772 * iterates at most once, but in extreme situations it might 773 * keep NTP sane if timeouts are not run for several seconds. 774 * At boot, the time step can be large when the TOD hardware 775 * has been read, so on really large steps, we call 776 * ntp_update_second only twice. We need to call it twice in 777 * case we missed a leap second. 778 * If NTP is not compiled in ntp_update_second still calculates 779 * the adjustment resulting from adjtime() calls. 780 */ 781 bt = th->th_offset; 782 bintime_add(&bt, &timebasebin); 783 i = bt.sec - tho->th_microtime.tv_sec; 784 if (i > LARGE_STEP) 785 i = 2; 786 for (; i > 0; i--) { 787 t = bt.sec; 788 ntp_update_second(&th->th_adjustment, &bt.sec); 789 s_update = 1; 790 if (bt.sec != t) 791 timebasebin.sec += bt.sec - t; 792 } 793 794 /* Update the UTC timestamps used by the get*() functions. */ 795 /* XXX shouldn't do this here. Should force non-`get' versions. */ 796 bintime2timeval(&bt, &th->th_microtime); 797 bintime2timespec(&bt, &th->th_nanotime); 798 /* Now is a good time to change timecounters. */ 799 if (th->th_counter != timecounter) { 800 th->th_counter = timecounter; 801 th->th_offset_count = ncount; 802 s_update = 1; 803 } 804 805 /*- 806 * Recalculate the scaling factor. We want the number of 1/2^64 807 * fractions of a second per period of the hardware counter, taking 808 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 809 * processing provides us with. 
810 * 811 * The th_adjustment is nanoseconds per second with 32 bit binary 812 * fraction and we want 64 bit binary fraction of second: 813 * 814 * x = a * 2^32 / 10^9 = a * 4.294967296 815 * 816 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 817 * we can only multiply by about 850 without overflowing, but that 818 * leaves suitably precise fractions for multiply before divide. 819 * 820 * Divide before multiply with a fraction of 2199/512 results in a 821 * systematic undercompensation of 10PPM of th_adjustment. On a 822 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 823 * 824 * We happily sacrifice the lowest of the 64 bits of our result 825 * to the goddess of code clarity. 826 * 827 */ 828 if (s_update) { 829 scale = (u_int64_t)1 << 63; 830 scale += (th->th_adjustment / 1024) * 2199; 831 scale /= th->th_counter->tc_frequency; 832 th->th_scale = scale * 2; 833 } 834 /* 835 * Now that the struct timehands is again consistent, set the new 836 * generation number, making sure to not make it zero. Ensure 837 * changes are globally visible before changing. 838 */ 839 if (++ogen == 0) 840 ogen = 1; 841 membar_producer(); 842 th->th_generation = ogen; 843 844 /* 845 * Go live with the new struct timehands. Ensure changes are 846 * globally visible before changing. 847 */ 848 time_second = th->th_microtime.tv_sec; 849 time_uptime = th->th_offset.sec; 850 membar_producer(); 851 timehands = th; 852 853 /* 854 * Force users of the old timehand to move on. This is 855 * necessary for MP systems; we need to ensure that the 856 * consumers will move away from the old timehand before 857 * we begin updating it again when we eventually wrap 858 * around. 859 */ 860 if (++tho->th_generation == 0) 861 tho->th_generation = 1; 862 } 863 864 /* 865 * RFC 2783 PPS-API implementation. 
866 */ 867 868 int 869 pps_ioctl(u_long cmd, void *data, struct pps_state *pps) 870 { 871 pps_params_t *app; 872 pps_info_t *pipi; 873 #ifdef PPS_SYNC 874 int *epi; 875 #endif 876 877 KASSERT(mutex_owned(&timecounter_lock)); 878 879 KASSERT(pps != NULL); 880 881 switch (cmd) { 882 case PPS_IOC_CREATE: 883 return (0); 884 case PPS_IOC_DESTROY: 885 return (0); 886 case PPS_IOC_SETPARAMS: 887 app = (pps_params_t *)data; 888 if (app->mode & ~pps->ppscap) 889 return (EINVAL); 890 pps->ppsparam = *app; 891 return (0); 892 case PPS_IOC_GETPARAMS: 893 app = (pps_params_t *)data; 894 *app = pps->ppsparam; 895 app->api_version = PPS_API_VERS_1; 896 return (0); 897 case PPS_IOC_GETCAP: 898 *(int*)data = pps->ppscap; 899 return (0); 900 case PPS_IOC_FETCH: 901 pipi = (pps_info_t *)data; 902 pps->ppsinfo.current_mode = pps->ppsparam.mode; 903 *pipi = pps->ppsinfo; 904 return (0); 905 case PPS_IOC_KCBIND: 906 #ifdef PPS_SYNC 907 epi = (int *)data; 908 /* XXX Only root should be able to do this */ 909 if (*epi & ~pps->ppscap) 910 return (EINVAL); 911 pps->kcmode = *epi; 912 return (0); 913 #else 914 return (EOPNOTSUPP); 915 #endif 916 default: 917 return (EPASSTHROUGH); 918 } 919 } 920 921 void 922 pps_init(struct pps_state *pps) 923 { 924 925 KASSERT(mutex_owned(&timecounter_lock)); 926 927 pps->ppscap |= PPS_TSFMT_TSPEC; 928 if (pps->ppscap & PPS_CAPTUREASSERT) 929 pps->ppscap |= PPS_OFFSETASSERT; 930 if (pps->ppscap & PPS_CAPTURECLEAR) 931 pps->ppscap |= PPS_OFFSETCLEAR; 932 } 933 934 /* 935 * capture a timetamp in the pps structure 936 */ 937 void 938 pps_capture(struct pps_state *pps) 939 { 940 struct timehands *th; 941 942 KASSERT(mutex_owned(&timecounter_lock)); 943 KASSERT(pps != NULL); 944 945 th = timehands; 946 pps->capgen = th->th_generation; 947 pps->capth = th; 948 pps->capcount = (u_int64_t)tc_delta(th) + th->th_offset_count; 949 if (pps->capgen != th->th_generation) 950 pps->capgen = 0; 951 } 952 953 #ifdef PPS_DEBUG 954 int ppsdebug = 0; 955 #endif 956 957 /* 958 * process a pps_capture()ed event 959 */ 960 void 961 pps_event(struct pps_state *pps, int event) 962 { 963 pps_ref_event(pps, event, NULL, PPS_REFEVNT_PPS|PPS_REFEVNT_CAPTURE); 964 } 965 966 /* 967 * extended pps api / kernel pll/fll entry point 968 * 969 * feed reference time stamps to PPS engine 970 * 971 * will simulate a PPS event and feed 972 * the NTP PLL/FLL if requested. 973 * 974 * the ref time stamps should be roughly once 975 * a second but do not need to be exactly in phase 976 * with the UTC second but should be close to it. 977 * this relaxation of requirements allows callout 978 * driven timestamping mechanisms to feed to pps 979 * capture/kernel pll logic. 
980 * 981 * calling pattern is: 982 * pps_capture() (for PPS_REFEVNT_{CAPTURE|CAPCUR}) 983 * read timestamp from reference source 984 * pps_ref_event() 985 * 986 * supported refmodes: 987 * PPS_REFEVNT_CAPTURE 988 * use system timestamp of pps_capture() 989 * PPS_REFEVNT_CURRENT 990 * use system timestamp of this call 991 * PPS_REFEVNT_CAPCUR 992 * use average of read capture and current system time stamp 993 * PPS_REFEVNT_PPS 994 * assume timestamp on second mark - ref_ts is ignored 995 * 996 */ 997 998 void 999 pps_ref_event(struct pps_state *pps, 1000 int event, 1001 struct bintime *ref_ts, 1002 int refmode 1003 ) 1004 { 1005 struct bintime bt; /* current time */ 1006 struct bintime btd; /* time difference */ 1007 struct bintime bt_ref; /* reference time */ 1008 struct timespec ts, *tsp, *osp; 1009 struct timehands *th; 1010 u_int64_t tcount, acount, dcount, *pcount; 1011 int foff, gen; 1012 #ifdef PPS_SYNC 1013 int fhard; 1014 #endif 1015 pps_seq_t *pseq; 1016 1017 KASSERT(mutex_owned(&timecounter_lock)); 1018 1019 KASSERT(pps != NULL); 1020 1021 /* pick up current time stamp if needed */ 1022 if (refmode & (PPS_REFEVNT_CURRENT|PPS_REFEVNT_CAPCUR)) { 1023 /* pick up current time stamp */ 1024 th = timehands; 1025 gen = th->th_generation; 1026 tcount = (u_int64_t)tc_delta(th) + th->th_offset_count; 1027 if (gen != th->th_generation) 1028 gen = 0; 1029 1030 /* If the timecounter was wound up underneath us, bail out. */ 1031 if (pps->capgen == 0 || 1032 pps->capgen != pps->capth->th_generation || 1033 gen == 0 || 1034 gen != pps->capgen) { 1035 #ifdef PPS_DEBUG 1036 if (ppsdebug & 0x1) { 1037 log(LOG_DEBUG, 1038 "pps_ref_event(pps=%p, event=%d, ...): DROP (wind-up)\n", 1039 pps, event); 1040 } 1041 #endif 1042 return; 1043 } 1044 } else { 1045 tcount = 0; /* keep GCC happy */ 1046 } 1047 1048 #ifdef PPS_DEBUG 1049 if (ppsdebug & 0x1) { 1050 struct timespec tmsp; 1051 1052 if (ref_ts == NULL) { 1053 tmsp.tv_sec = 0; 1054 tmsp.tv_nsec = 0; 1055 } else { 1056 bintime2timespec(ref_ts, &tmsp); 1057 } 1058 1059 log(LOG_DEBUG, 1060 "pps_ref_event(pps=%p, event=%d, ref_ts=%"PRIi64 1061 ".%09"PRIi32", refmode=0x%1x)\n", 1062 pps, event, tmsp.tv_sec, (int32_t)tmsp.tv_nsec, refmode); 1063 } 1064 #endif 1065 1066 /* setup correct event references */ 1067 if (event == PPS_CAPTUREASSERT) { 1068 tsp = &pps->ppsinfo.assert_timestamp; 1069 osp = &pps->ppsparam.assert_offset; 1070 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 1071 #ifdef PPS_SYNC 1072 fhard = pps->kcmode & PPS_CAPTUREASSERT; 1073 #endif 1074 pcount = &pps->ppscount[0]; 1075 pseq = &pps->ppsinfo.assert_sequence; 1076 } else { 1077 tsp = &pps->ppsinfo.clear_timestamp; 1078 osp = &pps->ppsparam.clear_offset; 1079 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 1080 #ifdef PPS_SYNC 1081 fhard = pps->kcmode & PPS_CAPTURECLEAR; 1082 #endif 1083 pcount = &pps->ppscount[1]; 1084 pseq = &pps->ppsinfo.clear_sequence; 1085 } 1086 1087 /* determine system time stamp according to refmode */ 1088 dcount = 0; /* keep GCC happy */ 1089 switch (refmode & PPS_REFEVNT_RMASK) { 1090 case PPS_REFEVNT_CAPTURE: 1091 acount = pps->capcount; /* use capture timestamp */ 1092 break; 1093 1094 case PPS_REFEVNT_CURRENT: 1095 acount = tcount; /* use current timestamp */ 1096 break; 1097 1098 case PPS_REFEVNT_CAPCUR: 1099 /* 1100 * calculate counter value between pps_capture() and 1101 * pps_ref_event() 1102 */ 1103 dcount = tcount - pps->capcount; 1104 acount = (dcount / 2) + pps->capcount; 1105 break; 1106 1107 default: /* ignore call error silently */ 1108 return; 1109 
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		pps->capcount = acount;
		*pcount = acount;
		pps->ppscount[2] = acount;
#ifdef PPS_DEBUG
		if (ppsdebug & 0x1) {
			log(LOG_DEBUG,
			    "pps_ref_event(pps=%p, event=%d, ...): DROP (time-counter change)\n",
			    pps, event);
		}
#endif
		return;
	}

	pps->capcount = acount;

	/* Convert the count to a bintime. */
	bt = pps->capth->th_offset;
	bintime_addx(&bt,
	    pps->capth->th_scale * (acount - pps->capth->th_offset_count));
	bintime_add(&bt, &timebasebin);

	if ((refmode & PPS_REFEVNT_PPS) == 0) {
		/* determine difference to reference time stamp */
		bt_ref = *ref_ts;

		btd = bt;
		bintime_sub(&btd, &bt_ref);

		/*
		 * simulate a PPS timestamp by dropping the fraction
		 * and applying the offset
		 */
		if (bt.frac >= (uint64_t)1<<63)	/* skip to nearest second */
			bt.sec++;
		bt.frac = 0;
		bintime_add(&bt, &btd);
	} else {
		/*
		 * create ref_ts from current time -
		 * we are supposed to be called on
		 * the second mark
		 */
		bt_ref = bt;
		if (bt_ref.frac >= (uint64_t)1<<63) /* skip to nearest second */
			bt_ref.sec++;
		bt_ref.frac = 0;
	}

	/* convert bintime to timestamp */
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	/* store time stamp */
	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	/* add offset correction */
	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef PPS_DEBUG
	if (ppsdebug & 0x2) {
		struct timespec ts2;
		struct timespec ts3;

		bintime2timespec(&bt_ref, &ts2);

		bt.sec = 0;
		bt.frac = 0;

		if (refmode & PPS_REFEVNT_CAPCUR) {
			bintime_addx(&bt, pps->capth->th_scale * dcount);
		}
		bintime2timespec(&bt, &ts3);

		log(LOG_DEBUG, "ref_ts=%"PRIi64".%09"PRIi32
		    ", ts=%"PRIi64".%09"PRIi32", read latency=%"PRIi64" ns\n",
		    ts2.tv_sec, (int32_t)ts2.tv_nsec,
		    tsp->tv_sec, (int32_t)tsp->tv_nsec,
		    timespec2ns(&ts3));
	}
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;
		uint64_t div;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event (mod 1 second) thus
		 * we are actually looking at the frequency difference scaled
		 * in nsec.
		 * As the counter time stamps are not truly at 1Hz
		 * we need to scale the count by the elapsed
		 * reference time.
		 * valid sampling interval: [0.5..2[ sec
		 */

		/* calculate elapsed raw count */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;

		/* calculate elapsed ref time */
		btd = bt_ref;
		bintime_sub(&btd, &pps->ref_time);
		pps->ref_time = bt_ref;

		/* check that we stay below 2 sec */
		if (btd.sec < 0 || btd.sec > 1)
			return;

		/* we want at least 0.5 sec between samples */
		if (btd.sec == 0 && btd.frac < (uint64_t)1<<63)
			return;

		/*
		 * calculate cycles per period by multiplying
		 * the frequency with the elapsed period
		 * we pick a fraction of 30 bits
		 * ~1ns resolution for elapsed time
		 */
		div = (uint64_t)btd.sec << 30;
		div |= (btd.frac >> 34) & (((uint64_t)1 << 30) - 1);
		div *= pps->capth->th_counter->tc_frequency;
		div >>= 30;

		if (div == 0)	/* safeguard */
			return;

		scale = (uint64_t)1 << 63;
		scale /= div;
		scale *= 2;

		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);

#ifdef PPS_DEBUG
		if (ppsdebug & 0x4) {
			struct timespec ts2;
			int64_t df;

			bintime2timespec(&bt_ref, &ts2);
			df = timespec2ns(&ts);
			if (df > 500000000)
				df -= 1000000000;
			log(LOG_DEBUG, "hardpps: ref_ts=%"PRIi64
			    ".%09"PRIi32", ts=%"PRIi64".%09"PRIi32
			    ", freqdiff=%"PRIi64" ns/s\n",
			    ts2.tv_sec, (int32_t)ts2.tv_nsec,
			    tsp->tv_sec, (int32_t)tsp->tv_nsec,
			    df);
		}
#endif

		hardpps(tsp, timespec2ns(&ts));
	}
#endif
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	mutex_spin_enter(&timecounter_lock);
	if (timecounter_bad != 0) {
		/* An existing timecounter has gone bad, pick a new one. */
		(void)atomic_swap_uint(&timecounter_bad, 0);
		if (timecounter->tc_quality < 0) {
			tc_pick();
		}
	}
	tc_windup();
	mutex_spin_exit(&timecounter_lock);
}

void
inittimecounter(void)
{
	u_int p;

	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	p = (tc_tick * 1000000) / hz;
	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
	    p / 1000, p % 1000);

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}
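/*
 * Example of the tick calculation above (the hz value is illustrative):
 * with hz = 8000, tc_tick = (8000 + 500) / 1000 = 8, so tc_ticktock()
 * calls tc_windup() every 8 hardclock ticks, i.e. about once per
 * millisecond; with hz <= 1000 it runs on every hardclock tick.
 */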