/* $NetBSD: kern_tc.c,v 1.40 2009/06/14 13:16:32 kardel Exp $ */

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ---------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.40 2009/06/14 13:16:32 kardel Exp $");

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/evcnt.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/xcall.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
};
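
/*
 * Illustrative sketch only (not part of this file): a clock driver
 * registers its hardware counter by filling in a struct timecounter and
 * calling tc_init().  The field order follows the dummy_timecounter
 * initializer above; "mycounter" and mycounter_get() are hypothetical.
 *
 *	static u_int
 *	mycounter_get(struct timecounter *tc)
 *	{
 *		return read_hw_counter_register();	// hypothetical
 *	}
 *
 *	static struct timecounter mycounter = {
 *		mycounter_get,	// tc_get_timecount
 *		NULL,		// tc_poll_pps (optional)
 *		0x00ffffff,	// tc_counter_mask: a 24-bit counter
 *		3579545,	// tc_frequency, in Hz
 *		"mycounter",	// tc_name
 *		100,		// tc_quality; negative means never auto-select
 *		NULL, NULL,	// private cookie, tc_next (set by tc_init)
 *	};
 *
 *	tc_init(&mycounter);
 */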
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;	 /* active timecounter */
	int64_t			th_adjustment;	 /* frequency adjustment */
						 /* (NTP/adjtime) */
	u_int64_t		th_scale;	 /* scale factor (counter */
						 /* tick->time) */
	u_int64_t		th_offset_count; /* offset at last time */
						 /* update (tc_windup()) */
	struct bintime		th_offset;	 /* bin (up)time at windup */
	struct timeval		th_microtime;	 /* cached microtime */
	struct timespec		th_nanotime;	 /* cached nanotime */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;	 /* current generation */
	struct timehands	*th_next;	 /* next timehand */
};

static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

time_t time_second = 1;
time_t time_uptime = 1;

static struct bintime timebasebin;

static int timestepwarnings;

kmutex_t timecounter_lock;
static u_int timecounter_mods;
static volatile int timecounter_removals = 1;
static u_int timecounter_bad;

#ifdef __FreeBSD__
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "");
#endif /* __FreeBSD__ */

/*
 * sysctl helper routine for kern.timecounter.hardware
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return (error);

	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}
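
/*
 * For illustration: from userland the handler above is reached through
 * sysctl(8), e.g. (counter names are machine-dependent):
 *
 *	$ sysctl kern.timecounter.choice
 *	$ sysctl -w kern.timecounter.hardware=TSC
 */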
static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error, mods;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	mutex_spin_enter(&timecounter_lock);
 retry:
	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;
	where = oldp;
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			needed += sizeof(buf);  /* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
			    " Hz)", spc, tc->tc_name, tc->tc_quality,
			    tc->tc_frequency);
			if (left < slen + 1)
				break;
			mods = timecounter_mods;
			mutex_spin_exit(&timecounter_lock);
			error = copyout(buf, where, slen + 1);
			mutex_spin_enter(&timecounter_lock);
			if (mods != timecounter_mods) {
				goto retry;
			}
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_spin_exit(&timecounter_lock);

	*oldlenp = needed;
	return (error);
}

SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "timecounter",
	    SYSCTL_DESCR("time counter information"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRING, "choice",
		    SYSCTL_DESCR("available counters"),
		    sysctl_kern_timecounter_choice, 0, NULL, 0,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_STRING, "hardware",
		    SYSCTL_DESCR("currently active time counter"),
		    sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "timestepwarnings",
		    SYSCTL_DESCR("log time steps"),
		    NULL, 0, &timestepwarnings, 0,
		    CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

#ifdef TC_COUNTERS
#define TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define TC_COUNT(var)	/* nothing */
#endif /* TC_COUNTERS */

static void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) -
	    th->th_offset_count) & tc->tc_counter_mask);
}
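
/*
 * Worked example (for illustration): the mask makes the subtraction
 * robust against counter wrap.  With a 16-bit counter
 * (tc_counter_mask == 0xffff) that last read 0xfff0 and now reads
 * 0x0010, the unsigned difference is 0xffff0020 before masking, so
 * tc_delta() correctly reports 0x20 ticks even though the hardware
 * counter wrapped in between.
 */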
/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/timevar.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	if (__predict_true(lgen == 0)) {
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);

	__insn_barrier();
	l->l_tcgen = lgen;
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanouptime);
	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrouptime);
	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	TC_COUNT(nbintime);
	binuptime(bt);
	bintime_add(bt, &timebasebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanotime);
	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrotime);
	bintime(&bt);
	bintime2timeval(&bt, tvp);
}
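
/*
 * Worked example (for illustration): a struct bintime holds whole seconds
 * plus a 64-bit binary fraction of a second, so one nanosecond is roughly
 * 2^64 / 10^9 ~= 18446744074 fraction units.  An uptime of 1.5s is
 * { .sec = 1, .frac = 1ULL << 63 }, and bintime() above converts uptime
 * to UTC by adding timebasebin, the estimated bintime of boot.
 */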
void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbintime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &timebasebin);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}
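
/*
 * Worked example (for illustration): tc_frequency / tc_counter_mask is
 * roughly how often per second the counter wraps, and tc_windup() must
 * run between wraps for tc_delta() to stay unambiguous.  A 24-bit
 * counter at 3579545 Hz wraps about every 4.7 seconds (the integer
 * division above yields u == 0), so it passes for any hz.  A 16-bit
 * counter at 10 MHz wraps ~153 times per second; with the 10% margin
 * the check prints "needs at least 167" and forces tc_quality to -2000
 * unless hz is at least that large.
 */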
/*
 * Pick a new timecounter due to the existing counter going bad.
 */
static void
tc_pick(void)
{
	struct timecounter *best, *tc;

	KASSERT(mutex_owned(&timecounter_lock));

	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc->tc_quality > best->tc_quality)
			best = tc;
		else if (tc->tc_quality < best->tc_quality)
			continue;
		else if (tc->tc_frequency > best->tc_frequency)
			best = tc;
	}
	(void)best->tc_get_timecount(best);
	(void)best->tc_get_timecount(best);
	timecounter = best;
}

/*
 * A timecounter has gone bad, arrange to pick a new one at the next
 * clock tick.
 */
void
tc_gonebad(struct timecounter *tc)
{

	tc->tc_quality = -100;
	membar_producer();
	atomic_inc_uint(&timecounter_bad);
}

/*
 * Stop using a timecounter and remove it from the timecounters list.
 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *tc;
	struct timecounter **tcp = NULL;
	int removals;
	uint64_t where;
	lwp_t *l;

	/* First, find the timecounter. */
	mutex_spin_enter(&timecounter_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		mutex_spin_exit(&timecounter_lock);
		return ESRCH;
	}

	/* And now, remove it. */
	*tcp = tc->tc_next;
	if (timecounter == target) {
		tc_pick();
		tc_windup();
	}
	timecounter_mods++;
	removals = timecounter_removals++;
	mutex_spin_exit(&timecounter_lock);

	/*
	 * We now have to determine if any threads in the system are still
	 * making use of this timecounter.
	 *
	 * We issue a broadcast cross call to elide memory ordering issues,
	 * then scan all LWPs in the system looking at each's timecounter
	 * generation number.  We need to see a value of zero (not actively
	 * using a timecounter) or a value greater than our removal value.
	 *
	 * We may race with threads that read `timecounter_removals' and
	 * then get preempted before updating `l_tcgen'.  This is not a
	 * problem, since it means that these threads have not yet started
	 * accessing timecounter state.  All we do need is one clean
	 * snapshot of the system where every thread appears not to be using
	 * old timecounter state.
	 */
	for (;;) {
		where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
		xc_wait(where);

		mutex_enter(proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
				/*
				 * Not using timecounter or old timecounter
				 * state at time of our xcall or later.
				 */
				continue;
			}
			break;
		}
		mutex_exit(proc_lock);

		/*
		 * If the timecounter is still in use, wait at least 10ms
		 * before retrying.
		 */
		if (l == NULL) {
			return 0;
		}
		(void)kpause("tcdetach", false, mstohz(10), NULL);
	}
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &timebasebin);
	timebasebin = bt;
	tc_windup();
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
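
/*
 * Worked example (for illustration): suppose uptime is 100s and
 * tc_setclock() is handed a UTC of 1,000,100s.  The new boot-time
 * estimate becomes timebasebin = 1000100 - 100 = 1000000, and every
 * later bintime() call reports binuptime() + 1000000.  The old UTC for
 * the step warning is recovered the same way: the captured uptime plus
 * the old timebasebin.
 */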
/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebasebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		if (bt.sec != t)
			timebasebin.sec += bt.sec - t;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);
	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
	if (s_update) {
		scale = (u_int64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;

	/*
	 * Force users of the old timehand to move on.  This is
	 * necessary for MP systems; we need to ensure that the
	 * consumers will move away from the old timehand before
	 * we begin updating it again when we eventually wrap
	 * around.
	 */
	if (++tho->th_generation == 0)
		tho->th_generation = 1;
}
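
/*
 * Worked example (for illustration) of the scale computation above:
 * with no NTP adjustment and a 1 MHz counter,
 *
 *	scale = (2^63 + 0) / 10^6;  th_scale = scale * 2 ~= 2^64 / 10^6,
 *
 * i.e. one counter tick advances the 64-bit fraction by exactly 1us.
 * The fraction 2199/512 = 4.2949219 approximates 2^32/10^9 = 4.2949673;
 * the ratio of the two is about 1 - 10.6e-6, which is the 10PPM
 * undercompensation mentioned in the comment.
 */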
/*
 * RFC 2783 PPS-API implementation.
 */
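
/*
 * Illustrative sketch only (not part of this file): userland normally
 * reaches the handlers below through the RFC 2783 wrappers declared in
 * <sys/timepps.h>, roughly:
 *
 *	pps_handle_t handle;
 *	pps_info_t info;
 *	struct timespec timeout = { 3, 0 };
 *
 *	time_pps_create(fd, &handle);			// PPS_IOC_CREATE
 *	time_pps_fetch(handle, PPS_TSFMT_TSPEC,
 *	    &info, &timeout);				// PPS_IOC_FETCH
 *	// info.assert_timestamp etc. are filled in by pps_event()
 */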

int
pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
{
	pps_params_t *app;
	pps_info_t *pipi;
#ifdef PPS_SYNC
	int *epi;
#endif

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);	/* XXX ("NULL pps pointer in pps_ioctl") */
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		pipi = (pps_info_t *)data;
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		*pipi = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		epi = (int *)data;
		/* XXX Only root should be able to do this */
		if (*epi & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = *epi;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (EPASSTHROUGH);
	}
}

void
pps_init(struct pps_state *pps)
{

	KASSERT(mutex_owned(&timecounter_lock));

	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}

void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(mutex_owned(&timecounter_lock));
	KASSERT(pps != NULL);

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = (u_int64_t)tc_delta(th) + th->th_offset_count;
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int64_t tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);	/* XXX ("NULL pps pointer in pps_event") */
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	bt = pps->capth->th_offset;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime_add(&bt, &timebasebin);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		u_int64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		scale = (u_int64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	mutex_spin_enter(&timecounter_lock);
	if (timecounter_bad != 0) {
		/* An existing timecounter has gone bad, pick a new one. */
		(void)atomic_swap_uint(&timecounter_bad, 0);
		if (timecounter->tc_quality < 0) {
			tc_pick();
		}
	}
	tc_windup();
	mutex_spin_exit(&timecounter_lock);
}

void
inittimecounter(void)
{
	u_int p;

	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	p = (tc_tick * 1000000) / hz;
	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
	    p / 1000, p % 1000);

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}
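
/*
 * Worked example (for illustration): with hz = 100, tc_tick = 1 and
 * tc_windup() runs on every hardclock tick, i.e. every 10ms; with
 * hz = 8000, tc_tick = (8000 + 500) / 1000 = 8, so windups still happen
 * roughly once per millisecond.  The cached values returned by the
 * get*() functions are therefore at most about one windup interval old.
 */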