/*	$OpenBSD: kern_tc.c,v 1.77 2022/08/12 02:20:36 cheloha Exp $ */

/*
 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * If we meet some day, and you think this stuff is worth it, you
 * can buy me a beer in return.  Poul-Henning Kamp
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/stdint.h>
#include <sys/timeout.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/queue.h>
#include <sys/malloc.h>

u_int dummy_get_timecount(struct timecounter *);

int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return atomic_inc_int_nv(&now);
}

static struct timecounter dummy_timecounter = {
	.tc_get_timecount = dummy_get_timecount,
	.tc_poll_pps = NULL,
	.tc_counter_mask = ~0u,
	.tc_frequency = 1000000,
	.tc_name = "dummy",
	.tc_quality = -1000000,
	.tc_priv = NULL,
	.tc_user = 0,
};

/*
 * Locks used to protect struct members, global variables in this file:
 *	I	immutable after initialization
 *	T	tc_lock
 *	W	windup_mtx
 */

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;		/* [W] */
	int64_t			th_adjtimedelta;	/* [T,W] */
	struct bintime		th_next_ntp_update;	/* [T,W] */
	int64_t			th_adjustment;		/* [W] */
	u_int64_t		th_scale;		/* [W] */
	u_int			th_offset_count;	/* [W] */
	struct bintime		th_boottime;		/* [T,W] */
	struct bintime		th_offset;		/* [W] */
	struct bintime		th_naptime;		/* [W] */
	struct timeval		th_microtime;		/* [W] */
	struct timespec		th_nanotime;		/* [W] */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;		/* [W] */
	struct timehands	*th_next;		/* [I] */
};

static struct timehands th0;
static struct timehands th1 = {
	.th_next = &th0
};
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = UINT64_MAX / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1
};

struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock");

/*
 * tc_windup() must be called before leaving this mutex.
 */
struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK);
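
/*
 * Usage sketch (illustrative): callers that take windup_mtx in order
 * to change timekeeping state run the windup before unlocking, as
 * tc_ticktock() and tc_adjfreq() below do:
 *
 *	mtx_enter(&windup_mtx);
 *	...modify timecounter state...
 *	tc_windup(NULL, NULL, NULL);
 *	mtx_leave(&windup_mtx);
 */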

static struct timehands *volatile timehands = &th0;	/* [W] */
struct timecounter *timecounter = &dummy_timecounter;	/* [T] */
static SLIST_HEAD(, timecounter) tc_list = SLIST_HEAD_INITIALIZER(tc_list);

/*
 * These are updated from tc_windup().  They are useful when
 * examining kernel core dumps.
 */
volatile time_t naptime = 0;
volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

static int timestepwarnings;

void ntp_update_second(struct timehands *);
void tc_windup(struct bintime *, struct bintime *, int64_t *);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these functions.
 */

void
binboottime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_boottime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
microboottime(struct timeval *tvp)
{
	struct bintime bt;

	binboottime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

void
nanoboottime(struct timespec *tsp)
{
	struct bintime bt;

	binboottime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

time_t
getuptime(void)
{
#if defined(__LP64__)
	return time_uptime;	/* atomic */
#else
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		now = th->th_offset.sec;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}

uint64_t
nsecuptime(void)
{
	struct bintime bt;

	binuptime(&bt);
	return BINTIME_TO_NSEC(&bt);
}

uint64_t
getnsecuptime(void)
{
	struct bintime bt;

	getbinuptime(&bt);
	return BINTIME_TO_NSEC(&bt);
}
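
/*
 * Illustrative note (a sketch, hedging on exact costs): nsecuptime()
 * reads the hardware counter and is as precise as the active
 * timecounter, while getnsecuptime() only returns the value cached at
 * the last tc_windup() call, making it cheaper but no more precise
 * than the update interval (see the comment above tc_ticktock()
 * below).  Timing a short operation would use the former:
 *
 *	uint64_t t0, ns;
 *
 *	t0 = nsecuptime();
 *	...operation...
 *	ns = nsecuptime() - t0;
 */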

void
binruntime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimesub(bt, &th->th_naptime, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanoruntime(struct timespec *ts)
{
	struct bintime bt;

	binruntime(&bt);
	BINTIME_TO_TIMESPEC(&bt, ts);
}

void
bintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimeadd(bt, &th->th_boottime, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

time_t
gettime(void)
{
#if defined(__LP64__)
	return time_second;	/* atomic */
#else
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		now = th->th_microtime.tv_sec;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		BINTIME_TO_TIMESPEC(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		BINTIME_TO_TIMEVAL(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int64_t tmp;
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	/* Determine the counter's precision. */
	for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1)
		continue;
	tc->tc_precision = tmp;
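
	/*
	 * Worked example (illustrative): the loop above finds the
	 * lowest set bit of the mask.  A full 32-bit counter
	 * (mask 0xffffffff) yields a precision of 1 count; a counter
	 * whose low byte is unimplemented (mask 0xffffff00) yields 256.
	 */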

	SLIST_INSERT_HEAD(&tc_list, tc, tc_next);

	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	enqueue_randomness(tc->tc_get_timecount(tc));

	timecounter = tc;
}

/*
 * Change the given timecounter's quality.  If it is the active
 * counter and it is no longer the best counter, activate the
 * best counter.
 */
void
tc_reset_quality(struct timecounter *tc, int quality)
{
	struct timecounter *best = &dummy_timecounter, *tmp;

	if (tc == &dummy_timecounter)
		panic("%s: cannot change dummy counter quality", __func__);

	tc->tc_quality = quality;
	if (timecounter == tc) {
		SLIST_FOREACH(tmp, &tc_list, tc_next) {
			if (tmp->tc_quality < 0)
				continue;
			if (tmp->tc_quality < best->tc_quality)
				continue;
			if (tmp->tc_quality == best->tc_quality &&
			    tmp->tc_frequency < best->tc_frequency)
				continue;
			best = tmp;
		}
		if (best != tc) {
			enqueue_randomness(best->tc_get_timecount(best));
			timecounter = best;
		}
	}
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{
	return (timehands->th_counter->tc_frequency);
}

/* Report the precision of the current timecounter. */
u_int64_t
tc_getprecision(void)
{
	return (timehands->th_counter->tc_precision);
}

/*
 * Step our concept of UTC, aka the realtime clock.
 * This is done by modifying our estimate of when we booted.
 *
 * Any ongoing adjustment is meaningless after a clock jump,
 * so we zero adjtimedelta here as well.
 */
void
tc_setrealtimeclock(const struct timespec *ts)
{
	struct bintime boottime, old_utc, uptime, utc;
	struct timespec tmp;
	int64_t zero = 0;

	TIMESPEC_TO_BINTIME(ts, &utc);

	rw_enter_write(&tc_lock);
	mtx_enter(&windup_mtx);

	binuptime(&uptime);
	bintimesub(&utc, &uptime, &boottime);
	bintimeadd(&timehands->th_boottime, &uptime, &old_utc);
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&boottime, NULL, &zero);

	mtx_leave(&windup_mtx);
	rw_exit_write(&tc_lock);

	enqueue_randomness(ts->tv_sec);

	if (timestepwarnings) {
		BINTIME_TO_TIMESPEC(&old_utc, &tmp);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)tmp.tv_sec, tmp.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
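
/*
 * Illustrative note (a summary sketch): the two stepping entry points
 * move different clocks.  tc_setrealtimeclock() above steps only the
 * UTC clock by recomputing boottime, leaving the monotonic clocks
 * untouched.  tc_setclock() below also steps the monotonic uptime
 * clock forward, folding the jump into th_naptime so that the runtime
 * clock keeps excluding time spent suspended.
 */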

/*
 * Step the monotonic and realtime clocks, triggering any timeouts that
 * should have occurred across the interval.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct bintime new_naptime, old_naptime, uptime, utc;
	struct timespec tmp;
	static int first = 1;
#ifndef SMALL_KERNEL
	struct bintime elapsed;
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, we need to set boottime.
	 */
	if (first) {
		tc_setrealtimeclock(ts);
		first = 0;
		return;
	}

	enqueue_randomness(ts->tv_sec);

	TIMESPEC_TO_BINTIME(ts, &utc);

	mtx_enter(&windup_mtx);

	bintimesub(&utc, &timehands->th_boottime, &uptime);
	old_naptime = timehands->th_naptime;
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(NULL, &uptime, NULL);
	new_naptime = timehands->th_naptime;

	mtx_leave(&windup_mtx);

	if (bintimecmp(&old_naptime, &new_naptime, ==)) {
		BINTIME_TO_TIMESPEC(&uptime, &tmp);
		printf("%s: cannot rewind uptime to %lld.%09ld\n",
		    __func__, (long long)tmp.tv_sec, tmp.tv_nsec);
	}

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
	bintimesub(&new_naptime, &old_naptime, &elapsed);
	adj_ticks = BINTIME_TO_NSEC(&elapsed) / tick_nsec;
	if (adj_ticks > 0) {
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		timeout_adjust_ticks(adj_ticks);
	}
#endif
}

void
tc_update_timekeep(void)
{
	static struct timecounter *last_tc = NULL;
	struct timehands *th;

	MUTEX_ASSERT_LOCKED(&windup_mtx);

	if (timekeep == NULL)
		return;

	th = timehands;
	timekeep->tk_generation = 0;
	membar_producer();
	timekeep->tk_scale = th->th_scale;
	timekeep->tk_offset_count = th->th_offset_count;
	timekeep->tk_offset = th->th_offset;
	timekeep->tk_naptime = th->th_naptime;
	timekeep->tk_boottime = th->th_boottime;
	if (last_tc != th->th_counter) {
		timekeep->tk_counter_mask = th->th_counter->tc_counter_mask;
		timekeep->tk_user = th->th_counter->tc_user;
		last_tc = th->th_counter;
	}
	membar_producer();
	timekeep->tk_generation = th->th_generation;
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
void
tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
    int64_t *new_adjtimedelta)
{
	struct bintime bt;
	struct timecounter *active_tc;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;

	if (new_boottime != NULL || new_adjtimedelta != NULL)
		rw_assert_wrlock(&tc_lock);
	MUTEX_ASSERT_LOCKED(&windup_mtx);

	active_tc = timecounter;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	ogen = tho->th_generation;
	th = tho->th_next;
	th->th_generation = 0;
	membar_producer();
	memcpy(th, tho, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != active_tc)
		ncount = active_tc->tc_get_timecount(active_tc);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	TIMECOUNT_TO_BINTIME(delta, th->th_scale, &bt);
	bintimeadd(&th->th_offset, &bt, &th->th_offset);

	/*
	 * Ignore new offsets that predate the current offset.
	 * If changing the offset, first increase the naptime
	 * accordingly.
	 */
	if (new_offset != NULL && bintimecmp(&th->th_offset, new_offset, <)) {
		bintimesub(new_offset, &th->th_offset, &bt);
		bintimeadd(&th->th_naptime, &bt, &th->th_naptime);
		naptime = th->th_naptime.sec;
		th->th_offset = *new_offset;
	}
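
	/*
	 * Worked example (illustrative): if th_offset is 100s when a
	 * new_offset of 250s arrives, e.g. after 150s of suspension,
	 * th_naptime grows by 150s and th_offset becomes 250s.  Uptime
	 * jumps forward while the runtime clock, th_offset minus
	 * th_naptime, advances continuously.
	 */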

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which
	 * might have a different rate than the current NTP second.  It
	 * doesn't matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * If changing the boot time or clock adjustment, do so before
	 * NTP processing.
	 */
	if (new_boottime != NULL)
		th->th_boottime = *new_boottime;
	if (new_adjtimedelta != NULL) {
		th->th_adjtimedelta = *new_adjtimedelta;
		/* Reset the NTP update period. */
		bintimesub(&th->th_offset, &th->th_naptime,
		    &th->th_next_ntp_update);
	}

	/*
	 * Deal with NTP second processing.  The while-loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if tc_windup() is not run for several seconds.
	 */
	bintimesub(&th->th_offset, &th->th_naptime, &bt);
	while (bintimecmp(&th->th_next_ntp_update, &bt, <=)) {
		ntp_update_second(th);
		th->th_next_ntp_update.sec++;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	bintimeadd(&th->th_boottime, &th->th_offset, &bt);
	BINTIME_TO_TIMEVAL(&bt, &th->th_microtime);
	BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != active_tc) {
		th->th_counter = active_tc;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
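	/*
	 * Worked numbers (illustrative): the exact factor is
	 * 2^32 / 10^9 = 4.294967296, while the code below applies
	 * (2199 / 1024) * 2 = 2199 / 512 = 4.294921875, which is low
	 * by about 1.06e-5 relative, i.e. the ~10PPM systematic
	 * undercompensation described above.
	 */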
	scale = (u_int64_t)1 << 63;
	scale +=
	    ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;

	tc_update_timekeep();
}

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen, newname,
	    sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	SLIST_FOREACH(newtc, &tc_list, tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		rw_enter_write(&tc_lock);
		timecounter = newtc;
		rw_exit_write(&tc_lock);

		return (0);
	}
	return (EINVAL);
}

/* Report the available timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	if (SLIST_EMPTY(&tc_list))
		return (sysctl_rdstring(oldp, oldlenp, newp, ""));

	spc = "";
	maxlen = 0;
	SLIST_FOREACH(tc, &tc_list, tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	SLIST_FOREACH(tc, &tc_list, tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP, maxlen);
	return (error);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	if (!mtx_enter_try(&windup_mtx))
		return;
	count = 0;
	tc_windup(NULL, NULL, NULL);
	mtx_leave(&windup_mtx);
}

void
inittimecounter(void)
{
#ifdef DEBUG
	u_int p;
#endif

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
#ifdef DEBUG
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

const struct sysctl_bounded_args tc_vars[] = {
	{ KERN_TIMECOUNTER_TICK, &tc_tick, SYSCTL_INT_READONLY },
	{ KERN_TIMECOUNTER_TIMESTEPWARNINGS, &timestepwarnings, 0, 1 },
};
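
/*
 * Usage sketch (illustrative): the handlers above and below back the
 * kern.timecounter sysctl tree, e.g. from userland:
 *
 *	$ sysctl kern.timecounter.choice
 *	$ sysctl kern.timecounter.hardware=acpihpet0
 *
 * (acpihpet0 is a hypothetical name; the valid names are whatever
 * counters drivers have registered via tc_init().)
 */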

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (sysctl_bounded_arr(tc_vars, nitems(tc_vars), name,
		    namelen, oldp, oldlenp, newp, newlen));
	}
	/* NOTREACHED */
}

/*
 * Skew the timehands according to any adjtime(2) adjustment.
 */
void
ntp_update_second(struct timehands *th)
{
	int64_t adj;

	MUTEX_ASSERT_LOCKED(&windup_mtx);

	if (th->th_adjtimedelta > 0)
		adj = MIN(5000, th->th_adjtimedelta);
	else
		adj = MAX(-5000, th->th_adjtimedelta);
	th->th_adjtimedelta -= adj;
	th->th_adjustment = (adj * 1000) << 32;
}

void
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		rw_assert_anylock(&tc_lock);
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		timecounter->tc_freq_adj = *new;
		tc_windup(NULL, NULL, NULL);
		mtx_leave(&windup_mtx);
	}
}

void
tc_adjtime(int64_t *old, int64_t *new)
{
	struct timehands *th;
	u_int gen;

	if (old != NULL) {
		do {
			th = timehands;
			gen = th->th_generation;
			membar_consumer();
			*old = th->th_adjtimedelta;
			membar_consumer();
		} while (gen == 0 || gen != th->th_generation);
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		tc_windup(NULL, NULL, new);
		mtx_leave(&windup_mtx);
	}
}
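
/*
 * Worked example (illustrative, assuming th_adjtimedelta holds the
 * adjtime(2) delta in microseconds): ntp_update_second() slews at most
 * 5000us of it per second, so a +1000000us (1 second) correction
 * completes in about 200 seconds.  The "(adj * 1000) << 32" step
 * converts microseconds per second into the nanoseconds-per-second
 * with 32-bit binary fraction format that tc_windup() expects in
 * th_adjustment.
 */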