/*	$OpenBSD: kern_tc.c,v 1.80 2022/12/05 23:18:37 deraadt Exp $ */

/*
 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * If we meet some day, and you think this stuff is worth it, you
 * can buy me a beer in return.  Poul-Henning Kamp
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/stdint.h>
#include <sys/timeout.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/queue.h>
#include <sys/malloc.h>

u_int dummy_get_timecount(struct timecounter *);

int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return atomic_inc_int_nv(&now);
}

static struct timecounter dummy_timecounter = {
	.tc_get_timecount = dummy_get_timecount,
	.tc_poll_pps = NULL,
	.tc_counter_mask = ~0u,
	.tc_frequency = 1000000,
	.tc_name = "dummy",
	.tc_quality = -1000000,
	.tc_priv = NULL,
	.tc_user = 0,
};

/*
 * Locks used to protect struct members, global variables in this file:
 *	I	immutable after initialization
 *	T	tc_lock
 *	W	windup_mtx
 */

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;		/* [W] */
	int64_t			th_adjtimedelta;	/* [T,W] */
	struct bintime		th_next_ntp_update;	/* [T,W] */
	int64_t			th_adjustment;		/* [W] */
	u_int64_t		th_scale;		/* [W] */
	u_int			th_offset_count;	/* [W] */
	struct bintime		th_boottime;		/* [T,W] */
	struct bintime		th_offset;		/* [W] */
	struct bintime		th_naptime;		/* [W] */
	struct timeval		th_microtime;		/* [W] */
	struct timespec		th_nanotime;		/* [W] */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;		/* [W] */
	struct timehands	*th_next;		/* [I] */
};

static struct timehands th0;
static struct timehands th1 = {
	.th_next = &th0
};
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = UINT64_MAX / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1
};

struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock");

/*
 * tc_windup() must be called before leaving this mutex.
 */
struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK);

static struct timehands *volatile timehands = &th0;	/* [W] */
struct timecounter *timecounter = &dummy_timecounter;	/* [T] */
static SLIST_HEAD(, timecounter) tc_list = SLIST_HEAD_INITIALIZER(tc_list);

/*
 * These are updated from tc_windup().  They are useful when
 * examining kernel core dumps.
 */
volatile time_t naptime = 0;
volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

static int timestepwarnings;

void ntp_update_second(struct timehands *);
void tc_windup(struct bintime *, struct bintime *, int64_t *);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these functions.
 */

void
binboottime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_boottime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
microboottime(struct timeval *tvp)
{
	struct bintime bt;

	binboottime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

void
nanoboottime(struct timespec *tsp)
{
	struct bintime bt;

	binboottime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

time_t
getuptime(void)
{
#if defined(__LP64__)
	return time_uptime;	/* atomic */
#else
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		now = th->th_offset.sec;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}

uint64_t
nsecuptime(void)
{
	struct bintime bt;

	binuptime(&bt);
	return BINTIME_TO_NSEC(&bt);
}

uint64_t
getnsecuptime(void)
{
	struct bintime bt;

	getbinuptime(&bt);
	return BINTIME_TO_NSEC(&bt);
}
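
/*
 * To summarize the arithmetic in binuptime() above and the bin* functions
 * below, each clock is derived from the current timehands as:
 *
 *	uptime  = th_offset			(binuptime)
 *	runtime = th_offset - th_naptime	(binruntime)
 *	UTC     = th_offset + th_boottime	(bintime)
 *
 * where a fresh tc_delta() contribution is folded into th_offset first.
 */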

void
binruntime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimesub(bt, &th->th_naptime, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanoruntime(struct timespec *ts)
{
	struct bintime bt;

	binruntime(&bt);
	BINTIME_TO_TIMESPEC(&bt, ts);
}

void
bintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimeadd(bt, &th->th_boottime, bt);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	BINTIME_TO_TIMESPEC(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	BINTIME_TO_TIMEVAL(&bt, tvp);
}

time_t
gettime(void)
{
#if defined(__LP64__)
	return time_second;	/* atomic */
#else
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		now = th->th_microtime.tv_sec;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		BINTIME_TO_TIMESPEC(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		BINTIME_TO_TIMEVAL(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int64_t tmp;
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	/* Determine the counter's precision. */
	for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1)
		continue;
	tc->tc_precision = tmp;

	SLIST_INSERT_HEAD(&tc_list, tc, tc_next);

	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	enqueue_randomness(tc->tc_get_timecount(tc));

	timecounter = tc;
}
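
/*
 * A minimal sketch of how a machine-dependent driver might register a
 * hardware counter with tc_init() above.  The names here (cc_get_timecount,
 * cycle_counter, read_hw_cycle_counter) are hypothetical, not an existing
 * driver; only the struct timecounter fields and tc_init() come from this
 * file.
 *
 *	u_int
 *	cc_get_timecount(struct timecounter *tc)
 *	{
 *		return read_hw_cycle_counter();	(hypothetical MD register read)
 *	}
 *
 *	static struct timecounter cycle_counter = {
 *		.tc_get_timecount = cc_get_timecount,
 *		.tc_counter_mask = 0xffffffff,	(free-running 32-bit counter)
 *		.tc_frequency = 100000000,	(100 MHz)
 *		.tc_name = "cycles",
 *		.tc_quality = 1000,
 *	};
 *
 *	...
 *	tc_init(&cycle_counter);
 *
 * With a non-negative tc_quality and a quality/frequency better than the
 * currently active counter's, tc_init() makes it the "timecounter" picked
 * up by the next tc_windup().
 */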

/*
 * Change the given timecounter's quality.  If it is the active
 * counter and it is no longer the best counter, activate the
 * best counter.
 */
void
tc_reset_quality(struct timecounter *tc, int quality)
{
	struct timecounter *best = &dummy_timecounter, *tmp;

	if (tc == &dummy_timecounter)
		panic("%s: cannot change dummy counter quality", __func__);

	tc->tc_quality = quality;
	if (timecounter == tc) {
		SLIST_FOREACH(tmp, &tc_list, tc_next) {
			if (tmp->tc_quality < 0)
				continue;
			if (tmp->tc_quality < best->tc_quality)
				continue;
			if (tmp->tc_quality == best->tc_quality &&
			    tmp->tc_frequency < best->tc_frequency)
				continue;
			best = tmp;
		}
		if (best != tc) {
			enqueue_randomness(best->tc_get_timecount(best));
			timecounter = best;
			printf("timecounter: active counter changed: %s -> %s\n",
			    tc->tc_name, best->tc_name);
		}
	}
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{
	return (timehands->th_counter->tc_frequency);
}

/* Report the precision of the current timecounter. */
u_int64_t
tc_getprecision(void)
{
	return (timehands->th_counter->tc_precision);
}

/*
 * Step our concept of UTC, aka the realtime clock.
 * This is done by modifying our estimate of when we booted.
 *
 * Any ongoing adjustment is meaningless after a clock jump,
 * so we zero adjtimedelta here as well.
 */
void
tc_setrealtimeclock(const struct timespec *ts)
{
	struct bintime boottime, old_utc, uptime, utc;
	struct timespec tmp;
	int64_t zero = 0;

	TIMESPEC_TO_BINTIME(ts, &utc);

	rw_enter_write(&tc_lock);
	mtx_enter(&windup_mtx);

	binuptime(&uptime);
	bintimesub(&utc, &uptime, &boottime);
	bintimeadd(&timehands->th_boottime, &uptime, &old_utc);
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&boottime, NULL, &zero);

	mtx_leave(&windup_mtx);
	rw_exit_write(&tc_lock);

	enqueue_randomness(ts->tv_sec);

	if (timestepwarnings) {
		BINTIME_TO_TIMESPEC(&old_utc, &tmp);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)tmp.tv_sec, tmp.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
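
/*
 * Worked example of the step above: if the clock is set to UTC = 1700000000.0
 * while uptime is 500.0 seconds, the new boottime becomes
 * 1700000000 - 500 = 1699999500, and every later bintime() call reports
 * boottime + uptime, i.e. the requested UTC advancing at the rate of the
 * monotonic clock.
 */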

/*
 * Step the monotonic and realtime clocks, triggering any timeouts that
 * should have occurred across the interval.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct bintime new_naptime, old_naptime, uptime, utc;
	static int first = 1;
#ifndef SMALL_KERNEL
	struct bintime elapsed;
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, we need to set boottime.
	 */
	if (first) {
		tc_setrealtimeclock(ts);
		first = 0;
		return;
	}

	enqueue_randomness(ts->tv_sec);

	TIMESPEC_TO_BINTIME(ts, &utc);

	mtx_enter(&windup_mtx);

	bintimesub(&utc, &timehands->th_boottime, &uptime);
	old_naptime = timehands->th_naptime;
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(NULL, &uptime, NULL);
	new_naptime = timehands->th_naptime;

	mtx_leave(&windup_mtx);

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
	bintimesub(&new_naptime, &old_naptime, &elapsed);
	adj_ticks = BINTIME_TO_NSEC(&elapsed) / tick_nsec;
	if (adj_ticks > 0) {
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		timeout_adjust_ticks(adj_ticks);
	}
#endif
}

void
tc_update_timekeep(void)
{
	static struct timecounter *last_tc = NULL;
	struct timehands *th;

	MUTEX_ASSERT_LOCKED(&windup_mtx);

	if (timekeep == NULL)
		return;

	th = timehands;
	timekeep->tk_generation = 0;
	membar_producer();
	timekeep->tk_scale = th->th_scale;
	timekeep->tk_offset_count = th->th_offset_count;
	timekeep->tk_offset = th->th_offset;
	timekeep->tk_naptime = th->th_naptime;
	timekeep->tk_boottime = th->th_boottime;
	if (last_tc != th->th_counter) {
		timekeep->tk_counter_mask = th->th_counter->tc_counter_mask;
		timekeep->tk_user = th->th_counter->tc_user;
		last_tc = th->th_counter;
	}
	membar_producer();
	timekeep->tk_generation = th->th_generation;

	return;
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
void
tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
    int64_t *new_adjtimedelta)
{
	struct bintime bt;
	struct timecounter *active_tc;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;

	if (new_boottime != NULL || new_adjtimedelta != NULL)
		rw_assert_wrlock(&tc_lock);
	MUTEX_ASSERT_LOCKED(&windup_mtx);

	active_tc = timecounter;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	ogen = tho->th_generation;
	th = tho->th_next;
	th->th_generation = 0;
	membar_producer();
	memcpy(th, tho, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != active_tc)
		ncount = active_tc->tc_get_timecount(active_tc);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	TIMECOUNT_TO_BINTIME(delta, th->th_scale, &bt);
	bintimeadd(&th->th_offset, &bt, &th->th_offset);

	/*
	 * Ignore new offsets that predate the current offset.
	 * If changing the offset, first increase the naptime
	 * accordingly.
	 */
	if (new_offset != NULL && bintimecmp(&th->th_offset, new_offset, <)) {
		bintimesub(new_offset, &th->th_offset, &bt);
		bintimeadd(&th->th_naptime, &bt, &th->th_naptime);
		naptime = th->th_naptime.sec;
		th->th_offset = *new_offset;
	}

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * If changing the boot time or clock adjustment, do so before
	 * NTP processing.
	 */
	if (new_boottime != NULL)
		th->th_boottime = *new_boottime;
	if (new_adjtimedelta != NULL) {
		th->th_adjtimedelta = *new_adjtimedelta;
		/* Reset the NTP update period. */
		bintimesub(&th->th_offset, &th->th_naptime,
		    &th->th_next_ntp_update);
	}

	/*
	 * Deal with NTP second processing.  The while-loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if tc_windup() is not run for several seconds.
	 */
	bintimesub(&th->th_offset, &th->th_naptime, &bt);
	while (bintimecmp(&th->th_next_ntp_update, &bt, <=)) {
		ntp_update_second(th);
		th->th_next_ntp_update.sec++;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	bintimeadd(&th->th_boottime, &th->th_offset, &bt);
	BINTIME_TO_TIMEVAL(&bt, &th->th_microtime);
	BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != active_tc) {
		th->th_counter = active_tc;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (u_int64_t)1 << 63;
	scale += \
	    ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;
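
	/*
	 * Worked example: for a 1 GHz counter with zero adjustment,
	 * th_scale = 2 * (2^63 / 10^9), roughly 2^64 / 10^9, so a delta of
	 * 10^9 ticks multiplied by th_scale spans about 2^64 fractions,
	 * i.e. one full second.  After the final doubling the adjustment
	 * term is effectively multiplied by 2199/512 = 4.294921875 instead
	 * of the exact 2^32 / 10^9 = 4.294967296, which is the roughly
	 * 10PPM undercompensation described in the comment above.
	 */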

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;

	tc_update_timekeep();
}

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	SLIST_FOREACH(newtc, &tc_list, tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		rw_enter_write(&tc_lock);
		timecounter = newtc;
		rw_exit_write(&tc_lock);

		return (0);
	}
	return (EINVAL);
}

/* Report the available timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	if (SLIST_EMPTY(&tc_list))
		return (sysctl_rdstring(oldp, oldlenp, newp, ""));

	spc = "";
	maxlen = 0;
	SLIST_FOREACH(tc, &tc_list, tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	SLIST_FOREACH(tc, &tc_list, tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP, maxlen);
	return (error);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	if (!mtx_enter_try(&windup_mtx))
		return;
	count = 0;
	tc_windup(NULL, NULL, NULL);
	mtx_leave(&windup_mtx);
}

void
inittimecounter(void)
{
#ifdef DEBUG
	u_int p;
#endif

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
#ifdef DEBUG
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

const struct sysctl_bounded_args tc_vars[] = {
	{ KERN_TIMECOUNTER_TICK, &tc_tick, SYSCTL_INT_READONLY },
	{ KERN_TIMECOUNTER_TIMESTEPWARNINGS, &timestepwarnings, 0, 1 },
};

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (sysctl_bounded_arr(tc_vars, nitems(tc_vars), name,
		    namelen, oldp, oldlenp, newp, newlen));
	}
	/* NOTREACHED */
}

/*
 * Skew the timehands according to any adjtime(2) adjustment.
 */
void
ntp_update_second(struct timehands *th)
{
	int64_t adj;

	MUTEX_ASSERT_LOCKED(&windup_mtx);

	if (th->th_adjtimedelta > 0)
		adj = MIN(5000, th->th_adjtimedelta);
	else
		adj = MAX(-5000, th->th_adjtimedelta);
	th->th_adjtimedelta -= adj;
	th->th_adjustment = (adj * 1000) << 32;
}
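
/*
 * Worked example for ntp_update_second() above: th_adjtimedelta holds the
 * outstanding adjtime(2) correction in microseconds (the adj * 1000 step
 * converts it to the nanoseconds-per-second form th_adjustment expects).
 * At most 5000 microseconds are consumed per elapsed second, so a request
 * to slew by a full second (1000000 microseconds) completes after roughly
 * 200 seconds, with the clock running at most 0.5% (5000PPM) fast or slow
 * in the meantime -- matching the +/- 5000PPM range quoted in the
 * tc_windup() scale comment.
 */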

void
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		rw_assert_anylock(&tc_lock);
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		timecounter->tc_freq_adj = *new;
		tc_windup(NULL, NULL, NULL);
		mtx_leave(&windup_mtx);
	}
}

void
tc_adjtime(int64_t *old, int64_t *new)
{
	struct timehands *th;
	u_int gen;

	if (old != NULL) {
		do {
			th = timehands;
			gen = th->th_generation;
			membar_consumer();
			*old = th->th_adjtimedelta;
			membar_consumer();
		} while (gen == 0 || gen != th->th_generation);
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		tc_windup(NULL, NULL, new);
		mtx_leave(&windup_mtx);
	}
}
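
/*
 * A minimal usage sketch for tc_adjtime() (the real callers live in the
 * adjtime(2) syscall code; this is illustrative, not a quote of that code).
 * Writers must hold tc_lock exclusively, as asserted above:
 *
 *	int64_t delta = 1000000;	(hypothetical one-second slew)
 *
 *	rw_enter_write(&tc_lock);
 *	tc_adjtime(NULL, &delta);
 *	rw_exit_write(&tc_lock);
 *
 * A caller passing only "old" needs no exclusive lock, since the value is
 * read from the current timehands under the generation protocol.
 */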