1 /* $OpenBSD: kern_tc.c,v 1.62 2020/07/06 13:33:09 pirofti Exp $ */ 2 3 /* 4 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * If we meet some day, and you think this stuff is worth it, you 21 * can buy me a beer in return. Poul-Henning Kamp 22 */ 23 24 #include <sys/param.h> 25 #include <sys/atomic.h> 26 #include <sys/kernel.h> 27 #include <sys/mutex.h> 28 #include <sys/rwlock.h> 29 #include <sys/stdint.h> 30 #include <sys/timeout.h> 31 #include <sys/sysctl.h> 32 #include <sys/syslog.h> 33 #include <sys/systm.h> 34 #include <sys/timetc.h> 35 #include <sys/queue.h> 36 #include <sys/malloc.h> 37 38 /* 39 * A large step happens on boot. This constant detects such steps. 40 * It is relatively small so that ntp_update_second gets called enough 41 * in the typical 'missed a couple of seconds' case, but doesn't loop 42 * forever when the time step is large. 43 */ 44 #define LARGE_STEP 200 45 46 u_int dummy_get_timecount(struct timecounter *); 47 48 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 49 int sysctl_tc_choice(void *, size_t *, void *, size_t); 50 51 /* 52 * Implement a dummy timecounter which we can use until we get a real one 53 * in the air. This allows the console and other early stuff to use 54 * time services. 55 */ 56 57 u_int 58 dummy_get_timecount(struct timecounter *tc) 59 { 60 static u_int now; 61 62 return atomic_inc_int_nv(&now); 63 } 64 65 static struct timecounter dummy_timecounter = { 66 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0 67 }; 68 69 /* 70 * Locks used to protect struct members, global variables in this file: 71 * I immutable after initialization 72 * T tc_lock 73 * W windup_mtx 74 */ 75 76 struct timehands { 77 /* These fields must be initialized by the driver. */ 78 struct timecounter *th_counter; /* [W] */ 79 int64_t th_adjtimedelta; /* [T,W] */ 80 int64_t th_adjustment; /* [W] */ 81 u_int64_t th_scale; /* [W] */ 82 u_int th_offset_count; /* [W] */ 83 struct bintime th_boottime; /* [T,W] */ 84 struct bintime th_offset; /* [W] */ 85 struct bintime th_naptime; /* [W] */ 86 struct timeval th_microtime; /* [W] */ 87 struct timespec th_nanotime; /* [W] */ 88 /* Fields not to be copied in tc_windup start with th_generation. */ 89 volatile u_int th_generation; /* [W] */ 90 struct timehands *th_next; /* [I] */ 91 }; 92 93 static struct timehands th0; 94 static struct timehands th1 = { 95 .th_next = &th0 96 }; 97 static struct timehands th0 = { 98 .th_counter = &dummy_timecounter, 99 .th_scale = UINT64_MAX / 1000000, 100 .th_offset = { .sec = 1, .frac = 0 }, 101 .th_generation = 1, 102 .th_next = &th1 103 }; 104 105 struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock"); 106 107 /* 108 * tc_windup() must be called before leaving this mutex. 109 */ 110 struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK); 111 112 static struct timehands *volatile timehands = &th0; /* [W] */ 113 struct timecounter *timecounter = &dummy_timecounter; /* [T] */ 114 static SLIST_HEAD(, timecounter) tc_list = SLIST_HEAD_INITIALIZER(tc_list); 115 116 /* 117 * These are updated from tc_windup(). They are useful when 118 * examining kernel core dumps. 119 */ 120 volatile time_t time_second = 1; 121 volatile time_t time_uptime = 0; 122 123 static int timestepwarnings; 124 125 void ntp_update_second(struct timehands *); 126 void tc_windup(struct bintime *, struct bintime *, int64_t *); 127 128 /* 129 * Return the difference between the timehands' counter value now and what 130 * was when we copied it to the timehands' offset_count. 131 */ 132 static __inline u_int 133 tc_delta(struct timehands *th) 134 { 135 struct timecounter *tc; 136 137 tc = th->th_counter; 138 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 139 tc->tc_counter_mask); 140 } 141 142 /* 143 * Functions for reading the time. We have to loop until we are sure that 144 * the timehands that we operated on was not updated under our feet. See 145 * the comment in <sys/time.h> for a description of these functions. 146 */ 147 148 void 149 binboottime(struct bintime *bt) 150 { 151 struct timehands *th; 152 u_int gen; 153 154 do { 155 th = timehands; 156 gen = th->th_generation; 157 membar_consumer(); 158 *bt = th->th_boottime; 159 membar_consumer(); 160 } while (gen == 0 || gen != th->th_generation); 161 } 162 163 void 164 microboottime(struct timeval *tvp) 165 { 166 struct bintime bt; 167 168 binboottime(&bt); 169 BINTIME_TO_TIMEVAL(&bt, tvp); 170 } 171 172 void 173 nanoboottime(struct timespec *tsp) 174 { 175 struct bintime bt; 176 177 binboottime(&bt); 178 BINTIME_TO_TIMESPEC(&bt, tsp); 179 } 180 181 void 182 binuptime(struct bintime *bt) 183 { 184 struct timehands *th; 185 u_int gen; 186 187 do { 188 th = timehands; 189 gen = th->th_generation; 190 membar_consumer(); 191 *bt = th->th_offset; 192 bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); 193 membar_consumer(); 194 } while (gen == 0 || gen != th->th_generation); 195 } 196 197 void 198 nanouptime(struct timespec *tsp) 199 { 200 struct bintime bt; 201 202 binuptime(&bt); 203 BINTIME_TO_TIMESPEC(&bt, tsp); 204 } 205 206 void 207 microuptime(struct timeval *tvp) 208 { 209 struct bintime bt; 210 211 binuptime(&bt); 212 BINTIME_TO_TIMEVAL(&bt, tvp); 213 } 214 215 time_t 216 getuptime(void) 217 { 218 #if defined(__LP64__) 219 return time_uptime; /* atomic */ 220 #else 221 time_t now; 222 struct timehands *th; 223 u_int gen; 224 225 do { 226 th = timehands; 227 gen = th->th_generation; 228 membar_consumer(); 229 now = th->th_offset.sec; 230 membar_consumer(); 231 } while (gen == 0 || gen != th->th_generation); 232 233 return now; 234 #endif 235 } 236 237 void 238 binruntime(struct bintime *bt) 239 { 240 struct timehands *th; 241 u_int gen; 242 243 do { 244 th = timehands; 245 gen = th->th_generation; 246 membar_consumer(); 247 bintimeaddfrac(&th->th_offset, th->th_scale * tc_delta(th), bt); 248 bintimesub(bt, &th->th_naptime, bt); 249 membar_consumer(); 250 } while (gen == 0 || gen != th->th_generation); 251 } 252 253 void 254 nanoruntime(struct timespec *ts) 255 { 256 struct bintime bt; 257 258 binruntime(&bt); 259 BINTIME_TO_TIMESPEC(&bt, ts); 260 } 261 262 void 263 bintime(struct bintime *bt) 264 { 265 struct timehands *th; 266 u_int gen; 267 268 do { 269 th = timehands; 270 gen = th->th_generation; 271 membar_consumer(); 272 *bt = th->th_offset; 273 bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); 274 bintimeadd(bt, &th->th_boottime, bt); 275 membar_consumer(); 276 } while (gen == 0 || gen != th->th_generation); 277 } 278 279 void 280 nanotime(struct timespec *tsp) 281 { 282 struct bintime bt; 283 284 bintime(&bt); 285 BINTIME_TO_TIMESPEC(&bt, tsp); 286 } 287 288 void 289 microtime(struct timeval *tvp) 290 { 291 struct bintime bt; 292 293 bintime(&bt); 294 BINTIME_TO_TIMEVAL(&bt, tvp); 295 } 296 297 time_t 298 gettime(void) 299 { 300 #if defined(__LP64__) 301 return time_second; /* atomic */ 302 #else 303 time_t now; 304 struct timehands *th; 305 u_int gen; 306 307 do { 308 th = timehands; 309 gen = th->th_generation; 310 membar_consumer(); 311 now = th->th_microtime.tv_sec; 312 membar_consumer(); 313 } while (gen == 0 || gen != th->th_generation); 314 315 return now; 316 #endif 317 } 318 319 void 320 getnanouptime(struct timespec *tsp) 321 { 322 struct timehands *th; 323 u_int gen; 324 325 do { 326 th = timehands; 327 gen = th->th_generation; 328 membar_consumer(); 329 BINTIME_TO_TIMESPEC(&th->th_offset, tsp); 330 membar_consumer(); 331 } while (gen == 0 || gen != th->th_generation); 332 } 333 334 void 335 getmicrouptime(struct timeval *tvp) 336 { 337 struct timehands *th; 338 u_int gen; 339 340 do { 341 th = timehands; 342 gen = th->th_generation; 343 membar_consumer(); 344 BINTIME_TO_TIMEVAL(&th->th_offset, tvp); 345 membar_consumer(); 346 } while (gen == 0 || gen != th->th_generation); 347 } 348 349 void 350 getnanotime(struct timespec *tsp) 351 { 352 struct timehands *th; 353 u_int gen; 354 355 do { 356 th = timehands; 357 gen = th->th_generation; 358 membar_consumer(); 359 *tsp = th->th_nanotime; 360 membar_consumer(); 361 } while (gen == 0 || gen != th->th_generation); 362 } 363 364 void 365 getmicrotime(struct timeval *tvp) 366 { 367 struct timehands *th; 368 u_int gen; 369 370 do { 371 th = timehands; 372 gen = th->th_generation; 373 membar_consumer(); 374 *tvp = th->th_microtime; 375 membar_consumer(); 376 } while (gen == 0 || gen != th->th_generation); 377 } 378 379 /* 380 * Initialize a new timecounter and possibly use it. 381 */ 382 void 383 tc_init(struct timecounter *tc) 384 { 385 u_int64_t tmp; 386 u_int u; 387 388 u = tc->tc_frequency / tc->tc_counter_mask; 389 /* XXX: We need some margin here, 10% is a guess */ 390 u *= 11; 391 u /= 10; 392 if (tc->tc_quality >= 0) { 393 if (u > hz) { 394 tc->tc_quality = -2000; 395 printf("Timecounter \"%s\" frequency %lu Hz", 396 tc->tc_name, (unsigned long)tc->tc_frequency); 397 printf(" -- Insufficient hz, needs at least %u\n", u); 398 } 399 } 400 401 /* Determine the counter's precision. */ 402 for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1) 403 continue; 404 tc->tc_precision = tmp; 405 406 SLIST_INSERT_HEAD(&tc_list, tc, tc_next); 407 408 /* 409 * Never automatically use a timecounter with negative quality. 410 * Even though we run on the dummy counter, switching here may be 411 * worse since this timecounter may not be monotonic. 412 */ 413 if (tc->tc_quality < 0) 414 return; 415 if (tc->tc_quality < timecounter->tc_quality) 416 return; 417 if (tc->tc_quality == timecounter->tc_quality && 418 tc->tc_frequency < timecounter->tc_frequency) 419 return; 420 (void)tc->tc_get_timecount(tc); 421 enqueue_randomness(tc->tc_get_timecount(tc)); 422 423 timecounter = tc; 424 } 425 426 /* Report the frequency of the current timecounter. */ 427 u_int64_t 428 tc_getfrequency(void) 429 { 430 return (timehands->th_counter->tc_frequency); 431 } 432 433 /* Report the precision of the current timecounter. */ 434 u_int64_t 435 tc_getprecision(void) 436 { 437 return (timehands->th_counter->tc_precision); 438 } 439 440 /* 441 * Step our concept of UTC, aka the realtime clock. 442 * This is done by modifying our estimate of when we booted. 443 * 444 * Any ongoing adjustment is meaningless after a clock jump, 445 * so we zero adjtimedelta here as well. 446 */ 447 void 448 tc_setrealtimeclock(const struct timespec *ts) 449 { 450 struct timespec ts2; 451 struct bintime bt, bt2; 452 int64_t zero = 0; 453 454 rw_enter_write(&tc_lock); 455 mtx_enter(&windup_mtx); 456 binuptime(&bt2); 457 TIMESPEC_TO_BINTIME(ts, &bt); 458 bintimesub(&bt, &bt2, &bt); 459 bintimeadd(&bt2, &timehands->th_boottime, &bt2); 460 461 /* XXX fiddle all the little crinkly bits around the fiords... */ 462 tc_windup(&bt, NULL, &zero); 463 mtx_leave(&windup_mtx); 464 rw_exit_write(&tc_lock); 465 466 enqueue_randomness(ts->tv_sec); 467 468 if (timestepwarnings) { 469 BINTIME_TO_TIMESPEC(&bt2, &ts2); 470 log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n", 471 (long long)ts2.tv_sec, ts2.tv_nsec, 472 (long long)ts->tv_sec, ts->tv_nsec); 473 } 474 } 475 476 /* 477 * Step the monotonic and realtime clocks, triggering any timeouts that 478 * should have occurred across the interval. 479 */ 480 void 481 tc_setclock(const struct timespec *ts) 482 { 483 struct bintime bt, old_naptime, naptime; 484 struct timespec earlier; 485 static int first = 1; 486 #ifndef SMALL_KERNEL 487 long long adj_ticks; 488 #endif 489 490 /* 491 * When we're called for the first time, during boot when 492 * the root partition is mounted, we need to set boottime. 493 */ 494 if (first) { 495 tc_setrealtimeclock(ts); 496 first = 0; 497 return; 498 } 499 500 enqueue_randomness(ts->tv_sec); 501 502 mtx_enter(&windup_mtx); 503 TIMESPEC_TO_BINTIME(ts, &bt); 504 bintimesub(&bt, &timehands->th_boottime, &bt); 505 old_naptime = timehands->th_naptime; 506 /* XXX fiddle all the little crinkly bits around the fiords... */ 507 tc_windup(NULL, &bt, NULL); 508 naptime = timehands->th_naptime; 509 mtx_leave(&windup_mtx); 510 511 if (bintimecmp(&old_naptime, &naptime, ==)) { 512 BINTIME_TO_TIMESPEC(&bt, &earlier); 513 printf("%s: cannot rewind uptime to %lld.%09ld\n", 514 __func__, (long long)earlier.tv_sec, earlier.tv_nsec); 515 } 516 517 #ifndef SMALL_KERNEL 518 /* convert the bintime to ticks */ 519 bintimesub(&naptime, &old_naptime, &bt); 520 adj_ticks = (uint64_t)hz * bt.sec + 521 (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick; 522 if (adj_ticks > 0) { 523 if (adj_ticks > INT_MAX) 524 adj_ticks = INT_MAX; 525 timeout_adjust_ticks(adj_ticks); 526 } 527 #endif 528 } 529 530 void 531 tc_update_timekeep(void) 532 { 533 static struct timecounter *last_tc = NULL; 534 struct timehands *th; 535 536 if (timekeep == NULL) 537 return; 538 539 th = timehands; 540 timekeep->tk_generation = 0; 541 membar_producer(); 542 timekeep->tk_scale = th->th_scale; 543 timekeep->tk_offset_count = th->th_offset_count; 544 timekeep->tk_offset = th->th_offset; 545 timekeep->tk_naptime = th->th_naptime; 546 timekeep->tk_boottime = th->th_boottime; 547 if (last_tc != th->th_counter) { 548 timekeep->tk_counter_mask = th->th_counter->tc_counter_mask; 549 timekeep->tk_user = th->th_counter->tc_user; 550 last_tc = th->th_counter; 551 } 552 membar_producer(); 553 timekeep->tk_generation = th->th_generation; 554 555 return; 556 } 557 558 /* 559 * Initialize the next struct timehands in the ring and make 560 * it the active timehands. Along the way we might switch to a different 561 * timecounter and/or do seconds processing in NTP. Slightly magic. 562 */ 563 void 564 tc_windup(struct bintime *new_boottime, struct bintime *new_offset, 565 int64_t *new_adjtimedelta) 566 { 567 struct bintime bt; 568 struct timecounter *active_tc; 569 struct timehands *th, *tho; 570 u_int64_t scale; 571 u_int delta, ncount, ogen; 572 int i; 573 574 if (new_boottime != NULL || new_adjtimedelta != NULL) 575 rw_assert_wrlock(&tc_lock); 576 MUTEX_ASSERT_LOCKED(&windup_mtx); 577 578 active_tc = timecounter; 579 580 /* 581 * Make the next timehands a copy of the current one, but do not 582 * overwrite the generation or next pointer. While we update 583 * the contents, the generation must be zero. 584 */ 585 tho = timehands; 586 th = tho->th_next; 587 ogen = th->th_generation; 588 th->th_generation = 0; 589 membar_producer(); 590 memcpy(th, tho, offsetof(struct timehands, th_generation)); 591 592 /* 593 * Capture a timecounter delta on the current timecounter and if 594 * changing timecounters, a counter value from the new timecounter. 595 * Update the offset fields accordingly. 596 */ 597 delta = tc_delta(th); 598 if (th->th_counter != active_tc) 599 ncount = active_tc->tc_get_timecount(active_tc); 600 else 601 ncount = 0; 602 th->th_offset_count += delta; 603 th->th_offset_count &= th->th_counter->tc_counter_mask; 604 bintimeaddfrac(&th->th_offset, th->th_scale * delta, &th->th_offset); 605 606 /* 607 * Ignore new offsets that predate the current offset. 608 * If changing the offset, first increase the naptime 609 * accordingly. 610 */ 611 if (new_offset != NULL && bintimecmp(&th->th_offset, new_offset, <)) { 612 bintimesub(new_offset, &th->th_offset, &bt); 613 bintimeadd(&th->th_naptime, &bt, &th->th_naptime); 614 th->th_offset = *new_offset; 615 } 616 617 #ifdef notyet 618 /* 619 * Hardware latching timecounters may not generate interrupts on 620 * PPS events, so instead we poll them. There is a finite risk that 621 * the hardware might capture a count which is later than the one we 622 * got above, and therefore possibly in the next NTP second which might 623 * have a different rate than the current NTP second. It doesn't 624 * matter in practice. 625 */ 626 if (tho->th_counter->tc_poll_pps) 627 tho->th_counter->tc_poll_pps(tho->th_counter); 628 #endif 629 630 /* 631 * If changing the boot time or clock adjustment, do so before 632 * NTP processing. 633 */ 634 if (new_boottime != NULL) 635 th->th_boottime = *new_boottime; 636 if (new_adjtimedelta != NULL) 637 th->th_adjtimedelta = *new_adjtimedelta; 638 639 /* 640 * Deal with NTP second processing. The for loop normally 641 * iterates at most once, but in extreme situations it might 642 * keep NTP sane if timeouts are not run for several seconds. 643 * At boot, the time step can be large when the TOD hardware 644 * has been read, so on really large steps, we call 645 * ntp_update_second only twice. We need to call it twice in 646 * case we missed a leap second. 647 */ 648 bt = th->th_offset; 649 bintimeadd(&bt, &th->th_boottime, &bt); 650 i = bt.sec - tho->th_microtime.tv_sec; 651 if (i > LARGE_STEP) 652 i = 2; 653 for (; i > 0; i--) 654 ntp_update_second(th); 655 656 /* Update the UTC timestamps used by the get*() functions. */ 657 /* XXX shouldn't do this here. Should force non-`get' versions. */ 658 BINTIME_TO_TIMEVAL(&bt, &th->th_microtime); 659 BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime); 660 661 /* Now is a good time to change timecounters. */ 662 if (th->th_counter != active_tc) { 663 th->th_counter = active_tc; 664 th->th_offset_count = ncount; 665 } 666 667 /*- 668 * Recalculate the scaling factor. We want the number of 1/2^64 669 * fractions of a second per period of the hardware counter, taking 670 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 671 * processing provides us with. 672 * 673 * The th_adjustment is nanoseconds per second with 32 bit binary 674 * fraction and we want 64 bit binary fraction of second: 675 * 676 * x = a * 2^32 / 10^9 = a * 4.294967296 677 * 678 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 679 * we can only multiply by about 850 without overflowing, but that 680 * leaves suitably precise fractions for multiply before divide. 681 * 682 * Divide before multiply with a fraction of 2199/512 results in a 683 * systematic undercompensation of 10PPM of th_adjustment. On a 684 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 685 * 686 * We happily sacrifice the lowest of the 64 bits of our result 687 * to the goddess of code clarity. 688 * 689 */ 690 scale = (u_int64_t)1 << 63; 691 scale += \ 692 ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199; 693 scale /= th->th_counter->tc_frequency; 694 th->th_scale = scale * 2; 695 696 /* 697 * Now that the struct timehands is again consistent, set the new 698 * generation number, making sure to not make it zero. 699 */ 700 if (++ogen == 0) 701 ogen = 1; 702 membar_producer(); 703 th->th_generation = ogen; 704 705 /* Go live with the new struct timehands. */ 706 time_second = th->th_microtime.tv_sec; 707 time_uptime = th->th_offset.sec; 708 membar_producer(); 709 timehands = th; 710 711 tc_update_timekeep(); 712 } 713 714 /* Report or change the active timecounter hardware. */ 715 int 716 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 717 { 718 char newname[32]; 719 struct timecounter *newtc, *tc; 720 int error; 721 722 tc = timecounter; 723 strlcpy(newname, tc->tc_name, sizeof(newname)); 724 725 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 726 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 727 return (error); 728 SLIST_FOREACH(newtc, &tc_list, tc_next) { 729 if (strcmp(newname, newtc->tc_name) != 0) 730 continue; 731 732 /* Warm up new timecounter. */ 733 (void)newtc->tc_get_timecount(newtc); 734 (void)newtc->tc_get_timecount(newtc); 735 736 rw_enter_write(&tc_lock); 737 timecounter = newtc; 738 rw_exit_write(&tc_lock); 739 740 return (0); 741 } 742 return (EINVAL); 743 } 744 745 /* Report or change the active timecounter hardware. */ 746 int 747 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 748 { 749 char buf[32], *spc, *choices; 750 struct timecounter *tc; 751 int error, maxlen; 752 753 if (SLIST_EMPTY(&tc_list)) 754 return (sysctl_rdstring(oldp, oldlenp, newp, "")); 755 756 spc = ""; 757 maxlen = 0; 758 SLIST_FOREACH(tc, &tc_list, tc_next) 759 maxlen += sizeof(buf); 760 choices = malloc(maxlen, M_TEMP, M_WAITOK); 761 *choices = '\0'; 762 SLIST_FOREACH(tc, &tc_list, tc_next) { 763 snprintf(buf, sizeof(buf), "%s%s(%d)", 764 spc, tc->tc_name, tc->tc_quality); 765 spc = " "; 766 strlcat(choices, buf, maxlen); 767 } 768 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 769 free(choices, M_TEMP, maxlen); 770 return (error); 771 } 772 773 /* 774 * Timecounters need to be updated every so often to prevent the hardware 775 * counter from overflowing. Updating also recalculates the cached values 776 * used by the get*() family of functions, so their precision depends on 777 * the update frequency. 778 */ 779 static int tc_tick; 780 781 void 782 tc_ticktock(void) 783 { 784 static int count; 785 786 if (++count < tc_tick) 787 return; 788 if (!mtx_enter_try(&windup_mtx)) 789 return; 790 count = 0; 791 tc_windup(NULL, NULL, NULL); 792 mtx_leave(&windup_mtx); 793 } 794 795 void 796 inittimecounter(void) 797 { 798 #ifdef DEBUG 799 u_int p; 800 #endif 801 802 /* 803 * Set the initial timeout to 804 * max(1, <approx. number of hardclock ticks in a millisecond>). 805 * People should probably not use the sysctl to set the timeout 806 * to smaller than its initial value, since that value is the 807 * smallest reasonable one. If they want better timestamps they 808 * should use the non-"get"* functions. 809 */ 810 if (hz > 1000) 811 tc_tick = (hz + 500) / 1000; 812 else 813 tc_tick = 1; 814 #ifdef DEBUG 815 p = (tc_tick * 1000000) / hz; 816 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 817 #endif 818 819 /* warm up new timecounter (again) and get rolling. */ 820 (void)timecounter->tc_get_timecount(timecounter); 821 (void)timecounter->tc_get_timecount(timecounter); 822 } 823 824 /* 825 * Return timecounter-related information. 826 */ 827 int 828 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 829 void *newp, size_t newlen) 830 { 831 if (namelen != 1) 832 return (ENOTDIR); 833 834 switch (name[0]) { 835 case KERN_TIMECOUNTER_TICK: 836 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 837 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 838 return (sysctl_int(oldp, oldlenp, newp, newlen, 839 ×tepwarnings)); 840 case KERN_TIMECOUNTER_HARDWARE: 841 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 842 case KERN_TIMECOUNTER_CHOICE: 843 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 844 default: 845 return (EOPNOTSUPP); 846 } 847 /* NOTREACHED */ 848 } 849 850 /* 851 * Skew the timehands according to any adjtime(2) adjustment. 852 */ 853 void 854 ntp_update_second(struct timehands *th) 855 { 856 int64_t adj; 857 858 MUTEX_ASSERT_LOCKED(&windup_mtx); 859 860 if (th->th_adjtimedelta > 0) 861 adj = MIN(5000, th->th_adjtimedelta); 862 else 863 adj = MAX(-5000, th->th_adjtimedelta); 864 th->th_adjtimedelta -= adj; 865 th->th_adjustment = (adj * 1000) << 32; 866 } 867 868 void 869 tc_adjfreq(int64_t *old, int64_t *new) 870 { 871 if (old != NULL) { 872 rw_assert_anylock(&tc_lock); 873 *old = timecounter->tc_freq_adj; 874 } 875 if (new != NULL) { 876 rw_assert_wrlock(&tc_lock); 877 mtx_enter(&windup_mtx); 878 timecounter->tc_freq_adj = *new; 879 tc_windup(NULL, NULL, NULL); 880 mtx_leave(&windup_mtx); 881 } 882 } 883 884 void 885 tc_adjtime(int64_t *old, int64_t *new) 886 { 887 struct timehands *th; 888 u_int gen; 889 890 if (old != NULL) { 891 do { 892 th = timehands; 893 gen = th->th_generation; 894 membar_consumer(); 895 *old = th->th_adjtimedelta; 896 membar_consumer(); 897 } while (gen == 0 || gen != th->th_generation); 898 } 899 if (new != NULL) { 900 rw_assert_wrlock(&tc_lock); 901 mtx_enter(&windup_mtx); 902 tc_windup(NULL, NULL, new); 903 mtx_leave(&windup_mtx); 904 } 905 } 906