/*	$OpenBSD: kern_tc.c,v 1.34 2018/09/18 20:47:11 bluhm Exp $ */

/*
 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * If we meet some day, and you think this stuff is worth it, you
 * can buy me a beer in return.  Poul-Henning Kamp
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/timeout.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/malloc.h>
#include <dev/rndvar.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

u_int dummy_get_timecount(struct timecounter *);

void ntp_update_second(int64_t *);
int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	u_int64_t		th_scale;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;
	struct timehands	*th_next;
};
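
/*
 * The timehands ring below is read without locks.  tc_windup() sets
 * th_generation to zero while it rewrites the next timehands in the ring
 * and publishes a new, non-zero generation once the copy is consistent
 * again.  A reader therefore retries whenever it sees a zero or changed
 * generation, roughly:
 *
 *	do {
 *		th = timehands;
 *		gen = th->th_generation;
 *		(copy the fields of interest)
 *	} while (gen == 0 || gen != th->th_generation);
 *
 * The membar_consumer()/membar_producer() calls keep these loads and
 * stores ordered on multiprocessor machines.
 */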

static struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,
	0,
	{1, 0},
	{0, 0},
	{0, 0},
	1,
	&th1
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

struct bintime naptime;
static struct bintime boottimebin;
static int timestepwarnings;

void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	binuptime(bt);
	bintime_add(bt, &boottimebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		bintime2timespec(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		bintime2timeval(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}
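
/*
 * Of the functions above, binuptime(), nanouptime(), microuptime(),
 * bintime(), nanotime() and microtime() read the hardware counter via
 * tc_delta() and are as precise as the active timecounter allows, while
 * the get*() variants only return the values cached at the last
 * tc_windup() call and are correspondingly cheaper but coarser.  The
 * uptime flavours count from boot; the others add boottimebin to yield
 * the realtime clock.
 */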

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	enqueue_randomness(tc->tc_get_timecount(tc));

	timecounter = tc;
}
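
/*
 * A clock driver typically fills in a static struct timecounter and hands
 * it to tc_init() from its attach routine.  The sketch below is purely
 * illustrative; "mydev" and mydev_get_timecount() are made-up names, not
 * a real driver in the tree:
 *
 *	u_int
 *	mydev_get_timecount(struct timecounter *tc)
 *	{
 *		return (READ_COUNTER_REGISTER());
 *	}
 *
 *	static struct timecounter mydev_timecounter = {
 *		mydev_get_timecount,	// tc_get_timecount
 *		NULL,			// tc_poll_pps
 *		0xffffffff,		// tc_counter_mask
 *		0,			// tc_frequency, set before tc_init()
 *		"mydev",		// tc_name
 *		1000			// tc_quality
 *	};
 *
 *	mydev_timecounter.tc_frequency = measured_frequency_in_hz;
 *	tc_init(&mydev_timecounter);
 *
 * tc_init() links the counter onto the timecounters list and may start
 * using it right away when its quality beats the current choice.
 */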

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC, aka the realtime clock.
 * This is done by modifying our estimate of when we booted.
 * XXX: not locked.
 */
void
tc_setrealtimeclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &boottimebin);
	boottimebin = bt;
	bintime2timespec(&bt, &boottime);
	enqueue_randomness(ts->tv_sec);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Step the monotonic and realtime clocks, triggering any timeouts that
 * should have occurred across the interval.
 * XXX: not locked.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct bintime bt, bt2;
#ifndef SMALL_KERNEL
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, boottime is still zero:
	 * we just need to set it.
	 */
	if (boottimebin.sec == 0) {
		tc_setrealtimeclock(ts);
		return;
	}

	enqueue_randomness(ts->tv_sec);

	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &boottimebin);
	bt2 = timehands->th_offset;
	timehands->th_offset = bt;

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
	bintime_sub(&bt, &bt2);
	bintime_add(&naptime, &bt);
	adj_ticks = (uint64_t)hz * bt.sec +
	    (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick;
	if (adj_ticks > 0) {
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		timeout_adjust_ticks(adj_ticks);
	}
#endif
}
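
/*
 * The adj_ticks expression above converts a bintime delta to hardclock
 * ticks.  bt.frac >> 32 keeps the top 32 bits of the 64-bit binary
 * fraction; multiplying by 1000000 and shifting right by 32 again turns
 * that fraction into microseconds, and dividing by tick (microseconds per
 * hardclock tick) converts those to ticks.  For example, with hz = 100
 * and tick = 10000, a delta of 2.5 seconds gives
 *
 *	100 * 2 + ((1000000 * 0x80000000) >> 32) / 10000
 *	    = 200 + 500000 / 10000 = 250 ticks.
 */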

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	memcpy(th, tho, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which
	 * might have a different rate than the current NTP second.  It
	 * doesn't matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--)
		ntp_update_second(&th->th_adjustment);

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;
}
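
/*
 * A quick check of the numbers in the scale recalculation above:
 * (th_adjustment / 1024) * 2199 followed by the final "scale * 2" applies
 * an effective factor of 2199/512 = 4.294921875 to th_adjustment instead
 * of the exact 2^32/10^9 = 4.294967296.  The ratio of the two is about
 * 0.9999894, i.e. roughly 10.6 PPM of undercompensation, which matches
 * the 10PPM figure quoted in the comment.  With th_adjustment at zero the
 * result is simply th_scale = 2^64 / tc_frequency, the number of 2^-64
 * second fractions per counter tick.
 */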

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen, newname,
	    sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;
		return (0);
	}
	return (EINVAL);
}

/* Report the available timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	spc = "";
	maxlen = 0;
	for (tc = timecounters; tc != NULL; tc = tc->tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP, maxlen);
	return (error);
}
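
/*
 * The two handlers above are exposed through the kern.timecounter sysctl
 * tree (see sysctl_tc() below).  On a typical system the interaction
 * looks something like
 *
 *	$ sysctl kern.timecounter.choice
 *	kern.timecounter.choice=i8254(0) acpihpet0(1000) acpitimer0(1000)
 *	$ sysctl kern.timecounter.hardware=acpihpet0
 *
 * which selects a different counter at run time.  The device names shown
 * here are only an example and depend on the machine.
 */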

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	tc_windup();
}

void
inittimecounter(void)
{
#ifdef DEBUG
	u_int p;
#endif

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
#ifdef DEBUG
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_TICK:
		return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
	case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &timestepwarnings));
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
ntp_update_second(int64_t *adjust)
{
	int64_t adj;

	/* Skew time according to any adjtime(2) adjustments. */
	if (adjtimedelta > 0)
		adj = MIN(5000, adjtimedelta);
	else
		adj = MAX(-5000, adjtimedelta);
	adjtimedelta -= adj;
	*adjust = (adj * 1000) << 32;
	*adjust += timecounter->tc_freq_adj;
}

int
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		timecounter->tc_freq_adj = *new;
	}
	return 0;
}
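
/*
 * A note on units, as inferred from the code above: adjtimedelta holds the
 * outstanding adjtime(2) correction in microseconds, and
 * ntp_update_second() consumes at most 5000 of them (5 ms, i.e. 5000PPM,
 * matching the range quoted in tc_windup()) per elapsed second.  The
 * value handed back in *adjust, and likewise tc_freq_adj, is nanoseconds
 * per second with a 32-bit binary fraction, i.e. (adj * 1000) << 32,
 * which is the format th_adjustment and the scale recalculation in
 * tc_windup() expect.
 */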