1 /* $OpenBSD: kern_tc.c,v 1.31 2017/03/07 20:22:37 dhill Exp $ */ 2 3 /* 4 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * If we meet some day, and you think this stuff is worth it, you 21 * can buy me a beer in return. Poul-Henning Kamp 22 */ 23 24 #include <sys/param.h> 25 #include <sys/kernel.h> 26 #include <sys/timeout.h> 27 #include <sys/sysctl.h> 28 #include <sys/syslog.h> 29 #include <sys/systm.h> 30 #include <sys/timetc.h> 31 #include <sys/malloc.h> 32 #include <dev/rndvar.h> 33 34 /* 35 * A large step happens on boot. This constant detects such steps. 36 * It is relatively small so that ntp_update_second gets called enough 37 * in the typical 'missed a couple of seconds' case, but doesn't loop 38 * forever when the time step is large. 39 */ 40 #define LARGE_STEP 200 41 42 u_int dummy_get_timecount(struct timecounter *); 43 44 void ntp_update_second(int64_t *); 45 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 46 int sysctl_tc_choice(void *, size_t *, void *, size_t); 47 48 /* 49 * Implement a dummy timecounter which we can use until we get a real one 50 * in the air. This allows the console and other early stuff to use 51 * time services. 52 */ 53 54 u_int 55 dummy_get_timecount(struct timecounter *tc) 56 { 57 static u_int now; 58 59 return (++now); 60 } 61 62 static struct timecounter dummy_timecounter = { 63 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 64 }; 65 66 struct timehands { 67 /* These fields must be initialized by the driver. */ 68 struct timecounter *th_counter; 69 int64_t th_adjustment; 70 u_int64_t th_scale; 71 u_int th_offset_count; 72 struct bintime th_offset; 73 struct timeval th_microtime; 74 struct timespec th_nanotime; 75 /* Fields not to be copied in tc_windup start with th_generation. */ 76 volatile u_int th_generation; 77 struct timehands *th_next; 78 }; 79 80 static struct timehands th0; 81 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 82 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 83 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 84 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 85 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 86 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 87 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 88 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 89 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 90 static struct timehands th0 = { 91 &dummy_timecounter, 92 0, 93 (uint64_t)-1 / 1000000, 94 0, 95 {1, 0}, 96 {0, 0}, 97 {0, 0}, 98 1, 99 &th1 100 }; 101 102 static struct timehands *volatile timehands = &th0; 103 struct timecounter *timecounter = &dummy_timecounter; 104 static struct timecounter *timecounters = &dummy_timecounter; 105 106 volatile time_t time_second = 1; 107 volatile time_t time_uptime = 0; 108 109 struct bintime naptime; 110 static struct bintime boottimebin; 111 static int timestepwarnings; 112 113 void tc_windup(void); 114 115 /* 116 * Return the difference between the timehands' counter value now and what 117 * was when we copied it to the timehands' offset_count. 118 */ 119 static __inline u_int 120 tc_delta(struct timehands *th) 121 { 122 struct timecounter *tc; 123 124 tc = th->th_counter; 125 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 126 tc->tc_counter_mask); 127 } 128 129 /* 130 * Functions for reading the time. We have to loop until we are sure that 131 * the timehands that we operated on was not updated under our feet. See 132 * the comment in <sys/time.h> for a description of these 12 functions. 133 */ 134 135 void 136 binuptime(struct bintime *bt) 137 { 138 struct timehands *th; 139 u_int gen; 140 141 do { 142 th = timehands; 143 gen = th->th_generation; 144 *bt = th->th_offset; 145 bintime_addx(bt, th->th_scale * tc_delta(th)); 146 } while (gen == 0 || gen != th->th_generation); 147 } 148 149 void 150 nanouptime(struct timespec *tsp) 151 { 152 struct bintime bt; 153 154 binuptime(&bt); 155 bintime2timespec(&bt, tsp); 156 } 157 158 void 159 microuptime(struct timeval *tvp) 160 { 161 struct bintime bt; 162 163 binuptime(&bt); 164 bintime2timeval(&bt, tvp); 165 } 166 167 void 168 bintime(struct bintime *bt) 169 { 170 171 binuptime(bt); 172 bintime_add(bt, &boottimebin); 173 } 174 175 void 176 nanotime(struct timespec *tsp) 177 { 178 struct bintime bt; 179 180 bintime(&bt); 181 bintime2timespec(&bt, tsp); 182 } 183 184 void 185 microtime(struct timeval *tvp) 186 { 187 struct bintime bt; 188 189 bintime(&bt); 190 bintime2timeval(&bt, tvp); 191 } 192 193 void 194 getnanouptime(struct timespec *tsp) 195 { 196 struct timehands *th; 197 u_int gen; 198 199 do { 200 th = timehands; 201 gen = th->th_generation; 202 bintime2timespec(&th->th_offset, tsp); 203 } while (gen == 0 || gen != th->th_generation); 204 } 205 206 void 207 getmicrouptime(struct timeval *tvp) 208 { 209 struct timehands *th; 210 u_int gen; 211 212 do { 213 th = timehands; 214 gen = th->th_generation; 215 bintime2timeval(&th->th_offset, tvp); 216 } while (gen == 0 || gen != th->th_generation); 217 } 218 219 void 220 getnanotime(struct timespec *tsp) 221 { 222 struct timehands *th; 223 u_int gen; 224 225 do { 226 th = timehands; 227 gen = th->th_generation; 228 *tsp = th->th_nanotime; 229 } while (gen == 0 || gen != th->th_generation); 230 } 231 232 void 233 getmicrotime(struct timeval *tvp) 234 { 235 struct timehands *th; 236 u_int gen; 237 238 do { 239 th = timehands; 240 gen = th->th_generation; 241 *tvp = th->th_microtime; 242 } while (gen == 0 || gen != th->th_generation); 243 } 244 245 /* 246 * Initialize a new timecounter and possibly use it. 247 */ 248 void 249 tc_init(struct timecounter *tc) 250 { 251 u_int u; 252 253 u = tc->tc_frequency / tc->tc_counter_mask; 254 /* XXX: We need some margin here, 10% is a guess */ 255 u *= 11; 256 u /= 10; 257 if (tc->tc_quality >= 0) { 258 if (u > hz) { 259 tc->tc_quality = -2000; 260 printf("Timecounter \"%s\" frequency %lu Hz", 261 tc->tc_name, (unsigned long)tc->tc_frequency); 262 printf(" -- Insufficient hz, needs at least %u\n", u); 263 } 264 } 265 266 tc->tc_next = timecounters; 267 timecounters = tc; 268 /* 269 * Never automatically use a timecounter with negative quality. 270 * Even though we run on the dummy counter, switching here may be 271 * worse since this timecounter may not be monotonic. 272 */ 273 if (tc->tc_quality < 0) 274 return; 275 if (tc->tc_quality < timecounter->tc_quality) 276 return; 277 if (tc->tc_quality == timecounter->tc_quality && 278 tc->tc_frequency < timecounter->tc_frequency) 279 return; 280 (void)tc->tc_get_timecount(tc); 281 add_timer_randomness(tc->tc_get_timecount(tc)); 282 283 timecounter = tc; 284 } 285 286 /* Report the frequency of the current timecounter. */ 287 u_int64_t 288 tc_getfrequency(void) 289 { 290 291 return (timehands->th_counter->tc_frequency); 292 } 293 294 /* 295 * Step our concept of UTC, aka the realtime clock. 296 * This is done by modifying our estimate of when we booted. 297 * XXX: not locked. 298 */ 299 void 300 tc_setrealtimeclock(struct timespec *ts) 301 { 302 struct timespec ts2; 303 struct bintime bt, bt2; 304 305 binuptime(&bt2); 306 timespec2bintime(ts, &bt); 307 bintime_sub(&bt, &bt2); 308 bintime_add(&bt2, &boottimebin); 309 boottimebin = bt; 310 bintime2timespec(&bt, &boottime); 311 add_timer_randomness(ts->tv_sec); 312 313 /* XXX fiddle all the little crinkly bits around the fiords... */ 314 tc_windup(); 315 if (timestepwarnings) { 316 bintime2timespec(&bt2, &ts2); 317 log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n", 318 (long long)ts2.tv_sec, ts2.tv_nsec, 319 (long long)ts->tv_sec, ts->tv_nsec); 320 } 321 } 322 323 /* 324 * Step the monotonic and realtime clocks, triggering any timeouts that 325 * should have occurred across the interval. 326 * XXX: not locked. 327 */ 328 void 329 tc_setclock(struct timespec *ts) 330 { 331 struct bintime bt, bt2; 332 #ifndef SMALL_KERNEL 333 long long adj_ticks; 334 #endif 335 336 /* 337 * When we're called for the first time, during boot when 338 * the root partition is mounted, boottime is still zero: 339 * we just need to set it. 340 */ 341 if (boottimebin.sec == 0) { 342 tc_setrealtimeclock(ts); 343 return; 344 } 345 346 add_timer_randomness(ts->tv_sec); 347 348 timespec2bintime(ts, &bt); 349 bintime_sub(&bt, &boottimebin); 350 bt2 = timehands->th_offset; 351 timehands->th_offset = bt; 352 353 /* XXX fiddle all the little crinkly bits around the fiords... */ 354 tc_windup(); 355 356 #ifndef SMALL_KERNEL 357 /* convert the bintime to ticks */ 358 bintime_sub(&bt, &bt2); 359 bintime_add(&naptime, &bt); 360 adj_ticks = (uint64_t)hz * bt.sec + 361 (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick; 362 if (adj_ticks > 0) { 363 if (adj_ticks > INT_MAX) 364 adj_ticks = INT_MAX; 365 timeout_adjust_ticks(adj_ticks); 366 } 367 #endif 368 } 369 370 /* 371 * Initialize the next struct timehands in the ring and make 372 * it the active timehands. Along the way we might switch to a different 373 * timecounter and/or do seconds processing in NTP. Slightly magic. 374 */ 375 void 376 tc_windup(void) 377 { 378 struct bintime bt; 379 struct timehands *th, *tho; 380 u_int64_t scale; 381 u_int delta, ncount, ogen; 382 int i; 383 384 /* 385 * Make the next timehands a copy of the current one, but do not 386 * overwrite the generation or next pointer. While we update 387 * the contents, the generation must be zero. 388 */ 389 tho = timehands; 390 th = tho->th_next; 391 ogen = th->th_generation; 392 th->th_generation = 0; 393 memcpy(th, tho, offsetof(struct timehands, th_generation)); 394 395 /* 396 * Capture a timecounter delta on the current timecounter and if 397 * changing timecounters, a counter value from the new timecounter. 398 * Update the offset fields accordingly. 399 */ 400 delta = tc_delta(th); 401 if (th->th_counter != timecounter) 402 ncount = timecounter->tc_get_timecount(timecounter); 403 else 404 ncount = 0; 405 th->th_offset_count += delta; 406 th->th_offset_count &= th->th_counter->tc_counter_mask; 407 bintime_addx(&th->th_offset, th->th_scale * delta); 408 409 #ifdef notyet 410 /* 411 * Hardware latching timecounters may not generate interrupts on 412 * PPS events, so instead we poll them. There is a finite risk that 413 * the hardware might capture a count which is later than the one we 414 * got above, and therefore possibly in the next NTP second which might 415 * have a different rate than the current NTP second. It doesn't 416 * matter in practice. 417 */ 418 if (tho->th_counter->tc_poll_pps) 419 tho->th_counter->tc_poll_pps(tho->th_counter); 420 #endif 421 422 /* 423 * Deal with NTP second processing. The for loop normally 424 * iterates at most once, but in extreme situations it might 425 * keep NTP sane if timeouts are not run for several seconds. 426 * At boot, the time step can be large when the TOD hardware 427 * has been read, so on really large steps, we call 428 * ntp_update_second only twice. We need to call it twice in 429 * case we missed a leap second. 430 */ 431 bt = th->th_offset; 432 bintime_add(&bt, &boottimebin); 433 i = bt.sec - tho->th_microtime.tv_sec; 434 if (i > LARGE_STEP) 435 i = 2; 436 for (; i > 0; i--) 437 ntp_update_second(&th->th_adjustment); 438 439 /* Update the UTC timestamps used by the get*() functions. */ 440 /* XXX shouldn't do this here. Should force non-`get' versions. */ 441 bintime2timeval(&bt, &th->th_microtime); 442 bintime2timespec(&bt, &th->th_nanotime); 443 444 /* Now is a good time to change timecounters. */ 445 if (th->th_counter != timecounter) { 446 th->th_counter = timecounter; 447 th->th_offset_count = ncount; 448 } 449 450 /*- 451 * Recalculate the scaling factor. We want the number of 1/2^64 452 * fractions of a second per period of the hardware counter, taking 453 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 454 * processing provides us with. 455 * 456 * The th_adjustment is nanoseconds per second with 32 bit binary 457 * fraction and we want 64 bit binary fraction of second: 458 * 459 * x = a * 2^32 / 10^9 = a * 4.294967296 460 * 461 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 462 * we can only multiply by about 850 without overflowing, but that 463 * leaves suitably precise fractions for multiply before divide. 464 * 465 * Divide before multiply with a fraction of 2199/512 results in a 466 * systematic undercompensation of 10PPM of th_adjustment. On a 467 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 468 * 469 * We happily sacrifice the lowest of the 64 bits of our result 470 * to the goddess of code clarity. 471 * 472 */ 473 scale = (u_int64_t)1 << 63; 474 scale += (th->th_adjustment / 1024) * 2199; 475 scale /= th->th_counter->tc_frequency; 476 th->th_scale = scale * 2; 477 478 /* 479 * Now that the struct timehands is again consistent, set the new 480 * generation number, making sure to not make it zero. 481 */ 482 if (++ogen == 0) 483 ogen = 1; 484 th->th_generation = ogen; 485 486 /* Go live with the new struct timehands. */ 487 time_second = th->th_microtime.tv_sec; 488 time_uptime = th->th_offset.sec; 489 timehands = th; 490 } 491 492 /* Report or change the active timecounter hardware. */ 493 int 494 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 495 { 496 char newname[32]; 497 struct timecounter *newtc, *tc; 498 int error; 499 500 tc = timecounter; 501 strlcpy(newname, tc->tc_name, sizeof(newname)); 502 503 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 504 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 505 return (error); 506 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 507 if (strcmp(newname, newtc->tc_name) != 0) 508 continue; 509 510 /* Warm up new timecounter. */ 511 (void)newtc->tc_get_timecount(newtc); 512 (void)newtc->tc_get_timecount(newtc); 513 514 timecounter = newtc; 515 return (0); 516 } 517 return (EINVAL); 518 } 519 520 /* Report or change the active timecounter hardware. */ 521 int 522 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 523 { 524 char buf[32], *spc, *choices; 525 struct timecounter *tc; 526 int error, maxlen; 527 528 spc = ""; 529 maxlen = 0; 530 for (tc = timecounters; tc != NULL; tc = tc->tc_next) 531 maxlen += sizeof(buf); 532 choices = malloc(maxlen, M_TEMP, M_WAITOK); 533 *choices = '\0'; 534 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 535 snprintf(buf, sizeof(buf), "%s%s(%d)", 536 spc, tc->tc_name, tc->tc_quality); 537 spc = " "; 538 strlcat(choices, buf, maxlen); 539 } 540 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 541 free(choices, M_TEMP, maxlen); 542 return (error); 543 } 544 545 /* 546 * Timecounters need to be updated every so often to prevent the hardware 547 * counter from overflowing. Updating also recalculates the cached values 548 * used by the get*() family of functions, so their precision depends on 549 * the update frequency. 550 */ 551 static int tc_tick; 552 553 void 554 tc_ticktock(void) 555 { 556 static int count; 557 558 if (++count < tc_tick) 559 return; 560 count = 0; 561 tc_windup(); 562 } 563 564 void 565 inittimecounter(void) 566 { 567 #ifdef DEBUG 568 u_int p; 569 #endif 570 571 /* 572 * Set the initial timeout to 573 * max(1, <approx. number of hardclock ticks in a millisecond>). 574 * People should probably not use the sysctl to set the timeout 575 * to smaller than its initial value, since that value is the 576 * smallest reasonable one. If they want better timestamps they 577 * should use the non-"get"* functions. 578 */ 579 if (hz > 1000) 580 tc_tick = (hz + 500) / 1000; 581 else 582 tc_tick = 1; 583 #ifdef DEBUG 584 p = (tc_tick * 1000000) / hz; 585 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 586 #endif 587 588 /* warm up new timecounter (again) and get rolling. */ 589 (void)timecounter->tc_get_timecount(timecounter); 590 (void)timecounter->tc_get_timecount(timecounter); 591 } 592 593 /* 594 * Return timecounter-related information. 595 */ 596 int 597 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 598 void *newp, size_t newlen) 599 { 600 if (namelen != 1) 601 return (ENOTDIR); 602 603 switch (name[0]) { 604 case KERN_TIMECOUNTER_TICK: 605 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 606 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 607 return (sysctl_int(oldp, oldlenp, newp, newlen, 608 ×tepwarnings)); 609 case KERN_TIMECOUNTER_HARDWARE: 610 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 611 case KERN_TIMECOUNTER_CHOICE: 612 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 613 default: 614 return (EOPNOTSUPP); 615 } 616 /* NOTREACHED */ 617 } 618 619 void 620 ntp_update_second(int64_t *adjust) 621 { 622 int64_t adj; 623 624 /* Skew time according to any adjtime(2) adjustments. */ 625 if (adjtimedelta > 0) 626 adj = MIN(5000, adjtimedelta); 627 else 628 adj = MAX(-5000, adjtimedelta); 629 adjtimedelta -= adj; 630 *adjust = (adj * 1000) << 32; 631 *adjust += timecounter->tc_freq_adj; 632 } 633 634 int 635 tc_adjfreq(int64_t *old, int64_t *new) 636 { 637 if (old != NULL) { 638 *old = timecounter->tc_freq_adj; 639 } 640 if (new != NULL) { 641 timecounter->tc_freq_adj = *new; 642 } 643 return 0; 644 } 645