1 /* $OpenBSD: kern_tc.c,v 1.29 2016/07/06 15:53:01 tedu Exp $ */ 2 3 /* 4 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * If we meet some day, and you think this stuff is worth it, you 21 * can buy me a beer in return. Poul-Henning Kamp 22 */ 23 24 #include <sys/param.h> 25 #include <sys/kernel.h> 26 #include <sys/timeout.h> 27 #include <sys/sysctl.h> 28 #include <sys/syslog.h> 29 #include <sys/systm.h> 30 #include <sys/timetc.h> 31 #include <sys/malloc.h> 32 #include <dev/rndvar.h> 33 34 /* 35 * A large step happens on boot. This constant detects such steps. 36 * It is relatively small so that ntp_update_second gets called enough 37 * in the typical 'missed a couple of seconds' case, but doesn't loop 38 * forever when the time step is large. 39 */ 40 #define LARGE_STEP 200 41 42 u_int dummy_get_timecount(struct timecounter *); 43 44 void ntp_update_second(int64_t *, time_t *); 45 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 46 int sysctl_tc_choice(void *, size_t *, void *, size_t); 47 48 /* 49 * Implement a dummy timecounter which we can use until we get a real one 50 * in the air. This allows the console and other early stuff to use 51 * time services. 52 */ 53 54 u_int 55 dummy_get_timecount(struct timecounter *tc) 56 { 57 static u_int now; 58 59 return (++now); 60 } 61 62 static struct timecounter dummy_timecounter = { 63 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 64 }; 65 66 struct timehands { 67 /* These fields must be initialized by the driver. */ 68 struct timecounter *th_counter; 69 int64_t th_adjustment; 70 u_int64_t th_scale; 71 u_int th_offset_count; 72 struct bintime th_offset; 73 struct timeval th_microtime; 74 struct timespec th_nanotime; 75 /* Fields not to be copied in tc_windup start with th_generation. */ 76 volatile u_int th_generation; 77 struct timehands *th_next; 78 }; 79 80 static struct timehands th0; 81 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 82 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 83 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 84 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 85 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 86 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 87 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 88 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 89 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 90 static struct timehands th0 = { 91 &dummy_timecounter, 92 0, 93 (uint64_t)-1 / 1000000, 94 0, 95 {1, 0}, 96 {0, 0}, 97 {0, 0}, 98 1, 99 &th1 100 }; 101 102 static struct timehands *volatile timehands = &th0; 103 struct timecounter *timecounter = &dummy_timecounter; 104 static struct timecounter *timecounters = &dummy_timecounter; 105 106 volatile time_t time_second = 1; 107 volatile time_t time_uptime = 0; 108 109 struct bintime naptime; 110 static struct bintime boottimebin; 111 static int timestepwarnings; 112 113 void tc_windup(void); 114 115 /* 116 * Return the difference between the timehands' counter value now and what 117 * was when we copied it to the timehands' offset_count. 118 */ 119 static __inline u_int 120 tc_delta(struct timehands *th) 121 { 122 struct timecounter *tc; 123 124 tc = th->th_counter; 125 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 126 tc->tc_counter_mask); 127 } 128 129 /* 130 * Functions for reading the time. We have to loop until we are sure that 131 * the timehands that we operated on was not updated under our feet. See 132 * the comment in <sys/time.h> for a description of these 12 functions. 133 */ 134 135 void 136 binuptime(struct bintime *bt) 137 { 138 struct timehands *th; 139 u_int gen; 140 141 do { 142 th = timehands; 143 gen = th->th_generation; 144 *bt = th->th_offset; 145 bintime_addx(bt, th->th_scale * tc_delta(th)); 146 } while (gen == 0 || gen != th->th_generation); 147 } 148 149 void 150 nanouptime(struct timespec *tsp) 151 { 152 struct bintime bt; 153 154 binuptime(&bt); 155 bintime2timespec(&bt, tsp); 156 } 157 158 void 159 microuptime(struct timeval *tvp) 160 { 161 struct bintime bt; 162 163 binuptime(&bt); 164 bintime2timeval(&bt, tvp); 165 } 166 167 void 168 bintime(struct bintime *bt) 169 { 170 171 binuptime(bt); 172 bintime_add(bt, &boottimebin); 173 } 174 175 void 176 nanotime(struct timespec *tsp) 177 { 178 struct bintime bt; 179 180 bintime(&bt); 181 bintime2timespec(&bt, tsp); 182 } 183 184 void 185 microtime(struct timeval *tvp) 186 { 187 struct bintime bt; 188 189 bintime(&bt); 190 bintime2timeval(&bt, tvp); 191 } 192 193 void 194 getnanouptime(struct timespec *tsp) 195 { 196 struct timehands *th; 197 u_int gen; 198 199 do { 200 th = timehands; 201 gen = th->th_generation; 202 bintime2timespec(&th->th_offset, tsp); 203 } while (gen == 0 || gen != th->th_generation); 204 } 205 206 void 207 getmicrouptime(struct timeval *tvp) 208 { 209 struct timehands *th; 210 u_int gen; 211 212 do { 213 th = timehands; 214 gen = th->th_generation; 215 bintime2timeval(&th->th_offset, tvp); 216 } while (gen == 0 || gen != th->th_generation); 217 } 218 219 void 220 getnanotime(struct timespec *tsp) 221 { 222 struct timehands *th; 223 u_int gen; 224 225 do { 226 th = timehands; 227 gen = th->th_generation; 228 *tsp = th->th_nanotime; 229 } while (gen == 0 || gen != th->th_generation); 230 } 231 232 void 233 getmicrotime(struct timeval *tvp) 234 { 235 struct timehands *th; 236 u_int gen; 237 238 do { 239 th = timehands; 240 gen = th->th_generation; 241 *tvp = th->th_microtime; 242 } while (gen == 0 || gen != th->th_generation); 243 } 244 245 /* 246 * Initialize a new timecounter and possibly use it. 247 */ 248 void 249 tc_init(struct timecounter *tc) 250 { 251 u_int u; 252 253 u = tc->tc_frequency / tc->tc_counter_mask; 254 /* XXX: We need some margin here, 10% is a guess */ 255 u *= 11; 256 u /= 10; 257 if (tc->tc_quality >= 0) { 258 if (u > hz) { 259 tc->tc_quality = -2000; 260 printf("Timecounter \"%s\" frequency %lu Hz", 261 tc->tc_name, (unsigned long)tc->tc_frequency); 262 printf(" -- Insufficient hz, needs at least %u\n", u); 263 } 264 } 265 266 tc->tc_next = timecounters; 267 timecounters = tc; 268 /* 269 * Never automatically use a timecounter with negative quality. 270 * Even though we run on the dummy counter, switching here may be 271 * worse since this timecounter may not be monotonic. 272 */ 273 if (tc->tc_quality < 0) 274 return; 275 if (tc->tc_quality < timecounter->tc_quality) 276 return; 277 if (tc->tc_quality == timecounter->tc_quality && 278 tc->tc_frequency < timecounter->tc_frequency) 279 return; 280 (void)tc->tc_get_timecount(tc); 281 add_timer_randomness(tc->tc_get_timecount(tc)); 282 283 timecounter = tc; 284 } 285 286 /* Report the frequency of the current timecounter. */ 287 u_int64_t 288 tc_getfrequency(void) 289 { 290 291 return (timehands->th_counter->tc_frequency); 292 } 293 294 /* 295 * Step our concept of UTC, aka the realtime clock. 296 * This is done by modifying our estimate of when we booted. 297 * XXX: not locked. 298 */ 299 void 300 tc_setrealtimeclock(struct timespec *ts) 301 { 302 struct timespec ts2; 303 struct bintime bt, bt2; 304 305 binuptime(&bt2); 306 timespec2bintime(ts, &bt); 307 bintime_sub(&bt, &bt2); 308 bintime_add(&bt2, &boottimebin); 309 boottimebin = bt; 310 bintime2timespec(&bt, &boottime); 311 add_timer_randomness(ts->tv_sec); 312 313 /* XXX fiddle all the little crinkly bits around the fiords... */ 314 tc_windup(); 315 if (timestepwarnings) { 316 bintime2timespec(&bt2, &ts2); 317 log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n", 318 (long long)ts2.tv_sec, ts2.tv_nsec, 319 (long long)ts->tv_sec, ts->tv_nsec); 320 } 321 } 322 323 /* 324 * Step the monotonic and realtime clocks, triggering any timeouts that 325 * should have occurred across the interval. 326 * XXX: not locked. 327 */ 328 void 329 tc_setclock(struct timespec *ts) 330 { 331 struct bintime bt, bt2; 332 #ifndef SMALL_KERNEL 333 long long adj_ticks; 334 #endif 335 336 /* 337 * When we're called for the first time, during boot when 338 * the root partition is mounted, boottime is still zero: 339 * we just need to set it. 340 */ 341 if (boottimebin.sec == 0) { 342 tc_setrealtimeclock(ts); 343 return; 344 } 345 346 add_timer_randomness(ts->tv_sec); 347 348 timespec2bintime(ts, &bt); 349 bintime_sub(&bt, &boottimebin); 350 bt2 = timehands->th_offset; 351 timehands->th_offset = bt; 352 353 /* XXX fiddle all the little crinkly bits around the fiords... */ 354 tc_windup(); 355 356 #ifndef SMALL_KERNEL 357 /* convert the bintime to ticks */ 358 bintime_sub(&bt, &bt2); 359 bintime_add(&naptime, &bt); 360 adj_ticks = (uint64_t)hz * bt.sec + 361 (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick; 362 if (adj_ticks > 0) { 363 if (adj_ticks > INT_MAX) 364 adj_ticks = INT_MAX; 365 timeout_adjust_ticks(adj_ticks); 366 } 367 #endif 368 } 369 370 /* 371 * Initialize the next struct timehands in the ring and make 372 * it the active timehands. Along the way we might switch to a different 373 * timecounter and/or do seconds processing in NTP. Slightly magic. 374 */ 375 void 376 tc_windup(void) 377 { 378 struct bintime bt; 379 struct timehands *th, *tho; 380 u_int64_t scale; 381 u_int delta, ncount, ogen; 382 int i; 383 #ifdef leapsecs 384 time_t t; 385 #endif 386 387 /* 388 * Make the next timehands a copy of the current one, but do not 389 * overwrite the generation or next pointer. While we update 390 * the contents, the generation must be zero. 391 */ 392 tho = timehands; 393 th = tho->th_next; 394 ogen = th->th_generation; 395 th->th_generation = 0; 396 memcpy(th, tho, offsetof(struct timehands, th_generation)); 397 398 /* 399 * Capture a timecounter delta on the current timecounter and if 400 * changing timecounters, a counter value from the new timecounter. 401 * Update the offset fields accordingly. 402 */ 403 delta = tc_delta(th); 404 if (th->th_counter != timecounter) 405 ncount = timecounter->tc_get_timecount(timecounter); 406 else 407 ncount = 0; 408 th->th_offset_count += delta; 409 th->th_offset_count &= th->th_counter->tc_counter_mask; 410 bintime_addx(&th->th_offset, th->th_scale * delta); 411 412 #ifdef notyet 413 /* 414 * Hardware latching timecounters may not generate interrupts on 415 * PPS events, so instead we poll them. There is a finite risk that 416 * the hardware might capture a count which is later than the one we 417 * got above, and therefore possibly in the next NTP second which might 418 * have a different rate than the current NTP second. It doesn't 419 * matter in practice. 420 */ 421 if (tho->th_counter->tc_poll_pps) 422 tho->th_counter->tc_poll_pps(tho->th_counter); 423 #endif 424 425 /* 426 * Deal with NTP second processing. The for loop normally 427 * iterates at most once, but in extreme situations it might 428 * keep NTP sane if timeouts are not run for several seconds. 429 * At boot, the time step can be large when the TOD hardware 430 * has been read, so on really large steps, we call 431 * ntp_update_second only twice. We need to call it twice in 432 * case we missed a leap second. 433 */ 434 bt = th->th_offset; 435 bintime_add(&bt, &boottimebin); 436 i = bt.sec - tho->th_microtime.tv_sec; 437 if (i > LARGE_STEP) 438 i = 2; 439 for (; i > 0; i--) 440 ntp_update_second(&th->th_adjustment, &bt.sec); 441 442 /* Update the UTC timestamps used by the get*() functions. */ 443 /* XXX shouldn't do this here. Should force non-`get' versions. */ 444 bintime2timeval(&bt, &th->th_microtime); 445 bintime2timespec(&bt, &th->th_nanotime); 446 447 /* Now is a good time to change timecounters. */ 448 if (th->th_counter != timecounter) { 449 th->th_counter = timecounter; 450 th->th_offset_count = ncount; 451 } 452 453 /*- 454 * Recalculate the scaling factor. We want the number of 1/2^64 455 * fractions of a second per period of the hardware counter, taking 456 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 457 * processing provides us with. 458 * 459 * The th_adjustment is nanoseconds per second with 32 bit binary 460 * fraction and we want 64 bit binary fraction of second: 461 * 462 * x = a * 2^32 / 10^9 = a * 4.294967296 463 * 464 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 465 * we can only multiply by about 850 without overflowing, but that 466 * leaves suitably precise fractions for multiply before divide. 467 * 468 * Divide before multiply with a fraction of 2199/512 results in a 469 * systematic undercompensation of 10PPM of th_adjustment. On a 470 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 471 * 472 * We happily sacrifice the lowest of the 64 bits of our result 473 * to the goddess of code clarity. 474 * 475 */ 476 scale = (u_int64_t)1 << 63; 477 scale += (th->th_adjustment / 1024) * 2199; 478 scale /= th->th_counter->tc_frequency; 479 th->th_scale = scale * 2; 480 481 /* 482 * Now that the struct timehands is again consistent, set the new 483 * generation number, making sure to not make it zero. 484 */ 485 if (++ogen == 0) 486 ogen = 1; 487 th->th_generation = ogen; 488 489 /* Go live with the new struct timehands. */ 490 time_second = th->th_microtime.tv_sec; 491 time_uptime = th->th_offset.sec; 492 timehands = th; 493 } 494 495 /* Report or change the active timecounter hardware. */ 496 int 497 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 498 { 499 char newname[32]; 500 struct timecounter *newtc, *tc; 501 int error; 502 503 tc = timecounter; 504 strlcpy(newname, tc->tc_name, sizeof(newname)); 505 506 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 507 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 508 return (error); 509 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 510 if (strcmp(newname, newtc->tc_name) != 0) 511 continue; 512 513 /* Warm up new timecounter. */ 514 (void)newtc->tc_get_timecount(newtc); 515 (void)newtc->tc_get_timecount(newtc); 516 517 timecounter = newtc; 518 return (0); 519 } 520 return (EINVAL); 521 } 522 523 /* Report or change the active timecounter hardware. */ 524 int 525 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 526 { 527 char buf[32], *spc, *choices; 528 struct timecounter *tc; 529 int error, maxlen; 530 531 spc = ""; 532 maxlen = 0; 533 for (tc = timecounters; tc != NULL; tc = tc->tc_next) 534 maxlen += sizeof(buf); 535 choices = malloc(maxlen, M_TEMP, M_WAITOK); 536 *choices = '\0'; 537 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 538 snprintf(buf, sizeof(buf), "%s%s(%d)", 539 spc, tc->tc_name, tc->tc_quality); 540 spc = " "; 541 strlcat(choices, buf, maxlen); 542 } 543 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 544 free(choices, M_TEMP, maxlen); 545 return (error); 546 } 547 548 /* 549 * Timecounters need to be updated every so often to prevent the hardware 550 * counter from overflowing. Updating also recalculates the cached values 551 * used by the get*() family of functions, so their precision depends on 552 * the update frequency. 553 */ 554 static int tc_tick; 555 556 void 557 tc_ticktock(void) 558 { 559 static int count; 560 561 if (++count < tc_tick) 562 return; 563 count = 0; 564 tc_windup(); 565 } 566 567 void 568 inittimecounter(void) 569 { 570 #ifdef DEBUG 571 u_int p; 572 #endif 573 574 /* 575 * Set the initial timeout to 576 * max(1, <approx. number of hardclock ticks in a millisecond>). 577 * People should probably not use the sysctl to set the timeout 578 * to smaller than its initial value, since that value is the 579 * smallest reasonable one. If they want better timestamps they 580 * should use the non-"get"* functions. 581 */ 582 if (hz > 1000) 583 tc_tick = (hz + 500) / 1000; 584 else 585 tc_tick = 1; 586 #ifdef DEBUG 587 p = (tc_tick * 1000000) / hz; 588 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 589 #endif 590 591 /* warm up new timecounter (again) and get rolling. */ 592 (void)timecounter->tc_get_timecount(timecounter); 593 (void)timecounter->tc_get_timecount(timecounter); 594 } 595 596 /* 597 * Return timecounter-related information. 598 */ 599 int 600 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 601 void *newp, size_t newlen) 602 { 603 if (namelen != 1) 604 return (ENOTDIR); 605 606 switch (name[0]) { 607 case KERN_TIMECOUNTER_TICK: 608 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 609 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 610 return (sysctl_int(oldp, oldlenp, newp, newlen, 611 ×tepwarnings)); 612 case KERN_TIMECOUNTER_HARDWARE: 613 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 614 case KERN_TIMECOUNTER_CHOICE: 615 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 616 default: 617 return (EOPNOTSUPP); 618 } 619 /* NOTREACHED */ 620 } 621 622 void 623 ntp_update_second(int64_t *adjust, time_t *sec) 624 { 625 int64_t adj; 626 627 /* Skew time according to any adjtime(2) adjustments. */ 628 if (adjtimedelta > 0) 629 adj = MIN(5000, adjtimedelta); 630 else 631 adj = MAX(-5000, adjtimedelta); 632 adjtimedelta -= adj; 633 *adjust = (adj * 1000) << 32; 634 *adjust += timecounter->tc_freq_adj; 635 } 636 637 int 638 tc_adjfreq(int64_t *old, int64_t *new) 639 { 640 if (old != NULL) { 641 *old = timecounter->tc_freq_adj; 642 } 643 if (new != NULL) { 644 timecounter->tc_freq_adj = *new; 645 } 646 return 0; 647 } 648