1 /*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $OpenBSD: kern_tc.c,v 1.18 2012/11/05 19:39:35 miod Exp $ 10 * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $ 11 */ 12 13 #include <sys/param.h> 14 #include <sys/kernel.h> 15 #include <sys/proc.h> 16 #include <sys/sysctl.h> 17 #include <sys/syslog.h> 18 #include <sys/systm.h> 19 #include <sys/timetc.h> 20 #include <sys/malloc.h> 21 #include <dev/rndvar.h> 22 23 /* 24 * A large step happens on boot. This constant detects such steps. 25 * It is relatively small so that ntp_update_second gets called enough 26 * in the typical 'missed a couple of seconds' case, but doesn't loop 27 * forever when the time step is large. 28 */ 29 #define LARGE_STEP 200 30 31 u_int dummy_get_timecount(struct timecounter *); 32 33 void ntp_update_second(int64_t *, time_t *); 34 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 35 int sysctl_tc_choice(void *, size_t *, void *, size_t); 36 37 /* 38 * Implement a dummy timecounter which we can use until we get a real one 39 * in the air. This allows the console and other early stuff to use 40 * time services. 41 */ 42 43 u_int 44 dummy_get_timecount(struct timecounter *tc) 45 { 46 static u_int now; 47 48 return (++now); 49 } 50 51 static struct timecounter dummy_timecounter = { 52 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 53 }; 54 55 struct timehands { 56 /* These fields must be initialized by the driver. */ 57 struct timecounter *th_counter; 58 int64_t th_adjustment; 59 u_int64_t th_scale; 60 u_int th_offset_count; 61 struct bintime th_offset; 62 struct timeval th_microtime; 63 struct timespec th_nanotime; 64 /* Fields not to be copied in tc_windup start with th_generation. */ 65 volatile u_int th_generation; 66 struct timehands *th_next; 67 }; 68 69 static struct timehands th0; 70 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 71 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 72 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 73 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 74 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 75 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 76 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 77 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 78 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 79 static struct timehands th0 = { 80 &dummy_timecounter, 81 0, 82 (uint64_t)-1 / 1000000, 83 0, 84 {1, 0}, 85 {0, 0}, 86 {0, 0}, 87 1, 88 &th1 89 }; 90 91 static struct timehands *volatile timehands = &th0; 92 struct timecounter *timecounter = &dummy_timecounter; 93 static struct timecounter *timecounters = &dummy_timecounter; 94 95 volatile time_t time_second = 1; 96 volatile time_t time_uptime = 0; 97 98 extern struct timeval adjtimedelta; 99 static struct bintime boottimebin; 100 static int timestepwarnings; 101 102 void tc_windup(void); 103 104 /* 105 * Return the difference between the timehands' counter value now and what 106 * was when we copied it to the timehands' offset_count. 107 */ 108 static __inline u_int 109 tc_delta(struct timehands *th) 110 { 111 struct timecounter *tc; 112 113 tc = th->th_counter; 114 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 115 tc->tc_counter_mask); 116 } 117 118 /* 119 * Functions for reading the time. We have to loop until we are sure that 120 * the timehands that we operated on was not updated under our feet. See 121 * the comment in <sys/time.h> for a description of these 12 functions. 122 */ 123 124 void 125 binuptime(struct bintime *bt) 126 { 127 struct timehands *th; 128 u_int gen; 129 130 do { 131 th = timehands; 132 gen = th->th_generation; 133 *bt = th->th_offset; 134 bintime_addx(bt, th->th_scale * tc_delta(th)); 135 } while (gen == 0 || gen != th->th_generation); 136 } 137 138 void 139 nanouptime(struct timespec *tsp) 140 { 141 struct bintime bt; 142 143 binuptime(&bt); 144 bintime2timespec(&bt, tsp); 145 } 146 147 void 148 microuptime(struct timeval *tvp) 149 { 150 struct bintime bt; 151 152 binuptime(&bt); 153 bintime2timeval(&bt, tvp); 154 } 155 156 void 157 bintime(struct bintime *bt) 158 { 159 160 binuptime(bt); 161 bintime_add(bt, &boottimebin); 162 } 163 164 void 165 nanotime(struct timespec *tsp) 166 { 167 struct bintime bt; 168 169 bintime(&bt); 170 bintime2timespec(&bt, tsp); 171 } 172 173 void 174 microtime(struct timeval *tvp) 175 { 176 struct bintime bt; 177 178 bintime(&bt); 179 bintime2timeval(&bt, tvp); 180 } 181 182 void 183 getnanouptime(struct timespec *tsp) 184 { 185 struct timehands *th; 186 u_int gen; 187 188 do { 189 th = timehands; 190 gen = th->th_generation; 191 bintime2timespec(&th->th_offset, tsp); 192 } while (gen == 0 || gen != th->th_generation); 193 } 194 195 void 196 getmicrouptime(struct timeval *tvp) 197 { 198 struct timehands *th; 199 u_int gen; 200 201 do { 202 th = timehands; 203 gen = th->th_generation; 204 bintime2timeval(&th->th_offset, tvp); 205 } while (gen == 0 || gen != th->th_generation); 206 } 207 208 void 209 getnanotime(struct timespec *tsp) 210 { 211 struct timehands *th; 212 u_int gen; 213 214 do { 215 th = timehands; 216 gen = th->th_generation; 217 *tsp = th->th_nanotime; 218 } while (gen == 0 || gen != th->th_generation); 219 } 220 221 void 222 getmicrotime(struct timeval *tvp) 223 { 224 struct timehands *th; 225 u_int gen; 226 227 do { 228 th = timehands; 229 gen = th->th_generation; 230 *tvp = th->th_microtime; 231 } while (gen == 0 || gen != th->th_generation); 232 } 233 234 /* 235 * Initialize a new timecounter and possibly use it. 236 */ 237 void 238 tc_init(struct timecounter *tc) 239 { 240 u_int u; 241 242 u = tc->tc_frequency / tc->tc_counter_mask; 243 /* XXX: We need some margin here, 10% is a guess */ 244 u *= 11; 245 u /= 10; 246 if (tc->tc_quality >= 0) { 247 if (u > hz) { 248 tc->tc_quality = -2000; 249 printf("Timecounter \"%s\" frequency %lu Hz", 250 tc->tc_name, (unsigned long)tc->tc_frequency); 251 printf(" -- Insufficient hz, needs at least %u\n", u); 252 } 253 } 254 255 tc->tc_next = timecounters; 256 timecounters = tc; 257 /* 258 * Never automatically use a timecounter with negative quality. 259 * Even though we run on the dummy counter, switching here may be 260 * worse since this timecounter may not be monotonic. 261 */ 262 if (tc->tc_quality < 0) 263 return; 264 if (tc->tc_quality < timecounter->tc_quality) 265 return; 266 if (tc->tc_quality == timecounter->tc_quality && 267 tc->tc_frequency < timecounter->tc_frequency) 268 return; 269 (void)tc->tc_get_timecount(tc); 270 add_timer_randomness(tc->tc_get_timecount(tc)); 271 272 timecounter = tc; 273 } 274 275 /* Report the frequency of the current timecounter. */ 276 u_int64_t 277 tc_getfrequency(void) 278 { 279 280 return (timehands->th_counter->tc_frequency); 281 } 282 283 /* 284 * Step our concept of UTC, aka the realtime clock. 285 * This is done by modifying our estimate of when we booted. 286 * XXX: not locked. 287 */ 288 void 289 tc_setrealtimeclock(struct timespec *ts) 290 { 291 struct timespec ts2; 292 struct bintime bt, bt2; 293 294 binuptime(&bt2); 295 timespec2bintime(ts, &bt); 296 bintime_sub(&bt, &bt2); 297 bintime_add(&bt2, &boottimebin); 298 boottimebin = bt; 299 bintime2timeval(&bt, &boottime); 300 add_timer_randomness(ts->tv_sec); 301 302 /* XXX fiddle all the little crinkly bits around the fiords... */ 303 tc_windup(); 304 if (timestepwarnings) { 305 bintime2timespec(&bt2, &ts2); 306 log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n", 307 (long)ts2.tv_sec, ts2.tv_nsec, 308 (long)ts->tv_sec, ts->tv_nsec); 309 } 310 } 311 312 /* 313 * Step the monotonic and realtime clocks, triggering any timeouts that 314 * should have occurred across the interval. 315 * XXX: not locked. 316 */ 317 void 318 tc_setclock(struct timespec *ts) 319 { 320 struct bintime bt, bt2; 321 #ifndef SMALL_KERNEL 322 long long adj_ticks; 323 #endif 324 325 /* 326 * When we're called for the first time, during boot when 327 * the root partition is mounted, boottime is still zero: 328 * we just need to set it. 329 */ 330 if (boottimebin.sec == 0) { 331 tc_setrealtimeclock(ts); 332 return; 333 } 334 335 add_timer_randomness(ts->tv_sec); 336 337 timespec2bintime(ts, &bt); 338 bintime_sub(&bt, &boottimebin); 339 bt2 = timehands->th_offset; 340 timehands->th_offset = bt; 341 342 #ifndef SMALL_KERNEL 343 /* convert the bintime to ticks */ 344 bintime_sub(&bt, &bt2); 345 adj_ticks = (long long)hz * bt.sec + 346 (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick; 347 if (adj_ticks > 0) { 348 if (adj_ticks > INT_MAX) 349 adj_ticks = INT_MAX; 350 timeout_adjust_ticks(adj_ticks); 351 } 352 #endif 353 354 /* XXX fiddle all the little crinkly bits around the fiords... */ 355 tc_windup(); 356 } 357 358 /* 359 * Initialize the next struct timehands in the ring and make 360 * it the active timehands. Along the way we might switch to a different 361 * timecounter and/or do seconds processing in NTP. Slightly magic. 362 */ 363 void 364 tc_windup(void) 365 { 366 struct bintime bt; 367 struct timehands *th, *tho; 368 u_int64_t scale; 369 u_int delta, ncount, ogen; 370 int i; 371 #ifdef leapsecs 372 time_t t; 373 #endif 374 375 /* 376 * Make the next timehands a copy of the current one, but do not 377 * overwrite the generation or next pointer. While we update 378 * the contents, the generation must be zero. 379 */ 380 tho = timehands; 381 th = tho->th_next; 382 ogen = th->th_generation; 383 th->th_generation = 0; 384 bcopy(tho, th, offsetof(struct timehands, th_generation)); 385 386 /* 387 * Capture a timecounter delta on the current timecounter and if 388 * changing timecounters, a counter value from the new timecounter. 389 * Update the offset fields accordingly. 390 */ 391 delta = tc_delta(th); 392 if (th->th_counter != timecounter) 393 ncount = timecounter->tc_get_timecount(timecounter); 394 else 395 ncount = 0; 396 th->th_offset_count += delta; 397 th->th_offset_count &= th->th_counter->tc_counter_mask; 398 bintime_addx(&th->th_offset, th->th_scale * delta); 399 400 #ifdef notyet 401 /* 402 * Hardware latching timecounters may not generate interrupts on 403 * PPS events, so instead we poll them. There is a finite risk that 404 * the hardware might capture a count which is later than the one we 405 * got above, and therefore possibly in the next NTP second which might 406 * have a different rate than the current NTP second. It doesn't 407 * matter in practice. 408 */ 409 if (tho->th_counter->tc_poll_pps) 410 tho->th_counter->tc_poll_pps(tho->th_counter); 411 #endif 412 413 /* 414 * Deal with NTP second processing. The for loop normally 415 * iterates at most once, but in extreme situations it might 416 * keep NTP sane if timeouts are not run for several seconds. 417 * At boot, the time step can be large when the TOD hardware 418 * has been read, so on really large steps, we call 419 * ntp_update_second only twice. We need to call it twice in 420 * case we missed a leap second. 421 */ 422 bt = th->th_offset; 423 bintime_add(&bt, &boottimebin); 424 i = bt.sec - tho->th_microtime.tv_sec; 425 if (i > LARGE_STEP) 426 i = 2; 427 for (; i > 0; i--) 428 ntp_update_second(&th->th_adjustment, &bt.sec); 429 430 /* Update the UTC timestamps used by the get*() functions. */ 431 /* XXX shouldn't do this here. Should force non-`get' versions. */ 432 bintime2timeval(&bt, &th->th_microtime); 433 bintime2timespec(&bt, &th->th_nanotime); 434 435 /* Now is a good time to change timecounters. */ 436 if (th->th_counter != timecounter) { 437 th->th_counter = timecounter; 438 th->th_offset_count = ncount; 439 } 440 441 /*- 442 * Recalculate the scaling factor. We want the number of 1/2^64 443 * fractions of a second per period of the hardware counter, taking 444 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 445 * processing provides us with. 446 * 447 * The th_adjustment is nanoseconds per second with 32 bit binary 448 * fraction and we want 64 bit binary fraction of second: 449 * 450 * x = a * 2^32 / 10^9 = a * 4.294967296 451 * 452 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 453 * we can only multiply by about 850 without overflowing, but that 454 * leaves suitably precise fractions for multiply before divide. 455 * 456 * Divide before multiply with a fraction of 2199/512 results in a 457 * systematic undercompensation of 10PPM of th_adjustment. On a 458 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 459 * 460 * We happily sacrifice the lowest of the 64 bits of our result 461 * to the goddess of code clarity. 462 * 463 */ 464 scale = (u_int64_t)1 << 63; 465 scale += (th->th_adjustment / 1024) * 2199; 466 scale /= th->th_counter->tc_frequency; 467 th->th_scale = scale * 2; 468 469 /* 470 * Now that the struct timehands is again consistent, set the new 471 * generation number, making sure to not make it zero. 472 */ 473 if (++ogen == 0) 474 ogen = 1; 475 th->th_generation = ogen; 476 477 /* Go live with the new struct timehands. */ 478 time_second = th->th_microtime.tv_sec; 479 time_uptime = th->th_offset.sec; 480 timehands = th; 481 } 482 483 /* Report or change the active timecounter hardware. */ 484 int 485 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 486 { 487 char newname[32]; 488 struct timecounter *newtc, *tc; 489 int error; 490 491 tc = timecounter; 492 strlcpy(newname, tc->tc_name, sizeof(newname)); 493 494 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 495 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 496 return (error); 497 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 498 if (strcmp(newname, newtc->tc_name) != 0) 499 continue; 500 501 /* Warm up new timecounter. */ 502 (void)newtc->tc_get_timecount(newtc); 503 (void)newtc->tc_get_timecount(newtc); 504 505 timecounter = newtc; 506 return (0); 507 } 508 return (EINVAL); 509 } 510 511 /* Report or change the active timecounter hardware. */ 512 int 513 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 514 { 515 char buf[32], *spc, *choices; 516 struct timecounter *tc; 517 int error, maxlen; 518 519 spc = ""; 520 maxlen = 0; 521 for (tc = timecounters; tc != NULL; tc = tc->tc_next) 522 maxlen += sizeof(buf); 523 choices = malloc(maxlen, M_TEMP, M_WAITOK); 524 *choices = '\0'; 525 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 526 snprintf(buf, sizeof(buf), "%s%s(%d)", 527 spc, tc->tc_name, tc->tc_quality); 528 spc = " "; 529 strlcat(choices, buf, maxlen); 530 } 531 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 532 free(choices, M_TEMP); 533 return (error); 534 } 535 536 /* 537 * Timecounters need to be updated every so often to prevent the hardware 538 * counter from overflowing. Updating also recalculates the cached values 539 * used by the get*() family of functions, so their precision depends on 540 * the update frequency. 541 */ 542 static int tc_tick; 543 544 void 545 tc_ticktock(void) 546 { 547 static int count; 548 549 if (++count < tc_tick) 550 return; 551 count = 0; 552 tc_windup(); 553 } 554 555 void 556 inittimecounter(void) 557 { 558 #ifdef DEBUG 559 u_int p; 560 #endif 561 562 /* 563 * Set the initial timeout to 564 * max(1, <approx. number of hardclock ticks in a millisecond>). 565 * People should probably not use the sysctl to set the timeout 566 * to smaller than its initial value, since that value is the 567 * smallest reasonable one. If they want better timestamps they 568 * should use the non-"get"* functions. 569 */ 570 if (hz > 1000) 571 tc_tick = (hz + 500) / 1000; 572 else 573 tc_tick = 1; 574 #ifdef DEBUG 575 p = (tc_tick * 1000000) / hz; 576 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 577 #endif 578 579 /* warm up new timecounter (again) and get rolling. */ 580 (void)timecounter->tc_get_timecount(timecounter); 581 (void)timecounter->tc_get_timecount(timecounter); 582 } 583 584 /* 585 * Return timecounter-related information. 586 */ 587 int 588 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 589 void *newp, size_t newlen) 590 { 591 if (namelen != 1) 592 return (ENOTDIR); 593 594 switch (name[0]) { 595 case KERN_TIMECOUNTER_TICK: 596 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 597 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 598 return (sysctl_int(oldp, oldlenp, newp, newlen, 599 ×tepwarnings)); 600 case KERN_TIMECOUNTER_HARDWARE: 601 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 602 case KERN_TIMECOUNTER_CHOICE: 603 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 604 default: 605 return (EOPNOTSUPP); 606 } 607 /* NOTREACHED */ 608 } 609 610 void 611 ntp_update_second(int64_t *adjust, time_t *sec) 612 { 613 struct timeval adj; 614 615 /* Skew time according to any adjtime(2) adjustments. */ 616 timerclear(&adj); 617 if (adjtimedelta.tv_sec > 0) 618 adj.tv_usec = 5000; 619 else if (adjtimedelta.tv_sec == 0) 620 adj.tv_usec = MIN(5000, adjtimedelta.tv_usec); 621 else if (adjtimedelta.tv_sec < -1) 622 adj.tv_usec = -5000; 623 else if (adjtimedelta.tv_sec == -1) 624 adj.tv_usec = MAX(-5000, adjtimedelta.tv_usec - 1000000); 625 timersub(&adjtimedelta, &adj, &adjtimedelta); 626 *adjust = ((int64_t)adj.tv_usec * 1000) << 32; 627 *adjust += timecounter->tc_freq_adj; 628 } 629 630 int 631 tc_adjfreq(int64_t *old, int64_t *new) 632 { 633 if (old != NULL) { 634 *old = timecounter->tc_freq_adj; 635 } 636 if (new != NULL) { 637 timecounter->tc_freq_adj = *new; 638 } 639 return 0; 640 } 641