/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.  Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $OpenBSD: kern_tc.c,v 1.17 2012/05/24 07:17:42 guenther Exp $
 * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/malloc.h>
#include <dev/rndvar.h>

#ifdef __HAVE_TIMECOUNTER
/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

u_int dummy_get_timecount(struct timecounter *);

void ntp_update_second(int64_t *, time_t *);
int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	u_int64_t		th_scale;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/*
	 * Fields not to be copied in tc_windup start with th_generation.
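	 * tc_windup() relies on this ordering: only the bytes that come
	 * before th_generation are copied into the next timehands, via
	 * bcopy(tho, th, offsetof(struct timehands, th_generation)).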
	 */
	volatile u_int		th_generation;
	struct timehands	*th_next;
};

static struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,
	0,
	{1, 0},
	{0, 0},
	{0, 0},
	1,
	&th1
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

extern struct timeval adjtimedelta;
static struct bintime boottimebin;
static int timestepwarnings;

void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these 12 functions.
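 *
 * Each reader grabs the current timehands pointer and its generation,
 * copies the fields it needs, and retries if the generation was zero or
 * changed in the meantime: tc_windup() zeroes th_generation before it
 * rewrites a timehands and stores a fresh non-zero value once the update
 * is complete, so an unchanged generation guarantees a consistent snapshot.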
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	binuptime(bt);
	bintime_add(bt, &boottimebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	add_timer_randomness(tc->tc_get_timecount(tc));

	timecounter = tc;
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC, aka the realtime clock.
 * This is done by modifying our estimate of when we booted.
 * XXX: not locked.
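 *
 * The new boottime is simply the requested wall-clock time minus the
 * current uptime, so bintime() (uptime + boottime) reflects the step
 * immediately.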
 */
void
tc_setrealtimeclock(struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &boottimebin);
	boottimebin = bt;
	bintime2timeval(&bt, &boottime);
	add_timer_randomness(ts->tv_sec);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n",
		    (long)ts2.tv_sec, ts2.tv_nsec,
		    (long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Step the monotonic and realtime clocks, triggering any timeouts that
 * should have occurred across the interval.
 * XXX: not locked.
 */
void
tc_setclock(struct timespec *ts)
{
	struct bintime bt, bt2;
#ifndef SMALL_KERNEL
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, boottime is still zero:
	 * we just need to set it.
	 */
	if (boottimebin.sec == 0) {
		tc_setrealtimeclock(ts);
		return;
	}

	add_timer_randomness(ts->tv_sec);

	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &boottimebin);
	bt2 = timehands->th_offset;
	timehands->th_offset = bt;

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
	bintime_sub(&bt, &bt2);
	adj_ticks = (long long)hz * bt.sec +
	    (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick;
	if (adj_ticks > 0) {
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		timeout_adjust_ticks(adj_ticks);
	}
#endif

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;
#ifdef leapsecs
	time_t t;
#endif

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--)
		ntp_update_second(&th->th_adjustment, &bt.sec);

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	timehands = th;
}

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen,
	    newname, sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;
		return (0);
	}
	return (EINVAL);
}

/*
 * Report the available timecounter hardware.
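 * The result is a read-only, space-separated list of "name(quality)"
 * entries; the dummy counter above, for example, appears as
 * "dummy(-1000000)".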
 */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	spc = "";
	maxlen = 0;
	for (tc = timecounters; tc != NULL; tc = tc->tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP);
	return (error);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	tc_windup();
}

void
inittimecounter(void)
{
#ifdef DEBUG
	u_int p;
#endif

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
#ifdef DEBUG
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_TICK:
		return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
	case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &timestepwarnings));
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
ntp_update_second(int64_t *adjust, time_t *sec)
{
	struct timeval adj;

	/*
	 * Skew time according to any adjtime(2) adjustments.
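	 * The outstanding adjtimedelta is consumed at a rate of at most
	 * 5000 microseconds per elapsed second, and the slew for this
	 * second is handed back as nanoseconds per second with a 32 bit
	 * binary fraction, the format th_adjustment expects.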
	 */
	timerclear(&adj);
	if (adjtimedelta.tv_sec > 0)
		adj.tv_usec = 5000;
	else if (adjtimedelta.tv_sec == 0)
		adj.tv_usec = MIN(5000, adjtimedelta.tv_usec);
	else if (adjtimedelta.tv_sec < -1)
		adj.tv_usec = -5000;
	else if (adjtimedelta.tv_sec == -1)
		adj.tv_usec = MAX(-5000, adjtimedelta.tv_usec - 1000000);
	timersub(&adjtimedelta, &adj, &adjtimedelta);
	*adjust = ((int64_t)adj.tv_usec * 1000) << 32;
	*adjust += timecounter->tc_freq_adj;
}

int
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		timecounter->tc_freq_adj = *new;
	}
	return 0;
}
#endif /* __HAVE_TIMECOUNTER */