1 /*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $OpenBSD: kern_tc.c,v 1.12 2008/11/24 16:38:05 deraadt Exp $ 10 * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $ 11 */ 12 13 #include <sys/param.h> 14 #include <sys/kernel.h> 15 #include <sys/sysctl.h> 16 #include <sys/syslog.h> 17 #include <sys/systm.h> 18 #include <sys/timetc.h> 19 #include <sys/malloc.h> 20 #include <dev/rndvar.h> 21 22 #ifdef __HAVE_TIMECOUNTER 23 /* 24 * A large step happens on boot. This constant detects such steps. 25 * It is relatively small so that ntp_update_second gets called enough 26 * in the typical 'missed a couple of seconds' case, but doesn't loop 27 * forever when the time step is large. 28 */ 29 #define LARGE_STEP 200 30 31 u_int dummy_get_timecount(struct timecounter *); 32 33 void ntp_update_second(int64_t *, time_t *); 34 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 35 int sysctl_tc_choice(void *, size_t *, void *, size_t); 36 37 /* 38 * Implement a dummy timecounter which we can use until we get a real one 39 * in the air. This allows the console and other early stuff to use 40 * time services. 41 */ 42 43 u_int 44 dummy_get_timecount(struct timecounter *tc) 45 { 46 static u_int now; 47 48 return (++now); 49 } 50 51 static struct timecounter dummy_timecounter = { 52 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 53 }; 54 55 struct timehands { 56 /* These fields must be initialized by the driver. */ 57 struct timecounter *th_counter; 58 int64_t th_adjustment; 59 u_int64_t th_scale; 60 u_int th_offset_count; 61 struct bintime th_offset; 62 struct timeval th_microtime; 63 struct timespec th_nanotime; 64 /* Fields not to be copied in tc_windup start with th_generation. */ 65 volatile u_int th_generation; 66 struct timehands *th_next; 67 }; 68 69 static struct timehands th0; 70 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 71 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 72 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 73 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 74 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 75 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 76 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 77 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 78 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 79 static struct timehands th0 = { 80 &dummy_timecounter, 81 0, 82 (uint64_t)-1 / 1000000, 83 0, 84 {1, 0}, 85 {0, 0}, 86 {0, 0}, 87 1, 88 &th1 89 }; 90 91 static struct timehands *volatile timehands = &th0; 92 struct timecounter *timecounter = &dummy_timecounter; 93 static struct timecounter *timecounters = &dummy_timecounter; 94 95 volatile time_t time_second = 1; 96 volatile time_t time_uptime = 0; 97 98 extern struct timeval adjtimedelta; 99 static struct bintime boottimebin; 100 static int timestepwarnings; 101 102 void tc_windup(void); 103 104 /* 105 * Return the difference between the timehands' counter value now and what 106 * was when we copied it to the timehands' offset_count. 107 */ 108 static __inline u_int 109 tc_delta(struct timehands *th) 110 { 111 struct timecounter *tc; 112 113 tc = th->th_counter; 114 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 115 tc->tc_counter_mask); 116 } 117 118 /* 119 * Functions for reading the time. We have to loop until we are sure that 120 * the timehands that we operated on was not updated under our feet. See 121 * the comment in <sys/time.h> for a description of these 12 functions. 122 */ 123 124 void 125 binuptime(struct bintime *bt) 126 { 127 struct timehands *th; 128 u_int gen; 129 130 do { 131 th = timehands; 132 gen = th->th_generation; 133 *bt = th->th_offset; 134 bintime_addx(bt, th->th_scale * tc_delta(th)); 135 } while (gen == 0 || gen != th->th_generation); 136 } 137 138 void 139 nanouptime(struct timespec *tsp) 140 { 141 struct bintime bt; 142 143 binuptime(&bt); 144 bintime2timespec(&bt, tsp); 145 } 146 147 void 148 microuptime(struct timeval *tvp) 149 { 150 struct bintime bt; 151 152 binuptime(&bt); 153 bintime2timeval(&bt, tvp); 154 } 155 156 void 157 bintime(struct bintime *bt) 158 { 159 160 binuptime(bt); 161 bintime_add(bt, &boottimebin); 162 } 163 164 void 165 nanotime(struct timespec *tsp) 166 { 167 struct bintime bt; 168 169 bintime(&bt); 170 bintime2timespec(&bt, tsp); 171 } 172 173 void 174 microtime(struct timeval *tvp) 175 { 176 struct bintime bt; 177 178 bintime(&bt); 179 bintime2timeval(&bt, tvp); 180 } 181 182 void 183 getnanouptime(struct timespec *tsp) 184 { 185 struct timehands *th; 186 u_int gen; 187 188 do { 189 th = timehands; 190 gen = th->th_generation; 191 bintime2timespec(&th->th_offset, tsp); 192 } while (gen == 0 || gen != th->th_generation); 193 } 194 195 void 196 getmicrouptime(struct timeval *tvp) 197 { 198 struct timehands *th; 199 u_int gen; 200 201 do { 202 th = timehands; 203 gen = th->th_generation; 204 bintime2timeval(&th->th_offset, tvp); 205 } while (gen == 0 || gen != th->th_generation); 206 } 207 208 void 209 getnanotime(struct timespec *tsp) 210 { 211 struct timehands *th; 212 u_int gen; 213 214 do { 215 th = timehands; 216 gen = th->th_generation; 217 *tsp = th->th_nanotime; 218 } while (gen == 0 || gen != th->th_generation); 219 } 220 221 void 222 getmicrotime(struct timeval *tvp) 223 { 224 struct timehands *th; 225 u_int gen; 226 227 do { 228 th = timehands; 229 gen = th->th_generation; 230 *tvp = th->th_microtime; 231 } while (gen == 0 || gen != th->th_generation); 232 } 233 234 /* 235 * Initialize a new timecounter and possibly use it. 236 */ 237 void 238 tc_init(struct timecounter *tc) 239 { 240 u_int u; 241 242 u = tc->tc_frequency / tc->tc_counter_mask; 243 /* XXX: We need some margin here, 10% is a guess */ 244 u *= 11; 245 u /= 10; 246 if (tc->tc_quality >= 0) { 247 if (u > hz) { 248 tc->tc_quality = -2000; 249 printf("Timecounter \"%s\" frequency %lu Hz", 250 tc->tc_name, (unsigned long)tc->tc_frequency); 251 printf(" -- Insufficient hz, needs at least %u\n", u); 252 } 253 } 254 255 tc->tc_next = timecounters; 256 timecounters = tc; 257 /* 258 * Never automatically use a timecounter with negative quality. 259 * Even though we run on the dummy counter, switching here may be 260 * worse since this timecounter may not be monotonous. 261 */ 262 if (tc->tc_quality < 0) 263 return; 264 if (tc->tc_quality < timecounter->tc_quality) 265 return; 266 if (tc->tc_quality == timecounter->tc_quality && 267 tc->tc_frequency < timecounter->tc_frequency) 268 return; 269 (void)tc->tc_get_timecount(tc); 270 add_timer_randomness(tc->tc_get_timecount(tc)); 271 272 timecounter = tc; 273 } 274 275 /* Report the frequency of the current timecounter. */ 276 u_int64_t 277 tc_getfrequency(void) 278 { 279 280 return (timehands->th_counter->tc_frequency); 281 } 282 283 /* 284 * Step our concept of UTC. This is done by modifying our estimate of 285 * when we booted. 286 * XXX: not locked. 287 */ 288 void 289 tc_setclock(struct timespec *ts) 290 { 291 struct timespec ts2; 292 struct bintime bt, bt2; 293 294 binuptime(&bt2); 295 timespec2bintime(ts, &bt); 296 bintime_sub(&bt, &bt2); 297 bintime_add(&bt2, &boottimebin); 298 boottimebin = bt; 299 bintime2timeval(&bt, &boottime); 300 add_timer_randomness(ts->tv_sec); 301 302 /* XXX fiddle all the little crinkly bits around the fiords... */ 303 tc_windup(); 304 if (timestepwarnings) { 305 bintime2timespec(&bt2, &ts2); 306 log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n", 307 (long)ts2.tv_sec, ts2.tv_nsec, 308 (long)ts->tv_sec, ts->tv_nsec); 309 } 310 } 311 312 /* 313 * Initialize the next struct timehands in the ring and make 314 * it the active timehands. Along the way we might switch to a different 315 * timecounter and/or do seconds processing in NTP. Slightly magic. 316 */ 317 void 318 tc_windup(void) 319 { 320 struct bintime bt; 321 struct timehands *th, *tho; 322 u_int64_t scale; 323 u_int delta, ncount, ogen; 324 int i; 325 #ifdef leapsecs 326 time_t t; 327 #endif 328 329 /* 330 * Make the next timehands a copy of the current one, but do not 331 * overwrite the generation or next pointer. While we update 332 * the contents, the generation must be zero. 333 */ 334 tho = timehands; 335 th = tho->th_next; 336 ogen = th->th_generation; 337 th->th_generation = 0; 338 bcopy(tho, th, offsetof(struct timehands, th_generation)); 339 340 /* 341 * Capture a timecounter delta on the current timecounter and if 342 * changing timecounters, a counter value from the new timecounter. 343 * Update the offset fields accordingly. 344 */ 345 delta = tc_delta(th); 346 if (th->th_counter != timecounter) 347 ncount = timecounter->tc_get_timecount(timecounter); 348 else 349 ncount = 0; 350 th->th_offset_count += delta; 351 th->th_offset_count &= th->th_counter->tc_counter_mask; 352 bintime_addx(&th->th_offset, th->th_scale * delta); 353 354 #ifdef notyet 355 /* 356 * Hardware latching timecounters may not generate interrupts on 357 * PPS events, so instead we poll them. There is a finite risk that 358 * the hardware might capture a count which is later than the one we 359 * got above, and therefore possibly in the next NTP second which might 360 * have a different rate than the current NTP second. It doesn't 361 * matter in practice. 362 */ 363 if (tho->th_counter->tc_poll_pps) 364 tho->th_counter->tc_poll_pps(tho->th_counter); 365 #endif 366 367 /* 368 * Deal with NTP second processing. The for loop normally 369 * iterates at most once, but in extreme situations it might 370 * keep NTP sane if timeouts are not run for several seconds. 371 * At boot, the time step can be large when the TOD hardware 372 * has been read, so on really large steps, we call 373 * ntp_update_second only twice. We need to call it twice in 374 * case we missed a leap second. 375 */ 376 bt = th->th_offset; 377 bintime_add(&bt, &boottimebin); 378 i = bt.sec - tho->th_microtime.tv_sec; 379 if (i > LARGE_STEP) 380 i = 2; 381 for (; i > 0; i--) 382 ntp_update_second(&th->th_adjustment, &bt.sec); 383 384 /* Update the UTC timestamps used by the get*() functions. */ 385 /* XXX shouldn't do this here. Should force non-`get' versions. */ 386 bintime2timeval(&bt, &th->th_microtime); 387 bintime2timespec(&bt, &th->th_nanotime); 388 389 /* Now is a good time to change timecounters. */ 390 if (th->th_counter != timecounter) { 391 th->th_counter = timecounter; 392 th->th_offset_count = ncount; 393 } 394 395 /*- 396 * Recalculate the scaling factor. We want the number of 1/2^64 397 * fractions of a second per period of the hardware counter, taking 398 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 399 * processing provides us with. 400 * 401 * The th_adjustment is nanoseconds per second with 32 bit binary 402 * fraction and we want 64 bit binary fraction of second: 403 * 404 * x = a * 2^32 / 10^9 = a * 4.294967296 405 * 406 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 407 * we can only multiply by about 850 without overflowing, but that 408 * leaves suitably precise fractions for multiply before divide. 409 * 410 * Divide before multiply with a fraction of 2199/512 results in a 411 * systematic undercompensation of 10PPM of th_adjustment. On a 412 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 413 * 414 * We happily sacrifice the lowest of the 64 bits of our result 415 * to the goddess of code clarity. 416 * 417 */ 418 scale = (u_int64_t)1 << 63; 419 scale += (th->th_adjustment / 1024) * 2199; 420 scale /= th->th_counter->tc_frequency; 421 th->th_scale = scale * 2; 422 423 /* 424 * Now that the struct timehands is again consistent, set the new 425 * generation number, making sure to not make it zero. 426 */ 427 if (++ogen == 0) 428 ogen = 1; 429 th->th_generation = ogen; 430 431 /* Go live with the new struct timehands. */ 432 time_second = th->th_microtime.tv_sec; 433 time_uptime = th->th_offset.sec; 434 timehands = th; 435 } 436 437 /* Report or change the active timecounter hardware. */ 438 int 439 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 440 { 441 char newname[32]; 442 struct timecounter *newtc, *tc; 443 int error; 444 445 tc = timecounter; 446 strlcpy(newname, tc->tc_name, sizeof(newname)); 447 448 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 449 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 450 return (error); 451 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 452 if (strcmp(newname, newtc->tc_name) != 0) 453 continue; 454 455 /* Warm up new timecounter. */ 456 (void)newtc->tc_get_timecount(newtc); 457 (void)newtc->tc_get_timecount(newtc); 458 459 timecounter = newtc; 460 return (0); 461 } 462 return (EINVAL); 463 } 464 465 /* Report or change the active timecounter hardware. */ 466 int 467 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 468 { 469 char buf[32], *spc, *choices; 470 struct timecounter *tc; 471 int error, maxlen; 472 473 spc = ""; 474 error = 0; 475 maxlen = 0; 476 for (tc = timecounters; tc != NULL; tc = tc->tc_next) 477 maxlen += sizeof(buf); 478 choices = malloc(maxlen, M_TEMP, M_WAITOK); 479 *choices = '\0'; 480 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 481 snprintf(buf, sizeof(buf), "%s%s(%d)", 482 spc, tc->tc_name, tc->tc_quality); 483 spc = " "; 484 strlcat(choices, buf, maxlen); 485 } 486 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 487 free(choices, M_TEMP); 488 return (error); 489 } 490 491 /* 492 * Timecounters need to be updated every so often to prevent the hardware 493 * counter from overflowing. Updating also recalculates the cached values 494 * used by the get*() family of functions, so their precision depends on 495 * the update frequency. 496 */ 497 static int tc_tick; 498 499 void 500 tc_ticktock(void) 501 { 502 static int count; 503 504 if (++count < tc_tick) 505 return; 506 count = 0; 507 tc_windup(); 508 } 509 510 void 511 inittimecounter(void) 512 { 513 u_int p; 514 515 /* 516 * Set the initial timeout to 517 * max(1, <approx. number of hardclock ticks in a millisecond>). 518 * People should probably not use the sysctl to set the timeout 519 * to smaller than its inital value, since that value is the 520 * smallest reasonable one. If they want better timestamps they 521 * should use the non-"get"* functions. 522 */ 523 if (hz > 1000) 524 tc_tick = (hz + 500) / 1000; 525 else 526 tc_tick = 1; 527 p = (tc_tick * 1000000) / hz; 528 #ifdef DEBUG 529 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 530 #endif 531 532 /* warm up new timecounter (again) and get rolling. */ 533 (void)timecounter->tc_get_timecount(timecounter); 534 (void)timecounter->tc_get_timecount(timecounter); 535 } 536 537 /* 538 * Return timecounter-related information. 539 */ 540 int 541 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 542 void *newp, size_t newlen) 543 { 544 if (namelen != 1) 545 return (ENOTDIR); 546 547 switch (name[0]) { 548 case KERN_TIMECOUNTER_TICK: 549 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 550 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 551 return (sysctl_int(oldp, oldlenp, newp, newlen, 552 ×tepwarnings)); 553 case KERN_TIMECOUNTER_HARDWARE: 554 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 555 case KERN_TIMECOUNTER_CHOICE: 556 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 557 default: 558 return (EOPNOTSUPP); 559 } 560 /* NOTREACHED */ 561 } 562 563 void 564 ntp_update_second(int64_t *adjust, time_t *sec) 565 { 566 struct timeval adj; 567 568 /* Skew time according to any adjtime(2) adjustments. */ 569 timerclear(&adj); 570 if (adjtimedelta.tv_sec > 0) 571 adj.tv_usec = 5000; 572 else if (adjtimedelta.tv_sec == 0) 573 adj.tv_usec = MIN(5000, adjtimedelta.tv_usec); 574 else if (adjtimedelta.tv_sec < -1) 575 adj.tv_usec = -5000; 576 else if (adjtimedelta.tv_sec == -1) 577 adj.tv_usec = MAX(-5000, adjtimedelta.tv_usec - 1000000); 578 timersub(&adjtimedelta, &adj, &adjtimedelta); 579 *adjust = ((int64_t)adj.tv_usec * 1000) << 32; 580 *adjust += timecounter->tc_freq_adj; 581 } 582 583 int 584 tc_adjfreq(int64_t *old, int64_t *new) 585 { 586 if (old != NULL) { 587 *old = timecounter->tc_freq_adj; 588 } 589 if (new != NULL) { 590 timecounter->tc_freq_adj = *new; 591 } 592 return 0; 593 } 594 #endif /* __HAVE_TIMECOUNTER */ 595