1 /*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $OpenBSD: kern_tc.c,v 1.10 2007/12/27 19:05:22 otto Exp $ 10 * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $ 11 */ 12 13 #include <sys/param.h> 14 #include <sys/kernel.h> 15 #include <sys/sysctl.h> 16 #include <sys/syslog.h> 17 #include <sys/systm.h> 18 #include <sys/timetc.h> 19 #include <sys/malloc.h> 20 21 #ifdef __HAVE_TIMECOUNTER 22 /* 23 * A large step happens on boot. This constant detects such steps. 24 * It is relatively small so that ntp_update_second gets called enough 25 * in the typical 'missed a couple of seconds' case, but doesn't loop 26 * forever when the time step is large. 27 */ 28 #define LARGE_STEP 200 29 30 u_int dummy_get_timecount(struct timecounter *); 31 32 void ntp_update_second(int64_t *, time_t *); 33 int sysctl_tc_hardware(void *, size_t *, void *, size_t); 34 int sysctl_tc_choice(void *, size_t *, void *, size_t); 35 36 /* 37 * Implement a dummy timecounter which we can use until we get a real one 38 * in the air. This allows the console and other early stuff to use 39 * time services. 40 */ 41 42 u_int 43 dummy_get_timecount(struct timecounter *tc) 44 { 45 static u_int now; 46 47 return (++now); 48 } 49 50 static struct timecounter dummy_timecounter = { 51 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 52 }; 53 54 struct timehands { 55 /* These fields must be initialized by the driver. */ 56 struct timecounter *th_counter; 57 int64_t th_adjustment; 58 u_int64_t th_scale; 59 u_int th_offset_count; 60 struct bintime th_offset; 61 struct timeval th_microtime; 62 struct timespec th_nanotime; 63 /* Fields not to be copied in tc_windup start with th_generation. */ 64 volatile u_int th_generation; 65 struct timehands *th_next; 66 }; 67 68 extern struct timehands th0; 69 static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 70 static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 71 static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 72 static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 73 static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 74 static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 75 static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 76 static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 77 static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 78 static struct timehands th0 = { 79 &dummy_timecounter, 80 0, 81 (uint64_t)-1 / 1000000, 82 0, 83 {1, 0}, 84 {0, 0}, 85 {0, 0}, 86 1, 87 &th1 88 }; 89 90 static struct timehands *volatile timehands = &th0; 91 struct timecounter *timecounter = &dummy_timecounter; 92 static struct timecounter *timecounters = &dummy_timecounter; 93 94 volatile time_t time_second = 1; 95 volatile time_t time_uptime = 0; 96 97 extern struct timeval adjtimedelta; 98 static struct bintime boottimebin; 99 static int timestepwarnings; 100 101 void tc_windup(void); 102 103 /* 104 * Return the difference between the timehands' counter value now and what 105 * was when we copied it to the timehands' offset_count. 106 */ 107 static __inline u_int 108 tc_delta(struct timehands *th) 109 { 110 struct timecounter *tc; 111 112 tc = th->th_counter; 113 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 114 tc->tc_counter_mask); 115 } 116 117 /* 118 * Functions for reading the time. We have to loop until we are sure that 119 * the timehands that we operated on was not updated under our feet. See 120 * the comment in <sys/time.h> for a description of these 12 functions. 121 */ 122 123 void 124 binuptime(struct bintime *bt) 125 { 126 struct timehands *th; 127 u_int gen; 128 129 do { 130 th = timehands; 131 gen = th->th_generation; 132 *bt = th->th_offset; 133 bintime_addx(bt, th->th_scale * tc_delta(th)); 134 } while (gen == 0 || gen != th->th_generation); 135 } 136 137 void 138 nanouptime(struct timespec *tsp) 139 { 140 struct bintime bt; 141 142 binuptime(&bt); 143 bintime2timespec(&bt, tsp); 144 } 145 146 void 147 microuptime(struct timeval *tvp) 148 { 149 struct bintime bt; 150 151 binuptime(&bt); 152 bintime2timeval(&bt, tvp); 153 } 154 155 void 156 bintime(struct bintime *bt) 157 { 158 159 binuptime(bt); 160 bintime_add(bt, &boottimebin); 161 } 162 163 void 164 nanotime(struct timespec *tsp) 165 { 166 struct bintime bt; 167 168 bintime(&bt); 169 bintime2timespec(&bt, tsp); 170 } 171 172 void 173 microtime(struct timeval *tvp) 174 { 175 struct bintime bt; 176 177 bintime(&bt); 178 bintime2timeval(&bt, tvp); 179 } 180 181 void 182 getnanouptime(struct timespec *tsp) 183 { 184 struct timehands *th; 185 u_int gen; 186 187 do { 188 th = timehands; 189 gen = th->th_generation; 190 bintime2timespec(&th->th_offset, tsp); 191 } while (gen == 0 || gen != th->th_generation); 192 } 193 194 void 195 getmicrouptime(struct timeval *tvp) 196 { 197 struct timehands *th; 198 u_int gen; 199 200 do { 201 th = timehands; 202 gen = th->th_generation; 203 bintime2timeval(&th->th_offset, tvp); 204 } while (gen == 0 || gen != th->th_generation); 205 } 206 207 void 208 getnanotime(struct timespec *tsp) 209 { 210 struct timehands *th; 211 u_int gen; 212 213 do { 214 th = timehands; 215 gen = th->th_generation; 216 *tsp = th->th_nanotime; 217 } while (gen == 0 || gen != th->th_generation); 218 } 219 220 void 221 getmicrotime(struct timeval *tvp) 222 { 223 struct timehands *th; 224 u_int gen; 225 226 do { 227 th = timehands; 228 gen = th->th_generation; 229 *tvp = th->th_microtime; 230 } while (gen == 0 || gen != th->th_generation); 231 } 232 233 /* 234 * Initialize a new timecounter and possibly use it. 235 */ 236 void 237 tc_init(struct timecounter *tc) 238 { 239 u_int u; 240 241 u = tc->tc_frequency / tc->tc_counter_mask; 242 /* XXX: We need some margin here, 10% is a guess */ 243 u *= 11; 244 u /= 10; 245 if (tc->tc_quality >= 0) { 246 if (u > hz) { 247 tc->tc_quality = -2000; 248 printf("Timecounter \"%s\" frequency %lu Hz", 249 tc->tc_name, (unsigned long)tc->tc_frequency); 250 printf(" -- Insufficient hz, needs at least %u\n", u); 251 } 252 } 253 254 tc->tc_next = timecounters; 255 timecounters = tc; 256 /* 257 * Never automatically use a timecounter with negative quality. 258 * Even though we run on the dummy counter, switching here may be 259 * worse since this timecounter may not be monotonous. 260 */ 261 if (tc->tc_quality < 0) 262 return; 263 if (tc->tc_quality < timecounter->tc_quality) 264 return; 265 if (tc->tc_quality == timecounter->tc_quality && 266 tc->tc_frequency < timecounter->tc_frequency) 267 return; 268 (void)tc->tc_get_timecount(tc); 269 (void)tc->tc_get_timecount(tc); 270 timecounter = tc; 271 } 272 273 /* Report the frequency of the current timecounter. */ 274 u_int64_t 275 tc_getfrequency(void) 276 { 277 278 return (timehands->th_counter->tc_frequency); 279 } 280 281 /* 282 * Step our concept of UTC. This is done by modifying our estimate of 283 * when we booted. 284 * XXX: not locked. 285 */ 286 void 287 tc_setclock(struct timespec *ts) 288 { 289 struct timespec ts2; 290 struct bintime bt, bt2; 291 292 binuptime(&bt2); 293 timespec2bintime(ts, &bt); 294 bintime_sub(&bt, &bt2); 295 bintime_add(&bt2, &boottimebin); 296 boottimebin = bt; 297 bintime2timeval(&bt, &boottime); 298 299 /* XXX fiddle all the little crinkly bits around the fiords... */ 300 tc_windup(); 301 if (timestepwarnings) { 302 bintime2timespec(&bt2, &ts2); 303 log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n", 304 (long)ts2.tv_sec, ts2.tv_nsec, 305 (long)ts->tv_sec, ts->tv_nsec); 306 } 307 } 308 309 /* 310 * Initialize the next struct timehands in the ring and make 311 * it the active timehands. Along the way we might switch to a different 312 * timecounter and/or do seconds processing in NTP. Slightly magic. 313 */ 314 void 315 tc_windup(void) 316 { 317 struct bintime bt; 318 struct timehands *th, *tho; 319 u_int64_t scale; 320 u_int delta, ncount, ogen; 321 int i; 322 #ifdef leapsecs 323 time_t t; 324 #endif 325 326 /* 327 * Make the next timehands a copy of the current one, but do not 328 * overwrite the generation or next pointer. While we update 329 * the contents, the generation must be zero. 330 */ 331 tho = timehands; 332 th = tho->th_next; 333 ogen = th->th_generation; 334 th->th_generation = 0; 335 bcopy(tho, th, offsetof(struct timehands, th_generation)); 336 337 /* 338 * Capture a timecounter delta on the current timecounter and if 339 * changing timecounters, a counter value from the new timecounter. 340 * Update the offset fields accordingly. 341 */ 342 delta = tc_delta(th); 343 if (th->th_counter != timecounter) 344 ncount = timecounter->tc_get_timecount(timecounter); 345 else 346 ncount = 0; 347 th->th_offset_count += delta; 348 th->th_offset_count &= th->th_counter->tc_counter_mask; 349 bintime_addx(&th->th_offset, th->th_scale * delta); 350 351 #ifdef notyet 352 /* 353 * Hardware latching timecounters may not generate interrupts on 354 * PPS events, so instead we poll them. There is a finite risk that 355 * the hardware might capture a count which is later than the one we 356 * got above, and therefore possibly in the next NTP second which might 357 * have a different rate than the current NTP second. It doesn't 358 * matter in practice. 359 */ 360 if (tho->th_counter->tc_poll_pps) 361 tho->th_counter->tc_poll_pps(tho->th_counter); 362 #endif 363 364 /* 365 * Deal with NTP second processing. The for loop normally 366 * iterates at most once, but in extreme situations it might 367 * keep NTP sane if timeouts are not run for several seconds. 368 * At boot, the time step can be large when the TOD hardware 369 * has been read, so on really large steps, we call 370 * ntp_update_second only twice. We need to call it twice in 371 * case we missed a leap second. 372 */ 373 bt = th->th_offset; 374 bintime_add(&bt, &boottimebin); 375 i = bt.sec - tho->th_microtime.tv_sec; 376 if (i > LARGE_STEP) 377 i = 2; 378 for (; i > 0; i--) 379 ntp_update_second(&th->th_adjustment, &bt.sec); 380 381 /* Update the UTC timestamps used by the get*() functions. */ 382 /* XXX shouldn't do this here. Should force non-`get' versions. */ 383 bintime2timeval(&bt, &th->th_microtime); 384 bintime2timespec(&bt, &th->th_nanotime); 385 386 /* Now is a good time to change timecounters. */ 387 if (th->th_counter != timecounter) { 388 th->th_counter = timecounter; 389 th->th_offset_count = ncount; 390 } 391 392 /*- 393 * Recalculate the scaling factor. We want the number of 1/2^64 394 * fractions of a second per period of the hardware counter, taking 395 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 396 * processing provides us with. 397 * 398 * The th_adjustment is nanoseconds per second with 32 bit binary 399 * fraction and we want 64 bit binary fraction of second: 400 * 401 * x = a * 2^32 / 10^9 = a * 4.294967296 402 * 403 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 404 * we can only multiply by about 850 without overflowing, but that 405 * leaves suitably precise fractions for multiply before divide. 406 * 407 * Divide before multiply with a fraction of 2199/512 results in a 408 * systematic undercompensation of 10PPM of th_adjustment. On a 409 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 410 * 411 * We happily sacrifice the lowest of the 64 bits of our result 412 * to the goddess of code clarity. 413 * 414 */ 415 scale = (u_int64_t)1 << 63; 416 scale += (th->th_adjustment / 1024) * 2199; 417 scale /= th->th_counter->tc_frequency; 418 th->th_scale = scale * 2; 419 420 /* 421 * Now that the struct timehands is again consistent, set the new 422 * generation number, making sure to not make it zero. 423 */ 424 if (++ogen == 0) 425 ogen = 1; 426 th->th_generation = ogen; 427 428 /* Go live with the new struct timehands. */ 429 time_second = th->th_microtime.tv_sec; 430 time_uptime = th->th_offset.sec; 431 timehands = th; 432 } 433 434 /* Report or change the active timecounter hardware. */ 435 int 436 sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 437 { 438 char newname[32]; 439 struct timecounter *newtc, *tc; 440 int error; 441 442 tc = timecounter; 443 strlcpy(newname, tc->tc_name, sizeof(newname)); 444 445 error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); 446 if (error != 0 || strcmp(newname, tc->tc_name) == 0) 447 return (error); 448 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 449 if (strcmp(newname, newtc->tc_name) != 0) 450 continue; 451 452 /* Warm up new timecounter. */ 453 (void)newtc->tc_get_timecount(newtc); 454 (void)newtc->tc_get_timecount(newtc); 455 456 timecounter = newtc; 457 return (0); 458 } 459 return (EINVAL); 460 } 461 462 /* Report or change the active timecounter hardware. */ 463 int 464 sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 465 { 466 char buf[32], *spc, *choices; 467 struct timecounter *tc; 468 int error, maxlen; 469 470 spc = ""; 471 error = 0; 472 maxlen = 0; 473 for (tc = timecounters; tc != NULL; tc = tc->tc_next) 474 maxlen += sizeof(buf); 475 choices = malloc(maxlen, M_TEMP, M_WAITOK); 476 *choices = '\0'; 477 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 478 snprintf(buf, sizeof(buf), "%s%s(%d)", 479 spc, tc->tc_name, tc->tc_quality); 480 spc = " "; 481 strlcat(choices, buf, maxlen); 482 } 483 error = sysctl_rdstring(oldp, oldlenp, newp, choices); 484 free(choices, M_TEMP); 485 return (error); 486 } 487 488 /* 489 * Timecounters need to be updated every so often to prevent the hardware 490 * counter from overflowing. Updating also recalculates the cached values 491 * used by the get*() family of functions, so their precision depends on 492 * the update frequency. 493 */ 494 static int tc_tick; 495 496 void 497 tc_ticktock(void) 498 { 499 static int count; 500 501 if (++count < tc_tick) 502 return; 503 count = 0; 504 tc_windup(); 505 } 506 507 void 508 inittimecounter(void) 509 { 510 u_int p; 511 512 /* 513 * Set the initial timeout to 514 * max(1, <approx. number of hardclock ticks in a millisecond>). 515 * People should probably not use the sysctl to set the timeout 516 * to smaller than its inital value, since that value is the 517 * smallest reasonable one. If they want better timestamps they 518 * should use the non-"get"* functions. 519 */ 520 if (hz > 1000) 521 tc_tick = (hz + 500) / 1000; 522 else 523 tc_tick = 1; 524 p = (tc_tick * 1000000) / hz; 525 #ifdef DEBUG 526 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 527 #endif 528 529 /* warm up new timecounter (again) and get rolling. */ 530 (void)timecounter->tc_get_timecount(timecounter); 531 (void)timecounter->tc_get_timecount(timecounter); 532 } 533 534 /* 535 * Return timecounter-related information. 536 */ 537 int 538 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, 539 void *newp, size_t newlen) 540 { 541 if (namelen != 1) 542 return (ENOTDIR); 543 544 switch (name[0]) { 545 case KERN_TIMECOUNTER_TICK: 546 return (sysctl_rdint(oldp, oldlenp, newp, tc_tick)); 547 case KERN_TIMECOUNTER_TIMESTEPWARNINGS: 548 return (sysctl_int(oldp, oldlenp, newp, newlen, 549 ×tepwarnings)); 550 case KERN_TIMECOUNTER_HARDWARE: 551 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); 552 case KERN_TIMECOUNTER_CHOICE: 553 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); 554 default: 555 return (EOPNOTSUPP); 556 } 557 /* NOTREACHED */ 558 } 559 560 void 561 ntp_update_second(int64_t *adjust, time_t *sec) 562 { 563 struct timeval adj; 564 565 /* Skew time according to any adjtime(2) adjustments. */ 566 timerclear(&adj); 567 if (adjtimedelta.tv_sec > 0) 568 adj.tv_usec = 5000; 569 else if (adjtimedelta.tv_sec == 0) 570 adj.tv_usec = MIN(5000, adjtimedelta.tv_usec); 571 else if (adjtimedelta.tv_sec < -1) 572 adj.tv_usec = -5000; 573 else if (adjtimedelta.tv_sec == -1) 574 adj.tv_usec = MAX(-5000, adjtimedelta.tv_usec - 1000000); 575 timersub(&adjtimedelta, &adj, &adjtimedelta); 576 *adjust = ((int64_t)adj.tv_usec * 1000) << 32; 577 *adjust += timecounter->tc_freq_adj; 578 } 579 580 int 581 tc_adjfreq(int64_t *old, int64_t *new) 582 { 583 if (old != NULL) { 584 *old = timecounter->tc_freq_adj; 585 } 586 if (new != NULL) { 587 timecounter->tc_freq_adj = *new; 588 } 589 return 0; 590 } 591 #endif /* __HAVE_TIMECOUNTER */ 592