/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $OpenBSD: kern_tc.c,v 1.4 2005/04/21 00:20:13 deraadt Exp $
 * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/malloc.h>

#ifdef __HAVE_TIMECOUNTER
/*
 * A large step happens on boot. This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

u_int dummy_get_timecount(struct timecounter *);

void ntp_update_second(int64_t *, time_t *);
int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air. This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	u_int64_t		th_scale;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/* Fields not to be copied in tc_windup start with th_generation. */
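	/*
	 * tc_windup copies everything above this point into the next
	 * timehands; th_generation is managed by tc_windup itself (zeroed
	 * while an update is in progress) and th_next is the static ring
	 * linkage, so neither is copied.
	 */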
	volatile u_int		th_generation;
	struct timehands	*th_next;
};

extern struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,
	0,
	{1, 0},
	{0, 0},
	{0, 0},
	1,
	&th1
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

extern struct timeval adjtimedelta;
static struct bintime boottimebin;
static int timestepwarnings;

void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time. We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet. See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */
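
/*
 * A reader snapshots the active timehands pointer and its generation,
 * copies the fields it needs, and retries if the generation changed
 * meanwhile or was zero (i.e. an update was in progress in tc_windup).
 */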

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	binuptime(bt);
	bintime_add(bt, &boottimebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &boottimebin);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 * Otherwise prefer the counter with the highest quality; on a
	 * quality tie, prefer the higher frequency.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	(void)tc->tc_get_timecount(tc);
	timecounter = tc;
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC. This is done by modifying our estimate of
 * when we booted.
 * XXX: not locked.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &boottimebin);
	boottimebin = bt;
	bintime2timeval(&bt, &boottime);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n",
		    (long)ts2.tv_sec, ts2.tv_nsec,
		    (long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands. Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP. Slightly magic.
 */
void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;
#ifdef leapsecs
	time_t t;
#endif

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer. While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them. There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second. It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * Deal with NTP second processing. The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice. We need to call it twice in
	 * case we missed a leap second.
	 */
	/* bt is our current UTC estimate: the new uptime plus the boot time. */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--)
		ntp_update_second(&th->th_adjustment, &bt.sec);

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here. Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor. We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment
	 * (2199/512 = 4.294921875, about 10.6PPM below 4.294967296). On a
	 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	timehands = th;
}

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen, newname,
	    sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;
		return (0);
	}
	return (EINVAL);
}

/* Report the available timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	spc = "";
	error = 0;
	maxlen = 0;
	for (tc = timecounters; tc != NULL; tc = tc->tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	if (!error)
		error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP);
	return (error);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing. Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	tc_windup();
}

void
inittimecounter(void)
{
	u_int p;

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one. If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	p = (tc_tick * 1000000) / hz;
#ifdef DEBUG
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_TICK:
		return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
	case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &timestepwarnings));
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
ntp_update_second(int64_t *adjust, time_t *sec)
{
	struct timeval adj;

	/* Slew time according to any adjtime(2) adjustments. */
	timerclear(&adj);
	if (adjtimedelta.tv_sec > 0)
		adj.tv_usec = 5000;
	else if (adjtimedelta.tv_sec == 0)
		adj.tv_usec = MIN(5000, adjtimedelta.tv_usec);
	else if (adjtimedelta.tv_sec < -1)
		adj.tv_usec = -5000;
	else if (adjtimedelta.tv_sec == -1)
		adj.tv_usec = MAX(-5000, adjtimedelta.tv_usec - 1000000);
	timersub(&adjtimedelta, &adj, &adjtimedelta);
	*adjust = ((int64_t)adj.tv_usec * 1000) << 32;
}
#endif /* __HAVE_TIMECOUNTER */