1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008 The DragonFly Project. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz and Don Ahn. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 34 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 35 */ 36 37 /* 38 * Routines to handle clock hardware. 39 */ 40 41 /* 42 * inittodr, settodr and support routines written 43 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 44 * 45 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 46 */ 47 48 #if 0 49 #include "opt_clock.h" 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <sys/kernel.h> 57 #include <sys/bus.h> 58 #include <sys/sysctl.h> 59 #include <sys/cons.h> 60 #include <sys/kbio.h> 61 #include <sys/systimer.h> 62 #include <sys/globaldata.h> 63 #include <sys/machintr.h> 64 #include <sys/interrupt.h> 65 66 #include <sys/thread2.h> 67 68 #include <machine/clock.h> 69 #include <machine/cputypes.h> 70 #include <machine/frame.h> 71 #include <machine/ipl.h> 72 #include <machine/limits.h> 73 #include <machine/md_var.h> 74 #include <machine/psl.h> 75 #include <machine/segments.h> 76 #include <machine/smp.h> 77 #include <machine/specialreg.h> 78 #include <machine/intr_machdep.h> 79 80 #include <machine_base/apic/ioapic.h> 81 #include <machine_base/apic/ioapic_abi.h> 82 #include <machine_base/icu/icu.h> 83 #include <bus/isa/isa.h> 84 #include <bus/isa/rtc.h> 85 #include <machine_base/isa/timerreg.h> 86 87 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 88 TIMECOUNTER_INIT(placeholder, NULL); 89 90 static void i8254_restore(void); 91 static void resettodr_on_shutdown(void *arg __unused); 92 93 /* 94 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 95 * can use a simple formula for leap years. 96 */ 97 #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) 98 #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) 99 100 #ifndef TIMER_FREQ 101 #define TIMER_FREQ 1193182 102 #endif 103 104 static uint8_t i8254_walltimer_sel; 105 static uint16_t i8254_walltimer_cntr; 106 107 int adjkerntz; /* local offset from GMT in seconds */ 108 int disable_rtc_set; /* disable resettodr() if != 0 */ 109 int tsc_present; 110 int tsc_invariant; 111 int tsc_mpsync; 112 int tsc_is_broken; 113 int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ 114 int timer0_running; 115 tsc_uclock_t tsc_frequency; 116 tsc_uclock_t tsc_oneus_approx; /* always at least 1, approx only */ 117 118 enum tstate { RELEASED, ACQUIRED }; 119 enum tstate timer0_state; 120 enum tstate timer1_state; 121 enum tstate timer2_state; 122 123 int i8254_cputimer_disable; /* No need to initialize i8254 cputimer. */ 124 125 static int beeping = 0; 126 static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; 127 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; 128 static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; 129 static int rtc_loaded; 130 131 static int i8254_cputimer_div; 132 133 static int i8254_nointr; 134 static int i8254_intr_disable = 1; 135 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable); 136 137 static int calibrate_timers_with_rtc = 0; 138 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc); 139 140 static int calibrate_tsc_fast = 1; 141 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast); 142 143 static int calibrate_test; 144 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test); 145 146 static struct callout sysbeepstop_ch; 147 148 static sysclock_t i8254_cputimer_count(void); 149 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last); 150 static void i8254_cputimer_destruct(struct cputimer *cputimer); 151 152 static struct cputimer i8254_cputimer = { 153 .next = SLIST_ENTRY_INITIALIZER, 154 .name = "i8254", 155 .pri = CPUTIMER_PRI_8254, 156 .type = 0, /* determined later */ 157 .count = i8254_cputimer_count, 158 .fromhz = cputimer_default_fromhz, 159 .fromus = cputimer_default_fromus, 160 .construct = i8254_cputimer_construct, 161 .destruct = i8254_cputimer_destruct, 162 .freq = TIMER_FREQ 163 }; 164 165 static sysclock_t tsc_cputimer_count_mfence(void); 166 static sysclock_t tsc_cputimer_count_lfence(void); 167 static void tsc_cputimer_construct(struct cputimer *, sysclock_t); 168 169 static struct cputimer tsc_cputimer = { 170 .next = SLIST_ENTRY_INITIALIZER, 171 .name = "TSC", 172 .pri = CPUTIMER_PRI_TSC, 173 .type = CPUTIMER_TSC, 174 .count = NULL, /* determined later */ 175 .fromhz = cputimer_default_fromhz, 176 .fromus = cputimer_default_fromus, 177 .construct = tsc_cputimer_construct, 178 .destruct = cputimer_default_destruct, 179 .freq = 0 /* determined later */ 180 }; 181 182 static struct cpucounter tsc_cpucounter = { 183 .freq = 0, /* determined later */ 184 .count = NULL, /* determined later */ 185 .flags = 0, /* adjusted later */ 186 .prio = CPUCOUNTER_PRIO_TSC, 187 .type = CPUCOUNTER_TSC 188 }; 189 190 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t); 191 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *); 192 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t); 193 194 static struct cputimer_intr i8254_cputimer_intr = { 195 .freq = TIMER_FREQ, 196 .reload = i8254_intr_reload, 197 .enable = cputimer_intr_default_enable, 198 .config = i8254_intr_config, 199 .restart = cputimer_intr_default_restart, 200 .pmfixup = cputimer_intr_default_pmfixup, 201 .initclock = i8254_intr_initclock, 202 .pcpuhand = NULL, 203 .next = SLIST_ENTRY_INITIALIZER, 204 .name = "i8254", 205 .type = CPUTIMER_INTR_8254, 206 .prio = CPUTIMER_INTR_PRIO_8254, 207 .caps = CPUTIMER_INTR_CAP_PS, 208 .priv = NULL 209 }; 210 211 /* 212 * Use this to lwkt_switch() when the scheduler clock is not 213 * yet running, otherwise lwkt_switch() won't do anything. 214 * XXX needs cleaning up in lwkt_thread.c 215 */ 216 static void 217 lwkt_force_switch(void) 218 { 219 crit_enter(); 220 lwkt_schedulerclock(curthread); 221 crit_exit(); 222 lwkt_switch(); 223 } 224 225 /* 226 * timer0 clock interrupt. Timer0 is in one-shot mode and has stopped 227 * counting as of this interrupt. We use timer1 in free-running mode (not 228 * generating any interrupts) as our main counter. Each cpu has timeouts 229 * pending. 230 * 231 * This code is INTR_MPSAFE and may be called without the BGL held. 232 */ 233 static void 234 clkintr(void *dummy, void *frame_arg) 235 { 236 static sysclock_t sysclock_count; /* NOTE! Must be static */ 237 struct globaldata *gd = mycpu; 238 struct globaldata *gscan; 239 int n; 240 241 /* 242 * SWSTROBE mode is a one-shot, the timer is no longer running 243 */ 244 timer0_running = 0; 245 246 /* 247 * XXX the dispatcher needs work. right now we call systimer_intr() 248 * directly or via IPI for any cpu with systimers queued, which is 249 * usually *ALL* of them. We need to use the LAPIC timer for this. 250 */ 251 sysclock_count = sys_cputimer->count(); 252 for (n = 0; n < ncpus; ++n) { 253 gscan = globaldata_find(n); 254 if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL) 255 continue; 256 if (gscan != gd) { 257 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, 258 &sysclock_count, 1); 259 } else { 260 systimer_intr(&sysclock_count, 0, frame_arg); 261 } 262 } 263 } 264 265 266 /* 267 * NOTE! not MP safe. 268 */ 269 int 270 acquire_timer2(int mode) 271 { 272 if (timer2_state != RELEASED) 273 return (-1); 274 timer2_state = ACQUIRED; 275 276 /* 277 * This access to the timer registers is as atomic as possible 278 * because it is a single instruction. We could do better if we 279 * knew the rate. 280 */ 281 outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); 282 return (0); 283 } 284 285 int 286 release_timer2(void) 287 { 288 if (timer2_state != ACQUIRED) 289 return (-1); 290 outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); 291 timer2_state = RELEASED; 292 return (0); 293 } 294 295 #include "opt_ddb.h" 296 #ifdef DDB 297 #include <ddb/ddb.h> 298 299 DB_SHOW_COMMAND(rtc, rtc) 300 { 301 kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", 302 rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), 303 rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), 304 rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); 305 } 306 #endif /* DDB */ 307 308 /* 309 * Return the current cpu timer count as a 32 bit integer. 310 */ 311 static 312 sysclock_t 313 i8254_cputimer_count(void) 314 { 315 static uint16_t cputimer_last; 316 uint16_t count; 317 sysclock_t ret; 318 319 clock_lock(); 320 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH); 321 count = (uint8_t)inb(i8254_walltimer_cntr); /* get countdown */ 322 count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8); 323 count = -count; /* -> countup */ 324 if (count < cputimer_last) /* rollover */ 325 i8254_cputimer.base += 0x00010000; 326 ret = i8254_cputimer.base | count; 327 cputimer_last = count; 328 clock_unlock(); 329 return(ret); 330 } 331 332 /* 333 * This function is called whenever the system timebase changes, allowing 334 * us to calculate what is needed to convert a system timebase tick 335 * into an 8254 tick for the interrupt timer. If we can convert to a 336 * simple shift, multiplication, or division, we do so. Otherwise 64 337 * bit arithmatic is required every time the interrupt timer is reloaded. 338 */ 339 static void 340 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer) 341 { 342 int freq; 343 int div; 344 345 /* 346 * Will a simple divide do the trick? 347 */ 348 div = (timer->freq + (cti->freq / 2)) / cti->freq; 349 freq = cti->freq * div; 350 351 if (freq >= timer->freq - 1 && freq <= timer->freq + 1) 352 i8254_cputimer_div = div; 353 else 354 i8254_cputimer_div = 0; 355 } 356 357 /* 358 * Reload for the next timeout. It is possible for the reload value 359 * to be 0 or negative, indicating that an immediate timer interrupt 360 * is desired. For now make the minimum 2 ticks. 361 * 362 * We may have to convert from the system timebase to the 8254 timebase. 363 */ 364 static void 365 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload) 366 { 367 uint16_t count; 368 369 if (i8254_cputimer_div) 370 reload /= i8254_cputimer_div; 371 else 372 reload = (int64_t)reload * cti->freq / sys_cputimer->freq; 373 374 if ((int)reload < 2) 375 reload = 2; 376 377 clock_lock(); 378 if (timer0_running) { 379 outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); /* count-down timer */ 380 count = (uint8_t)inb(TIMER_CNTR0); /* lsb */ 381 count |= ((uint8_t)inb(TIMER_CNTR0) << 8); /* msb */ 382 if (reload < count) { 383 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 384 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */ 385 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */ 386 } 387 } else { 388 timer0_running = 1; 389 if (reload > 0xFFFF) 390 reload = 0; /* full count */ 391 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 392 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */ 393 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */ 394 } 395 clock_unlock(); 396 } 397 398 /* 399 * DELAY(usec) - Spin for the specified number of microseconds. 400 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds, 401 * but do a thread switch in the loop 402 * 403 * Relies on timer 1 counting down from (cputimer_freq / hz) 404 * Note: timer had better have been programmed before this is first used! 405 */ 406 static void 407 DODELAY(int n, int doswitch) 408 { 409 ssysclock_t delta, ticks_left; 410 sysclock_t prev_tick, tick; 411 412 #ifdef DELAYDEBUG 413 int getit_calls = 1; 414 int n1; 415 static int state = 0; 416 417 if (state == 0) { 418 state = 1; 419 for (n1 = 1; n1 <= 10000000; n1 *= 10) 420 DELAY(n1); 421 state = 2; 422 } 423 if (state == 1) 424 kprintf("DELAY(%d)...", n); 425 #endif 426 /* 427 * Guard against the timer being uninitialized if we are called 428 * early for console i/o. 429 */ 430 if (timer0_state == RELEASED && i8254_cputimer_disable == 0) 431 i8254_restore(); 432 433 /* 434 * Read the counter first, so that the rest of the setup overhead is 435 * counted. Then calculate the number of hardware timer ticks 436 * required, rounding up to be sure we delay at least the requested 437 * number of microseconds. 438 */ 439 prev_tick = sys_cputimer->count(); 440 ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) / 441 1000000; 442 443 /* 444 * Loop until done. 445 */ 446 while (ticks_left > 0) { 447 tick = sys_cputimer->count(); 448 #ifdef DELAYDEBUG 449 ++getit_calls; 450 #endif 451 delta = tick - prev_tick; 452 prev_tick = tick; 453 if (delta < 0) 454 delta = 0; 455 ticks_left -= delta; 456 if (doswitch && ticks_left > 0) 457 lwkt_switch(); 458 cpu_pause(); 459 } 460 #ifdef DELAYDEBUG 461 if (state == 1) 462 kprintf(" %d calls to getit() at %d usec each\n", 463 getit_calls, (n + 5) / getit_calls); 464 #endif 465 } 466 467 /* 468 * DELAY() never switches. 469 */ 470 void 471 DELAY(int n) 472 { 473 DODELAY(n, 0); 474 } 475 476 /* 477 * Returns non-zero if the specified time period has elapsed. Call 478 * first with last_clock set to 0. 479 */ 480 int 481 CHECKTIMEOUT(TOTALDELAY *tdd) 482 { 483 sysclock_t delta; 484 int us; 485 486 if (tdd->started == 0) { 487 if (timer0_state == RELEASED && i8254_cputimer_disable == 0) 488 i8254_restore(); 489 tdd->last_clock = sys_cputimer->count(); 490 tdd->started = 1; 491 return(0); 492 } 493 delta = sys_cputimer->count() - tdd->last_clock; 494 us = (u_int64_t)delta * (u_int64_t)1000000 / 495 (u_int64_t)sys_cputimer->freq; 496 tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq / 497 1000000; 498 tdd->us -= us; 499 return (tdd->us < 0); 500 } 501 502 503 /* 504 * DRIVERSLEEP() does not switch if called with a spinlock held or 505 * from a hard interrupt. 506 */ 507 void 508 DRIVERSLEEP(int usec) 509 { 510 globaldata_t gd = mycpu; 511 512 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 513 DODELAY(usec, 0); 514 } else { 515 DODELAY(usec, 1); 516 } 517 } 518 519 static void 520 sysbeepstop(void *chan) 521 { 522 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 523 beeping = 0; 524 release_timer2(); 525 } 526 527 int 528 sysbeep(int pitch, int period) 529 { 530 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 531 return(-1); 532 if (sysbeep_enable == 0) 533 return(-1); 534 /* 535 * Nobody else is using timer2, we do not need the clock lock 536 */ 537 outb(TIMER_CNTR2, pitch); 538 outb(TIMER_CNTR2, (pitch>>8)); 539 if (!beeping) { 540 /* enable counter2 output to speaker */ 541 outb(IO_PPI, inb(IO_PPI) | 3); 542 beeping = period; 543 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 544 } 545 return (0); 546 } 547 548 /* 549 * RTC support routines 550 */ 551 552 int 553 rtcin(int reg) 554 { 555 u_char val; 556 557 crit_enter(); 558 outb(IO_RTC, reg); 559 inb(0x84); 560 val = inb(IO_RTC + 1); 561 inb(0x84); 562 crit_exit(); 563 return (val); 564 } 565 566 static __inline void 567 writertc(u_char reg, u_char val) 568 { 569 crit_enter(); 570 inb(0x84); 571 outb(IO_RTC, reg); 572 inb(0x84); 573 outb(IO_RTC + 1, val); 574 inb(0x84); /* XXX work around wrong order in rtcin() */ 575 crit_exit(); 576 } 577 578 static __inline int 579 readrtc(int port) 580 { 581 return(bcd2bin(rtcin(port))); 582 } 583 584 static u_int 585 calibrate_clocks(void) 586 { 587 tsc_uclock_t old_tsc; 588 u_int tot_count; 589 sysclock_t count, prev_count; 590 int sec, start_sec, timeout; 591 592 if (bootverbose) 593 kprintf("Calibrating clock(s) ...\n"); 594 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 595 goto fail; 596 timeout = 100000000; 597 598 /* Read the mc146818A seconds counter. */ 599 for (;;) { 600 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 601 sec = rtcin(RTC_SEC); 602 break; 603 } 604 if (--timeout == 0) 605 goto fail; 606 } 607 608 /* Wait for the mC146818A seconds counter to change. */ 609 start_sec = sec; 610 for (;;) { 611 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 612 sec = rtcin(RTC_SEC); 613 if (sec != start_sec) 614 break; 615 } 616 if (--timeout == 0) 617 goto fail; 618 } 619 620 /* Start keeping track of the i8254 counter. */ 621 prev_count = sys_cputimer->count(); 622 tot_count = 0; 623 624 if (tsc_present) 625 old_tsc = rdtsc(); 626 else 627 old_tsc = 0; /* shut up gcc */ 628 629 /* 630 * Wait for the mc146818A seconds counter to change. Read the i8254 631 * counter for each iteration since this is convenient and only 632 * costs a few usec of inaccuracy. The timing of the final reads 633 * of the counters almost matches the timing of the initial reads, 634 * so the main cause of inaccuracy is the varying latency from 635 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 636 * rtcin(RTC_SEC) that returns a changed seconds count. The 637 * maximum inaccuracy from this cause is < 10 usec on 486's. 638 */ 639 start_sec = sec; 640 for (;;) { 641 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 642 sec = rtcin(RTC_SEC); 643 count = sys_cputimer->count(); 644 tot_count += (int)(count - prev_count); 645 prev_count = count; 646 if (sec != start_sec) 647 break; 648 if (--timeout == 0) 649 goto fail; 650 } 651 652 /* 653 * Read the cpu cycle counter. The timing considerations are 654 * similar to those for the i8254 clock. 655 */ 656 if (tsc_present) { 657 tsc_frequency = rdtsc() - old_tsc; 658 if (bootverbose) { 659 kprintf("TSC clock: %jd Hz (Method A)\n", 660 (intmax_t)tsc_frequency); 661 } 662 } 663 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 664 665 kprintf("i8254 clock: %u Hz\n", tot_count); 666 return (tot_count); 667 668 fail: 669 kprintf("failed, using default i8254 clock of %u Hz\n", 670 i8254_cputimer.freq); 671 return (i8254_cputimer.freq); 672 } 673 674 static void 675 i8254_restore(void) 676 { 677 timer0_state = ACQUIRED; 678 679 clock_lock(); 680 681 /* 682 * Timer0 is our fine-grained variable clock interrupt 683 */ 684 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 685 outb(TIMER_CNTR0, 2); /* lsb */ 686 outb(TIMER_CNTR0, 0); /* msb */ 687 clock_unlock(); 688 689 if (!i8254_nointr) { 690 cputimer_intr_register(&i8254_cputimer_intr); 691 cputimer_intr_select(&i8254_cputimer_intr, 0); 692 } 693 694 /* 695 * Timer1 or timer2 is our free-running clock, but only if another 696 * has not been selected. 697 */ 698 cputimer_register(&i8254_cputimer); 699 cputimer_select(&i8254_cputimer, 0); 700 } 701 702 static void 703 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 704 { 705 int which; 706 707 /* 708 * Should we use timer 1 or timer 2 ? 709 */ 710 which = 0; 711 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 712 if (which != 1 && which != 2) 713 which = 2; 714 715 switch(which) { 716 case 1: 717 timer->name = "i8254_timer1"; 718 timer->type = CPUTIMER_8254_SEL1; 719 i8254_walltimer_sel = TIMER_SEL1; 720 i8254_walltimer_cntr = TIMER_CNTR1; 721 timer1_state = ACQUIRED; 722 break; 723 case 2: 724 timer->name = "i8254_timer2"; 725 timer->type = CPUTIMER_8254_SEL2; 726 i8254_walltimer_sel = TIMER_SEL2; 727 i8254_walltimer_cntr = TIMER_CNTR2; 728 timer2_state = ACQUIRED; 729 break; 730 } 731 732 timer->base = (oldclock + 0xFFFF) & ~0xFFFF; 733 734 clock_lock(); 735 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 736 outb(i8254_walltimer_cntr, 0); /* lsb */ 737 outb(i8254_walltimer_cntr, 0); /* msb */ 738 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 739 clock_unlock(); 740 } 741 742 static void 743 i8254_cputimer_destruct(struct cputimer *timer) 744 { 745 switch(timer->type) { 746 case CPUTIMER_8254_SEL1: 747 timer1_state = RELEASED; 748 break; 749 case CPUTIMER_8254_SEL2: 750 timer2_state = RELEASED; 751 break; 752 default: 753 break; 754 } 755 timer->type = 0; 756 } 757 758 static void 759 rtc_restore(void) 760 { 761 /* Restore all of the RTC's "status" (actually, control) registers. */ 762 writertc(RTC_STATUSB, RTCSB_24HR); 763 writertc(RTC_STATUSA, rtc_statusa); 764 writertc(RTC_STATUSB, rtc_statusb); 765 } 766 767 /* 768 * Restore all the timers. 769 * 770 * This function is called to resynchronize our core timekeeping after a 771 * long halt, e.g. from apm_default_resume() and friends. It is also 772 * called if after a BIOS call we have detected munging of the 8254. 773 * It is necessary because cputimer_count() counter's delta may have grown 774 * too large for nanouptime() and friends to handle, or (in the case of 8254 775 * munging) might cause the SYSTIMER code to prematurely trigger. 776 */ 777 void 778 timer_restore(void) 779 { 780 crit_enter(); 781 if (i8254_cputimer_disable == 0) 782 i8254_restore(); /* restore timer_freq and hz */ 783 rtc_restore(); /* reenable RTC interrupts */ 784 crit_exit(); 785 } 786 787 #define MAX_MEASURE_RETRIES 100 788 789 static u_int64_t 790 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time, 791 int *retries) 792 { 793 u_int64_t tsc1, tsc2; 794 u_int64_t threshold; 795 sysclock_t val; 796 int cnt = 0; 797 798 do { 799 if (cnt > MAX_MEASURE_RETRIES/2) 800 threshold = timer_latency << 1; 801 else 802 threshold = timer_latency + (timer_latency >> 2); 803 804 cnt++; 805 tsc1 = rdtsc_ordered(); 806 val = sys_cputimer->count(); 807 tsc2 = rdtsc_ordered(); 808 } while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES && 809 tsc2 - tsc1 > threshold); 810 811 *retries = cnt - 1; 812 *latency = tsc2 - tsc1; 813 *time = val; 814 return tsc1; 815 } 816 817 static u_int64_t 818 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency) 819 { 820 if (calibrate_tsc_fast) { 821 u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1; 822 u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2; 823 u_int64_t freq1, freq2; 824 sysclock_t start1, end1, start2, end2; 825 int retries1, retries2, retries3, retries4; 826 827 DELAY(1000); 828 old_tsc1 = do_measure(timer_latency, &start_lat1, &start1, 829 &retries1); 830 DELAY(20000); 831 old_tsc2 = do_measure(timer_latency, &start_lat2, &start2, 832 &retries2); 833 DELAY(usecs); 834 new_tsc1 = do_measure(timer_latency, &end_lat1, &end1, 835 &retries3); 836 DELAY(20000); 837 new_tsc2 = do_measure(timer_latency, &end_lat2, &end2, 838 &retries4); 839 840 old_tsc1 += start_lat1; 841 old_tsc2 += start_lat2; 842 freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2; 843 freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2; 844 end1 -= start1; 845 end2 -= start2; 846 /* This should in practice be safe from overflows. */ 847 freq1 = (freq1 * sys_cputimer->freq) / end1; 848 freq2 = (freq2 * sys_cputimer->freq) / end2; 849 if (calibrate_test && (retries1 > 0 || retries2 > 0)) { 850 kprintf("%s: retries: %d, %d, %d, %d\n", 851 __func__, retries1, retries2, retries3, retries4); 852 } 853 if (calibrate_test) { 854 kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n", 855 __func__, freq1, freq2, (freq1 + freq2) / 2); 856 } 857 return (freq1 + freq2) / 2; 858 } else { 859 u_int64_t old_tsc, new_tsc; 860 u_int64_t freq; 861 862 old_tsc = rdtsc_ordered(); 863 DELAY(usecs); 864 new_tsc = rdtsc(); 865 freq = new_tsc - old_tsc; 866 /* This should in practice be safe from overflows. */ 867 freq = (freq * 1000 * 1000) / usecs; 868 return freq; 869 } 870 } 871 872 /* 873 * Initialize 8254 timer 0 early so that it can be used in DELAY(). 874 */ 875 void 876 startrtclock(void) 877 { 878 const timecounter_init_t **list; 879 u_int delta, freq; 880 881 callout_init_mp(&sysbeepstop_ch); 882 883 /* 884 * Can we use the TSC? 885 * 886 * NOTE: If running under qemu, probably a good idea to force the 887 * TSC because we are not likely to detect it as being 888 * invariant or mpsyncd if you don't. This will greatly 889 * reduce SMP contention. 890 */ 891 if (cpu_feature & CPUID_TSC) { 892 tsc_present = 1; 893 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant); 894 895 if ((cpu_vendor_id == CPU_VENDOR_INTEL || 896 cpu_vendor_id == CPU_VENDOR_AMD) && 897 cpu_exthigh >= 0x80000007) { 898 u_int regs[4]; 899 900 do_cpuid(0x80000007, regs); 901 if (regs[3] & 0x100) 902 tsc_invariant = 1; 903 } 904 } else { 905 tsc_present = 0; 906 } 907 908 /* 909 * Initial RTC state, don't do anything unexpected 910 */ 911 writertc(RTC_STATUSA, rtc_statusa); 912 writertc(RTC_STATUSB, RTCSB_24HR); 913 914 SET_FOREACH(list, timecounter_init_set) { 915 if ((*list)->configure != NULL) 916 (*list)->configure(); 917 } 918 919 /* 920 * If tsc_frequency is already initialized now, and a flag is set 921 * that i8254 timer is unneeded, we are done. 922 */ 923 if (tsc_frequency != 0 && i8254_cputimer_disable != 0) 924 goto done; 925 926 /* 927 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 928 * generate an interrupt, which we will ignore for now. 929 * 930 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000 931 * (so it counts a full 2^16 and repeats). We will use this timer 932 * for our counting. 933 */ 934 if (i8254_cputimer_disable == 0) 935 i8254_restore(); 936 937 kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name); 938 939 /* 940 * When booting without verbose messages, it's pointless to run the 941 * calibrate_clocks() calibration code, when we don't use the 942 * results in any way. With bootverbose, we are at least printing 943 * this information to the kernel log. 944 */ 945 if (i8254_cputimer_disable != 0 || 946 (calibrate_timers_with_rtc == 0 && !bootverbose)) { 947 goto skip_rtc_based; 948 } 949 950 freq = calibrate_clocks(); 951 #ifdef CLK_CALIBRATION_LOOP 952 if (bootverbose) { 953 int c; 954 955 cnpoll(TRUE); 956 kprintf("Press a key on the console to " 957 "abort clock calibration\n"); 958 while ((c = cncheckc()) == -1 || c == NOKEY) 959 calibrate_clocks(); 960 cnpoll(FALSE); 961 } 962 #endif 963 964 /* 965 * Use the calibrated i8254 frequency if it seems reasonable. 966 * Otherwise use the default, and don't use the calibrated i586 967 * frequency. 968 */ 969 delta = freq > i8254_cputimer.freq ? 970 freq - i8254_cputimer.freq : i8254_cputimer.freq - freq; 971 if (delta < i8254_cputimer.freq / 100) { 972 if (calibrate_timers_with_rtc == 0) { 973 kprintf( 974 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n"); 975 freq = i8254_cputimer.freq; 976 } 977 /* 978 * NOTE: 979 * Interrupt timer's freq must be adjusted 980 * before we change the cuptimer's frequency. 981 */ 982 i8254_cputimer_intr.freq = freq; 983 cputimer_set_frequency(&i8254_cputimer, freq); 984 } else { 985 if (bootverbose) 986 kprintf("%d Hz differs from default of %d Hz " 987 "by more than 1%%\n", 988 freq, i8254_cputimer.freq); 989 tsc_frequency = 0; 990 } 991 992 if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) { 993 kprintf("hw.calibrate_timers_with_rtc not " 994 "set - using old calibration method\n"); 995 tsc_frequency = 0; 996 } 997 998 skip_rtc_based: 999 if (tsc_present && tsc_frequency == 0) { 1000 u_int cnt; 1001 u_int64_t cputime_latency_tsc = 0, max = 0, min = 0; 1002 int i; 1003 1004 for (i = 0; i < 10; i++) { 1005 /* Warm up */ 1006 (void)sys_cputimer->count(); 1007 } 1008 for (i = 0; i < 100; i++) { 1009 u_int64_t old_tsc, new_tsc; 1010 1011 old_tsc = rdtsc_ordered(); 1012 (void)sys_cputimer->count(); 1013 new_tsc = rdtsc_ordered(); 1014 cputime_latency_tsc += (new_tsc - old_tsc); 1015 if (max < (new_tsc - old_tsc)) 1016 max = new_tsc - old_tsc; 1017 if (min == 0 || min > (new_tsc - old_tsc)) 1018 min = new_tsc - old_tsc; 1019 } 1020 cputime_latency_tsc /= 100; 1021 kprintf( 1022 "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n", 1023 cputime_latency_tsc, min, max); 1024 /* XXX Instead of this, properly filter out outliers. */ 1025 cputime_latency_tsc = min; 1026 1027 if (calibrate_test > 0) { 1028 u_int64_t values[20], avg = 0; 1029 for (i = 1; i <= 20; i++) { 1030 u_int64_t freq; 1031 1032 freq = do_calibrate_cputimer(i * 100 * 1000, 1033 cputime_latency_tsc); 1034 values[i - 1] = freq; 1035 } 1036 /* Compute an average TSC for the 1s to 2s delays. */ 1037 for (i = 10; i < 20; i++) 1038 avg += values[i]; 1039 avg /= 10; 1040 for (i = 0; i < 20; i++) { 1041 kprintf("%ums: %lu (Diff from average: %ld)\n", 1042 (i + 1) * 100, values[i], 1043 (int64_t)(values[i] - avg)); 1044 } 1045 } 1046 1047 if (calibrate_tsc_fast > 0) { 1048 /* HPET would typically be >10MHz */ 1049 if (sys_cputimer->freq >= 10000000) 1050 cnt = 200000; 1051 else 1052 cnt = 500000; 1053 } else { 1054 cnt = 1000000; 1055 } 1056 1057 tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc); 1058 if (bootverbose && calibrate_timers_with_rtc) { 1059 kprintf("TSC clock: %jd Hz (Method B)\n", 1060 (intmax_t)tsc_frequency); 1061 } 1062 } 1063 1064 done: 1065 if (tsc_present) { 1066 kprintf("TSC%s clock: %jd Hz\n", 1067 tsc_invariant ? " invariant" : "", 1068 (intmax_t)tsc_frequency); 1069 } 1070 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 1071 1072 EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, 1073 NULL, SHUTDOWN_PRI_LAST); 1074 } 1075 1076 /* 1077 * Sync the time of day back to the RTC on shutdown, but only if 1078 * we have already loaded it and have not crashed. 1079 */ 1080 static void 1081 resettodr_on_shutdown(void *arg __unused) 1082 { 1083 if (rtc_loaded && panicstr == NULL) { 1084 resettodr(); 1085 } 1086 } 1087 1088 /* 1089 * Initialize the time of day register, based on the time base which is, e.g. 1090 * from a filesystem. 1091 */ 1092 void 1093 inittodr(time_t base) 1094 { 1095 unsigned long sec, days; 1096 int year, month; 1097 int y, m; 1098 struct timespec ts; 1099 1100 if (base) { 1101 ts.tv_sec = base; 1102 ts.tv_nsec = 0; 1103 set_timeofday(&ts); 1104 } 1105 1106 /* Look if we have a RTC present and the time is valid */ 1107 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 1108 goto wrong_time; 1109 1110 /* wait for time update to complete */ 1111 /* If RTCSA_TUP is zero, we have at least 244us before next update */ 1112 crit_enter(); 1113 while (rtcin(RTC_STATUSA) & RTCSA_TUP) { 1114 crit_exit(); 1115 crit_enter(); 1116 } 1117 1118 days = 0; 1119 #ifdef USE_RTC_CENTURY 1120 year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; 1121 #else 1122 year = readrtc(RTC_YEAR) + 1900; 1123 if (year < 1970) 1124 year += 100; 1125 #endif 1126 if (year < 1970) { 1127 crit_exit(); 1128 goto wrong_time; 1129 } 1130 month = readrtc(RTC_MONTH); 1131 for (m = 1; m < month; m++) 1132 days += daysinmonth[m-1]; 1133 if ((month > 2) && LEAPYEAR(year)) 1134 days ++; 1135 days += readrtc(RTC_DAY) - 1; 1136 for (y = 1970; y < year; y++) 1137 days += DAYSPERYEAR + LEAPYEAR(y); 1138 sec = ((( days * 24 + 1139 readrtc(RTC_HRS)) * 60 + 1140 readrtc(RTC_MIN)) * 60 + 1141 readrtc(RTC_SEC)); 1142 /* sec now contains the number of seconds, since Jan 1 1970, 1143 in the local time zone */ 1144 1145 sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); 1146 1147 y = (int)(time_second - sec); 1148 if (y <= -2 || y >= 2) { 1149 /* badly off, adjust it */ 1150 ts.tv_sec = sec; 1151 ts.tv_nsec = 0; 1152 set_timeofday(&ts); 1153 } 1154 rtc_loaded = 1; 1155 crit_exit(); 1156 return; 1157 1158 wrong_time: 1159 kprintf("Invalid time in real time clock.\n"); 1160 kprintf("Check and reset the date immediately!\n"); 1161 } 1162 1163 /* 1164 * Write system time back to RTC 1165 */ 1166 void 1167 resettodr(void) 1168 { 1169 struct timeval tv; 1170 unsigned long tm; 1171 int m; 1172 int y; 1173 1174 if (disable_rtc_set) 1175 return; 1176 1177 microtime(&tv); 1178 tm = tv.tv_sec; 1179 1180 crit_enter(); 1181 /* Disable RTC updates and interrupts. */ 1182 writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); 1183 1184 /* Calculate local time to put in RTC */ 1185 1186 tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); 1187 1188 writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ 1189 writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ 1190 writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ 1191 1192 /* We have now the days since 01-01-1970 in tm */ 1193 writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ 1194 for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); 1195 tm >= m; 1196 y++, m = DAYSPERYEAR + LEAPYEAR(y)) 1197 tm -= m; 1198 1199 /* Now we have the years in y and the day-of-the-year in tm */ 1200 writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ 1201 #ifdef USE_RTC_CENTURY 1202 writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ 1203 #endif 1204 for (m = 0; ; m++) { 1205 int ml; 1206 1207 ml = daysinmonth[m]; 1208 if (m == 1 && LEAPYEAR(y)) 1209 ml++; 1210 if (tm < ml) 1211 break; 1212 tm -= ml; 1213 } 1214 1215 writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ 1216 writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ 1217 1218 /* Reenable RTC updates and interrupts. */ 1219 writertc(RTC_STATUSB, rtc_statusb); 1220 crit_exit(); 1221 } 1222 1223 static int 1224 i8254_ioapic_trial(int irq, struct cputimer_intr *cti) 1225 { 1226 sysclock_t base; 1227 long lastcnt; 1228 1229 /* 1230 * Following code assumes the 8254 is the cpu timer, 1231 * so make sure it is. 1232 */ 1233 KKASSERT(sys_cputimer == &i8254_cputimer); 1234 KKASSERT(cti == &i8254_cputimer_intr); 1235 1236 lastcnt = get_interrupt_counter(irq, mycpuid); 1237 1238 /* 1239 * Force an 8254 Timer0 interrupt and wait 1/100s for 1240 * it to happen, then see if we got it. 1241 */ 1242 kprintf("IOAPIC: testing 8254 interrupt delivery\n"); 1243 1244 i8254_intr_reload(cti, 2); 1245 base = sys_cputimer->count(); 1246 while (sys_cputimer->count() - base < sys_cputimer->freq / 100) 1247 ; /* nothing */ 1248 1249 if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) 1250 return ENOENT; 1251 return 0; 1252 } 1253 1254 /* 1255 * Start both clocks running. DragonFly note: the stat clock is no longer 1256 * used. Instead, 8254 based systimers are used for all major clock 1257 * interrupts. 1258 */ 1259 static void 1260 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected) 1261 { 1262 void *clkdesc = NULL; 1263 int irq = 0, mixed_mode = 0, error; 1264 1265 KKASSERT(mycpuid == 0); 1266 1267 if (!selected && i8254_intr_disable) 1268 goto nointr; 1269 1270 /* 1271 * The stat interrupt mask is different without the 1272 * statistics clock. Also, don't set the interrupt 1273 * flag which would normally cause the RTC to generate 1274 * interrupts. 1275 */ 1276 rtc_statusb = RTCSB_24HR; 1277 1278 /* Finish initializing 8254 timer 0. */ 1279 if (ioapic_enable) { 1280 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE, 1281 INTR_POLARITY_HIGH); 1282 if (irq < 0) { 1283 mixed_mode_setup: 1284 error = ioapic_conf_legacy_extint(0); 1285 if (!error) { 1286 irq = machintr_legacy_intr_find(0, 1287 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH); 1288 if (irq < 0) 1289 error = ENOENT; 1290 } 1291 1292 if (error) { 1293 if (!selected) { 1294 kprintf("IOAPIC: setup mixed mode for " 1295 "irq 0 failed: %d\n", error); 1296 goto nointr; 1297 } else { 1298 panic("IOAPIC: setup mixed mode for " 1299 "irq 0 failed: %d\n", error); 1300 } 1301 } 1302 mixed_mode = 1; 1303 } 1304 clkdesc = register_int(irq, clkintr, NULL, "clk", 1305 NULL, 1306 INTR_EXCL | INTR_CLOCK | 1307 INTR_NOPOLL | INTR_MPSAFE | 1308 INTR_NOENTROPY, 0); 1309 } else { 1310 register_int(0, clkintr, NULL, "clk", NULL, 1311 INTR_EXCL | INTR_CLOCK | 1312 INTR_NOPOLL | INTR_MPSAFE | 1313 INTR_NOENTROPY, 0); 1314 } 1315 1316 /* Initialize RTC. */ 1317 writertc(RTC_STATUSA, rtc_statusa); 1318 writertc(RTC_STATUSB, RTCSB_24HR); 1319 1320 if (ioapic_enable) { 1321 error = i8254_ioapic_trial(irq, cti); 1322 if (error) { 1323 if (mixed_mode) { 1324 if (!selected) { 1325 kprintf("IOAPIC: mixed mode for irq %d " 1326 "trial failed: %d\n", 1327 irq, error); 1328 goto nointr; 1329 } else { 1330 panic("IOAPIC: mixed mode for irq %d " 1331 "trial failed: %d\n", irq, error); 1332 } 1333 } else { 1334 kprintf("IOAPIC: warning 8254 is not connected " 1335 "to the correct pin, try mixed mode\n"); 1336 unregister_int(clkdesc, 0); 1337 goto mixed_mode_setup; 1338 } 1339 } 1340 } 1341 return; 1342 1343 nointr: 1344 i8254_nointr = 1; /* don't try to register again */ 1345 cputimer_intr_deregister(cti); 1346 } 1347 1348 void 1349 setstatclockrate(int newhz) 1350 { 1351 if (newhz == RTC_PROFRATE) 1352 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; 1353 else 1354 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; 1355 writertc(RTC_STATUSA, rtc_statusa); 1356 } 1357 1358 #if 0 1359 static unsigned 1360 tsc_get_timecount(struct timecounter *tc) 1361 { 1362 return (rdtsc()); 1363 } 1364 #endif 1365 1366 #ifdef KERN_TIMESTAMP 1367 #define KERN_TIMESTAMP_SIZE 16384 1368 static u_long tsc[KERN_TIMESTAMP_SIZE] ; 1369 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, 1370 sizeof(tsc), "LU", "Kernel timestamps"); 1371 void 1372 _TSTMP(u_int32_t x) 1373 { 1374 static int i; 1375 1376 tsc[i] = (u_int32_t)rdtsc(); 1377 tsc[i+1] = x; 1378 i = i + 2; 1379 if (i >= KERN_TIMESTAMP_SIZE) 1380 i = 0; 1381 tsc[i] = 0; /* mark last entry */ 1382 } 1383 #endif /* KERN_TIMESTAMP */ 1384 1385 /* 1386 * 1387 */ 1388 1389 static int 1390 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) 1391 { 1392 sysclock_t count; 1393 uint64_t tscval; 1394 char buf[32]; 1395 1396 crit_enter(); 1397 if (sys_cputimer == &i8254_cputimer) 1398 count = sys_cputimer->count(); 1399 else 1400 count = 0; 1401 if (tsc_present) 1402 tscval = rdtsc(); 1403 else 1404 tscval = 0; 1405 crit_exit(); 1406 ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval); 1407 return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); 1408 } 1409 1410 struct tsc_mpsync_info { 1411 volatile int tsc_ready_cnt; 1412 volatile int tsc_done_cnt; 1413 volatile int tsc_command; 1414 volatile int unused01[5]; 1415 struct { 1416 uint64_t v; 1417 uint64_t unused02; 1418 } tsc_saved[MAXCPU]; 1419 } __cachealign; 1420 1421 #if 0 1422 static void 1423 tsc_mpsync_test_loop(struct tsc_mpsync_thr *info) 1424 { 1425 struct globaldata *gd = mycpu; 1426 tsc_uclock_t test_end, test_begin; 1427 u_int i; 1428 1429 if (bootverbose) { 1430 kprintf("cpu%d: TSC testing MP synchronization ...\n", 1431 gd->gd_cpuid); 1432 } 1433 1434 test_begin = rdtsc_ordered(); 1435 /* Run test for 100ms */ 1436 test_end = test_begin + (tsc_frequency / 10); 1437 1438 arg->tsc_mpsync = 1; 1439 arg->tsc_target = test_begin; 1440 1441 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */ 1442 #define TSC_TEST_TRYMIN 50000 1443 1444 for (i = 0; i < TSC_TEST_TRYMAX; ++i) { 1445 struct lwkt_cpusync cs; 1446 1447 crit_enter(); 1448 lwkt_cpusync_init(&cs, gd->gd_other_cpus, 1449 tsc_mpsync_test_remote, arg); 1450 lwkt_cpusync_interlock(&cs); 1451 cpu_pause(); 1452 arg->tsc_target = rdtsc_ordered(); 1453 cpu_mfence(); 1454 lwkt_cpusync_deinterlock(&cs); 1455 crit_exit(); 1456 cpu_pause(); 1457 1458 if (!arg->tsc_mpsync) { 1459 kprintf("cpu%d: TSC is not MP synchronized @%u\n", 1460 gd->gd_cpuid, i); 1461 break; 1462 } 1463 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN) 1464 break; 1465 } 1466 1467 #undef TSC_TEST_TRYMIN 1468 #undef TSC_TEST_TRYMAX 1469 1470 if (arg->tsc_target == test_begin) { 1471 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid); 1472 /* XXX disable TSC? */ 1473 tsc_invariant = 0; 1474 arg->tsc_mpsync = 0; 1475 return; 1476 } 1477 1478 if (arg->tsc_mpsync && bootverbose) { 1479 kprintf("cpu%d: TSC is MP synchronized after %u tries\n", 1480 gd->gd_cpuid, i); 1481 } 1482 } 1483 1484 #endif 1485 1486 #define TSC_TEST_COUNT 50000 1487 1488 static void 1489 tsc_mpsync_ap_thread(void *xinfo) 1490 { 1491 struct tsc_mpsync_info *info = xinfo; 1492 int cpu = mycpuid; 1493 int i; 1494 1495 /* 1496 * Tell main loop that we are ready and wait for initiation 1497 */ 1498 atomic_add_int(&info->tsc_ready_cnt, 1); 1499 while (info->tsc_command == 0) { 1500 lwkt_force_switch(); 1501 } 1502 1503 /* 1504 * Run test for 10000 loops or until tsc_done_cnt != 0 (another 1505 * cpu has finished its test), then increment done. 1506 */ 1507 crit_enter(); 1508 for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) { 1509 info->tsc_saved[cpu].v = rdtsc_ordered(); 1510 } 1511 crit_exit(); 1512 atomic_add_int(&info->tsc_done_cnt, 1); 1513 1514 lwkt_exit(); 1515 } 1516 1517 static void 1518 tsc_mpsync_test(void) 1519 { 1520 int cpu; 1521 int try; 1522 1523 if (!tsc_invariant) { 1524 /* Not even invariant TSC */ 1525 return; 1526 } 1527 1528 if (ncpus == 1) { 1529 /* Only one CPU */ 1530 tsc_mpsync = 1; 1531 return; 1532 } 1533 1534 /* 1535 * Forcing can be used w/qemu to reduce contention 1536 */ 1537 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync); 1538 1539 if (tsc_mpsync == 0) { 1540 switch (cpu_vendor_id) { 1541 case CPU_VENDOR_INTEL: 1542 /* 1543 * Intel probably works 1544 */ 1545 break; 1546 1547 case CPU_VENDOR_AMD: 1548 /* 1549 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar 1550 * architectures) we have to watch out for 1551 * Erratum 778: 1552 * "Processor Core Time Stamp Counters May 1553 * Experience Drift" 1554 * This Erratum is only listed for cpus in Family 1555 * 15h < Model 30h and for 16h < Model 30h. 1556 * 1557 * AMD < Bulldozer probably doesn't work 1558 */ 1559 if (CPUID_TO_FAMILY(cpu_id) == 0x15 || 1560 CPUID_TO_FAMILY(cpu_id) == 0x16) { 1561 if (CPUID_TO_MODEL(cpu_id) < 0x30) 1562 return; 1563 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) { 1564 return; 1565 } 1566 break; 1567 1568 default: 1569 /* probably won't work */ 1570 return; 1571 } 1572 } else if (tsc_mpsync < 0) { 1573 kprintf("TSC MP synchronization test is disabled\n"); 1574 tsc_mpsync = 0; 1575 return; 1576 } 1577 1578 /* 1579 * Test even if forced to 1 above. If forced, we will use the TSC 1580 * even if the test fails. (set forced to -1 to disable entirely). 1581 */ 1582 kprintf("TSC testing MP synchronization ...\n"); 1583 1584 /* 1585 * Test TSC MP synchronization on APs. Try up to 4 times. 1586 */ 1587 for (try = 0; try < 4; ++try) { 1588 struct tsc_mpsync_info info; 1589 uint64_t last; 1590 int64_t xdelta; 1591 int64_t delta; 1592 1593 bzero(&info, sizeof(info)); 1594 1595 for (cpu = 0; cpu < ncpus; ++cpu) { 1596 thread_t td; 1597 lwkt_create(tsc_mpsync_ap_thread, &info, &td, 1598 NULL, TDF_NOSTART, cpu, 1599 "tsc mpsync %d", cpu); 1600 lwkt_setpri_initial(td, curthread->td_pri); 1601 lwkt_schedule(td); 1602 } 1603 while (info.tsc_ready_cnt != ncpus) 1604 lwkt_force_switch(); 1605 1606 /* 1607 * All threads are ready, start the test and wait for 1608 * completion. 1609 */ 1610 info.tsc_command = 1; 1611 while (info.tsc_done_cnt != ncpus) 1612 lwkt_force_switch(); 1613 1614 /* 1615 * Process results 1616 */ 1617 last = info.tsc_saved[0].v; 1618 delta = 0; 1619 for (cpu = 0; cpu < ncpus; ++cpu) { 1620 xdelta = (int64_t)(info.tsc_saved[cpu].v - last); 1621 last = info.tsc_saved[cpu].v; 1622 if (xdelta < 0) 1623 xdelta = -xdelta; 1624 delta += xdelta; 1625 1626 } 1627 1628 /* 1629 * Result from attempt. If its too wild just stop now. 1630 * Also break out if we succeed, no need to try further. 1631 */ 1632 kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n", 1633 delta, ncpus, delta / ncpus, 1634 tsc_frequency / 100000); 1635 if (delta / ncpus > tsc_frequency / 100) 1636 break; 1637 if (delta / ncpus < tsc_frequency / 100000) { 1638 tsc_mpsync = 1; 1639 break; 1640 } 1641 } 1642 1643 if (tsc_mpsync) 1644 kprintf("TSC is MP synchronized\n"); 1645 else 1646 kprintf("TSC is not MP synchronized\n"); 1647 } 1648 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL); 1649 1650 #define TSC_CPUTIMER_FREQMAX 128000000 /* 128Mhz */ 1651 1652 static int tsc_cputimer_shift; 1653 1654 static void 1655 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 1656 { 1657 timer->base = 0; 1658 timer->base = oldclock - timer->count(); 1659 } 1660 1661 static __inline sysclock_t 1662 tsc_cputimer_count(void) 1663 { 1664 uint64_t tsc; 1665 1666 tsc = rdtsc(); 1667 tsc >>= tsc_cputimer_shift; 1668 1669 return (tsc + tsc_cputimer.base); 1670 } 1671 1672 static sysclock_t 1673 tsc_cputimer_count_lfence(void) 1674 { 1675 cpu_lfence(); 1676 return tsc_cputimer_count(); 1677 } 1678 1679 static sysclock_t 1680 tsc_cputimer_count_mfence(void) 1681 { 1682 cpu_mfence(); 1683 return tsc_cputimer_count(); 1684 } 1685 1686 static uint64_t 1687 tsc_cpucounter_count_lfence(void) 1688 { 1689 1690 cpu_lfence(); 1691 return (rdtsc()); 1692 } 1693 1694 static uint64_t 1695 tsc_cpucounter_count_mfence(void) 1696 { 1697 1698 cpu_mfence(); 1699 return (rdtsc()); 1700 } 1701 1702 static void 1703 tsc_cputimer_register(void) 1704 { 1705 uint64_t freq; 1706 int enable = 1; 1707 1708 if (!tsc_mpsync) { 1709 if (tsc_invariant) { 1710 /* Per-cpu cpucounter still works. */ 1711 goto regcnt; 1712 } 1713 return; 1714 } 1715 1716 TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable); 1717 if (!enable) 1718 return; 1719 1720 freq = tsc_frequency; 1721 while (freq > TSC_CPUTIMER_FREQMAX) { 1722 freq >>= 1; 1723 ++tsc_cputimer_shift; 1724 } 1725 kprintf("TSC: cputimer freq %ju, shift %d\n", 1726 (uintmax_t)freq, tsc_cputimer_shift); 1727 1728 tsc_cputimer.freq = freq; 1729 1730 if (cpu_vendor_id == CPU_VENDOR_INTEL) 1731 tsc_cputimer.count = tsc_cputimer_count_lfence; 1732 else 1733 tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */ 1734 1735 cputimer_register(&tsc_cputimer); 1736 cputimer_select(&tsc_cputimer, 0); 1737 1738 tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC; 1739 regcnt: 1740 tsc_cpucounter.freq = tsc_frequency; 1741 if (cpu_vendor_id == CPU_VENDOR_INTEL) { 1742 tsc_cpucounter.count = 1743 tsc_cpucounter_count_lfence; 1744 } else { 1745 tsc_cpucounter.count = 1746 tsc_cpucounter_count_mfence; /* safe bet */ 1747 } 1748 cpucounter_register(&tsc_cpucounter); 1749 } 1750 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST, 1751 tsc_cputimer_register, NULL); 1752 1753 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); 1754 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, 1755 "frequency"); 1756 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, 1757 0, 0, hw_i8254_timestamp, "A", ""); 1758 1759 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, 1760 &tsc_present, 0, "TSC Available"); 1761 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, 1762 &tsc_invariant, 0, "Invariant TSC"); 1763 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, 1764 &tsc_mpsync, 0, "TSC is synchronized across CPUs"); 1765 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, 1766 &tsc_frequency, 0, "TSC Frequency"); 1767