xref: /dflybsd-src/sys/platform/pc64/isa/clock.c (revision b227f3f50d5dc0f5fdecd8f9df23e96e8521baaf)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
/*
 * Linker set of timecounter configure hooks, iterated by startrtclock().
 * The placeholder entry guarantees the set exists even if no real
 * timecounters register.
 */
SET_DECLARE(timecounter_init_set, const timecounter_init_t);
TIMECOUNTER_INIT(placeholder, NULL);

static void i8254_restore(void);
static void resettodr_on_shutdown(void *arg __unused);

/*
 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
 * can use a simple formula for leap years.
 */
#define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

/* Nominal i8254 input clock in Hz; may be overridden at build time */
#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182
#endif

static uint8_t i8254_walltimer_sel;	/* TIMER_SEL1 or TIMER_SEL2 (see construct) */
static uint16_t i8254_walltimer_cntr;	/* I/O port of the selected counter */

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;		/* set if CPUID reports a TSC */
int	tsc_invariant;		/* set if CPUID 0x80000007 reports invariant TSC */
int	tsc_mpsync;
int	tsc_is_broken;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
int	timer0_running;		/* one-shot timer0 is armed and counting */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three i8254 timers */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;
enum tstate timer1_state;
enum tstate timer2_state;

int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static	int	beeping = 0;		/* speaker beep in progress (period) */
static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int	rtc_loaded;		/* set once RTC time has been loaded */

/* Integer divisor from system timebase to 8254 ticks, 0 if inexact */
static int i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

/* Callout used to terminate a sysbeep() after 'period' ticks */
static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/* Free-running i8254 walltimer exported as a cputimer */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};

static sysclock_t tsc_cputimer_count_mfence(void);
static sysclock_t tsc_cputimer_count_lfence(void);
static void tsc_cputimer_construct(struct cputimer *, sysclock_t);

/* TSC-backed cputimer; count method and frequency filled in once probed */
static struct cputimer	tsc_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "TSC",
    .pri		= CPUTIMER_PRI_TSC,
    .type		= CPUTIMER_TSC,
    .count		= NULL,	/* determined later */
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= tsc_cputimer_construct,
    .destruct		= cputimer_default_destruct,
    .freq		= 0	/* determined later */
};

static struct cpucounter tsc_cpucounter = {
    .freq		= 0,	/* determined later */
    .count		= NULL,	/* determined later */
    .flags		= 0,	/* adjusted later */
    .prio		= CPUCOUNTER_PRIO_TSC,
    .type		= CPUCOUNTER_TSC
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/* i8254 timer0 one-shot interrupt source */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
210 
/*
 * Use this to lwkt_switch() when the scheduler clock is not
 * yet running, otherwise lwkt_switch() won't do anything.
 * XXX needs cleaning up in lwkt_thread.c
 *
 * The scheduler clock is ticked manually (inside a critical section)
 * before switching so the switch actually takes effect.
 */
static void
lwkt_force_switch(void)
{
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}
224 
/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* skip cpus that have no systimers queued */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		/*
		 * Remote cpu: sysclock_count is passed by reference via
		 * the IPI, which is why it must be static (the address
		 * must remain valid after this frame is gone).
		 */
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		/* local cpu: dispatch directly with the trap frame */
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
264 
265 
266 /*
267  * NOTE! not MP safe.
268  */
269 int
270 acquire_timer2(int mode)
271 {
272 	if (timer2_state != RELEASED)
273 		return (-1);
274 	timer2_state = ACQUIRED;
275 
276 	/*
277 	 * This access to the timer registers is as atomic as possible
278 	 * because it is a single instruction.  We could do better if we
279 	 * knew the rate.
280 	 */
281 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
282 	return (0);
283 }
284 
285 int
286 release_timer2(void)
287 {
288 	if (timer2_state != ACQUIRED)
289 		return (-1);
290 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
291 	timer2_state = RELEASED;
292 	return (0);
293 }
294 
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * DDB "show rtc" command: dump the raw (BCD, as stored in the chip)
 * RTC date/time plus the A/B/C control/status registers.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */
307 
/*
 * Return the current cpu timer count as a 32 bit integer.
 *
 * The 16-bit hardware countdown value is latched, read lsb/msb, and
 * negated into a count-up value.  A rollover relative to the previous
 * read bumps the accumulated base by 2^16, yielding a monotonic 32-bit
 * count.  The whole sequence is serialized by clock_lock since the
 * latch/read protocol must not be interleaved.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* previous count-up value (rollover detect) */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();
	return(ret);
}
331 
332 /*
333  * This function is called whenever the system timebase changes, allowing
334  * us to calculate what is needed to convert a system timebase tick
335  * into an 8254 tick for the interrupt timer.  If we can convert to a
336  * simple shift, multiplication, or division, we do so.  Otherwise 64
337  * bit arithmatic is required every time the interrupt timer is reloaded.
338  */
339 static void
340 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
341 {
342     int freq;
343     int div;
344 
345     /*
346      * Will a simple divide do the trick?
347      */
348     div = (timer->freq + (cti->freq / 2)) / cti->freq;
349     freq = cti->freq * div;
350 
351     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
352 	i8254_cputimer_div = div;
353     else
354 	i8254_cputimer_div = 0;
355 }
356 
/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* Convert system timebase ticks to 8254 ticks */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;

    /* Clamp to a minimum of 2; the (int) cast catches negative values */
    if ((int)reload < 2)
	reload = 2;

    clock_lock();
    if (timer0_running) {
	/*
	 * Timer is already running; latch and read the remaining count
	 * and only reprogram if the new timeout would fire earlier.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	/* Timer idle; arm a fresh one-shot */
	timer0_running = 1;
	if (reload > 0xFFFF)
	    reload = 0;		/* full count */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
397 
/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self test across several orders of magnitude */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
		     1000000;

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;	/* ignore apparent backwards steps */
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();	/* spin-wait cpu hint */
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
466 
/*
 * DELAY() never switches.
 *
 * Spin for n microseconds without allowing a thread switch.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}
475 
/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
 *
 * tdd->us holds the remaining microseconds.  Each call subtracts the
 * whole microseconds elapsed since the previous call; last_clock is
 * advanced only by the ticks actually accounted for, so fractional
 * microseconds carry over to the next call instead of being lost.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* First call: ensure a timer is running and take a baseline */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	/* whole microseconds elapsed since last_clock */
	us = (u_int64_t)delta * (u_int64_t)1000000 /
	     (u_int64_t)sys_cputimer->freq;
	/* advance last_clock by only the ticks those microseconds represent */
	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
			   1000000;
	tdd->us -= us;
	return (tdd->us < 0);
}
501 
502 
503 /*
504  * DRIVERSLEEP() does not switch if called with a spinlock held or
505  * from a hard interrupt.
506  */
507 void
508 DRIVERSLEEP(int usec)
509 {
510 	globaldata_t gd = mycpu;
511 
512 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
513 		DODELAY(usec, 0);
514 	} else {
515 		DODELAY(usec, 1);
516 	}
517 }
518 
/*
 * Callout handler: terminate a beep started by sysbeep() by gating the
 * speaker off and releasing timer2.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
526 
527 int
528 sysbeep(int pitch, int period)
529 {
530 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
531 		return(-1);
532 	if (sysbeep_enable == 0)
533 		return(-1);
534 	/*
535 	 * Nobody else is using timer2, we do not need the clock lock
536 	 */
537 	outb(TIMER_CNTR2, pitch);
538 	outb(TIMER_CNTR2, (pitch>>8));
539 	if (!beeping) {
540 		/* enable counter2 output to speaker */
541 		outb(IO_PPI, inb(IO_PPI) | 3);
542 		beeping = period;
543 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
544 	}
545 	return (0);
546 }
547 
/*
 * RTC support routines
 */

/*
 * Read an RTC register: write the register index to IO_RTC, then read
 * the data port.  The inb(0x84) accesses are dummy I/O-port reads,
 * conventionally used as a short settle delay for the slow ISA RTC
 * interface.  Runs in a critical section so the index/data pair is
 * not interleaved.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);
	inb(0x84);		/* settle delay */
	val = inb(IO_RTC + 1);
	inb(0x84);		/* settle delay */
	crit_exit();
	return (val);
}
565 
/*
 * Write an RTC register: write the register index, then the value.
 * The inb(0x84) dummy reads provide settle delays between accesses,
 * mirroring rtcin().  Runs in a critical section.
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);
	inb(0x84);
	outb(IO_RTC + 1, val);
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
577 
/*
 * Read an RTC register and convert the stored BCD value to binary.
 */
static __inline int
readrtc(int port)
{
	return(bcd2bin(rtcin(port)));
}
583 
/*
 * Calibrate the i8254 (and, if present, the TSC) against the RTC by
 * counting sys_cputimer ticks across exactly one RTC seconds-counter
 * update.  Returns the measured i8254 frequency in Hz, or the default
 * frequency if the RTC is unusable.  As a side effect sets
 * tsc_frequency and tsc_oneus_approx when a TSC is present.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	u_int tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* RTC must report valid power/battery or its time is garbage */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the MC146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (int)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* |1 guarantees tsc_oneus_approx is at least 1 even if freq is 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %u Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %u Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
673 
/*
 * (Re)program the i8254: timer0 as our one-shot interrupt timer and,
 * via cputimer_register(), timer1 or timer2 as the free-running
 * walltimer.  Registration only takes effect if a better timer has
 * not already been selected.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
701 
/*
 * cputimer construct method: pick timer 1 or timer 2 as the
 * free-running walltimer (tunable hw.i8254.walltimer, default 2),
 * mark it ACQUIRED, and program it in rate-generator mode with a full
 * 2^16 count.  The count() base is carried over from the previously
 * selected cputimer, rounded up to a 64K boundary.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* round the old clock up to a 64K boundary for our new base */
	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
741 
742 static void
743 i8254_cputimer_destruct(struct cputimer *timer)
744 {
745 	switch(timer->type) {
746 	case CPUTIMER_8254_SEL1:
747 	    timer1_state = RELEASED;
748 	    break;
749 	case CPUTIMER_8254_SEL2:
750 	    timer2_state = RELEASED;
751 	    break;
752 	default:
753 	    break;
754 	}
755 	timer->type = 0;
756 }
757 
/*
 * Reprogram the RTC control registers to our expected state (24-hour
 * mode first, then the cached status A/B values), e.g. after resume.
 */
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
766 
/*
 * Restore all the timers.
 *
 * This function is called to resynchronize our core timekeeping after a
 * long halt, e.g. from apm_default_resume() and friends.  It is also
 * called if after a BIOS call we have detected munging of the 8254.
 * It is necessary because cputimer_count() counter's delta may have grown
 * too large for nanouptime() and friends to handle, or (in the case of 8254
 * munging) might cause the SYSTIMER code to prematurely trigger.
 */
void
timer_restore(void)
{
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
786 
#define MAX_MEASURE_RETRIES	100

/*
 * Take one sys_cputimer sample bracketed by two ordered TSC reads.
 * The sample is retried (up to MAX_MEASURE_RETRIES) while the
 * bracketing TSC delta exceeds a threshold derived from the expected
 * timer_latency, discarding samples disturbed by interrupts or other
 * stalls.  The threshold starts at timer_latency + 25% and is relaxed
 * to 2x after half the retries are used.
 *
 * Returns the TSC value taken just before the timer read; *latency,
 * *time and *retries receive the bracketing cost, the timer reading
 * and the number of retries consumed.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
816 
/*
 * Measure the TSC frequency by timing a DELAY(usecs) interval.
 *
 * Fast method (calibrate_tsc_fast != 0): take two pairs of
 * latency-filtered samples (via do_measure()) before and after the
 * delay, scale the TSC deltas by the cputimer's measured elapsed time,
 * and average the two independent estimates.
 *
 * Slow method: a single raw rdtsc pair around the delay, scaled by
 * the nominal usecs value only.
 *
 * Returns the estimated TSC frequency in Hz.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* compensate the TSC deltas for the measurement latencies */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = (freq1 * sys_cputimer->freq) / end1;
		freq2 = (freq2 * sys_cputimer->freq) / end2;
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
871 
/*
 * Initialize 8254 timer 0 early so that it can be used in DELAY().
 *
 * Also probes for the TSC, runs any timecounter configure hooks,
 * optionally calibrates the i8254 against the RTC, and determines the
 * TSC frequency (via RTC "Method A" or DELAY-based "Method B").
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	u_int delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* CPUID 0x80000007 EDX bit 8 advertises an invariant TSC */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* run any registered timecounter configure hooks */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		/* within 1% of the default: accept if the tunable allows */
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%d Hz differs from default of %d Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		/* discard the RTC-derived TSC frequency as well */
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* measure the cputimer read latency in TSC ticks */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		/* optional diagnostic sweep over 100ms..2s calibrations */
		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		/* pick the calibration interval (microseconds) */
		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* |1 guarantees tsc_oneus_approx is at least 1 even if freq is 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1075 
1076 /*
1077  * Sync the time of day back to the RTC on shutdown, but only if
1078  * we have already loaded it and have not crashed.
1079  */
1080 static void
1081 resettodr_on_shutdown(void *arg __unused)
1082 {
1083  	if (rtc_loaded && panicstr == NULL) {
1084 		resettodr();
1085 	}
1086 }
1087 
1088 /*
1089  * Initialize the time of day register, based on the time base which is, e.g.
1090  * from a filesystem.
1091  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/*
	 * Seed the system time from the caller-supplied base (e.g. a
	 * filesystem timestamp) so we have something sane even if the
	 * RTC turns out to be invalid below.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	/*
	 * The crit_exit()/crit_enter() pair inside the loop briefly reopens
	 * the critical section so interrupts can run while we busy-wait on
	 * the update-in-progress bit; once clear we hold the critical
	 * section across all of the register reads below.
	 */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	/* Two-digit RTC year; values before 70 are interpreted as 20xx */
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Accumulate days for the elapsed months of the current year */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* Plus full days for every year since the epoch */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert the RTC's local time to UTC */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/*
	 * Only step the system clock if it disagrees with the RTC by
	 * 2 seconds or more.
	 */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1162 
1163 /*
1164  * Write system time back to RTC
1165  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	/* Administratively disabled (e.g. another clock owns the RTC) */
	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* 01-01-1970 was a Thursday, hence the +4 for the weekday offset */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	/* Strip whole years off tm, leaving the day-of-year */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Strip whole months off tm, leaving the day-of-month (0-based) */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1222 
1223 static int
1224 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1225 {
1226 	sysclock_t base;
1227 	long lastcnt;
1228 
1229 	/*
1230 	 * Following code assumes the 8254 is the cpu timer,
1231 	 * so make sure it is.
1232 	 */
1233 	KKASSERT(sys_cputimer == &i8254_cputimer);
1234 	KKASSERT(cti == &i8254_cputimer_intr);
1235 
1236 	lastcnt = get_interrupt_counter(irq, mycpuid);
1237 
1238 	/*
1239 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1240 	 * it to happen, then see if we got it.
1241 	 */
1242 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1243 
1244 	i8254_intr_reload(cti, 2);
1245 	base = sys_cputimer->count();
1246 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1247 		; /* nothing */
1248 
1249 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1250 		return ENOENT;
1251 	return 0;
1252 }
1253 
1254 /*
1255  * Start both clocks running.  DragonFly note: the stat clock is no longer
1256  * used.  Instead, 8254 based systimers are used for all major clock
1257  * interrupts.
1258  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
/*
 * Fallback: route legacy irq 0 through the IOAPIC as an EXTINT
 * (mixed mode).  Also re-entered via goto from the trial-failure
 * path further below.
 */
mixed_mode_setup:
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				/* Non-fatal if we are not the chosen timer */
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/*
	 * With the IOAPIC enabled, verify the interrupt actually arrives;
	 * if not, retry once through the mixed-mode path above.
	 */
	if (ioapic_enable) {
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1347 
1348 void
1349 setstatclockrate(int newhz)
1350 {
1351 	if (newhz == RTC_PROFRATE)
1352 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1353 	else
1354 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1355 	writertc(RTC_STATUSA, rtc_statusa);
1356 }
1357 
#if 0
/*
 * Disabled timecounter read hook.  NOTE(review): if ever enabled, the
 * 64-bit rdtsc() value is truncated to 'unsigned' by the return.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1365 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/*
 * Circular buffer of (low-32-bit TSC, caller tag) pairs, exported
 * read-only via sysctl debug.timestamp.
 */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp entry: low 32 bits of the TSC followed by the
 * caller-supplied tag, then zero the next slot to mark the ring tail.
 * NOTE(review): no locking is visible here; concurrent callers could
 * interleave entries -- confirm callers serialize.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1384 
1385 /*
1386  *
1387  */
1388 
/*
 * Sysctl handler: report the current 8254 count (0 if the 8254 is not
 * the system cputimer) and the raw TSC (0 if no TSC), formatted as
 * "%08x %016llx".  NOTE(review): the %08x conversion assumes sysclock_t
 * is a 32-bit type -- confirm against the sysclock_t typedef.
 */
static int
hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
{
    sysclock_t count;
    uint64_t tscval;
    char buf[32];

    /* Sample both clocks atomically with respect to interrupts */
    crit_enter();
    if (sys_cputimer == &i8254_cputimer)
	count = sys_cputimer->count();
    else
	count = 0;
    if (tsc_present)
	tscval = rdtsc();
    else
	tscval = 0;
    crit_exit();
    ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
    return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
}
1409 
/*
 * Shared state for the TSC MP synchronization test, one instance per
 * test attempt.  Cacheline aligned to limit false sharing.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of AP threads ready to run */
	volatile int		tsc_done_cnt;	/* # of AP threads finished */
	volatile int		tsc_command;	/* set non-zero to start the test */
	volatile int		unused01[5];	/* pad */
	struct {
		uint64_t	v;		/* last TSC sampled on this cpu */
		uint64_t	unused02;	/* pad entry to 16 bytes */
	} tsc_saved[MAXCPU];
} __cachealign;
1420 
#if 0
/*
 * Older cpusync-based MP synchronization test, disabled.
 * NOTE(review): this body references 'arg' (the parameter is named
 * 'info'), 'struct tsc_mpsync_thr', and tsc_mpsync_test_remote(), none
 * of which are visible here -- it would not compile if re-enabled as-is.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1485 
#define TSC_TEST_COUNT		50000

/*
 * Per-cpu worker thread for the TSC MP synchronization test.  Waits
 * for the initiator to set tsc_command, then repeatedly samples the
 * TSC into its tsc_saved[] slot until the iteration cap is hit or
 * another cpu finishes first.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT (50000) loops or until
	 * tsc_done_cnt != 0 (another cpu has finished its test),
	 * then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1516 
1517 static void
1518 tsc_mpsync_test(void)
1519 {
1520 	int cpu;
1521 	int try;
1522 
1523 	if (!tsc_invariant) {
1524 		/* Not even invariant TSC */
1525 		return;
1526 	}
1527 
1528 	if (ncpus == 1) {
1529 		/* Only one CPU */
1530 		tsc_mpsync = 1;
1531 		return;
1532 	}
1533 
1534 	/*
1535 	 * Forcing can be used w/qemu to reduce contention
1536 	 */
1537 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1538 
1539 	if (tsc_mpsync == 0) {
1540 		switch (cpu_vendor_id) {
1541 		case CPU_VENDOR_INTEL:
1542 			/*
1543 			 * Intel probably works
1544 			 */
1545 			break;
1546 
1547 		case CPU_VENDOR_AMD:
1548 			/*
1549 			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1550 			 * architectures) we have to watch out for
1551 			 * Erratum 778:
1552 			 *     "Processor Core Time Stamp Counters May
1553 			 *      Experience Drift"
1554 			 * This Erratum is only listed for cpus in Family
1555 			 * 15h < Model 30h and for 16h < Model 30h.
1556 			 *
1557 			 * AMD < Bulldozer probably doesn't work
1558 			 */
1559 			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1560 			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
1561 				if (CPUID_TO_MODEL(cpu_id) < 0x30)
1562 					return;
1563 			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1564 				return;
1565 			}
1566 			break;
1567 
1568 		default:
1569 			/* probably won't work */
1570 			return;
1571 		}
1572 	} else if (tsc_mpsync < 0) {
1573 		kprintf("TSC MP synchronization test is disabled\n");
1574 		tsc_mpsync = 0;
1575 		return;
1576 	}
1577 
1578 	/*
1579 	 * Test even if forced to 1 above.  If forced, we will use the TSC
1580 	 * even if the test fails.  (set forced to -1 to disable entirely).
1581 	 */
1582 	kprintf("TSC testing MP synchronization ...\n");
1583 
1584 	/*
1585 	 * Test TSC MP synchronization on APs.  Try up to 4 times.
1586 	 */
1587 	for (try = 0; try < 4; ++try) {
1588 		struct tsc_mpsync_info info;
1589 		uint64_t last;
1590 		int64_t xdelta;
1591 		int64_t delta;
1592 
1593 		bzero(&info, sizeof(info));
1594 
1595 		for (cpu = 0; cpu < ncpus; ++cpu) {
1596 			thread_t td;
1597 			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1598 				    NULL, TDF_NOSTART, cpu,
1599 				    "tsc mpsync %d", cpu);
1600 			lwkt_setpri_initial(td, curthread->td_pri);
1601 			lwkt_schedule(td);
1602 		}
1603 		while (info.tsc_ready_cnt != ncpus)
1604 			lwkt_force_switch();
1605 
1606 		/*
1607 		 * All threads are ready, start the test and wait for
1608 		 * completion.
1609 		 */
1610 		info.tsc_command = 1;
1611 		while (info.tsc_done_cnt != ncpus)
1612 			lwkt_force_switch();
1613 
1614 		/*
1615 		 * Process results
1616 		 */
1617 		last = info.tsc_saved[0].v;
1618 		delta = 0;
1619 		for (cpu = 0; cpu < ncpus; ++cpu) {
1620 			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1621 			last = info.tsc_saved[cpu].v;
1622 			if (xdelta < 0)
1623 				xdelta = -xdelta;
1624 			delta += xdelta;
1625 
1626 		}
1627 
1628 		/*
1629 		 * Result from attempt.  If its too wild just stop now.
1630 		 * Also break out if we succeed, no need to try further.
1631 		 */
1632 		kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n",
1633 			delta, ncpus, delta / ncpus,
1634 			tsc_frequency / 100000);
1635 		if (delta / ncpus > tsc_frequency / 100)
1636 			break;
1637 		if (delta / ncpus < tsc_frequency / 100000) {
1638 			tsc_mpsync = 1;
1639 			break;
1640 		}
1641 	}
1642 
1643 	if (tsc_mpsync)
1644 		kprintf("TSC is MP synchronized\n");
1645 	else
1646 		kprintf("TSC is not MP synchronized\n");
1647 }
1648 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1649 
#define TSC_CPUTIMER_FREQMAX	128000000	/* 128Mhz */

/* Right-shift applied to the raw TSC to keep freq <= TSC_CPUTIMER_FREQMAX */
static int tsc_cputimer_shift;

/*
 * cputimer construct hook: rebase so count() continues from oldclock.
 * base must be zeroed first because timer->count() adds base itself.
 */
static void
tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	timer->base = 0;
	timer->base = oldclock - timer->count();
}
1660 
1661 static __inline sysclock_t
1662 tsc_cputimer_count(void)
1663 {
1664 	uint64_t tsc;
1665 
1666 	tsc = rdtsc();
1667 	tsc >>= tsc_cputimer_shift;
1668 
1669 	return (tsc + tsc_cputimer.base);
1670 }
1671 
1672 static sysclock_t
1673 tsc_cputimer_count_lfence(void)
1674 {
1675 	cpu_lfence();
1676 	return tsc_cputimer_count();
1677 }
1678 
1679 static sysclock_t
1680 tsc_cputimer_count_mfence(void)
1681 {
1682 	cpu_mfence();
1683 	return tsc_cputimer_count();
1684 }
1685 
1686 static uint64_t
1687 tsc_cpucounter_count_lfence(void)
1688 {
1689 
1690 	cpu_lfence();
1691 	return (rdtsc());
1692 }
1693 
1694 static uint64_t
1695 tsc_cpucounter_count_mfence(void)
1696 {
1697 
1698 	cpu_mfence();
1699 	return (rdtsc());
1700 }
1701 
/*
 * Register the TSC as the system cputimer (only if the TSCs proved MP
 * synchronized) and as a per-cpu cpucounter (invariant TSC suffices).
 * The raw TSC frequency is scaled down by powers of two until it fits
 * under TSC_CPUTIMER_FREQMAX; the shift is stored in tsc_cputimer_shift.
 */
static void
tsc_cputimer_register(void)
{
	uint64_t freq;
	int enable = 1;

	if (!tsc_mpsync) {
		if (tsc_invariant) {
			/* Per-cpu cpucounter still works. */
			goto regcnt;
		}
		return;
	}

	/* Allow the TSC cputimer to be disabled by tunable */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
	if (!enable)
		return;

	freq = tsc_frequency;
	while (freq > TSC_CPUTIMER_FREQMAX) {
		freq >>= 1;
		++tsc_cputimer_shift;
	}
	kprintf("TSC: cputimer freq %ju, shift %d\n",
	    (uintmax_t)freq, tsc_cputimer_shift);

	tsc_cputimer.freq = freq;

	/* Pick the fence matching the cpu vendor's ordering guarantees */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		tsc_cputimer.count = tsc_cputimer_count_lfence;
	else
		tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */

	cputimer_register(&tsc_cputimer);
	cputimer_select(&tsc_cputimer, 0);

	tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
regcnt:
	/* The cpucounter uses the raw, unshifted TSC frequency */
	tsc_cpucounter.freq = tsc_frequency;
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_lfence;
	} else {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_mfence; /* safe bet */
	}
	cpucounter_register(&tsc_cpucounter);
}
SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
	tsc_cputimer_register, NULL);
1752 
/* hw.i8254.* : 8254 timer frequency and combined 8254/TSC timestamp */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* hw.tsc_* : read-only TSC capability and calibration results */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1767