xref: /onnv-gate/usr/src/uts/i86pc/os/timestamp.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/types.h>
30*0Sstevel@tonic-gate #include <sys/param.h>
31*0Sstevel@tonic-gate #include <sys/systm.h>
32*0Sstevel@tonic-gate #include <sys/disp.h>
33*0Sstevel@tonic-gate #include <sys/var.h>
34*0Sstevel@tonic-gate #include <sys/cmn_err.h>
35*0Sstevel@tonic-gate #include <sys/debug.h>
36*0Sstevel@tonic-gate #include <sys/x86_archext.h>
37*0Sstevel@tonic-gate #include <sys/archsystm.h>
38*0Sstevel@tonic-gate #include <sys/cpuvar.h>
39*0Sstevel@tonic-gate #include <sys/psm_defs.h>
40*0Sstevel@tonic-gate #include <sys/clock.h>
41*0Sstevel@tonic-gate #include <sys/atomic.h>
42*0Sstevel@tonic-gate #include <sys/lockstat.h>
43*0Sstevel@tonic-gate #include <sys/smp_impldefs.h>
44*0Sstevel@tonic-gate #include <sys/dtrace.h>
45*0Sstevel@tonic-gate #include <sys/time.h>
46*0Sstevel@tonic-gate 
47*0Sstevel@tonic-gate /*
48*0Sstevel@tonic-gate  * Using the Pentium's TSC register for gethrtime()
49*0Sstevel@tonic-gate  * ------------------------------------------------
50*0Sstevel@tonic-gate  *
51*0Sstevel@tonic-gate  * The Pentium family, like many chip architectures, has a high-resolution
52*0Sstevel@tonic-gate  * timestamp counter ("TSC") which increments once per CPU cycle.  The contents
53*0Sstevel@tonic-gate  * of the timestamp counter are read with the RDTSC instruction.
54*0Sstevel@tonic-gate  *
55*0Sstevel@tonic-gate  * As with its UltraSPARC equivalent (the %tick register), TSC's cycle count
56*0Sstevel@tonic-gate  * must be translated into nanoseconds in order to implement gethrtime().
57*0Sstevel@tonic-gate  * We avoid inducing floating point operations in this conversion by
58*0Sstevel@tonic-gate  * implementing the same nsec_scale algorithm as that found in the sun4u
59*0Sstevel@tonic-gate  * platform code.  The sun4u NATIVE_TIME_TO_NSEC_SCALE block comment contains
60*0Sstevel@tonic-gate  * a detailed description of the algorithm; the comment is not reproduced
61*0Sstevel@tonic-gate  * here.  This implementation differs only in its value for NSEC_SHIFT:
62*0Sstevel@tonic-gate  * we implement an NSEC_SHIFT of 5 (instead of sun4u's 4) to allow for
63*0Sstevel@tonic-gate  * 60 MHz Pentiums.
64*0Sstevel@tonic-gate  *
65*0Sstevel@tonic-gate  * While TSC and %tick are both cycle counting registers, TSC's functionality
66*0Sstevel@tonic-gate  * falls short in several critical ways:
67*0Sstevel@tonic-gate  *
68*0Sstevel@tonic-gate  *  (a)	TSCs on different CPUs are not guaranteed to be in sync.  While in
69*0Sstevel@tonic-gate  *	practice they often _are_ in sync, this isn't guaranteed by the
70*0Sstevel@tonic-gate  *	architecture.
71*0Sstevel@tonic-gate  *
72*0Sstevel@tonic-gate  *  (b)	The TSC cannot be reliably set to an arbitrary value.  The architecture
73*0Sstevel@tonic-gate  *	only supports writing the low 32-bits of TSC, making it impractical
74*0Sstevel@tonic-gate  *	to rewrite.
75*0Sstevel@tonic-gate  *
76*0Sstevel@tonic-gate  *  (c)	The architecture doesn't have the capacity to interrupt based on
77*0Sstevel@tonic-gate  *	arbitrary values of TSC; there is no TICK_CMPR equivalent.
78*0Sstevel@tonic-gate  *
79*0Sstevel@tonic-gate  * Together, (a) and (b) imply that software must track the skew between
80*0Sstevel@tonic-gate  * TSCs and account for it (it is assumed that while there may exist skew,
81*0Sstevel@tonic-gate  * there does not exist drift).  To determine the skew between CPUs, we
82*0Sstevel@tonic-gate  * have newly onlined CPUs call tsc_sync_slave(), while the CPU performing
83*0Sstevel@tonic-gate  * the online operation calls tsc_sync_master().  Once both CPUs are ready,
84*0Sstevel@tonic-gate  * the master sets a shared flag, and each reads its TSC register.  To reduce
85*0Sstevel@tonic-gate  * bias, we then wait until both CPUs are ready again, but this time the
86*0Sstevel@tonic-gate  * slave sets the shared flag, and each reads its TSC register again. The
87*0Sstevel@tonic-gate  * master compares the average of the two sample values, and, if observable
88*0Sstevel@tonic-gate  * skew is found, changes the gethrtimef function pointer to point to a
89*0Sstevel@tonic-gate  * gethrtime() implementation which will take the discovered skew into
90*0Sstevel@tonic-gate  * consideration.
91*0Sstevel@tonic-gate  *
92*0Sstevel@tonic-gate  * In the absence of time-of-day clock adjustments, gethrtime() must stay in
93*0Sstevel@tonic-gate  * sync with gettimeofday().  This is problematic; given (c), the software
94*0Sstevel@tonic-gate  * cannot drive its time-of-day source from TSC, and yet they must somehow be
95*0Sstevel@tonic-gate  * kept in sync.  We implement this by having a routine, tsc_tick(), which
96*0Sstevel@tonic-gate  * is called once per second from the interrupt which drives time-of-day.
97*0Sstevel@tonic-gate  * tsc_tick() recalculates nsec_scale based on the number of the CPU cycles
98*0Sstevel@tonic-gate  * since boot versus the number of seconds since boot.  This algorithm
99*0Sstevel@tonic-gate  * becomes more accurate over time and converges quickly; the error in
100*0Sstevel@tonic-gate  * nsec_scale is typically under 1 ppm less than 10 seconds after boot, and
101*0Sstevel@tonic-gate  * is less than 100 ppb 1 minute after boot.
102*0Sstevel@tonic-gate  *
103*0Sstevel@tonic-gate  * Note that the hrtime base for gethrtime, tsc_hrtime_base, is modified
104*0Sstevel@tonic-gate  * atomically with nsec_scale under CLOCK_LOCK.  This assures that time
105*0Sstevel@tonic-gate  * monotonically increases.
106*0Sstevel@tonic-gate  */
107*0Sstevel@tonic-gate 
108*0Sstevel@tonic-gate #define	NSEC_SHIFT 5
109*0Sstevel@tonic-gate 
110*0Sstevel@tonic-gate static uint_t nsec_scale;
111*0Sstevel@tonic-gate 
/*
 * Rendezvous flags for the tsc_sync_master()/tsc_sync_slave() handshake.
 * Both CPUs store to and spin on each of them:
 *
 *	tsc_ready	announces the stage the storing CPU has reached
 *	tsc_sync_go	releases the peer CPU into the next stage
 *
 * These two variables used to be grouped together inside of a structure that
 * lived on a single cache line.  A regression (bug ID 4623398) caused the
 * compiler to emit code that "optimized" away the busy-wait loops that poll
 * them, with the result that no synchronization between the onlining and
 * onlined CPUs took place.  Keep them as separate volatile scalars.
 */
static volatile int tsc_ready;
static volatile int tsc_sync_go;
121*0Sstevel@tonic-gate 
/*
 * Indices into the tsc_sync_snaps[] array: one slot for the summed TSC
 * samples of each participant.
 */
#define	TSC_MASTER		0
#define	TSC_SLAVE		1

/*
 * Values stored in tsc_ready and tsc_sync_go during the
 * tsc_sync_master()/tsc_sync_slave() rendezvous.
 */
#define	TSC_SYNC_STOP		1
#define	TSC_SYNC_GO		2
#define	TSC_SYNC_AGAIN		3
134*0Sstevel@tonic-gate 
/*
 * Fixed-point conversion of a 64-bit TSC cycle count into nanoseconds.
 *
 * The count is split into its high and low 32-bit halves, each half is
 * multiplied by the 32-bit scale factor with mul32() (defined elsewhere;
 * presumably a 32x32 -> 64 bit multiply), and the partial products are
 * shifted into place:
 *
 *	((hi * scale) << NSEC_SHIFT) + ((lo * scale) >> (32 - NSEC_SHIFT))
 *
 * TSC_CONVERT_AND_ADD() accumulates the result into hrt, TSC_CONVERT()
 * overwrites hrt with it.
 *
 * The halves are taken with shifts from a local copy rather than by
 * punning the argument through an (unsigned int *), so tsc is evaluated
 * exactly once, need not be an lvalue, and no aliasing or byte-order
 * assumption is made.  Both macros expand to a single statement, so they
 * may be used as the body of an unbraced if/else.
 *
 * XX64	Is there a faster way to do this with a 64-bit ABI?
 */
#define	TSC_CONVERT_AND_ADD(tsc, hrt, scale) do { \
	uint64_t _tsc64 = (uint64_t)(tsc); \
	(hrt) += mul32((uint32_t)(_tsc64 >> 32), (scale)) << NSEC_SHIFT; \
	(hrt) += mul32((uint32_t)_tsc64, (scale)) >> (32 - NSEC_SHIFT); \
} while (0)

#define	TSC_CONVERT(tsc, hrt, scale) do { \
	uint64_t _tsc64 = (uint64_t)(tsc); \
	(hrt) = mul32((uint32_t)(_tsc64 >> 32), (scale)) << NSEC_SHIFT; \
	(hrt) += mul32((uint32_t)_tsc64, (scale)) >> (32 - NSEC_SHIFT); \
} while (0)
149*0Sstevel@tonic-gate 
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate 
152*0Sstevel@tonic-gate static int	tsc_max_delta;
153*0Sstevel@tonic-gate static hrtime_t tsc_sync_snaps[2];
154*0Sstevel@tonic-gate static hrtime_t tsc_sync_delta[NCPU];
155*0Sstevel@tonic-gate static hrtime_t tsc_sync_tick_delta[NCPU];
156*0Sstevel@tonic-gate static hrtime_t	tsc_last = 0;
157*0Sstevel@tonic-gate static hrtime_t	tsc_last_jumped = 0;
158*0Sstevel@tonic-gate static hrtime_t	tsc_hrtime_base = 0;
159*0Sstevel@tonic-gate static int	tsc_jumped = 0;
160*0Sstevel@tonic-gate 
161*0Sstevel@tonic-gate static hrtime_t	shadow_tsc_hrtime_base;
162*0Sstevel@tonic-gate static hrtime_t	shadow_tsc_last;
163*0Sstevel@tonic-gate static uint_t	shadow_nsec_scale;
164*0Sstevel@tonic-gate static uint32_t	shadow_hres_lock;
165*0Sstevel@tonic-gate 
166*0Sstevel@tonic-gate /*
167*0Sstevel@tonic-gate  * Called by the master after the sync operation is complete.  If the
168*0Sstevel@tonic-gate  * slave is discovered to lag, gethrtimef will be changed to point to
169*0Sstevel@tonic-gate  * tsc_gethrtime_delta().
170*0Sstevel@tonic-gate  */
171*0Sstevel@tonic-gate static void
172*0Sstevel@tonic-gate tsc_digest(processorid_t target)
173*0Sstevel@tonic-gate {
174*0Sstevel@tonic-gate 	hrtime_t tdelta, hdelta = 0;
175*0Sstevel@tonic-gate 	int max = tsc_max_delta;
176*0Sstevel@tonic-gate 	processorid_t source = CPU->cpu_id;
177*0Sstevel@tonic-gate 	int update;
178*0Sstevel@tonic-gate 
179*0Sstevel@tonic-gate 	update = tsc_sync_delta[source] != 0 ||
180*0Sstevel@tonic-gate 	    gethrtimef == tsc_gethrtime_delta;
181*0Sstevel@tonic-gate 
182*0Sstevel@tonic-gate 	/*
183*0Sstevel@tonic-gate 	 * We divide by 2 since each of the data points is the sum of two TSC
184*0Sstevel@tonic-gate 	 * reads; this takes the average of the two.
185*0Sstevel@tonic-gate 	 */
186*0Sstevel@tonic-gate 	tdelta = (tsc_sync_snaps[TSC_SLAVE] - tsc_sync_snaps[TSC_MASTER]) / 2;
187*0Sstevel@tonic-gate 	if ((tdelta > max) || ((tdelta >= 0) && update)) {
188*0Sstevel@tonic-gate 		TSC_CONVERT_AND_ADD(tdelta, hdelta, nsec_scale);
189*0Sstevel@tonic-gate 		tsc_sync_delta[target] = tsc_sync_delta[source] - hdelta;
190*0Sstevel@tonic-gate 		tsc_sync_tick_delta[target] = -tdelta;
191*0Sstevel@tonic-gate 		gethrtimef = tsc_gethrtime_delta;
192*0Sstevel@tonic-gate 		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
193*0Sstevel@tonic-gate 		return;
194*0Sstevel@tonic-gate 	}
195*0Sstevel@tonic-gate 
196*0Sstevel@tonic-gate 	tdelta = -tdelta;
197*0Sstevel@tonic-gate 	if ((tdelta > max) || update) {
198*0Sstevel@tonic-gate 		TSC_CONVERT_AND_ADD(tdelta, hdelta, nsec_scale);
199*0Sstevel@tonic-gate 		tsc_sync_delta[target] = tsc_sync_delta[source] + hdelta;
200*0Sstevel@tonic-gate 		tsc_sync_tick_delta[target] = tdelta;
201*0Sstevel@tonic-gate 		gethrtimef = tsc_gethrtime_delta;
202*0Sstevel@tonic-gate 		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
203*0Sstevel@tonic-gate 	}
204*0Sstevel@tonic-gate 
205*0Sstevel@tonic-gate }
206*0Sstevel@tonic-gate 
207*0Sstevel@tonic-gate /*
208*0Sstevel@tonic-gate  * Called by a CPU which has just performed an online operation on another
209*0Sstevel@tonic-gate  * CPU.  It is expected that the newly onlined CPU will call tsc_sync_slave().
210*0Sstevel@tonic-gate  */
211*0Sstevel@tonic-gate void
212*0Sstevel@tonic-gate tsc_sync_master(processorid_t slave)
213*0Sstevel@tonic-gate {
214*0Sstevel@tonic-gate 	int flags;
215*0Sstevel@tonic-gate 	hrtime_t hrt;
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 	ASSERT(tsc_sync_go != TSC_SYNC_GO);
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate 	flags = clear_int_flag();
220*0Sstevel@tonic-gate 
221*0Sstevel@tonic-gate 	/*
222*0Sstevel@tonic-gate 	 * Wait for the slave CPU to arrive.
223*0Sstevel@tonic-gate 	 */
224*0Sstevel@tonic-gate 	while (tsc_ready != TSC_SYNC_GO)
225*0Sstevel@tonic-gate 		continue;
226*0Sstevel@tonic-gate 
227*0Sstevel@tonic-gate 	/*
228*0Sstevel@tonic-gate 	 * Tell the slave CPU to begin reading its TSC; read our own.
229*0Sstevel@tonic-gate 	 */
230*0Sstevel@tonic-gate 	tsc_sync_go = TSC_SYNC_GO;
231*0Sstevel@tonic-gate 	hrt = tsc_read();
232*0Sstevel@tonic-gate 
233*0Sstevel@tonic-gate 	/*
234*0Sstevel@tonic-gate 	 * Tell the slave that we're ready, and wait for the slave to tell us
235*0Sstevel@tonic-gate 	 * to read our TSC again.
236*0Sstevel@tonic-gate 	 */
237*0Sstevel@tonic-gate 	tsc_ready = TSC_SYNC_AGAIN;
238*0Sstevel@tonic-gate 	while (tsc_sync_go != TSC_SYNC_AGAIN)
239*0Sstevel@tonic-gate 		continue;
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate 	hrt += tsc_read();
242*0Sstevel@tonic-gate 	tsc_sync_snaps[TSC_MASTER] = hrt;
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate 	/*
245*0Sstevel@tonic-gate 	 * Wait for the slave to finish reading its TSC.
246*0Sstevel@tonic-gate 	 */
247*0Sstevel@tonic-gate 	while (tsc_ready != TSC_SYNC_STOP)
248*0Sstevel@tonic-gate 		continue;
249*0Sstevel@tonic-gate 
250*0Sstevel@tonic-gate 	/*
251*0Sstevel@tonic-gate 	 * At this point, both CPUs have performed their tsc_read() calls.
252*0Sstevel@tonic-gate 	 * We'll digest it now before letting the slave CPU return.
253*0Sstevel@tonic-gate 	 */
254*0Sstevel@tonic-gate 	tsc_digest(slave);
255*0Sstevel@tonic-gate 	tsc_sync_go = TSC_SYNC_STOP;
256*0Sstevel@tonic-gate 
257*0Sstevel@tonic-gate 	restore_int_flag(flags);
258*0Sstevel@tonic-gate }
259*0Sstevel@tonic-gate 
260*0Sstevel@tonic-gate /*
261*0Sstevel@tonic-gate  * Called by a CPU which has just been onlined.  It is expected that the CPU
262*0Sstevel@tonic-gate  * performing the online operation will call tsc_sync_master().
263*0Sstevel@tonic-gate  */
264*0Sstevel@tonic-gate void
265*0Sstevel@tonic-gate tsc_sync_slave(void)
266*0Sstevel@tonic-gate {
267*0Sstevel@tonic-gate 	int flags;
268*0Sstevel@tonic-gate 	hrtime_t hrt;
269*0Sstevel@tonic-gate 
270*0Sstevel@tonic-gate 	ASSERT(tsc_sync_go != TSC_SYNC_GO);
271*0Sstevel@tonic-gate 
272*0Sstevel@tonic-gate 	flags = clear_int_flag();
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate 	/*
275*0Sstevel@tonic-gate 	 * Tell the master CPU that we're ready, and wait for the master to
276*0Sstevel@tonic-gate 	 * tell us to begin reading our TSC.
277*0Sstevel@tonic-gate 	 */
278*0Sstevel@tonic-gate 	tsc_ready = TSC_SYNC_GO;
279*0Sstevel@tonic-gate 	while (tsc_sync_go != TSC_SYNC_GO)
280*0Sstevel@tonic-gate 		continue;
281*0Sstevel@tonic-gate 
282*0Sstevel@tonic-gate 	hrt = tsc_read();
283*0Sstevel@tonic-gate 
284*0Sstevel@tonic-gate 	/*
285*0Sstevel@tonic-gate 	 * Wait for the master CPU to be ready to read its TSC again.
286*0Sstevel@tonic-gate 	 */
287*0Sstevel@tonic-gate 	while (tsc_ready != TSC_SYNC_AGAIN)
288*0Sstevel@tonic-gate 		continue;
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate 	/*
291*0Sstevel@tonic-gate 	 * Tell the master CPU to read its TSC again; read ours again.
292*0Sstevel@tonic-gate 	 */
293*0Sstevel@tonic-gate 	tsc_sync_go = TSC_SYNC_AGAIN;
294*0Sstevel@tonic-gate 
295*0Sstevel@tonic-gate 	hrt += tsc_read();
296*0Sstevel@tonic-gate 	tsc_sync_snaps[TSC_SLAVE] = hrt;
297*0Sstevel@tonic-gate 
298*0Sstevel@tonic-gate 	/*
299*0Sstevel@tonic-gate 	 * Tell the master that we're done, and wait to be dismissed.
300*0Sstevel@tonic-gate 	 */
301*0Sstevel@tonic-gate 	tsc_ready = TSC_SYNC_STOP;
302*0Sstevel@tonic-gate 	while (tsc_sync_go != TSC_SYNC_STOP)
303*0Sstevel@tonic-gate 		continue;
304*0Sstevel@tonic-gate 
305*0Sstevel@tonic-gate 	restore_int_flag(flags);
306*0Sstevel@tonic-gate }
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate void
309*0Sstevel@tonic-gate tsc_hrtimeinit(uint64_t cpu_freq_hz)
310*0Sstevel@tonic-gate {
311*0Sstevel@tonic-gate 	longlong_t tsc;
312*0Sstevel@tonic-gate 	int flags;
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate 	/*
315*0Sstevel@tonic-gate 	 * cpu_freq_hz is the measured cpu frequency in hertz
316*0Sstevel@tonic-gate 	 */
317*0Sstevel@tonic-gate 
318*0Sstevel@tonic-gate 	/*
319*0Sstevel@tonic-gate 	 * We can't accommodate CPUs slower than 31.25 MHz.
320*0Sstevel@tonic-gate 	 */
321*0Sstevel@tonic-gate 	ASSERT(cpu_freq_hz > NANOSEC / (1 << NSEC_SHIFT));
322*0Sstevel@tonic-gate 	nsec_scale =
323*0Sstevel@tonic-gate 	    (uint_t)
324*0Sstevel@tonic-gate 		(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz);
325*0Sstevel@tonic-gate 
326*0Sstevel@tonic-gate 	flags = clear_int_flag();
327*0Sstevel@tonic-gate 	tsc = tsc_read();
328*0Sstevel@tonic-gate 	(void) tsc_gethrtime();
329*0Sstevel@tonic-gate 	tsc_max_delta = tsc_read() - tsc;
330*0Sstevel@tonic-gate 	restore_int_flag(flags);
331*0Sstevel@tonic-gate }
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate /*
334*0Sstevel@tonic-gate  * Called once per second on CPU 0 from the cyclic subsystem's CY_HIGH_LEVEL
335*0Sstevel@tonic-gate  * interrupt.
336*0Sstevel@tonic-gate  */
337*0Sstevel@tonic-gate void
338*0Sstevel@tonic-gate tsc_tick(void)
339*0Sstevel@tonic-gate {
340*0Sstevel@tonic-gate 	hrtime_t now, delta;
341*0Sstevel@tonic-gate 	ushort_t spl;
342*0Sstevel@tonic-gate 
343*0Sstevel@tonic-gate 	/*
344*0Sstevel@tonic-gate 	 * Before we set the new variables, we set the shadow values.  This
345*0Sstevel@tonic-gate 	 * allows for lock free operation in dtrace_gethrtime().
346*0Sstevel@tonic-gate 	 */
347*0Sstevel@tonic-gate 	lock_set_spl((lock_t *)&shadow_hres_lock + HRES_LOCK_OFFSET,
348*0Sstevel@tonic-gate 	    ipltospl(CBE_HIGH_PIL), &spl);
349*0Sstevel@tonic-gate 
350*0Sstevel@tonic-gate 	shadow_tsc_hrtime_base = tsc_hrtime_base;
351*0Sstevel@tonic-gate 	shadow_tsc_last = tsc_last;
352*0Sstevel@tonic-gate 	shadow_nsec_scale = nsec_scale;
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate 	shadow_hres_lock++;
355*0Sstevel@tonic-gate 	splx(spl);
356*0Sstevel@tonic-gate 
357*0Sstevel@tonic-gate 	CLOCK_LOCK(&spl);
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 	now = tsc_read();
360*0Sstevel@tonic-gate 
361*0Sstevel@tonic-gate 	if (now < tsc_last) {
362*0Sstevel@tonic-gate 		/*
363*0Sstevel@tonic-gate 		 * The TSC has just jumped into the past.  We assume that
364*0Sstevel@tonic-gate 		 * this is due to a suspend/resume cycle, and we're going
365*0Sstevel@tonic-gate 		 * to use the _current_ value of TSC as the delta.  This
366*0Sstevel@tonic-gate 		 * will keep tsc_hrtime_base correct.  We're also going to
367*0Sstevel@tonic-gate 		 * assume that rate of tsc does not change after a suspend
368*0Sstevel@tonic-gate 		 * resume (i.e nsec_scale remains the same).
369*0Sstevel@tonic-gate 		 */
370*0Sstevel@tonic-gate 		delta = now;
371*0Sstevel@tonic-gate 		tsc_last_jumped += tsc_last;
372*0Sstevel@tonic-gate 		tsc_jumped = 1;
373*0Sstevel@tonic-gate 	} else {
374*0Sstevel@tonic-gate 		/*
375*0Sstevel@tonic-gate 		 * Determine the number of TSC ticks since the last clock
376*0Sstevel@tonic-gate 		 * tick, and add that to the hrtime base.
377*0Sstevel@tonic-gate 		 */
378*0Sstevel@tonic-gate 		delta = now - tsc_last;
379*0Sstevel@tonic-gate 	}
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate 	TSC_CONVERT_AND_ADD(delta, tsc_hrtime_base, nsec_scale);
382*0Sstevel@tonic-gate 	tsc_last = now;
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate 	CLOCK_UNLOCK(spl);
385*0Sstevel@tonic-gate }
386*0Sstevel@tonic-gate 
387*0Sstevel@tonic-gate hrtime_t
388*0Sstevel@tonic-gate tsc_gethrtime(void)
389*0Sstevel@tonic-gate {
390*0Sstevel@tonic-gate 	uint32_t old_hres_lock;
391*0Sstevel@tonic-gate 	hrtime_t tsc, hrt;
392*0Sstevel@tonic-gate 
393*0Sstevel@tonic-gate 	do {
394*0Sstevel@tonic-gate 		old_hres_lock = hres_lock;
395*0Sstevel@tonic-gate 
396*0Sstevel@tonic-gate 		if ((tsc = tsc_read()) >= tsc_last) {
397*0Sstevel@tonic-gate 			/*
398*0Sstevel@tonic-gate 			 * It would seem to be obvious that this is true
399*0Sstevel@tonic-gate 			 * (that is, the past is less than the present),
400*0Sstevel@tonic-gate 			 * but it isn't true in the presence of suspend/resume
401*0Sstevel@tonic-gate 			 * cycles.  If we manage to call gethrtime()
402*0Sstevel@tonic-gate 			 * after a resume, but before the first call to
403*0Sstevel@tonic-gate 			 * tsc_tick(), we will see the jump.  In this case,
404*0Sstevel@tonic-gate 			 * we will simply use the value in TSC as the delta.
405*0Sstevel@tonic-gate 			 */
406*0Sstevel@tonic-gate 			tsc -= tsc_last;
407*0Sstevel@tonic-gate 		} else if (tsc >= tsc_last - 2*tsc_max_delta) {
408*0Sstevel@tonic-gate 			/*
409*0Sstevel@tonic-gate 			 * There is a chance that tsc_tick() has just run on
410*0Sstevel@tonic-gate 			 * another CPU, and we have drifted just enough so that
411*0Sstevel@tonic-gate 			 * we appear behind tsc_last.  In this case, force the
412*0Sstevel@tonic-gate 			 * delta to be zero.
413*0Sstevel@tonic-gate 			 */
414*0Sstevel@tonic-gate 			tsc = 0;
415*0Sstevel@tonic-gate 		}
416*0Sstevel@tonic-gate 
417*0Sstevel@tonic-gate 		hrt = tsc_hrtime_base;
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate 		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
420*0Sstevel@tonic-gate 	} while ((old_hres_lock & ~1) != hres_lock);
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate 	return (hrt);
423*0Sstevel@tonic-gate }
424*0Sstevel@tonic-gate 
425*0Sstevel@tonic-gate /*
426*0Sstevel@tonic-gate  * This is similar to the above, but it cannot actually spin on hres_lock.
427*0Sstevel@tonic-gate  * As a result, it caches all of the variables it needs; if the variables
428*0Sstevel@tonic-gate  * don't change, it's done.
429*0Sstevel@tonic-gate  */
430*0Sstevel@tonic-gate hrtime_t
431*0Sstevel@tonic-gate dtrace_gethrtime(void)
432*0Sstevel@tonic-gate {
433*0Sstevel@tonic-gate 	uint32_t old_hres_lock;
434*0Sstevel@tonic-gate 	hrtime_t tsc, hrt;
435*0Sstevel@tonic-gate 
436*0Sstevel@tonic-gate 	do {
437*0Sstevel@tonic-gate 		old_hres_lock = hres_lock;
438*0Sstevel@tonic-gate 
439*0Sstevel@tonic-gate 		/*
440*0Sstevel@tonic-gate 		 * See the comments in tsc_gethrtime(), above.
441*0Sstevel@tonic-gate 		 */
442*0Sstevel@tonic-gate 		if ((tsc = tsc_read()) >= tsc_last)
443*0Sstevel@tonic-gate 			tsc -= tsc_last;
444*0Sstevel@tonic-gate 		else if (tsc >= tsc_last - 2*tsc_max_delta)
445*0Sstevel@tonic-gate 			tsc = 0;
446*0Sstevel@tonic-gate 
447*0Sstevel@tonic-gate 		hrt = tsc_hrtime_base;
448*0Sstevel@tonic-gate 
449*0Sstevel@tonic-gate 		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
450*0Sstevel@tonic-gate 
451*0Sstevel@tonic-gate 		if ((old_hres_lock & ~1) == hres_lock)
452*0Sstevel@tonic-gate 			break;
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 		/*
455*0Sstevel@tonic-gate 		 * If we're here, the clock lock is locked -- or it has been
456*0Sstevel@tonic-gate 		 * unlocked and locked since we looked.  This may be due to
457*0Sstevel@tonic-gate 		 * tsc_tick() running on another CPU -- or it may be because
458*0Sstevel@tonic-gate 		 * some code path has ended up in dtrace_probe() with
459*0Sstevel@tonic-gate 		 * CLOCK_LOCK held.  We'll try to determine that we're in
460*0Sstevel@tonic-gate 		 * the former case by taking another lap if the lock has
461*0Sstevel@tonic-gate 		 * changed since when we first looked at it.
462*0Sstevel@tonic-gate 		 */
463*0Sstevel@tonic-gate 		if (old_hres_lock != hres_lock)
464*0Sstevel@tonic-gate 			continue;
465*0Sstevel@tonic-gate 
466*0Sstevel@tonic-gate 		/*
467*0Sstevel@tonic-gate 		 * So the lock was and is locked.  We'll use the old data
468*0Sstevel@tonic-gate 		 * instead.
469*0Sstevel@tonic-gate 		 */
470*0Sstevel@tonic-gate 		old_hres_lock = shadow_hres_lock;
471*0Sstevel@tonic-gate 
472*0Sstevel@tonic-gate 		/*
473*0Sstevel@tonic-gate 		 * See the comments in tsc_gethrtime(), above.
474*0Sstevel@tonic-gate 		 */
475*0Sstevel@tonic-gate 		if ((tsc = tsc_read()) >= shadow_tsc_last)
476*0Sstevel@tonic-gate 			tsc -= shadow_tsc_last;
477*0Sstevel@tonic-gate 		else if (tsc >= shadow_tsc_last - 2*tsc_max_delta)
478*0Sstevel@tonic-gate 			tsc = 0;
479*0Sstevel@tonic-gate 
480*0Sstevel@tonic-gate 		hrt = shadow_tsc_hrtime_base;
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate 		TSC_CONVERT_AND_ADD(tsc, hrt, shadow_nsec_scale);
483*0Sstevel@tonic-gate 	} while ((old_hres_lock & ~1) != shadow_hres_lock);
484*0Sstevel@tonic-gate 
485*0Sstevel@tonic-gate 	return (hrt);
486*0Sstevel@tonic-gate }
487*0Sstevel@tonic-gate 
488*0Sstevel@tonic-gate hrtime_t
489*0Sstevel@tonic-gate tsc_gethrtime_delta(void)
490*0Sstevel@tonic-gate {
491*0Sstevel@tonic-gate 	hrtime_t hrt;
492*0Sstevel@tonic-gate 	int flags;
493*0Sstevel@tonic-gate 
494*0Sstevel@tonic-gate 	/*
495*0Sstevel@tonic-gate 	 * We need to disable interrupts here to assure that we don't migrate
496*0Sstevel@tonic-gate 	 * between the call to tsc_gethrtime() and adding the CPU's hrtime
497*0Sstevel@tonic-gate 	 * delta. Note that disabling and reenabling preemption is forbidden
498*0Sstevel@tonic-gate 	 * here because we may be in the middle of a fast trap. In the amd64
499*0Sstevel@tonic-gate 	 * kernel we cannot tolerate preemption during a fast trap. See
500*0Sstevel@tonic-gate 	 * _update_sregs().
501*0Sstevel@tonic-gate 	 */
502*0Sstevel@tonic-gate 
503*0Sstevel@tonic-gate 	flags = clear_int_flag();
504*0Sstevel@tonic-gate 	hrt = tsc_gethrtime() + tsc_sync_delta[CPU->cpu_id];
505*0Sstevel@tonic-gate 	restore_int_flag(flags);
506*0Sstevel@tonic-gate 
507*0Sstevel@tonic-gate 	return (hrt);
508*0Sstevel@tonic-gate }
509*0Sstevel@tonic-gate 
510*0Sstevel@tonic-gate extern uint64_t cpu_freq_hz;
511*0Sstevel@tonic-gate extern int tsc_gethrtime_enable;
512*0Sstevel@tonic-gate 
513*0Sstevel@tonic-gate /*
514*0Sstevel@tonic-gate  * The following converts nanoseconds of highres-time to ticks
515*0Sstevel@tonic-gate  */
516*0Sstevel@tonic-gate 
517*0Sstevel@tonic-gate static uint64_t
518*0Sstevel@tonic-gate hrtime2tick(hrtime_t ts)
519*0Sstevel@tonic-gate {
520*0Sstevel@tonic-gate 	hrtime_t q = ts / NANOSEC;
521*0Sstevel@tonic-gate 	hrtime_t r = ts - (q * NANOSEC);
522*0Sstevel@tonic-gate 
523*0Sstevel@tonic-gate 	return (q * cpu_freq_hz + ((r * cpu_freq_hz) / NANOSEC));
524*0Sstevel@tonic-gate }
525*0Sstevel@tonic-gate 
526*0Sstevel@tonic-gate /*
527*0Sstevel@tonic-gate  * This is used to convert scaled high-res time from nanoseconds to
528*0Sstevel@tonic-gate  * unscaled hardware ticks.  (Read from hardware timestamp counter)
529*0Sstevel@tonic-gate  */
530*0Sstevel@tonic-gate 
531*0Sstevel@tonic-gate uint64_t
532*0Sstevel@tonic-gate unscalehrtime(hrtime_t ts)
533*0Sstevel@tonic-gate {
534*0Sstevel@tonic-gate 	if (tsc_gethrtime_enable) {
535*0Sstevel@tonic-gate 		uint64_t unscale = 0;
536*0Sstevel@tonic-gate 		hrtime_t rescale;
537*0Sstevel@tonic-gate 		hrtime_t diff = ts;
538*0Sstevel@tonic-gate 
539*0Sstevel@tonic-gate 		while (diff > (nsec_per_tick)) {
540*0Sstevel@tonic-gate 			unscale += hrtime2tick(diff);
541*0Sstevel@tonic-gate 			rescale = unscale;
542*0Sstevel@tonic-gate 			scalehrtime(&rescale);
543*0Sstevel@tonic-gate 			diff = ts - rescale;
544*0Sstevel@tonic-gate 		}
545*0Sstevel@tonic-gate 
546*0Sstevel@tonic-gate 		return (unscale);
547*0Sstevel@tonic-gate 	}
548*0Sstevel@tonic-gate 	return (0);
549*0Sstevel@tonic-gate }
550*0Sstevel@tonic-gate 
551*0Sstevel@tonic-gate 
552*0Sstevel@tonic-gate hrtime_t
553*0Sstevel@tonic-gate tsc_gethrtimeunscaled(void)
554*0Sstevel@tonic-gate {
555*0Sstevel@tonic-gate 	uint32_t old_hres_lock;
556*0Sstevel@tonic-gate 	hrtime_t tsc;
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 	do {
559*0Sstevel@tonic-gate 		old_hres_lock = hres_lock;
560*0Sstevel@tonic-gate 
561*0Sstevel@tonic-gate 		if ((tsc = tsc_read()) < tsc_last) {
562*0Sstevel@tonic-gate 			/*
563*0Sstevel@tonic-gate 			 * see comments in tsc_gethrtime
564*0Sstevel@tonic-gate 			 */
565*0Sstevel@tonic-gate 			tsc += tsc_last_jumped;
566*0Sstevel@tonic-gate 		}
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	} while ((old_hres_lock & ~1) != hres_lock);
569*0Sstevel@tonic-gate 
570*0Sstevel@tonic-gate 	return (tsc);
571*0Sstevel@tonic-gate }
572*0Sstevel@tonic-gate 
573*0Sstevel@tonic-gate 
574*0Sstevel@tonic-gate /* Convert a tsc timestamp to nanoseconds */
575*0Sstevel@tonic-gate void
576*0Sstevel@tonic-gate tsc_scalehrtime(hrtime_t *tsc)
577*0Sstevel@tonic-gate {
578*0Sstevel@tonic-gate 	hrtime_t hrt;
579*0Sstevel@tonic-gate 	hrtime_t mytsc;
580*0Sstevel@tonic-gate 
581*0Sstevel@tonic-gate 	if (tsc == NULL)
582*0Sstevel@tonic-gate 		return;
583*0Sstevel@tonic-gate 	mytsc = *tsc;
584*0Sstevel@tonic-gate 
585*0Sstevel@tonic-gate 	TSC_CONVERT(mytsc, hrt, nsec_scale);
586*0Sstevel@tonic-gate 	*tsc  = hrt;
587*0Sstevel@tonic-gate }
588*0Sstevel@tonic-gate 
589*0Sstevel@tonic-gate hrtime_t
590*0Sstevel@tonic-gate tsc_gethrtimeunscaled_delta(void)
591*0Sstevel@tonic-gate {
592*0Sstevel@tonic-gate 	hrtime_t hrt;
593*0Sstevel@tonic-gate 	int flags;
594*0Sstevel@tonic-gate 
595*0Sstevel@tonic-gate 	/*
596*0Sstevel@tonic-gate 	 * Similarly to tsc_gethrtime_delta, we need to disable preemption
597*0Sstevel@tonic-gate 	 * to prevent migration between the call to tsc_gethrtimeunscaled
598*0Sstevel@tonic-gate 	 * and adding the CPU's hrtime delta. Note that disabling and
599*0Sstevel@tonic-gate 	 * reenabling preemption is forbidden here because we may be in the
600*0Sstevel@tonic-gate 	 * middle of a fast trap. In the amd64 kernel we cannot tolerate
601*0Sstevel@tonic-gate 	 * preemption during a fast trap. See _update_sregs().
602*0Sstevel@tonic-gate 	 */
603*0Sstevel@tonic-gate 
604*0Sstevel@tonic-gate 	flags = clear_int_flag();
605*0Sstevel@tonic-gate 	hrt = tsc_gethrtimeunscaled() + tsc_sync_tick_delta[CPU->cpu_id];
606*0Sstevel@tonic-gate 	restore_int_flag(flags);
607*0Sstevel@tonic-gate 
608*0Sstevel@tonic-gate 	return (hrt);
609*0Sstevel@tonic-gate }
610