xref: /onnv-gate/usr/src/uts/sun4/sys/clock.h (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #ifndef _SYS_CLOCK_H
28*0Sstevel@tonic-gate #define	_SYS_CLOCK_H
29*0Sstevel@tonic-gate 
30*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
31*0Sstevel@tonic-gate 
32*0Sstevel@tonic-gate #ifdef	__cplusplus
33*0Sstevel@tonic-gate extern "C" {
34*0Sstevel@tonic-gate #endif
35*0Sstevel@tonic-gate 
36*0Sstevel@tonic-gate #include <sys/spl.h>
37*0Sstevel@tonic-gate #include <sys/time.h>
38*0Sstevel@tonic-gate #include <sys/machclock.h>
39*0Sstevel@tonic-gate 
40*0Sstevel@tonic-gate #ifndef _ASM
41*0Sstevel@tonic-gate 
42*0Sstevel@tonic-gate #ifdef	_KERNEL
43*0Sstevel@tonic-gate 
/*
 * Kernel-only clock interfaces.  The notes below are limited to what this
 * header itself shows; the definitions live in other files.
 */
44*0Sstevel@tonic-gate extern void	setcpudelay(void);
45*0Sstevel@tonic-gate 
/*
 * nsec_scale is the tick-to-nanosecond scaling factor, (R * 2^32), that the
 * NATIVE_TIME_TO_NSEC* macros in this file multiply by.  sys_tick_freq is
 * the frequency that conversion is derived from (NANOSEC / sys_tick_freq);
 * GET_VTRACE_FREQUENCY loads it as a 64-bit value.
 */
46*0Sstevel@tonic-gate extern uint_t	nsec_scale;
47*0Sstevel@tonic-gate extern uint_t	nsec_shift;
48*0Sstevel@tonic-gate extern uint_t	nsec_per_sys_tick;
49*0Sstevel@tonic-gate extern uint64_t	sys_tick_freq;
50*0Sstevel@tonic-gate 
51*0Sstevel@tonic-gate extern int	traptrace_use_stick;
52*0Sstevel@tonic-gate extern uint64_t	system_clock_freq;
53*0Sstevel@tonic-gate extern uint_t	sys_clock_mhz;
54*0Sstevel@tonic-gate 
55*0Sstevel@tonic-gate extern void mon_clock_init(void);
56*0Sstevel@tonic-gate extern void mon_clock_start(void);
57*0Sstevel@tonic-gate extern void mon_clock_stop(void);
58*0Sstevel@tonic-gate extern void mon_clock_share(void);
59*0Sstevel@tonic-gate extern void mon_clock_unshare(void);
60*0Sstevel@tonic-gate 
/*
 * hrtime_base is the value to which GET_HRTIME adds the nanoseconds elapsed
 * since the last tick.  hres_tick() is the high-level cyclic handler that
 * acquires hres_lock.
 * NOTE(review): cbe_level14() is declared without a prototype, so calls to
 * it are not argument-checked; confirm its real signature before changing
 * the declaration to (void).
 */
61*0Sstevel@tonic-gate extern hrtime_t hrtime_base;
62*0Sstevel@tonic-gate extern void hres_tick(void);
63*0Sstevel@tonic-gate extern void	clkstart(void);
64*0Sstevel@tonic-gate extern void cbe_level14();
65*0Sstevel@tonic-gate extern hrtime_t tick2ns(hrtime_t, uint_t);
66*0Sstevel@tonic-gate 
/*
 * cbe_data_t holds two 32-bit values, one named for level 1 and one named
 * for level 10.
 * NOTE(review): presumably these are the interrupt numbers of the soft
 * interrupts posted at CBE_LOW_PIL and CBE_LOCK_PIL -- confirm against the
 * code that fills this structure in.
 */
67*0Sstevel@tonic-gate typedef struct {
68*0Sstevel@tonic-gate 	uint32_t cbe_level1_inum;
69*0Sstevel@tonic-gate 	uint32_t cbe_level10_inum;
70*0Sstevel@tonic-gate } cbe_data_t;
71*0Sstevel@tonic-gate 
72*0Sstevel@tonic-gate #endif	/* _KERNEL */
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate #endif	/* _ASM */
75*0Sstevel@tonic-gate 
76*0Sstevel@tonic-gate 
/*
 * Priority interrupt levels (PILs) used by the clock code.  CBE_HIGH_PIL is
 * the level CLOCK_LOCK() asks for before taking hres_lock: hres_lock is also
 * acquired by the high-level cyclic handler hres_tick(), so it must never be
 * taken below this level.  CBE_LOCK_PIL simply tracks LOCK_LEVEL, which is
 * defined outside this file.  These constants sit outside the _ASM guard, so
 * assembly sources see them too.
 */
77*0Sstevel@tonic-gate #define	CBE_LOW_PIL	1
78*0Sstevel@tonic-gate #define	CBE_LOCK_PIL	LOCK_LEVEL
79*0Sstevel@tonic-gate #define	CBE_HIGH_PIL	14
80*0Sstevel@tonic-gate 
81*0Sstevel@tonic-gate #define	ADJ_SHIFT	4	/* used in get_hrestime and _level10 */
82*0Sstevel@tonic-gate 
83*0Sstevel@tonic-gate /*
84*0Sstevel@tonic-gate  * Locking strategy for high-resolution timing services
85*0Sstevel@tonic-gate  *
86*0Sstevel@tonic-gate  * We generally construct timestamps from two or more components:
87*0Sstevel@tonic-gate  * a hardware time source and one or more software time sources.
88*0Sstevel@tonic-gate  * These components cannot all be loaded simultaneously, so we need
89*0Sstevel@tonic-gate  * some sort of locking strategy to generate consistent timestamps.
90*0Sstevel@tonic-gate  *
91*0Sstevel@tonic-gate  * To minimize lock contention and cache thrashing we employ the
92*0Sstevel@tonic-gate  * weakest possible synchronization model: writers (rare) serialize
93*0Sstevel@tonic-gate  * on an acquisition-counting mutex, described below; readers (common)
94*0Sstevel@tonic-gate  * execute in parallel with no synchronization at all -- they don't
95*0Sstevel@tonic-gate  * exclude other readers, and they don't even exclude writers.  Instead,
96*0Sstevel@tonic-gate  * readers just examine the writer lock's value before and after loading
97*0Sstevel@tonic-gate  * all the components of a timestamp to detect writer intervention.
98*0Sstevel@tonic-gate  * In the rare case when a writer does intervene, the reader will
99*0Sstevel@tonic-gate  * detect it, discard the timestamp and try again.
100*0Sstevel@tonic-gate  *
101*0Sstevel@tonic-gate  * The writer lock, hres_lock, is a 32-bit integer consisting of an
102*0Sstevel@tonic-gate  * 8-bit lock and a 24-bit acquisition count.  To acquire the lock we
103*0Sstevel@tonic-gate  * set the lock field with ldstub, which sets the low-order 8 bits to
104*0Sstevel@tonic-gate  * 0xff; to clear the lock, we increment it, which simultaneously clears
105*0Sstevel@tonic-gate  * the lock field (0xff --> 0x00) and increments the acquisition count
106*0Sstevel@tonic-gate  * (due to carry into bit 8).  Thus each acquisition transforms hres_lock
107*0Sstevel@tonic-gate  * from N:0 to N:ff, and each release transforms N:ff into (N+1):0.
108*0Sstevel@tonic-gate  *
109*0Sstevel@tonic-gate  * Readers can detect writer intervention by loading hres_lock before
110*0Sstevel@tonic-gate  * and after loading the time components they need; if either lock value
111*0Sstevel@tonic-gate  * contains 0xff in the low-order bits (lock held), or if the lock values
112*0Sstevel@tonic-gate  * are not equal (lock was acquired and released), a writer intervened
113*0Sstevel@tonic-gate  * and the reader must try again.  If the lock values are equal and the
114*0Sstevel@tonic-gate  * low-order 8 bits are clear, the timestamp must be valid.  We can check
115*0Sstevel@tonic-gate  * both of these conditions with a single compare instruction by checking
116*0Sstevel@tonic-gate  * whether (old_hres_lock & ~1) == new_hres_lock, as illustrated by the
117*0Sstevel@tonic-gate  * following table of all possible lock states:
118*0Sstevel@tonic-gate  *
119*0Sstevel@tonic-gate  *	initial	& ~1	final		result of compare
120*0Sstevel@tonic-gate  *	------------	-----		-----------------
121*0Sstevel@tonic-gate  *	now:00		now:00		valid
122*0Sstevel@tonic-gate  *	now:00		now:ff		invalid
123*0Sstevel@tonic-gate  *	now:00		later:00	invalid
124*0Sstevel@tonic-gate  *	now:00		later:ff	invalid
125*0Sstevel@tonic-gate  *	now:fe		now:ff		invalid
126*0Sstevel@tonic-gate  *	now:fe		later:00	invalid
127*0Sstevel@tonic-gate  *	now:fe		later:ff	invalid
128*0Sstevel@tonic-gate  *
129*0Sstevel@tonic-gate  * Implementation considerations:
130*0Sstevel@tonic-gate  *
131*0Sstevel@tonic-gate  * (1) Load buffering.
132*0Sstevel@tonic-gate  *
133*0Sstevel@tonic-gate  * On a CPU that does load buffering we must ensure that the load of
134*0Sstevel@tonic-gate  * hres_lock completes before the load of any timestamp components.
135*0Sstevel@tonic-gate  * This is essential *even on a CPU that does in-order loads* because
136*0Sstevel@tonic-gate  * accessing the hardware time source may not involve a memory reference
137*0Sstevel@tonic-gate  * (e.g. rd %tick).  A convenient way to address this is to clear the
138*0Sstevel@tonic-gate  * lower bit (andn with 1) of the old lock value right away, since this
139*0Sstevel@tonic-gate  * generates a dependency on the load of hres_lock.  We have to do this
140*0Sstevel@tonic-gate  * anyway to perform the lock comparison described above.
141*0Sstevel@tonic-gate  *
142*0Sstevel@tonic-gate  * (2) Out-of-order loads.
143*0Sstevel@tonic-gate  *
144*0Sstevel@tonic-gate  * On a CPU that does out-of-order loads we must ensure that the loads
145*0Sstevel@tonic-gate  * of all timestamp components have completed before we load the final
146*0Sstevel@tonic-gate  * value of hres_lock.  This can be done either by generating load
147*0Sstevel@tonic-gate  * dependencies on the timestamp components or by membar #LoadLoad.
148*0Sstevel@tonic-gate  *
149*0Sstevel@tonic-gate  * (3) Interaction with the high level cyclic handler, hres_tick().
150*0Sstevel@tonic-gate  *
151*0Sstevel@tonic-gate  * One unusual property of hres_lock is that it's acquired in a high
152*0Sstevel@tonic-gate  * level cyclic handler, hres_tick().  Thus, hres_lock must be acquired at
153*0Sstevel@tonic-gate  * CBE_HIGH_PIL or higher to prevent single-CPU deadlock.
154*0Sstevel@tonic-gate  *
155*0Sstevel@tonic-gate  * (4) Cross-calls.
156*0Sstevel@tonic-gate  *
157*0Sstevel@tonic-gate  * If a cross-call happens while one CPU has hres_lock and another is
158*0Sstevel@tonic-gate  * trying to acquire it in the clock interrupt path, the system will
159*0Sstevel@tonic-gate  * deadlock: the first CPU will never release hres_lock since it's
160*0Sstevel@tonic-gate  * waiting to be released from the cross-call, and the cross-call can't
161*0Sstevel@tonic-gate  * complete because the second CPU is spinning on hres_lock with traps
162*0Sstevel@tonic-gate  * disabled.  Thus cross-calls must be blocked while holding hres_lock.
163*0Sstevel@tonic-gate  *
164*0Sstevel@tonic-gate  * Together, (3) and (4) imply that hres_lock should only be acquired
165*0Sstevel@tonic-gate  * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled.
166*0Sstevel@tonic-gate  */
/*
 * HRES_LOCK_OFFSET is the byte offset, within the 32-bit hres_lock word, of
 * the 8-bit lock field.  Byte 3 is the low-order byte on big-endian SPARC,
 * which leaves the upper 24 bits as the acquisition count.
 */
167*0Sstevel@tonic-gate #define	HRES_LOCK_OFFSET 3
168*0Sstevel@tonic-gate 
/*
 * CLOCK_LOCK(oldsplp): take the lock byte of hres_lock through
 * lock_set_spl(), requesting the spl that corresponds to CBE_HIGH_PIL.
 * The expansion is a single expression with no trailing semicolon.
 * NOTE(review): presumably lock_set_spl() stores the previous spl through
 * oldsplp so that it can later be handed to CLOCK_UNLOCK() -- confirm
 * against its definition.
 */
169*0Sstevel@tonic-gate #define	CLOCK_LOCK(oldsplp)	\
170*0Sstevel@tonic-gate 	lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \
171*0Sstevel@tonic-gate 		ipltospl(CBE_HIGH_PIL), oldsplp)
172*0Sstevel@tonic-gate 
/*
 * CLOCK_UNLOCK(spl): release hres_lock and restore the interrupt level.
 *
 * membar_ldst_stst() is issued first so that accesses made while holding
 * the lock are ordered before the releasing store (going by its name, a
 * #LoadStore | #StoreStore barrier).  Incrementing the 32-bit hres_lock
 * carries out of the low-order lock byte (0xff -> 0x00) into the
 * acquisition count, so one increment both drops the lock and tells
 * readers that a writer intervened.  The level passed in 'spl' is then
 * restored with splx(), and the release is reported to lockstat.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: an unbraced "if" guards every step rather than
 * only the barrier, and "if (x) CLOCK_UNLOCK(s); else ..." is well formed.
 * Invoke it with a trailing semicolon.
 */
#define	CLOCK_UNLOCK(spl)	do {					\
	membar_ldst_stst();						\
	hres_lock++;							\
	splx(spl);							\
	LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE,			\
		(lock_t *)&hres_lock + HRES_LOCK_OFFSET);		\
} while (0)
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate /*
181*0Sstevel@tonic-gate  * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res
182*0Sstevel@tonic-gate  * timestamps into nanoseconds. On systems that have a %stick register,
183*0Sstevel@tonic-gate  * hi-res timestamps are in %stick units. On systems that do not have a
184*0Sstevel@tonic-gate  * %stick register, hi-res timestamps are in %tick units.
185*0Sstevel@tonic-gate  *
186*0Sstevel@tonic-gate  * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from
187*0Sstevel@tonic-gate  * %tick units to nanoseconds on all implementations whether %stick is
188*0Sstevel@tonic-gate  * available or not.
189*0Sstevel@tonic-gate  */
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate /*
192*0Sstevel@tonic-gate  * At least 62.5 MHz CPU %tick frequency
193*0Sstevel@tonic-gate  */
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate #define	TICK_NSEC_SHIFT	4	/* shift to pass when the input is raw %tick */
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate /*
198*0Sstevel@tonic-gate  * Convert hi-res native time (V9's %tick in our case) into nanoseconds.
199*0Sstevel@tonic-gate  *
200*0Sstevel@tonic-gate  * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
201*0Sstevel@tonic-gate  * without using floating point and without overflowing 64-bit integers.
202*0Sstevel@tonic-gate  * We assume that all sun4u systems will have a 16 nsec or better clock
203*0Sstevel@tonic-gate  * (i.e. faster than 62.5 MHz), which means that (ticks << 4) counts in
204*0Sstevel@tonic-gate  * units no longer than one nanosecond, so converting from (ticks << 4) to nsec
205*0Sstevel@tonic-gate  * requires multiplication by a rational number, R, between 0 and 1.
206*0Sstevel@tonic-gate  * To avoid floating-point we precompute (R * 2^32) during boot and
207*0Sstevel@tonic-gate  * stash this away in nsec_scale.  Thus we can compute (tick * R) as
208*0Sstevel@tonic-gate  * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
209*0Sstevel@tonic-gate  *
210*0Sstevel@tonic-gate  * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
211*0Sstevel@tonic-gate  * we split (tick << 4) into its high and low 32-bit pieces, H and L,
212*0Sstevel@tonic-gate  * multiply each piece separately, and add up the relevant bits of the
213*0Sstevel@tonic-gate  * partial products.  Putting it all together we have:
214*0Sstevel@tonic-gate  *
215*0Sstevel@tonic-gate  * nsec = (tick << 4) * R
216*0Sstevel@tonic-gate  *	= ((tick << 4) * nsec_scale) >> 32
217*0Sstevel@tonic-gate  *	= (((H << 32) + L) * nsec_scale) >> 32
218*0Sstevel@tonic-gate  *	= (H * nsec_scale) + ((L * nsec_scale) >> 32)
219*0Sstevel@tonic-gate  *
220*0Sstevel@tonic-gate  * The last line is the computation we actually perform: it requires no
221*0Sstevel@tonic-gate  * floating point and all intermediate results fit in 64-bit registers.
222*0Sstevel@tonic-gate  *
223*0Sstevel@tonic-gate  * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
224*0Sstevel@tonic-gate  * greater values will result in overflow and misbehavior (not that this
225*0Sstevel@tonic-gate  * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
226*0Sstevel@tonic-gate  * thirty-six years).  Nonetheless, clients may wish to be aware of this
227*0Sstevel@tonic-gate  * limitation; NATIVE_TIME_MAX() returns this maximum native time.
228*0Sstevel@tonic-gate  *
229*0Sstevel@tonic-gate  * We provide two versions of this macro: a "full-service" version that
230*0Sstevel@tonic-gate  * just converts ticks to nanoseconds and a higher-performance version that
231*0Sstevel@tonic-gate  * expects the scaling factor nsec_scale as its second argument (so that
232*0Sstevel@tonic-gate  * callers can distance the load of nsec_scale from its use).  Note that
233*0Sstevel@tonic-gate  * we take a fast path if we determine the ticks to be less than 32 bits
234*0Sstevel@tonic-gate  * (as it often is for the delta between %tick values for successive
235*0Sstevel@tonic-gate  * firings of the hres_tick() cyclic).
236*0Sstevel@tonic-gate  *
237*0Sstevel@tonic-gate  * Note that in the 32-bit path we don't even bother clearing NPT.
238*0Sstevel@tonic-gate  * We get away with this by making hardclk.c ensure that nsec_scale
239*0Sstevel@tonic-gate  * is even, so we can take advantage of the associativity of modular
240*0Sstevel@tonic-gate  * arithmetic: multiplying %tick by any even number, say 2*n, is
241*0Sstevel@tonic-gate  * equivalent to multiplying %tick by 2, then by n.  Multiplication
242*0Sstevel@tonic-gate  * by 2 is equivalent to shifting left by one, which clears NPT.
243*0Sstevel@tonic-gate  *
244*0Sstevel@tonic-gate  * Finally, note that the macros use the labels "6:" and "7:"; these
245*0Sstevel@tonic-gate  * labels must not be used across an invocation of either macro.
246*0Sstevel@tonic-gate  */
/*
 * On entry:	out  = native time in ticks
 *		scr1 = scaling factor F (callers pass nsec_scale)
 * On exit:	out  = nanoseconds
 *		scr2 is clobbered on both paths; scr1 is overwritten on the
 *		64-bit path and left intact on the 32-bit fast path.
 * 'shift' must be an assembly-time constant (it is also used as 32 - shift).
 * Local labels 6 and 7 are defined by this macro.
 */
247*0Sstevel@tonic-gate #define	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift)		\
248*0Sstevel@tonic-gate 	srlx	out, 32, scr2;		/* scr2 = high 32 bits of out */ \
249*0Sstevel@tonic-gate /* CSTYLED */ 								\
250*0Sstevel@tonic-gate 	brz,a,pt scr2, 6f;		/* if clear, 32-bit fast path */\
251*0Sstevel@tonic-gate 	mulx	out, scr1, out;		/* annulled delay slot: runs */	\
					/* only when the branch is taken */ \
252*0Sstevel@tonic-gate 	sllx	out, shift, out;	/* clear NPT and pre-scale */	\
253*0Sstevel@tonic-gate 	srlx	out, 32, scr2;		/* scr2 = hi32(tick<<shift) = H */ \
254*0Sstevel@tonic-gate 	mulx	scr2, scr1, scr2;	/* scr2 = (H*F) */		\
255*0Sstevel@tonic-gate 	srl	out, 0, out;		/* out = lo32(tick<<shift) = L */ \
256*0Sstevel@tonic-gate 	mulx	out, scr1, scr1;	/* scr1 = (L*F) */		\
257*0Sstevel@tonic-gate 	srlx	scr1, 32, scr1;		/* scr1 = (L*F) >> 32 */	\
258*0Sstevel@tonic-gate 	ba	7f;			/* branch over 32-bit path */	\
259*0Sstevel@tonic-gate 	add	scr1, scr2, out;	/* delay: out = (H*F) + ((L*F) >> 32) */\
260*0Sstevel@tonic-gate 6:					/* fast path: out holds tick*F */ \
261*0Sstevel@tonic-gate 	srlx	out, 32 - shift, out;	/* == ((tick<<shift)*F) >> 32 */ \
262*0Sstevel@tonic-gate 7:
263*0Sstevel@tonic-gate 
/*
 * "Full-service" conversion: loads the 32-bit nsec_scale into scr1 and then
 * expands NATIVE_TIME_TO_NSEC_SCALE with NSEC_SHIFT.
 * On entry out holds ticks; on exit out holds nanoseconds.  scr1 and scr2
 * are both overwritten.  Labels 6 and 7 are used by the nested macro.
 */
264*0Sstevel@tonic-gate #define	NATIVE_TIME_TO_NSEC(out, scr1, scr2)				\
265*0Sstevel@tonic-gate 	sethi	%hi(nsec_scale), scr1;	/* load scaling factor */	\
266*0Sstevel@tonic-gate 	ld	[scr1 + %lo(nsec_scale)], scr1;	/* scr1 = nsec_scale */	\
267*0Sstevel@tonic-gate 	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT);
268*0Sstevel@tonic-gate 
/*
 * Sets out to the largest native time the conversion macros accept:
 * all ones shifted right (logically) by NSEC_SHIFT, that is
 * (1 << (64 - NSEC_SHIFT)) - 1.  Unlike the other macros here, the
 * expansion does not end in a semicolon.
 */
269*0Sstevel@tonic-gate #define	NATIVE_TIME_MAX(out)						\
270*0Sstevel@tonic-gate 	mov	-1, out;		/* out = all ones */		\
271*0Sstevel@tonic-gate 	srlx	out, NSEC_SHIFT, out
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate /*
275*0Sstevel@tonic-gate  * The following macros are only for use in the cpu module.
276*0Sstevel@tonic-gate  */
277*0Sstevel@tonic-gate #if defined(CPU_MODULE)
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate /*
280*0Sstevel@tonic-gate  * NSEC_SHIFT and VTRACE_SHIFT constants are defined in
281*0Sstevel@tonic-gate  * <sys/machclock.h> file.
282*0Sstevel@tonic-gate  */
283*0Sstevel@tonic-gate 
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate /*
286*0Sstevel@tonic-gate  * NOTE: the macros below assume that the various time-related variables
287*0Sstevel@tonic-gate  * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc)
288*0Sstevel@tonic-gate  * are all stored together on a 64-byte boundary.  The primary motivation
289*0Sstevel@tonic-gate  * is cache performance, but we also take advantage of a convenient side
290*0Sstevel@tonic-gate  * effect: these variables all have the same high 22 address bits, so only
291*0Sstevel@tonic-gate  * one sethi is needed to access them all.
292*0Sstevel@tonic-gate  */
293*0Sstevel@tonic-gate 
294*0Sstevel@tonic-gate /*
295*0Sstevel@tonic-gate  * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the
296*0Sstevel@tonic-gate  * number of nanoseconds since the last clock tick ('nslt').  It also
297*0Sstevel@tonic-gate  * sets 'nano' to the value NANOSEC (one billion).
298*0Sstevel@tonic-gate  *
299*0Sstevel@tonic-gate  * This macro assumes that all registers are globals or outs so they can
300*0Sstevel@tonic-gate  * safely contain 64-bit data, and that it's safe to use the label "5:".
301*0Sstevel@tonic-gate  * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn
302*0Sstevel@tonic-gate  * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not
303*0Sstevel@tonic-gate  * be used across invocations of this macro.
304*0Sstevel@tonic-gate  */
/*
 * Register roles, as used by the code below:
 *	hrestsec, hrestnsec	out: the two longs of hrestime
 *	adj			out: hrestime_adj (scratch for %tick earlier)
 *	nslt			out: nanoseconds since the last clock tick
 *	nano			out: NANOSEC (holds nsec_scale until then)
 *	scr, hrlock		scratch: address base and lock snapshots
 *	gnt1, gnt2		scratch handed to GET_NATIVE_TIME; gnt1 is
 *				reused by NATIVE_TIME_TO_NSEC_SCALE
 * The whole sequence restarts at label 5 if hres_lock was held or changed
 * while the components were being loaded.
 */
304*0Sstevel@tonic-gate #define	GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \
305*0Sstevel@tonic-gate     gnt1, gnt2) \
306*0Sstevel@tonic-gate 5:	sethi	%hi(hres_lock), scr;	/* one sethi serves all vars */	\
307*0Sstevel@tonic-gate 	lduw	[scr + %lo(hres_lock)], hrlock;	/* initial lock value */ \
308*0Sstevel@tonic-gate 	lduw	[scr + %lo(nsec_scale)], nano;	/* tick-to-ns factor */	\
309*0Sstevel@tonic-gate 	andn	hrlock, 1, hrlock;  	/* held lock (ff) becomes fe */	\
310*0Sstevel@tonic-gate 	ldx	[scr + %lo(hres_last_tick)], nslt; /* time of last tick */ \
311*0Sstevel@tonic-gate 	ldn	[scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\
312*0Sstevel@tonic-gate 	add	scr, %lo(hrestime), hrestnsec;	/* &hrestime */		\
313*0Sstevel@tonic-gate 	ldn	[hrestnsec + CLONGSIZE], hrestnsec; /* second long */	\
314*0Sstevel@tonic-gate 	GET_NATIVE_TIME(adj, gnt1, gnt2);	/* get current %tick */	\
315*0Sstevel@tonic-gate 	subcc	adj, nslt, nslt; /* nslt = ticks since last clockint */	\
316*0Sstevel@tonic-gate 	movneg	%xcc, %g0, nslt; /* ignore neg delta from tick skew */	\
317*0Sstevel@tonic-gate 	ldx	[scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */	\
318*0Sstevel@tonic-gate 	/* membar #LoadLoad; (see comment (2) above) */			\
319*0Sstevel@tonic-gate 	lduw	[scr + %lo(hres_lock)], scr; /* final lock value */	\
320*0Sstevel@tonic-gate 	NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT);	\
321*0Sstevel@tonic-gate 	sethi	%hi(NANOSEC), nano;	/* nano = NANOSEC, high part */	\
322*0Sstevel@tonic-gate 	xor	hrlock, scr, scr;	/* nonzero: writer intervened */ \
323*0Sstevel@tonic-gate /* CSTYLED */ 								\
324*0Sstevel@tonic-gate 	brnz,pn	scr, 5b;		/* retry from the top */	\
325*0Sstevel@tonic-gate 	or	nano, %lo(NANOSEC), nano;	/* delay: finish NANOSEC */
327*0Sstevel@tonic-gate 
/*
 * GET_HRTIME() is the gethrtime() counterpart of GET_HRESTIME(): it leaves
 * the current high-resolution time, hrtime_base plus the nanoseconds since
 * the last clock tick, in 'base'.
 *
 * Register roles:
 *	base		out: hrtime_base + nanoseconds since the last tick
 *	now		scratch: current native time from GET_NATIVE_TIME
 *	nslt		out: nanoseconds since the last clock tick
 *	scale		scratch: nsec_scale (may be overwritten by the
 *			conversion macro)
 *	scr, hrlock	scratch: address base and hres_lock snapshots
 *	gnt1, gnt2	scratch handed to GET_NATIVE_TIME; gnt1 is reused by
 *			NATIVE_TIME_TO_NSEC_SCALE
 *
 * The sequence restarts at label 5 whenever hres_lock was held or changed
 * while the components were loaded.  The add in the branch delay slot also
 * executes on a retry, which is harmless because 'base' is reloaded.
 * Labels 5, 6 and 7 must not be used across an invocation.
 *
 * Both hres_lock loads use lduw, matching GET_HRESTIME(); lduw is the V9
 * spelling of the older ld mnemonic and assembles to the same instruction.
 */
#define	GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2)	\
5:	sethi	%hi(hres_lock), scr;	/* one sethi serves all vars */	\
	lduw	[scr + %lo(hres_lock)], hrlock;	/* initial lock value */ \
	lduw	[scr + %lo(nsec_scale)], scale;	/* tick-to-ns factor */	\
	andn	hrlock, 1, hrlock;	/* held lock (ff) becomes fe */	\
	ldx	[scr + %lo(hres_last_tick)], nslt; /* time of last tick */ \
	ldx	[scr + %lo(hrtime_base)], base;	/* load hrtime_base */	\
	GET_NATIVE_TIME(now, gnt1, gnt2);	/* get current %tick */	\
	subcc	now, nslt, nslt; /* nslt = ticks since last clockint */	\
	movneg	%xcc, %g0, nslt; /* ignore neg delta from tick skew */	\
	/* membar #LoadLoad; (see comment (2) above) */			\
	lduw	[scr + %lo(hres_lock)], scr; /* final lock value */	\
	NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT);	\
	xor	hrlock, scr, scr;	/* nonzero: writer intervened */ \
/* CSTYLED */ 								\
	brnz,pn	scr, 5b;		/* retry from the top */	\
	add	base, nslt, base;	/* delay: base += nslt */
348*0Sstevel@tonic-gate 
349*0Sstevel@tonic-gate /*
350*0Sstevel@tonic-gate  * Maximum-performance timestamp for kernel tracing.  We don't bother
351*0Sstevel@tonic-gate  * clearing NPT because vtrace expresses everything in 32-bit deltas,
352*0Sstevel@tonic-gate  * so only the low-order 32 bits matter.  We do shift down a few bits,
353*0Sstevel@tonic-gate  * however, so that the trace framework doesn't emit a ridiculous number
354*0Sstevel@tonic-gate  * of 32_bit_elapsed_time records (trace points are more expensive when
355*0Sstevel@tonic-gate  * the time since the last trace point doesn't fit in a 16-bit delta).
356*0Sstevel@tonic-gate  * We currently shift by 4 (divide by 16) on the grounds that (1) there's
357*0Sstevel@tonic-gate  * no point making the timing finer-grained than the trace point latency,
358*0Sstevel@tonic-gate  * which exceeds 16 cycles; and (2) the cost and probe effect of many
359*0Sstevel@tonic-gate  * 32-bit time records far exceeds the cost of the 'srlx' instruction.
360*0Sstevel@tonic-gate  */
/*
 * out = native time >> VTRACE_SHIFT.  The shift is logical and NPT is not
 * cleared first, so only the low-order bits of the result are meaningful.
 * NOTE(review): GET_NATIVE_TIME is defined outside this file; scr1 and scr2
 * are simply handed to it.
 */
361*0Sstevel@tonic-gate #define	GET_VTRACE_TIME(out, scr1, scr2)				\
362*0Sstevel@tonic-gate 	GET_NATIVE_TIME(out, scr1, scr2);	/* get current %tick */	\
363*0Sstevel@tonic-gate 	srlx	out, VTRACE_SHIFT, out;
364*0Sstevel@tonic-gate 
365*0Sstevel@tonic-gate /*
366*0Sstevel@tonic-gate  * Full 64-bit version for those truly rare occasions when you need it.
367*0Sstevel@tonic-gate  * Currently this is only needed to generate the TR_START_TIME record.
368*0Sstevel@tonic-gate  */
/*
 * Same scaling as GET_VTRACE_TIME, but with the top bit (NPT) removed so
 * that all 64 bits of the result are valid: the value is doubled, which
 * shifts bit 63 out, and then shifted right by one extra position.
 */
369*0Sstevel@tonic-gate #define	GET_VTRACE_TIME_64(out, scr1, scr2)				\
370*0Sstevel@tonic-gate 	GET_NATIVE_TIME(out, scr1, scr2);	/* get current %tick */	\
371*0Sstevel@tonic-gate 	add	out, out, out;		/* out <<= 1, dropping NPT */	\
372*0Sstevel@tonic-gate 	srlx	out, VTRACE_SHIFT + 1, out;	/* undo doubling, scale */
373*0Sstevel@tonic-gate 
374*0Sstevel@tonic-gate /*
375*0Sstevel@tonic-gate  * Return the rate at which the vtrace clock runs.
376*0Sstevel@tonic-gate  */
/*
 * out = sys_tick_freq >> VTRACE_SHIFT, i.e. the tick frequency scaled the
 * same way GET_VTRACE_TIME scales timestamps.  scr1 and scr2 are accepted
 * but not used by this macro.
 */
377*0Sstevel@tonic-gate #define	GET_VTRACE_FREQUENCY(out, scr1, scr2)				\
378*0Sstevel@tonic-gate 	sethi	%hi(sys_tick_freq), out;				\
379*0Sstevel@tonic-gate 	ldx	[out + %lo(sys_tick_freq)], out; /* 64-bit load */	\
380*0Sstevel@tonic-gate 	srlx	out, VTRACE_SHIFT, out;
381*0Sstevel@tonic-gate 
382*0Sstevel@tonic-gate #endif /* CPU_MODULE */
383*0Sstevel@tonic-gate 
384*0Sstevel@tonic-gate #ifdef	__cplusplus
385*0Sstevel@tonic-gate }
386*0Sstevel@tonic-gate #endif
387*0Sstevel@tonic-gate 
388*0Sstevel@tonic-gate #endif	/* !_SYS_CLOCK_H */
389