xref: /onnv-gate/usr/src/uts/i86xpv/os/xpv_timestamp.c (revision 11330:06c12a9be468)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
265084Sjohnlev 
275084Sjohnlev #include <sys/types.h>
285084Sjohnlev #include <sys/clock.h>
295084Sjohnlev #include <sys/panic.h>
305084Sjohnlev #include <sys/atomic.h>
315084Sjohnlev #include <sys/hypervisor.h>
325084Sjohnlev 
335084Sjohnlev #include <sys/archsystm.h>
345084Sjohnlev 
/*
 * On the hypervisor, we have a virtualized system time based upon the
 * information provided for each VCPU, which is updated every time it is
 * scheduled onto a real CPU.  Thus, none of the traditional code in
 * i86pc/os/timestamp.c applies; our gethrtime() implementation is run through
 * the PSM, and there is no scaling step to apply.
 *
 * However, the platform does not guarantee monotonicity; thus we have to fake
 * this up, which is a deeply unpleasant thing to have to do.
 *
 * Note that the virtualized interface still relies on the current TSC to
 * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
 * subject to all the problems with that.  For the most part, the hypervisor is
 * supposed to deal with them.
 *
 * Another wrinkle involves suspend/resume/migration.  If we come back and time
 * is apparently less, we may have resumed on a different machine or on the
 * same machine after a reboot.  In this case we need to maintain an addend to
 * ensure time continues reasonably.  Otherwise we could end up taking a very
 * long time to expire cyclics in the heap.  Thus we have two functions:
 *
 * xpv_getsystime()
 *
 *	The unadulterated system time from the hypervisor.  This is only to be
 *	used when programming the hypervisor (setting a timer or calculating
 *	the TOD).
 *
 * xpv_gethrtime()
 *
 *	This is the monotonic hrtime counter to be used by everything else such
 *	as the cyclic subsystem.  We should never pass an hrtime directly into
 *	a hypervisor interface, as hrtime_addend may well be non-zero.
 */
685084Sjohnlev 
6910175SStuart.Maybee@Sun.COM int hrtime_fake_mt = 1;
705084Sjohnlev static volatile hrtime_t hrtime_last;
715084Sjohnlev static hrtime_t hrtime_suspend_time;
725084Sjohnlev static hrtime_t hrtime_addend;
735084Sjohnlev 
745084Sjohnlev /*
755084Sjohnlev  * These functions are used in DTrace probe context, and must be removed from
765084Sjohnlev  * fbt consideration.  Currently fbt ignores all weak symbols, so this will
775084Sjohnlev  * achieve that.
785084Sjohnlev  */
795084Sjohnlev #pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
805084Sjohnlev #pragma weak xpv_getsystime = dtrace_xpv_getsystime
815084Sjohnlev #pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
825084Sjohnlev #pragma weak tsc_read = dtrace_xpv_gethrtime
835084Sjohnlev 
845084Sjohnlev hrtime_t
dtrace_xpv_getsystime(void)855084Sjohnlev dtrace_xpv_getsystime(void)
865084Sjohnlev {
875084Sjohnlev 	vcpu_time_info_t *src;
885084Sjohnlev 	vcpu_time_info_t __vti, *dst = &__vti;
895084Sjohnlev 	uint64_t tsc_delta;
905084Sjohnlev 	uint64_t tsc;
915084Sjohnlev 	hrtime_t result;
92*11330SFrank.Vanderlinden@Sun.COM 	uint32_t stamp;
935084Sjohnlev 
945084Sjohnlev 	src = &CPU->cpu_m.mcpu_vcpu_info->time;
955084Sjohnlev 
965084Sjohnlev 	/*
975084Sjohnlev 	 * Loop until version has not been changed during our update, and a Xen
985084Sjohnlev 	 * update is not under way (lowest bit is set).
995084Sjohnlev 	 */
1005084Sjohnlev 	do {
1015084Sjohnlev 		dst->version = src->version;
102*11330SFrank.Vanderlinden@Sun.COM 		stamp = CPU->cpu_m.mcpu_istamp;
1035084Sjohnlev 
1045084Sjohnlev 		membar_consumer();
1055084Sjohnlev 
1065084Sjohnlev 		dst->tsc_timestamp = src->tsc_timestamp;
1075084Sjohnlev 		dst->system_time = src->system_time;
1085084Sjohnlev 		dst->tsc_to_system_mul = src->tsc_to_system_mul;
1095084Sjohnlev 		dst->tsc_shift = src->tsc_shift;
1105084Sjohnlev 
1115084Sjohnlev 		/*
1125084Sjohnlev 		 * Note that this use of the -actual- TSC register
1135084Sjohnlev 		 * should probably be the SOLE one in the system on this
1145084Sjohnlev 		 * paravirtualized platform.
1155084Sjohnlev 		 */
1165084Sjohnlev 		tsc = __rdtsc_insn();
1175084Sjohnlev 		tsc_delta = tsc - dst->tsc_timestamp;
1185084Sjohnlev 
1195084Sjohnlev 		membar_consumer();
1205084Sjohnlev 
121*11330SFrank.Vanderlinden@Sun.COM 	} while (((src->version & 1) | (dst->version ^ src->version)) ||
122*11330SFrank.Vanderlinden@Sun.COM 	    CPU->cpu_m.mcpu_istamp != stamp);
1235084Sjohnlev 
1245084Sjohnlev 	if (dst->tsc_shift >= 0)
1255084Sjohnlev 		tsc_delta <<= dst->tsc_shift;
1265084Sjohnlev 	else if (dst->tsc_shift < 0)
1275084Sjohnlev 		tsc_delta >>= -dst->tsc_shift;
1285084Sjohnlev 
1295084Sjohnlev 	result = dst->system_time +
1305084Sjohnlev 	    ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
1315084Sjohnlev 
1325084Sjohnlev 	return (result);
1335084Sjohnlev }
1345084Sjohnlev 
1355084Sjohnlev hrtime_t
dtrace_xpv_gethrtime(void)1365084Sjohnlev dtrace_xpv_gethrtime(void)
1375084Sjohnlev {
1385084Sjohnlev 	hrtime_t result = xpv_getsystime() + hrtime_addend;
1395084Sjohnlev 
1405084Sjohnlev 	if (hrtime_fake_mt) {
1415084Sjohnlev 		hrtime_t last;
1425084Sjohnlev 		do {
1435084Sjohnlev 			last = hrtime_last;
1445084Sjohnlev 			if (result < last)
1455084Sjohnlev 				result = last + 1;
1465084Sjohnlev 		} while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
1475084Sjohnlev 		    last, result) != last);
1485084Sjohnlev 	}
1495084Sjohnlev 
1505084Sjohnlev 	return (result);
1515084Sjohnlev }
1525084Sjohnlev 
1535084Sjohnlev void
xpv_time_suspend(void)1545084Sjohnlev xpv_time_suspend(void)
1555084Sjohnlev {
1565084Sjohnlev 	hrtime_suspend_time = xpv_getsystime();
1575084Sjohnlev }
1585084Sjohnlev 
1595084Sjohnlev void
xpv_time_resume(void)1605084Sjohnlev xpv_time_resume(void)
1615084Sjohnlev {
1625084Sjohnlev 	hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
1635084Sjohnlev 
1645084Sjohnlev 	if (delta < 0)
1655084Sjohnlev 		hrtime_addend += -delta;
1665084Sjohnlev }
167