xref: /onnv-gate/usr/src/uts/sun4v/os/suspend.c (revision 12987:9e5c3f16523e)
111172SHaik.Aftandilian@Sun.COM /*
211172SHaik.Aftandilian@Sun.COM  * CDDL HEADER START
311172SHaik.Aftandilian@Sun.COM  *
411172SHaik.Aftandilian@Sun.COM  * The contents of this file are subject to the terms of the
511172SHaik.Aftandilian@Sun.COM  * Common Development and Distribution License (the "License").
611172SHaik.Aftandilian@Sun.COM  * You may not use this file except in compliance with the License.
711172SHaik.Aftandilian@Sun.COM  *
811172SHaik.Aftandilian@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
911172SHaik.Aftandilian@Sun.COM  * or http://www.opensolaris.org/os/licensing.
1011172SHaik.Aftandilian@Sun.COM  * See the License for the specific language governing permissions
1111172SHaik.Aftandilian@Sun.COM  * and limitations under the License.
1211172SHaik.Aftandilian@Sun.COM  *
1311172SHaik.Aftandilian@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
1411172SHaik.Aftandilian@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1511172SHaik.Aftandilian@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
1611172SHaik.Aftandilian@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
1711172SHaik.Aftandilian@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
1811172SHaik.Aftandilian@Sun.COM  *
1911172SHaik.Aftandilian@Sun.COM  * CDDL HEADER END
2011172SHaik.Aftandilian@Sun.COM  */
2111172SHaik.Aftandilian@Sun.COM /*
2212260SHaik.Aftandilian@Sun.COM  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
2311172SHaik.Aftandilian@Sun.COM  */
2411172SHaik.Aftandilian@Sun.COM 
2511172SHaik.Aftandilian@Sun.COM #include <sys/mutex.h>
2611172SHaik.Aftandilian@Sun.COM #include <sys/cpuvar.h>
2711172SHaik.Aftandilian@Sun.COM #include <sys/cyclic.h>
2811172SHaik.Aftandilian@Sun.COM #include <sys/disp.h>
2911172SHaik.Aftandilian@Sun.COM #include <sys/ddi.h>
3011172SHaik.Aftandilian@Sun.COM #include <sys/wdt.h>
3111172SHaik.Aftandilian@Sun.COM #include <sys/callb.h>
3211172SHaik.Aftandilian@Sun.COM #include <sys/cmn_err.h>
3311172SHaik.Aftandilian@Sun.COM #include <sys/hypervisor_api.h>
3411172SHaik.Aftandilian@Sun.COM #include <sys/membar.h>
3511172SHaik.Aftandilian@Sun.COM #include <sys/x_call.h>
3611172SHaik.Aftandilian@Sun.COM #include <sys/promif.h>
3711172SHaik.Aftandilian@Sun.COM #include <sys/systm.h>
3811172SHaik.Aftandilian@Sun.COM #include <sys/mach_descrip.h>
3911172SHaik.Aftandilian@Sun.COM #include <sys/cpu_module.h>
4011172SHaik.Aftandilian@Sun.COM #include <sys/pg.h>
4111172SHaik.Aftandilian@Sun.COM #include <sys/lgrp.h>
4211172SHaik.Aftandilian@Sun.COM #include <sys/sysmacros.h>
4311172SHaik.Aftandilian@Sun.COM #include <sys/sunddi.h>
4411172SHaik.Aftandilian@Sun.COM #include <sys/cpupart.h>
4511172SHaik.Aftandilian@Sun.COM #include <sys/hsvc.h>
4612013SHaik.Aftandilian@Sun.COM #include <sys/mpo.h>
4711713SPavel.Tatashin@Sun.COM #include <vm/hat_sfmmu.h>
4812015SHaik.Aftandilian@Sun.COM #include <sys/time.h>
4912015SHaik.Aftandilian@Sun.COM #include <sys/clock.h>
5011172SHaik.Aftandilian@Sun.COM 
5111172SHaik.Aftandilian@Sun.COM /*
5211172SHaik.Aftandilian@Sun.COM  * Sun4v OS Suspend
5311172SHaik.Aftandilian@Sun.COM  *
5411172SHaik.Aftandilian@Sun.COM  * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
5511172SHaik.Aftandilian@Sun.COM  * calling into the HV to initiate a suspension. Suspension is sequenced
5611172SHaik.Aftandilian@Sun.COM  * externally by calling suspend_pre, suspend_start, and suspend_post.
5711172SHaik.Aftandilian@Sun.COM  * suspend_pre and suspend_post are meant to perform any special operations
5811172SHaik.Aftandilian@Sun.COM  * that should be done before or after a suspend/resume operation. e.g.,
5911172SHaik.Aftandilian@Sun.COM  * callbacks to cluster software to disable heartbeat monitoring before the
6011172SHaik.Aftandilian@Sun.COM  * system is suspended. suspend_start prepares kernel services to be suspended
6111172SHaik.Aftandilian@Sun.COM  * and then suspends the domain by calling hv_guest_suspend.
6211172SHaik.Aftandilian@Sun.COM  *
6311172SHaik.Aftandilian@Sun.COM  * Special Handling for %tick and %stick Registers
6411172SHaik.Aftandilian@Sun.COM  *
6511172SHaik.Aftandilian@Sun.COM  * After a suspend/resume operation, the %tick and %stick registers may have
6611172SHaik.Aftandilian@Sun.COM  * jumped forwards or backwards. The delta is assumed to be consistent across
6711172SHaik.Aftandilian@Sun.COM  * all CPUs, within the negligible level of %tick and %stick variation
6811172SHaik.Aftandilian@Sun.COM  * acceptable on a cold boot. In order to maintain increasing %tick and %stick
6911172SHaik.Aftandilian@Sun.COM  * counter values without exposing large positive or negative jumps to kernel
7011172SHaik.Aftandilian@Sun.COM  * or user code, a %tick and %stick offset is used. Kernel reads of these
7111172SHaik.Aftandilian@Sun.COM  * counters return the sum of the hardware register counter and offset
7211172SHaik.Aftandilian@Sun.COM  * variable. After a suspend/resume operation, user reads of %tick or %stick
7311172SHaik.Aftandilian@Sun.COM  * are emulated. Suspend code enables emulation by setting the
7411172SHaik.Aftandilian@Sun.COM  * %{tick,stick}.NPT fields which trigger a privileged instruction access
7511172SHaik.Aftandilian@Sun.COM  * trap whenever the registers are read from user mode. If emulation has been
7611172SHaik.Aftandilian@Sun.COM  * enabled, the trap handler emulates the instruction. Emulation is only
7711172SHaik.Aftandilian@Sun.COM  * enabled during a successful suspend/resume operation. When emulation is
7811172SHaik.Aftandilian@Sun.COM  * enabled, CPUs that are DR'd into the system will have their
7911172SHaik.Aftandilian@Sun.COM  * %{tick,stick}.NPT bits set to 1 as well.
8011172SHaik.Aftandilian@Sun.COM  */
8111172SHaik.Aftandilian@Sun.COM 
8211172SHaik.Aftandilian@Sun.COM extern u_longlong_t gettick(void);	/* returns %stick */
8311172SHaik.Aftandilian@Sun.COM extern uint64_t gettick_counter(void);	/* returns %tick */
8411172SHaik.Aftandilian@Sun.COM extern uint64_t gettick_npt(void);
8511172SHaik.Aftandilian@Sun.COM extern uint64_t getstick_npt(void);
8611172SHaik.Aftandilian@Sun.COM extern int mach_descrip_update(void);
8711172SHaik.Aftandilian@Sun.COM extern cpuset_t cpu_ready_set;
8811172SHaik.Aftandilian@Sun.COM extern uint64_t native_tick_offset;
8911172SHaik.Aftandilian@Sun.COM extern uint64_t native_stick_offset;
9012015SHaik.Aftandilian@Sun.COM extern uint64_t sys_tick_freq;
9111172SHaik.Aftandilian@Sun.COM 
9211172SHaik.Aftandilian@Sun.COM /*
9311172SHaik.Aftandilian@Sun.COM  * Global Sun Cluster pre/post callbacks.
9411172SHaik.Aftandilian@Sun.COM  */
9511172SHaik.Aftandilian@Sun.COM const char *(*cl_suspend_error_decode)(int);
9611172SHaik.Aftandilian@Sun.COM int (*cl_suspend_pre_callback)(void);
9711172SHaik.Aftandilian@Sun.COM int (*cl_suspend_post_callback)(void);
9811172SHaik.Aftandilian@Sun.COM #define	SC_PRE_FAIL_STR_FMT	"Sun Cluster pre-suspend failure: %d"
9911172SHaik.Aftandilian@Sun.COM #define	SC_POST_FAIL_STR_FMT	"Sun Cluster post-suspend failure: %d"
10011172SHaik.Aftandilian@Sun.COM #define	SC_FAIL_STR_MAX		256
10111172SHaik.Aftandilian@Sun.COM 
10211172SHaik.Aftandilian@Sun.COM /*
10311172SHaik.Aftandilian@Sun.COM  * The minimum major and minor version of the HSVC_GROUP_CORE API group
10411172SHaik.Aftandilian@Sun.COM  * required in order to use OS suspend.
10511172SHaik.Aftandilian@Sun.COM  */
10611172SHaik.Aftandilian@Sun.COM #define	SUSPEND_CORE_MAJOR	1
10711172SHaik.Aftandilian@Sun.COM #define	SUSPEND_CORE_MINOR	2
10811172SHaik.Aftandilian@Sun.COM 
10911172SHaik.Aftandilian@Sun.COM /*
11011172SHaik.Aftandilian@Sun.COM  * By default, sun4v OS suspend is supported if the required HV version
11111172SHaik.Aftandilian@Sun.COM  * is present. suspend_disabled should be set on platforms that do not
11211172SHaik.Aftandilian@Sun.COM  * allow OS suspend regardless of whether or not the HV supports it.
11311172SHaik.Aftandilian@Sun.COM  * It can also be set in /etc/system.
11411172SHaik.Aftandilian@Sun.COM  */
11511172SHaik.Aftandilian@Sun.COM static int suspend_disabled = 0;
11611172SHaik.Aftandilian@Sun.COM 
11711172SHaik.Aftandilian@Sun.COM /*
11811172SHaik.Aftandilian@Sun.COM  * Controls whether or not user-land tick and stick register emulation
11911172SHaik.Aftandilian@Sun.COM  * will be enabled following a successful suspend operation.
12011172SHaik.Aftandilian@Sun.COM  */
12111172SHaik.Aftandilian@Sun.COM static int enable_user_tick_stick_emulation = 1;
12211172SHaik.Aftandilian@Sun.COM 
12311172SHaik.Aftandilian@Sun.COM /*
12411172SHaik.Aftandilian@Sun.COM  * Indicates whether or not tick and stick emulation is currently active.
12511172SHaik.Aftandilian@Sun.COM  * After a successful suspend operation, if emulation is enabled, this
12611172SHaik.Aftandilian@Sun.COM  * variable is set to B_TRUE. Global scope to allow emulation code to
12711172SHaik.Aftandilian@Sun.COM  * check if emulation is active.
12811172SHaik.Aftandilian@Sun.COM  */
12911172SHaik.Aftandilian@Sun.COM boolean_t tick_stick_emulation_active = B_FALSE;
13011172SHaik.Aftandilian@Sun.COM 
13111172SHaik.Aftandilian@Sun.COM /*
13211713SPavel.Tatashin@Sun.COM  * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
13311713SPavel.Tatashin@Sun.COM  * sharing data structures, and processor groups will be updated using
13411713SPavel.Tatashin@Sun.COM  * information from the updated MD.
13511172SHaik.Aftandilian@Sun.COM  */
13611172SHaik.Aftandilian@Sun.COM static int suspend_update_cpu_mappings = 1;
13711172SHaik.Aftandilian@Sun.COM 
13811172SHaik.Aftandilian@Sun.COM /*
13912015SHaik.Aftandilian@Sun.COM  * The maximum number of microseconds by which the %tick or %stick register
14012015SHaik.Aftandilian@Sun.COM  * can vary between any two CPUs in the system. To calculate the
14112015SHaik.Aftandilian@Sun.COM  * native_stick_offset and native_tick_offset, we measure the change in these
14212015SHaik.Aftandilian@Sun.COM  * registers on one CPU over a suspend/resume. Other CPUs may experience
14312015SHaik.Aftandilian@Sun.COM  * slightly larger or smaller changes. %tick and %stick should be synchronized
14412015SHaik.Aftandilian@Sun.COM  * between CPUs, but there may be some variation. So we add an additional value
14512015SHaik.Aftandilian@Sun.COM  * derived from this variable to ensure that these registers always increase
14612015SHaik.Aftandilian@Sun.COM  * over a suspend/resume operation, assuming all %tick and %stick registers
14712015SHaik.Aftandilian@Sun.COM  * are synchronized (within a certain limit) across CPUs in the system. The
14812015SHaik.Aftandilian@Sun.COM  * delta between %sticks on different CPUs should be a small number of cycles,
14912015SHaik.Aftandilian@Sun.COM  * not perceptible to readers of %stick that migrate between CPUs. We set this
15012015SHaik.Aftandilian@Sun.COM  * value to 1 millisecond which means that over a suspend/resume operation,
15112015SHaik.Aftandilian@Sun.COM  * all CPU's %tick and %stick will advance forwards as long as, across all
15212015SHaik.Aftandilian@Sun.COM  * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
15312015SHaik.Aftandilian@Sun.COM  * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
15412015SHaik.Aftandilian@Sun.COM  * but small enough to not trigger TOD faults.
15512015SHaik.Aftandilian@Sun.COM  */
15612015SHaik.Aftandilian@Sun.COM static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
15712015SHaik.Aftandilian@Sun.COM 
15812015SHaik.Aftandilian@Sun.COM /*
15912260SHaik.Aftandilian@Sun.COM  * The number of times the system has been suspended and resumed.
16012260SHaik.Aftandilian@Sun.COM  */
16112260SHaik.Aftandilian@Sun.COM static uint64_t suspend_count = 0;
16212260SHaik.Aftandilian@Sun.COM 
16312260SHaik.Aftandilian@Sun.COM /*
16411172SHaik.Aftandilian@Sun.COM  * DBG and DBG_PROM() macro.
16511172SHaik.Aftandilian@Sun.COM  */
16611172SHaik.Aftandilian@Sun.COM #ifdef	DEBUG
16711172SHaik.Aftandilian@Sun.COM 
16811172SHaik.Aftandilian@Sun.COM static int suspend_debug_flag = 0;
16911172SHaik.Aftandilian@Sun.COM 
17011172SHaik.Aftandilian@Sun.COM #define	DBG_PROM		\
17111172SHaik.Aftandilian@Sun.COM if (suspend_debug_flag)		\
17211172SHaik.Aftandilian@Sun.COM 	prom_printf
17311172SHaik.Aftandilian@Sun.COM 
17411172SHaik.Aftandilian@Sun.COM #define	DBG			\
17511172SHaik.Aftandilian@Sun.COM if (suspend_debug_flag)		\
17611172SHaik.Aftandilian@Sun.COM 	suspend_debug
17711172SHaik.Aftandilian@Sun.COM 
17811172SHaik.Aftandilian@Sun.COM static void
suspend_debug(const char * fmt,...)17911172SHaik.Aftandilian@Sun.COM suspend_debug(const char *fmt, ...)
18011172SHaik.Aftandilian@Sun.COM {
18111172SHaik.Aftandilian@Sun.COM 	char	buf[512];
18211172SHaik.Aftandilian@Sun.COM 	va_list	ap;
18311172SHaik.Aftandilian@Sun.COM 
18411172SHaik.Aftandilian@Sun.COM 	va_start(ap, fmt);
18511172SHaik.Aftandilian@Sun.COM 	(void) vsprintf(buf, fmt, ap);
18611172SHaik.Aftandilian@Sun.COM 	va_end(ap);
18711172SHaik.Aftandilian@Sun.COM 
18811172SHaik.Aftandilian@Sun.COM 	cmn_err(CE_NOTE, "%s", buf);
18911172SHaik.Aftandilian@Sun.COM }
19011172SHaik.Aftandilian@Sun.COM 
19111172SHaik.Aftandilian@Sun.COM #else /* DEBUG */
19211172SHaik.Aftandilian@Sun.COM 
19311172SHaik.Aftandilian@Sun.COM #define	DBG_PROM
19411172SHaik.Aftandilian@Sun.COM #define	DBG
19511172SHaik.Aftandilian@Sun.COM 
19611172SHaik.Aftandilian@Sun.COM #endif /* DEBUG */
19711172SHaik.Aftandilian@Sun.COM 
19811172SHaik.Aftandilian@Sun.COM /*
19911172SHaik.Aftandilian@Sun.COM  * Return true if the HV supports OS suspend and if suspend has not been
20011172SHaik.Aftandilian@Sun.COM  * disabled on this platform.
20111172SHaik.Aftandilian@Sun.COM  */
20211172SHaik.Aftandilian@Sun.COM boolean_t
suspend_supported(void)20311172SHaik.Aftandilian@Sun.COM suspend_supported(void)
20411172SHaik.Aftandilian@Sun.COM {
20511172SHaik.Aftandilian@Sun.COM 	uint64_t major, minor;
20611172SHaik.Aftandilian@Sun.COM 
20711172SHaik.Aftandilian@Sun.COM 	if (suspend_disabled)
20811172SHaik.Aftandilian@Sun.COM 		return (B_FALSE);
20911172SHaik.Aftandilian@Sun.COM 
21011172SHaik.Aftandilian@Sun.COM 	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
21111172SHaik.Aftandilian@Sun.COM 		return (B_FALSE);
21211172SHaik.Aftandilian@Sun.COM 
21311172SHaik.Aftandilian@Sun.COM 	return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
21411172SHaik.Aftandilian@Sun.COM 	    (major > SUSPEND_CORE_MAJOR));
21511172SHaik.Aftandilian@Sun.COM }
21611172SHaik.Aftandilian@Sun.COM 
21711172SHaik.Aftandilian@Sun.COM /*
21812260SHaik.Aftandilian@Sun.COM  * Memory DR is not permitted if the system has been suspended and resumed.
21912260SHaik.Aftandilian@Sun.COM  * It is the responsibility of the caller of suspend_start and the DR
22012260SHaik.Aftandilian@Sun.COM  * subsystem to serialize DR operations and suspend_memdr_allowed() checks.
22112260SHaik.Aftandilian@Sun.COM  */
22212260SHaik.Aftandilian@Sun.COM boolean_t
suspend_memdr_allowed(void)22312260SHaik.Aftandilian@Sun.COM suspend_memdr_allowed(void)
22412260SHaik.Aftandilian@Sun.COM {
22512260SHaik.Aftandilian@Sun.COM 	return (suspend_count == 0);
22612260SHaik.Aftandilian@Sun.COM }
22712260SHaik.Aftandilian@Sun.COM 
22812260SHaik.Aftandilian@Sun.COM /*
22912015SHaik.Aftandilian@Sun.COM  * Given a source tick, stick, and tod value, set the tick and stick offsets
23012015SHaik.Aftandilian@Sun.COM  * such that the (current physical register value) + offset == (source value)
23112015SHaik.Aftandilian@Sun.COM  * and in addition account for some variation between the %tick/%stick on
23212015SHaik.Aftandilian@Sun.COM  * different CPUs. We account for this variation by adding in double the value
23312015SHaik.Aftandilian@Sun.COM  * of suspend_tick_stick_max_delta. The following is an explanation of why
23412015SHaik.Aftandilian@Sun.COM  * suspend_tick_stick_max_delta must be multiplied by two and added to
23512015SHaik.Aftandilian@Sun.COM  * native_stick_offset.
23612015SHaik.Aftandilian@Sun.COM  *
23712015SHaik.Aftandilian@Sun.COM  * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
23812015SHaik.Aftandilian@Sun.COM  * with physical "source" %stick values s0 and s1 respectively. When the guest
23912015SHaik.Aftandilian@Sun.COM  * is first resumed, the physical "target" %stick values are t0 and t1
24012015SHaik.Aftandilian@Sun.COM  * respectively. The virtual %stick values after the resume are v0 and v1
24112015SHaik.Aftandilian@Sun.COM  * respectively. Let x be the maximum difference between any two CPU's %stick
24212015SHaik.Aftandilian@Sun.COM  * register at a given point in time and let the %stick values be assigned
24312015SHaik.Aftandilian@Sun.COM  * such that
24412015SHaik.Aftandilian@Sun.COM  *
24512015SHaik.Aftandilian@Sun.COM  *     s1 = s0 + x and
24612015SHaik.Aftandilian@Sun.COM  *     t1 = t0 - x
24712015SHaik.Aftandilian@Sun.COM  *
24812015SHaik.Aftandilian@Sun.COM  * Let us assume that p0 is driving the suspend and resume. Then, we will
24912015SHaik.Aftandilian@Sun.COM  * calculate the stick offset f and the virtual %stick on p0 after the
25012015SHaik.Aftandilian@Sun.COM  * resume as follows.
25112015SHaik.Aftandilian@Sun.COM  *
25212015SHaik.Aftandilian@Sun.COM  *      f = s0 - t0 and
25312015SHaik.Aftandilian@Sun.COM  *     v0 = t0 + f
25412015SHaik.Aftandilian@Sun.COM  *
25512015SHaik.Aftandilian@Sun.COM  * We calculate the virtual %stick v1 on p1 after the resume as
25612015SHaik.Aftandilian@Sun.COM  *
25712015SHaik.Aftandilian@Sun.COM  *     v1 = t1 + f
25812015SHaik.Aftandilian@Sun.COM  *
25912015SHaik.Aftandilian@Sun.COM  * Substitution yields
26012015SHaik.Aftandilian@Sun.COM  *
26112015SHaik.Aftandilian@Sun.COM  *     v1 = t1 + (s0 - t0)
26212015SHaik.Aftandilian@Sun.COM  *     v1 = (t0 - x) + (s0 - t0)
26312015SHaik.Aftandilian@Sun.COM  *     v1 = -x + s0
26412015SHaik.Aftandilian@Sun.COM  *     v1 = s0 - x
26512015SHaik.Aftandilian@Sun.COM  *     v1 = (s1 - x) - x
26612015SHaik.Aftandilian@Sun.COM  *     v1 = s1 - 2x
26712015SHaik.Aftandilian@Sun.COM  *
26812015SHaik.Aftandilian@Sun.COM  * Therefore, in this scenario, without accounting for %stick variation in
26912015SHaik.Aftandilian@Sun.COM  * the calculation of the native_stick_offset f, the virtual %stick on p1
27012015SHaik.Aftandilian@Sun.COM  * is less than the value of the %stick on p1 before the suspend which is
27112015SHaik.Aftandilian@Sun.COM  * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1
27212015SHaik.Aftandilian@Sun.COM  * which means the %stick on p1 after the resume will always be greater
27312015SHaik.Aftandilian@Sun.COM  * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
27412015SHaik.Aftandilian@Sun.COM  * at any point in time, we can accomplish this by adding 2x to f. This
27512015SHaik.Aftandilian@Sun.COM  * guarantees any processes bound to CPU P0 or P1 will not see a %stick
27612015SHaik.Aftandilian@Sun.COM  * decrease across a suspend/resume. Hence, in the code below, we multiply
27712015SHaik.Aftandilian@Sun.COM  * suspend_tick_stick_max_delta by two in the calculation for
27812015SHaik.Aftandilian@Sun.COM  * native_stick_offset, native_tick_offset, and target_hrtime.
27911172SHaik.Aftandilian@Sun.COM  */
28011172SHaik.Aftandilian@Sun.COM static void
set_tick_offsets(uint64_t source_tick,uint64_t source_stick,timestruc_t * tsp)28112015SHaik.Aftandilian@Sun.COM set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
28211172SHaik.Aftandilian@Sun.COM {
28311172SHaik.Aftandilian@Sun.COM 	uint64_t target_tick;
28411172SHaik.Aftandilian@Sun.COM 	uint64_t target_stick;
28512015SHaik.Aftandilian@Sun.COM 	hrtime_t source_hrtime;
28612015SHaik.Aftandilian@Sun.COM 	hrtime_t target_hrtime;
28711172SHaik.Aftandilian@Sun.COM 
28812015SHaik.Aftandilian@Sun.COM 	/*
28912015SHaik.Aftandilian@Sun.COM 	 * Temporarily set the offsets to zero so that the following reads
29012015SHaik.Aftandilian@Sun.COM 	 * of the registers will yield physical unadjusted counter values.
29112015SHaik.Aftandilian@Sun.COM 	 */
29211172SHaik.Aftandilian@Sun.COM 	native_tick_offset = 0;
29311172SHaik.Aftandilian@Sun.COM 	native_stick_offset = 0;
29411172SHaik.Aftandilian@Sun.COM 
29511172SHaik.Aftandilian@Sun.COM 	target_tick = gettick_counter();	/* returns %tick */
29611172SHaik.Aftandilian@Sun.COM 	target_stick = gettick();		/* returns %stick */
29711172SHaik.Aftandilian@Sun.COM 
29812015SHaik.Aftandilian@Sun.COM 	/*
29912015SHaik.Aftandilian@Sun.COM 	 * Calculate the new offsets. In addition to the delta observed on
30012015SHaik.Aftandilian@Sun.COM 	 * this CPU, add an additional value. Multiply the %tick/%stick
30112015SHaik.Aftandilian@Sun.COM 	 * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
30212015SHaik.Aftandilian@Sun.COM 	 * to account for a delta between CPUs before the suspend and a
30312015SHaik.Aftandilian@Sun.COM 	 * delta between CPUs after the resume.
30412015SHaik.Aftandilian@Sun.COM 	 */
30512015SHaik.Aftandilian@Sun.COM 	native_tick_offset = (source_tick - target_tick) +
30612015SHaik.Aftandilian@Sun.COM 	    (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
30712015SHaik.Aftandilian@Sun.COM 	native_stick_offset = (source_stick - target_stick) +
30812015SHaik.Aftandilian@Sun.COM 	    (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);
30912015SHaik.Aftandilian@Sun.COM 
31012015SHaik.Aftandilian@Sun.COM 	/*
31112015SHaik.Aftandilian@Sun.COM 	 * We've effectively increased %stick and %tick by twice the value
31212015SHaik.Aftandilian@Sun.COM 	 * of suspend_tick_stick_max_delta to account for variation across
31312015SHaik.Aftandilian@Sun.COM 	 * CPUs. Now adjust the preserved TOD by the same amount.
31412015SHaik.Aftandilian@Sun.COM 	 */
31512015SHaik.Aftandilian@Sun.COM 	source_hrtime = ts2hrt(tsp);
31612015SHaik.Aftandilian@Sun.COM 	target_hrtime = source_hrtime +
31712015SHaik.Aftandilian@Sun.COM 	    (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
31812015SHaik.Aftandilian@Sun.COM 	hrt2ts(target_hrtime, tsp);
31911172SHaik.Aftandilian@Sun.COM }
32011172SHaik.Aftandilian@Sun.COM 
/*
 * Set the NPT field of both %stick and %tick to 1 on the calling CPU by way
 * of the hypervisor. The results of the hypervisor calls are discarded.
 */
static void
enable_tick_stick_npt(void)
{
	/* %stick is updated first, then %tick. */
	(void) hv_stick_set_npt(1);
	(void) hv_tick_set_npt(1);
}
33011172SHaik.Aftandilian@Sun.COM 
33111172SHaik.Aftandilian@Sun.COM /*
33211172SHaik.Aftandilian@Sun.COM  * Synchronize a CPU's {tick,stick}.NPT fields with the current state
33311172SHaik.Aftandilian@Sun.COM  * of the system. This is used when a CPU is DR'd into the system.
33411172SHaik.Aftandilian@Sun.COM  */
33511172SHaik.Aftandilian@Sun.COM void
suspend_sync_tick_stick_npt(void)33611172SHaik.Aftandilian@Sun.COM suspend_sync_tick_stick_npt(void)
33711172SHaik.Aftandilian@Sun.COM {
33811172SHaik.Aftandilian@Sun.COM 	if (tick_stick_emulation_active) {
33911172SHaik.Aftandilian@Sun.COM 		DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
34011387SSurya.Prakki@Sun.COM 		(void) hv_stick_set_npt(1);
34111387SSurya.Prakki@Sun.COM 		(void) hv_tick_set_npt(1);
34211172SHaik.Aftandilian@Sun.COM 	} else {
34311172SHaik.Aftandilian@Sun.COM 		ASSERT(gettick_npt() == 0);
34411172SHaik.Aftandilian@Sun.COM 		ASSERT(getstick_npt() == 0);
34511172SHaik.Aftandilian@Sun.COM 	}
34611172SHaik.Aftandilian@Sun.COM }
34711172SHaik.Aftandilian@Sun.COM 
34811172SHaik.Aftandilian@Sun.COM /*
34911172SHaik.Aftandilian@Sun.COM  * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
35011172SHaik.Aftandilian@Sun.COM  * sharing data structures, and processor groups.
35111172SHaik.Aftandilian@Sun.COM  */
35211172SHaik.Aftandilian@Sun.COM static void
update_cpu_mappings(void)35311172SHaik.Aftandilian@Sun.COM update_cpu_mappings(void)
35411172SHaik.Aftandilian@Sun.COM {
35511172SHaik.Aftandilian@Sun.COM 	md_t		*mdp;
35611172SHaik.Aftandilian@Sun.COM 	processorid_t	id;
35711172SHaik.Aftandilian@Sun.COM 	cpu_t		*cp;
35811172SHaik.Aftandilian@Sun.COM 	cpu_pg_t	*pgps[NCPU];
35911172SHaik.Aftandilian@Sun.COM 
36011172SHaik.Aftandilian@Sun.COM 	if ((mdp = md_get_handle()) == NULL) {
36111172SHaik.Aftandilian@Sun.COM 		DBG("suspend: md_get_handle failed");
36211172SHaik.Aftandilian@Sun.COM 		return;
36311172SHaik.Aftandilian@Sun.COM 	}
36411172SHaik.Aftandilian@Sun.COM 
36511172SHaik.Aftandilian@Sun.COM 	DBG("suspend: updating CPU mappings");
36611172SHaik.Aftandilian@Sun.COM 
36711172SHaik.Aftandilian@Sun.COM 	mutex_enter(&cpu_lock);
36811172SHaik.Aftandilian@Sun.COM 
36911172SHaik.Aftandilian@Sun.COM 	setup_chip_mappings(mdp);
37011172SHaik.Aftandilian@Sun.COM 	setup_exec_unit_mappings(mdp);
37111172SHaik.Aftandilian@Sun.COM 	for (id = 0; id < NCPU; id++) {
37211172SHaik.Aftandilian@Sun.COM 		if ((cp = cpu_get(id)) == NULL)
37311172SHaik.Aftandilian@Sun.COM 			continue;
37411172SHaik.Aftandilian@Sun.COM 		cpu_map_exec_units(cp);
37511172SHaik.Aftandilian@Sun.COM 	}
37611172SHaik.Aftandilian@Sun.COM 
37711172SHaik.Aftandilian@Sun.COM 	/*
37811172SHaik.Aftandilian@Sun.COM 	 * Re-calculate processor groups.
37911172SHaik.Aftandilian@Sun.COM 	 *
38011172SHaik.Aftandilian@Sun.COM 	 * First tear down all PG information before adding any new PG
38111172SHaik.Aftandilian@Sun.COM 	 * information derived from the MD we just downloaded. We must
38211172SHaik.Aftandilian@Sun.COM 	 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
38311172SHaik.Aftandilian@Sun.COM 	 * we want to minimize the number of times pause_cpus is called.
38411172SHaik.Aftandilian@Sun.COM 	 * Inactivating all CPUs would leave PGs without any active CPUs,
38511172SHaik.Aftandilian@Sun.COM 	 * so while CPUs are paused, call pg_cpu_inactive and swap in the
38611172SHaik.Aftandilian@Sun.COM 	 * bootstrap PG structure saving the original PG structure to be
38711172SHaik.Aftandilian@Sun.COM 	 * fini'd afterwards. This prevents the dispatcher from encountering
388*12987SHaik.Aftandilian@Oracle.COM 	 * PGs in which all CPUs are inactive. Offline CPUs are already
389*12987SHaik.Aftandilian@Oracle.COM 	 * inactive in their PGs and shouldn't be reactivated, so we must
390*12987SHaik.Aftandilian@Oracle.COM 	 * not call pg_cpu_inactive or pg_cpu_active for those CPUs.
39111172SHaik.Aftandilian@Sun.COM 	 */
39211172SHaik.Aftandilian@Sun.COM 	pause_cpus(NULL);
39311172SHaik.Aftandilian@Sun.COM 	for (id = 0; id < NCPU; id++) {
39411172SHaik.Aftandilian@Sun.COM 		if ((cp = cpu_get(id)) == NULL)
39511172SHaik.Aftandilian@Sun.COM 			continue;
396*12987SHaik.Aftandilian@Oracle.COM 		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
397*12987SHaik.Aftandilian@Oracle.COM 			pg_cpu_inactive(cp);
39811172SHaik.Aftandilian@Sun.COM 		pgps[id] = cp->cpu_pg;
39911172SHaik.Aftandilian@Sun.COM 		pg_cpu_bootstrap(cp);
40011172SHaik.Aftandilian@Sun.COM 	}
40111172SHaik.Aftandilian@Sun.COM 	start_cpus();
40211172SHaik.Aftandilian@Sun.COM 
40311172SHaik.Aftandilian@Sun.COM 	/*
40411172SHaik.Aftandilian@Sun.COM 	 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
40511172SHaik.Aftandilian@Sun.COM 	 * not paused. Use two separate loops here so that we do not
40611172SHaik.Aftandilian@Sun.COM 	 * initialize PG data for CPUs until all the old PG data structures
40711172SHaik.Aftandilian@Sun.COM 	 * are torn down.
40811172SHaik.Aftandilian@Sun.COM 	 */
40911172SHaik.Aftandilian@Sun.COM 	for (id = 0; id < NCPU; id++) {
41011172SHaik.Aftandilian@Sun.COM 		if ((cp = cpu_get(id)) == NULL)
41111172SHaik.Aftandilian@Sun.COM 			continue;
41211172SHaik.Aftandilian@Sun.COM 		pg_cpu_fini(cp, pgps[id]);
41312013SHaik.Aftandilian@Sun.COM 		mpo_cpu_remove(id);
41411172SHaik.Aftandilian@Sun.COM 	}
41511172SHaik.Aftandilian@Sun.COM 
41611172SHaik.Aftandilian@Sun.COM 	/*
41711172SHaik.Aftandilian@Sun.COM 	 * Initialize PG data for each CPU, but leave the bootstrapped
41811172SHaik.Aftandilian@Sun.COM 	 * PG structure in place to avoid running with any PGs containing
41911172SHaik.Aftandilian@Sun.COM 	 * nothing but inactive CPUs.
42011172SHaik.Aftandilian@Sun.COM 	 */
42111172SHaik.Aftandilian@Sun.COM 	for (id = 0; id < NCPU; id++) {
42211172SHaik.Aftandilian@Sun.COM 		if ((cp = cpu_get(id)) == NULL)
42311172SHaik.Aftandilian@Sun.COM 			continue;
42412013SHaik.Aftandilian@Sun.COM 		mpo_cpu_add(mdp, id);
42511172SHaik.Aftandilian@Sun.COM 		pgps[id] = pg_cpu_init(cp, B_TRUE);
42611172SHaik.Aftandilian@Sun.COM 	}
42711172SHaik.Aftandilian@Sun.COM 
42811172SHaik.Aftandilian@Sun.COM 	/*
42911172SHaik.Aftandilian@Sun.COM 	 * Now that PG data has been initialized for all CPUs in the
43011172SHaik.Aftandilian@Sun.COM 	 * system, replace the bootstrapped PG structure with the
43111172SHaik.Aftandilian@Sun.COM 	 * initialized PG structure and call pg_cpu_active for each CPU.
43211172SHaik.Aftandilian@Sun.COM 	 */
43311172SHaik.Aftandilian@Sun.COM 	pause_cpus(NULL);
43411172SHaik.Aftandilian@Sun.COM 	for (id = 0; id < NCPU; id++) {
43511172SHaik.Aftandilian@Sun.COM 		if ((cp = cpu_get(id)) == NULL)
43611172SHaik.Aftandilian@Sun.COM 			continue;
43711172SHaik.Aftandilian@Sun.COM 		cp->cpu_pg = pgps[id];
438*12987SHaik.Aftandilian@Oracle.COM 		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
439*12987SHaik.Aftandilian@Oracle.COM 			pg_cpu_active(cp);
44011172SHaik.Aftandilian@Sun.COM 	}
44111172SHaik.Aftandilian@Sun.COM 	start_cpus();
44211172SHaik.Aftandilian@Sun.COM 
44311172SHaik.Aftandilian@Sun.COM 	mutex_exit(&cpu_lock);
44411172SHaik.Aftandilian@Sun.COM 
44511172SHaik.Aftandilian@Sun.COM 	(void) md_fini_handle(mdp);
44611172SHaik.Aftandilian@Sun.COM }
44711172SHaik.Aftandilian@Sun.COM 
44811172SHaik.Aftandilian@Sun.COM /*
44911172SHaik.Aftandilian@Sun.COM  * Wrapper for the Sun Cluster error decoding function.
45011172SHaik.Aftandilian@Sun.COM  */
45111172SHaik.Aftandilian@Sun.COM static int
cluster_error_decode(int error,char * error_reason,size_t max_reason_len)45211172SHaik.Aftandilian@Sun.COM cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
45311172SHaik.Aftandilian@Sun.COM {
45411172SHaik.Aftandilian@Sun.COM 	const char	*decoded;
45511172SHaik.Aftandilian@Sun.COM 	size_t		decoded_len;
45611172SHaik.Aftandilian@Sun.COM 
45711172SHaik.Aftandilian@Sun.COM 	ASSERT(error_reason != NULL);
45811172SHaik.Aftandilian@Sun.COM 	ASSERT(max_reason_len > 0);
45911172SHaik.Aftandilian@Sun.COM 
46011172SHaik.Aftandilian@Sun.COM 	max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);
46111172SHaik.Aftandilian@Sun.COM 
46211172SHaik.Aftandilian@Sun.COM 	if (cl_suspend_error_decode == NULL)
46311172SHaik.Aftandilian@Sun.COM 		return (-1);
46411172SHaik.Aftandilian@Sun.COM 
46511172SHaik.Aftandilian@Sun.COM 	if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
46611172SHaik.Aftandilian@Sun.COM 		return (-1);
46711172SHaik.Aftandilian@Sun.COM 
46811172SHaik.Aftandilian@Sun.COM 	/* Get number of non-NULL bytes */
46911172SHaik.Aftandilian@Sun.COM 	if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
47011172SHaik.Aftandilian@Sun.COM 		return (-1);
47111172SHaik.Aftandilian@Sun.COM 
47211172SHaik.Aftandilian@Sun.COM 	bcopy(decoded, error_reason, decoded_len);
47311172SHaik.Aftandilian@Sun.COM 
47411172SHaik.Aftandilian@Sun.COM 	/*
47511172SHaik.Aftandilian@Sun.COM 	 * The error string returned from cl_suspend_error_decode
47611172SHaik.Aftandilian@Sun.COM 	 * should be NULL-terminated, but set the terminator here
47711172SHaik.Aftandilian@Sun.COM 	 * because we only copied non-NULL bytes. If the decoded
47811172SHaik.Aftandilian@Sun.COM 	 * string was not NULL-terminated, this guarantees that
47911172SHaik.Aftandilian@Sun.COM 	 * error_reason will be.
48011172SHaik.Aftandilian@Sun.COM 	 */
48111172SHaik.Aftandilian@Sun.COM 	error_reason[decoded_len] = '\0';
48211172SHaik.Aftandilian@Sun.COM 
48311172SHaik.Aftandilian@Sun.COM 	return (0);
48411172SHaik.Aftandilian@Sun.COM }
48511172SHaik.Aftandilian@Sun.COM 
48611172SHaik.Aftandilian@Sun.COM /*
48711172SHaik.Aftandilian@Sun.COM  * Wrapper for the Sun Cluster pre-suspend callback.
48811172SHaik.Aftandilian@Sun.COM  */
48911172SHaik.Aftandilian@Sun.COM static int
cluster_pre_wrapper(char * error_reason,size_t max_reason_len)49011172SHaik.Aftandilian@Sun.COM cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
49111172SHaik.Aftandilian@Sun.COM {
49211172SHaik.Aftandilian@Sun.COM 	int rv = 0;
49311172SHaik.Aftandilian@Sun.COM 
49411172SHaik.Aftandilian@Sun.COM 	if (cl_suspend_pre_callback != NULL) {
49511172SHaik.Aftandilian@Sun.COM 		rv = (*cl_suspend_pre_callback)();
49611172SHaik.Aftandilian@Sun.COM 		DBG("suspend: cl_suspend_pre_callback returned %d", rv);
49711172SHaik.Aftandilian@Sun.COM 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
49811172SHaik.Aftandilian@Sun.COM 			if (cluster_error_decode(rv, error_reason,
49911172SHaik.Aftandilian@Sun.COM 			    max_reason_len)) {
50011172SHaik.Aftandilian@Sun.COM 				(void) snprintf(error_reason, max_reason_len,
50111172SHaik.Aftandilian@Sun.COM 				    SC_PRE_FAIL_STR_FMT, rv);
50211172SHaik.Aftandilian@Sun.COM 			}
50311172SHaik.Aftandilian@Sun.COM 		}
50411172SHaik.Aftandilian@Sun.COM 	}
50511172SHaik.Aftandilian@Sun.COM 
50611172SHaik.Aftandilian@Sun.COM 	return (rv);
50711172SHaik.Aftandilian@Sun.COM }
50811172SHaik.Aftandilian@Sun.COM 
50911172SHaik.Aftandilian@Sun.COM /*
51011172SHaik.Aftandilian@Sun.COM  * Wrapper for the Sun Cluster post-suspend callback.
51111172SHaik.Aftandilian@Sun.COM  */
51211172SHaik.Aftandilian@Sun.COM static int
cluster_post_wrapper(char * error_reason,size_t max_reason_len)51311172SHaik.Aftandilian@Sun.COM cluster_post_wrapper(char *error_reason, size_t max_reason_len)
51411172SHaik.Aftandilian@Sun.COM {
51511172SHaik.Aftandilian@Sun.COM 	int rv = 0;
51611172SHaik.Aftandilian@Sun.COM 
51711172SHaik.Aftandilian@Sun.COM 	if (cl_suspend_post_callback != NULL) {
51811172SHaik.Aftandilian@Sun.COM 		rv = (*cl_suspend_post_callback)();
51911172SHaik.Aftandilian@Sun.COM 		DBG("suspend: cl_suspend_post_callback returned %d", rv);
52011172SHaik.Aftandilian@Sun.COM 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
52111172SHaik.Aftandilian@Sun.COM 			if (cluster_error_decode(rv, error_reason,
52211172SHaik.Aftandilian@Sun.COM 			    max_reason_len)) {
52311172SHaik.Aftandilian@Sun.COM 				(void) snprintf(error_reason,
52411172SHaik.Aftandilian@Sun.COM 				    max_reason_len, SC_POST_FAIL_STR_FMT, rv);
52511172SHaik.Aftandilian@Sun.COM 			}
52611172SHaik.Aftandilian@Sun.COM 		}
52711172SHaik.Aftandilian@Sun.COM 	}
52811172SHaik.Aftandilian@Sun.COM 
52911172SHaik.Aftandilian@Sun.COM 	return (rv);
53011172SHaik.Aftandilian@Sun.COM }
53111172SHaik.Aftandilian@Sun.COM 
53211172SHaik.Aftandilian@Sun.COM /*
53311172SHaik.Aftandilian@Sun.COM  * Execute pre-suspend callbacks preparing the system for a suspend operation.
53411172SHaik.Aftandilian@Sun.COM  * Returns zero on success, non-zero on failure. Sets the recovered argument
53511172SHaik.Aftandilian@Sun.COM  * to indicate whether or not callbacks could be undone in the event of a
53611172SHaik.Aftandilian@Sun.COM  * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
53711172SHaik.Aftandilian@Sun.COM  * otherwise *recovered is set to B_FALSE. Must be called successfully before
53811172SHaik.Aftandilian@Sun.COM  * suspend_start can be called. Callers should first call suspend_support to
53911172SHaik.Aftandilian@Sun.COM  * determine if OS suspend is supported.
54011172SHaik.Aftandilian@Sun.COM  */
54111172SHaik.Aftandilian@Sun.COM int
suspend_pre(char * error_reason,size_t max_reason_len,boolean_t * recovered)54211172SHaik.Aftandilian@Sun.COM suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
54311172SHaik.Aftandilian@Sun.COM {
54411172SHaik.Aftandilian@Sun.COM 	int rv;
54511172SHaik.Aftandilian@Sun.COM 
54611172SHaik.Aftandilian@Sun.COM 	ASSERT(recovered != NULL);
54711172SHaik.Aftandilian@Sun.COM 
54811172SHaik.Aftandilian@Sun.COM 	/*
54911172SHaik.Aftandilian@Sun.COM 	 * Return an error if suspend_pre is erreoneously called
55011172SHaik.Aftandilian@Sun.COM 	 * when OS suspend is not supported.
55111172SHaik.Aftandilian@Sun.COM 	 */
55211172SHaik.Aftandilian@Sun.COM 	ASSERT(suspend_supported());
55311172SHaik.Aftandilian@Sun.COM 	if (!suspend_supported()) {
55411172SHaik.Aftandilian@Sun.COM 		DBG("suspend: suspend_pre called without suspend support");
55511172SHaik.Aftandilian@Sun.COM 		*recovered = B_TRUE;
55611172SHaik.Aftandilian@Sun.COM 		return (ENOTSUP);
55711172SHaik.Aftandilian@Sun.COM 	}
55811172SHaik.Aftandilian@Sun.COM 	DBG("suspend: %s", __func__);
55911172SHaik.Aftandilian@Sun.COM 
56011172SHaik.Aftandilian@Sun.COM 	rv = cluster_pre_wrapper(error_reason, max_reason_len);
56111172SHaik.Aftandilian@Sun.COM 
56211172SHaik.Aftandilian@Sun.COM 	/*
56311172SHaik.Aftandilian@Sun.COM 	 * At present, only one pre-suspend operation exists.
56411172SHaik.Aftandilian@Sun.COM 	 * If it fails, no recovery needs to be done.
56511172SHaik.Aftandilian@Sun.COM 	 */
56611172SHaik.Aftandilian@Sun.COM 	if (rv != 0 && recovered != NULL)
56711172SHaik.Aftandilian@Sun.COM 		*recovered = B_TRUE;
56811172SHaik.Aftandilian@Sun.COM 
56911172SHaik.Aftandilian@Sun.COM 	return (rv);
57011172SHaik.Aftandilian@Sun.COM }
57111172SHaik.Aftandilian@Sun.COM 
/*
 * Run the post-suspend callbacks and return their result: zero on
 * success, non-zero on failure. Must be called after suspend_start has
 * been called, whether or not suspend_start succeeded.
 */
int
suspend_post(char *error_reason, size_t max_reason_len)
{
	int rv;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	rv = cluster_post_wrapper(error_reason, max_reason_len);

	return (rv);
}
58411172SHaik.Aftandilian@Sun.COM 
58511172SHaik.Aftandilian@Sun.COM /*
58611172SHaik.Aftandilian@Sun.COM  * Suspends the OS by pausing CPUs and calling into the HV to initiate
58711172SHaik.Aftandilian@Sun.COM  * the suspend. When the HV routine hv_guest_suspend returns, the system
58811172SHaik.Aftandilian@Sun.COM  * will be resumed. Must be called after a successful call to suspend_pre.
58911172SHaik.Aftandilian@Sun.COM  * suspend_post must be called after suspend_start, whether or not
59011172SHaik.Aftandilian@Sun.COM  * suspend_start returns an error.
59111172SHaik.Aftandilian@Sun.COM  */
59211172SHaik.Aftandilian@Sun.COM /*ARGSUSED*/
59311172SHaik.Aftandilian@Sun.COM int
suspend_start(char * error_reason,size_t max_reason_len)59411172SHaik.Aftandilian@Sun.COM suspend_start(char *error_reason, size_t max_reason_len)
59511172SHaik.Aftandilian@Sun.COM {
59611172SHaik.Aftandilian@Sun.COM 	uint64_t	source_tick;
59711172SHaik.Aftandilian@Sun.COM 	uint64_t	source_stick;
59811172SHaik.Aftandilian@Sun.COM 	uint64_t	rv;
59911172SHaik.Aftandilian@Sun.COM 	timestruc_t	source_tod;
60011172SHaik.Aftandilian@Sun.COM 	int		spl;
60111172SHaik.Aftandilian@Sun.COM 
60211172SHaik.Aftandilian@Sun.COM 	ASSERT(suspend_supported());
60311172SHaik.Aftandilian@Sun.COM 	DBG("suspend: %s", __func__);
60411172SHaik.Aftandilian@Sun.COM 
60511713SPavel.Tatashin@Sun.COM 	sfmmu_ctxdoms_lock();
60611713SPavel.Tatashin@Sun.COM 
60711172SHaik.Aftandilian@Sun.COM 	mutex_enter(&cpu_lock);
60811172SHaik.Aftandilian@Sun.COM 
60911172SHaik.Aftandilian@Sun.COM 	/* Suspend the watchdog */
61011172SHaik.Aftandilian@Sun.COM 	watchdog_suspend();
61111172SHaik.Aftandilian@Sun.COM 
61211172SHaik.Aftandilian@Sun.COM 	/* Record the TOD */
61311172SHaik.Aftandilian@Sun.COM 	mutex_enter(&tod_lock);
61411172SHaik.Aftandilian@Sun.COM 	source_tod = tod_get();
61511172SHaik.Aftandilian@Sun.COM 	mutex_exit(&tod_lock);
61611172SHaik.Aftandilian@Sun.COM 
61711172SHaik.Aftandilian@Sun.COM 	/* Pause all other CPUs */
61811172SHaik.Aftandilian@Sun.COM 	pause_cpus(NULL);
61911172SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: CPUs paused\n");
62011172SHaik.Aftandilian@Sun.COM 
62112015SHaik.Aftandilian@Sun.COM 	/* Suspend cyclics */
62211172SHaik.Aftandilian@Sun.COM 	cyclic_suspend();
62311172SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: cyclics suspended\n");
62412015SHaik.Aftandilian@Sun.COM 
62512015SHaik.Aftandilian@Sun.COM 	/* Disable interrupts */
62611172SHaik.Aftandilian@Sun.COM 	spl = spl8();
62712015SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: spl8()\n");
62811172SHaik.Aftandilian@Sun.COM 
62911172SHaik.Aftandilian@Sun.COM 	source_tick = gettick_counter();
63011172SHaik.Aftandilian@Sun.COM 	source_stick = gettick();
63111172SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
63211172SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);
63311172SHaik.Aftandilian@Sun.COM 
63411172SHaik.Aftandilian@Sun.COM 	/*
63512015SHaik.Aftandilian@Sun.COM 	 * Call into the HV to initiate the suspend. hv_guest_suspend()
63612015SHaik.Aftandilian@Sun.COM 	 * returns after the guest has been resumed or if the suspend
63712015SHaik.Aftandilian@Sun.COM 	 * operation failed or was cancelled. After a successful suspend,
63812015SHaik.Aftandilian@Sun.COM 	 * the %tick and %stick registers may have changed by an amount
63912015SHaik.Aftandilian@Sun.COM 	 * that is not proportional to the amount of time that has passed.
64012015SHaik.Aftandilian@Sun.COM 	 * They may have jumped forwards or backwards. Some variation is
64112015SHaik.Aftandilian@Sun.COM 	 * allowed and accounted for using suspend_tick_stick_max_delta,
64212015SHaik.Aftandilian@Sun.COM 	 * but otherwise this jump must be uniform across all CPUs and we
64312015SHaik.Aftandilian@Sun.COM 	 * operate under the assumption that it is (maintaining two global
64412015SHaik.Aftandilian@Sun.COM 	 * offset variables--one for %tick and one for %stick.)
64511172SHaik.Aftandilian@Sun.COM 	 */
64611172SHaik.Aftandilian@Sun.COM 	DBG_PROM("suspend: suspending... \n");
64711172SHaik.Aftandilian@Sun.COM 	rv = hv_guest_suspend();
64811172SHaik.Aftandilian@Sun.COM 	if (rv != 0) {
64911172SHaik.Aftandilian@Sun.COM 		splx(spl);
65011172SHaik.Aftandilian@Sun.COM 		cyclic_resume();
65111172SHaik.Aftandilian@Sun.COM 		start_cpus();
65211172SHaik.Aftandilian@Sun.COM 		watchdog_resume();
65311172SHaik.Aftandilian@Sun.COM 		mutex_exit(&cpu_lock);
65411713SPavel.Tatashin@Sun.COM 		sfmmu_ctxdoms_unlock();
65511172SHaik.Aftandilian@Sun.COM 		DBG("suspend: failed, rv: %ld\n", rv);
65611172SHaik.Aftandilian@Sun.COM 		return (rv);
65711172SHaik.Aftandilian@Sun.COM 	}
65811172SHaik.Aftandilian@Sun.COM 
65912260SHaik.Aftandilian@Sun.COM 	suspend_count++;
66012260SHaik.Aftandilian@Sun.COM 
66112015SHaik.Aftandilian@Sun.COM 	/* Update the global tick and stick offsets and the preserved TOD */
66212015SHaik.Aftandilian@Sun.COM 	set_tick_offsets(source_tick, source_stick, &source_tod);
66311172SHaik.Aftandilian@Sun.COM 
66411172SHaik.Aftandilian@Sun.COM 	/* Ensure new offsets are globally visible before resuming CPUs */
66511172SHaik.Aftandilian@Sun.COM 	membar_sync();
66611172SHaik.Aftandilian@Sun.COM 
66711172SHaik.Aftandilian@Sun.COM 	/* Enable interrupts */
66811172SHaik.Aftandilian@Sun.COM 	splx(spl);
66911172SHaik.Aftandilian@Sun.COM 
67011172SHaik.Aftandilian@Sun.COM 	/* Set the {%tick,%stick}.NPT bits on all CPUs */
67111172SHaik.Aftandilian@Sun.COM 	if (enable_user_tick_stick_emulation) {
67211172SHaik.Aftandilian@Sun.COM 		xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
67311172SHaik.Aftandilian@Sun.COM 		xt_sync(cpu_ready_set);
67411172SHaik.Aftandilian@Sun.COM 		ASSERT(gettick_npt() != 0);
67511172SHaik.Aftandilian@Sun.COM 		ASSERT(getstick_npt() != 0);
67611172SHaik.Aftandilian@Sun.COM 	}
67711172SHaik.Aftandilian@Sun.COM 
67811172SHaik.Aftandilian@Sun.COM 	/* If emulation is enabled, but not currently active, enable it */
67911172SHaik.Aftandilian@Sun.COM 	if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
68011172SHaik.Aftandilian@Sun.COM 		tick_stick_emulation_active = B_TRUE;
68111172SHaik.Aftandilian@Sun.COM 	}
68211172SHaik.Aftandilian@Sun.COM 
68311713SPavel.Tatashin@Sun.COM 	sfmmu_ctxdoms_remove();
68411713SPavel.Tatashin@Sun.COM 
68511172SHaik.Aftandilian@Sun.COM 	/* Resume cyclics, unpause CPUs */
68611172SHaik.Aftandilian@Sun.COM 	cyclic_resume();
68711172SHaik.Aftandilian@Sun.COM 	start_cpus();
68811172SHaik.Aftandilian@Sun.COM 
68911172SHaik.Aftandilian@Sun.COM 	/* Set the TOD */
69011172SHaik.Aftandilian@Sun.COM 	mutex_enter(&tod_lock);
69111172SHaik.Aftandilian@Sun.COM 	tod_set(source_tod);
69211172SHaik.Aftandilian@Sun.COM 	mutex_exit(&tod_lock);
69311172SHaik.Aftandilian@Sun.COM 
69411172SHaik.Aftandilian@Sun.COM 	/* Re-enable the watchdog */
69511172SHaik.Aftandilian@Sun.COM 	watchdog_resume();
69611172SHaik.Aftandilian@Sun.COM 
69711172SHaik.Aftandilian@Sun.COM 	mutex_exit(&cpu_lock);
69811172SHaik.Aftandilian@Sun.COM 
69911713SPavel.Tatashin@Sun.COM 	/* Download the latest MD */
70011713SPavel.Tatashin@Sun.COM 	if ((rv = mach_descrip_update()) != 0)
70111713SPavel.Tatashin@Sun.COM 		cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
70211713SPavel.Tatashin@Sun.COM 		    rv);
70311713SPavel.Tatashin@Sun.COM 
70411713SPavel.Tatashin@Sun.COM 	sfmmu_ctxdoms_update();
70511713SPavel.Tatashin@Sun.COM 	sfmmu_ctxdoms_unlock();
70611713SPavel.Tatashin@Sun.COM 
70711172SHaik.Aftandilian@Sun.COM 	/* Get new MD, update CPU mappings/relationships */
70811172SHaik.Aftandilian@Sun.COM 	if (suspend_update_cpu_mappings)
70911172SHaik.Aftandilian@Sun.COM 		update_cpu_mappings();
71011172SHaik.Aftandilian@Sun.COM 
71111172SHaik.Aftandilian@Sun.COM 	DBG("suspend: target tick: 0x%lx", gettick_counter());
71211172SHaik.Aftandilian@Sun.COM 	DBG("suspend: target stick: 0x%llx", gettick());
71311172SHaik.Aftandilian@Sun.COM 	DBG("suspend: user %%tick/%%stick emulation is %d",
71411172SHaik.Aftandilian@Sun.COM 	    tick_stick_emulation_active);
71511172SHaik.Aftandilian@Sun.COM 	DBG("suspend: finished");
71611172SHaik.Aftandilian@Sun.COM 
71711172SHaik.Aftandilian@Sun.COM 	return (0);
71811172SHaik.Aftandilian@Sun.COM }
719