111172SHaik.Aftandilian@Sun.COM /*
211172SHaik.Aftandilian@Sun.COM * CDDL HEADER START
311172SHaik.Aftandilian@Sun.COM *
411172SHaik.Aftandilian@Sun.COM * The contents of this file are subject to the terms of the
511172SHaik.Aftandilian@Sun.COM * Common Development and Distribution License (the "License").
611172SHaik.Aftandilian@Sun.COM * You may not use this file except in compliance with the License.
711172SHaik.Aftandilian@Sun.COM *
811172SHaik.Aftandilian@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
911172SHaik.Aftandilian@Sun.COM * or http://www.opensolaris.org/os/licensing.
1011172SHaik.Aftandilian@Sun.COM * See the License for the specific language governing permissions
1111172SHaik.Aftandilian@Sun.COM * and limitations under the License.
1211172SHaik.Aftandilian@Sun.COM *
1311172SHaik.Aftandilian@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each
1411172SHaik.Aftandilian@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1511172SHaik.Aftandilian@Sun.COM * If applicable, add the following below this CDDL HEADER, with the
1611172SHaik.Aftandilian@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying
1711172SHaik.Aftandilian@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner]
1811172SHaik.Aftandilian@Sun.COM *
1911172SHaik.Aftandilian@Sun.COM * CDDL HEADER END
2011172SHaik.Aftandilian@Sun.COM */
2111172SHaik.Aftandilian@Sun.COM /*
2212260SHaik.Aftandilian@Sun.COM * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
2311172SHaik.Aftandilian@Sun.COM */
2411172SHaik.Aftandilian@Sun.COM
2511172SHaik.Aftandilian@Sun.COM #include <sys/mutex.h>
2611172SHaik.Aftandilian@Sun.COM #include <sys/cpuvar.h>
2711172SHaik.Aftandilian@Sun.COM #include <sys/cyclic.h>
2811172SHaik.Aftandilian@Sun.COM #include <sys/disp.h>
2911172SHaik.Aftandilian@Sun.COM #include <sys/ddi.h>
3011172SHaik.Aftandilian@Sun.COM #include <sys/wdt.h>
3111172SHaik.Aftandilian@Sun.COM #include <sys/callb.h>
3211172SHaik.Aftandilian@Sun.COM #include <sys/cmn_err.h>
3311172SHaik.Aftandilian@Sun.COM #include <sys/hypervisor_api.h>
3411172SHaik.Aftandilian@Sun.COM #include <sys/membar.h>
3511172SHaik.Aftandilian@Sun.COM #include <sys/x_call.h>
3611172SHaik.Aftandilian@Sun.COM #include <sys/promif.h>
3711172SHaik.Aftandilian@Sun.COM #include <sys/systm.h>
3811172SHaik.Aftandilian@Sun.COM #include <sys/mach_descrip.h>
3911172SHaik.Aftandilian@Sun.COM #include <sys/cpu_module.h>
4011172SHaik.Aftandilian@Sun.COM #include <sys/pg.h>
4111172SHaik.Aftandilian@Sun.COM #include <sys/lgrp.h>
4211172SHaik.Aftandilian@Sun.COM #include <sys/sysmacros.h>
4311172SHaik.Aftandilian@Sun.COM #include <sys/sunddi.h>
4411172SHaik.Aftandilian@Sun.COM #include <sys/cpupart.h>
4511172SHaik.Aftandilian@Sun.COM #include <sys/hsvc.h>
4612013SHaik.Aftandilian@Sun.COM #include <sys/mpo.h>
4711713SPavel.Tatashin@Sun.COM #include <vm/hat_sfmmu.h>
4812015SHaik.Aftandilian@Sun.COM #include <sys/time.h>
4912015SHaik.Aftandilian@Sun.COM #include <sys/clock.h>
5011172SHaik.Aftandilian@Sun.COM
5111172SHaik.Aftandilian@Sun.COM /*
5211172SHaik.Aftandilian@Sun.COM * Sun4v OS Suspend
5311172SHaik.Aftandilian@Sun.COM *
5411172SHaik.Aftandilian@Sun.COM * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
5511172SHaik.Aftandilian@Sun.COM * calling into the HV to initiate a suspension. Suspension is sequenced
5611172SHaik.Aftandilian@Sun.COM * externally by calling suspend_pre, suspend_start, and suspend_post.
5711172SHaik.Aftandilian@Sun.COM * suspend_pre and suspend_post are meant to perform any special operations
5811172SHaik.Aftandilian@Sun.COM * that should be done before or after a suspend/resume operation. e.g.,
5911172SHaik.Aftandilian@Sun.COM * callbacks to cluster software to disable heartbeat monitoring before the
6011172SHaik.Aftandilian@Sun.COM * system is suspended. suspend_start prepares kernel services to be suspended
6111172SHaik.Aftandilian@Sun.COM * and then suspends the domain by calling hv_guest_suspend.
6211172SHaik.Aftandilian@Sun.COM *
6311172SHaik.Aftandilian@Sun.COM * Special Handling for %tick and %stick Registers
6411172SHaik.Aftandilian@Sun.COM *
6511172SHaik.Aftandilian@Sun.COM * After a suspend/resume operation, the %tick and %stick registers may have
6611172SHaik.Aftandilian@Sun.COM * jumped forwards or backwards. The delta is assumed to be consistent across
6711172SHaik.Aftandilian@Sun.COM * all CPUs, within the negligible level of %tick and %stick variation
6811172SHaik.Aftandilian@Sun.COM * acceptable on a cold boot. In order to maintain increasing %tick and %stick
6911172SHaik.Aftandilian@Sun.COM * counter values without exposing large positive or negative jumps to kernel
7011172SHaik.Aftandilian@Sun.COM * or user code, a %tick and %stick offset is used. Kernel reads of these
7111172SHaik.Aftandilian@Sun.COM * counters return the sum of the hardware register counter and offset
7211172SHaik.Aftandilian@Sun.COM * variable. After a suspend/resume operation, user reads of %tick or %stick
7311172SHaik.Aftandilian@Sun.COM * are emulated. Suspend code enables emulation by setting the
7411172SHaik.Aftandilian@Sun.COM * %{tick,stick}.NPT fields which trigger a privileged instruction access
7511172SHaik.Aftandilian@Sun.COM * trap whenever the registers are read from user mode. If emulation has been
7611172SHaik.Aftandilian@Sun.COM * enabled, the trap handler emulates the instruction. Emulation is only
7711172SHaik.Aftandilian@Sun.COM * enabled during a successful suspend/resume operation. When emulation is
7811172SHaik.Aftandilian@Sun.COM * enabled, CPUs that are DR'd into the system will have their
7911172SHaik.Aftandilian@Sun.COM * %{tick,stick}.NPT bits set to 1 as well.
8011172SHaik.Aftandilian@Sun.COM */
8111172SHaik.Aftandilian@Sun.COM
8211172SHaik.Aftandilian@Sun.COM extern u_longlong_t gettick(void); /* returns %stick */
8311172SHaik.Aftandilian@Sun.COM extern uint64_t gettick_counter(void); /* returns %tick */
8411172SHaik.Aftandilian@Sun.COM extern uint64_t gettick_npt(void);
8511172SHaik.Aftandilian@Sun.COM extern uint64_t getstick_npt(void);
8611172SHaik.Aftandilian@Sun.COM extern int mach_descrip_update(void);
8711172SHaik.Aftandilian@Sun.COM extern cpuset_t cpu_ready_set;
8811172SHaik.Aftandilian@Sun.COM extern uint64_t native_tick_offset;
8911172SHaik.Aftandilian@Sun.COM extern uint64_t native_stick_offset;
9012015SHaik.Aftandilian@Sun.COM extern uint64_t sys_tick_freq;
9111172SHaik.Aftandilian@Sun.COM
9211172SHaik.Aftandilian@Sun.COM /*
9311172SHaik.Aftandilian@Sun.COM * Global Sun Cluster pre/post callbacks.
9411172SHaik.Aftandilian@Sun.COM */
9511172SHaik.Aftandilian@Sun.COM const char *(*cl_suspend_error_decode)(int);
9611172SHaik.Aftandilian@Sun.COM int (*cl_suspend_pre_callback)(void);
9711172SHaik.Aftandilian@Sun.COM int (*cl_suspend_post_callback)(void);
9811172SHaik.Aftandilian@Sun.COM #define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d"
9911172SHaik.Aftandilian@Sun.COM #define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d"
10011172SHaik.Aftandilian@Sun.COM #define SC_FAIL_STR_MAX 256
10111172SHaik.Aftandilian@Sun.COM
10211172SHaik.Aftandilian@Sun.COM /*
10311172SHaik.Aftandilian@Sun.COM * The minimum major and minor version of the HSVC_GROUP_CORE API group
10411172SHaik.Aftandilian@Sun.COM * required in order to use OS suspend.
10511172SHaik.Aftandilian@Sun.COM */
10611172SHaik.Aftandilian@Sun.COM #define SUSPEND_CORE_MAJOR 1
10711172SHaik.Aftandilian@Sun.COM #define SUSPEND_CORE_MINOR 2
10811172SHaik.Aftandilian@Sun.COM
10911172SHaik.Aftandilian@Sun.COM /*
11011172SHaik.Aftandilian@Sun.COM * By default, sun4v OS suspend is supported if the required HV version
11111172SHaik.Aftandilian@Sun.COM * is present. suspend_disabled should be set on platforms that do not
11211172SHaik.Aftandilian@Sun.COM * allow OS suspend regardless of whether or not the HV supports it.
11311172SHaik.Aftandilian@Sun.COM * It can also be set in /etc/system.
11411172SHaik.Aftandilian@Sun.COM */
11511172SHaik.Aftandilian@Sun.COM static int suspend_disabled = 0;
11611172SHaik.Aftandilian@Sun.COM
11711172SHaik.Aftandilian@Sun.COM /*
11811172SHaik.Aftandilian@Sun.COM * Controls whether or not user-land tick and stick register emulation
11911172SHaik.Aftandilian@Sun.COM * will be enabled following a successful suspend operation.
12011172SHaik.Aftandilian@Sun.COM */
12111172SHaik.Aftandilian@Sun.COM static int enable_user_tick_stick_emulation = 1;
12211172SHaik.Aftandilian@Sun.COM
12311172SHaik.Aftandilian@Sun.COM /*
12411172SHaik.Aftandilian@Sun.COM * Indicates whether or not tick and stick emulation is currently active.
12511172SHaik.Aftandilian@Sun.COM * After a successful suspend operation, if emulation is enabled, this
12611172SHaik.Aftandilian@Sun.COM * variable is set to B_TRUE. Global scope to allow emulation code to
12711172SHaik.Aftandilian@Sun.COM * check if emulation is active.
12811172SHaik.Aftandilian@Sun.COM */
12911172SHaik.Aftandilian@Sun.COM boolean_t tick_stick_emulation_active = B_FALSE;
13011172SHaik.Aftandilian@Sun.COM
13111172SHaik.Aftandilian@Sun.COM /*
13211713SPavel.Tatashin@Sun.COM * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
13311713SPavel.Tatashin@Sun.COM * sharing data structures, and processor groups will be updated using
13411713SPavel.Tatashin@Sun.COM * information from the updated MD.
13511172SHaik.Aftandilian@Sun.COM */
13611172SHaik.Aftandilian@Sun.COM static int suspend_update_cpu_mappings = 1;
13711172SHaik.Aftandilian@Sun.COM
13811172SHaik.Aftandilian@Sun.COM /*
13912015SHaik.Aftandilian@Sun.COM * The maximum number of microseconds by which the %tick or %stick register
14012015SHaik.Aftandilian@Sun.COM * can vary between any two CPUs in the system. To calculate the
14112015SHaik.Aftandilian@Sun.COM * native_stick_offset and native_tick_offset, we measure the change in these
14212015SHaik.Aftandilian@Sun.COM * registers on one CPU over a suspend/resume. Other CPUs may experience
14312015SHaik.Aftandilian@Sun.COM * slightly larger or smaller changes. %tick and %stick should be synchronized
14412015SHaik.Aftandilian@Sun.COM * between CPUs, but there may be some variation. So we add an additional value
14512015SHaik.Aftandilian@Sun.COM * derived from this variable to ensure that these registers always increase
14612015SHaik.Aftandilian@Sun.COM * over a suspend/resume operation, assuming all %tick and %stick registers
14712015SHaik.Aftandilian@Sun.COM * are synchronized (within a certain limit) across CPUs in the system. The
14812015SHaik.Aftandilian@Sun.COM * delta between %sticks on different CPUs should be a small number of cycles,
14912015SHaik.Aftandilian@Sun.COM * not perceptible to readers of %stick that migrate between CPUs. We set this
15012015SHaik.Aftandilian@Sun.COM * value to 1 millisecond which means that over a suspend/resume operation,
15112015SHaik.Aftandilian@Sun.COM * all CPUs' %tick and %stick will advance forwards as long as, across all
15212015SHaik.Aftandilian@Sun.COM * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
15312015SHaik.Aftandilian@Sun.COM * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
15412015SHaik.Aftandilian@Sun.COM * but small enough to not trigger TOD faults.
15512015SHaik.Aftandilian@Sun.COM */
15612015SHaik.Aftandilian@Sun.COM static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
15712015SHaik.Aftandilian@Sun.COM
15812015SHaik.Aftandilian@Sun.COM /*
15912260SHaik.Aftandilian@Sun.COM * The number of times the system has been suspended and resumed.
16012260SHaik.Aftandilian@Sun.COM */
16112260SHaik.Aftandilian@Sun.COM static uint64_t suspend_count = 0;
16212260SHaik.Aftandilian@Sun.COM
/*
 * Debug tracing helpers: DBG() and DBG_PROM().
 *
 * On DEBUG kernels both macros produce output only when suspend_debug_flag
 * is non-zero: DBG() goes through suspend_debug() and DBG_PROM() goes to
 * prom_printf(). On non-DEBUG kernels both macros expand to nothing.
 *
 * Each DEBUG macro expands to an unbraced "if", so do not use one as the
 * sole body of an if statement that has an else clause.
 */
#ifdef DEBUG

static int suspend_debug_flag = 0;

#define	DBG_PROM \
if (suspend_debug_flag)	\
	prom_printf

#define	DBG	\
if (suspend_debug_flag)	\
	suspend_debug

/*
 * Format a printf-style message into a local buffer and log it at CE_NOTE.
 * The formatting is bounded by the size of the buffer, so an over-long
 * message is truncated instead of overrunning the stack.
 */
static void
suspend_debug(const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	/* Pass the text as an argument so it is never treated as a format. */
	cmn_err(CE_NOTE, "%s", buf);
}

#else /* DEBUG */

#define	DBG_PROM
#define	DBG

#endif /* DEBUG */
19711172SHaik.Aftandilian@Sun.COM
19811172SHaik.Aftandilian@Sun.COM /*
19911172SHaik.Aftandilian@Sun.COM * Return true if the HV supports OS suspend and if suspend has not been
20011172SHaik.Aftandilian@Sun.COM * disabled on this platform.
20111172SHaik.Aftandilian@Sun.COM */
20211172SHaik.Aftandilian@Sun.COM boolean_t
suspend_supported(void)20311172SHaik.Aftandilian@Sun.COM suspend_supported(void)
20411172SHaik.Aftandilian@Sun.COM {
20511172SHaik.Aftandilian@Sun.COM uint64_t major, minor;
20611172SHaik.Aftandilian@Sun.COM
20711172SHaik.Aftandilian@Sun.COM if (suspend_disabled)
20811172SHaik.Aftandilian@Sun.COM return (B_FALSE);
20911172SHaik.Aftandilian@Sun.COM
21011172SHaik.Aftandilian@Sun.COM if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
21111172SHaik.Aftandilian@Sun.COM return (B_FALSE);
21211172SHaik.Aftandilian@Sun.COM
21311172SHaik.Aftandilian@Sun.COM return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
21411172SHaik.Aftandilian@Sun.COM (major > SUSPEND_CORE_MAJOR));
21511172SHaik.Aftandilian@Sun.COM }
21611172SHaik.Aftandilian@Sun.COM
21711172SHaik.Aftandilian@Sun.COM /*
21812260SHaik.Aftandilian@Sun.COM * Memory DR is not permitted if the system has been suspended and resumed.
21912260SHaik.Aftandilian@Sun.COM * It is the responsibility of the caller of suspend_start and the DR
22012260SHaik.Aftandilian@Sun.COM * subsystem to serialize DR operations and suspend_memdr_allowed() checks.
22112260SHaik.Aftandilian@Sun.COM */
22212260SHaik.Aftandilian@Sun.COM boolean_t
suspend_memdr_allowed(void)22312260SHaik.Aftandilian@Sun.COM suspend_memdr_allowed(void)
22412260SHaik.Aftandilian@Sun.COM {
22512260SHaik.Aftandilian@Sun.COM return (suspend_count == 0);
22612260SHaik.Aftandilian@Sun.COM }
22712260SHaik.Aftandilian@Sun.COM
22812260SHaik.Aftandilian@Sun.COM /*
22912015SHaik.Aftandilian@Sun.COM * Given a source tick, stick, and tod value, set the tick and stick offsets
23012015SHaik.Aftandilian@Sun.COM * such that the (current physical register value) + offset == (source value)
23112015SHaik.Aftandilian@Sun.COM * and in addition account for some variation between the %tick/%stick on
23212015SHaik.Aftandilian@Sun.COM * different CPUs. We account for this variation by adding in double the value
23312015SHaik.Aftandilian@Sun.COM * of suspend_tick_stick_max_delta. The following is an explanation of why
23412015SHaik.Aftandilian@Sun.COM * suspend_tick_stick_max_delta must be multiplied by two and added to
23512015SHaik.Aftandilian@Sun.COM * native_stick_offset.
23612015SHaik.Aftandilian@Sun.COM *
23712015SHaik.Aftandilian@Sun.COM * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
23812015SHaik.Aftandilian@Sun.COM * with physical "source" %stick values s0 and s1 respectively. When the guest
23912015SHaik.Aftandilian@Sun.COM * is first resumed, the physical "target" %stick values are t0 and t1
24012015SHaik.Aftandilian@Sun.COM * respectively. The virtual %stick values after the resume are v0 and v1
24112015SHaik.Aftandilian@Sun.COM * respectively. Let x be the maximum difference between any two CPU's %stick
24212015SHaik.Aftandilian@Sun.COM * register at a given point in time and let the %stick values be assigned
24312015SHaik.Aftandilian@Sun.COM * such that
24412015SHaik.Aftandilian@Sun.COM *
24512015SHaik.Aftandilian@Sun.COM * s1 = s0 + x and
24612015SHaik.Aftandilian@Sun.COM * t1 = t0 - x
24712015SHaik.Aftandilian@Sun.COM *
24812015SHaik.Aftandilian@Sun.COM * Let us assume that p0 is driving the suspend and resume. Then, we will
24912015SHaik.Aftandilian@Sun.COM * calculate the stick offset f and the virtual %stick on p0 after the
25012015SHaik.Aftandilian@Sun.COM * resume as follows.
25112015SHaik.Aftandilian@Sun.COM *
25212015SHaik.Aftandilian@Sun.COM * f = s0 - t0 and
25312015SHaik.Aftandilian@Sun.COM * v0 = t0 + f
25412015SHaik.Aftandilian@Sun.COM *
25512015SHaik.Aftandilian@Sun.COM * We calculate the virtual %stick v1 on p1 after the resume as
25612015SHaik.Aftandilian@Sun.COM *
25712015SHaik.Aftandilian@Sun.COM * v1 = t1 + f
25812015SHaik.Aftandilian@Sun.COM *
25912015SHaik.Aftandilian@Sun.COM * Substitution yields
26012015SHaik.Aftandilian@Sun.COM *
26112015SHaik.Aftandilian@Sun.COM * v1 = t1 + (s0 - t0)
26212015SHaik.Aftandilian@Sun.COM * v1 = (t0 - x) + (s0 - t0)
26312015SHaik.Aftandilian@Sun.COM * v1 = -x + s0
26412015SHaik.Aftandilian@Sun.COM * v1 = s0 - x
26512015SHaik.Aftandilian@Sun.COM * v1 = (s1 - x) - x
26612015SHaik.Aftandilian@Sun.COM * v1 = s1 - 2x
26712015SHaik.Aftandilian@Sun.COM *
26812015SHaik.Aftandilian@Sun.COM * Therefore, in this scenario, without accounting for %stick variation in
26912015SHaik.Aftandilian@Sun.COM * the calculation of the native_stick_offset f, the virtual %stick on p1
27012015SHaik.Aftandilian@Sun.COM * is less than the value of the %stick on p1 before the suspend which is
27112015SHaik.Aftandilian@Sun.COM * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1
27212015SHaik.Aftandilian@Sun.COM * which means the %stick on p1 after the resume will always be greater
27312015SHaik.Aftandilian@Sun.COM * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
27412015SHaik.Aftandilian@Sun.COM * at any point in time, we can accomplish this by adding 2x to f. This
27512015SHaik.Aftandilian@Sun.COM * guarantees any processes bound to CPU P0 or P1 will not see a %stick
27612015SHaik.Aftandilian@Sun.COM * decrease across a suspend/resume. Hence, in the code below, we multiply
27712015SHaik.Aftandilian@Sun.COM * suspend_tick_stick_max_delta by two in the calculation for
27812015SHaik.Aftandilian@Sun.COM * native_stick_offset, native_tick_offset, and target_hrtime.
27911172SHaik.Aftandilian@Sun.COM */
28011172SHaik.Aftandilian@Sun.COM static void
set_tick_offsets(uint64_t source_tick,uint64_t source_stick,timestruc_t * tsp)28112015SHaik.Aftandilian@Sun.COM set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
28211172SHaik.Aftandilian@Sun.COM {
28311172SHaik.Aftandilian@Sun.COM uint64_t target_tick;
28411172SHaik.Aftandilian@Sun.COM uint64_t target_stick;
28512015SHaik.Aftandilian@Sun.COM hrtime_t source_hrtime;
28612015SHaik.Aftandilian@Sun.COM hrtime_t target_hrtime;
28711172SHaik.Aftandilian@Sun.COM
28812015SHaik.Aftandilian@Sun.COM /*
28912015SHaik.Aftandilian@Sun.COM * Temporarily set the offsets to zero so that the following reads
29012015SHaik.Aftandilian@Sun.COM * of the registers will yield physical unadjusted counter values.
29112015SHaik.Aftandilian@Sun.COM */
29211172SHaik.Aftandilian@Sun.COM native_tick_offset = 0;
29311172SHaik.Aftandilian@Sun.COM native_stick_offset = 0;
29411172SHaik.Aftandilian@Sun.COM
29511172SHaik.Aftandilian@Sun.COM target_tick = gettick_counter(); /* returns %tick */
29611172SHaik.Aftandilian@Sun.COM target_stick = gettick(); /* returns %stick */
29711172SHaik.Aftandilian@Sun.COM
29812015SHaik.Aftandilian@Sun.COM /*
29912015SHaik.Aftandilian@Sun.COM * Calculate the new offsets. In addition to the delta observed on
30012015SHaik.Aftandilian@Sun.COM * this CPU, add an additional value. Multiply the %tick/%stick
30112015SHaik.Aftandilian@Sun.COM * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
30212015SHaik.Aftandilian@Sun.COM * to account for a delta between CPUs before the suspend and a
30312015SHaik.Aftandilian@Sun.COM * delta between CPUs after the resume.
30412015SHaik.Aftandilian@Sun.COM */
30512015SHaik.Aftandilian@Sun.COM native_tick_offset = (source_tick - target_tick) +
30612015SHaik.Aftandilian@Sun.COM (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
30712015SHaik.Aftandilian@Sun.COM native_stick_offset = (source_stick - target_stick) +
30812015SHaik.Aftandilian@Sun.COM (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);
30912015SHaik.Aftandilian@Sun.COM
31012015SHaik.Aftandilian@Sun.COM /*
31112015SHaik.Aftandilian@Sun.COM * We've effectively increased %stick and %tick by twice the value
31212015SHaik.Aftandilian@Sun.COM * of suspend_tick_stick_max_delta to account for variation across
31312015SHaik.Aftandilian@Sun.COM * CPUs. Now adjust the preserved TOD by the same amount.
31412015SHaik.Aftandilian@Sun.COM */
31512015SHaik.Aftandilian@Sun.COM source_hrtime = ts2hrt(tsp);
31612015SHaik.Aftandilian@Sun.COM target_hrtime = source_hrtime +
31712015SHaik.Aftandilian@Sun.COM (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
31812015SHaik.Aftandilian@Sun.COM hrt2ts(target_hrtime, tsp);
31911172SHaik.Aftandilian@Sun.COM }
32011172SHaik.Aftandilian@Sun.COM
/*
 * Ask the hypervisor to set the NPT field of both %stick and %tick to 1 on
 * the CPU executing this function. The status returned by each hypervisor
 * call is deliberately discarded.
 */
static void
enable_tick_stick_npt(void)
{
	/* %stick.NPT first, then %tick.NPT. */
	(void) hv_stick_set_npt(1);
	(void) hv_tick_set_npt(1);
}
33011172SHaik.Aftandilian@Sun.COM
33111172SHaik.Aftandilian@Sun.COM /*
33211172SHaik.Aftandilian@Sun.COM * Synchronize a CPU's {tick,stick}.NPT fields with the current state
33311172SHaik.Aftandilian@Sun.COM * of the system. This is used when a CPU is DR'd into the system.
33411172SHaik.Aftandilian@Sun.COM */
33511172SHaik.Aftandilian@Sun.COM void
suspend_sync_tick_stick_npt(void)33611172SHaik.Aftandilian@Sun.COM suspend_sync_tick_stick_npt(void)
33711172SHaik.Aftandilian@Sun.COM {
33811172SHaik.Aftandilian@Sun.COM if (tick_stick_emulation_active) {
33911172SHaik.Aftandilian@Sun.COM DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
34011387SSurya.Prakki@Sun.COM (void) hv_stick_set_npt(1);
34111387SSurya.Prakki@Sun.COM (void) hv_tick_set_npt(1);
34211172SHaik.Aftandilian@Sun.COM } else {
34311172SHaik.Aftandilian@Sun.COM ASSERT(gettick_npt() == 0);
34411172SHaik.Aftandilian@Sun.COM ASSERT(getstick_npt() == 0);
34511172SHaik.Aftandilian@Sun.COM }
34611172SHaik.Aftandilian@Sun.COM }
34711172SHaik.Aftandilian@Sun.COM
34811172SHaik.Aftandilian@Sun.COM /*
34911172SHaik.Aftandilian@Sun.COM * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
35011172SHaik.Aftandilian@Sun.COM * sharing data structures, and processor groups.
35111172SHaik.Aftandilian@Sun.COM */
35211172SHaik.Aftandilian@Sun.COM static void
update_cpu_mappings(void)35311172SHaik.Aftandilian@Sun.COM update_cpu_mappings(void)
35411172SHaik.Aftandilian@Sun.COM {
35511172SHaik.Aftandilian@Sun.COM md_t *mdp;
35611172SHaik.Aftandilian@Sun.COM processorid_t id;
35711172SHaik.Aftandilian@Sun.COM cpu_t *cp;
35811172SHaik.Aftandilian@Sun.COM cpu_pg_t *pgps[NCPU];
35911172SHaik.Aftandilian@Sun.COM
36011172SHaik.Aftandilian@Sun.COM if ((mdp = md_get_handle()) == NULL) {
36111172SHaik.Aftandilian@Sun.COM DBG("suspend: md_get_handle failed");
36211172SHaik.Aftandilian@Sun.COM return;
36311172SHaik.Aftandilian@Sun.COM }
36411172SHaik.Aftandilian@Sun.COM
36511172SHaik.Aftandilian@Sun.COM DBG("suspend: updating CPU mappings");
36611172SHaik.Aftandilian@Sun.COM
36711172SHaik.Aftandilian@Sun.COM mutex_enter(&cpu_lock);
36811172SHaik.Aftandilian@Sun.COM
36911172SHaik.Aftandilian@Sun.COM setup_chip_mappings(mdp);
37011172SHaik.Aftandilian@Sun.COM setup_exec_unit_mappings(mdp);
37111172SHaik.Aftandilian@Sun.COM for (id = 0; id < NCPU; id++) {
37211172SHaik.Aftandilian@Sun.COM if ((cp = cpu_get(id)) == NULL)
37311172SHaik.Aftandilian@Sun.COM continue;
37411172SHaik.Aftandilian@Sun.COM cpu_map_exec_units(cp);
37511172SHaik.Aftandilian@Sun.COM }
37611172SHaik.Aftandilian@Sun.COM
37711172SHaik.Aftandilian@Sun.COM /*
37811172SHaik.Aftandilian@Sun.COM * Re-calculate processor groups.
37911172SHaik.Aftandilian@Sun.COM *
38011172SHaik.Aftandilian@Sun.COM * First tear down all PG information before adding any new PG
38111172SHaik.Aftandilian@Sun.COM * information derived from the MD we just downloaded. We must
38211172SHaik.Aftandilian@Sun.COM * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
38311172SHaik.Aftandilian@Sun.COM * we want to minimize the number of times pause_cpus is called.
38411172SHaik.Aftandilian@Sun.COM * Inactivating all CPUs would leave PGs without any active CPUs,
38511172SHaik.Aftandilian@Sun.COM * so while CPUs are paused, call pg_cpu_inactive and swap in the
38611172SHaik.Aftandilian@Sun.COM * bootstrap PG structure saving the original PG structure to be
38711172SHaik.Aftandilian@Sun.COM * fini'd afterwards. This prevents the dispatcher from encountering
388*12987SHaik.Aftandilian@Oracle.COM * PGs in which all CPUs are inactive. Offline CPUs are already
389*12987SHaik.Aftandilian@Oracle.COM * inactive in their PGs and shouldn't be reactivated, so we must
390*12987SHaik.Aftandilian@Oracle.COM * not call pg_cpu_inactive or pg_cpu_active for those CPUs.
39111172SHaik.Aftandilian@Sun.COM */
39211172SHaik.Aftandilian@Sun.COM pause_cpus(NULL);
39311172SHaik.Aftandilian@Sun.COM for (id = 0; id < NCPU; id++) {
39411172SHaik.Aftandilian@Sun.COM if ((cp = cpu_get(id)) == NULL)
39511172SHaik.Aftandilian@Sun.COM continue;
396*12987SHaik.Aftandilian@Oracle.COM if ((cp->cpu_flags & CPU_OFFLINE) == 0)
397*12987SHaik.Aftandilian@Oracle.COM pg_cpu_inactive(cp);
39811172SHaik.Aftandilian@Sun.COM pgps[id] = cp->cpu_pg;
39911172SHaik.Aftandilian@Sun.COM pg_cpu_bootstrap(cp);
40011172SHaik.Aftandilian@Sun.COM }
40111172SHaik.Aftandilian@Sun.COM start_cpus();
40211172SHaik.Aftandilian@Sun.COM
40311172SHaik.Aftandilian@Sun.COM /*
40411172SHaik.Aftandilian@Sun.COM * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
40511172SHaik.Aftandilian@Sun.COM * not paused. Use two separate loops here so that we do not
40611172SHaik.Aftandilian@Sun.COM * initialize PG data for CPUs until all the old PG data structures
40711172SHaik.Aftandilian@Sun.COM * are torn down.
40811172SHaik.Aftandilian@Sun.COM */
40911172SHaik.Aftandilian@Sun.COM for (id = 0; id < NCPU; id++) {
41011172SHaik.Aftandilian@Sun.COM if ((cp = cpu_get(id)) == NULL)
41111172SHaik.Aftandilian@Sun.COM continue;
41211172SHaik.Aftandilian@Sun.COM pg_cpu_fini(cp, pgps[id]);
41312013SHaik.Aftandilian@Sun.COM mpo_cpu_remove(id);
41411172SHaik.Aftandilian@Sun.COM }
41511172SHaik.Aftandilian@Sun.COM
41611172SHaik.Aftandilian@Sun.COM /*
41711172SHaik.Aftandilian@Sun.COM * Initialize PG data for each CPU, but leave the bootstrapped
41811172SHaik.Aftandilian@Sun.COM * PG structure in place to avoid running with any PGs containing
41911172SHaik.Aftandilian@Sun.COM * nothing but inactive CPUs.
42011172SHaik.Aftandilian@Sun.COM */
42111172SHaik.Aftandilian@Sun.COM for (id = 0; id < NCPU; id++) {
42211172SHaik.Aftandilian@Sun.COM if ((cp = cpu_get(id)) == NULL)
42311172SHaik.Aftandilian@Sun.COM continue;
42412013SHaik.Aftandilian@Sun.COM mpo_cpu_add(mdp, id);
42511172SHaik.Aftandilian@Sun.COM pgps[id] = pg_cpu_init(cp, B_TRUE);
42611172SHaik.Aftandilian@Sun.COM }
42711172SHaik.Aftandilian@Sun.COM
42811172SHaik.Aftandilian@Sun.COM /*
42911172SHaik.Aftandilian@Sun.COM * Now that PG data has been initialized for all CPUs in the
43011172SHaik.Aftandilian@Sun.COM * system, replace the bootstrapped PG structure with the
43111172SHaik.Aftandilian@Sun.COM * initialized PG structure and call pg_cpu_active for each CPU.
43211172SHaik.Aftandilian@Sun.COM */
43311172SHaik.Aftandilian@Sun.COM pause_cpus(NULL);
43411172SHaik.Aftandilian@Sun.COM for (id = 0; id < NCPU; id++) {
43511172SHaik.Aftandilian@Sun.COM if ((cp = cpu_get(id)) == NULL)
43611172SHaik.Aftandilian@Sun.COM continue;
43711172SHaik.Aftandilian@Sun.COM cp->cpu_pg = pgps[id];
438*12987SHaik.Aftandilian@Oracle.COM if ((cp->cpu_flags & CPU_OFFLINE) == 0)
439*12987SHaik.Aftandilian@Oracle.COM pg_cpu_active(cp);
44011172SHaik.Aftandilian@Sun.COM }
44111172SHaik.Aftandilian@Sun.COM start_cpus();
44211172SHaik.Aftandilian@Sun.COM
44311172SHaik.Aftandilian@Sun.COM mutex_exit(&cpu_lock);
44411172SHaik.Aftandilian@Sun.COM
44511172SHaik.Aftandilian@Sun.COM (void) md_fini_handle(mdp);
44611172SHaik.Aftandilian@Sun.COM }
44711172SHaik.Aftandilian@Sun.COM
44811172SHaik.Aftandilian@Sun.COM /*
44911172SHaik.Aftandilian@Sun.COM * Wrapper for the Sun Cluster error decoding function.
45011172SHaik.Aftandilian@Sun.COM */
45111172SHaik.Aftandilian@Sun.COM static int
cluster_error_decode(int error,char * error_reason,size_t max_reason_len)45211172SHaik.Aftandilian@Sun.COM cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
45311172SHaik.Aftandilian@Sun.COM {
45411172SHaik.Aftandilian@Sun.COM const char *decoded;
45511172SHaik.Aftandilian@Sun.COM size_t decoded_len;
45611172SHaik.Aftandilian@Sun.COM
45711172SHaik.Aftandilian@Sun.COM ASSERT(error_reason != NULL);
45811172SHaik.Aftandilian@Sun.COM ASSERT(max_reason_len > 0);
45911172SHaik.Aftandilian@Sun.COM
46011172SHaik.Aftandilian@Sun.COM max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);
46111172SHaik.Aftandilian@Sun.COM
46211172SHaik.Aftandilian@Sun.COM if (cl_suspend_error_decode == NULL)
46311172SHaik.Aftandilian@Sun.COM return (-1);
46411172SHaik.Aftandilian@Sun.COM
46511172SHaik.Aftandilian@Sun.COM if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
46611172SHaik.Aftandilian@Sun.COM return (-1);
46711172SHaik.Aftandilian@Sun.COM
46811172SHaik.Aftandilian@Sun.COM /* Get number of non-NULL bytes */
46911172SHaik.Aftandilian@Sun.COM if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
47011172SHaik.Aftandilian@Sun.COM return (-1);
47111172SHaik.Aftandilian@Sun.COM
47211172SHaik.Aftandilian@Sun.COM bcopy(decoded, error_reason, decoded_len);
47311172SHaik.Aftandilian@Sun.COM
47411172SHaik.Aftandilian@Sun.COM /*
47511172SHaik.Aftandilian@Sun.COM * The error string returned from cl_suspend_error_decode
47611172SHaik.Aftandilian@Sun.COM * should be NULL-terminated, but set the terminator here
47711172SHaik.Aftandilian@Sun.COM * because we only copied non-NULL bytes. If the decoded
47811172SHaik.Aftandilian@Sun.COM * string was not NULL-terminated, this guarantees that
47911172SHaik.Aftandilian@Sun.COM * error_reason will be.
48011172SHaik.Aftandilian@Sun.COM */
48111172SHaik.Aftandilian@Sun.COM error_reason[decoded_len] = '\0';
48211172SHaik.Aftandilian@Sun.COM
48311172SHaik.Aftandilian@Sun.COM return (0);
48411172SHaik.Aftandilian@Sun.COM }
48511172SHaik.Aftandilian@Sun.COM
48611172SHaik.Aftandilian@Sun.COM /*
48711172SHaik.Aftandilian@Sun.COM * Wrapper for the Sun Cluster pre-suspend callback.
48811172SHaik.Aftandilian@Sun.COM */
48911172SHaik.Aftandilian@Sun.COM static int
cluster_pre_wrapper(char * error_reason,size_t max_reason_len)49011172SHaik.Aftandilian@Sun.COM cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
49111172SHaik.Aftandilian@Sun.COM {
49211172SHaik.Aftandilian@Sun.COM int rv = 0;
49311172SHaik.Aftandilian@Sun.COM
49411172SHaik.Aftandilian@Sun.COM if (cl_suspend_pre_callback != NULL) {
49511172SHaik.Aftandilian@Sun.COM rv = (*cl_suspend_pre_callback)();
49611172SHaik.Aftandilian@Sun.COM DBG("suspend: cl_suspend_pre_callback returned %d", rv);
49711172SHaik.Aftandilian@Sun.COM if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
49811172SHaik.Aftandilian@Sun.COM if (cluster_error_decode(rv, error_reason,
49911172SHaik.Aftandilian@Sun.COM max_reason_len)) {
50011172SHaik.Aftandilian@Sun.COM (void) snprintf(error_reason, max_reason_len,
50111172SHaik.Aftandilian@Sun.COM SC_PRE_FAIL_STR_FMT, rv);
50211172SHaik.Aftandilian@Sun.COM }
50311172SHaik.Aftandilian@Sun.COM }
50411172SHaik.Aftandilian@Sun.COM }
50511172SHaik.Aftandilian@Sun.COM
50611172SHaik.Aftandilian@Sun.COM return (rv);
50711172SHaik.Aftandilian@Sun.COM }
50811172SHaik.Aftandilian@Sun.COM
50911172SHaik.Aftandilian@Sun.COM /*
51011172SHaik.Aftandilian@Sun.COM * Wrapper for the Sun Cluster post-suspend callback.
51111172SHaik.Aftandilian@Sun.COM */
51211172SHaik.Aftandilian@Sun.COM static int
cluster_post_wrapper(char * error_reason,size_t max_reason_len)51311172SHaik.Aftandilian@Sun.COM cluster_post_wrapper(char *error_reason, size_t max_reason_len)
51411172SHaik.Aftandilian@Sun.COM {
51511172SHaik.Aftandilian@Sun.COM int rv = 0;
51611172SHaik.Aftandilian@Sun.COM
51711172SHaik.Aftandilian@Sun.COM if (cl_suspend_post_callback != NULL) {
51811172SHaik.Aftandilian@Sun.COM rv = (*cl_suspend_post_callback)();
51911172SHaik.Aftandilian@Sun.COM DBG("suspend: cl_suspend_post_callback returned %d", rv);
52011172SHaik.Aftandilian@Sun.COM if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
52111172SHaik.Aftandilian@Sun.COM if (cluster_error_decode(rv, error_reason,
52211172SHaik.Aftandilian@Sun.COM max_reason_len)) {
52311172SHaik.Aftandilian@Sun.COM (void) snprintf(error_reason,
52411172SHaik.Aftandilian@Sun.COM max_reason_len, SC_POST_FAIL_STR_FMT, rv);
52511172SHaik.Aftandilian@Sun.COM }
52611172SHaik.Aftandilian@Sun.COM }
52711172SHaik.Aftandilian@Sun.COM }
52811172SHaik.Aftandilian@Sun.COM
52911172SHaik.Aftandilian@Sun.COM return (rv);
53011172SHaik.Aftandilian@Sun.COM }
53111172SHaik.Aftandilian@Sun.COM
53211172SHaik.Aftandilian@Sun.COM /*
53311172SHaik.Aftandilian@Sun.COM * Execute pre-suspend callbacks preparing the system for a suspend operation.
53411172SHaik.Aftandilian@Sun.COM * Returns zero on success, non-zero on failure. Sets the recovered argument
53511172SHaik.Aftandilian@Sun.COM * to indicate whether or not callbacks could be undone in the event of a
53611172SHaik.Aftandilian@Sun.COM * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
53711172SHaik.Aftandilian@Sun.COM * otherwise *recovered is set to B_FALSE. Must be called successfully before
53811172SHaik.Aftandilian@Sun.COM * suspend_start can be called. Callers should first call suspend_support to
53911172SHaik.Aftandilian@Sun.COM * determine if OS suspend is supported.
54011172SHaik.Aftandilian@Sun.COM */
54111172SHaik.Aftandilian@Sun.COM int
suspend_pre(char * error_reason,size_t max_reason_len,boolean_t * recovered)54211172SHaik.Aftandilian@Sun.COM suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
54311172SHaik.Aftandilian@Sun.COM {
54411172SHaik.Aftandilian@Sun.COM int rv;
54511172SHaik.Aftandilian@Sun.COM
54611172SHaik.Aftandilian@Sun.COM ASSERT(recovered != NULL);
54711172SHaik.Aftandilian@Sun.COM
54811172SHaik.Aftandilian@Sun.COM /*
54911172SHaik.Aftandilian@Sun.COM * Return an error if suspend_pre is erreoneously called
55011172SHaik.Aftandilian@Sun.COM * when OS suspend is not supported.
55111172SHaik.Aftandilian@Sun.COM */
55211172SHaik.Aftandilian@Sun.COM ASSERT(suspend_supported());
55311172SHaik.Aftandilian@Sun.COM if (!suspend_supported()) {
55411172SHaik.Aftandilian@Sun.COM DBG("suspend: suspend_pre called without suspend support");
55511172SHaik.Aftandilian@Sun.COM *recovered = B_TRUE;
55611172SHaik.Aftandilian@Sun.COM return (ENOTSUP);
55711172SHaik.Aftandilian@Sun.COM }
55811172SHaik.Aftandilian@Sun.COM DBG("suspend: %s", __func__);
55911172SHaik.Aftandilian@Sun.COM
56011172SHaik.Aftandilian@Sun.COM rv = cluster_pre_wrapper(error_reason, max_reason_len);
56111172SHaik.Aftandilian@Sun.COM
56211172SHaik.Aftandilian@Sun.COM /*
56311172SHaik.Aftandilian@Sun.COM * At present, only one pre-suspend operation exists.
56411172SHaik.Aftandilian@Sun.COM * If it fails, no recovery needs to be done.
56511172SHaik.Aftandilian@Sun.COM */
56611172SHaik.Aftandilian@Sun.COM if (rv != 0 && recovered != NULL)
56711172SHaik.Aftandilian@Sun.COM *recovered = B_TRUE;
56811172SHaik.Aftandilian@Sun.COM
56911172SHaik.Aftandilian@Sun.COM return (rv);
57011172SHaik.Aftandilian@Sun.COM }
57111172SHaik.Aftandilian@Sun.COM
/*
 * Run the post-suspend callbacks.  Must be called once suspend_start has
 * been called, whether or not suspend_start succeeded.
 *
 * Returns the result of the cluster post-suspend wrapper: zero on
 * success, non-zero on failure.  On failure the wrapper may fill in
 * error_reason (up to max_reason_len bytes).
 */
int
suspend_post(char *error_reason, size_t max_reason_len)
{
	int rv;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	rv = cluster_post_wrapper(error_reason, max_reason_len);

	return (rv);
}
58411172SHaik.Aftandilian@Sun.COM
58511172SHaik.Aftandilian@Sun.COM /*
58611172SHaik.Aftandilian@Sun.COM * Suspends the OS by pausing CPUs and calling into the HV to initiate
58711172SHaik.Aftandilian@Sun.COM * the suspend. When the HV routine hv_guest_suspend returns, the system
58811172SHaik.Aftandilian@Sun.COM * will be resumed. Must be called after a successful call to suspend_pre.
58911172SHaik.Aftandilian@Sun.COM * suspend_post must be called after suspend_start, whether or not
59011172SHaik.Aftandilian@Sun.COM * suspend_start returns an error.
59111172SHaik.Aftandilian@Sun.COM */
59211172SHaik.Aftandilian@Sun.COM /*ARGSUSED*/
59311172SHaik.Aftandilian@Sun.COM int
suspend_start(char * error_reason,size_t max_reason_len)59411172SHaik.Aftandilian@Sun.COM suspend_start(char *error_reason, size_t max_reason_len)
59511172SHaik.Aftandilian@Sun.COM {
59611172SHaik.Aftandilian@Sun.COM uint64_t source_tick;
59711172SHaik.Aftandilian@Sun.COM uint64_t source_stick;
59811172SHaik.Aftandilian@Sun.COM uint64_t rv;
59911172SHaik.Aftandilian@Sun.COM timestruc_t source_tod;
60011172SHaik.Aftandilian@Sun.COM int spl;
60111172SHaik.Aftandilian@Sun.COM
60211172SHaik.Aftandilian@Sun.COM ASSERT(suspend_supported());
60311172SHaik.Aftandilian@Sun.COM DBG("suspend: %s", __func__);
60411172SHaik.Aftandilian@Sun.COM
60511713SPavel.Tatashin@Sun.COM sfmmu_ctxdoms_lock();
60611713SPavel.Tatashin@Sun.COM
60711172SHaik.Aftandilian@Sun.COM mutex_enter(&cpu_lock);
60811172SHaik.Aftandilian@Sun.COM
60911172SHaik.Aftandilian@Sun.COM /* Suspend the watchdog */
61011172SHaik.Aftandilian@Sun.COM watchdog_suspend();
61111172SHaik.Aftandilian@Sun.COM
61211172SHaik.Aftandilian@Sun.COM /* Record the TOD */
61311172SHaik.Aftandilian@Sun.COM mutex_enter(&tod_lock);
61411172SHaik.Aftandilian@Sun.COM source_tod = tod_get();
61511172SHaik.Aftandilian@Sun.COM mutex_exit(&tod_lock);
61611172SHaik.Aftandilian@Sun.COM
61711172SHaik.Aftandilian@Sun.COM /* Pause all other CPUs */
61811172SHaik.Aftandilian@Sun.COM pause_cpus(NULL);
61911172SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: CPUs paused\n");
62011172SHaik.Aftandilian@Sun.COM
62112015SHaik.Aftandilian@Sun.COM /* Suspend cyclics */
62211172SHaik.Aftandilian@Sun.COM cyclic_suspend();
62311172SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: cyclics suspended\n");
62412015SHaik.Aftandilian@Sun.COM
62512015SHaik.Aftandilian@Sun.COM /* Disable interrupts */
62611172SHaik.Aftandilian@Sun.COM spl = spl8();
62712015SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: spl8()\n");
62811172SHaik.Aftandilian@Sun.COM
62911172SHaik.Aftandilian@Sun.COM source_tick = gettick_counter();
63011172SHaik.Aftandilian@Sun.COM source_stick = gettick();
63111172SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
63211172SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);
63311172SHaik.Aftandilian@Sun.COM
63411172SHaik.Aftandilian@Sun.COM /*
63512015SHaik.Aftandilian@Sun.COM * Call into the HV to initiate the suspend. hv_guest_suspend()
63612015SHaik.Aftandilian@Sun.COM * returns after the guest has been resumed or if the suspend
63712015SHaik.Aftandilian@Sun.COM * operation failed or was cancelled. After a successful suspend,
63812015SHaik.Aftandilian@Sun.COM * the %tick and %stick registers may have changed by an amount
63912015SHaik.Aftandilian@Sun.COM * that is not proportional to the amount of time that has passed.
64012015SHaik.Aftandilian@Sun.COM * They may have jumped forwards or backwards. Some variation is
64112015SHaik.Aftandilian@Sun.COM * allowed and accounted for using suspend_tick_stick_max_delta,
64212015SHaik.Aftandilian@Sun.COM * but otherwise this jump must be uniform across all CPUs and we
64312015SHaik.Aftandilian@Sun.COM * operate under the assumption that it is (maintaining two global
64412015SHaik.Aftandilian@Sun.COM * offset variables--one for %tick and one for %stick.)
64511172SHaik.Aftandilian@Sun.COM */
64611172SHaik.Aftandilian@Sun.COM DBG_PROM("suspend: suspending... \n");
64711172SHaik.Aftandilian@Sun.COM rv = hv_guest_suspend();
64811172SHaik.Aftandilian@Sun.COM if (rv != 0) {
64911172SHaik.Aftandilian@Sun.COM splx(spl);
65011172SHaik.Aftandilian@Sun.COM cyclic_resume();
65111172SHaik.Aftandilian@Sun.COM start_cpus();
65211172SHaik.Aftandilian@Sun.COM watchdog_resume();
65311172SHaik.Aftandilian@Sun.COM mutex_exit(&cpu_lock);
65411713SPavel.Tatashin@Sun.COM sfmmu_ctxdoms_unlock();
65511172SHaik.Aftandilian@Sun.COM DBG("suspend: failed, rv: %ld\n", rv);
65611172SHaik.Aftandilian@Sun.COM return (rv);
65711172SHaik.Aftandilian@Sun.COM }
65811172SHaik.Aftandilian@Sun.COM
65912260SHaik.Aftandilian@Sun.COM suspend_count++;
66012260SHaik.Aftandilian@Sun.COM
66112015SHaik.Aftandilian@Sun.COM /* Update the global tick and stick offsets and the preserved TOD */
66212015SHaik.Aftandilian@Sun.COM set_tick_offsets(source_tick, source_stick, &source_tod);
66311172SHaik.Aftandilian@Sun.COM
66411172SHaik.Aftandilian@Sun.COM /* Ensure new offsets are globally visible before resuming CPUs */
66511172SHaik.Aftandilian@Sun.COM membar_sync();
66611172SHaik.Aftandilian@Sun.COM
66711172SHaik.Aftandilian@Sun.COM /* Enable interrupts */
66811172SHaik.Aftandilian@Sun.COM splx(spl);
66911172SHaik.Aftandilian@Sun.COM
67011172SHaik.Aftandilian@Sun.COM /* Set the {%tick,%stick}.NPT bits on all CPUs */
67111172SHaik.Aftandilian@Sun.COM if (enable_user_tick_stick_emulation) {
67211172SHaik.Aftandilian@Sun.COM xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
67311172SHaik.Aftandilian@Sun.COM xt_sync(cpu_ready_set);
67411172SHaik.Aftandilian@Sun.COM ASSERT(gettick_npt() != 0);
67511172SHaik.Aftandilian@Sun.COM ASSERT(getstick_npt() != 0);
67611172SHaik.Aftandilian@Sun.COM }
67711172SHaik.Aftandilian@Sun.COM
67811172SHaik.Aftandilian@Sun.COM /* If emulation is enabled, but not currently active, enable it */
67911172SHaik.Aftandilian@Sun.COM if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
68011172SHaik.Aftandilian@Sun.COM tick_stick_emulation_active = B_TRUE;
68111172SHaik.Aftandilian@Sun.COM }
68211172SHaik.Aftandilian@Sun.COM
68311713SPavel.Tatashin@Sun.COM sfmmu_ctxdoms_remove();
68411713SPavel.Tatashin@Sun.COM
68511172SHaik.Aftandilian@Sun.COM /* Resume cyclics, unpause CPUs */
68611172SHaik.Aftandilian@Sun.COM cyclic_resume();
68711172SHaik.Aftandilian@Sun.COM start_cpus();
68811172SHaik.Aftandilian@Sun.COM
68911172SHaik.Aftandilian@Sun.COM /* Set the TOD */
69011172SHaik.Aftandilian@Sun.COM mutex_enter(&tod_lock);
69111172SHaik.Aftandilian@Sun.COM tod_set(source_tod);
69211172SHaik.Aftandilian@Sun.COM mutex_exit(&tod_lock);
69311172SHaik.Aftandilian@Sun.COM
69411172SHaik.Aftandilian@Sun.COM /* Re-enable the watchdog */
69511172SHaik.Aftandilian@Sun.COM watchdog_resume();
69611172SHaik.Aftandilian@Sun.COM
69711172SHaik.Aftandilian@Sun.COM mutex_exit(&cpu_lock);
69811172SHaik.Aftandilian@Sun.COM
69911713SPavel.Tatashin@Sun.COM /* Download the latest MD */
70011713SPavel.Tatashin@Sun.COM if ((rv = mach_descrip_update()) != 0)
70111713SPavel.Tatashin@Sun.COM cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
70211713SPavel.Tatashin@Sun.COM rv);
70311713SPavel.Tatashin@Sun.COM
70411713SPavel.Tatashin@Sun.COM sfmmu_ctxdoms_update();
70511713SPavel.Tatashin@Sun.COM sfmmu_ctxdoms_unlock();
70611713SPavel.Tatashin@Sun.COM
70711172SHaik.Aftandilian@Sun.COM /* Get new MD, update CPU mappings/relationships */
70811172SHaik.Aftandilian@Sun.COM if (suspend_update_cpu_mappings)
70911172SHaik.Aftandilian@Sun.COM update_cpu_mappings();
71011172SHaik.Aftandilian@Sun.COM
71111172SHaik.Aftandilian@Sun.COM DBG("suspend: target tick: 0x%lx", gettick_counter());
71211172SHaik.Aftandilian@Sun.COM DBG("suspend: target stick: 0x%llx", gettick());
71311172SHaik.Aftandilian@Sun.COM DBG("suspend: user %%tick/%%stick emulation is %d",
71411172SHaik.Aftandilian@Sun.COM tick_stick_emulation_active);
71511172SHaik.Aftandilian@Sun.COM DBG("suspend: finished");
71611172SHaik.Aftandilian@Sun.COM
71711172SHaik.Aftandilian@Sun.COM return (0);
71811172SHaik.Aftandilian@Sun.COM }
719