/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* derived from netbsd's xen_machdep.c 1.1.2.1 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/xpv_user.h>

/* XXX 3.3. TODO remove this include */
#include <xen/public/arch-x86/xen-mca.h>

#include <sys/ctype.h>
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/trap.h>
#include <sys/segments.h>
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
#include <sys/cpr.h>
#include <sys/taskq.h>
#include <sys/uadmin.h>
#include <sys/evtchn_impl.h>
#include <sys/archsystm.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/mach_mmu.h>
#include <vm/hat_i86.h>
#include <sys/gnttab.h>
#include <sys/reboot.h>
#include <sys/stack.h>
#include <sys/clock.h>
#include <sys/bitmap.h>
#include <sys/processor.h>
#include <sys/xen_errno.h>
#include <sys/xpv_panic.h>
#include <sys/smp_impldefs.h>
#include <sys/cpu.h>
#include <sys/balloon_impl.h>
#include <sys/ddi.h>

#ifdef DEBUG
#define	SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
#else
#define	SUSPEND_DEBUG(...)
#endif
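
/*
 * SUSPEND_DEBUG() compiles away entirely in non-DEBUG builds; in DEBUG
 * builds it prints through xen_printf() only when xen_suspend_debug is set.
 */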

int cpr_debug;
cpuset_t cpu_suspend_lost_set;
static int xen_suspend_debug;

uint_t xen_phys_ncpus;
xen_mc_logical_cpu_t *xen_phys_cpus;
int xen_physinfo_debug = 0;

/*
 * Determine helpful version information.
 *
 * (And leave copies in the data segment so we can look at them later
 * with e.g. kmdb.)
 */

typedef enum xen_version {
	XENVER_BOOT_IDX,
	XENVER_CURRENT_IDX
} xen_version_t;

struct xenver {
	ulong_t xv_major;
	ulong_t xv_minor;
	ulong_t xv_revision;
	xen_extraversion_t xv_ver;
	ulong_t xv_is_xvm;
	xen_changeset_info_t xv_chgset;
	xen_compile_info_t xv_build;
	xen_capabilities_info_t xv_caps;
} xenver[2];

#define	XENVER_BOOT(m)		(xenver[XENVER_BOOT_IDX].m)
#define	XENVER_CURRENT(m)	(xenver[XENVER_CURRENT_IDX].m)
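
/*
 * For example, XENVER_CURRENT(xv_major) evaluates to the major version of
 * the hypervisor we are currently running on, while XENVER_BOOT(xv_major)
 * is the value recorded when the domain booted.
 */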

/*
 * Update the xenver data. We maintain two copies, boot and
 * current. If we are setting the boot, then also set current.
 */
static void
xen_set_version(xen_version_t idx)
{
	ulong_t ver;

	bzero(&xenver[idx], sizeof (xenver[idx]));

	ver = HYPERVISOR_xen_version(XENVER_version, 0);

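	/*
	 * XENVER_version packs the version into a single word: a
	 * hypervisor reporting 3.4 returns 0x00030004, so xv_major
	 * becomes 3 and xv_minor becomes 4.
	 */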
	xenver[idx].xv_major = BITX(ver, 31, 16);
	xenver[idx].xv_minor = BITX(ver, 15, 0);

	(void) HYPERVISOR_xen_version(XENVER_extraversion, &xenver[idx].xv_ver);

	/*
	 * The revision is buried in the extraversion information that is
	 * maintained by the hypervisor. For our purposes we expect that
	 * the revision number is:
	 *	- the second character in the extraversion information
	 *	- one character long
	 *	- numeric digit
	 * If it isn't then we can't extract the revision and we leave it
	 * set to 0.
	 */
	if (strlen(xenver[idx].xv_ver) > 1 && isdigit(xenver[idx].xv_ver[1]))
		xenver[idx].xv_revision = xenver[idx].xv_ver[1] - '0';
	else
		cmn_err(CE_WARN, "Cannot extract revision on this hypervisor "
		    "version: v%s, unexpected version format",
		    xenver[idx].xv_ver);

	xenver[idx].xv_is_xvm = 0;

	if (strstr(xenver[idx].xv_ver, "-xvm") != NULL)
		xenver[idx].xv_is_xvm = 1;

	(void) HYPERVISOR_xen_version(XENVER_changeset,
	    &xenver[idx].xv_chgset);

	(void) HYPERVISOR_xen_version(XENVER_compile_info,
	    &xenver[idx].xv_build);
	/*
	 * Capabilities are a set of space-separated ASCII strings,
	 * e.g. 'xen-3.1-x86_32p' or 'hvm-3.2-x86_64'.
	 */
	(void) HYPERVISOR_xen_version(XENVER_capabilities,
	    &xenver[idx].xv_caps);

	cmn_err(CE_CONT, "?v%lu.%lu%s chgset '%s'\n", xenver[idx].xv_major,
	    xenver[idx].xv_minor, xenver[idx].xv_ver, xenver[idx].xv_chgset);

	if (idx == XENVER_BOOT_IDX)
		bcopy(&xenver[XENVER_BOOT_IDX], &xenver[XENVER_CURRENT_IDX],
		    sizeof (xenver[XENVER_BOOT_IDX]));
}

typedef enum xen_hypervisor_check {
	XEN_RUN_CHECK,
	XEN_SUSPEND_CHECK
} xen_hypervisor_check_t;
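
/*
 * XEN_RUN_CHECK is applied at boot by startup_xen_version();
 * XEN_SUSPEND_CHECK is applied before a suspend and again after resume.
 */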

/*
 * To run at all, the hypervisor must be 3.0.4 or better. To suspend and
 * resume, we need 3.0.4 or better, and if it is exactly 3.0.4, it must be
 * the hypervisor provided by the Solaris xVM project.
 * Checking can be disabled for testing purposes by setting the
 * xen_suspend_debug variable.
 */
static int
xen_hypervisor_supports_solaris(xen_hypervisor_check_t check)
{
	if (xen_suspend_debug == 1)
		return (1);
	if (XENVER_CURRENT(xv_major) < 3)
		return (0);
	if (XENVER_CURRENT(xv_major) > 3)
		return (1);
	if (XENVER_CURRENT(xv_minor) > 0)
		return (1);
	if (XENVER_CURRENT(xv_revision) < 4)
		return (0);
	if (check == XEN_SUSPEND_CHECK && XENVER_CURRENT(xv_revision) == 4 &&
	    !XENVER_CURRENT(xv_is_xvm))
		return (0);

	return (1);
}

/*
 * If the hypervisor is -xvm, or 3.1.2 or higher, we don't need the
 * workaround.
 */
static void
xen_pte_workaround(void)
{
#if defined(__amd64)
	extern int pt_kern;

	if (XENVER_CURRENT(xv_major) != 3)
		return;
	if (XENVER_CURRENT(xv_minor) > 1)
		return;
	if (XENVER_CURRENT(xv_minor) == 1 &&
	    XENVER_CURRENT(xv_revision) > 1)
		return;
	if (XENVER_CURRENT(xv_is_xvm))
		return;

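	/*
	 * Presumably these older 64-bit hypervisors require kernel page
	 * table entries to carry the user-accessible bit; forcing pt_kern
	 * to PT_USER makes hat_i86 build kernel mappings that way.
	 */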
	pt_kern = PT_USER;
#endif
}

void
xen_set_callback(void (*func)(void), uint_t type, uint_t flags)
{
	struct callback_register cb;

	bzero(&cb, sizeof (cb));
#if defined(__amd64)
	cb.address = (ulong_t)func;
#elif defined(__i386)
	cb.address.cs = KCS_SEL;
	cb.address.eip = (ulong_t)func;
#endif
	cb.type = type;
	cb.flags = flags;

	/*
	 * XXPV always ignore return value for NMI
	 */
	if (HYPERVISOR_callback_op(CALLBACKOP_register, &cb) != 0 &&
	    type != CALLBACKTYPE_nmi)
		panic("HYPERVISOR_callback_op failed");
}

void
xen_init_callbacks(void)
{
	/*
	 * register event (interrupt) handler.
	 */
	xen_set_callback(xen_callback, CALLBACKTYPE_event, 0);

	/*
	 * failsafe handler.
	 */
	xen_set_callback(xen_failsafe_callback, CALLBACKTYPE_failsafe,
	    CALLBACKF_mask_events);

	/*
	 * NMI handler.
	 */
	xen_set_callback(nmiint, CALLBACKTYPE_nmi, 0);

	/*
	 * system call handler
	 * XXPV move to init_cpu_syscall?
	 */
#if defined(__amd64)
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);
#endif	/* __amd64 */
}


/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 * XXPV: doesn't exactly help us on UP though.
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}

void
xen_suspend_devices(void)
{
	int rc;

	SUSPEND_DEBUG("xen_suspend_devices\n");

	if ((rc = cpr_suspend_devices(ddi_root_node())) != 0)
		panic("failed to suspend devices: %d", rc);
}

void
xen_resume_devices(void)
{
	int rc;

	SUSPEND_DEBUG("xen_resume_devices\n");

	if ((rc = cpr_resume_devices(ddi_root_node(), 0)) != 0)
		panic("failed to resume devices: %d", rc);
}

/*
 * The list of mfn pages is out of date. Recompute it.
 */
static void
rebuild_mfn_list(void)
{
	int i = 0;
	size_t sz;
	size_t off;
	pfn_t pfn;

	SUSPEND_DEBUG("rebuild_mfn_list\n");

	sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;

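	/*
	 * Walk the mfn_list a page at a time: record the new MFN of each
	 * page of mfn_list in mfn_list_pages, and the new MFN of each page
	 * of mfn_list_pages in mfn_list_pages_page, which the shared info
	 * frame-list-list is then pointed at below.
	 */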
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		size_t j = mmu_btop(off);
		if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
			pfn = hat_getpfnum(kas.a_hat,
			    (caddr_t)&mfn_list_pages[j]);
			mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
		}

		pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
		mfn_list_pages[j] = pfn_to_mfn(pfn);
	}

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
	    = pfn_to_mfn(pfn);
}

static void
suspend_cpus(void)
{
	int i;

	SUSPEND_DEBUG("suspend_cpus\n");

	mp_enter_barrier();

	for (i = 1; i < ncpus; i++) {
		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
			SUSPEND_DEBUG("xen_vcpu_down %d\n", i);
			(void) xen_vcpu_down(i);
		}

		mach_cpucontext_reset(cpu[i]);
	}
}

static void
resume_cpus(void)
{
	int i;

	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (!CPU_IN_SET(cpu_suspend_lost_set, i)) {
			SUSPEND_DEBUG("xen_vcpu_up %d\n", i);
			mach_cpucontext_restore(cpu[i]);
			(void) xen_vcpu_up(i);
		}
	}

	mp_leave_barrier();
}

/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does. (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs. Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them. But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
6115084Sjohnlev SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}

/*ARGSUSED*/
int
xen_debug_handler(void *arg)
{
	debug_enter("External debug event received");

	/*
	 * If we've not got KMDB loaded, output some stuff difficult to capture
	 * from a domain core.
	 */
	if (!(boothowto & RB_DEBUG)) {
		shared_info_t *si = HYPERVISOR_shared_info;
		int i;

		prom_printf("evtchn_pending [ ");
		for (i = 0; i < 8; i++)
			prom_printf("%lx ", si->evtchn_pending[i]);
		prom_printf("]\nevtchn_mask [ ");
		for (i = 0; i < 8; i++)
			prom_printf("%lx ", si->evtchn_mask[i]);
		prom_printf("]\n");

		for (i = 0; i < ncpus; i++) {
			vcpu_info_t *vcpu = &si->vcpu_info[i];
			if (cpu[i] == NULL)
				continue;
			prom_printf("CPU%d pending %d mask %d sel %lx\n",
			    i, vcpu->evtchn_upcall_pending,
			    vcpu->evtchn_upcall_mask,
			    vcpu->evtchn_pending_sel);
		}
	}

	return (0);
}

/*ARGSUSED*/
static void
xen_sysrq_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	xenbus_transaction_t xbt;
	char key = '\0';
	int ret;

retry:
	if (xenbus_transaction_start(&xbt)) {
		cmn_err(CE_WARN, "failed to start sysrq transaction");
		return;
	}

	if ((ret = xenbus_scanf(xbt, "control", "sysrq", "%c", &key)) != 0) {
		/*
		 * ENOENT happens in response to our own xenbus_rm.
		 * XXPV - this happens spuriously on boot?
		 */
		if (ret != ENOENT)
			cmn_err(CE_WARN, "failed to read sysrq: %d", ret);
		goto out;
	}

	if ((ret = xenbus_rm(xbt, "control", "sysrq")) != 0) {
		cmn_err(CE_WARN, "failed to reset sysrq: %d", ret);
		goto out;
	}

	if (xenbus_transaction_end(xbt, 0) == EAGAIN)
		goto retry;

	/*
	 * Somewhat arbitrary - on Linux this means 'reboot'. We could just
	 * accept any key, but this might increase the risk of sending a
	 * harmless sysrq to the wrong domain...
	 */
	if (key == 'b')
		(void) xen_debug_handler(NULL);
	else
		cmn_err(CE_WARN, "Ignored sysrq %c", key);
	return;

out:
	(void) xenbus_transaction_end(xbt, 1);
}

taskq_t *xen_shutdown_tq;

#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

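/*
 * If a graceful shutdown has not completed within this many seconds
 * (five minutes), xen_dirty_shutdown() forces the operation through.
 */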
#define	SHUTDOWN_TIMEOUT_SECS	(60 * 5)

static const char *cmd_strings[SHUTDOWN_MAX] = {
	"poweroff",
	"reboot",
	"suspend",
	"halt"
};

static void
xen_dirty_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;

	cmn_err(CE_WARN, "Externally requested shutdown failed or "
	    "timed out.\nShutting down.\n");

	switch (cmd) {
	case SHUTDOWN_HALT:
	case SHUTDOWN_POWEROFF:
		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
		break;
	case SHUTDOWN_REBOOT:
		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
		break;
	}
}

static void
xen_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;
	proc_t *initpp;

	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);

	if (cmd == SHUTDOWN_SUSPEND) {
		xen_suspend_domain();
		return;
	}

	switch (cmd) {
	case SHUTDOWN_POWEROFF:
		force_shutdown_method = AD_POWEROFF;
		break;
	case SHUTDOWN_HALT:
		force_shutdown_method = AD_HALT;
		break;
	case SHUTDOWN_REBOOT:
		force_shutdown_method = AD_BOOT;
		break;
	}

	/*
	 * If we're still booting and init(1) isn't set up yet, simply halt.
	 */
	mutex_enter(&pidlock);
	initpp = prfind(P_INITPID);
	mutex_exit(&pidlock);
	if (initpp == NULL) {
		extern void halt(char *);
		halt("Power off the System");	/* just in case */
	}

	/*
	 * else, graceful shutdown with inittab and all getting involved
	 */
	psignal(initpp, SIGPWR);

	(void) timeout(xen_dirty_shutdown, arg,
	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
}

/*ARGSUSED*/
static void
xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	char *str;
	xenbus_transaction_t xbt;
	int err, shutdown_code = SHUTDOWN_INVALID;
	unsigned int slen;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;
	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
		(void) xenbus_transaction_end(xbt, 1);
		return;
	}

	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);

	/*
	 * If this is a watch fired from our write below, check out early to
	 * avoid an infinite loop.
	 */
	if (strcmp(str, "") == 0) {
		(void) xenbus_transaction_end(xbt, 0);
		kmem_free(str, slen);
		return;
	} else if (strcmp(str, "poweroff") == 0) {
		shutdown_code = SHUTDOWN_POWEROFF;
	} else if (strcmp(str, "reboot") == 0) {
		shutdown_code = SHUTDOWN_REBOOT;
	} else if (strcmp(str, "suspend") == 0) {
		shutdown_code = SHUTDOWN_SUSPEND;
	} else if (strcmp(str, "halt") == 0) {
		shutdown_code = SHUTDOWN_HALT;
	} else {
		printf("Ignoring shutdown request: %s\n", str);
	}

	/*
	 * XXPV	Should we check the value of xenbus_write() too, or are all
	 *	errors automatically folded into xenbus_transaction_end() ??
	 */
	(void) xenbus_write(xbt, "control", "shutdown", "");
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN) {
		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
		kmem_free(str, slen);
		goto again;
	}

	kmem_free(str, slen);
	if (shutdown_code != SHUTDOWN_INVALID) {
		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
		    (void *)(intptr_t)shutdown_code, 0);
	}
}

static struct xenbus_watch shutdown_watch;
static struct xenbus_watch sysrq_watch;

void
xen_late_startup(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
		    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
		shutdown_watch.node = "control/shutdown";
		shutdown_watch.callback = xen_shutdown_handler;
		if (register_xenbus_watch(&shutdown_watch))
			cmn_err(CE_WARN, "Failed to set shutdown watcher");

		sysrq_watch.node = "control/sysrq";
		sysrq_watch.callback = xen_sysrq_handler;
		if (register_xenbus_watch(&sysrq_watch))
			cmn_err(CE_WARN, "Failed to set sysrq watcher");
	}
	balloon_init(xen_info->nr_pages);
}

#ifdef DEBUG
#define	XEN_PRINTF_BUFSIZE	1024

char xen_printf_buffer[XEN_PRINTF_BUFSIZE];

/*
 * Printf function that calls the hypervisor directly.  For DomU it only
 * produces output when running on a xen hypervisor built with debug on.
 * It can be called at any time, since no I/O ring interaction is needed.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(xen_printf_buffer, XEN_PRINTF_BUFSIZE, fmt, ap);
	va_end(ap);

	(void) HYPERVISOR_console_io(CONSOLEIO_write,
	    strlen(xen_printf_buffer), xen_printf_buffer);
}
#else
void
xen_printf(const char *fmt, ...)
{
}
#endif	/* DEBUG */

void
startup_xen_version(void)
{
	xen_set_version(XENVER_BOOT_IDX);
	if (xen_hypervisor_supports_solaris(XEN_RUN_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));
	xen_pte_workaround();
}

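/*
 * Debug knob: setting this non-zero forces startup_xen_mca() down its
 * failure path, as if xen_get_mc_physcpuinfo() had failed.
 */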
int xen_mca_simulate_mc_physinfo_failure = 0;

void
startup_xen_mca(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	xen_phys_ncpus = 0;
	xen_phys_cpus = NULL;

	if (xen_mca_simulate_mc_physinfo_failure ||
	    xen_get_mc_physcpuinfo(NULL, &xen_phys_ncpus) != 0) {
		cmn_err(CE_WARN,
		    "%sxen_get_mc_physinfo failure during xen MCA startup: "
		    "there will be no machine check support",
		    xen_mca_simulate_mc_physinfo_failure ? "(simulated) " : "");
		return;
	}

	xen_phys_cpus = kmem_alloc(xen_phys_ncpus *
	    sizeof (xen_mc_logical_cpu_t), KM_NOSLEEP);

	if (xen_phys_cpus == NULL) {
		cmn_err(CE_WARN,
		    "xen_get_mc_physinfo failure: can't allocate CPU array");
		return;
	}

	if (xen_get_mc_physcpuinfo(xen_phys_cpus, &xen_phys_ncpus) != 0) {
		cmn_err(CE_WARN, "xen_get_mc_physinfo failure: no "
		    "physical CPU info");
		kmem_free(xen_phys_cpus,
		    xen_phys_ncpus * sizeof (xen_mc_logical_cpu_t));
		xen_phys_ncpus = 0;
		xen_phys_cpus = NULL;
	}

	if (xen_physinfo_debug) {
		xen_mc_logical_cpu_t *xcp;
		unsigned i;

		cmn_err(CE_NOTE, "xvm mca: %u physical cpus:\n",
		    xen_phys_ncpus);
		for (i = 0; i < xen_phys_ncpus; i++) {
			xcp = &xen_phys_cpus[i];
			cmn_err(CE_NOTE, "cpu%u: (%u, %u, %u) apid %u",
			    xcp->mc_cpunr, xcp->mc_chipid, xcp->mc_coreid,
			    xcp->mc_threadid, xcp->mc_apicid);
		}
	}
}

/*
 * Miscellaneous hypercall wrappers with slightly more verbose diagnostics.
 */

void
xen_set_gdt(ulong_t *frame_list, int entries)
{
	int err;
	if ((err = HYPERVISOR_set_gdt(frame_list, entries)) != 0) {
		/*
		 * X_EINVAL:	reserved entry or bad frames
		 * X_EFAULT:	bad address
		 */
		panic("xen_set_gdt(%p, %d): error %d",
		    (void *)frame_list, entries, -(int)err);
	}
}

void
xen_set_ldt(user_desc_t *ldt, uint_t nsels)
{
	struct mmuext_op op;
	long err;

	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = (uintptr_t)ldt;
	op.arg2.nr_ents = nsels;

	if ((err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) != 0) {
		panic("xen_set_ldt(%p, %d): error %d",
		    (void *)ldt, nsels, -(int)err);
	}
}

void
xen_stack_switch(ulong_t ss, ulong_t esp)
{
	long err;

	if ((err = HYPERVISOR_stack_switch(ss, esp)) != 0) {
		/*
		 * X_EPERM:	bad selector
		 */
		panic("xen_stack_switch(%lx, %lx): error %d", ss, esp,
		    -(int)err);
	}
}

long
xen_set_trap_table(trap_info_t *table)
{
	long err;

	if ((err = HYPERVISOR_set_trap_table(table)) != 0) {
		/*
		 * X_EFAULT:	bad address
		 * X_EPERM:	bad selector
		 */
		panic("xen_set_trap_table(%p): error %d", (void *)table,
		    -(int)err);
	}
	return (err);
}

#if defined(__amd64)
void
xen_set_segment_base(int reg, ulong_t value)
{
	long err;

	if ((err = HYPERVISOR_set_segment_base(reg, value)) != 0) {
		/*
		 * X_EFAULT:	bad address
		 * X_EINVAL:	bad type
		 */
		panic("xen_set_segment_base(%d, %lx): error %d",
		    reg, value, -(int)err);
	}
}
#endif	/* __amd64 */

/*
 * Translate a hypervisor errcode to a Solaris error code.
 */
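/*
 * Hypervisor errors arrive as negative values; for example, a hypercall
 * returning -X_EAGAIN is translated to the native EAGAIN.
 */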
int
xen_xlate_errcode(int error)
{
	switch (-error) {

	/*
	 * Translate hypervisor errno's into native errno's
	 */

#define	CASE(num)	case X_##num: error = num; break

	CASE(EPERM);	CASE(ENOENT);	CASE(ESRCH);
	CASE(EINTR);	CASE(EIO);	CASE(ENXIO);
	CASE(E2BIG);	CASE(ENOMEM);	CASE(EACCES);
	CASE(EFAULT);	CASE(EBUSY);	CASE(EEXIST);
	CASE(ENODEV);	CASE(EISDIR);	CASE(EINVAL);
	CASE(ENOSPC);	CASE(ESPIPE);	CASE(EROFS);
	CASE(ENOSYS);	CASE(ENOTEMPTY); CASE(EISCONN);
	CASE(ENODATA);	CASE(EAGAIN);

#undef CASE

	default:
		panic("xen_xlate_errcode: unknown error %d", error);
	}

	return (error);
}

/*
 * Raise PS_IOPL on current vcpu to user level.
 * Caller responsible for preventing kernel preemption.
 */
void
xen_enable_user_iopl(void)
{
	physdev_set_iopl_t set_iopl;
	set_iopl.iopl = 3;		/* user ring 3 */
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

/*
 * Drop PS_IOPL on current vcpu to kernel level
 */
void
xen_disable_user_iopl(void)
{
	physdev_set_iopl_t set_iopl;
	set_iopl.iopl = 1;		/* kernel pseudo ring 1 */
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}

int
xen_gdt_setprot(cpu_t *cp, uint_t prot)
{
	int err;
#if defined(__amd64)
	int pt_bits = PT_VALID;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;
#endif

	if ((err = as_setprot(&kas, (caddr_t)cp->cpu_gdt,
	    MMU_PAGESIZE, prot)) != 0)
		goto done;

#if defined(__amd64)
	err = xen_kpm_page(mmu_btop(cp->cpu_m.mcpu_gdtpa), pt_bits);
#endif

done:
	if (err) {
		cmn_err(CE_WARN, "cpu%d: xen_gdt_setprot(%s) failed: error %d",
		    cp->cpu_id, (prot & PROT_WRITE) ? "writable" : "read-only",
		    err);
	}

	return (err);
}

int
xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
{
	int err;
	caddr_t lva = (caddr_t)ldt;
#if defined(__amd64)
	int pt_bits = PT_VALID;
	pgcnt_t npgs;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;
#endif	/* __amd64 */

	if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
		goto done;

#if defined(__amd64)

	ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
	npgs = mmu_btop(lsize);
	while (npgs--) {
		if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
		    pt_bits)) != 0)
			break;
		lva += PAGESIZE;
	}
#endif	/* __amd64 */

done:
	if (err) {
		cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
		    (void *)lva,
		    (prot & PROT_WRITE) ? "writable" : "read-only", err);
	}

	return (err);
}

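/*
 * Fetch physical cpu information via the MCA hypercall.
 * startup_xen_mca() calls this twice: first with log_cpus == NULL to
 * discover the cpu count, then with an allocated buffer to retrieve
 * the per-cpu records.
 */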
int
xen_get_mc_physcpuinfo(xen_mc_logical_cpu_t *log_cpus, uint_t *ncpus)
{
	xen_mc_t xmc;
	struct xen_mc_physcpuinfo *cpi = &xmc.u.mc_physcpuinfo;

	cpi->ncpus = *ncpus;
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(cpi->info, log_cpus);

	if (HYPERVISOR_mca(XEN_MC_physcpuinfo, &xmc) != 0)
		return (-1);

	*ncpus = cpi->ncpus;
	return (0);
}

void
print_panic(const char *str)
{
	xen_printf(str);
}

/*
 * Interfaces to iterate over real cpu information, but only that info
 * which we choose to expose here.  These are of interest to dom0
 * only (and the backing hypercall should not work for domu).
 */

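/*
 * Intended iteration pattern:
 *	for (c = xen_physcpu_next(NULL); c != NULL; c = xen_physcpu_next(c))
 *		...use the xen_physcpu_*() accessors on c...
 */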
xen_mc_lcpu_cookie_t
xen_physcpu_next(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = (xen_mc_logical_cpu_t *)cookie;

	if (!DOMAIN_IS_INITDOMAIN(xen_info))
		return (NULL);

	if (cookie == NULL)
		return ((xen_mc_lcpu_cookie_t)xen_phys_cpus);

	if (xcp == xen_phys_cpus + xen_phys_ncpus - 1)
		return (NULL);
	else
		return ((xen_mc_lcpu_cookie_t)++xcp);
}

#define	COOKIE2XCP(c)	((xen_mc_logical_cpu_t *)(c))

const char *
xen_physcpu_vendorstr(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

	return ((const char *)&xcp->mc_vendorid[0]);
}

int
xen_physcpu_family(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_family);
}

int
xen_physcpu_model(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_model);
}

int
xen_physcpu_stepping(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_step);
}

id_t
xen_physcpu_chipid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_chipid);
}

id_t
xen_physcpu_coreid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_coreid);
}

id_t
xen_physcpu_strandid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_threadid);
}

id_t
xen_physcpu_initial_apicid(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_clusterid);
}

id_t
xen_physcpu_logical_id(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_cpunr);
}

boolean_t
xen_physcpu_is_cmt(xen_mc_lcpu_cookie_t cookie)
{
	return (COOKIE2XCP(cookie)->mc_nthreads > 1);
}

uint64_t
xen_physcpu_mcg_cap(xen_mc_lcpu_cookie_t cookie)
{
	xen_mc_logical_cpu_t *xcp = COOKIE2XCP(cookie);

	/*
	 * Need to #define the indices, or search through the array.
	 */
	return (xcp->mc_msrvalues[0].value);
}

int
xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
    boolean_t uvaddr)
{
	long rc;
	uint_t i;

	ASSERT(cmd == GNTTABOP_map_grant_ref);

#if !defined(_BOOT)
	if (uvaddr == B_FALSE) {
		for (i = 0; i < count; ++i) {
			mapop[i].flags |= (PT_FOREIGN << _GNTMAP_guest_avail0);
		}
	}
#endif

	rc = HYPERVISOR_grant_table_op(cmd, mapop, count);

	return (rc);
}

static int
xpv_get_physinfo(xen_sysctl_physinfo_t *pi)
{
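	/*
	 * The cpu_to_node guest handle is accessed through a local struct
	 * view so that it can be explicitly set to NULL below; this query
	 * does not ask for the per-cpu node map.
	 */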
	xen_sysctl_t op;
	struct sp { void *p; } *sp = (struct sp *)&op.u.physinfo.cpu_to_node;
	int ret;

	bzero(&op, sizeof (op));
	op.cmd = XEN_SYSCTL_physinfo;
	op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(*sp, NULL);

	ret = HYPERVISOR_sysctl(&op);

	if (ret != 0)
		return (xen_xlate_errcode(ret));

	bcopy(&op.u.physinfo, pi, sizeof (op.u.physinfo));
	return (0);
}

/*
 * On dom0, we can determine the number of physical cpus on the machine.
 * This number is important when figuring out what workarounds are
 * appropriate, so compute it now.
 */
uint_t
xpv_nr_phys_cpus(void)
{
	static uint_t nphyscpus = 0;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if (nphyscpus == 0) {
		xen_sysctl_physinfo_t pi;
		int ret;

		if ((ret = xpv_get_physinfo(&pi)) != 0)
			panic("xpv_get_physinfo() failed: %d\n", ret);
		nphyscpus = pi.nr_cpus;
	}
	return (nphyscpus);
}

pgcnt_t
xpv_nr_phys_pages(void)
{
	xen_sysctl_physinfo_t pi;
	int ret;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if ((ret = xpv_get_physinfo(&pi)) != 0)
		panic("xpv_get_physinfo() failed: %d\n", ret);

	return ((pgcnt_t)pi.total_pages);
}

uint64_t
xpv_cpu_khz(void)
{
	xen_sysctl_physinfo_t pi;
	int ret;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if ((ret = xpv_get_physinfo(&pi)) != 0)
		panic("xpv_get_physinfo() failed: %d\n", ret);
	return ((uint64_t)pi.cpu_khz);
}