xref: /onnv-gate/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c (revision 10175:dd9708d1f561)
16451Sedp /*
26451Sedp  * CDDL HEADER START
36451Sedp  *
46451Sedp  * The contents of this file are subject to the terms of the
56451Sedp  * Common Development and Distribution License (the "License").
66451Sedp  * You may not use this file except in compliance with the License.
76451Sedp  *
86451Sedp  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
96451Sedp  * or http://www.opensolaris.org/os/licensing.
106451Sedp  * See the License for the specific language governing permissions
116451Sedp  * and limitations under the License.
126451Sedp  *
136451Sedp  * When distributing Covered Code, include this CDDL HEADER in each
146451Sedp  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
156451Sedp  * If applicable, add the following below this CDDL HEADER, with the
166451Sedp  * fields enclosed by brackets "[]" replaced with your own identifying
176451Sedp  * information: Portions Copyright [yyyy] [name of copyright owner]
186451Sedp  *
196451Sedp  * CDDL HEADER END
206451Sedp  */
216451Sedp /*
22*10175SStuart.Maybee@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
236451Sedp  * Use is subject to license terms.
246451Sedp  */
256451Sedp 
266451Sedp #include <sys/modctl.h>
276451Sedp #include <sys/types.h>
286451Sedp #include <sys/archsystm.h>
296451Sedp #include <sys/machsystm.h>
306451Sedp #include <sys/sunndi.h>
316451Sedp #include <sys/sunddi.h>
326451Sedp #include <sys/ddi_subrdefs.h>
336451Sedp #include <sys/xpv_support.h>
346451Sedp #include <sys/xen_errno.h>
356451Sedp #include <sys/hypervisor.h>
366451Sedp #include <sys/gnttab.h>
376451Sedp #include <sys/xenbus_comms.h>
386451Sedp #include <sys/xenbus_impl.h>
396451Sedp #include <xen/sys/xendev.h>
406451Sedp #include <sys/sysmacros.h>
416451Sedp #include <sys/x86_archext.h>
426451Sedp #include <sys/mman.h>
436451Sedp #include <sys/stat.h>
446451Sedp #include <sys/conf.h>
456451Sedp #include <sys/devops.h>
466451Sedp #include <sys/pc_mmu.h>
476451Sedp #include <sys/cmn_err.h>
486451Sedp #include <sys/cpr.h>
496451Sedp #include <sys/ddi.h>
506451Sedp #include <vm/seg_kmem.h>
516451Sedp #include <vm/as.h>
526451Sedp #include <vm/hat_pte.h>
536451Sedp #include <vm/hat_i86.h>
546451Sedp 
/* The only minor number accepted by xpv_open() and xpv_getinfo(). */
#define	XPV_MINOR 0
/* Size of the scratch buffers passed to ddi_deviname() in this file. */
#define	XPV_BUFSIZE 128

/* virtual addr for the store_mfn page */
caddr_t xb_addr;

/* dev_info node of the xpv driver; set in xpv_attach(). */
dev_info_t *xpv_dip;
/* Cached "xpvd" node, located on demand by check_xpvd(). */
static dev_info_t *xpvd_dip;
636451Sedp 
#ifdef DEBUG
/* When non-zero, SUSPEND_DEBUG() messages are emitted via xen_printf(). */
int xen_suspend_debug;

/*
 * Trace helper for the suspend/resume path.  It expands to a single void
 * expression rather than a bare "if", so it is safe as the body of an
 * un-braced if/else without capturing a following "else".
 */
#define	SUSPEND_DEBUG(...)	\
	((void)(xen_suspend_debug ? (xen_printf(__VA_ARGS__), 0) : 0))
#else
/* Non-DEBUG kernels compile the trace calls away entirely. */
#define	SUSPEND_DEBUG(...)
#endif
716451Sedp 
/*
 * Forward declarations of the driver entry points referenced by the
 * xpv_cb_ops and xpv_dv_ops tables below; all are defined later in
 * this file.
 */
static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
static int xpv_open(dev_t *, int, int, cred_t *);
static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
806451Sedp 
/*
 * Character/block entry points.  Only open and ioctl are implemented by
 * this driver; the field names in the comments follow cb_ops(9S).
 */
static struct cb_ops xpv_cb_ops = {
	xpv_open,	/* open */
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	xpv_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_MP,		/* flag */
	CB_REV,		/* rev */
	NULL,		/* aread */
	NULL		/* awrite */
};
1016451Sedp 
/*
 * Device operations for the xpv driver; the field names in the comments
 * follow dev_ops(9S).
 */
static struct dev_ops xpv_dv_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	xpv_getinfo,	/* getinfo */
	nulldev,	/* identify */
	nulldev,	/* probe */
	xpv_attach,	/* attach */
	xpv_detach,	/* detach */
	nodev,		/* reset */
	&xpv_cb_ops,	/* cb_ops */
	NULL,		/* struct bus_ops */
	NULL,		/* power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};
1166451Sedp 
/* Module linkage record describing this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,	/* module type: driver */
	"xpv driver",	/* description string */
	&xpv_dv_ops	/* driver ops */
};
1226451Sedp 
/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modl = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL		/* null termination */
	}
};
1306451Sedp 
/*
 * DMA attributes used by xen_alloc_pages(): page-aligned memory anywhere
 * in the address range, delivered as a single segment.
 */
static ddi_dma_attr_t xpv_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0x7fffffffULL,		/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};
1456451Sedp 
/* Access attributes for the memory obtained in xen_alloc_pages(). */
static ddi_device_acc_attr_t xpv_accattr = {
	DDI_DEVICE_ATTR_V0,	/* attribute structure version */
	DDI_NEVERSWAP_ACC,	/* no byte swapping */
	DDI_STRICTORDER_ACC	/* strict access ordering */
};
1516451Sedp 
/*
 * Bookkeeping for xen_alloc_pages(): each call takes the next slot in
 * these tables, and xen_alloc_cnt is the index of the next free slot.
 */
#define	MAX_ALLOCATIONS 10
static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
static int xen_alloc_cnt = 0;
1566451Sedp 
1576451Sedp void *
xen_alloc_pages(pgcnt_t cnt)1586451Sedp xen_alloc_pages(pgcnt_t cnt)
1596451Sedp {
1606451Sedp 	size_t len;
1616451Sedp 	int a = xen_alloc_cnt++;
1626451Sedp 	caddr_t addr;
1636451Sedp 
1646451Sedp 	ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS);
1656451Sedp 	if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
1666451Sedp 	    &xpv_dma_handle[a]) != DDI_SUCCESS)
1676451Sedp 		return (NULL);
1686451Sedp 
1696451Sedp 	if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
1706451Sedp 	    &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
1716451Sedp 	    &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
1726451Sedp 		ddi_dma_free_handle(&xpv_dma_handle[a]);
1736451Sedp 		cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
1746451Sedp 		return (NULL);
1756451Sedp 	}
1766451Sedp 	return (addr);
1776451Sedp }
1786451Sedp 
1796451Sedp /*
1806451Sedp  * This function is invoked twice, first time with reprogram=0 to set up
1816451Sedp  * the xpvd portion of the device tree. The second time it is ignored.
1826451Sedp  */
1836451Sedp static void
xpv_enumerate(int reprogram)1846451Sedp xpv_enumerate(int reprogram)
1856451Sedp {
1866451Sedp 	dev_info_t *dip;
1876451Sedp 
1886451Sedp 	if (reprogram != 0)
1896451Sedp 		return;
1906451Sedp 
1916451Sedp 	ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
1926451Sedp 	    (pnode_t)DEVI_SID_NODEID, &dip);
1936451Sedp 
1946451Sedp 	(void) ndi_devi_bind_driver(dip, 0);
1956451Sedp 
1966451Sedp 	/*
1976451Sedp 	 * Too early to enumerate split device drivers in domU
1986451Sedp 	 * since we need to create taskq thread during enumeration.
1996451Sedp 	 * So, we only enumerate softdevs and console here.
2006451Sedp 	 */
2016451Sedp 	xendev_enum_all(dip, B_TRUE);
2026451Sedp }
2036451Sedp 
2046451Sedp /*
2056451Sedp  * Translate a hypervisor errcode to a Solaris error code.
2066451Sedp  */
2076451Sedp int
xen_xlate_errcode(int error)2086451Sedp xen_xlate_errcode(int error)
2096451Sedp {
2106451Sedp #define	CASE(num)	case X_##num: error = num; break
2116451Sedp 
2126451Sedp 	switch (-error) {
2136451Sedp 		CASE(EPERM);    CASE(ENOENT);   CASE(ESRCH);
2146451Sedp 		CASE(EINTR);	CASE(EIO);	CASE(ENXIO);
2156451Sedp 		CASE(E2BIG);    CASE(ENOMEM);   CASE(EACCES);
2166451Sedp 		CASE(EFAULT);   CASE(EBUSY);    CASE(EEXIST);
2176451Sedp 		CASE(ENODEV);   CASE(EISDIR);   CASE(EINVAL);
2186451Sedp 		CASE(ENOSPC);   CASE(ESPIPE);   CASE(EROFS);
2196451Sedp 		CASE(ENOSYS);   CASE(ENOTEMPTY); CASE(EISCONN);
2206451Sedp 		CASE(ENODATA);
2216451Sedp 		default:
2226451Sedp 		panic("xen_xlate_errcode: unknown error %d", error);
2236451Sedp 	}
2246451Sedp 	return (error);
2256451Sedp #undef CASE
2266451Sedp }
2276451Sedp 
/*
 * printf-style message output used by the Xen support code (and by the
 * SUSPEND_DEBUG() tracing in this file).
 *
 * The variable arguments have already been captured in a va_list, so
 * they must be forwarded with vprintf(); handing the va_list to printf()
 * would make it be interpreted as the first format argument.
 */
/*PRINTFLIKE1*/
void
xen_printf(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vprintf(fmt, adx);
	va_end(adx);
}
2386451Sedp 
/*
 * Stub functions to get the FE drivers to build, and to catch drivers that
 * misbehave in HVM domains.
 */

/*
 * Stub: always panics; the pfn argument is ignored.
 */
/*ARGSUSED*/
void
xen_release_pfn(pfn_t pfn)
{
	panic("xen_release_pfn() is not supported in HVM domains");
}
2496451Sedp 
/*
 * Stub: always panics; both arguments are ignored.
 */
/*ARGSUSED*/
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	panic("reassign_pfn() is not supported in HVM domains");
}
2566451Sedp 
/*
 * Stub: always panics; all arguments are ignored.  The return statement
 * only satisfies the function's declared return type.
 */
/*ARGSUSED*/
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	panic("balloon_free_pages() is not supported in HVM domains");
	return (0);
}
2646451Sedp 
/*
 * Stub: always panics; the delta argument is ignored.
 */
/*ARGSUSED*/
void
balloon_drv_added(int64_t delta)
{
	panic("balloon_drv_added() is not supported in HVM domains");
}
2716451Sedp 
2726451Sedp /*
2736451Sedp  * Add a mapping for the machine page at the given virtual address.
2746451Sedp  */
2756451Sedp void
kbm_map_ma(maddr_t ma,uintptr_t va,uint_t level)2766451Sedp kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
2776451Sedp {
2786451Sedp 	ASSERT(level == 0);
2796451Sedp 
2806451Sedp 	hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
2816451Sedp 	    mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
2826451Sedp }
2836451Sedp 
2847756SMark.Johnson@Sun.COM /*ARGSUSED*/
2857756SMark.Johnson@Sun.COM int
xen_map_gref(uint_t cmd,gnttab_map_grant_ref_t * mapop,uint_t count,boolean_t uvaddr)2867756SMark.Johnson@Sun.COM xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
2877756SMark.Johnson@Sun.COM     boolean_t uvaddr)
2887756SMark.Johnson@Sun.COM {
2897756SMark.Johnson@Sun.COM 	long rc;
2907756SMark.Johnson@Sun.COM 
2917756SMark.Johnson@Sun.COM 	ASSERT(cmd == GNTTABOP_map_grant_ref);
2927756SMark.Johnson@Sun.COM 	rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
2937756SMark.Johnson@Sun.COM 
2947756SMark.Johnson@Sun.COM 	return (rc);
2957756SMark.Johnson@Sun.COM }
2967756SMark.Johnson@Sun.COM 
/* xenbus watch on "control/shutdown"; registered in xpv_drv_init(). */
static struct xenbus_watch shutdown_watch;
/* Task queue on which xen_shutdown() is run; created in xpv_drv_init(). */
taskq_t *xen_shutdown_tq;

/*
 * Request codes that xen_shutdown_handler() decodes from the
 * control/shutdown string and hands to xen_shutdown().
 */
#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

/* Seconds to wait (five minutes) before xen_dirty_shutdown() is run. */
#define	SHUTDOWN_TIMEOUT_SECS (60 * 5)
3086451Sedp 
3096451Sedp int
xen_suspend_devices(dev_info_t * dip)3106451Sedp xen_suspend_devices(dev_info_t *dip)
3116451Sedp {
3126451Sedp 	int error;
3136451Sedp 	char buf[XPV_BUFSIZE];
3146451Sedp 
3156451Sedp 	SUSPEND_DEBUG("xen_suspend_devices\n");
3166451Sedp 
3176451Sedp 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
3186451Sedp 		if (xen_suspend_devices(ddi_get_child(dip)))
3196451Sedp 			return (ENXIO);
3206451Sedp 		if (ddi_get_driver(dip) == NULL)
3216451Sedp 			continue;
3226451Sedp 		SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
3236451Sedp 		ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);
3246451Sedp 
3256451Sedp 
3266451Sedp 		if (!i_ddi_devi_attached(dip)) {
3276451Sedp 			error = DDI_FAILURE;
3286451Sedp 		} else {
3296451Sedp 			error = devi_detach(dip, DDI_SUSPEND);
3306451Sedp 		}
3316451Sedp 
3326451Sedp 		if (error == DDI_SUCCESS) {
3336451Sedp 			DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
3346451Sedp 		} else {
3356451Sedp 			SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
3366451Sedp 			    ddi_deviname(dip, buf));
3376451Sedp 			cmn_err(CE_WARN, "Unable to suspend device %s.",
3386451Sedp 			    ddi_deviname(dip, buf));
3396451Sedp 			cmn_err(CE_WARN, "Device is busy or does not "
3406451Sedp 			    "support suspend/resume.");
3416451Sedp 				return (ENXIO);
3426451Sedp 		}
3436451Sedp 	}
3446451Sedp 	return (0);
3456451Sedp }
3466451Sedp 
3476451Sedp int
xen_resume_devices(dev_info_t * start,int resume_failed)3486451Sedp xen_resume_devices(dev_info_t *start, int resume_failed)
3496451Sedp {
3506451Sedp 	dev_info_t *dip, *next, *last = NULL;
3516451Sedp 	int did_suspend;
3526451Sedp 	int error = resume_failed;
3536451Sedp 	char buf[XPV_BUFSIZE];
3546451Sedp 
3556451Sedp 	SUSPEND_DEBUG("xen_resume_devices\n");
3566451Sedp 
3576451Sedp 	while (last != start) {
3586451Sedp 		dip = start;
3596451Sedp 		next = ddi_get_next_sibling(dip);
3606451Sedp 		while (next != last) {
3616451Sedp 			dip = next;
3626451Sedp 			next = ddi_get_next_sibling(dip);
3636451Sedp 		}
3646451Sedp 
3656451Sedp 		/*
3666451Sedp 		 * cpr is the only one that uses this field and the device
3676451Sedp 		 * itself hasn't resumed yet, there is no need to use a
3686451Sedp 		 * lock, even though kernel threads are active by now.
3696451Sedp 		 */
3706451Sedp 		did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
3716451Sedp 		if (did_suspend)
3726451Sedp 			DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;
3736451Sedp 
3746451Sedp 		/*
3756451Sedp 		 * There may be background attaches happening on devices
3766451Sedp 		 * that were not originally suspended by cpr, so resume
3776451Sedp 		 * only devices that were suspended by cpr. Also, stop
3786451Sedp 		 * resuming after the first resume failure, but traverse
3796451Sedp 		 * the entire tree to clear the suspend flag.
3806451Sedp 		 */
3816451Sedp 		if (did_suspend && !error) {
3826451Sedp 			SUSPEND_DEBUG("Resuming device %s\n",
3836451Sedp 			    ddi_deviname(dip, buf));
3846451Sedp 			/*
3856451Sedp 			 * If a device suspended by cpr gets detached during
3866451Sedp 			 * the resume process (for example, due to hotplugging)
3876451Sedp 			 * before cpr gets around to issuing it a DDI_RESUME,
3886451Sedp 			 * we'll have problems.
3896451Sedp 			 */
3906451Sedp 			if (!i_ddi_devi_attached(dip)) {
3916451Sedp 				cmn_err(CE_WARN, "Skipping %s, device "
3926451Sedp 				    "not ready for resume",
3936451Sedp 				    ddi_deviname(dip, buf));
3946451Sedp 			} else {
3956451Sedp 				if (devi_attach(dip, DDI_RESUME) !=
3966451Sedp 				    DDI_SUCCESS) {
3976451Sedp 					error = ENXIO;
3986451Sedp 				}
3996451Sedp 			}
4006451Sedp 		}
4016451Sedp 
4026451Sedp 		if (error == ENXIO) {
4036451Sedp 			cmn_err(CE_WARN, "Unable to resume device %s",
4046451Sedp 			    ddi_deviname(dip, buf));
4056451Sedp 		}
4066451Sedp 
4076451Sedp 		error = xen_resume_devices(ddi_get_child(dip), error);
4086451Sedp 		last = dip;
4096451Sedp 	}
4106451Sedp 
4116451Sedp 	return (error);
4126451Sedp }
4136451Sedp 
4146451Sedp /*ARGSUSED*/
4156451Sedp static int
check_xpvd(dev_info_t * dip,void * arg)4166451Sedp check_xpvd(dev_info_t *dip, void *arg)
4176451Sedp {
4186451Sedp 	char *name;
4196451Sedp 
4206451Sedp 	name = ddi_node_name(dip);
4216451Sedp 	if (name == NULL || strcmp(name, "xpvd")) {
4226451Sedp 		return (DDI_WALK_CONTINUE);
4236451Sedp 	} else {
4246451Sedp 		xpvd_dip = dip;
4256451Sedp 		return (DDI_WALK_TERMINATE);
4266451Sedp 	}
4276451Sedp }
4286451Sedp 
/*
 * Top level routine to direct suspend/resume of a domain.
 *
 * Sequence: suspend the PV devices under xpvd and xenbus, pin this thread
 * to vcpu 0, pause the other CPUs, disable interrupts and call
 * HYPERVISOR_shutdown(SHUTDOWN_suspend).  When that call returns, the
 * shared info page, grant tables and event channels are re-established,
 * the other CPUs are restarted, and xenbus and the PV devices are resumed.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state.  The PV devices
	 * are all children of the xpvd nexus device.  So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 *
	 * A failure to suspend a device is deliberately ignored here; the
	 * domain suspend goes ahead regardless.
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what the return value is, we do a
	 * full resume of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	/* Undo the vcpu 0 binding and preemption disable made above. */
	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
5406451Sedp 
5416451Sedp static void
xen_dirty_shutdown(void * arg)5426451Sedp xen_dirty_shutdown(void *arg)
5436451Sedp {
5446451Sedp 	int cmd = (uintptr_t)arg;
5456451Sedp 
5466451Sedp 	cmn_err(CE_WARN, "Externally requested shutdown failed or "
5476451Sedp 	    "timed out.\nShutting down.\n");
5486451Sedp 
5496451Sedp 	switch (cmd) {
5506451Sedp 	case SHUTDOWN_HALT:
5516451Sedp 	case SHUTDOWN_POWEROFF:
5526451Sedp 		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
5536451Sedp 		break;
5546451Sedp 	case SHUTDOWN_REBOOT:
5556451Sedp 		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
5566451Sedp 		break;
5576451Sedp 	}
5586451Sedp }
5596451Sedp 
5606451Sedp static void
xen_shutdown(void * arg)5616451Sedp xen_shutdown(void *arg)
5626451Sedp {
5636451Sedp 	int cmd = (uintptr_t)arg;
5646681Sjohnlev 	proc_t *initpp;
5656451Sedp 
5666451Sedp 	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
5676451Sedp 
5686451Sedp 	if (cmd == SHUTDOWN_SUSPEND) {
5696451Sedp 		xen_suspend_domain();
5706451Sedp 		return;
5716451Sedp 	}
5726451Sedp 
5736681Sjohnlev 	switch (cmd) {
5746681Sjohnlev 	case SHUTDOWN_POWEROFF:
5756681Sjohnlev 		force_shutdown_method = AD_POWEROFF;
5766681Sjohnlev 		break;
5776681Sjohnlev 	case SHUTDOWN_HALT:
5786681Sjohnlev 		force_shutdown_method = AD_HALT;
5796681Sjohnlev 		break;
5806681Sjohnlev 	case SHUTDOWN_REBOOT:
5816681Sjohnlev 		force_shutdown_method = AD_BOOT;
5826681Sjohnlev 		break;
5836681Sjohnlev 	}
5846451Sedp 
5856451Sedp 
5866681Sjohnlev 	/*
5876681Sjohnlev 	 * If we're still booting and init(1) isn't set up yet, simply halt.
5886681Sjohnlev 	 */
5896681Sjohnlev 	mutex_enter(&pidlock);
5906681Sjohnlev 	initpp = prfind(P_INITPID);
5916681Sjohnlev 	mutex_exit(&pidlock);
5926681Sjohnlev 	if (initpp == NULL) {
5936681Sjohnlev 		extern void halt(char *);
5946681Sjohnlev 		halt("Power off the System");   /* just in case */
5956681Sjohnlev 	}
5966451Sedp 
5976681Sjohnlev 	/*
5986681Sjohnlev 	 * else, graceful shutdown with inittab and all getting involved
5996681Sjohnlev 	 */
6006681Sjohnlev 	psignal(initpp, SIGPWR);
6016451Sedp 
6026451Sedp 	(void) timeout(xen_dirty_shutdown, arg,
6036451Sedp 	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
6046451Sedp }
6056451Sedp 
6066451Sedp /*ARGSUSED*/
6076451Sedp static void
xen_shutdown_handler(struct xenbus_watch * watch,const char ** vec,unsigned int len)6086451Sedp xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
6096451Sedp 	unsigned int len)
6106451Sedp {
6116451Sedp 	char *str;
6126451Sedp 	xenbus_transaction_t xbt;
6136451Sedp 	int err, shutdown_code = SHUTDOWN_INVALID;
6146451Sedp 	unsigned int slen;
6156451Sedp 
6166451Sedp again:
6176451Sedp 	err = xenbus_transaction_start(&xbt);
6186451Sedp 	if (err)
6196451Sedp 		return;
6206451Sedp 	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
6216451Sedp 		(void) xenbus_transaction_end(xbt, 1);
6226451Sedp 		return;
6236451Sedp 	}
6246451Sedp 
6256451Sedp 	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
6266451Sedp 
6276451Sedp 	/*
6286451Sedp 	 * If this is a watch fired from our write below, check out early to
6296451Sedp 	 * avoid an infinite loop.
6306451Sedp 	 */
6316451Sedp 	if (strcmp(str, "") == 0) {
6326451Sedp 		(void) xenbus_transaction_end(xbt, 0);
6336451Sedp 		kmem_free(str, slen);
6346451Sedp 		return;
6356451Sedp 	} else if (strcmp(str, "poweroff") == 0) {
6366451Sedp 		shutdown_code = SHUTDOWN_POWEROFF;
6376451Sedp 	} else if (strcmp(str, "reboot") == 0) {
6386451Sedp 		shutdown_code = SHUTDOWN_REBOOT;
6396451Sedp 	} else if (strcmp(str, "suspend") == 0) {
6406451Sedp 		shutdown_code = SHUTDOWN_SUSPEND;
6416451Sedp 	} else if (strcmp(str, "halt") == 0) {
6426451Sedp 		shutdown_code = SHUTDOWN_HALT;
6436451Sedp 	} else {
6446451Sedp 		printf("Ignoring shutdown request: %s\n", str);
6456451Sedp 	}
6466451Sedp 
6476451Sedp 	(void) xenbus_write(xbt, "control", "shutdown", "");
6486451Sedp 	err = xenbus_transaction_end(xbt, 0);
6496451Sedp 	if (err == EAGAIN) {
6506451Sedp 		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
6516451Sedp 		kmem_free(str, slen);
6526451Sedp 		goto again;
6536451Sedp 	}
6546451Sedp 
6556451Sedp 	kmem_free(str, slen);
6566451Sedp 	if (shutdown_code != SHUTDOWN_INVALID) {
6576451Sedp 		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
6586451Sedp 		    (void *)(intptr_t)shutdown_code, 0);
6596451Sedp 	}
6606451Sedp }
6616451Sedp 
6626451Sedp static int
xpv_drv_init(void)663*10175SStuart.Maybee@Sun.COM xpv_drv_init(void)
6646451Sedp {
665*10175SStuart.Maybee@Sun.COM 	if (xpv_feature(XPVF_HYPERCALLS) < 0 ||
666*10175SStuart.Maybee@Sun.COM 	    xpv_feature(XPVF_SHARED_INFO) < 0)
6676451Sedp 		return (-1);
6686451Sedp 
6696451Sedp 	/* Set up the grant tables.  */
6706451Sedp 	gnttab_init();
6716451Sedp 
6726451Sedp 	/* Set up event channel support */
673*10175SStuart.Maybee@Sun.COM 	if (ec_init() != 0)
6746451Sedp 		return (-1);
6756451Sedp 
6766451Sedp 	/* Set up xenbus */
6776451Sedp 	xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
6786451Sedp 	xs_early_init();
6796451Sedp 	xs_domu_init();
6806451Sedp 
6816451Sedp 	/* Set up for suspend/resume/migrate */
6826451Sedp 	xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
6836451Sedp 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
6846451Sedp 	shutdown_watch.node = "control/shutdown";
6856451Sedp 	shutdown_watch.callback = xen_shutdown_handler;
6866451Sedp 	if (register_xenbus_watch(&shutdown_watch))
6876451Sedp 		cmn_err(CE_WARN, "Failed to set shutdown watcher");
6886451Sedp 
6896451Sedp 	return (0);
6906451Sedp }
6916451Sedp 
/*
 * Tear-down counterpart used by xpv_detach(); it shuts down event
 * channel support via ec_fini().
 *
 * Declared with (void) so the definition is a real prototype rather
 * than an old-style unspecified parameter list.
 */
static void
xen_pv_fini(void)
{
	ec_fini();
}
6976451Sedp 
6986451Sedp /*ARGSUSED*/
6996451Sedp static int
xpv_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)7006451Sedp xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
7016451Sedp {
7026451Sedp 	if (getminor((dev_t)arg) != XPV_MINOR)
7036451Sedp 		return (DDI_FAILURE);
7046451Sedp 
7056451Sedp 	switch (cmd) {
7066451Sedp 	case DDI_INFO_DEVT2DEVINFO:
7076451Sedp 		*result = xpv_dip;
7086451Sedp 		break;
7096451Sedp 	case DDI_INFO_DEVT2INSTANCE:
7106451Sedp 		*result = 0;
7116451Sedp 		break;
7126451Sedp 	default:
7136451Sedp 		return (DDI_FAILURE);
7146451Sedp 	}
7156451Sedp 
7166451Sedp 	return (DDI_SUCCESS);
7176451Sedp }
7186451Sedp 
7196451Sedp static int
xpv_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)7206451Sedp xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
7216451Sedp {
7226451Sedp 	if (cmd != DDI_ATTACH)
7236451Sedp 		return (DDI_FAILURE);
7246451Sedp 
7256451Sedp 	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
7266451Sedp 	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
7276451Sedp 		return (DDI_FAILURE);
7286451Sedp 
7296451Sedp 	xpv_dip = dip;
7306451Sedp 
731*10175SStuart.Maybee@Sun.COM 	if (xpv_drv_init() != 0)
7326451Sedp 		return (DDI_FAILURE);
7336451Sedp 
7346451Sedp 	ddi_report_dev(dip);
7356451Sedp 
7366451Sedp 	/*
7376451Sedp 	 * If the memscrubber attempts to scrub the pages we hand to Xen,
7386451Sedp 	 * the domain will panic.
7396451Sedp 	 */
7406451Sedp 	memscrub_disable();
7416451Sedp 
7426451Sedp 	/*
7436451Sedp 	 * Report our version to dom0.
7446451Sedp 	 */
745*10175SStuart.Maybee@Sun.COM 	if (xenbus_printf(XBT_NULL, "guest/xpv", "version", "%d",
7466451Sedp 	    HVMPV_XPV_VERS))
7476451Sedp 		cmn_err(CE_WARN, "xpv: couldn't write version\n");
7486451Sedp 
7496451Sedp 	return (DDI_SUCCESS);
7506451Sedp }
7516451Sedp 
/*
 * Attempts to reload the PV driver plumbing hang on Intel platforms, so
 * we don't want to unload the framework by accident.  xpv_detach()
 * refuses to detach while this is zero.
 */
int xpv_allow_detach = 0;
7576451Sedp 
7586451Sedp static int
xpv_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)7596451Sedp xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7606451Sedp {
7616451Sedp 	if (cmd != DDI_DETACH || xpv_allow_detach == 0)
7626451Sedp 		return (DDI_FAILURE);
7636451Sedp 
7646451Sedp 	if (xpv_dip != NULL) {
7656451Sedp 		xen_pv_fini();
7666451Sedp 		ddi_remove_minor_node(dip, NULL);
7676451Sedp 		xpv_dip = NULL;
7686451Sedp 	}
7696451Sedp 
7706451Sedp 	return (DDI_SUCCESS);
7716451Sedp }
7726451Sedp 
7736451Sedp /*ARGSUSED1*/
7746451Sedp static int
xpv_open(dev_t * dev,int flag,int otyp,cred_t * cr)7756451Sedp xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
7766451Sedp {
7776451Sedp 	return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
7786451Sedp }
7796451Sedp 
/*
 * ioctl(9E) entry point.  No ioctls are implemented: every request is
 * rejected with EINVAL and all arguments are ignored.
 */
/*ARGSUSED*/
static int
xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
    int *rval_p)
{
	return (EINVAL);
}
7876451Sedp 
7886451Sedp int
_init(void)7896451Sedp _init(void)
7906451Sedp {
7916451Sedp 	int err;
7926451Sedp 
7936451Sedp 	if ((err = mod_install(&modl)) != 0)
7946451Sedp 		return (err);
7956451Sedp 
7966451Sedp 	impl_bus_add_probe(xpv_enumerate);
7976451Sedp 	return (0);
7986451Sedp }
7996451Sedp 
8006451Sedp int
_fini(void)8016451Sedp _fini(void)
8026451Sedp {
8036451Sedp 	int err;
8046451Sedp 
8056451Sedp 	if ((err = mod_remove(&modl)) != 0)
8066451Sedp 		return (err);
8076451Sedp 
8086451Sedp 	impl_bus_delete_probe(xpv_enumerate);
8096451Sedp 	return (0);
8106451Sedp }
8116451Sedp 
/*
 * Module information entry point: passes the request straight to
 * mod_info() with this module's linkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modl, modinfop));
}
817