xref: /onnv-gate/usr/src/uts/i86xpv/io/privcmd.c (revision 10175:dd9708d1f561)
15084Sjohnlev /*
25084Sjohnlev  * CDDL HEADER START
35084Sjohnlev  *
45084Sjohnlev  * The contents of this file are subject to the terms of the
55084Sjohnlev  * Common Development and Distribution License (the "License").
65084Sjohnlev  * You may not use this file except in compliance with the License.
75084Sjohnlev  *
85084Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev  * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev  * See the License for the specific language governing permissions
115084Sjohnlev  * and limitations under the License.
125084Sjohnlev  *
135084Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev  *
195084Sjohnlev  * CDDL HEADER END
205084Sjohnlev  */
215084Sjohnlev 
225084Sjohnlev /*
23*10175SStuart.Maybee@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
245084Sjohnlev  * Use is subject to license terms.
255084Sjohnlev  */
265084Sjohnlev 
27*10175SStuart.Maybee@Sun.COM #include <sys/xpv_user.h>
28*10175SStuart.Maybee@Sun.COM 
295084Sjohnlev #include <sys/types.h>
305084Sjohnlev #include <sys/file.h>
315084Sjohnlev #include <sys/errno.h>
325084Sjohnlev #include <sys/open.h>
335084Sjohnlev #include <sys/cred.h>
345084Sjohnlev #include <sys/conf.h>
355084Sjohnlev #include <sys/stat.h>
365084Sjohnlev #include <sys/modctl.h>
375084Sjohnlev #include <sys/ddi.h>
385084Sjohnlev #include <sys/sunddi.h>
395084Sjohnlev #include <sys/vmsystm.h>
405084Sjohnlev #include <sys/sdt.h>
415084Sjohnlev #include <sys/hypervisor.h>
425084Sjohnlev #include <sys/xen_errno.h>
436784Sjohnlev #include <sys/policy.h>
445084Sjohnlev 
455084Sjohnlev #include <vm/hat_i86.h>
465084Sjohnlev #include <vm/hat_pte.h>
475084Sjohnlev #include <vm/seg_mf.h>
485084Sjohnlev 
495084Sjohnlev #include <xen/sys/privcmd.h>
505084Sjohnlev #include <sys/privcmd_impl.h>
515084Sjohnlev 
525084Sjohnlev static dev_info_t *privcmd_devi;
535084Sjohnlev 
545084Sjohnlev /*ARGSUSED*/
555084Sjohnlev static int
privcmd_getinfo(dev_info_t * devi,ddi_info_cmd_t cmd,void * arg,void ** result)565084Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
575084Sjohnlev {
585084Sjohnlev 	switch (cmd) {
595084Sjohnlev 	case DDI_INFO_DEVT2DEVINFO:
605084Sjohnlev 	case DDI_INFO_DEVT2INSTANCE:
615084Sjohnlev 		break;
625084Sjohnlev 	default:
635084Sjohnlev 		return (DDI_FAILURE);
645084Sjohnlev 	}
655084Sjohnlev 
665084Sjohnlev 	switch (getminor((dev_t)arg)) {
675084Sjohnlev 	case PRIVCMD_MINOR:
685084Sjohnlev 		break;
695084Sjohnlev 	default:
705084Sjohnlev 		return (DDI_FAILURE);
715084Sjohnlev 	}
725084Sjohnlev 
735084Sjohnlev 	if (cmd == DDI_INFO_DEVT2INSTANCE)
745084Sjohnlev 		*result = 0;
755084Sjohnlev 	else
765084Sjohnlev 		*result = privcmd_devi;
775084Sjohnlev 	return (DDI_SUCCESS);
785084Sjohnlev }
795084Sjohnlev 
805084Sjohnlev static int
privcmd_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)815084Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
825084Sjohnlev {
835084Sjohnlev 	if (cmd != DDI_ATTACH)
845084Sjohnlev 		return (DDI_FAILURE);
855084Sjohnlev 
865084Sjohnlev 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
875084Sjohnlev 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
885084Sjohnlev 		return (DDI_FAILURE);
895084Sjohnlev 
905084Sjohnlev 	privcmd_devi = devi;
915084Sjohnlev 	ddi_report_dev(devi);
925084Sjohnlev 	return (DDI_SUCCESS);
935084Sjohnlev }
945084Sjohnlev 
955084Sjohnlev static int
privcmd_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)965084Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
975084Sjohnlev {
985084Sjohnlev 	if (cmd != DDI_DETACH)
995084Sjohnlev 		return (DDI_FAILURE);
1005084Sjohnlev 	ddi_remove_minor_node(devi, NULL);
1015084Sjohnlev 	privcmd_devi = NULL;
1025084Sjohnlev 	return (DDI_SUCCESS);
1035084Sjohnlev }
1045084Sjohnlev 
1055084Sjohnlev /*ARGSUSED1*/
1065084Sjohnlev static int
privcmd_open(dev_t * dev,int flag,int otyp,cred_t * cr)1075084Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
1085084Sjohnlev {
1095084Sjohnlev 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
1105084Sjohnlev }
1115084Sjohnlev 
1125084Sjohnlev /*
1135084Sjohnlev  * Map a contiguous set of machine frames in a foreign domain.
1145084Sjohnlev  * Used in the following way:
1155084Sjohnlev  *
1165084Sjohnlev  *	privcmd_mmap_t p;
1175084Sjohnlev  *	privcmd_mmap_entry_t e;
1185084Sjohnlev  *
1195084Sjohnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
1205084Sjohnlev  *	p.num = number of privcmd_mmap_entry_t's
1215084Sjohnlev  *	p.dom = domid;
1225084Sjohnlev  *	p.entry = &e;
1235084Sjohnlev  *	e.va = addr;
1245084Sjohnlev  *	e.mfn = mfn;
1255084Sjohnlev  *	e.npages = btopr(size);
1265084Sjohnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
1275084Sjohnlev  */
1285084Sjohnlev /*ARGSUSED2*/
1295084Sjohnlev int
do_privcmd_mmap(void * uarg,int mode,cred_t * cr)1305084Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
1315084Sjohnlev {
1325084Sjohnlev 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
1335084Sjohnlev 	privcmd_mmap_entry_t *umme;
1345084Sjohnlev 	struct as *as = curproc->p_as;
1355084Sjohnlev 	struct seg *seg;
1365084Sjohnlev 	int i, error = 0;
1375084Sjohnlev 
1385084Sjohnlev 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
1395084Sjohnlev 		return (EFAULT);
1405084Sjohnlev 
1415084Sjohnlev 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
1425084Sjohnlev 	    privcmd_mmap_entry_t *, mmc->entry);
1435084Sjohnlev 
1445084Sjohnlev 	if (mmc->dom == DOMID_SELF) {
1455084Sjohnlev 		error = ENOTSUP;	/* Too paranoid? */
1465084Sjohnlev 		goto done;
1475084Sjohnlev 	}
1485084Sjohnlev 
1495084Sjohnlev 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
1505084Sjohnlev 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
1515084Sjohnlev 		caddr_t addr;
1525084Sjohnlev 
1535084Sjohnlev 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
1545084Sjohnlev 			error = EFAULT;
1555084Sjohnlev 			break;
1565084Sjohnlev 		}
1575084Sjohnlev 
1585084Sjohnlev 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
1595084Sjohnlev 		    ulong_t, mme->npages);
1605084Sjohnlev 
1615084Sjohnlev 		if (mme->mfn == MFN_INVALID) {
1625084Sjohnlev 			error = EINVAL;
1635084Sjohnlev 			break;
1645084Sjohnlev 		}
1655084Sjohnlev 
1665084Sjohnlev 		addr = (caddr_t)mme->va;
1675084Sjohnlev 
1685084Sjohnlev 		/*
1695084Sjohnlev 		 * Find the segment we want to mess with, then add
1705084Sjohnlev 		 * the mfn range to the segment.
1715084Sjohnlev 		 */
1725084Sjohnlev 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1735084Sjohnlev 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
1745084Sjohnlev 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
1755084Sjohnlev 			error = EINVAL;
1765084Sjohnlev 		else
1775084Sjohnlev 			error = segmf_add_mfns(seg, addr,
1785084Sjohnlev 			    mme->mfn, mme->npages, mmc->dom);
1795084Sjohnlev 		AS_LOCK_EXIT(as, &as->a_lock);
1805084Sjohnlev 
1815084Sjohnlev 		if (error != 0)
1825084Sjohnlev 			break;
1835084Sjohnlev 	}
1845084Sjohnlev 
1855084Sjohnlev done:
1865084Sjohnlev 	DTRACE_XPV1(mmap__end, int, error);
1875084Sjohnlev 
1885084Sjohnlev 	return (error);
1895084Sjohnlev }
1905084Sjohnlev 
1915084Sjohnlev /*
1925084Sjohnlev  * Set up the address range to map to an array of mfns in
1935084Sjohnlev  * a foreign domain.  Used in the following way:
1945084Sjohnlev  *
1955084Sjohnlev  *	privcmd_mmap_batch_t p;
1965084Sjohnlev  *
1975084Sjohnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
1985084Sjohnlev  *	p.num = number of pages
1995084Sjohnlev  *	p.dom = domid
2005084Sjohnlev  *	p.addr = addr;
2015084Sjohnlev  *	p.arr = array of mfns, indexed 0 .. p.num - 1
2025084Sjohnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
2035084Sjohnlev  */
2045084Sjohnlev /*ARGSUSED2*/
2055084Sjohnlev static int
do_privcmd_mmapbatch(void * uarg,int mode,cred_t * cr)2065084Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
2075084Sjohnlev {
2085084Sjohnlev 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
2095084Sjohnlev 	struct as *as = curproc->p_as;
2105084Sjohnlev 	struct seg *seg;
2115084Sjohnlev 	int i, error = 0;
2125084Sjohnlev 	caddr_t addr;
2135084Sjohnlev 	ulong_t *ulp;
2145084Sjohnlev 
2155084Sjohnlev 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
2165084Sjohnlev 		return (EFAULT);
2175084Sjohnlev 
2185084Sjohnlev 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
2195084Sjohnlev 	    caddr_t, mmb->addr);
2205084Sjohnlev 
2215084Sjohnlev 	addr = (caddr_t)mmb->addr;
2225084Sjohnlev 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2235084Sjohnlev 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
2245084Sjohnlev 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
2255084Sjohnlev 		error = EINVAL;
2265084Sjohnlev 		goto done;
2275084Sjohnlev 	}
2285084Sjohnlev 
2295084Sjohnlev 	for (i = 0, ulp = mmb->arr;
2305084Sjohnlev 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
2315084Sjohnlev 		mfn_t mfn;
2325084Sjohnlev 
2335084Sjohnlev 		if (fulword(ulp, &mfn) != 0) {
2345084Sjohnlev 			error = EFAULT;
2355084Sjohnlev 			break;
2365084Sjohnlev 		}
2375084Sjohnlev 
2385084Sjohnlev 		if (mfn == MFN_INVALID) {
2396144Srab 			/*
2406144Srab 			 * This mfn is invalid and should not be added to
2416144Srab 			 * segmf, as we'd only cause an immediate EFAULT when
2426144Srab 			 * we tried to fault it in.
2436144Srab 			 */
2446144Srab 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
2456144Srab 			continue;
2465084Sjohnlev 		}
2475084Sjohnlev 
2485084Sjohnlev 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
2495084Sjohnlev 			continue;
2505084Sjohnlev 
2515084Sjohnlev 		/*
2525084Sjohnlev 		 * Tell the process that this MFN could not be mapped, so it
2535084Sjohnlev 		 * won't later try to access it.
2545084Sjohnlev 		 */
2556144Srab 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
2565084Sjohnlev 		if (sulword(ulp, mfn) != 0) {
2575084Sjohnlev 			error = EFAULT;
2585084Sjohnlev 			break;
2595084Sjohnlev 		}
2605084Sjohnlev 	}
2615084Sjohnlev 
2625084Sjohnlev done:
2635084Sjohnlev 	AS_LOCK_EXIT(as, &as->a_lock);
2645084Sjohnlev 
2655084Sjohnlev 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
2665084Sjohnlev 	    mmb->addr);
2675084Sjohnlev 
2685084Sjohnlev 	return (error);
2695084Sjohnlev }
2705084Sjohnlev 
2715084Sjohnlev /*ARGSUSED*/
2725084Sjohnlev static int
privcmd_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cr,int * rval)2735084Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
2745084Sjohnlev {
2756784Sjohnlev 	if (secpolicy_xvm_control(cr))
2766784Sjohnlev 		return (EPERM);
2775084Sjohnlev 
2785084Sjohnlev 	/*
2795084Sjohnlev 	 * Everything is a -native- data type.
2805084Sjohnlev 	 */
2816784Sjohnlev 	if ((mode & FMODELS) != FNATIVE)
2826784Sjohnlev 		return (EOVERFLOW);
2835084Sjohnlev 
2845084Sjohnlev 	switch (cmd) {
2855084Sjohnlev 	case IOCTL_PRIVCMD_HYPERCALL:
2865084Sjohnlev 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
2875084Sjohnlev 	case IOCTL_PRIVCMD_MMAP:
2885084Sjohnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
2895084Sjohnlev 			return (do_privcmd_mmap((void *)arg, mode, cr));
2905084Sjohnlev 		break;
2915084Sjohnlev 	case IOCTL_PRIVCMD_MMAPBATCH:
2925084Sjohnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
2935084Sjohnlev 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
2945084Sjohnlev 		break;
2955084Sjohnlev 	default:
2965084Sjohnlev 		break;
2975084Sjohnlev 	}
2985084Sjohnlev 	return (EINVAL);
2995084Sjohnlev }
3005084Sjohnlev 
3015084Sjohnlev /*
3025084Sjohnlev  * The real magic happens in the segmf segment driver.
3035084Sjohnlev  */
3045084Sjohnlev /*ARGSUSED8*/
3055084Sjohnlev static int
privcmd_segmap(dev_t dev,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr)3065084Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
3075084Sjohnlev     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
3085084Sjohnlev {
3095084Sjohnlev 	struct segmf_crargs a;
3105084Sjohnlev 	int error;
3115084Sjohnlev 
3126784Sjohnlev 	if (secpolicy_xvm_control(cr))
3136784Sjohnlev 		return (EPERM);
3146784Sjohnlev 
3155084Sjohnlev 	as_rangelock(as);
3165084Sjohnlev 	if ((flags & MAP_FIXED) == 0) {
3175084Sjohnlev 		map_addr(addrp, len, (offset_t)off, 0, flags);
3185084Sjohnlev 		if (*addrp == NULL) {
3195084Sjohnlev 			error = ENOMEM;
3205084Sjohnlev 			goto rangeunlock;
3215084Sjohnlev 		}
3225084Sjohnlev 	} else {
3235084Sjohnlev 		/*
3245084Sjohnlev 		 * User specified address
3255084Sjohnlev 		 */
3265084Sjohnlev 		(void) as_unmap(as, *addrp, len);
3275084Sjohnlev 	}
3285084Sjohnlev 
3295084Sjohnlev 	/*
3305084Sjohnlev 	 * The mapping *must* be MAP_SHARED at offset 0.
3315084Sjohnlev 	 *
3325084Sjohnlev 	 * (Foreign pages are treated like device memory; the
3335084Sjohnlev 	 * ioctl interface allows the backing objects to be
3345084Sjohnlev 	 * arbitrarily redefined to point at any machine frame.)
3355084Sjohnlev 	 */
3365084Sjohnlev 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
3375084Sjohnlev 		error = EINVAL;
3385084Sjohnlev 		goto rangeunlock;
3395084Sjohnlev 	}
3405084Sjohnlev 
3415084Sjohnlev 	a.dev = dev;
3425084Sjohnlev 	a.prot = (uchar_t)prot;
3435084Sjohnlev 	a.maxprot = (uchar_t)maxprot;
3445084Sjohnlev 	error = as_map(as, *addrp, len, segmf_create, &a);
3455084Sjohnlev 
3465084Sjohnlev rangeunlock:
3475084Sjohnlev 	as_rangeunlock(as);
3485084Sjohnlev 	return (error);
3495084Sjohnlev }
3505084Sjohnlev 
3515084Sjohnlev static struct cb_ops privcmd_cb_ops = {
3525084Sjohnlev 	privcmd_open,
3535084Sjohnlev 	nulldev,	/* close */
3545084Sjohnlev 	nodev,		/* strategy */
3555084Sjohnlev 	nodev,		/* print */
3565084Sjohnlev 	nodev,		/* dump */
3575084Sjohnlev 	nodev,		/* read */
3585084Sjohnlev 	nodev,		/* write */
3595084Sjohnlev 	privcmd_ioctl,
3605084Sjohnlev 	nodev,		/* devmap */
3615084Sjohnlev 	nodev,		/* mmap */
3625084Sjohnlev 	privcmd_segmap,
3635084Sjohnlev 	nochpoll,	/* poll */
3645084Sjohnlev 	ddi_prop_op,
3655084Sjohnlev 	NULL,
3665084Sjohnlev 	D_64BIT | D_NEW | D_MP
3675084Sjohnlev };
3685084Sjohnlev 
3695084Sjohnlev static struct dev_ops privcmd_dv_ops = {
3705084Sjohnlev 	DEVO_REV,
3715084Sjohnlev 	0,
3725084Sjohnlev 	privcmd_getinfo,
3737656SSherry.Moore@Sun.COM 	nulldev,		/* identify */
3747656SSherry.Moore@Sun.COM 	nulldev,		/* probe */
3755084Sjohnlev 	privcmd_attach,
3765084Sjohnlev 	privcmd_detach,
3777656SSherry.Moore@Sun.COM 	nodev,			/* reset */
3785084Sjohnlev 	&privcmd_cb_ops,
3797656SSherry.Moore@Sun.COM 	0,			/* struct bus_ops */
3807656SSherry.Moore@Sun.COM 	NULL,			/* power */
3817656SSherry.Moore@Sun.COM 	ddi_quiesce_not_needed,		/* quiesce */
3825084Sjohnlev };
3835084Sjohnlev 
3845084Sjohnlev static struct modldrv modldrv = {
3855084Sjohnlev 	&mod_driverops,
3867542SRichard.Bean@Sun.COM 	"privcmd driver",
3875084Sjohnlev 	&privcmd_dv_ops
3885084Sjohnlev };
3895084Sjohnlev 
3905084Sjohnlev static struct modlinkage modl = {
3915084Sjohnlev 	MODREV_1,
3925084Sjohnlev 	&modldrv
3935084Sjohnlev };
3945084Sjohnlev 
3955084Sjohnlev int
_init(void)3965084Sjohnlev _init(void)
3975084Sjohnlev {
3985084Sjohnlev 	return (mod_install(&modl));
3995084Sjohnlev }
4005084Sjohnlev 
4015084Sjohnlev int
_fini(void)4025084Sjohnlev _fini(void)
4035084Sjohnlev {
4045084Sjohnlev 	return (mod_remove(&modl));
4055084Sjohnlev }
4065084Sjohnlev 
4075084Sjohnlev int
_info(struct modinfo * modinfo)4085084Sjohnlev _info(struct modinfo *modinfo)
4095084Sjohnlev {
4105084Sjohnlev 	return (mod_info(&modl, modinfo));
4115084Sjohnlev }
412