xref: /illumos-gate/usr/src/uts/common/xen/io/xpvtap.c (revision 80d5689f5d4588adc071138e25e9d0d5252d9b55)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2017 Joyent, Inc.
 */
277eea693dSMark Johnson 
287eea693dSMark Johnson 
297eea693dSMark Johnson #include <sys/errno.h>
307eea693dSMark Johnson #include <sys/types.h>
317eea693dSMark Johnson #include <sys/conf.h>
327eea693dSMark Johnson #include <sys/kmem.h>
337eea693dSMark Johnson #include <sys/ddi.h>
347eea693dSMark Johnson #include <sys/stat.h>
357eea693dSMark Johnson #include <sys/sunddi.h>
367eea693dSMark Johnson #include <sys/file.h>
377eea693dSMark Johnson #include <sys/open.h>
387eea693dSMark Johnson #include <sys/modctl.h>
397eea693dSMark Johnson #include <sys/ddi_impldefs.h>
407eea693dSMark Johnson #include <sys/sysmacros.h>
417eea693dSMark Johnson #include <sys/ddidevmap.h>
427eea693dSMark Johnson #include <sys/policy.h>
437eea693dSMark Johnson 
447eea693dSMark Johnson #include <sys/vmsystm.h>
457eea693dSMark Johnson #include <vm/hat_i86.h>
467eea693dSMark Johnson #include <vm/hat_pte.h>
477eea693dSMark Johnson #include <vm/seg_kmem.h>
487eea693dSMark Johnson #include <vm/seg_mf.h>
497eea693dSMark Johnson 
507eea693dSMark Johnson #include <xen/io/blkif_impl.h>
517eea693dSMark Johnson #include <xen/io/blk_common.h>
527eea693dSMark Johnson #include <xen/io/xpvtap.h>
537eea693dSMark Johnson 
547eea693dSMark Johnson 
557eea693dSMark Johnson static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
567eea693dSMark Johnson static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
577eea693dSMark Johnson static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
587eea693dSMark Johnson     cred_t *cred, int *rval);
597eea693dSMark Johnson static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
607eea693dSMark Johnson     size_t len, size_t *maplen, uint_t model);
617eea693dSMark Johnson static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
627eea693dSMark Johnson     off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
637eea693dSMark Johnson     cred_t *cred_p);
647eea693dSMark Johnson static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
657eea693dSMark Johnson     struct pollhead **phpp);
667eea693dSMark Johnson 
677eea693dSMark Johnson static 	struct cb_ops xpvtap_cb_ops = {
687eea693dSMark Johnson 	xpvtap_open,		/* cb_open */
697eea693dSMark Johnson 	xpvtap_close,		/* cb_close */
707eea693dSMark Johnson 	nodev,			/* cb_strategy */
717eea693dSMark Johnson 	nodev,			/* cb_print */
727eea693dSMark Johnson 	nodev,			/* cb_dump */
737eea693dSMark Johnson 	nodev,			/* cb_read */
747eea693dSMark Johnson 	nodev,			/* cb_write */
757eea693dSMark Johnson 	xpvtap_ioctl,		/* cb_ioctl */
767eea693dSMark Johnson 	xpvtap_devmap,		/* cb_devmap */
777eea693dSMark Johnson 	nodev,			/* cb_mmap */
787eea693dSMark Johnson 	xpvtap_segmap,		/* cb_segmap */
797eea693dSMark Johnson 	xpvtap_chpoll,		/* cb_chpoll */
807eea693dSMark Johnson 	ddi_prop_op,		/* cb_prop_op */
817eea693dSMark Johnson 	NULL,			/* cb_stream */
827eea693dSMark Johnson 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
837eea693dSMark Johnson 	CB_REV
847eea693dSMark Johnson };
857eea693dSMark Johnson 
867eea693dSMark Johnson static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
877eea693dSMark Johnson     void **result);
887eea693dSMark Johnson static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
897eea693dSMark Johnson static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
907eea693dSMark Johnson 
917eea693dSMark Johnson static struct dev_ops xpvtap_dev_ops = {
927eea693dSMark Johnson 	DEVO_REV,		/* devo_rev */
937eea693dSMark Johnson 	0,			/* devo_refcnt */
947eea693dSMark Johnson 	xpvtap_getinfo,		/* devo_getinfo */
957eea693dSMark Johnson 	nulldev,		/* devo_identify */
967eea693dSMark Johnson 	nulldev,		/* devo_probe */
977eea693dSMark Johnson 	xpvtap_attach,		/* devo_attach */
987eea693dSMark Johnson 	xpvtap_detach,		/* devo_detach */
997eea693dSMark Johnson 	nodev,			/* devo_reset */
1007eea693dSMark Johnson 	&xpvtap_cb_ops,		/* devo_cb_ops */
1017eea693dSMark Johnson 	NULL,			/* devo_bus_ops */
1027eea693dSMark Johnson 	NULL			/* power */
1037eea693dSMark Johnson };
1047eea693dSMark Johnson 
1057eea693dSMark Johnson 
1067eea693dSMark Johnson static struct modldrv xpvtap_modldrv = {
1077eea693dSMark Johnson 	&mod_driverops,		/* Type of module.  This one is a driver */
1087eea693dSMark Johnson 	"xpvtap driver",	/* Name of the module. */
1097eea693dSMark Johnson 	&xpvtap_dev_ops,	/* driver ops */
1107eea693dSMark Johnson };
1117eea693dSMark Johnson 
1127eea693dSMark Johnson static struct modlinkage xpvtap_modlinkage = {
1137eea693dSMark Johnson 	MODREV_1,
1147eea693dSMark Johnson 	(void *) &xpvtap_modldrv,
1157eea693dSMark Johnson 	NULL
1167eea693dSMark Johnson };
1177eea693dSMark Johnson 
1187eea693dSMark Johnson 
1197eea693dSMark Johnson void *xpvtap_statep;
1207eea693dSMark Johnson 
1217eea693dSMark Johnson 
1227eea693dSMark Johnson static xpvtap_state_t *xpvtap_drv_init(int instance);
1237eea693dSMark Johnson static void xpvtap_drv_fini(xpvtap_state_t *state);
1247eea693dSMark Johnson static uint_t xpvtap_intr(caddr_t arg);
1257eea693dSMark Johnson 
1267eea693dSMark Johnson typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
1277eea693dSMark Johnson static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
1287eea693dSMark Johnson     xpvtap_rs_hdl_t *handle);
1297eea693dSMark Johnson static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
1307eea693dSMark Johnson static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
1317eea693dSMark Johnson static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
1327eea693dSMark Johnson static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
1337eea693dSMark Johnson     xpvtap_rs_cleanup_t callback, void *arg);
1347eea693dSMark Johnson 
1357eea693dSMark Johnson static int xpvtap_segmf_register(xpvtap_state_t *state);
1367eea693dSMark Johnson static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);
1377eea693dSMark Johnson 
1387eea693dSMark Johnson static int xpvtap_user_init(xpvtap_state_t *state);
1397eea693dSMark Johnson static void xpvtap_user_fini(xpvtap_state_t *state);
1407eea693dSMark Johnson static int xpvtap_user_ring_init(xpvtap_state_t *state);
1417eea693dSMark Johnson static void xpvtap_user_ring_fini(xpvtap_state_t *state);
1427eea693dSMark Johnson static int xpvtap_user_thread_init(xpvtap_state_t *state);
1437eea693dSMark Johnson static void xpvtap_user_thread_fini(xpvtap_state_t *state);
1447eea693dSMark Johnson static void xpvtap_user_thread_start(caddr_t arg);
1457eea693dSMark Johnson static void xpvtap_user_thread_stop(xpvtap_state_t *state);
1467eea693dSMark Johnson static void xpvtap_user_thread(void *arg);
1477eea693dSMark Johnson 
1487eea693dSMark Johnson static void xpvtap_user_app_stop(caddr_t arg);
1497eea693dSMark Johnson 
1507eea693dSMark Johnson static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1517eea693dSMark Johnson     uint_t *uid);
1527eea693dSMark Johnson static int xpvtap_user_request_push(xpvtap_state_t *state,
1537eea693dSMark Johnson     blkif_request_t *req, uint_t uid);
1547eea693dSMark Johnson static int xpvtap_user_response_get(xpvtap_state_t *state,
1557eea693dSMark Johnson     blkif_response_t *resp, uint_t *uid);
1567eea693dSMark Johnson static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
1577eea693dSMark Johnson 
1587eea693dSMark Johnson 
1597eea693dSMark Johnson /*
1607eea693dSMark Johnson  * _init()
1617eea693dSMark Johnson  */
1627eea693dSMark Johnson int
_init(void)1637eea693dSMark Johnson _init(void)
1647eea693dSMark Johnson {
1657eea693dSMark Johnson 	int e;
1667eea693dSMark Johnson 
1677eea693dSMark Johnson 	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
1687eea693dSMark Johnson 	if (e != 0) {
1697eea693dSMark Johnson 		return (e);
1707eea693dSMark Johnson 	}
1717eea693dSMark Johnson 
1727eea693dSMark Johnson 	e = mod_install(&xpvtap_modlinkage);
1737eea693dSMark Johnson 	if (e != 0) {
1747eea693dSMark Johnson 		ddi_soft_state_fini(&xpvtap_statep);
1757eea693dSMark Johnson 		return (e);
1767eea693dSMark Johnson 	}
1777eea693dSMark Johnson 
1787eea693dSMark Johnson 	return (0);
1797eea693dSMark Johnson }
1807eea693dSMark Johnson 
1817eea693dSMark Johnson 
1827eea693dSMark Johnson /*
1837eea693dSMark Johnson  * _info()
1847eea693dSMark Johnson  */
1857eea693dSMark Johnson int
_info(struct modinfo * modinfop)1867eea693dSMark Johnson _info(struct modinfo *modinfop)
1877eea693dSMark Johnson {
1887eea693dSMark Johnson 	return (mod_info(&xpvtap_modlinkage, modinfop));
1897eea693dSMark Johnson }
1907eea693dSMark Johnson 
1917eea693dSMark Johnson 
1927eea693dSMark Johnson /*
1937eea693dSMark Johnson  * _fini()
1947eea693dSMark Johnson  */
1957eea693dSMark Johnson int
_fini(void)1967eea693dSMark Johnson _fini(void)
1977eea693dSMark Johnson {
1987eea693dSMark Johnson 	int e;
1997eea693dSMark Johnson 
2007eea693dSMark Johnson 	e = mod_remove(&xpvtap_modlinkage);
2017eea693dSMark Johnson 	if (e != 0) {
2027eea693dSMark Johnson 		return (e);
2037eea693dSMark Johnson 	}
2047eea693dSMark Johnson 
2057eea693dSMark Johnson 	ddi_soft_state_fini(&xpvtap_statep);
2067eea693dSMark Johnson 
2077eea693dSMark Johnson 	return (0);
2087eea693dSMark Johnson }
2097eea693dSMark Johnson 
2107eea693dSMark Johnson 
2117eea693dSMark Johnson /*
2127eea693dSMark Johnson  * xpvtap_attach()
2137eea693dSMark Johnson  */
2147eea693dSMark Johnson static int
xpvtap_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2157eea693dSMark Johnson xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2167eea693dSMark Johnson {
2177eea693dSMark Johnson 	blk_ringinit_args_t args;
2187eea693dSMark Johnson 	xpvtap_state_t *state;
2197eea693dSMark Johnson 	int instance;
2207eea693dSMark Johnson 	int e;
2217eea693dSMark Johnson 
2227eea693dSMark Johnson 
2237eea693dSMark Johnson 	switch (cmd) {
2247eea693dSMark Johnson 	case DDI_ATTACH:
2257eea693dSMark Johnson 		break;
2267eea693dSMark Johnson 
2277eea693dSMark Johnson 	case DDI_RESUME:
2287eea693dSMark Johnson 		return (DDI_SUCCESS);
2297eea693dSMark Johnson 
2307eea693dSMark Johnson 	default:
2317eea693dSMark Johnson 		return (DDI_FAILURE);
2327eea693dSMark Johnson 	}
2337eea693dSMark Johnson 
2347eea693dSMark Johnson 	/* initialize our state info */
2357eea693dSMark Johnson 	instance = ddi_get_instance(dip);
2367eea693dSMark Johnson 	state = xpvtap_drv_init(instance);
2377eea693dSMark Johnson 	if (state == NULL) {
2387eea693dSMark Johnson 		return (DDI_FAILURE);
2397eea693dSMark Johnson 	}
2407eea693dSMark Johnson 	state->bt_dip = dip;
2417eea693dSMark Johnson 
2427eea693dSMark Johnson 	/* Initialize the guest ring */
2437eea693dSMark Johnson 	args.ar_dip = state->bt_dip;
2447eea693dSMark Johnson 	args.ar_intr = xpvtap_intr;
2457eea693dSMark Johnson 	args.ar_intr_arg = (caddr_t)state;
2467eea693dSMark Johnson 	args.ar_ringup = xpvtap_user_thread_start;
2477eea693dSMark Johnson 	args.ar_ringup_arg = (caddr_t)state;
2487eea693dSMark Johnson 	args.ar_ringdown = xpvtap_user_app_stop;
2497eea693dSMark Johnson 	args.ar_ringdown_arg = (caddr_t)state;
2507eea693dSMark Johnson 	e = blk_ring_init(&args, &state->bt_guest_ring);
2517eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
2527eea693dSMark Johnson 		goto attachfail_ringinit;
2537eea693dSMark Johnson 	}
2547eea693dSMark Johnson 
2557eea693dSMark Johnson 	/* create the minor node (for ioctl/mmap) */
2567eea693dSMark Johnson 	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
2577eea693dSMark Johnson 	    DDI_PSEUDO, 0);
2587eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
2597eea693dSMark Johnson 		goto attachfail_minor_node;
2607eea693dSMark Johnson 	}
2617eea693dSMark Johnson 
2627eea693dSMark Johnson 	/* Report that driver was loaded */
2637eea693dSMark Johnson 	ddi_report_dev(dip);
2647eea693dSMark Johnson 
2657eea693dSMark Johnson 	return (DDI_SUCCESS);
2667eea693dSMark Johnson 
2677eea693dSMark Johnson attachfail_minor_node:
2687eea693dSMark Johnson 	blk_ring_fini(&state->bt_guest_ring);
2697eea693dSMark Johnson attachfail_ringinit:
2707eea693dSMark Johnson 	xpvtap_drv_fini(state);
2717eea693dSMark Johnson 	return (DDI_FAILURE);
2727eea693dSMark Johnson }
2737eea693dSMark Johnson 
2747eea693dSMark Johnson 
2757eea693dSMark Johnson /*
2767eea693dSMark Johnson  * xpvtap_detach()
2777eea693dSMark Johnson  */
2787eea693dSMark Johnson static int
xpvtap_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2797eea693dSMark Johnson xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2807eea693dSMark Johnson {
2817eea693dSMark Johnson 	xpvtap_state_t *state;
2827eea693dSMark Johnson 	int instance;
2837eea693dSMark Johnson 
2847eea693dSMark Johnson 
2857eea693dSMark Johnson 	instance = ddi_get_instance(dip);
2867eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
2877eea693dSMark Johnson 	if (state == NULL) {
2887eea693dSMark Johnson 		return (DDI_FAILURE);
2897eea693dSMark Johnson 	}
2907eea693dSMark Johnson 
2917eea693dSMark Johnson 	switch (cmd) {
2927eea693dSMark Johnson 	case DDI_DETACH:
2937eea693dSMark Johnson 		break;
2947eea693dSMark Johnson 
2957eea693dSMark Johnson 	case DDI_SUSPEND:
2967eea693dSMark Johnson 	default:
2977eea693dSMark Johnson 		return (DDI_FAILURE);
2987eea693dSMark Johnson 	}
2997eea693dSMark Johnson 
3007eea693dSMark Johnson 	xpvtap_user_thread_stop(state);
3017eea693dSMark Johnson 	blk_ring_fini(&state->bt_guest_ring);
3027eea693dSMark Johnson 	xpvtap_drv_fini(state);
3037eea693dSMark Johnson 	ddi_remove_minor_node(dip, NULL);
3047eea693dSMark Johnson 
3057eea693dSMark Johnson 	return (DDI_SUCCESS);
3067eea693dSMark Johnson }
3077eea693dSMark Johnson 
3087eea693dSMark Johnson 
3097eea693dSMark Johnson /*
3107eea693dSMark Johnson  * xpvtap_getinfo()
3117eea693dSMark Johnson  */
3127eea693dSMark Johnson /*ARGSUSED*/
3137eea693dSMark Johnson static int
xpvtap_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)3147eea693dSMark Johnson xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
3157eea693dSMark Johnson {
3167eea693dSMark Johnson 	xpvtap_state_t *state;
3177eea693dSMark Johnson 	int instance;
3187eea693dSMark Johnson 	dev_t dev;
3197eea693dSMark Johnson 	int e;
3207eea693dSMark Johnson 
3217eea693dSMark Johnson 
3227eea693dSMark Johnson 	dev = (dev_t)arg;
3237eea693dSMark Johnson 	instance = getminor(dev);
3247eea693dSMark Johnson 
3257eea693dSMark Johnson 	switch (cmd) {
3267eea693dSMark Johnson 	case DDI_INFO_DEVT2DEVINFO:
3277eea693dSMark Johnson 		state = ddi_get_soft_state(xpvtap_statep, instance);
3287eea693dSMark Johnson 		if (state == NULL) {
3297eea693dSMark Johnson 			return (DDI_FAILURE);
3307eea693dSMark Johnson 		}
3317eea693dSMark Johnson 		*result = (void *)state->bt_dip;
3327eea693dSMark Johnson 		e = DDI_SUCCESS;
3337eea693dSMark Johnson 		break;
3347eea693dSMark Johnson 
3357eea693dSMark Johnson 	case DDI_INFO_DEVT2INSTANCE:
3367eea693dSMark Johnson 		*result = (void *)(uintptr_t)instance;
3377eea693dSMark Johnson 		e = DDI_SUCCESS;
3387eea693dSMark Johnson 		break;
3397eea693dSMark Johnson 
3407eea693dSMark Johnson 	default:
3417eea693dSMark Johnson 		e = DDI_FAILURE;
3427eea693dSMark Johnson 		break;
3437eea693dSMark Johnson 	}
3447eea693dSMark Johnson 
3457eea693dSMark Johnson 	return (e);
3467eea693dSMark Johnson }
3477eea693dSMark Johnson 
3487eea693dSMark Johnson 
3497eea693dSMark Johnson /*
3507eea693dSMark Johnson  * xpvtap_open()
3517eea693dSMark Johnson  */
3527eea693dSMark Johnson /*ARGSUSED*/
3537eea693dSMark Johnson static int
xpvtap_open(dev_t * devp,int flag,int otyp,cred_t * cred)3547eea693dSMark Johnson xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
3557eea693dSMark Johnson {
3567eea693dSMark Johnson 	xpvtap_state_t *state;
3577eea693dSMark Johnson 	int instance;
3587eea693dSMark Johnson 
3597eea693dSMark Johnson 
3607eea693dSMark Johnson 	if (secpolicy_xvm_control(cred)) {
3617eea693dSMark Johnson 		return (EPERM);
3627eea693dSMark Johnson 	}
3637eea693dSMark Johnson 
3647eea693dSMark Johnson 	instance = getminor(*devp);
3657eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
3667eea693dSMark Johnson 	if (state == NULL) {
3677eea693dSMark Johnson 		return (ENXIO);
3687eea693dSMark Johnson 	}
3697eea693dSMark Johnson 
3707eea693dSMark Johnson 	/* we should only be opened once */
3717eea693dSMark Johnson 	mutex_enter(&state->bt_open.bo_mutex);
3727eea693dSMark Johnson 	if (state->bt_open.bo_opened) {
3737eea693dSMark Johnson 		mutex_exit(&state->bt_open.bo_mutex);
3747eea693dSMark Johnson 		return (EBUSY);
3757eea693dSMark Johnson 	}
3767eea693dSMark Johnson 	state->bt_open.bo_opened = B_TRUE;
3777eea693dSMark Johnson 	mutex_exit(&state->bt_open.bo_mutex);
3787eea693dSMark Johnson 
3797eea693dSMark Johnson 	/*
3807eea693dSMark Johnson 	 * save the apps address space. need it for mapping/unmapping grefs
3817eea693dSMark Johnson 	 * since will be doing it in a separate kernel thread.
3827eea693dSMark Johnson 	 */
3837eea693dSMark Johnson 	state->bt_map.um_as = curproc->p_as;
3847eea693dSMark Johnson 
3857eea693dSMark Johnson 	return (0);
3867eea693dSMark Johnson }
3877eea693dSMark Johnson 
3887eea693dSMark Johnson 
3897eea693dSMark Johnson /*
3907eea693dSMark Johnson  * xpvtap_close()
3917eea693dSMark Johnson  */
3927eea693dSMark Johnson /*ARGSUSED*/
3937eea693dSMark Johnson static int
xpvtap_close(dev_t devp,int flag,int otyp,cred_t * cred)3947eea693dSMark Johnson xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
3957eea693dSMark Johnson {
3967eea693dSMark Johnson 	xpvtap_state_t *state;
3977eea693dSMark Johnson 	int instance;
3987eea693dSMark Johnson 
3997eea693dSMark Johnson 
4007eea693dSMark Johnson 	instance = getminor(devp);
4017eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
4027eea693dSMark Johnson 	if (state == NULL) {
4037eea693dSMark Johnson 		return (ENXIO);
4047eea693dSMark Johnson 	}
4057eea693dSMark Johnson 
4067eea693dSMark Johnson 	/*
4077eea693dSMark Johnson 	 * wake thread so it can cleanup and wait for it to exit so we can
4087eea693dSMark Johnson 	 * be sure it's not in the middle of processing a request/response.
4097eea693dSMark Johnson 	 */
4107eea693dSMark Johnson 	mutex_enter(&state->bt_thread.ut_mutex);
4117eea693dSMark Johnson 	state->bt_thread.ut_wake = B_TRUE;
4127eea693dSMark Johnson 	state->bt_thread.ut_exit = B_TRUE;
4137eea693dSMark Johnson 	cv_signal(&state->bt_thread.ut_wake_cv);
4147eea693dSMark Johnson 	if (!state->bt_thread.ut_exit_done) {
4157eea693dSMark Johnson 		cv_wait(&state->bt_thread.ut_exit_done_cv,
4167eea693dSMark Johnson 		    &state->bt_thread.ut_mutex);
4177eea693dSMark Johnson 	}
4187eea693dSMark Johnson 	ASSERT(state->bt_thread.ut_exit_done);
4197eea693dSMark Johnson 	mutex_exit(&state->bt_thread.ut_mutex);
4207eea693dSMark Johnson 
4217eea693dSMark Johnson 	state->bt_map.um_as = NULL;
4227eea693dSMark Johnson 	state->bt_map.um_guest_pages = NULL;
4237eea693dSMark Johnson 
4247eea693dSMark Johnson 	/*
4257eea693dSMark Johnson 	 * when the ring is brought down, a userland hotplug script is run
4267eea693dSMark Johnson 	 * which tries to bring the userland app down. We'll wait for a bit
4277eea693dSMark Johnson 	 * for the user app to exit. Notify the thread waiting that the app
4287eea693dSMark Johnson 	 * has closed the driver.
4297eea693dSMark Johnson 	 */
4307eea693dSMark Johnson 	mutex_enter(&state->bt_open.bo_mutex);
4317eea693dSMark Johnson 	ASSERT(state->bt_open.bo_opened);
4327eea693dSMark Johnson 	state->bt_open.bo_opened = B_FALSE;
4337eea693dSMark Johnson 	cv_signal(&state->bt_open.bo_exit_cv);
4347eea693dSMark Johnson 	mutex_exit(&state->bt_open.bo_mutex);
4357eea693dSMark Johnson 
4367eea693dSMark Johnson 	return (0);
4377eea693dSMark Johnson }
4387eea693dSMark Johnson 
4397eea693dSMark Johnson 
4407eea693dSMark Johnson /*
4417eea693dSMark Johnson  * xpvtap_ioctl()
4427eea693dSMark Johnson  */
4437eea693dSMark Johnson /*ARGSUSED*/
4447eea693dSMark Johnson static int
xpvtap_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cred,int * rval)4457eea693dSMark Johnson xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
4467eea693dSMark Johnson     int *rval)
4477eea693dSMark Johnson {
4487eea693dSMark Johnson 	xpvtap_state_t *state;
4497eea693dSMark Johnson 	int instance;
4507eea693dSMark Johnson 
4517eea693dSMark Johnson 
4527eea693dSMark Johnson 	if (secpolicy_xvm_control(cred)) {
4537eea693dSMark Johnson 		return (EPERM);
4547eea693dSMark Johnson 	}
4557eea693dSMark Johnson 
4567eea693dSMark Johnson 	instance = getminor(dev);
4577eea693dSMark Johnson 	if (instance == -1) {
4587eea693dSMark Johnson 		return (EBADF);
4597eea693dSMark Johnson 	}
4607eea693dSMark Johnson 
4617eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
4627eea693dSMark Johnson 	if (state == NULL) {
4637eea693dSMark Johnson 		return (EBADF);
4647eea693dSMark Johnson 	}
4657eea693dSMark Johnson 
4667eea693dSMark Johnson 	switch (cmd) {
4677eea693dSMark Johnson 	case XPVTAP_IOCTL_RESP_PUSH:
4687eea693dSMark Johnson 		/*
4697eea693dSMark Johnson 		 * wake thread, thread handles guest requests and user app
4707eea693dSMark Johnson 		 * responses.
4717eea693dSMark Johnson 		 */
4727eea693dSMark Johnson 		mutex_enter(&state->bt_thread.ut_mutex);
4737eea693dSMark Johnson 		state->bt_thread.ut_wake = B_TRUE;
4747eea693dSMark Johnson 		cv_signal(&state->bt_thread.ut_wake_cv);
4757eea693dSMark Johnson 		mutex_exit(&state->bt_thread.ut_mutex);
4767eea693dSMark Johnson 		break;
4777eea693dSMark Johnson 
4787eea693dSMark Johnson 	default:
4797eea693dSMark Johnson 		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
4807eea693dSMark Johnson 		return (ENXIO);
4817eea693dSMark Johnson 	}
4827eea693dSMark Johnson 
4837eea693dSMark Johnson 	return (0);
4847eea693dSMark Johnson }
4857eea693dSMark Johnson 
4867eea693dSMark Johnson 
4877eea693dSMark Johnson /*
4887eea693dSMark Johnson  * xpvtap_segmap()
4897eea693dSMark Johnson  */
4907eea693dSMark Johnson /*ARGSUSED*/
4917eea693dSMark Johnson static int
xpvtap_segmap(dev_t dev,off_t off,struct as * asp,caddr_t * addrp,off_t len,unsigned int prot,unsigned int maxprot,unsigned int flags,cred_t * cred_p)4927eea693dSMark Johnson xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
4937eea693dSMark Johnson     off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
4947eea693dSMark Johnson     cred_t *cred_p)
4957eea693dSMark Johnson {
4967eea693dSMark Johnson 	struct segmf_crargs a;
4977eea693dSMark Johnson 	xpvtap_state_t *state;
4987eea693dSMark Johnson 	int instance;
4997eea693dSMark Johnson 	int e;
5007eea693dSMark Johnson 
5017eea693dSMark Johnson 
5027eea693dSMark Johnson 	if (secpolicy_xvm_control(cred_p)) {
5037eea693dSMark Johnson 		return (EPERM);
5047eea693dSMark Johnson 	}
5057eea693dSMark Johnson 
5067eea693dSMark Johnson 	instance = getminor(dev);
5077eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
5087eea693dSMark Johnson 	if (state == NULL) {
5097eea693dSMark Johnson 		return (EBADF);
5107eea693dSMark Johnson 	}
5117eea693dSMark Johnson 
5127eea693dSMark Johnson 	/* the user app should be doing a MAP_SHARED mapping */
5137eea693dSMark Johnson 	if ((flags & MAP_TYPE) != MAP_SHARED) {
5147eea693dSMark Johnson 		return (EINVAL);
5157eea693dSMark Johnson 	}
5167eea693dSMark Johnson 
5177eea693dSMark Johnson 	/*
5187eea693dSMark Johnson 	 * if this is the user ring (offset = 0), devmap it (which ends up in
5197eea693dSMark Johnson 	 * xpvtap_devmap). devmap will alloc and map the ring into the
5207eea693dSMark Johnson 	 * app's VA space.
5217eea693dSMark Johnson 	 */
5227eea693dSMark Johnson 	if (off == 0) {
5237eea693dSMark Johnson 		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
5247eea693dSMark Johnson 		    prot, maxprot, flags, cred_p);
5257eea693dSMark Johnson 		return (e);
5267eea693dSMark Johnson 	}
5277eea693dSMark Johnson 
5287eea693dSMark Johnson 	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
5297eea693dSMark Johnson 	if (off != PAGESIZE) {
5307eea693dSMark Johnson 		return (EINVAL);
5317eea693dSMark Johnson 	}
5327eea693dSMark Johnson 
5337eea693dSMark Johnson 	/* make sure we get the size we're expecting */
5347eea693dSMark Johnson 	if (len != XPVTAP_GREF_BUFSIZE) {
5357eea693dSMark Johnson 		return (EINVAL);
5367eea693dSMark Johnson 	}
5377eea693dSMark Johnson 
5387eea693dSMark Johnson 	/*
5397eea693dSMark Johnson 	 * reserve user app VA space for the gref pages and use segmf to
5407eea693dSMark Johnson 	 * manage the backing store for the physical memory. segmf will
5417eea693dSMark Johnson 	 * map in/out the grefs and fault them in/out.
5427eea693dSMark Johnson 	 */
5437eea693dSMark Johnson 	ASSERT(asp == state->bt_map.um_as);
5447eea693dSMark Johnson 	as_rangelock(asp);
5457eea693dSMark Johnson 	if ((flags & MAP_FIXED) == 0) {
5467eea693dSMark Johnson 		map_addr(addrp, len, 0, 0, flags);
5477eea693dSMark Johnson 		if (*addrp == NULL) {
5487eea693dSMark Johnson 			as_rangeunlock(asp);
5497eea693dSMark Johnson 			return (ENOMEM);
5507eea693dSMark Johnson 		}
5517eea693dSMark Johnson 	} else {
5527eea693dSMark Johnson 		/* User specified address */
5537eea693dSMark Johnson 		(void) as_unmap(asp, *addrp, len);
5547eea693dSMark Johnson 	}
5557eea693dSMark Johnson 	a.dev = dev;
5567eea693dSMark Johnson 	a.prot = (uchar_t)prot;
5577eea693dSMark Johnson 	a.maxprot = (uchar_t)maxprot;
5587eea693dSMark Johnson 	e = as_map(asp, *addrp, len, segmf_create, &a);
5597eea693dSMark Johnson 	if (e != 0) {
5607eea693dSMark Johnson 		as_rangeunlock(asp);
5617eea693dSMark Johnson 		return (e);
5627eea693dSMark Johnson 	}
5637eea693dSMark Johnson 	as_rangeunlock(asp);
5647eea693dSMark Johnson 
5657eea693dSMark Johnson 	/*
5667eea693dSMark Johnson 	 * Stash user base address, and compute address where the request
5677eea693dSMark Johnson 	 * array will end up.
5687eea693dSMark Johnson 	 */
5697eea693dSMark Johnson 	state->bt_map.um_guest_pages = (caddr_t)*addrp;
5707eea693dSMark Johnson 	state->bt_map.um_guest_size = (size_t)len;
5717eea693dSMark Johnson 
5727eea693dSMark Johnson 	/* register an as callback so we can cleanup when the app goes away */
5737eea693dSMark Johnson 	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
5747eea693dSMark Johnson 	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
5757eea693dSMark Johnson 	if (e != 0) {
5767eea693dSMark Johnson 		(void) as_unmap(asp, *addrp, len);
5777eea693dSMark Johnson 		return (EINVAL);
5787eea693dSMark Johnson 	}
5797eea693dSMark Johnson 
5807eea693dSMark Johnson 	/* wake thread to see if there are requests already queued up */
5817eea693dSMark Johnson 	mutex_enter(&state->bt_thread.ut_mutex);
5827eea693dSMark Johnson 	state->bt_thread.ut_wake = B_TRUE;
5837eea693dSMark Johnson 	cv_signal(&state->bt_thread.ut_wake_cv);
5847eea693dSMark Johnson 	mutex_exit(&state->bt_thread.ut_mutex);
5857eea693dSMark Johnson 
5867eea693dSMark Johnson 	return (0);
5877eea693dSMark Johnson }
5887eea693dSMark Johnson 
5897eea693dSMark Johnson 
5907eea693dSMark Johnson /*
5917eea693dSMark Johnson  * xpvtap_devmap()
5927eea693dSMark Johnson  */
5937eea693dSMark Johnson /*ARGSUSED*/
5947eea693dSMark Johnson static int
xpvtap_devmap(dev_t dev,devmap_cookie_t dhp,offset_t off,size_t len,size_t * maplen,uint_t model)5957eea693dSMark Johnson xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
5967eea693dSMark Johnson     size_t *maplen, uint_t model)
5977eea693dSMark Johnson {
5987eea693dSMark Johnson 	xpvtap_user_ring_t *usring;
5997eea693dSMark Johnson 	xpvtap_state_t *state;
6007eea693dSMark Johnson 	int instance;
6017eea693dSMark Johnson 	int e;
6027eea693dSMark Johnson 
6037eea693dSMark Johnson 
6047eea693dSMark Johnson 	instance = getminor(dev);
6057eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
6067eea693dSMark Johnson 	if (state == NULL) {
6077eea693dSMark Johnson 		return (EBADF);
6087eea693dSMark Johnson 	}
6097eea693dSMark Johnson 
6107eea693dSMark Johnson 	/* we should only get here if the offset was == 0 */
6117eea693dSMark Johnson 	if (off != 0) {
6127eea693dSMark Johnson 		return (EINVAL);
6137eea693dSMark Johnson 	}
6147eea693dSMark Johnson 
6157eea693dSMark Johnson 	/* we should only be mapping in one page */
6167eea693dSMark Johnson 	if (len != PAGESIZE) {
6177eea693dSMark Johnson 		return (EINVAL);
6187eea693dSMark Johnson 	}
6197eea693dSMark Johnson 
6207eea693dSMark Johnson 	/*
6217eea693dSMark Johnson 	 * we already allocated the user ring during driver attach, all we
6227eea693dSMark Johnson 	 * need to do is map it into the user app's VA.
6237eea693dSMark Johnson 	 */
6247eea693dSMark Johnson 	usring = &state->bt_user_ring;
6257eea693dSMark Johnson 	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
6267eea693dSMark Johnson 	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
6277eea693dSMark Johnson 	if (e < 0) {
6287eea693dSMark Johnson 		return (e);
6297eea693dSMark Johnson 	}
6307eea693dSMark Johnson 
6317eea693dSMark Johnson 	/* return the size to compete the devmap */
6327eea693dSMark Johnson 	*maplen = PAGESIZE;
6337eea693dSMark Johnson 
6347eea693dSMark Johnson 	return (0);
6357eea693dSMark Johnson }
6367eea693dSMark Johnson 
6377eea693dSMark Johnson 
6387eea693dSMark Johnson /*
6397eea693dSMark Johnson  * xpvtap_chpoll()
6407eea693dSMark Johnson  */
6417eea693dSMark Johnson static int
xpvtap_chpoll(dev_t dev,short events,int anyyet,short * reventsp,struct pollhead ** phpp)6427eea693dSMark Johnson xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6437eea693dSMark Johnson     struct pollhead **phpp)
6447eea693dSMark Johnson {
6457eea693dSMark Johnson 	xpvtap_user_ring_t *usring;
6467eea693dSMark Johnson 	xpvtap_state_t *state;
6477eea693dSMark Johnson 	int instance;
6487eea693dSMark Johnson 
6497eea693dSMark Johnson 
6507eea693dSMark Johnson 	instance = getminor(dev);
6517eea693dSMark Johnson 	if (instance == -1) {
6527eea693dSMark Johnson 		return (EBADF);
6537eea693dSMark Johnson 	}
6547eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
6557eea693dSMark Johnson 	if (state == NULL) {
6567eea693dSMark Johnson 		return (EBADF);
6577eea693dSMark Johnson 	}
6587eea693dSMark Johnson 
6597eea693dSMark Johnson 	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
6607eea693dSMark Johnson 		return (EINVAL);
6617eea693dSMark Johnson 	}
6627eea693dSMark Johnson 
6637eea693dSMark Johnson 	/*
6647eea693dSMark Johnson 	 * if we pushed requests on the user ring since the last poll, wakeup
6657eea693dSMark Johnson 	 * the user app
6667eea693dSMark Johnson 	 */
667*80d5689fSPatrick Mooney 	*reventsp = 0;
6687eea693dSMark Johnson 	usring = &state->bt_user_ring;
6697eea693dSMark Johnson 	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
6707eea693dSMark Johnson 
6717eea693dSMark Johnson 		/*
6727eea693dSMark Johnson 		 * XXX - is this faster here or xpvtap_user_request_push??
6737eea693dSMark Johnson 		 * prelim data says here.  Because less membars or because
6747eea693dSMark Johnson 		 * user thread will spin in poll requests before getting to
6757eea693dSMark Johnson 		 * responses?
6767eea693dSMark Johnson 		 */
6777eea693dSMark Johnson 		RING_PUSH_REQUESTS(&usring->ur_ring);
6787eea693dSMark Johnson 
6797eea693dSMark Johnson 		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
6807eea693dSMark Johnson 		*reventsp =  POLLIN | POLLRDNORM;
6817eea693dSMark Johnson 	}
682*80d5689fSPatrick Mooney 
683*80d5689fSPatrick Mooney 	if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
684*80d5689fSPatrick Mooney 		*phpp = &state->bt_pollhead;
6857eea693dSMark Johnson 	}
6867eea693dSMark Johnson 
6877eea693dSMark Johnson 	return (0);
6887eea693dSMark Johnson }
6897eea693dSMark Johnson 
6907eea693dSMark Johnson 
6917eea693dSMark Johnson /*
6927eea693dSMark Johnson  * xpvtap_drv_init()
6937eea693dSMark Johnson  */
6947eea693dSMark Johnson static xpvtap_state_t *
xpvtap_drv_init(int instance)6957eea693dSMark Johnson xpvtap_drv_init(int instance)
6967eea693dSMark Johnson {
6977eea693dSMark Johnson 	xpvtap_state_t *state;
6987eea693dSMark Johnson 	int e;
6997eea693dSMark Johnson 
7007eea693dSMark Johnson 
7017eea693dSMark Johnson 	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
7027eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
7037eea693dSMark Johnson 		return (NULL);
7047eea693dSMark Johnson 	}
7057eea693dSMark Johnson 	state = ddi_get_soft_state(xpvtap_statep, instance);
7067eea693dSMark Johnson 	if (state == NULL) {
7077eea693dSMark Johnson 		goto drvinitfail_get_soft_state;
7087eea693dSMark Johnson 	}
7097eea693dSMark Johnson 
7107eea693dSMark Johnson 	state->bt_instance = instance;
7117eea693dSMark Johnson 	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
7127eea693dSMark Johnson 	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
7137eea693dSMark Johnson 	state->bt_open.bo_opened = B_FALSE;
7147eea693dSMark Johnson 	state->bt_map.um_registered = B_FALSE;
7157eea693dSMark Johnson 
7167eea693dSMark Johnson 	/* initialize user ring, thread, mapping state */
7177eea693dSMark Johnson 	e = xpvtap_user_init(state);
7187eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
7197eea693dSMark Johnson 		goto drvinitfail_userinit;
7207eea693dSMark Johnson 	}
7217eea693dSMark Johnson 
7227eea693dSMark Johnson 	return (state);
7237eea693dSMark Johnson 
7247eea693dSMark Johnson drvinitfail_userinit:
7257eea693dSMark Johnson 	cv_destroy(&state->bt_open.bo_exit_cv);
7267eea693dSMark Johnson 	mutex_destroy(&state->bt_open.bo_mutex);
7277eea693dSMark Johnson drvinitfail_get_soft_state:
7287eea693dSMark Johnson 	(void) ddi_soft_state_free(xpvtap_statep, instance);
7297eea693dSMark Johnson 	return (NULL);
7307eea693dSMark Johnson }
7317eea693dSMark Johnson 
7327eea693dSMark Johnson 
7337eea693dSMark Johnson /*
7347eea693dSMark Johnson  * xpvtap_drv_fini()
7357eea693dSMark Johnson  */
7367eea693dSMark Johnson static void
xpvtap_drv_fini(xpvtap_state_t * state)7377eea693dSMark Johnson xpvtap_drv_fini(xpvtap_state_t *state)
7387eea693dSMark Johnson {
7397eea693dSMark Johnson 	xpvtap_user_fini(state);
7407eea693dSMark Johnson 	cv_destroy(&state->bt_open.bo_exit_cv);
7417eea693dSMark Johnson 	mutex_destroy(&state->bt_open.bo_mutex);
7427eea693dSMark Johnson 	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
7437eea693dSMark Johnson }
7447eea693dSMark Johnson 
7457eea693dSMark Johnson 
7467eea693dSMark Johnson /*
7477eea693dSMark Johnson  * xpvtap_intr()
7487eea693dSMark Johnson  *    this routine will be called when we have a request on the guest ring.
7497eea693dSMark Johnson  */
7507eea693dSMark Johnson static uint_t
xpvtap_intr(caddr_t arg)7517eea693dSMark Johnson xpvtap_intr(caddr_t arg)
7527eea693dSMark Johnson {
7537eea693dSMark Johnson 	xpvtap_state_t *state;
7547eea693dSMark Johnson 
7557eea693dSMark Johnson 
7567eea693dSMark Johnson 	state = (xpvtap_state_t *)arg;
7577eea693dSMark Johnson 
7587eea693dSMark Johnson 	/* wake thread, thread handles guest requests and user app responses */
7597eea693dSMark Johnson 	mutex_enter(&state->bt_thread.ut_mutex);
7607eea693dSMark Johnson 	state->bt_thread.ut_wake = B_TRUE;
7617eea693dSMark Johnson 	cv_signal(&state->bt_thread.ut_wake_cv);
7627eea693dSMark Johnson 	mutex_exit(&state->bt_thread.ut_mutex);
7637eea693dSMark Johnson 
7647eea693dSMark Johnson 	return (DDI_INTR_CLAIMED);
7657eea693dSMark Johnson }
7667eea693dSMark Johnson 
7677eea693dSMark Johnson 
7687eea693dSMark Johnson /*
7697eea693dSMark Johnson  * xpvtap_segmf_register()
7707eea693dSMark Johnson  */
7717eea693dSMark Johnson static int
xpvtap_segmf_register(xpvtap_state_t * state)7727eea693dSMark Johnson xpvtap_segmf_register(xpvtap_state_t *state)
7737eea693dSMark Johnson {
7747eea693dSMark Johnson 	struct seg *seg;
7757eea693dSMark Johnson 	uint64_t pte_ma;
7767eea693dSMark Johnson 	struct as *as;
7777eea693dSMark Johnson 	caddr_t uaddr;
7787eea693dSMark Johnson 	uint_t pgcnt;
7797eea693dSMark Johnson 	int i;
7807eea693dSMark Johnson 
7817eea693dSMark Johnson 
7827eea693dSMark Johnson 	as = state->bt_map.um_as;
7837eea693dSMark Johnson 	pgcnt = btopr(state->bt_map.um_guest_size);
7847eea693dSMark Johnson 	uaddr = state->bt_map.um_guest_pages;
7857eea693dSMark Johnson 
7867eea693dSMark Johnson 	if (pgcnt == 0) {
7877eea693dSMark Johnson 		return (DDI_FAILURE);
7887eea693dSMark Johnson 	}
7897eea693dSMark Johnson 
790dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_ENTER(as, RW_READER);
7917eea693dSMark Johnson 
7927eea693dSMark Johnson 	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
7937eea693dSMark Johnson 	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
7947eea693dSMark Johnson 	    (seg->s_base + seg->s_size))) {
795dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_EXIT(as);
7967eea693dSMark Johnson 		return (DDI_FAILURE);
7977eea693dSMark Johnson 	}
7987eea693dSMark Johnson 
7997eea693dSMark Johnson 	/*
8007eea693dSMark Johnson 	 * lock down the htables so the HAT can't steal them. Register the
8017eea693dSMark Johnson 	 * PTE MA's for each gref page with seg_mf so we can do user space
8027eea693dSMark Johnson 	 * gref mappings.
8037eea693dSMark Johnson 	 */
8047eea693dSMark Johnson 	for (i = 0; i < pgcnt; i++) {
8057eea693dSMark Johnson 		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
8067eea693dSMark Johnson 		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
8077eea693dSMark Johnson 		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
8087eea693dSMark Johnson 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
8097eea693dSMark Johnson 		hat_release_mapping(as->a_hat, uaddr);
8107eea693dSMark Johnson 		segmf_add_gref_pte(seg, uaddr, pte_ma);
8117eea693dSMark Johnson 		uaddr += PAGESIZE;
8127eea693dSMark Johnson 	}
8137eea693dSMark Johnson 
8147eea693dSMark Johnson 	state->bt_map.um_registered = B_TRUE;
8157eea693dSMark Johnson 
816dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_EXIT(as);
8177eea693dSMark Johnson 
8187eea693dSMark Johnson 	return (DDI_SUCCESS);
8197eea693dSMark Johnson }
8207eea693dSMark Johnson 
8217eea693dSMark Johnson 
8227eea693dSMark Johnson /*
8237eea693dSMark Johnson  * xpvtap_segmf_unregister()
8247eea693dSMark Johnson  *    as_callback routine
8257eea693dSMark Johnson  */
8267eea693dSMark Johnson /*ARGSUSED*/
8277eea693dSMark Johnson static void
xpvtap_segmf_unregister(struct as * as,void * arg,uint_t event)8287eea693dSMark Johnson xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
8297eea693dSMark Johnson {
8307eea693dSMark Johnson 	xpvtap_state_t *state;
8317eea693dSMark Johnson 	caddr_t uaddr;
8327eea693dSMark Johnson 	uint_t pgcnt;
8337eea693dSMark Johnson 	int i;
8347eea693dSMark Johnson 
8357eea693dSMark Johnson 
8367eea693dSMark Johnson 	state = (xpvtap_state_t *)arg;
8377eea693dSMark Johnson 	if (!state->bt_map.um_registered) {
83855501136SMark Johnson 		/* remove the callback (which is this routine) */
83955501136SMark Johnson 		(void) as_delete_callback(as, arg);
8407eea693dSMark Johnson 		return;
8417eea693dSMark Johnson 	}
8427eea693dSMark Johnson 
8437eea693dSMark Johnson 	pgcnt = btopr(state->bt_map.um_guest_size);
8447eea693dSMark Johnson 	uaddr = state->bt_map.um_guest_pages;
8457eea693dSMark Johnson 
8467eea693dSMark Johnson 	/* unmap any outstanding req's grefs */
8477eea693dSMark Johnson 	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
8487eea693dSMark Johnson 
8497eea693dSMark Johnson 	/* Unlock the gref pages */
8507eea693dSMark Johnson 	for (i = 0; i < pgcnt; i++) {
851dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_ENTER(as, RW_WRITER);
8527eea693dSMark Johnson 		hat_prepare_mapping(as->a_hat, uaddr, NULL);
8537eea693dSMark Johnson 		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
8547eea693dSMark Johnson 		hat_release_mapping(as->a_hat, uaddr);
855dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_EXIT(as);
8567eea693dSMark Johnson 		uaddr += PAGESIZE;
8577eea693dSMark Johnson 	}
8587eea693dSMark Johnson 
8597eea693dSMark Johnson 	/* remove the callback (which is this routine) */
8607eea693dSMark Johnson 	(void) as_delete_callback(as, arg);
8617eea693dSMark Johnson 
8627eea693dSMark Johnson 	state->bt_map.um_registered = B_FALSE;
8637eea693dSMark Johnson }
8647eea693dSMark Johnson 
8657eea693dSMark Johnson 
8667eea693dSMark Johnson /*
8677eea693dSMark Johnson  * xpvtap_user_init()
8687eea693dSMark Johnson  */
8697eea693dSMark Johnson static int
xpvtap_user_init(xpvtap_state_t * state)8707eea693dSMark Johnson xpvtap_user_init(xpvtap_state_t *state)
8717eea693dSMark Johnson {
8727eea693dSMark Johnson 	xpvtap_user_map_t *map;
8737eea693dSMark Johnson 	int e;
8747eea693dSMark Johnson 
8757eea693dSMark Johnson 
8767eea693dSMark Johnson 	map = &state->bt_map;
8777eea693dSMark Johnson 
8787eea693dSMark Johnson 	/* Setup the ring between the driver and user app */
8797eea693dSMark Johnson 	e = xpvtap_user_ring_init(state);
8807eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
8817eea693dSMark Johnson 		return (DDI_FAILURE);
8827eea693dSMark Johnson 	}
8837eea693dSMark Johnson 
8847eea693dSMark Johnson 	/*
8857eea693dSMark Johnson 	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
8867eea693dSMark Johnson 	 * is the same number of requests as the guest ring. Initialize the
8877eea693dSMark Johnson 	 * state we use to track request IDs to the user app. These IDs will
8887eea693dSMark Johnson 	 * also identify which group of gref pages correspond with the
8897eea693dSMark Johnson 	 * request.
8907eea693dSMark Johnson 	 */
8917eea693dSMark Johnson 	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
8927eea693dSMark Johnson 
8937eea693dSMark Johnson 	/*
8947eea693dSMark Johnson 	 * allocate the space to store a copy of each outstanding requests. We
8957eea693dSMark Johnson 	 * will need to reference the ID and the number of segments when we
8967eea693dSMark Johnson 	 * get the response from the user app.
8977eea693dSMark Johnson 	 */
8987eea693dSMark Johnson 	map->um_outstanding_reqs = kmem_zalloc(
8997eea693dSMark Johnson 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
9007eea693dSMark Johnson 	    KM_SLEEP);
9017eea693dSMark Johnson 
9027eea693dSMark Johnson 	/*
9037eea693dSMark Johnson 	 * initialize the thread we use to process guest requests and user
9047eea693dSMark Johnson 	 * responses.
9057eea693dSMark Johnson 	 */
9067eea693dSMark Johnson 	e = xpvtap_user_thread_init(state);
9077eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
9087eea693dSMark Johnson 		goto userinitfail_user_thread_init;
9097eea693dSMark Johnson 	}
9107eea693dSMark Johnson 
9117eea693dSMark Johnson 	return (DDI_SUCCESS);
9127eea693dSMark Johnson 
9137eea693dSMark Johnson userinitfail_user_thread_init:
9147eea693dSMark Johnson 	xpvtap_rs_fini(&map->um_rs);
9157eea693dSMark Johnson 	kmem_free(map->um_outstanding_reqs,
9167eea693dSMark Johnson 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
9177eea693dSMark Johnson 	xpvtap_user_ring_fini(state);
9187eea693dSMark Johnson 	return (DDI_FAILURE);
9197eea693dSMark Johnson }
9207eea693dSMark Johnson 
9217eea693dSMark Johnson 
9227eea693dSMark Johnson /*
9237eea693dSMark Johnson  * xpvtap_user_ring_init()
9247eea693dSMark Johnson  */
9257eea693dSMark Johnson static int
xpvtap_user_ring_init(xpvtap_state_t * state)9267eea693dSMark Johnson xpvtap_user_ring_init(xpvtap_state_t *state)
9277eea693dSMark Johnson {
9287eea693dSMark Johnson 	xpvtap_user_ring_t *usring;
9297eea693dSMark Johnson 
9307eea693dSMark Johnson 
9317eea693dSMark Johnson 	usring = &state->bt_user_ring;
9327eea693dSMark Johnson 
9337eea693dSMark Johnson 	/* alocate and initialize the page for the shared user ring */
9347eea693dSMark Johnson 	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
9357eea693dSMark Johnson 	    DDI_UMEM_SLEEP, &usring->ur_cookie);
9367eea693dSMark Johnson 	SHARED_RING_INIT(usring->ur_sring);
9377eea693dSMark Johnson 	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
9387eea693dSMark Johnson 	usring->ur_prod_polled = 0;
9397eea693dSMark Johnson 
9407eea693dSMark Johnson 	return (DDI_SUCCESS);
9417eea693dSMark Johnson }
9427eea693dSMark Johnson 
9437eea693dSMark Johnson 
9447eea693dSMark Johnson /*
9457eea693dSMark Johnson  * xpvtap_user_thread_init()
9467eea693dSMark Johnson  */
9477eea693dSMark Johnson static int
xpvtap_user_thread_init(xpvtap_state_t * state)9487eea693dSMark Johnson xpvtap_user_thread_init(xpvtap_state_t *state)
9497eea693dSMark Johnson {
9507eea693dSMark Johnson 	xpvtap_user_thread_t *thread;
9517eea693dSMark Johnson 	char taskqname[32];
9527eea693dSMark Johnson 
9537eea693dSMark Johnson 
9547eea693dSMark Johnson 	thread = &state->bt_thread;
9557eea693dSMark Johnson 
9567eea693dSMark Johnson 	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
9577eea693dSMark Johnson 	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
9587eea693dSMark Johnson 	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
9597eea693dSMark Johnson 	thread->ut_wake = B_FALSE;
9607eea693dSMark Johnson 	thread->ut_exit = B_FALSE;
9617eea693dSMark Johnson 	thread->ut_exit_done = B_TRUE;
9627eea693dSMark Johnson 
9637eea693dSMark Johnson 	/* create but don't start the user thread */
9647eea693dSMark Johnson 	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
9657eea693dSMark Johnson 	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
9667eea693dSMark Johnson 	    TASKQ_DEFAULTPRI, 0);
9677eea693dSMark Johnson 	if (thread->ut_taskq == NULL) {
9687eea693dSMark Johnson 		goto userinitthrfail_taskq_create;
9697eea693dSMark Johnson 	}
9707eea693dSMark Johnson 
9717eea693dSMark Johnson 	return (DDI_SUCCESS);
9727eea693dSMark Johnson 
9737eea693dSMark Johnson userinitthrfail_taskq_dispatch:
9747eea693dSMark Johnson 	ddi_taskq_destroy(thread->ut_taskq);
9757eea693dSMark Johnson userinitthrfail_taskq_create:
9767eea693dSMark Johnson 	cv_destroy(&thread->ut_exit_done_cv);
9777eea693dSMark Johnson 	cv_destroy(&thread->ut_wake_cv);
9787eea693dSMark Johnson 	mutex_destroy(&thread->ut_mutex);
9797eea693dSMark Johnson 
9807eea693dSMark Johnson 	return (DDI_FAILURE);
9817eea693dSMark Johnson }
9827eea693dSMark Johnson 
9837eea693dSMark Johnson 
9847eea693dSMark Johnson /*
9857eea693dSMark Johnson  * xpvtap_user_thread_start()
9867eea693dSMark Johnson  */
9877eea693dSMark Johnson static void
xpvtap_user_thread_start(caddr_t arg)9887eea693dSMark Johnson xpvtap_user_thread_start(caddr_t arg)
9897eea693dSMark Johnson {
9907eea693dSMark Johnson 	xpvtap_user_thread_t *thread;
9917eea693dSMark Johnson 	xpvtap_state_t *state;
9927eea693dSMark Johnson 	int e;
9937eea693dSMark Johnson 
9947eea693dSMark Johnson 
9957eea693dSMark Johnson 	state = (xpvtap_state_t *)arg;
9967eea693dSMark Johnson 	thread = &state->bt_thread;
9977eea693dSMark Johnson 
9987eea693dSMark Johnson 	/* start the user thread */
9997eea693dSMark Johnson 	thread->ut_exit_done = B_FALSE;
10007eea693dSMark Johnson 	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
10017eea693dSMark Johnson 	    DDI_SLEEP);
10027eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
10037eea693dSMark Johnson 		thread->ut_exit_done = B_TRUE;
10047eea693dSMark Johnson 		cmn_err(CE_WARN, "Unable to start user thread\n");
10057eea693dSMark Johnson 	}
10067eea693dSMark Johnson }
10077eea693dSMark Johnson 
10087eea693dSMark Johnson 
10097eea693dSMark Johnson /*
10107eea693dSMark Johnson  * xpvtap_user_thread_stop()
10117eea693dSMark Johnson  */
10127eea693dSMark Johnson static void
xpvtap_user_thread_stop(xpvtap_state_t * state)10137eea693dSMark Johnson xpvtap_user_thread_stop(xpvtap_state_t *state)
10147eea693dSMark Johnson {
10157eea693dSMark Johnson 	/* wake thread so it can exit */
10167eea693dSMark Johnson 	mutex_enter(&state->bt_thread.ut_mutex);
10177eea693dSMark Johnson 	state->bt_thread.ut_wake = B_TRUE;
10187eea693dSMark Johnson 	state->bt_thread.ut_exit = B_TRUE;
10197eea693dSMark Johnson 	cv_signal(&state->bt_thread.ut_wake_cv);
10207eea693dSMark Johnson 	if (!state->bt_thread.ut_exit_done) {
10217eea693dSMark Johnson 		cv_wait(&state->bt_thread.ut_exit_done_cv,
10227eea693dSMark Johnson 		    &state->bt_thread.ut_mutex);
10237eea693dSMark Johnson 	}
10247eea693dSMark Johnson 	mutex_exit(&state->bt_thread.ut_mutex);
10257eea693dSMark Johnson 	ASSERT(state->bt_thread.ut_exit_done);
10267eea693dSMark Johnson }
10277eea693dSMark Johnson 
10287eea693dSMark Johnson 
10297eea693dSMark Johnson /*
10307eea693dSMark Johnson  * xpvtap_user_fini()
10317eea693dSMark Johnson  */
10327eea693dSMark Johnson static void
xpvtap_user_fini(xpvtap_state_t * state)10337eea693dSMark Johnson xpvtap_user_fini(xpvtap_state_t *state)
10347eea693dSMark Johnson {
10357eea693dSMark Johnson 	xpvtap_user_map_t *map;
10367eea693dSMark Johnson 
10377eea693dSMark Johnson 
10387eea693dSMark Johnson 	map = &state->bt_map;
10397eea693dSMark Johnson 
10407eea693dSMark Johnson 	xpvtap_user_thread_fini(state);
10417eea693dSMark Johnson 	xpvtap_rs_fini(&map->um_rs);
10427eea693dSMark Johnson 	kmem_free(map->um_outstanding_reqs,
10437eea693dSMark Johnson 	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
10447eea693dSMark Johnson 	xpvtap_user_ring_fini(state);
10457eea693dSMark Johnson }
10467eea693dSMark Johnson 
10477eea693dSMark Johnson 
10487eea693dSMark Johnson /*
10497eea693dSMark Johnson  * xpvtap_user_ring_fini()
10507eea693dSMark Johnson  */
10517eea693dSMark Johnson static void
xpvtap_user_ring_fini(xpvtap_state_t * state)10527eea693dSMark Johnson xpvtap_user_ring_fini(xpvtap_state_t *state)
10537eea693dSMark Johnson {
10547eea693dSMark Johnson 	ddi_umem_free(state->bt_user_ring.ur_cookie);
10557eea693dSMark Johnson }
10567eea693dSMark Johnson 
10577eea693dSMark Johnson 
10587eea693dSMark Johnson /*
10597eea693dSMark Johnson  * xpvtap_user_thread_fini()
10607eea693dSMark Johnson  */
10617eea693dSMark Johnson static void
xpvtap_user_thread_fini(xpvtap_state_t * state)10627eea693dSMark Johnson xpvtap_user_thread_fini(xpvtap_state_t *state)
10637eea693dSMark Johnson {
10647eea693dSMark Johnson 	ddi_taskq_destroy(state->bt_thread.ut_taskq);
10657eea693dSMark Johnson 	cv_destroy(&state->bt_thread.ut_exit_done_cv);
10667eea693dSMark Johnson 	cv_destroy(&state->bt_thread.ut_wake_cv);
10677eea693dSMark Johnson 	mutex_destroy(&state->bt_thread.ut_mutex);
10687eea693dSMark Johnson }
10697eea693dSMark Johnson 
10707eea693dSMark Johnson 
10717eea693dSMark Johnson /*
10727eea693dSMark Johnson  * xpvtap_user_thread()
10737eea693dSMark Johnson  */
10747eea693dSMark Johnson static void
xpvtap_user_thread(void * arg)10757eea693dSMark Johnson xpvtap_user_thread(void *arg)
10767eea693dSMark Johnson {
10777eea693dSMark Johnson 	xpvtap_user_thread_t *thread;
10787eea693dSMark Johnson 	blkif_response_t resp;
10797eea693dSMark Johnson 	xpvtap_state_t *state;
10807eea693dSMark Johnson 	blkif_request_t req;
10817eea693dSMark Johnson 	boolean_t b;
10827eea693dSMark Johnson 	uint_t uid;
10837eea693dSMark Johnson 	int e;
10847eea693dSMark Johnson 
10857eea693dSMark Johnson 
10867eea693dSMark Johnson 	state = (xpvtap_state_t *)arg;
10877eea693dSMark Johnson 	thread = &state->bt_thread;
10887eea693dSMark Johnson 
10897eea693dSMark Johnson xpvtap_thread_start:
10907eea693dSMark Johnson 	/* See if we are supposed to exit */
10917eea693dSMark Johnson 	mutex_enter(&thread->ut_mutex);
10927eea693dSMark Johnson 	if (thread->ut_exit) {
10937eea693dSMark Johnson 		thread->ut_exit_done = B_TRUE;
10947eea693dSMark Johnson 		cv_signal(&state->bt_thread.ut_exit_done_cv);
10957eea693dSMark Johnson 		mutex_exit(&thread->ut_mutex);
10967eea693dSMark Johnson 		return;
10977eea693dSMark Johnson 	}
10987eea693dSMark Johnson 
10997eea693dSMark Johnson 	/*
11007eea693dSMark Johnson 	 * if we aren't supposed to be awake, wait until someone wakes us.
11017eea693dSMark Johnson 	 * when we wake up, check for a kill or someone telling us to exit.
11027eea693dSMark Johnson 	 */
11037eea693dSMark Johnson 	if (!thread->ut_wake) {
11047eea693dSMark Johnson 		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
11057eea693dSMark Johnson 		if ((e == 0) || (thread->ut_exit)) {
11067eea693dSMark Johnson 			thread->ut_exit = B_TRUE;
11077eea693dSMark Johnson 			mutex_exit(&thread->ut_mutex);
11087eea693dSMark Johnson 			goto xpvtap_thread_start;
11097eea693dSMark Johnson 		}
11107eea693dSMark Johnson 	}
11117eea693dSMark Johnson 
11127eea693dSMark Johnson 	/* if someone didn't wake us, go back to the start of the thread */
11137eea693dSMark Johnson 	if (!thread->ut_wake) {
11147eea693dSMark Johnson 		mutex_exit(&thread->ut_mutex);
11157eea693dSMark Johnson 		goto xpvtap_thread_start;
11167eea693dSMark Johnson 	}
11177eea693dSMark Johnson 
11187eea693dSMark Johnson 	/* we are awake */
11197eea693dSMark Johnson 	thread->ut_wake = B_FALSE;
11207eea693dSMark Johnson 	mutex_exit(&thread->ut_mutex);
11217eea693dSMark Johnson 
11227eea693dSMark Johnson 	/* process requests from the guest */
11237eea693dSMark Johnson 	do {
11247eea693dSMark Johnson 		/*
11257eea693dSMark Johnson 		 * check for requests from the guest. if we don't have any,
11267eea693dSMark Johnson 		 * break out of the loop.
11277eea693dSMark Johnson 		 */
11287eea693dSMark Johnson 		e = blk_ring_request_get(state->bt_guest_ring, &req);
11297eea693dSMark Johnson 		if (e == B_FALSE) {
11307eea693dSMark Johnson 			break;
11317eea693dSMark Johnson 		}
11327eea693dSMark Johnson 
11337eea693dSMark Johnson 		/* we got a request, map the grefs into the user app's VA */
11347eea693dSMark Johnson 		e = xpvtap_user_request_map(state, &req, &uid);
11357eea693dSMark Johnson 		if (e != DDI_SUCCESS) {
11367eea693dSMark Johnson 			/*
11377eea693dSMark Johnson 			 * If we couldn't map the request (e.g. user app hasn't
11387eea693dSMark Johnson 			 * opened the device yet), requeue it and try again
11397eea693dSMark Johnson 			 * later
11407eea693dSMark Johnson 			 */
11417eea693dSMark Johnson 			blk_ring_request_requeue(state->bt_guest_ring);
11427eea693dSMark Johnson 			break;
11437eea693dSMark Johnson 		}
11447eea693dSMark Johnson 
11457eea693dSMark Johnson 		/* push the request to the user app */
11467eea693dSMark Johnson 		e = xpvtap_user_request_push(state, &req, uid);
11477eea693dSMark Johnson 		if (e != DDI_SUCCESS) {
11487eea693dSMark Johnson 			resp.id = req.id;
11497eea693dSMark Johnson 			resp.operation = req.operation;
11507eea693dSMark Johnson 			resp.status = BLKIF_RSP_ERROR;
11517eea693dSMark Johnson 			blk_ring_response_put(state->bt_guest_ring, &resp);
11527eea693dSMark Johnson 		}
11537eea693dSMark Johnson 	} while (!thread->ut_exit);
11547eea693dSMark Johnson 
11557eea693dSMark Johnson 	/* process reponses from the user app */
11567eea693dSMark Johnson 	do {
11577eea693dSMark Johnson 		/*
11587eea693dSMark Johnson 		 * check for responses from the user app. if we don't have any,
11597eea693dSMark Johnson 		 * break out of the loop.
11607eea693dSMark Johnson 		 */
11617eea693dSMark Johnson 		b = xpvtap_user_response_get(state, &resp, &uid);
11627eea693dSMark Johnson 		if (b != B_TRUE) {
11637eea693dSMark Johnson 			break;
11647eea693dSMark Johnson 		}
11657eea693dSMark Johnson 
11667eea693dSMark Johnson 		/*
11677eea693dSMark Johnson 		 * if we got a response, unmap the grefs from the matching
11687eea693dSMark Johnson 		 * request.
11697eea693dSMark Johnson 		 */
11707eea693dSMark Johnson 		xpvtap_user_request_unmap(state, uid);
11717eea693dSMark Johnson 
11727eea693dSMark Johnson 		/* push the response to the guest */
11737eea693dSMark Johnson 		blk_ring_response_put(state->bt_guest_ring, &resp);
11747eea693dSMark Johnson 	} while (!thread->ut_exit);
11757eea693dSMark Johnson 
11767eea693dSMark Johnson 	goto xpvtap_thread_start;
11777eea693dSMark Johnson }
11787eea693dSMark Johnson 
11797eea693dSMark Johnson 
11807eea693dSMark Johnson /*
11817eea693dSMark Johnson  * xpvtap_user_request_map()
11827eea693dSMark Johnson  */
11837eea693dSMark Johnson static int
xpvtap_user_request_map(xpvtap_state_t * state,blkif_request_t * req,uint_t * uid)11847eea693dSMark Johnson xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
11857eea693dSMark Johnson     uint_t *uid)
11867eea693dSMark Johnson {
11877eea693dSMark Johnson 	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
11887eea693dSMark Johnson 	struct seg *seg;
11897eea693dSMark Johnson 	struct as *as;
11907eea693dSMark Johnson 	domid_t domid;
11917eea693dSMark Johnson 	caddr_t uaddr;
11927eea693dSMark Johnson 	uint_t flags;
11937eea693dSMark Johnson 	int i;
11947eea693dSMark Johnson 	int e;
11957eea693dSMark Johnson 
11967eea693dSMark Johnson 
11977eea693dSMark Johnson 	domid = xvdi_get_oeid(state->bt_dip);
11987eea693dSMark Johnson 
11997eea693dSMark Johnson 	as = state->bt_map.um_as;
12007eea693dSMark Johnson 	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
12017eea693dSMark Johnson 		return (DDI_FAILURE);
12027eea693dSMark Johnson 	}
12037eea693dSMark Johnson 
12047eea693dSMark Johnson 	/* has to happen after segmap returns */
12057eea693dSMark Johnson 	if (!state->bt_map.um_registered) {
12067eea693dSMark Johnson 		/* register the pte's with segmf */
12077eea693dSMark Johnson 		e = xpvtap_segmf_register(state);
12087eea693dSMark Johnson 		if (e != DDI_SUCCESS) {
12097eea693dSMark Johnson 			return (DDI_FAILURE);
12107eea693dSMark Johnson 		}
12117eea693dSMark Johnson 	}
12127eea693dSMark Johnson 
12137eea693dSMark Johnson 	/* alloc an ID for the user ring */
12147eea693dSMark Johnson 	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
12157eea693dSMark Johnson 	if (e != DDI_SUCCESS) {
12167eea693dSMark Johnson 		return (DDI_FAILURE);
12177eea693dSMark Johnson 	}
12187eea693dSMark Johnson 
12197eea693dSMark Johnson 	/* if we don't have any segments to map, we're done */
12207eea693dSMark Johnson 	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
12217eea693dSMark Johnson 	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
12227eea693dSMark Johnson 	    (req->nr_segments == 0)) {
12237eea693dSMark Johnson 		return (DDI_SUCCESS);
12247eea693dSMark Johnson 	}
12257eea693dSMark Johnson 
12267eea693dSMark Johnson 	/* get the apps gref address */
12277eea693dSMark Johnson 	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
12287eea693dSMark Johnson 
1229dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_ENTER(as, RW_READER);
12307eea693dSMark Johnson 	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
12317eea693dSMark Johnson 	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
12327eea693dSMark Johnson 	    (seg->s_base + seg->s_size))) {
1233dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_EXIT(as);
12347eea693dSMark Johnson 		return (DDI_FAILURE);
12357eea693dSMark Johnson 	}
12367eea693dSMark Johnson 
12377eea693dSMark Johnson 	/* if we are reading from disk, we are writing into memory */
12387eea693dSMark Johnson 	flags = 0;
12397eea693dSMark Johnson 	if (req->operation == BLKIF_OP_READ) {
12407eea693dSMark Johnson 		flags |= SEGMF_GREF_WR;
12417eea693dSMark Johnson 	}
12427eea693dSMark Johnson 
12437eea693dSMark Johnson 	/* Load the grefs into seg_mf */
12447eea693dSMark Johnson 	for (i = 0; i < req->nr_segments; i++) {
12457eea693dSMark Johnson 		gref[i] = req->seg[i].gref;
12467eea693dSMark Johnson 	}
12477eea693dSMark Johnson 	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
12487eea693dSMark Johnson 	    domid);
12497eea693dSMark Johnson 
1250dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_EXIT(as);
12517eea693dSMark Johnson 
12527eea693dSMark Johnson 	return (DDI_SUCCESS);
12537eea693dSMark Johnson }
12547eea693dSMark Johnson 
12557eea693dSMark Johnson 
12567eea693dSMark Johnson /*
12577eea693dSMark Johnson  * xpvtap_user_request_push()
12587eea693dSMark Johnson  */
12597eea693dSMark Johnson static int
xpvtap_user_request_push(xpvtap_state_t * state,blkif_request_t * req,uint_t uid)12607eea693dSMark Johnson xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
12617eea693dSMark Johnson     uint_t uid)
12627eea693dSMark Johnson {
12637eea693dSMark Johnson 	blkif_request_t *outstanding_req;
12647eea693dSMark Johnson 	blkif_front_ring_t *uring;
12657eea693dSMark Johnson 	blkif_request_t *target;
12667eea693dSMark Johnson 	xpvtap_user_map_t *map;
12677eea693dSMark Johnson 
12687eea693dSMark Johnson 
12697eea693dSMark Johnson 	uring = &state->bt_user_ring.ur_ring;
12707eea693dSMark Johnson 	map = &state->bt_map;
12717eea693dSMark Johnson 
12727eea693dSMark Johnson 	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
12737eea693dSMark Johnson 
12747eea693dSMark Johnson 	/*
12757eea693dSMark Johnson 	 * Save request from the frontend. used for ID mapping and unmap
12767eea693dSMark Johnson 	 * on response/cleanup
12777eea693dSMark Johnson 	 */
12787eea693dSMark Johnson 	outstanding_req = &map->um_outstanding_reqs[uid];
12797eea693dSMark Johnson 	bcopy(req, outstanding_req, sizeof (*outstanding_req));
12807eea693dSMark Johnson 
12817eea693dSMark Johnson 	/* put the request on the user ring */
12827eea693dSMark Johnson 	bcopy(req, target, sizeof (*req));
12837eea693dSMark Johnson 	target->id = (uint64_t)uid;
12847eea693dSMark Johnson 	uring->req_prod_pvt++;
12857eea693dSMark Johnson 
12867eea693dSMark Johnson 	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
12877eea693dSMark Johnson 
12887eea693dSMark Johnson 	return (DDI_SUCCESS);
12897eea693dSMark Johnson }
12907eea693dSMark Johnson 
12917eea693dSMark Johnson 
12927eea693dSMark Johnson static void
xpvtap_user_request_unmap(xpvtap_state_t * state,uint_t uid)12937eea693dSMark Johnson xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
12947eea693dSMark Johnson {
12957eea693dSMark Johnson 	blkif_request_t *req;
12967eea693dSMark Johnson 	struct seg *seg;
12977eea693dSMark Johnson 	struct as *as;
12987eea693dSMark Johnson 	caddr_t uaddr;
12997eea693dSMark Johnson 	int e;
13007eea693dSMark Johnson 
13017eea693dSMark Johnson 
13027eea693dSMark Johnson 	as = state->bt_map.um_as;
13037eea693dSMark Johnson 	if (as == NULL) {
13047eea693dSMark Johnson 		return;
13057eea693dSMark Johnson 	}
13067eea693dSMark Johnson 
13077eea693dSMark Johnson 	/* get a copy of the original request */
13087eea693dSMark Johnson 	req = &state->bt_map.um_outstanding_reqs[uid];
13097eea693dSMark Johnson 
13107eea693dSMark Johnson 	/* unmap the grefs for this request */
13117eea693dSMark Johnson 	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
13127eea693dSMark Johnson 	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
13137eea693dSMark Johnson 	    (req->nr_segments != 0)) {
13147eea693dSMark Johnson 		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
1315dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_ENTER(as, RW_READER);
13167eea693dSMark Johnson 		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
13177eea693dSMark Johnson 		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
13187eea693dSMark Johnson 		    (seg->s_base + seg->s_size))) {
1319dc32d872SJosef 'Jeff' Sipek 			AS_LOCK_EXIT(as);
13207eea693dSMark Johnson 			xpvtap_rs_free(state->bt_map.um_rs, uid);
13217eea693dSMark Johnson 			return;
13227eea693dSMark Johnson 		}
13237eea693dSMark Johnson 
13247eea693dSMark Johnson 		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
13257eea693dSMark Johnson 		if (e != 0) {
13267eea693dSMark Johnson 			cmn_err(CE_WARN, "unable to release grefs");
13277eea693dSMark Johnson 		}
13287eea693dSMark Johnson 
1329dc32d872SJosef 'Jeff' Sipek 		AS_LOCK_EXIT(as);
13307eea693dSMark Johnson 	}
13317eea693dSMark Johnson 
13327eea693dSMark Johnson 	/* free up the user ring id */
13337eea693dSMark Johnson 	xpvtap_rs_free(state->bt_map.um_rs, uid);
13347eea693dSMark Johnson }
13357eea693dSMark Johnson 
13367eea693dSMark Johnson 
13377eea693dSMark Johnson static int
xpvtap_user_response_get(xpvtap_state_t * state,blkif_response_t * resp,uint_t * uid)13387eea693dSMark Johnson xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
13397eea693dSMark Johnson     uint_t *uid)
13407eea693dSMark Johnson {
13417eea693dSMark Johnson 	blkif_front_ring_t *uring;
13427eea693dSMark Johnson 	blkif_response_t *target;
13437eea693dSMark Johnson 
13447eea693dSMark Johnson 
13457eea693dSMark Johnson 	uring = &state->bt_user_ring.ur_ring;
13467eea693dSMark Johnson 
13477eea693dSMark Johnson 	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
13487eea693dSMark Johnson 		return (B_FALSE);
13497eea693dSMark Johnson 	}
13507eea693dSMark Johnson 
13517eea693dSMark Johnson 	target = NULL;
13527eea693dSMark Johnson 	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
13537eea693dSMark Johnson 	if (target == NULL) {
13547eea693dSMark Johnson 		return (B_FALSE);
13557eea693dSMark Johnson 	}
13567eea693dSMark Johnson 
13577eea693dSMark Johnson 	/* copy out the user app response */
13587eea693dSMark Johnson 	bcopy(target, resp, sizeof (*resp));
13597eea693dSMark Johnson 	uring->rsp_cons++;
13607eea693dSMark Johnson 
13617eea693dSMark Johnson 	/* restore the quests id from the original request */
13627eea693dSMark Johnson 	*uid = (uint_t)resp->id;
13637eea693dSMark Johnson 	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
13647eea693dSMark Johnson 
13657eea693dSMark Johnson 	return (B_TRUE);
13667eea693dSMark Johnson }
13677eea693dSMark Johnson 
13687eea693dSMark Johnson 
13697eea693dSMark Johnson /*
13707eea693dSMark Johnson  * xpvtap_user_app_stop()
13717eea693dSMark Johnson  */
xpvtap_user_app_stop(caddr_t arg)13727eea693dSMark Johnson static void xpvtap_user_app_stop(caddr_t arg)
13737eea693dSMark Johnson {
13747eea693dSMark Johnson 	xpvtap_state_t *state;
13757eea693dSMark Johnson 	clock_t rc;
13767eea693dSMark Johnson 
13777eea693dSMark Johnson 	state = (xpvtap_state_t *)arg;
13787eea693dSMark Johnson 
13797eea693dSMark Johnson 	/*
13807eea693dSMark Johnson 	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
13817eea693dSMark Johnson 	 * problem, we just won't auto-detach the driver.
13827eea693dSMark Johnson 	 */
13837eea693dSMark Johnson 	mutex_enter(&state->bt_open.bo_mutex);
13847eea693dSMark Johnson 	if (state->bt_open.bo_opened) {
1385d3d50737SRafael Vanoni 		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
1386d3d50737SRafael Vanoni 		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
1387d3d50737SRafael Vanoni 		    TR_CLOCK_TICK);
13887eea693dSMark Johnson 		if (rc <= 0) {
13897eea693dSMark Johnson 			cmn_err(CE_NOTE, "!user process still has driver open, "
13907eea693dSMark Johnson 			    "deferring detach\n");
13917eea693dSMark Johnson 		}
13927eea693dSMark Johnson 	}
13937eea693dSMark Johnson 	mutex_exit(&state->bt_open.bo_mutex);
13947eea693dSMark Johnson }
13957eea693dSMark Johnson 
13967eea693dSMark Johnson 
13977eea693dSMark Johnson /*
13987eea693dSMark Johnson  * xpvtap_rs_init()
13997eea693dSMark Johnson  *    Initialize the resource structure. init() returns a handle to be used
14007eea693dSMark Johnson  *    for the rest of the resource functions. This code is written assuming
14017eea693dSMark Johnson  *    that min_val will be close to 0. Therefore, we will allocate the free
14027eea693dSMark Johnson  *    buffer only taking max_val into account.
14037eea693dSMark Johnson  */
14047eea693dSMark Johnson static void
xpvtap_rs_init(uint_t min_val,uint_t max_val,xpvtap_rs_hdl_t * handle)14057eea693dSMark Johnson xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
14067eea693dSMark Johnson {
14077eea693dSMark Johnson 	xpvtap_rs_t *rstruct;
14087eea693dSMark Johnson 	uint_t array_size;
14097eea693dSMark Johnson 	uint_t index;
14107eea693dSMark Johnson 
14117eea693dSMark Johnson 
14127eea693dSMark Johnson 	ASSERT(handle != NULL);
14137eea693dSMark Johnson 	ASSERT(min_val < max_val);
14147eea693dSMark Johnson 
14157eea693dSMark Johnson 	/* alloc space for resource structure */
14167eea693dSMark Johnson 	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
14177eea693dSMark Johnson 
14187eea693dSMark Johnson 	/*
14197eea693dSMark Johnson 	 * Test to see if the max value is 64-bit aligned. If so, we don't need
14207eea693dSMark Johnson 	 * to allocate an extra 64-bit word. alloc space for free buffer
14217eea693dSMark Johnson 	 * (8 bytes per uint64_t).
14227eea693dSMark Johnson 	 */
14237eea693dSMark Johnson 	if ((max_val & 0x3F) == 0) {
14247eea693dSMark Johnson 		rstruct->rs_free_size = (max_val >> 6) * 8;
14257eea693dSMark Johnson 	} else {
14267eea693dSMark Johnson 		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
14277eea693dSMark Johnson 	}
14287eea693dSMark Johnson 	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
14297eea693dSMark Johnson 
14307eea693dSMark Johnson 	/* Initialize resource structure */
14317eea693dSMark Johnson 	rstruct->rs_min = min_val;
14327eea693dSMark Johnson 	rstruct->rs_last = min_val;
14337eea693dSMark Johnson 	rstruct->rs_max = max_val;
14347eea693dSMark Johnson 	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
14357eea693dSMark Johnson 	rstruct->rs_flushing = B_FALSE;
14367eea693dSMark Johnson 
14377eea693dSMark Johnson 	/* Mark all resources as free */
14387eea693dSMark Johnson 	array_size = rstruct->rs_free_size >> 3;
14397eea693dSMark Johnson 	for (index = 0; index < array_size; index++) {
14407eea693dSMark Johnson 		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
14417eea693dSMark Johnson 	}
14427eea693dSMark Johnson 
14437eea693dSMark Johnson 	/* setup handle which is returned from this function */
14447eea693dSMark Johnson 	*handle = rstruct;
14457eea693dSMark Johnson }
14467eea693dSMark Johnson 
14477eea693dSMark Johnson 
14487eea693dSMark Johnson /*
14497eea693dSMark Johnson  * xpvtap_rs_fini()
14507eea693dSMark Johnson  *    Frees up the space allocated in init().  Notice that a pointer to the
14517eea693dSMark Johnson  *    handle is used for the parameter.  fini() will set the handle to NULL
14527eea693dSMark Johnson  *    before returning.
14537eea693dSMark Johnson  */
14547eea693dSMark Johnson static void
xpvtap_rs_fini(xpvtap_rs_hdl_t * handle)14557eea693dSMark Johnson xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
14567eea693dSMark Johnson {
14577eea693dSMark Johnson 	xpvtap_rs_t *rstruct;
14587eea693dSMark Johnson 
14597eea693dSMark Johnson 
14607eea693dSMark Johnson 	ASSERT(handle != NULL);
14617eea693dSMark Johnson 
14627eea693dSMark Johnson 	rstruct = (xpvtap_rs_t *)*handle;
14637eea693dSMark Johnson 
14647eea693dSMark Johnson 	mutex_destroy(&rstruct->rs_mutex);
14657eea693dSMark Johnson 	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
14667eea693dSMark Johnson 	kmem_free(rstruct, sizeof (xpvtap_rs_t));
14677eea693dSMark Johnson 
14687eea693dSMark Johnson 	/* set handle to null.  This helps catch bugs. */
14697eea693dSMark Johnson 	*handle = NULL;
14707eea693dSMark Johnson }
14717eea693dSMark Johnson 
14727eea693dSMark Johnson 
14737eea693dSMark Johnson /*
14747eea693dSMark Johnson  * xpvtap_rs_alloc()
14757eea693dSMark Johnson  *    alloc a resource. If alloc fails, we are out of resources.
14767eea693dSMark Johnson  */
14777eea693dSMark Johnson static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle,uint_t * resource)14787eea693dSMark Johnson xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
14797eea693dSMark Johnson {
14807eea693dSMark Johnson 	xpvtap_rs_t *rstruct;
14817eea693dSMark Johnson 	uint_t array_idx;
14827eea693dSMark Johnson 	uint64_t free;
14837eea693dSMark Johnson 	uint_t index;
14847eea693dSMark Johnson 	uint_t last;
14857eea693dSMark Johnson 	uint_t min;
14867eea693dSMark Johnson 	uint_t max;
14877eea693dSMark Johnson 
14887eea693dSMark Johnson 
14897eea693dSMark Johnson 	ASSERT(handle != NULL);
14907eea693dSMark Johnson 	ASSERT(resource != NULL);
14917eea693dSMark Johnson 
14927eea693dSMark Johnson 	rstruct = (xpvtap_rs_t *)handle;
14937eea693dSMark Johnson 
14947eea693dSMark Johnson 	mutex_enter(&rstruct->rs_mutex);
14957eea693dSMark Johnson 	min = rstruct->rs_min;
14967eea693dSMark Johnson 	max = rstruct->rs_max;
14977eea693dSMark Johnson 
14987eea693dSMark Johnson 	/*
14997eea693dSMark Johnson 	 * Find a free resource. This will return out of the loop once it finds
15007eea693dSMark Johnson 	 * a free resource. There are a total of 'max'-'min'+1 resources.
15017eea693dSMark Johnson 	 * Performs a round robin allocation.
15027eea693dSMark Johnson 	 */
15037eea693dSMark Johnson 	for (index = min; index <= max; index++) {
15047eea693dSMark Johnson 
15057eea693dSMark Johnson 		array_idx = rstruct->rs_last >> 6;
15067eea693dSMark Johnson 		free = rstruct->rs_free[array_idx];
15077eea693dSMark Johnson 		last = rstruct->rs_last & 0x3F;
15087eea693dSMark Johnson 
15097eea693dSMark Johnson 		/* if the next resource to check is free */
15107eea693dSMark Johnson 		if ((free & ((uint64_t)1 << last)) != 0) {
15117eea693dSMark Johnson 			/* we are using this resource */
15127eea693dSMark Johnson 			*resource = rstruct->rs_last;
15137eea693dSMark Johnson 
15147eea693dSMark Johnson 			/* take it out of the free list */
15157eea693dSMark Johnson 			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
15167eea693dSMark Johnson 
15177eea693dSMark Johnson 			/*
15187eea693dSMark Johnson 			 * increment the last count so we start checking the
15197eea693dSMark Johnson 			 * next resource on the next alloc().  Note the rollover
15207eea693dSMark Johnson 			 * at 'max'+1.
15217eea693dSMark Johnson 			 */
15227eea693dSMark Johnson 			rstruct->rs_last++;
15237eea693dSMark Johnson 			if (rstruct->rs_last > max) {
15247eea693dSMark Johnson 				rstruct->rs_last = rstruct->rs_min;
15257eea693dSMark Johnson 			}
15267eea693dSMark Johnson 
15277eea693dSMark Johnson 			/* unlock the resource structure */
15287eea693dSMark Johnson 			mutex_exit(&rstruct->rs_mutex);
15297eea693dSMark Johnson 
15307eea693dSMark Johnson 			return (DDI_SUCCESS);
15317eea693dSMark Johnson 		}
15327eea693dSMark Johnson 
15337eea693dSMark Johnson 		/*
15347eea693dSMark Johnson 		 * This resource is not free, lets go to the next one. Note the
15357eea693dSMark Johnson 		 * rollover at 'max'.
15367eea693dSMark Johnson 		 */
15377eea693dSMark Johnson 		rstruct->rs_last++;
15387eea693dSMark Johnson 		if (rstruct->rs_last > max) {
15397eea693dSMark Johnson 			rstruct->rs_last = rstruct->rs_min;
15407eea693dSMark Johnson 		}
15417eea693dSMark Johnson 	}
15427eea693dSMark Johnson 
15437eea693dSMark Johnson 	mutex_exit(&rstruct->rs_mutex);
15447eea693dSMark Johnson 
15457eea693dSMark Johnson 	return (DDI_FAILURE);
15467eea693dSMark Johnson }
15477eea693dSMark Johnson 
15487eea693dSMark Johnson 
15497eea693dSMark Johnson /*
15507eea693dSMark Johnson  * xpvtap_rs_free()
15517eea693dSMark Johnson  *    Free the previously alloc'd resource.  Once a resource has been free'd,
15527eea693dSMark Johnson  *    it can be used again when alloc is called.
15537eea693dSMark Johnson  */
15547eea693dSMark Johnson static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle,uint_t resource)15557eea693dSMark Johnson xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
15567eea693dSMark Johnson {
15577eea693dSMark Johnson 	xpvtap_rs_t *rstruct;
15587eea693dSMark Johnson 	uint_t array_idx;
15597eea693dSMark Johnson 	uint_t offset;
15607eea693dSMark Johnson 
15617eea693dSMark Johnson 
15627eea693dSMark Johnson 	ASSERT(handle != NULL);
15637eea693dSMark Johnson 
15647eea693dSMark Johnson 	rstruct = (xpvtap_rs_t *)handle;
15657eea693dSMark Johnson 	ASSERT(resource >= rstruct->rs_min);
15667eea693dSMark Johnson 	ASSERT(resource <= rstruct->rs_max);
15677eea693dSMark Johnson 
15687eea693dSMark Johnson 	if (!rstruct->rs_flushing) {
15697eea693dSMark Johnson 		mutex_enter(&rstruct->rs_mutex);
15707eea693dSMark Johnson 	}
15717eea693dSMark Johnson 
15727eea693dSMark Johnson 	/* Put the resource back in the free list */
15737eea693dSMark Johnson 	array_idx = resource >> 6;
15747eea693dSMark Johnson 	offset = resource & 0x3F;
15757eea693dSMark Johnson 	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
15767eea693dSMark Johnson 
15777eea693dSMark Johnson 	if (!rstruct->rs_flushing) {
15787eea693dSMark Johnson 		mutex_exit(&rstruct->rs_mutex);
15797eea693dSMark Johnson 	}
15807eea693dSMark Johnson }
15817eea693dSMark Johnson 
15827eea693dSMark Johnson 
15837eea693dSMark Johnson /*
15847eea693dSMark Johnson  * xpvtap_rs_flush()
15857eea693dSMark Johnson  */
15867eea693dSMark Johnson static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle,xpvtap_rs_cleanup_t callback,void * arg)15877eea693dSMark Johnson xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
15887eea693dSMark Johnson     void *arg)
15897eea693dSMark Johnson {
15907eea693dSMark Johnson 	xpvtap_rs_t *rstruct;
15917eea693dSMark Johnson 	uint_t array_idx;
15927eea693dSMark Johnson 	uint64_t free;
15937eea693dSMark Johnson 	uint_t index;
15947eea693dSMark Johnson 	uint_t last;
15957eea693dSMark Johnson 	uint_t min;
15967eea693dSMark Johnson 	uint_t max;
15977eea693dSMark Johnson 
15987eea693dSMark Johnson 
15997eea693dSMark Johnson 	ASSERT(handle != NULL);
16007eea693dSMark Johnson 
16017eea693dSMark Johnson 	rstruct = (xpvtap_rs_t *)handle;
16027eea693dSMark Johnson 
16037eea693dSMark Johnson 	mutex_enter(&rstruct->rs_mutex);
16047eea693dSMark Johnson 	min = rstruct->rs_min;
16057eea693dSMark Johnson 	max = rstruct->rs_max;
16067eea693dSMark Johnson 
16077eea693dSMark Johnson 	rstruct->rs_flushing = B_TRUE;
16087eea693dSMark Johnson 
16097eea693dSMark Johnson 	/*
16107eea693dSMark Johnson 	 * for all resources not free, call the callback routine to clean it
16117eea693dSMark Johnson 	 * up.
16127eea693dSMark Johnson 	 */
16137eea693dSMark Johnson 	for (index = min; index <= max; index++) {
16147eea693dSMark Johnson 
16157eea693dSMark Johnson 		array_idx = rstruct->rs_last >> 6;
16167eea693dSMark Johnson 		free = rstruct->rs_free[array_idx];
16177eea693dSMark Johnson 		last = rstruct->rs_last & 0x3F;
16187eea693dSMark Johnson 
16197eea693dSMark Johnson 		/* if the next resource to check is not free */
16207eea693dSMark Johnson 		if ((free & ((uint64_t)1 << last)) == 0) {
16217eea693dSMark Johnson 			/* call the callback to cleanup */
16227eea693dSMark Johnson 			(*callback)(arg, rstruct->rs_last);
16237eea693dSMark Johnson 
16247eea693dSMark Johnson 			/* put it back in the free list */
16257eea693dSMark Johnson 			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
16267eea693dSMark Johnson 		}
16277eea693dSMark Johnson 
16287eea693dSMark Johnson 		/* go to the next one. Note the rollover at 'max' */
16297eea693dSMark Johnson 		rstruct->rs_last++;
16307eea693dSMark Johnson 		if (rstruct->rs_last > max) {
16317eea693dSMark Johnson 			rstruct->rs_last = rstruct->rs_min;
16327eea693dSMark Johnson 		}
16337eea693dSMark Johnson 	}
16347eea693dSMark Johnson 
16357eea693dSMark Johnson 	mutex_exit(&rstruct->rs_mutex);
16367eea693dSMark Johnson }
1637