xref: /onnv-gate/usr/src/uts/common/xen/io/xnb.c (revision 11878:ac93462db6d7)
15084Sjohnlev /*
25084Sjohnlev  * CDDL HEADER START
35084Sjohnlev  *
45084Sjohnlev  * The contents of this file are subject to the terms of the
55084Sjohnlev  * Common Development and Distribution License (the "License").
65084Sjohnlev  * You may not use this file except in compliance with the License.
75084Sjohnlev  *
85084Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev  * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev  * See the License for the specific language governing permissions
115084Sjohnlev  * and limitations under the License.
125084Sjohnlev  *
135084Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev  *
195084Sjohnlev  * CDDL HEADER END
205084Sjohnlev  */
215084Sjohnlev 
225084Sjohnlev /*
2311588Sdavid.edmondson@sun.com  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
245084Sjohnlev  * Use is subject to license terms.
255084Sjohnlev  */
265084Sjohnlev 
275084Sjohnlev #ifdef DEBUG
285084Sjohnlev #define	XNB_DEBUG 1
295084Sjohnlev #endif /* DEBUG */
305084Sjohnlev 
315084Sjohnlev #include "xnb.h"
325084Sjohnlev 
335084Sjohnlev #include <sys/sunddi.h>
345084Sjohnlev #include <sys/sunndi.h>
355084Sjohnlev #include <sys/modctl.h>
365084Sjohnlev #include <sys/conf.h>
375084Sjohnlev #include <sys/mac.h>
3810958Sdme@sun.com #include <sys/mac_impl.h> /* For mac_fix_cksum(). */
395084Sjohnlev #include <sys/dlpi.h>
405084Sjohnlev #include <sys/strsubr.h>
415084Sjohnlev #include <sys/strsun.h>
425741Smrj #include <sys/types.h>
435084Sjohnlev #include <sys/pattr.h>
445084Sjohnlev #include <vm/seg_kmem.h>
455084Sjohnlev #include <vm/hat_i86.h>
465084Sjohnlev #include <xen/sys/xenbus_impl.h>
475084Sjohnlev #include <xen/sys/xendev.h>
485084Sjohnlev #include <sys/balloon_impl.h>
495084Sjohnlev #include <sys/evtchn_impl.h>
505084Sjohnlev #include <sys/gnttab.h>
515262Srscott #include <vm/vm_dep.h>
5210958Sdme@sun.com #include <sys/note.h>
535084Sjohnlev #include <sys/gld.h>
545084Sjohnlev #include <inet/ip.h>
555084Sjohnlev #include <inet/ip_impl.h>
565084Sjohnlev 
575084Sjohnlev /*
587615SMax.Zhen@Sun.COM  * The terms "transmit" and "receive" are used in alignment with domU,
597615SMax.Zhen@Sun.COM  * which means that packets originating from the peer domU are "transmitted"
607615SMax.Zhen@Sun.COM  * to other parts of the system and packets are "received" from them.
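 * In other words, the "tx" path here carries packets from the peer
 * domU into this domain, and the "rx" path carries packets to it.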
615084Sjohnlev  */
625084Sjohnlev 
635084Sjohnlev /*
6410958Sdme@sun.com  * Should we allow guests to manipulate multicast group membership?
655084Sjohnlev  */
6610958Sdme@sun.com static boolean_t	xnb_multicast_control = B_TRUE;
675084Sjohnlev 
685084Sjohnlev static boolean_t	xnb_connect_rings(dev_info_t *);
695084Sjohnlev static void		xnb_disconnect_rings(dev_info_t *);
705084Sjohnlev static void		xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
715084Sjohnlev     void *, void *);
725084Sjohnlev static void		xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
735084Sjohnlev     void *, void *);
745084Sjohnlev 
757615SMax.Zhen@Sun.COM static int	xnb_txbuf_constructor(void *, void *, int);
767615SMax.Zhen@Sun.COM static void	xnb_txbuf_destructor(void *, void *);
7710958Sdme@sun.com static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
787615SMax.Zhen@Sun.COM static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);
7910958Sdme@sun.com 
8010958Sdme@sun.com mblk_t		*xnb_to_peer(xnb_t *, mblk_t *);
815741Smrj mblk_t		*xnb_copy_to_peer(xnb_t *, mblk_t *);
825741Smrj 
8310958Sdme@sun.com static void		setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
8410958Sdme@sun.com     size_t, size_t, size_t, grant_ref_t);
8510958Sdme@sun.com #pragma inline(setup_gop)
8610958Sdme@sun.com static boolean_t	is_foreign(void *);
8710958Sdme@sun.com #pragma inline(is_foreign)
885741Smrj 
895084Sjohnlev #define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
905084Sjohnlev #define	INVALID_GRANT_REF	((grant_ref_t)-1)
915084Sjohnlev 
925084Sjohnlev static kmutex_t	xnb_alloc_page_lock;
935084Sjohnlev 
945084Sjohnlev /*
9510958Sdme@sun.com  * On a 32-bit PAE system, physical and machine addresses are larger
9610958Sdme@sun.com  * than 32 bits.  ddi_btop() on such systems takes an unsigned long
9710958Sdme@sun.com  * argument, and so addresses above 4G are truncated before ddi_btop()
9810958Sdme@sun.com  * gets to see them.  To avoid this, code the shift operation here.
9910958Sdme@sun.com  */
10010958Sdme@sun.com #define	xnb_btop(addr)	((addr) >> PAGESHIFT)
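/*
 * For example, with the usual 4k page size a physical address of
 * 0x100000000 (just above 4G) becomes 0 when truncated to a 32 bit
 * unsigned long, whereas xnb_btop(0x100000000ULL) still yields the
 * expected page number, 0x100000.
 */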
10110958Sdme@sun.com 
10210958Sdme@sun.com /* DMA attributes for transmit and receive data */
10310958Sdme@sun.com static ddi_dma_attr_t buf_dma_attr = {
10410958Sdme@sun.com 	DMA_ATTR_V0,		/* version of this structure */
10510958Sdme@sun.com 	0,			/* lowest usable address */
10610958Sdme@sun.com 	0xffffffffffffffffULL,	/* highest usable address */
10710958Sdme@sun.com 	0x7fffffff,		/* maximum DMAable byte count */
10810958Sdme@sun.com 	MMU_PAGESIZE,		/* alignment in bytes */
10910958Sdme@sun.com 	0x7ff,			/* bitmap of burst sizes */
11010958Sdme@sun.com 	1,			/* minimum transfer */
11110958Sdme@sun.com 	0xffffffffU,		/* maximum transfer */
11210958Sdme@sun.com 	0xffffffffffffffffULL,	/* maximum segment length */
11310958Sdme@sun.com 	1,			/* maximum number of segments */
11410958Sdme@sun.com 	1,			/* granularity */
11510958Sdme@sun.com 	0,			/* flags (reserved) */
11610958Sdme@sun.com };
11710958Sdme@sun.com 
11810958Sdme@sun.com /* DMA access attributes for data: NOT to be byte swapped. */
11910958Sdme@sun.com static ddi_device_acc_attr_t data_accattr = {
12010958Sdme@sun.com 	DDI_DEVICE_ATTR_V0,
12110958Sdme@sun.com 	DDI_NEVERSWAP_ACC,
12210958Sdme@sun.com 	DDI_STRICTORDER_ACC
12310958Sdme@sun.com };
12410958Sdme@sun.com 
12510958Sdme@sun.com /*
1265084Sjohnlev  * Statistics.
1275084Sjohnlev  */
12811588Sdavid.edmondson@sun.com static const char * const aux_statistics[] = {
1297615SMax.Zhen@Sun.COM 	"rx_cksum_deferred",
1307615SMax.Zhen@Sun.COM 	"tx_cksum_no_need",
1317615SMax.Zhen@Sun.COM 	"rx_rsp_notok",
1325084Sjohnlev 	"tx_notify_deferred",
1335084Sjohnlev 	"tx_notify_sent",
1345084Sjohnlev 	"rx_notify_deferred",
1355084Sjohnlev 	"rx_notify_sent",
1365084Sjohnlev 	"tx_too_early",
1375084Sjohnlev 	"rx_too_early",
1385084Sjohnlev 	"rx_allocb_failed",
1395741Smrj 	"tx_allocb_failed",
1407615SMax.Zhen@Sun.COM 	"rx_foreign_page",
1415084Sjohnlev 	"mac_full",
1425084Sjohnlev 	"spurious_intr",
1435084Sjohnlev 	"allocation_success",
1445084Sjohnlev 	"allocation_failure",
1455084Sjohnlev 	"small_allocation_success",
1465084Sjohnlev 	"small_allocation_failure",
1475741Smrj 	"other_allocation_failure",
1487615SMax.Zhen@Sun.COM 	"rx_pageboundary_crossed",
1497615SMax.Zhen@Sun.COM 	"rx_cpoparea_grown",
1505084Sjohnlev 	"csum_hardware",
1515084Sjohnlev 	"csum_software",
15211588Sdavid.edmondson@sun.com 	"tx_overflow_page",
15311588Sdavid.edmondson@sun.com 	"tx_unexpected_flags",
1545084Sjohnlev };
1555084Sjohnlev 
1565084Sjohnlev static int
1575084Sjohnlev xnb_ks_aux_update(kstat_t *ksp, int flag)
1585084Sjohnlev {
1595084Sjohnlev 	xnb_t *xnbp;
1605084Sjohnlev 	kstat_named_t *knp;
1615084Sjohnlev 
1625084Sjohnlev 	if (flag != KSTAT_READ)
1635084Sjohnlev 		return (EACCES);
1645084Sjohnlev 
1655084Sjohnlev 	xnbp = ksp->ks_private;
1665084Sjohnlev 	knp = ksp->ks_data;
1675084Sjohnlev 
1685084Sjohnlev 	/*
1695084Sjohnlev 	 * Assignment order should match that of the names in
1705084Sjohnlev 	 * aux_statistics.
1715084Sjohnlev 	 */
1727615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
1737615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
1747615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
1755741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
1765741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
1775741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
1785741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
1795741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
1805741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
1815741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
1825741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
1837615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
1845741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
1855741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
1865741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
1875741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
1885741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
1895741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
1905741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
1917615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
1927615SMax.Zhen@Sun.COM 	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
1935741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
1945741Smrj 	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
19511588Sdavid.edmondson@sun.com 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
19611588Sdavid.edmondson@sun.com 	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;
1975084Sjohnlev 
1985084Sjohnlev 	return (0);
1995084Sjohnlev }
2005084Sjohnlev 
2015084Sjohnlev static boolean_t
2025084Sjohnlev xnb_ks_init(xnb_t *xnbp)
2035084Sjohnlev {
2045084Sjohnlev 	int nstat = sizeof (aux_statistics) /
2055084Sjohnlev 	    sizeof (aux_statistics[0]);
20611588Sdavid.edmondson@sun.com 	const char * const *cp = aux_statistics;
2075084Sjohnlev 	kstat_named_t *knp;
2085084Sjohnlev 
2095084Sjohnlev 	/*
2105084Sjohnlev 	 * Create and initialise kstats.
2115084Sjohnlev 	 */
2125741Smrj 	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
2135741Smrj 	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
2145084Sjohnlev 	    KSTAT_TYPE_NAMED, nstat, 0);
2155741Smrj 	if (xnbp->xnb_kstat_aux == NULL)
2165084Sjohnlev 		return (B_FALSE);
2175084Sjohnlev 
2185741Smrj 	xnbp->xnb_kstat_aux->ks_private = xnbp;
2195741Smrj 	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;
2205084Sjohnlev 
2215741Smrj 	knp = xnbp->xnb_kstat_aux->ks_data;
2225084Sjohnlev 	while (nstat > 0) {
2235084Sjohnlev 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
2245084Sjohnlev 
2255084Sjohnlev 		knp++;
2265084Sjohnlev 		cp++;
2275084Sjohnlev 		nstat--;
2285084Sjohnlev 	}
2295084Sjohnlev 
2305741Smrj 	kstat_install(xnbp->xnb_kstat_aux);
2315084Sjohnlev 
2325084Sjohnlev 	return (B_TRUE);
2335084Sjohnlev }
2345084Sjohnlev 
2355084Sjohnlev static void
2365084Sjohnlev xnb_ks_free(xnb_t *xnbp)
2375084Sjohnlev {
2385741Smrj 	kstat_delete(xnbp->xnb_kstat_aux);
2395084Sjohnlev }
2405084Sjohnlev 
2415084Sjohnlev /*
24210958Sdme@sun.com  * Calculate and insert the transport checksum for an arbitrary packet.
2435084Sjohnlev  */
2445084Sjohnlev static mblk_t *
2455084Sjohnlev xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
2465084Sjohnlev {
24710958Sdme@sun.com 	_NOTE(ARGUNUSED(xnbp));
24810958Sdme@sun.com 
2495084Sjohnlev 	/*
25010958Sdme@sun.com 	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
2515084Sjohnlev 	 * because it doesn't cover all of the interesting cases :-(
2525084Sjohnlev 	 */
253*11878SVenu.Iyer@Sun.COM 	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
2545084Sjohnlev 
2558275SEric Cheng 	return (mac_fix_cksum(mp));
2565084Sjohnlev }
2575084Sjohnlev 
2585084Sjohnlev mblk_t *
2595084Sjohnlev xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
2605084Sjohnlev {
2615084Sjohnlev 	struct ether_header *ehp;
2625084Sjohnlev 	uint16_t sap;
2635084Sjohnlev 	uint32_t offset;
2645084Sjohnlev 	ipha_t *ipha;
2655084Sjohnlev 
2665084Sjohnlev 	ASSERT(mp->b_next == NULL);
2675084Sjohnlev 
2685084Sjohnlev 	/*
2695084Sjohnlev 	 * Check that the packet is contained in a single mblk.  In
27010958Sdme@sun.com 	 * the "from peer" path this is true today, but may change
2715084Sjohnlev 	 * when scatter gather support is added.  In the "to peer"
2725084Sjohnlev 	 * path we cannot be sure, but in most cases it will be true
2735084Sjohnlev 	 * (in the xnbo case the packet has come from a MAC device
2745084Sjohnlev 	 * which is unlikely to split packets).
2755084Sjohnlev 	 */
2765084Sjohnlev 	if (mp->b_cont != NULL)
2775084Sjohnlev 		goto software;
2785084Sjohnlev 
2795084Sjohnlev 	/*
2805084Sjohnlev 	 * If the MAC has no hardware capability don't do any further
2815084Sjohnlev 	 * checking.
2825084Sjohnlev 	 */
2835084Sjohnlev 	if (capab == 0)
2845084Sjohnlev 		goto software;
2855084Sjohnlev 
2865084Sjohnlev 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
2875084Sjohnlev 	ehp = (struct ether_header *)mp->b_rptr;
2885084Sjohnlev 
2895084Sjohnlev 	if (ntohs(ehp->ether_type) == VLAN_TPID) {
2905084Sjohnlev 		struct ether_vlan_header *evhp;
2915084Sjohnlev 
2925084Sjohnlev 		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
2935084Sjohnlev 		evhp = (struct ether_vlan_header *)mp->b_rptr;
2945084Sjohnlev 		sap = ntohs(evhp->ether_type);
2955084Sjohnlev 		offset = sizeof (struct ether_vlan_header);
2965084Sjohnlev 	} else {
2975084Sjohnlev 		sap = ntohs(ehp->ether_type);
2985084Sjohnlev 		offset = sizeof (struct ether_header);
2995084Sjohnlev 	}
3005084Sjohnlev 
3015084Sjohnlev 	/*
3025084Sjohnlev 	 * We only attempt to do IPv4 packets in hardware.
3035084Sjohnlev 	 */
3045084Sjohnlev 	if (sap != ETHERTYPE_IP)
3055084Sjohnlev 		goto software;
3065084Sjohnlev 
3075084Sjohnlev 	/*
3085084Sjohnlev 	 * We know that this is an IPv4 packet.
3095084Sjohnlev 	 */
3105084Sjohnlev 	ipha = (ipha_t *)(mp->b_rptr + offset);
3115084Sjohnlev 
3125084Sjohnlev 	switch (ipha->ipha_protocol) {
3135084Sjohnlev 	case IPPROTO_TCP:
3147351Sdme@sun.com 	case IPPROTO_UDP: {
3157351Sdme@sun.com 		uint32_t start, length, stuff, cksum;
3167351Sdme@sun.com 		uint16_t *stuffp;
3177351Sdme@sun.com 
3185084Sjohnlev 		/*
3197351Sdme@sun.com 		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
3207351Sdme@sun.com 		 * can use full IPv4 and partial checksum offload.
3215084Sjohnlev 		 */
3227351Sdme@sun.com 		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
3237351Sdme@sun.com 			break;
3247351Sdme@sun.com 
3257351Sdme@sun.com 		start = IP_SIMPLE_HDR_LENGTH;
3267351Sdme@sun.com 		length = ntohs(ipha->ipha_length);
3277351Sdme@sun.com 		if (ipha->ipha_protocol == IPPROTO_TCP) {
3287351Sdme@sun.com 			stuff = start + TCP_CHECKSUM_OFFSET;
3297351Sdme@sun.com 			cksum = IP_TCP_CSUM_COMP;
3307351Sdme@sun.com 		} else {
3317351Sdme@sun.com 			stuff = start + UDP_CHECKSUM_OFFSET;
3327351Sdme@sun.com 			cksum = IP_UDP_CSUM_COMP;
3337351Sdme@sun.com 		}
3347351Sdme@sun.com 		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);
3357351Sdme@sun.com 
3367351Sdme@sun.com 		if (capab & HCKSUM_INET_FULL_V4) {
3377351Sdme@sun.com 			/*
3387351Sdme@sun.com 			 * Some devices require that the checksum
3397351Sdme@sun.com 			 * field of the packet is zero for full
3407351Sdme@sun.com 			 * offload.
3417351Sdme@sun.com 			 */
3427351Sdme@sun.com 			*stuffp = 0;
3437351Sdme@sun.com 
344*11878SVenu.Iyer@Sun.COM 			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
3455084Sjohnlev 
3465741Smrj 			xnbp->xnb_stat_csum_hardware++;
3475084Sjohnlev 
3485084Sjohnlev 			return (mp);
3495084Sjohnlev 		}
3505084Sjohnlev 
3517351Sdme@sun.com 		if (capab & HCKSUM_INET_PARTIAL) {
3527351Sdme@sun.com 			if (*stuffp == 0) {
3537351Sdme@sun.com 				ipaddr_t src, dst;
3547351Sdme@sun.com 
3557351Sdme@sun.com 				/*
3567351Sdme@sun.com 				 * Older Solaris guests don't insert
3577351Sdme@sun.com 				 * the pseudo-header checksum, so we
3587351Sdme@sun.com 				 * calculate it here.
3597351Sdme@sun.com 				 */
3607351Sdme@sun.com 				src = ipha->ipha_src;
3617351Sdme@sun.com 				dst = ipha->ipha_dst;
3627351Sdme@sun.com 
3637351Sdme@sun.com 				cksum += (dst >> 16) + (dst & 0xFFFF);
3647351Sdme@sun.com 				cksum += (src >> 16) + (src & 0xFFFF);
3657351Sdme@sun.com 				cksum += length - IP_SIMPLE_HDR_LENGTH;
3665084Sjohnlev 
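				/*
				 * Fold the 32 bit sum into 16 bits;
				 * the second fold catches any carry
				 * produced by the first.
				 */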
3677351Sdme@sun.com 				cksum = (cksum >> 16) + (cksum & 0xFFFF);
3687351Sdme@sun.com 				cksum = (cksum >> 16) + (cksum & 0xFFFF);
3697351Sdme@sun.com 
3707351Sdme@sun.com 				ASSERT(cksum <= 0xFFFF);
3717351Sdme@sun.com 
3727351Sdme@sun.com 				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
3737351Sdme@sun.com 			}
3747351Sdme@sun.com 
375*11878SVenu.Iyer@Sun.COM 			mac_hcksum_set(mp, start, stuff, length, 0,
376*11878SVenu.Iyer@Sun.COM 			    HCK_PARTIALCKSUM);
3777351Sdme@sun.com 
3787351Sdme@sun.com 			xnbp->xnb_stat_csum_hardware++;
3797351Sdme@sun.com 
3807351Sdme@sun.com 			return (mp);
3817351Sdme@sun.com 		}
3827351Sdme@sun.com 
3837351Sdme@sun.com 		/* NOTREACHED */
3845084Sjohnlev 		break;
3857351Sdme@sun.com 	}
3865084Sjohnlev 
3875084Sjohnlev 	default:
3885084Sjohnlev 		/* Use software. */
3895084Sjohnlev 		break;
3905084Sjohnlev 	}
3915084Sjohnlev 
3925084Sjohnlev software:
3935084Sjohnlev 	/*
3945084Sjohnlev 	 * We are not able to use any offload so do the whole thing in
3955084Sjohnlev 	 * software.
3965084Sjohnlev 	 */
3975741Smrj 	xnbp->xnb_stat_csum_software++;
3985084Sjohnlev 
3995084Sjohnlev 	return (xnb_software_csum(xnbp, mp));
4005084Sjohnlev }
4015084Sjohnlev 
4025084Sjohnlev int
4035084Sjohnlev xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
4045084Sjohnlev {
4055084Sjohnlev 	xnb_t *xnbp;
40610958Sdme@sun.com 	char *xsname;
40710958Sdme@sun.com 	char cachename[32];
4085084Sjohnlev 
4095084Sjohnlev 	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);
4105084Sjohnlev 
4115741Smrj 	xnbp->xnb_flavour = flavour;
4125741Smrj 	xnbp->xnb_flavour_data = flavour_data;
4135741Smrj 	xnbp->xnb_devinfo = dip;
4145741Smrj 	xnbp->xnb_evtchn = INVALID_EVTCHN;
4155741Smrj 	xnbp->xnb_irq = B_FALSE;
4165741Smrj 	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
4175741Smrj 	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
4185741Smrj 	xnbp->xnb_connected = B_FALSE;
4195741Smrj 	xnbp->xnb_hotplugged = B_FALSE;
4205741Smrj 	xnbp->xnb_detachable = B_FALSE;
4215741Smrj 	xnbp->xnb_peer = xvdi_get_oeid(dip);
42210958Sdme@sun.com 	xnbp->xnb_be_status = XNB_STATE_INIT;
42310958Sdme@sun.com 	xnbp->xnb_fe_status = XNB_STATE_INIT;
4245084Sjohnlev 
4257615SMax.Zhen@Sun.COM 	xnbp->xnb_tx_buf_count = 0;
4265741Smrj 
42710958Sdme@sun.com 	xnbp->xnb_rx_hv_copy = B_FALSE;
42810958Sdme@sun.com 	xnbp->xnb_multicast_control = B_FALSE;
4295084Sjohnlev 
4307615SMax.Zhen@Sun.COM 	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
4317615SMax.Zhen@Sun.COM 	ASSERT(xnbp->xnb_rx_va != NULL);
4325084Sjohnlev 
4335741Smrj 	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
4345084Sjohnlev 	    != DDI_SUCCESS)
4355084Sjohnlev 		goto failure;
4365084Sjohnlev 
43710958Sdme@sun.com 	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
4387615SMax.Zhen@Sun.COM 	xnbp->xnb_rx_cpop = NULL;
43910958Sdme@sun.com 	xnbp->xnb_rx_cpop_count = 0;
4405741Smrj 
4415741Smrj 	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
4425741Smrj 	    xnbp->xnb_icookie);
4435741Smrj 	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
4445741Smrj 	    xnbp->xnb_icookie);
44510958Sdme@sun.com 	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
44610958Sdme@sun.com 	    xnbp->xnb_icookie);
4475084Sjohnlev 
44810958Sdme@sun.com 	/* Set driver private pointer now. */
4495084Sjohnlev 	ddi_set_driver_private(dip, xnbp);
4505084Sjohnlev 
45110958Sdme@sun.com 	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
45210958Sdme@sun.com 	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
45310958Sdme@sun.com 	    sizeof (xnb_txbuf_t), 0,
45410958Sdme@sun.com 	    xnb_txbuf_constructor, xnb_txbuf_destructor,
45510958Sdme@sun.com 	    NULL, xnbp, NULL, 0);
45610958Sdme@sun.com 	if (xnbp->xnb_tx_buf_cache == NULL)
45710958Sdme@sun.com 		goto failure_0;
45810958Sdme@sun.com 
4595084Sjohnlev 	if (!xnb_ks_init(xnbp))
4605741Smrj 		goto failure_1;
4615084Sjohnlev 
4625084Sjohnlev 	/*
4635084Sjohnlev 	 * Receive notification of changes in the state of the
4645084Sjohnlev 	 * driver in the guest domain.
4655084Sjohnlev 	 */
4667756SMark.Johnson@Sun.COM 	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
4677756SMark.Johnson@Sun.COM 	    NULL) != DDI_SUCCESS)
4685741Smrj 		goto failure_2;
4695084Sjohnlev 
4705084Sjohnlev 	/*
4715084Sjohnlev 	 * Receive notification of hotplug events.
4725084Sjohnlev 	 */
4737756SMark.Johnson@Sun.COM 	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
4747756SMark.Johnson@Sun.COM 	    NULL) != DDI_SUCCESS)
4755741Smrj 		goto failure_2;
4765084Sjohnlev 
4775084Sjohnlev 	xsname = xvdi_get_xsname(dip);
4785084Sjohnlev 
4795084Sjohnlev 	if (xenbus_printf(XBT_NULL, xsname,
48010958Sdme@sun.com 	    "feature-multicast-control", "%d",
48110958Sdme@sun.com 	    xnb_multicast_control ? 1 : 0) != 0)
4825741Smrj 		goto failure_3;
4835741Smrj 
4845741Smrj 	if (xenbus_printf(XBT_NULL, xsname,
48510958Sdme@sun.com 	    "feature-rx-copy", "%d",  1) != 0)
4865741Smrj 		goto failure_3;
4875741Smrj 	/*
4885741Smrj 	 * Linux domUs seem to depend on "feature-rx-flip" being 0
4895741Smrj 	 * in addition to "feature-rx-copy" being 1. It seems strange
4905741Smrj 	 * to use four possible states to describe a binary decision,
4915741Smrj 	 * but we might as well play nice.
4925741Smrj 	 */
4935741Smrj 	if (xenbus_printf(XBT_NULL, xsname,
49410958Sdme@sun.com 	    "feature-rx-flip", "%d", 0) != 0)
4955741Smrj 		goto failure_3;
4965084Sjohnlev 
4975084Sjohnlev 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
4985084Sjohnlev 	(void) xvdi_post_event(dip, XEN_HP_ADD);
4995084Sjohnlev 
5005084Sjohnlev 	return (DDI_SUCCESS);
5015084Sjohnlev 
5025741Smrj failure_3:
5035084Sjohnlev 	xvdi_remove_event_handler(dip, NULL);
5045084Sjohnlev 
5055741Smrj failure_2:
5065084Sjohnlev 	xnb_ks_free(xnbp);
5075084Sjohnlev 
5085741Smrj failure_1:
50910958Sdme@sun.com 	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
51010958Sdme@sun.com 
51110958Sdme@sun.com failure_0:
51210958Sdme@sun.com 	mutex_destroy(&xnbp->xnb_state_lock);
5135741Smrj 	mutex_destroy(&xnbp->xnb_rx_lock);
5145741Smrj 	mutex_destroy(&xnbp->xnb_tx_lock);
5155084Sjohnlev 
5165084Sjohnlev failure:
5177615SMax.Zhen@Sun.COM 	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
5185084Sjohnlev 	kmem_free(xnbp, sizeof (*xnbp));
5195084Sjohnlev 	return (DDI_FAILURE);
5205084Sjohnlev }
5215084Sjohnlev 
5225084Sjohnlev void
5235084Sjohnlev xnb_detach(dev_info_t *dip)
5245084Sjohnlev {
5255084Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
5265084Sjohnlev 
5275084Sjohnlev 	ASSERT(xnbp != NULL);
5285741Smrj 	ASSERT(!xnbp->xnb_connected);
5297615SMax.Zhen@Sun.COM 	ASSERT(xnbp->xnb_tx_buf_count == 0);
5305084Sjohnlev 
5315084Sjohnlev 	xnb_disconnect_rings(dip);
5325084Sjohnlev 
5335084Sjohnlev 	xvdi_remove_event_handler(dip, NULL);
5345084Sjohnlev 
5355084Sjohnlev 	xnb_ks_free(xnbp);
5365084Sjohnlev 
53710958Sdme@sun.com 	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
53810958Sdme@sun.com 
5395084Sjohnlev 	ddi_set_driver_private(dip, NULL);
5405084Sjohnlev 
54110958Sdme@sun.com 	mutex_destroy(&xnbp->xnb_state_lock);
54210958Sdme@sun.com 	mutex_destroy(&xnbp->xnb_rx_lock);
5435741Smrj 	mutex_destroy(&xnbp->xnb_tx_lock);
5445084Sjohnlev 
54510958Sdme@sun.com 	if (xnbp->xnb_rx_cpop_count > 0)
54610958Sdme@sun.com 		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
54710958Sdme@sun.com 		    * xnbp->xnb_rx_cpop_count);
5485741Smrj 
5497615SMax.Zhen@Sun.COM 	ASSERT(xnbp->xnb_rx_va != NULL);
5507615SMax.Zhen@Sun.COM 	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
5515084Sjohnlev 
5525084Sjohnlev 	kmem_free(xnbp, sizeof (*xnbp));
5535084Sjohnlev }
5545084Sjohnlev 
55510958Sdme@sun.com /*
55610958Sdme@sun.com  * Allocate a page from the hypervisor to be flipped to the peer.
55710958Sdme@sun.com  *
55810958Sdme@sun.com  * Try to get pages in batches to reduce the overhead of calls into
55910958Sdme@sun.com  * the balloon driver.
56010958Sdme@sun.com  */
5615084Sjohnlev static mfn_t
5625084Sjohnlev xnb_alloc_page(xnb_t *xnbp)
5635084Sjohnlev {
5645084Sjohnlev #define	WARNING_RATE_LIMIT 100
5655084Sjohnlev #define	BATCH_SIZE 256
5665084Sjohnlev 	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
5675084Sjohnlev 	static int nth = BATCH_SIZE;
5685084Sjohnlev 	mfn_t mfn;
5695084Sjohnlev 
5705084Sjohnlev 	mutex_enter(&xnb_alloc_page_lock);
5715084Sjohnlev 	if (nth == BATCH_SIZE) {
5725084Sjohnlev 		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
5735741Smrj 			xnbp->xnb_stat_allocation_failure++;
5745084Sjohnlev 			mutex_exit(&xnb_alloc_page_lock);
5755084Sjohnlev 
5765084Sjohnlev 			/*
5775084Sjohnlev 			 * Try for a single page in low memory situations.
5785084Sjohnlev 			 */
5795084Sjohnlev 			if (balloon_alloc_pages(1, &mfn) != 1) {
5805741Smrj 				if ((xnbp->xnb_stat_small_allocation_failure++
5815741Smrj 				    % WARNING_RATE_LIMIT) == 0)
5825084Sjohnlev 					cmn_err(CE_WARN, "xnb_alloc_page: "
5835084Sjohnlev 					    "Cannot allocate memory to "
5845084Sjohnlev 					    "transfer packets to peer.");
5855084Sjohnlev 				return (0);
5865084Sjohnlev 			} else {
5875741Smrj 				xnbp->xnb_stat_small_allocation_success++;
5885084Sjohnlev 				return (mfn);
5895084Sjohnlev 			}
5905084Sjohnlev 		}
5915084Sjohnlev 
5925084Sjohnlev 		nth = 0;
5935741Smrj 		xnbp->xnb_stat_allocation_success++;
5945084Sjohnlev 	}
5955084Sjohnlev 
5965084Sjohnlev 	mfn = mfns[nth++];
5975084Sjohnlev 	mutex_exit(&xnb_alloc_page_lock);
5985084Sjohnlev 
5995084Sjohnlev 	ASSERT(mfn != 0);
6005084Sjohnlev 
6015084Sjohnlev 	return (mfn);
6025084Sjohnlev #undef BATCH_SIZE
6035084Sjohnlev #undef WARNING_RATE_LIMIT
6045084Sjohnlev }
6055084Sjohnlev 
60610958Sdme@sun.com /*
60710958Sdme@sun.com  * Free a page back to the hypervisor.
60810958Sdme@sun.com  *
60910958Sdme@sun.com  * This happens only in the error path, so batching is not worth the
61010958Sdme@sun.com  * complication.
61110958Sdme@sun.com  */
6125084Sjohnlev static void
6135084Sjohnlev xnb_free_page(xnb_t *xnbp, mfn_t mfn)
6145084Sjohnlev {
61510958Sdme@sun.com 	_NOTE(ARGUNUSED(xnbp));
6165084Sjohnlev 	int r;
6175262Srscott 	pfn_t pfn;
6185262Srscott 
6195262Srscott 	pfn = xen_assign_pfn(mfn);
6205262Srscott 	pfnzero(pfn, 0, PAGESIZE);
6215262Srscott 	xen_release_pfn(pfn);
6225084Sjohnlev 
6235084Sjohnlev 	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
6245084Sjohnlev 		cmn_err(CE_WARN, "free_page: cannot decrease memory "
6255084Sjohnlev 		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
6265084Sjohnlev 		    r, mfn);
6275084Sjohnlev 	}
6285084Sjohnlev }
6295084Sjohnlev 
6305741Smrj /*
63110958Sdme@sun.com  * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
63210958Sdme@sun.com  * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
6335741Smrj  */
6345741Smrj #define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
6355741Smrj 	((((_r)->sring->req_prod - loop) <		\
6365741Smrj 		(RING_SIZE(_r) - (loop - prod))) ?	\
6375741Smrj 	    ((_r)->sring->req_prod - loop) :		\
6385741Smrj 	    (RING_SIZE(_r) - (loop - prod)))
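/*
 * That is, the lesser of the number of requests the peer has posted
 * that we have not yet consumed (req_prod - loop) and the number of
 * slots still free for responses (RING_SIZE less the requests
 * consumed but not yet answered, loop - prod).
 */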
6395741Smrj 
64010958Sdme@sun.com /*
64110958Sdme@sun.com  * Pass packets to the peer using page flipping.
64210958Sdme@sun.com  */
6435084Sjohnlev mblk_t *
6445084Sjohnlev xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
6455084Sjohnlev {
6465084Sjohnlev 	mblk_t *free = mp, *prev = NULL;
6475084Sjohnlev 	size_t len;
6485084Sjohnlev 	gnttab_transfer_t *gop;
6495084Sjohnlev 	boolean_t notify;
6505084Sjohnlev 	RING_IDX loop, prod, end;
6515084Sjohnlev 
6525084Sjohnlev 	/*
6535084Sjohnlev 	 * For each packet the sequence of operations is:
6545084Sjohnlev 	 *
6555084Sjohnlev 	 * 1. get a new page from the hypervisor.
6565084Sjohnlev 	 * 2. get a request slot from the ring.
6575084Sjohnlev 	 * 3. copy the data into the new page.
6585084Sjohnlev 	 * 4. transfer the page to the peer.
6595084Sjohnlev 	 * 5. update the request slot.
6605084Sjohnlev 	 * 6. kick the peer.
6615084Sjohnlev 	 * 7. free mp.
6625084Sjohnlev 	 *
6635084Sjohnlev 	 * In order to reduce the number of hypercalls, we prepare
6645084Sjohnlev 	 * several packets for the peer and perform a single hypercall
6655084Sjohnlev 	 * to transfer them.
6665084Sjohnlev 	 */
6675084Sjohnlev 
6687615SMax.Zhen@Sun.COM 	mutex_enter(&xnbp->xnb_rx_lock);
6695084Sjohnlev 
6705084Sjohnlev 	/*
6715084Sjohnlev 	 * If we are not connected to the peer or have not yet
6725084Sjohnlev 	 * finished hotplug it is too early to pass packets to the
6735084Sjohnlev 	 * peer.
6745084Sjohnlev 	 */
6755741Smrj 	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
6767615SMax.Zhen@Sun.COM 		mutex_exit(&xnbp->xnb_rx_lock);
6777615SMax.Zhen@Sun.COM 		DTRACE_PROBE(flip_rx_too_early);
6787615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_too_early++;
6795084Sjohnlev 		return (mp);
6805084Sjohnlev 	}
6815084Sjohnlev 
6825741Smrj 	loop = xnbp->xnb_rx_ring.req_cons;
6835741Smrj 	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
6847615SMax.Zhen@Sun.COM 	gop = xnbp->xnb_rx_top;
6855084Sjohnlev 
6865084Sjohnlev 	while ((mp != NULL) &&
6875741Smrj 	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
6885084Sjohnlev 
6895084Sjohnlev 		mfn_t mfn;
6905084Sjohnlev 		pfn_t pfn;
6915084Sjohnlev 		netif_rx_request_t *rxreq;
6925084Sjohnlev 		netif_rx_response_t *rxresp;
6935084Sjohnlev 		char *valoop;
6945084Sjohnlev 		mblk_t *ml;
6955084Sjohnlev 		uint16_t cksum_flags;
6965084Sjohnlev 
6975084Sjohnlev 		/* 1 */
6985084Sjohnlev 		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
6997615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rx_defer++;
7005084Sjohnlev 			break;
7015084Sjohnlev 		}
7025084Sjohnlev 
7035084Sjohnlev 		/* 2 */
7045741Smrj 		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
7055084Sjohnlev 
7065084Sjohnlev #ifdef XNB_DEBUG
7075084Sjohnlev 		if (!(rxreq->id < NET_RX_RING_SIZE))
7085084Sjohnlev 			cmn_err(CE_PANIC, "xnb_to_peer: "
7095084Sjohnlev 			    "id %d out of range in request 0x%p",
7105084Sjohnlev 			    rxreq->id, (void *)rxreq);
7115084Sjohnlev #endif /* XNB_DEBUG */
7125084Sjohnlev 
7135084Sjohnlev 		/* Assign a pfn and map the new page at the allocated va. */
7145084Sjohnlev 		pfn = xen_assign_pfn(mfn);
7157615SMax.Zhen@Sun.COM 		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
7165084Sjohnlev 		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);
7175084Sjohnlev 
7185084Sjohnlev 		/* 3 */
7195084Sjohnlev 		len = 0;
7208757Sdme@sun.com 		valoop = xnbp->xnb_rx_va;
7215084Sjohnlev 		for (ml = mp; ml != NULL; ml = ml->b_cont) {
7225084Sjohnlev 			size_t chunk = ml->b_wptr - ml->b_rptr;
7235084Sjohnlev 
7245084Sjohnlev 			bcopy(ml->b_rptr, valoop, chunk);
7255084Sjohnlev 			valoop += chunk;
7265084Sjohnlev 			len += chunk;
7275084Sjohnlev 		}
7285084Sjohnlev 
7298757Sdme@sun.com 		ASSERT(len < PAGESIZE);
7305084Sjohnlev 
7315084Sjohnlev 		/* Release the pfn. */
7327615SMax.Zhen@Sun.COM 		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
7335084Sjohnlev 		    HAT_UNLOAD_UNMAP);
7345084Sjohnlev 		xen_release_pfn(pfn);
7355084Sjohnlev 
7365084Sjohnlev 		/* 4 */
7375084Sjohnlev 		gop->mfn = mfn;
7385741Smrj 		gop->domid = xnbp->xnb_peer;
7395084Sjohnlev 		gop->ref = rxreq->gref;
7405084Sjohnlev 
7415084Sjohnlev 		/* 5.1 */
7425741Smrj 		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
7438757Sdme@sun.com 		rxresp->offset = 0;
7445084Sjohnlev 		rxresp->flags = 0;
7455084Sjohnlev 
7465741Smrj 		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
7475084Sjohnlev 		if (cksum_flags != 0)
7487615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rx_cksum_deferred++;
7495084Sjohnlev 		rxresp->flags |= cksum_flags;
7505084Sjohnlev 
7515741Smrj 		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
7525084Sjohnlev 		rxresp->status = len;
7535084Sjohnlev 
7545084Sjohnlev 		loop++;
7555084Sjohnlev 		prod++;
7565084Sjohnlev 		gop++;
7575084Sjohnlev 		prev = mp;
7585084Sjohnlev 		mp = mp->b_next;
7595084Sjohnlev 	}
7605084Sjohnlev 
7615084Sjohnlev 	/*
7625084Sjohnlev 	 * Did we actually do anything?
7635084Sjohnlev 	 */
7645741Smrj 	if (loop == xnbp->xnb_rx_ring.req_cons) {
7657615SMax.Zhen@Sun.COM 		mutex_exit(&xnbp->xnb_rx_lock);
7665084Sjohnlev 		return (mp);
7675084Sjohnlev 	}
7685084Sjohnlev 
7695084Sjohnlev 	end = loop;
7705084Sjohnlev 
7715084Sjohnlev 	/*
7725084Sjohnlev 	 * Unlink the end of the 'done' list from the remainder.
7735084Sjohnlev 	 */
7745084Sjohnlev 	ASSERT(prev != NULL);
7755084Sjohnlev 	prev->b_next = NULL;
7765084Sjohnlev 
7777615SMax.Zhen@Sun.COM 	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
7785741Smrj 	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
7795084Sjohnlev 		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
7805084Sjohnlev 	}
7815084Sjohnlev 
7825741Smrj 	loop = xnbp->xnb_rx_ring.req_cons;
7835741Smrj 	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
7847615SMax.Zhen@Sun.COM 	gop = xnbp->xnb_rx_top;
7855084Sjohnlev 
7865084Sjohnlev 	while (loop < end) {
7875084Sjohnlev 		int16_t status = NETIF_RSP_OKAY;
7885084Sjohnlev 
7895084Sjohnlev 		if (gop->status != 0) {
7905084Sjohnlev 			status = NETIF_RSP_ERROR;
7915084Sjohnlev 
7925084Sjohnlev 			/*
7935084Sjohnlev 			 * If the status is anything other than
7945084Sjohnlev 			 * GNTST_bad_page then we don't own the page
7955084Sjohnlev 			 * any more, so don't try to give it back.
7965084Sjohnlev 			 */
7975084Sjohnlev 			if (gop->status != GNTST_bad_page)
7985084Sjohnlev 				gop->mfn = 0;
7995084Sjohnlev 		} else {
8005084Sjohnlev 			/* The page is no longer ours. */
8015084Sjohnlev 			gop->mfn = 0;
8025084Sjohnlev 		}
8035084Sjohnlev 
8045084Sjohnlev 		if (gop->mfn != 0)
8055084Sjohnlev 			/*
8065084Sjohnlev 			 * Give back the page, as we won't be using
8075084Sjohnlev 			 * it.
8085084Sjohnlev 			 */
8095084Sjohnlev 			xnb_free_page(xnbp, gop->mfn);
8105084Sjohnlev 		else
8115084Sjohnlev 			/*
8125084Sjohnlev 			 * We gave away a page, update our accounting
8135084Sjohnlev 			 * now.
8145084Sjohnlev 			 */
8155084Sjohnlev 			balloon_drv_subtracted(1);
8165084Sjohnlev 
8175084Sjohnlev 		/* 5.2 */
8185084Sjohnlev 		if (status != NETIF_RSP_OKAY) {
8195741Smrj 			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
8205084Sjohnlev 			    status;
8215084Sjohnlev 		} else {
8227615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_ipackets++;
8237615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rbytes += len;
8245084Sjohnlev 		}
8255084Sjohnlev 
8265084Sjohnlev 		loop++;
8275084Sjohnlev 		prod++;
8285084Sjohnlev 		gop++;
8295084Sjohnlev 	}
8305084Sjohnlev 
8315741Smrj 	xnbp->xnb_rx_ring.req_cons = loop;
8325741Smrj 	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
8335084Sjohnlev 
8345084Sjohnlev 	/* 6 */
8355741Smrj 	/* LINTED: constant in conditional context */
8365741Smrj 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
8375084Sjohnlev 	if (notify) {
8385741Smrj 		ec_notify_via_evtchn(xnbp->xnb_evtchn);
8397615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_notify_sent++;
8405084Sjohnlev 	} else {
8417615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_notify_deferred++;
8425084Sjohnlev 	}
8435084Sjohnlev 
8445084Sjohnlev 	if (mp != NULL)
8457615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_defer++;
8465084Sjohnlev 
8477615SMax.Zhen@Sun.COM 	mutex_exit(&xnbp->xnb_rx_lock);
8485084Sjohnlev 
8495084Sjohnlev 	/* Free mblk_t's that we consumed. */
8505084Sjohnlev 	freemsgchain(free);
8515084Sjohnlev 
8525084Sjohnlev 	return (mp);
8535084Sjohnlev }
8545084Sjohnlev 
85510958Sdme@sun.com /* Helper functions for xnb_copy_to_peer(). */
8565741Smrj 
8575741Smrj /*
8585741Smrj  * Grow the array of copy operation descriptors.
8595741Smrj  */
86010958Sdme@sun.com static boolean_t
86110958Sdme@sun.com grow_cpop_area(xnb_t *xnbp)
8625741Smrj {
86310958Sdme@sun.com 	size_t count;
86410958Sdme@sun.com 	gnttab_copy_t *new;
8655741Smrj 
8667615SMax.Zhen@Sun.COM 	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
8675741Smrj 
86810958Sdme@sun.com 	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;
8695741Smrj 
87010958Sdme@sun.com 	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
8715741Smrj 		xnbp->xnb_stat_other_allocation_failure++;
87210958Sdme@sun.com 		return (B_FALSE);
8735741Smrj 	}
8745741Smrj 
87510958Sdme@sun.com 	bcopy(xnbp->xnb_rx_cpop, new,
87610958Sdme@sun.com 	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
8775741Smrj 
87810958Sdme@sun.com 	kmem_free(xnbp->xnb_rx_cpop,
87910958Sdme@sun.com 	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
8805741Smrj 
88110958Sdme@sun.com 	xnbp->xnb_rx_cpop = new;
88210958Sdme@sun.com 	xnbp->xnb_rx_cpop_count = count;
8835741Smrj 
8847615SMax.Zhen@Sun.COM 	xnbp->xnb_stat_rx_cpoparea_grown++;
8855741Smrj 
88610958Sdme@sun.com 	return (B_TRUE);
8875741Smrj }
8885741Smrj 
8895741Smrj /*
8905741Smrj  * Check whether an address is on a page that's foreign to this domain.
8915741Smrj  */
8925741Smrj static boolean_t
8935741Smrj is_foreign(void *addr)
8945741Smrj {
89510958Sdme@sun.com 	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
8965741Smrj 
89710958Sdme@sun.com 	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
8985741Smrj }
8995741Smrj 
9005741Smrj /*
9015741Smrj  * Insert a newly allocated mblk into a chain, replacing the old one.
9025741Smrj  */
9035741Smrj static mblk_t *
9045741Smrj replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
9055741Smrj {
9065741Smrj 	uint32_t	start, stuff, end, value, flags;
9075741Smrj 	mblk_t		*new_mp;
9085741Smrj 
9095741Smrj 	new_mp = copyb(mp);
910*11878SVenu.Iyer@Sun.COM 	if (new_mp == NULL) {
9115741Smrj 		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message"
9125741Smrj 		    " for %p, len %lu", (void *)mp, len);
913*11878SVenu.Iyer@Sun.COM 	}
9145741Smrj 
915*11878SVenu.Iyer@Sun.COM 	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
916*11878SVenu.Iyer@Sun.COM 	mac_hcksum_set(new_mp, start, stuff, end, value, flags);
9175741Smrj 
9185741Smrj 	new_mp->b_next = mp->b_next;
9195741Smrj 	new_mp->b_prev = mp->b_prev;
9205741Smrj 	new_mp->b_cont = mp->b_cont;
9215741Smrj 
9225741Smrj 	/* Make sure we only overwrite pointers to the mblk being replaced. */
9235741Smrj 	if (mp_prev != NULL && mp_prev->b_next == mp)
9245741Smrj 		mp_prev->b_next = new_mp;
9255741Smrj 
9265741Smrj 	if (ml_prev != NULL && ml_prev->b_cont == mp)
9275741Smrj 		ml_prev->b_cont = new_mp;
9285741Smrj 
9295741Smrj 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
9305741Smrj 	freemsg(mp);
9315741Smrj 
9325741Smrj 	return (new_mp);
9335741Smrj }
9345741Smrj 
9355741Smrj /*
9365741Smrj  * Set all the fields in a gnttab_copy_t.
9375741Smrj  */
9385741Smrj static void
9395741Smrj setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
9405741Smrj     size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
9415741Smrj {
9425741Smrj 	ASSERT(xnbp != NULL && gp != NULL);
9435741Smrj 
9445741Smrj 	gp->source.offset = s_off;
9455741Smrj 	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
9465741Smrj 	gp->source.domid = DOMID_SELF;
9475741Smrj 
9485741Smrj 	gp->len = (uint16_t)len;
9495741Smrj 	gp->flags = GNTCOPY_dest_gref;
9505741Smrj 	gp->status = 0;
9515741Smrj 
9525741Smrj 	gp->dest.u.ref = d_ref;
9535741Smrj 	gp->dest.offset = d_off;
9545741Smrj 	gp->dest.domid = xnbp->xnb_peer;
9555741Smrj }
9565741Smrj 
95710958Sdme@sun.com /*
95810958Sdme@sun.com  * Pass packets to the peer using hypervisor copy operations.
95910958Sdme@sun.com  */
9605741Smrj mblk_t *
9615741Smrj xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
9625741Smrj {
9635741Smrj 	mblk_t		*free = mp, *mp_prev = NULL, *saved_mp = mp;
9645741Smrj 	mblk_t		*ml, *ml_prev;
9655741Smrj 	boolean_t	notify;
9665741Smrj 	RING_IDX	loop, prod;
9675741Smrj 	int		i;
9685741Smrj 
96910958Sdme@sun.com 	/*
97010958Sdme@sun.com 	 * If the peer does not pre-post buffers for received packets,
97110958Sdme@sun.com 	 * use page flipping to pass packets to it.
97210958Sdme@sun.com 	 */
97310958Sdme@sun.com 	if (!xnbp->xnb_rx_hv_copy)
9745741Smrj 		return (xnb_to_peer(xnbp, mp));
9755741Smrj 
9765741Smrj 	/*
9775741Smrj 	 * For each packet the sequence of operations is:
9785741Smrj 	 *
9795741Smrj 	 *  1. get a request slot from the ring.
9805741Smrj 	 *  2. set up data for the hypercall (see NOTE below).
9815741Smrj 	 *  3. have the hypervisor copy the data.
9825741Smrj 	 *  4. update the request slot.
9835741Smrj 	 *  5. kick the peer.
9845741Smrj 	 *
9855741Smrj 	 * NOTE ad 2.
9865741Smrj 	 *  In order to reduce the number of hypercalls, we prepare
98710958Sdme@sun.com 	 *  several mblks (mp->b_cont != NULL) for the peer and
98810958Sdme@sun.com 	 *  perform a single hypercall to transfer them.  We also have
98910958Sdme@sun.com 	 *  to set up a separate copy operation for every page.
9905741Smrj 	 *
99110958Sdme@sun.com 	 * If we have more than one packet (mp->b_next != NULL), we do
99210958Sdme@sun.com 	 * this whole dance repeatedly.
9935741Smrj 	 */
9945741Smrj 
9957615SMax.Zhen@Sun.COM 	mutex_enter(&xnbp->xnb_rx_lock);
9965741Smrj 
9975741Smrj 	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
9987615SMax.Zhen@Sun.COM 		mutex_exit(&xnbp->xnb_rx_lock);
9997615SMax.Zhen@Sun.COM 		DTRACE_PROBE(copy_rx_too_early);
10007615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_too_early++;
10015741Smrj 		return (mp);
10025741Smrj 	}
10035741Smrj 
10045741Smrj 	loop = xnbp->xnb_rx_ring.req_cons;
10055741Smrj 	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
10065741Smrj 
10075741Smrj 	while ((mp != NULL) &&
10085741Smrj 	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
10095741Smrj 		netif_rx_request_t	*rxreq;
101010958Sdme@sun.com 		size_t			d_offset, len;
101110958Sdme@sun.com 		int			item_count;
101210958Sdme@sun.com 		gnttab_copy_t		*gop_cp;
10135741Smrj 		netif_rx_response_t	*rxresp;
10145741Smrj 		uint16_t		cksum_flags;
10155741Smrj 		int16_t			status = NETIF_RSP_OKAY;
10165741Smrj 
10175741Smrj 		/* 1 */
10185741Smrj 		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
10195741Smrj 
10205741Smrj #ifdef XNB_DEBUG
10215741Smrj 		if (!(rxreq->id < NET_RX_RING_SIZE))
10225741Smrj 			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
10235741Smrj 			    "id %d out of range in request 0x%p",
10245741Smrj 			    rxreq->id, (void *)rxreq);
10255741Smrj #endif /* XNB_DEBUG */
10265741Smrj 
10275741Smrj 		/* 2 */
10288757Sdme@sun.com 		d_offset = 0;
10295741Smrj 		len = 0;
10305741Smrj 		item_count = 0;
10315741Smrj 
10327615SMax.Zhen@Sun.COM 		gop_cp = xnbp->xnb_rx_cpop;
10335741Smrj 
10345741Smrj 		/*
103510958Sdme@sun.com 		 * We walk the b_cont pointers and set up a
103610958Sdme@sun.com 		 * gnttab_copy_t for each sub-page chunk in each data
103710958Sdme@sun.com 		 * block.
10385741Smrj 		 */
10395741Smrj 		/* 2a */
10405741Smrj 		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
10415741Smrj 			size_t	chunk = ml->b_wptr - ml->b_rptr;
10425741Smrj 			uchar_t	*r_tmp,	*rpt_align;
10435741Smrj 			size_t	r_offset;
10445741Smrj 
10455741Smrj 			/*
104610958Sdme@sun.com 			 * The hypervisor will not allow us to
104710958Sdme@sun.com 			 * reference a foreign page (e.g. one
104810958Sdme@sun.com 			 * belonging to another domain) by mfn in the
104910958Sdme@sun.com 			 * copy operation. If the data in this mblk is
105010958Sdme@sun.com 			 * on such a page we must copy the data into a
105110958Sdme@sun.com 			 * local page before initiating the hypervisor
105210958Sdme@sun.com 			 * copy operation.
10535741Smrj 			 */
10545741Smrj 			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
10555741Smrj 				mblk_t *ml_new = replace_msg(ml, chunk,
10565741Smrj 				    mp_prev, ml_prev);
10575741Smrj 
10585741Smrj 				/* We can still use old ml, but not *ml! */
10595741Smrj 				if (free == ml)
10605741Smrj 					free = ml_new;
10615741Smrj 				if (mp == ml)
10625741Smrj 					mp = ml_new;
10635741Smrj 				ml = ml_new;
10645741Smrj 
10657615SMax.Zhen@Sun.COM 				xnbp->xnb_stat_rx_foreign_page++;
10665741Smrj 			}
10675741Smrj 
10685741Smrj 			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
10695741Smrj 			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
10705741Smrj 			r_tmp = ml->b_rptr;
10715741Smrj 
10725741Smrj 			if (d_offset + chunk > PAGESIZE)
10735741Smrj 				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
10745741Smrj 				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
10755741Smrj 				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
10765741Smrj 				    (void *)mp, (void *)saved_mp, (void *)ml,
10775741Smrj 				    (void *)rpt_align,
10785741Smrj 				    d_offset, chunk, (int)PAGESIZE);
10795741Smrj 
10805741Smrj 			while (chunk > 0) {
10815741Smrj 				size_t part_len;
10825741Smrj 
108310958Sdme@sun.com 				if (item_count == xnbp->xnb_rx_cpop_count) {
108410958Sdme@sun.com 					if (!grow_cpop_area(xnbp))
10855741Smrj 						goto failure;
108610958Sdme@sun.com 					gop_cp = &xnbp->xnb_rx_cpop[item_count];
10875741Smrj 				}
10885741Smrj 				/*
10895741Smrj 				 * If our mblk crosses a page boundary, we need
109010958Sdme@sun.com 				 * to do a separate copy for each page.
10915741Smrj 				 */
10925741Smrj 				if (r_offset + chunk > PAGESIZE) {
10935741Smrj 					part_len = PAGESIZE - r_offset;
10945741Smrj 
10955741Smrj 					DTRACE_PROBE3(mblk_page_crossed,
10965741Smrj 					    (mblk_t *), ml, int, chunk, int,
10975741Smrj 					    (int)r_offset);
10985741Smrj 
10997615SMax.Zhen@Sun.COM 					xnbp->xnb_stat_rx_pagebndry_crossed++;
11005741Smrj 				} else {
11015741Smrj 					part_len = chunk;
11025741Smrj 				}
11035741Smrj 
11045741Smrj 				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
11055741Smrj 				    d_offset, part_len, rxreq->gref);
11065741Smrj 
11075741Smrj 				chunk -= part_len;
11085741Smrj 
11095741Smrj 				len += part_len;
11105741Smrj 				d_offset += part_len;
11115741Smrj 				r_tmp += part_len;
11125741Smrj 				/*
11135741Smrj 				 * The 2nd, 3rd ... last copies will always
11145741Smrj 				 * start at r_tmp, therefore r_offset is 0.
11155741Smrj 				 */
11165741Smrj 				r_offset = 0;
11175741Smrj 				gop_cp++;
111810958Sdme@sun.com 				item_count++;
11195741Smrj 			}
11205741Smrj 			ml_prev = ml;
112110958Sdme@sun.com 
11225741Smrj 			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
11235741Smrj 			    chunk, int, len, int, item_count);
11245741Smrj 		}
11255741Smrj 		/* 3 */
11267615SMax.Zhen@Sun.COM 		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
11275741Smrj 		    item_count) != 0) {
11285741Smrj 			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
11295741Smrj 			DTRACE_PROBE(HV_granttableopfailed);
11305741Smrj 		}
11315741Smrj 
11325741Smrj 		/* 4 */
11335741Smrj 		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
11348757Sdme@sun.com 		rxresp->offset = 0;
11355741Smrj 
11365741Smrj 		rxresp->flags = 0;
11375741Smrj 
11385741Smrj 		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
11395741Smrj 		    (int)rxresp->offset, int, (int)rxresp->flags, int,
11405741Smrj 		    (int)rxresp->status);
11415741Smrj 
11425741Smrj 		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
11435741Smrj 		if (cksum_flags != 0)
11447615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rx_cksum_deferred++;
11455741Smrj 		rxresp->flags |= cksum_flags;
11465741Smrj 
11475741Smrj 		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
11485741Smrj 		rxresp->status = len;
11495741Smrj 
11505741Smrj 		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
11515741Smrj 		    (int)rxresp->offset, int, (int)rxresp->flags, int,
11525741Smrj 		    (int)rxresp->status);
11535741Smrj 
11545741Smrj 		for (i = 0; i < item_count; i++) {
11557615SMax.Zhen@Sun.COM 			if (xnbp->xnb_rx_cpop[i].status != 0) {
115610958Sdme@sun.com 				DTRACE_PROBE2(cpop_status_nonnull, int,
11577615SMax.Zhen@Sun.COM 				    (int)xnbp->xnb_rx_cpop[i].status,
11585741Smrj 				    int, i);
11595741Smrj 				status = NETIF_RSP_ERROR;
11605741Smrj 			}
11615741Smrj 		}
11625741Smrj 
11635741Smrj 		/* 5.2 */
11645741Smrj 		if (status != NETIF_RSP_OKAY) {
11655741Smrj 			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
11665741Smrj 			    status;
11677615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rx_rsp_notok++;
11685741Smrj 		} else {
11697615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_ipackets++;
11707615SMax.Zhen@Sun.COM 			xnbp->xnb_stat_rbytes += len;
11715741Smrj 		}
11725741Smrj 
11735741Smrj 		loop++;
11745741Smrj 		prod++;
11755741Smrj 		mp_prev = mp;
11765741Smrj 		mp = mp->b_next;
11775741Smrj 	}
11785741Smrj failure:
11795741Smrj 	/*
11805741Smrj 	 * Did we actually do anything?
11815741Smrj 	 */
11825741Smrj 	if (loop == xnbp->xnb_rx_ring.req_cons) {
11837615SMax.Zhen@Sun.COM 		mutex_exit(&xnbp->xnb_rx_lock);
11845741Smrj 		return (mp);
11855741Smrj 	}
11865741Smrj 
11875741Smrj 	/*
11885741Smrj 	 * Unlink the end of the 'done' list from the remainder.
11895741Smrj 	 */
11905741Smrj 	ASSERT(mp_prev != NULL);
11915741Smrj 	mp_prev->b_next = NULL;
11925741Smrj 
11935741Smrj 	xnbp->xnb_rx_ring.req_cons = loop;
11945741Smrj 	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
11955741Smrj 
11965741Smrj 	/* 6 */
11975741Smrj 	/* LINTED: constant in conditional context */
11985741Smrj 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
11995741Smrj 	if (notify) {
12005741Smrj 		ec_notify_via_evtchn(xnbp->xnb_evtchn);
12017615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_notify_sent++;
12025741Smrj 	} else {
12037615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_notify_deferred++;
12045741Smrj 	}
12055741Smrj 
12065741Smrj 	if (mp != NULL)
12077615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_rx_defer++;
12085741Smrj 
12097615SMax.Zhen@Sun.COM 	mutex_exit(&xnbp->xnb_rx_lock);
12105741Smrj 
12115741Smrj 	/* Free mblk_t structs we have consumed. */
12125741Smrj 	freemsgchain(free);
12135741Smrj 
12145741Smrj 	return (mp);
12155741Smrj }
12165741Smrj 
12175084Sjohnlev 
12185084Sjohnlev static void
121910958Sdme@sun.com xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
12205084Sjohnlev {
12215084Sjohnlev 	boolean_t notify;
12225084Sjohnlev 
12237615SMax.Zhen@Sun.COM 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
12245084Sjohnlev 
12255741Smrj 	/* LINTED: constant in conditional context */
12265741Smrj 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
122710958Sdme@sun.com 	if (notify || force) {
12285741Smrj 		ec_notify_via_evtchn(xnbp->xnb_evtchn);
12297615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_tx_notify_sent++;
12305084Sjohnlev 	} else {
12317615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_tx_notify_deferred++;
12325084Sjohnlev 	}
12335084Sjohnlev }
12345084Sjohnlev 
12355084Sjohnlev static void
12367615SMax.Zhen@Sun.COM xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
12375084Sjohnlev {
12385084Sjohnlev 	RING_IDX i;
12395084Sjohnlev 	netif_tx_response_t *txresp;
12405084Sjohnlev 
12417615SMax.Zhen@Sun.COM 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
12425084Sjohnlev 
12435741Smrj 	i = xnbp->xnb_tx_ring.rsp_prod_pvt;
12445084Sjohnlev 
12455741Smrj 	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
12465084Sjohnlev 	txresp->id = id;
12475084Sjohnlev 	txresp->status = status;
12485084Sjohnlev 
12495741Smrj 	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
12505084Sjohnlev 
12515084Sjohnlev 	/*
12525084Sjohnlev 	 * Note that we don't push the change to the peer here - that
12535084Sjohnlev 	 * is the caller's responsibility.
12545084Sjohnlev 	 */
12555084Sjohnlev }
12565084Sjohnlev 
12575084Sjohnlev static void
125810958Sdme@sun.com xnb_txbuf_recycle(xnb_txbuf_t *txp)
12595084Sjohnlev {
126010958Sdme@sun.com 	xnb_t *xnbp = txp->xt_xnbp;
12615084Sjohnlev 
126210958Sdme@sun.com 	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
126310958Sdme@sun.com 
126410958Sdme@sun.com 	xnbp->xnb_tx_buf_outstanding--;
126510958Sdme@sun.com }
12665741Smrj 
126710958Sdme@sun.com static int
126810958Sdme@sun.com xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
126910958Sdme@sun.com {
127010958Sdme@sun.com 	_NOTE(ARGUNUSED(kmflag));
127110958Sdme@sun.com 	xnb_txbuf_t *txp = buf;
127210958Sdme@sun.com 	xnb_t *xnbp = arg;
127310958Sdme@sun.com 	size_t len;
127410958Sdme@sun.com 	ddi_dma_cookie_t dma_cookie;
127510958Sdme@sun.com 	uint_t ncookies;
12765084Sjohnlev 
127710958Sdme@sun.com 	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
127810958Sdme@sun.com 	txp->xt_free_rtn.free_arg = (caddr_t)txp;
127910958Sdme@sun.com 	txp->xt_xnbp = xnbp;
128010958Sdme@sun.com 	txp->xt_next = NULL;
128110958Sdme@sun.com 
128210958Sdme@sun.com 	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
128310958Sdme@sun.com 	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
128410958Sdme@sun.com 		goto failure;
128510958Sdme@sun.com 
128610958Sdme@sun.com 	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
128710958Sdme@sun.com 	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
128810958Sdme@sun.com 	    &txp->xt_acc_handle) != DDI_SUCCESS)
128910958Sdme@sun.com 		goto failure_1;
12905084Sjohnlev 
129110958Sdme@sun.com 	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
129210958Sdme@sun.com 	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
129310958Sdme@sun.com 	    &dma_cookie, &ncookies)
129410958Sdme@sun.com 	    != DDI_DMA_MAPPED)
129510958Sdme@sun.com 		goto failure_2;
129610958Sdme@sun.com 	ASSERT(ncookies == 1);
129710958Sdme@sun.com 
129810958Sdme@sun.com 	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
129910958Sdme@sun.com 	txp->xt_buflen = dma_cookie.dmac_size;
130010958Sdme@sun.com 
130110958Sdme@sun.com 	DTRACE_PROBE(txbuf_allocated);
130210958Sdme@sun.com 
130310958Sdme@sun.com 	atomic_add_32(&xnbp->xnb_tx_buf_count, 1);
130410958Sdme@sun.com 	xnbp->xnb_tx_buf_outstanding++;
130510958Sdme@sun.com 
130610958Sdme@sun.com 	return (0);
130710958Sdme@sun.com 
130810958Sdme@sun.com failure_2:
130910958Sdme@sun.com 	ddi_dma_mem_free(&txp->xt_acc_handle);
13105084Sjohnlev 
131110958Sdme@sun.com failure_1:
131210958Sdme@sun.com 	ddi_dma_free_handle(&txp->xt_dma_handle);
131310958Sdme@sun.com 
131410958Sdme@sun.com failure:
131510958Sdme@sun.com 
131610958Sdme@sun.com 	return (-1);
131710958Sdme@sun.com }
13185741Smrj 
131910958Sdme@sun.com static void
132010958Sdme@sun.com xnb_txbuf_destructor(void *buf, void *arg)
132110958Sdme@sun.com {
132210958Sdme@sun.com 	xnb_txbuf_t *txp = buf;
132310958Sdme@sun.com 	xnb_t *xnbp = arg;
132410958Sdme@sun.com 
132510958Sdme@sun.com 	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
132610958Sdme@sun.com 	ddi_dma_mem_free(&txp->xt_acc_handle);
132710958Sdme@sun.com 	ddi_dma_free_handle(&txp->xt_dma_handle);
132810958Sdme@sun.com 
132910958Sdme@sun.com 	atomic_add_32(&xnbp->xnb_tx_buf_count, -1);
13305084Sjohnlev }
13315084Sjohnlev 
13325741Smrj /*
133310958Sdme@sun.com  * Take packets from the peer and deliver them onward.
13345741Smrj  */
13355084Sjohnlev static mblk_t *
13367615SMax.Zhen@Sun.COM xnb_from_peer(xnb_t *xnbp)
13375084Sjohnlev {
13385084Sjohnlev 	RING_IDX start, end, loop;
133910958Sdme@sun.com 	gnttab_copy_t *cop;
13407615SMax.Zhen@Sun.COM 	xnb_txbuf_t **txpp;
13415084Sjohnlev 	netif_tx_request_t *txreq;
134210958Sdme@sun.com 	boolean_t work_to_do, need_notify = B_FALSE;
13435084Sjohnlev 	mblk_t *head, *tail;
134410958Sdme@sun.com 	int n_data_req, i;
13455084Sjohnlev 
134610958Sdme@sun.com 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
13475084Sjohnlev 
13485084Sjohnlev 	head = tail = NULL;
13495084Sjohnlev around:
13505084Sjohnlev 
13515741Smrj 	/* LINTED: constant in conditional context */
13525741Smrj 	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
13535084Sjohnlev 	if (!work_to_do) {
13545084Sjohnlev finished:
135510958Sdme@sun.com 		xnb_tx_notify_peer(xnbp, need_notify);
135610958Sdme@sun.com 
13575084Sjohnlev 		return (head);
13585084Sjohnlev 	}
13595084Sjohnlev 
13605741Smrj 	start = xnbp->xnb_tx_ring.req_cons;
13615741Smrj 	end = xnbp->xnb_tx_ring.sring->req_prod;
13625084Sjohnlev 
13637676Sdme@sun.com 	if ((end - start) > NET_TX_RING_SIZE) {
13647676Sdme@sun.com 		/*
13657676Sdme@sun.com 		 * This usually indicates that the frontend driver is
13667676Sdme@sun.com 		 * misbehaving, as it's not possible to have more than
13677676Sdme@sun.com 		 * NET_TX_RING_SIZE ring elements in play at any one
13687676Sdme@sun.com 		 * time.
13697676Sdme@sun.com 		 *
13707676Sdme@sun.com 		 * We reset the ring pointers to the state declared by
13717676Sdme@sun.com 		 * the frontend and try to carry on.
13727676Sdme@sun.com 		 */
13737676Sdme@sun.com 		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
13747676Sdme@sun.com 		    "items in the ring, resetting and trying to recover.",
13757676Sdme@sun.com 		    xnbp->xnb_peer, (end - start));
13767676Sdme@sun.com 
13777676Sdme@sun.com 		/* LINTED: constant in conditional context */
13787676Sdme@sun.com 		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
13797676Sdme@sun.com 		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
13807676Sdme@sun.com 
13817676Sdme@sun.com 		goto around;
13827676Sdme@sun.com 	}
13837676Sdme@sun.com 
138410958Sdme@sun.com 	loop = start;
138510958Sdme@sun.com 	cop = xnbp->xnb_tx_cop;
138610958Sdme@sun.com 	txpp = xnbp->xnb_tx_bufp;
138710958Sdme@sun.com 	n_data_req = 0;
138810958Sdme@sun.com 
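	/*
	 * Walk the outstanding requests. Requests with unexpected
	 * flags, multicast control requests and requests that refer
	 * beyond the granted page are completed immediately; the
	 * remaining data requests are batched into grant copy
	 * operations.
	 */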
138910958Sdme@sun.com 	while (loop < end) {
139011588Sdavid.edmondson@sun.com 		static const uint16_t acceptable_flags =
139111588Sdavid.edmondson@sun.com 		    NETTXF_csum_blank |
139211588Sdavid.edmondson@sun.com 		    NETTXF_data_validated |
139311588Sdavid.edmondson@sun.com 		    NETTXF_extra_info;
139411588Sdavid.edmondson@sun.com 		uint16_t unexpected_flags;
139511588Sdavid.edmondson@sun.com 
139610958Sdme@sun.com 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
139710958Sdme@sun.com 
139811588Sdavid.edmondson@sun.com 		unexpected_flags = txreq->flags & ~acceptable_flags;
139911588Sdavid.edmondson@sun.com 		if (unexpected_flags != 0) {
140011588Sdavid.edmondson@sun.com 			/*
140111588Sdavid.edmondson@sun.com 			 * The peer used flag bits that we do not
140211588Sdavid.edmondson@sun.com 			 * recognize.
140311588Sdavid.edmondson@sun.com 			 */
140411588Sdavid.edmondson@sun.com 			cmn_err(CE_WARN, "xnb_from_peer: "
140511588Sdavid.edmondson@sun.com 			    "unexpected flag bits (0x%x) from peer "
140611588Sdavid.edmondson@sun.com 			    "in transmit request",
140711588Sdavid.edmondson@sun.com 			    unexpected_flags);
140811588Sdavid.edmondson@sun.com 			xnbp->xnb_stat_tx_unexpected_flags++;
140911588Sdavid.edmondson@sun.com 
141011588Sdavid.edmondson@sun.com 			/* Mark this entry as failed. */
141111588Sdavid.edmondson@sun.com 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
141211588Sdavid.edmondson@sun.com 			need_notify = B_TRUE;
141311588Sdavid.edmondson@sun.com 
141411588Sdavid.edmondson@sun.com 		} else if (txreq->flags & NETTXF_extra_info) {
141510958Sdme@sun.com 			struct netif_extra_info *erp;
141610958Sdme@sun.com 			boolean_t status;
141710958Sdme@sun.com 
141810958Sdme@sun.com 			loop++; /* Consume another slot in the ring. */
141910958Sdme@sun.com 			ASSERT(loop <= end);
142010958Sdme@sun.com 
142110958Sdme@sun.com 			erp = (struct netif_extra_info *)
142210958Sdme@sun.com 			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
14235084Sjohnlev 
142410958Sdme@sun.com 			switch (erp->type) {
142510958Sdme@sun.com 			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
142610958Sdme@sun.com 				ASSERT(xnbp->xnb_multicast_control);
142710958Sdme@sun.com 				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
142810958Sdme@sun.com 				    &erp->u.mcast.addr);
142910958Sdme@sun.com 				break;
143010958Sdme@sun.com 			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
143110958Sdme@sun.com 				ASSERT(xnbp->xnb_multicast_control);
143210958Sdme@sun.com 				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
143310958Sdme@sun.com 				    &erp->u.mcast.addr);
143410958Sdme@sun.com 				break;
143510958Sdme@sun.com 			default:
143610958Sdme@sun.com 				status = B_FALSE;
143710958Sdme@sun.com 				cmn_err(CE_WARN, "xnb_from_peer: "
143810958Sdme@sun.com 				    "unknown extra type %d", erp->type);
143910958Sdme@sun.com 				break;
144010958Sdme@sun.com 			}
14415084Sjohnlev 
144210958Sdme@sun.com 			xnb_tx_mark_complete(xnbp, txreq->id,
144310958Sdme@sun.com 			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
144410958Sdme@sun.com 			need_notify = B_TRUE;
144511588Sdavid.edmondson@sun.com 
144611588Sdavid.edmondson@sun.com 		} else if ((txreq->offset > PAGESIZE) ||
144711588Sdavid.edmondson@sun.com 		    (txreq->offset + txreq->size > PAGESIZE)) {
144811588Sdavid.edmondson@sun.com 			/*
144911588Sdavid.edmondson@sun.com 			 * Peer attempted to refer to data beyond the
145011588Sdavid.edmondson@sun.com 			 * end of the granted page.
145111588Sdavid.edmondson@sun.com 			 */
145211588Sdavid.edmondson@sun.com 			cmn_err(CE_WARN, "xnb_from_peer: "
145311588Sdavid.edmondson@sun.com 			    "attempt to refer beyond the end of granted "
145411588Sdavid.edmondson@sun.com 			    "page in txreq (offset %d, size %d).",
145511588Sdavid.edmondson@sun.com 			    txreq->offset, txreq->size);
145611588Sdavid.edmondson@sun.com 			xnbp->xnb_stat_tx_overflow_page++;
145711588Sdavid.edmondson@sun.com 
145811588Sdavid.edmondson@sun.com 			/* Mark this entry as failed. */
145911588Sdavid.edmondson@sun.com 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
146011588Sdavid.edmondson@sun.com 			need_notify = B_TRUE;
146111588Sdavid.edmondson@sun.com 
146210958Sdme@sun.com 		} else {
146310958Sdme@sun.com 			xnb_txbuf_t *txp;
146410958Sdme@sun.com 
146510958Sdme@sun.com 			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
146610958Sdme@sun.com 			    KM_NOSLEEP);
146710958Sdme@sun.com 			if (txp == NULL)
146810958Sdme@sun.com 				break;
146910958Sdme@sun.com 
147010958Sdme@sun.com 			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
147110958Sdme@sun.com 			    txp->xt_buflen, 0, &txp->xt_free_rtn);
147210958Sdme@sun.com 			if (txp->xt_mblk == NULL) {
147310958Sdme@sun.com 				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
147410958Sdme@sun.com 				break;
147510958Sdme@sun.com 			}
147610958Sdme@sun.com 
147710958Sdme@sun.com 			txp->xt_idx = loop;
147810958Sdme@sun.com 			txp->xt_id = txreq->id;
14795084Sjohnlev 
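			/*
			 * Build a grant copy operation to move the
			 * packet data from the peer's granted page
			 * into our local buffer.
			 */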
148010958Sdme@sun.com 			cop->source.u.ref = txreq->gref;
148110958Sdme@sun.com 			cop->source.domid = xnbp->xnb_peer;
148210958Sdme@sun.com 			cop->source.offset = txreq->offset;
148310958Sdme@sun.com 
148410958Sdme@sun.com 			cop->dest.u.gmfn = txp->xt_mfn;
148510958Sdme@sun.com 			cop->dest.domid = DOMID_SELF;
148610958Sdme@sun.com 			cop->dest.offset = 0;
14875084Sjohnlev 
148810958Sdme@sun.com 			cop->len = txreq->size;
148910958Sdme@sun.com 			cop->flags = GNTCOPY_source_gref;
149010958Sdme@sun.com 			cop->status = 0;
149110958Sdme@sun.com 
149210958Sdme@sun.com 			*txpp = txp;
149310958Sdme@sun.com 
149410958Sdme@sun.com 			txpp++;
149510958Sdme@sun.com 			cop++;
149610958Sdme@sun.com 			n_data_req++;
149710958Sdme@sun.com 
149810958Sdme@sun.com 			ASSERT(n_data_req <= NET_TX_RING_SIZE);
149910958Sdme@sun.com 		}
150010958Sdme@sun.com 
150110958Sdme@sun.com 		loop++;
15025084Sjohnlev 	}
15035084Sjohnlev 
150410958Sdme@sun.com 	xnbp->xnb_tx_ring.req_cons = loop;
15055084Sjohnlev 
150610958Sdme@sun.com 	if (n_data_req == 0)
150710958Sdme@sun.com 		goto around;
15085084Sjohnlev 
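	/*
	 * Perform all of the batched grant copy operations in a
	 * single hypercall.
	 */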
150910958Sdme@sun.com 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
151010958Sdme@sun.com 	    xnbp->xnb_tx_cop, n_data_req) != 0) {
15115084Sjohnlev 
151210958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
15135084Sjohnlev 
151410958Sdme@sun.com 		txpp = xnbp->xnb_tx_bufp;
151510958Sdme@sun.com 		i = n_data_req;
151610958Sdme@sun.com 		while (i > 0) {
151710958Sdme@sun.com 			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
15187615SMax.Zhen@Sun.COM 			txpp++;
151910958Sdme@sun.com 			i--;
15205084Sjohnlev 		}
15215084Sjohnlev 
15225084Sjohnlev 		goto finished;
15235084Sjohnlev 	}
15245084Sjohnlev 
152510958Sdme@sun.com 	txpp = xnbp->xnb_tx_bufp;
152610958Sdme@sun.com 	cop = xnbp->xnb_tx_cop;
152710958Sdme@sun.com 	i = n_data_req;
152810958Sdme@sun.com 
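	/*
	 * Walk the results of the copy operations. Failed copies are
	 * completed with an error response and their buffers freed;
	 * successful copies are chained as mblks for delivery.
	 */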
152910958Sdme@sun.com 	while (i > 0) {
15307615SMax.Zhen@Sun.COM 		xnb_txbuf_t *txp = *txpp;
15315084Sjohnlev 
153210958Sdme@sun.com 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
15335084Sjohnlev 
153410958Sdme@sun.com 		if (cop->status != 0) {
153510958Sdme@sun.com #ifdef XNB_DEBUG
153610958Sdme@sun.com 			cmn_err(CE_WARN, "xnb_from_peer: "
153710958Sdme@sun.com 			    "txpp 0x%p failed (%d)",
153810958Sdme@sun.com 			    (void *)*txpp, cop->status);
153910958Sdme@sun.com #endif /* XNB_DEBUG */
154011588Sdavid.edmondson@sun.com 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
154110958Sdme@sun.com 			freemsg(txp->xt_mblk);
154210958Sdme@sun.com 		} else {
154310958Sdme@sun.com 			mblk_t *mp;
15445084Sjohnlev 
154510958Sdme@sun.com 			mp = txp->xt_mblk;
154610958Sdme@sun.com 			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
154710958Sdme@sun.com 			mp->b_wptr += txreq->size;
154810958Sdme@sun.com 			mp->b_next = NULL;
15495084Sjohnlev 
15505084Sjohnlev 			/*
155110958Sdme@sun.com 			 * If the peer set checksum flags, have the
155210958Sdme@sun.com 			 * flavour-specific code fix up the checksum.
15535084Sjohnlev 			 */
155410958Sdme@sun.com 			if ((txreq->flags &
15555084Sjohnlev 			    (NETTXF_csum_blank | NETTXF_data_validated))
155610958Sdme@sun.com 			    != 0) {
15575741Smrj 				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
15585084Sjohnlev 				    mp, txreq->flags);
15597615SMax.Zhen@Sun.COM 				xnbp->xnb_stat_tx_cksum_no_need++;
15605084Sjohnlev 
156110958Sdme@sun.com 				txp->xt_mblk = mp;
156210958Sdme@sun.com 			}
15635084Sjohnlev 
15645084Sjohnlev 			if (head == NULL) {
15655084Sjohnlev 				ASSERT(tail == NULL);
15665084Sjohnlev 				head = mp;
15675084Sjohnlev 			} else {
15685084Sjohnlev 				ASSERT(tail != NULL);
15695084Sjohnlev 				tail->b_next = mp;
15705084Sjohnlev 			}
15715084Sjohnlev 			tail = mp;
157210958Sdme@sun.com 
157310958Sdme@sun.com 			xnbp->xnb_stat_opackets++;
157410958Sdme@sun.com 			xnbp->xnb_stat_obytes += txreq->size;
157510958Sdme@sun.com 
157611588Sdavid.edmondson@sun.com 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
15775084Sjohnlev 		}
157810958Sdme@sun.com 
157910958Sdme@sun.com 		txpp++;
158010958Sdme@sun.com 		cop++;
158110958Sdme@sun.com 		i--;
15825084Sjohnlev 	}
15835084Sjohnlev 
15845084Sjohnlev 	goto around;
15855084Sjohnlev 	/* NOTREACHED */
15865084Sjohnlev }
15875084Sjohnlev 
15885084Sjohnlev static uint_t
15895084Sjohnlev xnb_intr(caddr_t arg)
15905084Sjohnlev {
15915084Sjohnlev 	xnb_t *xnbp = (xnb_t *)arg;
15925084Sjohnlev 	mblk_t *mp;
15935084Sjohnlev 
15945741Smrj 	xnbp->xnb_stat_intr++;
15955084Sjohnlev 
15967615SMax.Zhen@Sun.COM 	mutex_enter(&xnbp->xnb_tx_lock);
15975084Sjohnlev 
15985741Smrj 	ASSERT(xnbp->xnb_connected);
15995084Sjohnlev 
16007615SMax.Zhen@Sun.COM 	mp = xnb_from_peer(xnbp);
16015084Sjohnlev 
16027615SMax.Zhen@Sun.COM 	mutex_exit(&xnbp->xnb_tx_lock);
16035084Sjohnlev 
16045741Smrj 	if (!xnbp->xnb_hotplugged) {
16057615SMax.Zhen@Sun.COM 		xnbp->xnb_stat_tx_too_early++;
16065084Sjohnlev 		goto fail;
16075084Sjohnlev 	}
16085084Sjohnlev 	if (mp == NULL) {
16095741Smrj 		xnbp->xnb_stat_spurious_intr++;
16105084Sjohnlev 		goto fail;
16115084Sjohnlev 	}
16125084Sjohnlev 
16137615SMax.Zhen@Sun.COM 	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
16145084Sjohnlev 
16155084Sjohnlev 	return (DDI_INTR_CLAIMED);
16165084Sjohnlev 
16175084Sjohnlev fail:
16185084Sjohnlev 	freemsgchain(mp);
16195084Sjohnlev 	return (DDI_INTR_CLAIMED);
16205084Sjohnlev }
16215084Sjohnlev 
162210958Sdme@sun.com /*
162310958Sdme@sun.com  * Read our configuration from xenstore.
162410958Sdme@sun.com  */
162510958Sdme@sun.com boolean_t
162610958Sdme@sun.com xnb_read_xs_config(xnb_t *xnbp)
162710958Sdme@sun.com {
162810958Sdme@sun.com 	char *xsname;
162910958Sdme@sun.com 	char mac[ETHERADDRL * 3];
163010958Sdme@sun.com 
163110958Sdme@sun.com 	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
163210958Sdme@sun.com 
163310958Sdme@sun.com 	if (xenbus_scanf(XBT_NULL, xsname,
163410958Sdme@sun.com 	    "mac", "%s", mac) != 0) {
163510958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_attach: "
163610958Sdme@sun.com 		    "cannot read mac address from %s",
163710958Sdme@sun.com 		    xsname);
163810958Sdme@sun.com 		return (B_FALSE);
163910958Sdme@sun.com 	}
164010958Sdme@sun.com 
164110958Sdme@sun.com 	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
164210958Sdme@sun.com 		cmn_err(CE_WARN,
164310958Sdme@sun.com 		    "xnb_attach: cannot parse mac address %s",
164410958Sdme@sun.com 		    mac);
164510958Sdme@sun.com 		return (B_FALSE);
164610958Sdme@sun.com 	}
164710958Sdme@sun.com 
164810958Sdme@sun.com 	return (B_TRUE);
164910958Sdme@sun.com }
165010958Sdme@sun.com 
165110958Sdme@sun.com /*
165210958Sdme@sun.com  * Read the configuration of the peer from xenstore.
165310958Sdme@sun.com  */
165410958Sdme@sun.com boolean_t
165510958Sdme@sun.com xnb_read_oe_config(xnb_t *xnbp)
165610958Sdme@sun.com {
165710958Sdme@sun.com 	char *oename;
165810958Sdme@sun.com 	int i;
165910958Sdme@sun.com 
166010958Sdme@sun.com 	oename = xvdi_get_oename(xnbp->xnb_devinfo);
166110958Sdme@sun.com 
166210958Sdme@sun.com 	if (xenbus_gather(XBT_NULL, oename,
166310958Sdme@sun.com 	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
166410958Sdme@sun.com 	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
166510958Sdme@sun.com 	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
166610958Sdme@sun.com 	    NULL) != 0) {
166710958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_read_oe_config: "
166810958Sdme@sun.com 		    "cannot read other-end details from %s",
166910958Sdme@sun.com 		    oename);
167010958Sdme@sun.com 		return (B_FALSE);
167110958Sdme@sun.com 	}
167210958Sdme@sun.com 
167310958Sdme@sun.com 	/*
167410958Sdme@sun.com 	 * Check whether our peer requests receive side hypervisor
167510958Sdme@sun.com 	 * copy.
167610958Sdme@sun.com 	 */
167710958Sdme@sun.com 	if (xenbus_scanf(XBT_NULL, oename,
167810958Sdme@sun.com 	    "request-rx-copy", "%d", &i) != 0)
167910958Sdme@sun.com 		i = 0;
168010958Sdme@sun.com 	if (i != 0)
168110958Sdme@sun.com 		xnbp->xnb_rx_hv_copy = B_TRUE;
168210958Sdme@sun.com 
168310958Sdme@sun.com 	/*
168410958Sdme@sun.com 	 * Check whether our peer requests multicast_control.
168510958Sdme@sun.com 	 */
168610958Sdme@sun.com 	if (xenbus_scanf(XBT_NULL, oename,
168710958Sdme@sun.com 	    "request-multicast-control", "%d", &i) != 0)
168810958Sdme@sun.com 		i = 0;
168910958Sdme@sun.com 	if (i != 0)
169010958Sdme@sun.com 		xnbp->xnb_multicast_control = B_TRUE;
169110958Sdme@sun.com 
169210958Sdme@sun.com 	/*
169310958Sdme@sun.com 	 * The Linux backend driver here checks to see if the peer has
169410958Sdme@sun.com 	 * set 'feature-no-csum-offload'. This is used to indicate
169510958Sdme@sun.com 	 * that the guest cannot handle receiving packets without a
169610958Sdme@sun.com 	 * valid checksum. We don't check here, because packets passed
169710958Sdme@sun.com 	 * to the peer _always_ have a valid checksum.
169810958Sdme@sun.com 	 *
169910958Sdme@sun.com 	 * There are three cases:
170010958Sdme@sun.com 	 *
170110958Sdme@sun.com 	 * - the NIC is dedicated: packets from the wire should always
170210958Sdme@sun.com 	 *   have a valid checksum. If the hardware validates the
170310958Sdme@sun.com 	 *   checksum then the relevant bit will be set in the packet
170410958Sdme@sun.com 	 *   attributes and we will inform the peer. It can choose to
170510958Sdme@sun.com 	 *   ignore the hardware verification.
170610958Sdme@sun.com 	 *
170710958Sdme@sun.com 	 * - the NIC is shared (VNIC) and a packet originates from the
170810958Sdme@sun.com 	 *   wire: this is the same as the case above - the packets
170910958Sdme@sun.com 	 *   will have a valid checksum.
171010958Sdme@sun.com 	 *
171110958Sdme@sun.com 	 * - the NIC is shared (VNIC) and a packet originates from the
171210958Sdme@sun.com 	 *   host: the MAC layer ensures that all such packets have a
171310958Sdme@sun.com 	 *   valid checksum by calculating one if the stack did not.
171410958Sdme@sun.com 	 */
171510958Sdme@sun.com 
171610958Sdme@sun.com 	return (B_TRUE);
171710958Sdme@sun.com }
171810958Sdme@sun.com 
171910958Sdme@sun.com void
172010958Sdme@sun.com xnb_start_connect(xnb_t *xnbp)
172110958Sdme@sun.com {
172210958Sdme@sun.com 	dev_info_t  *dip = xnbp->xnb_devinfo;
172310958Sdme@sun.com 
172410958Sdme@sun.com 	if (!xnb_connect_rings(dip)) {
172510958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_start_connect: "
172610958Sdme@sun.com 		    "cannot connect rings");
172710958Sdme@sun.com 		goto failed;
172810958Sdme@sun.com 	}
172910958Sdme@sun.com 
173010958Sdme@sun.com 	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
173110958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_start_connect: "
173210958Sdme@sun.com 		    "flavour failed to connect");
173310958Sdme@sun.com 		goto failed;
173410958Sdme@sun.com 	}
173510958Sdme@sun.com 
173610958Sdme@sun.com 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
173710958Sdme@sun.com 	return;
173810958Sdme@sun.com 
173910958Sdme@sun.com failed:
174010958Sdme@sun.com 	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
174110958Sdme@sun.com 	xnb_disconnect_rings(dip);
174210958Sdme@sun.com 	(void) xvdi_switch_state(dip, XBT_NULL,
174310958Sdme@sun.com 	    XenbusStateClosed);
174410958Sdme@sun.com 	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
174510958Sdme@sun.com }
174610958Sdme@sun.com 
17475084Sjohnlev static boolean_t
17485084Sjohnlev xnb_connect_rings(dev_info_t *dip)
17495084Sjohnlev {
17505084Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
17515084Sjohnlev 	struct gnttab_map_grant_ref map_op;
17525084Sjohnlev 
17535084Sjohnlev 	/*
17545084Sjohnlev 	 * Cannot attempt to connect the rings if already connected.
17555084Sjohnlev 	 */
17565741Smrj 	ASSERT(!xnbp->xnb_connected);
17575084Sjohnlev 
17585084Sjohnlev 	/*
17595084Sjohnlev 	 * 1. allocate a vaddr for the tx page, one for the rx page.
17605084Sjohnlev 	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
17615084Sjohnlev 	 *    into the allocated vaddr (one for tx, one for rx).
17625084Sjohnlev 	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
17635084Sjohnlev 	 *    bound to this domain.
17645084Sjohnlev 	 * 4. associate the event channel with an interrupt.
176510958Sdme@sun.com 	 * 5. enable the interrupt.
17665084Sjohnlev 	 */
17675084Sjohnlev 
17685084Sjohnlev 	/* 1.tx */
17695741Smrj 	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
17705084Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
17715741Smrj 	ASSERT(xnbp->xnb_tx_ring_addr != NULL);
17725084Sjohnlev 
17735084Sjohnlev 	/* 2.tx */
17745741Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
17755084Sjohnlev 	map_op.flags = GNTMAP_host_map;
17765741Smrj 	map_op.ref = xnbp->xnb_tx_ring_ref;
17775741Smrj 	map_op.dom = xnbp->xnb_peer;
17787756SMark.Johnson@Sun.COM 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
17797756SMark.Johnson@Sun.COM 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
17807756SMark.Johnson@Sun.COM 	    map_op.status != 0) {
17815084Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
17825084Sjohnlev 		goto fail;
17835084Sjohnlev 	}
17845741Smrj 	xnbp->xnb_tx_ring_handle = map_op.handle;
17855084Sjohnlev 
17865741Smrj 	/* LINTED: constant in conditional context */
17875741Smrj 	BACK_RING_INIT(&xnbp->xnb_tx_ring,
17885741Smrj 	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
17895084Sjohnlev 
17905084Sjohnlev 	/* 1.rx */
17915741Smrj 	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
17925084Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
17935741Smrj 	ASSERT(xnbp->xnb_rx_ring_addr != NULL);
17945084Sjohnlev 
17955084Sjohnlev 	/* 2.rx */
17965741Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
17975084Sjohnlev 	map_op.flags = GNTMAP_host_map;
17985741Smrj 	map_op.ref = xnbp->xnb_rx_ring_ref;
17995741Smrj 	map_op.dom = xnbp->xnb_peer;
18007756SMark.Johnson@Sun.COM 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
18017756SMark.Johnson@Sun.COM 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
18027756SMark.Johnson@Sun.COM 	    map_op.status != 0) {
18035084Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
18045084Sjohnlev 		goto fail;
18055084Sjohnlev 	}
18065741Smrj 	xnbp->xnb_rx_ring_handle = map_op.handle;
18075084Sjohnlev 
18085741Smrj 	/* LINTED: constant in conditional context */
18095741Smrj 	BACK_RING_INIT(&xnbp->xnb_rx_ring,
18105741Smrj 	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
18115084Sjohnlev 
18125084Sjohnlev 	/* 3 */
181310958Sdme@sun.com 	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
18145084Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: "
18155741Smrj 		    "cannot bind event channel %d", xnbp->xnb_evtchn);
18165741Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
18175084Sjohnlev 		goto fail;
18185084Sjohnlev 	}
18195741Smrj 	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
18205084Sjohnlev 
18215084Sjohnlev 	/*
18225084Sjohnlev 	 * It would be good to set the state to XenbusStateConnected
18235084Sjohnlev 	 * here as well, but then what if ddi_add_intr() failed?
18245084Sjohnlev 	 * Changing the state in the store will be noticed by the peer
18255084Sjohnlev 	 * and cannot be "taken back".
18265084Sjohnlev 	 */
18275741Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
18285741Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
18295084Sjohnlev 
18305741Smrj 	xnbp->xnb_connected = B_TRUE;
18315084Sjohnlev 
18325741Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
18335741Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
18345084Sjohnlev 
183510958Sdme@sun.com 	/* 4, 5 */
18365084Sjohnlev 	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
18375084Sjohnlev 	    != DDI_SUCCESS) {
18385084Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
18395084Sjohnlev 		goto fail;
18405084Sjohnlev 	}
18415741Smrj 	xnbp->xnb_irq = B_TRUE;
18425084Sjohnlev 
18435084Sjohnlev 	return (B_TRUE);
18445084Sjohnlev 
18455084Sjohnlev fail:
18465741Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
18475741Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
18485084Sjohnlev 
18495741Smrj 	xnbp->xnb_connected = B_FALSE;
185010958Sdme@sun.com 
18515741Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
18525741Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
18535084Sjohnlev 
18545084Sjohnlev 	return (B_FALSE);
18555084Sjohnlev }
18565084Sjohnlev 
18575084Sjohnlev static void
18585084Sjohnlev xnb_disconnect_rings(dev_info_t *dip)
18595084Sjohnlev {
18605084Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
18615084Sjohnlev 
18625741Smrj 	if (xnbp->xnb_irq) {
18635084Sjohnlev 		ddi_remove_intr(dip, 0, NULL);
18645741Smrj 		xnbp->xnb_irq = B_FALSE;
18655084Sjohnlev 	}
18665084Sjohnlev 
18675741Smrj 	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
18685084Sjohnlev 		xvdi_free_evtchn(dip);
18695741Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
18705084Sjohnlev 	}
18715084Sjohnlev 
18725741Smrj 	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
18735084Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
18745084Sjohnlev 
18755741Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
18765741Smrj 		    xnbp->xnb_rx_ring_addr;
18775084Sjohnlev 		unmap_op.dev_bus_addr = 0;
18785741Smrj 		unmap_op.handle = xnbp->xnb_rx_ring_handle;
18795084Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
18805084Sjohnlev 		    &unmap_op, 1) != 0)
18815084Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
18825084Sjohnlev 			    "cannot unmap rx-ring page (%d)",
18835084Sjohnlev 			    unmap_op.status);
18845084Sjohnlev 
18855741Smrj 		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
18865084Sjohnlev 	}
18875084Sjohnlev 
18885741Smrj 	if (xnbp->xnb_rx_ring_addr != NULL) {
18895741Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
18905741Smrj 		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
18915741Smrj 		xnbp->xnb_rx_ring_addr = NULL;
18925084Sjohnlev 	}
18935084Sjohnlev 
18945741Smrj 	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
18955084Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
18965084Sjohnlev 
18975741Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
18985741Smrj 		    xnbp->xnb_tx_ring_addr;
18995084Sjohnlev 		unmap_op.dev_bus_addr = 0;
19005741Smrj 		unmap_op.handle = xnbp->xnb_tx_ring_handle;
19015084Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
19025084Sjohnlev 		    &unmap_op, 1) != 0)
19035084Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
19045084Sjohnlev 			    "cannot unmap tx-ring page (%d)",
19055084Sjohnlev 			    unmap_op.status);
19065084Sjohnlev 
19075741Smrj 		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
19085084Sjohnlev 	}
19095084Sjohnlev 
19105741Smrj 	if (xnbp->xnb_tx_ring_addr != NULL) {
19115741Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
19125741Smrj 		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
19135741Smrj 		xnbp->xnb_tx_ring_addr = NULL;
19145084Sjohnlev 	}
19155084Sjohnlev }
19165084Sjohnlev 
19175084Sjohnlev static void
19185084Sjohnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
19195084Sjohnlev     void *arg, void *impl_data)
19205084Sjohnlev {
192110958Sdme@sun.com 	_NOTE(ARGUNUSED(id, arg));
19225084Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
19235084Sjohnlev 	XenbusState new_state = *(XenbusState *)impl_data;
19245084Sjohnlev 
19255084Sjohnlev 	ASSERT(xnbp != NULL);
19265084Sjohnlev 
19275084Sjohnlev 	switch (new_state) {
19285084Sjohnlev 	case XenbusStateConnected:
19297005Scz147101 		/* spurious state change */
19307005Scz147101 		if (xnbp->xnb_connected)
19317005Scz147101 			return;
19327005Scz147101 
193310958Sdme@sun.com 		if (!xnb_read_oe_config(xnbp) ||
193410958Sdme@sun.com 		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
193510958Sdme@sun.com 			cmn_err(CE_WARN, "xnb_oe_state_change: "
193610958Sdme@sun.com 			    "read otherend config error");
19375084Sjohnlev 			(void) xvdi_switch_state(dip, XBT_NULL,
19385084Sjohnlev 			    XenbusStateClosed);
19395084Sjohnlev 			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
194010958Sdme@sun.com 
194110958Sdme@sun.com 			break;
19425084Sjohnlev 		}
19435084Sjohnlev 
194410958Sdme@sun.com 
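		/*
		 * The frontend is ready; if the hotplug (backend) side
		 * is also ready, attempt to connect.
		 */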
194510958Sdme@sun.com 		mutex_enter(&xnbp->xnb_state_lock);
194610958Sdme@sun.com 		xnbp->xnb_fe_status = XNB_STATE_READY;
194710958Sdme@sun.com 		if (xnbp->xnb_be_status == XNB_STATE_READY)
194810958Sdme@sun.com 			xnb_start_connect(xnbp);
194910958Sdme@sun.com 		mutex_exit(&xnbp->xnb_state_lock);
195010958Sdme@sun.com 
19515084Sjohnlev 		/*
19525084Sjohnlev 		 * Now that we've attempted to connect it's reasonable
19535084Sjohnlev 		 * to allow an attempt to detach.
19545084Sjohnlev 		 */
19555741Smrj 		xnbp->xnb_detachable = B_TRUE;
19565084Sjohnlev 
19575084Sjohnlev 		break;
19585084Sjohnlev 
19595084Sjohnlev 	case XenbusStateClosing:
19605084Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
19615084Sjohnlev 
19625084Sjohnlev 		break;
19635084Sjohnlev 
19645084Sjohnlev 	case XenbusStateClosed:
19655741Smrj 		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
19665084Sjohnlev 
19675741Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
19685741Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
19695084Sjohnlev 
19705084Sjohnlev 		xnb_disconnect_rings(dip);
19715741Smrj 		xnbp->xnb_connected = B_FALSE;
19725084Sjohnlev 
19735741Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
19745741Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
19755084Sjohnlev 
19765084Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
19775084Sjohnlev 		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
19785084Sjohnlev 		/*
19795084Sjohnlev 		 * In all likelihood this is already set (in the above
19805084Sjohnlev 		 * case), but if the peer never attempted to connect
19815084Sjohnlev 		 * and the domain is destroyed we get here without
19825084Sjohnlev 		 * having been through the case above, so we set it to
19835084Sjohnlev 		 * be sure.
19845084Sjohnlev 		 */
19855741Smrj 		xnbp->xnb_detachable = B_TRUE;
19865084Sjohnlev 
19875084Sjohnlev 		break;
19885084Sjohnlev 
19895084Sjohnlev 	default:
19905084Sjohnlev 		break;
19915084Sjohnlev 	}
19925084Sjohnlev }
19935084Sjohnlev 
19945084Sjohnlev static void
19955084Sjohnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
19965084Sjohnlev     void *arg, void *impl_data)
19975084Sjohnlev {
199810958Sdme@sun.com 	_NOTE(ARGUNUSED(id, arg));
19995084Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
20005084Sjohnlev 	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
20015084Sjohnlev 
20025084Sjohnlev 	ASSERT(xnbp != NULL);
20035084Sjohnlev 
20045084Sjohnlev 	switch (state) {
20055084Sjohnlev 	case Connected:
20067005Scz147101 		/* spurious hotplug event */
20077005Scz147101 		if (xnbp->xnb_hotplugged)
200810958Sdme@sun.com 			break;
20097005Scz147101 
201010958Sdme@sun.com 		if (!xnb_read_xs_config(xnbp))
201110958Sdme@sun.com 			break;
201210958Sdme@sun.com 
201310958Sdme@sun.com 		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
201410958Sdme@sun.com 			break;
20155084Sjohnlev 
20165741Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
20175741Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
20185084Sjohnlev 
201910958Sdme@sun.com 		xnbp->xnb_hotplugged = B_TRUE;
20205084Sjohnlev 
20215741Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
20225741Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
202310958Sdme@sun.com 
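		/*
		 * The hotplug (backend) side is ready; if the frontend
		 * is also ready, attempt to connect.
		 */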
202410958Sdme@sun.com 		mutex_enter(&xnbp->xnb_state_lock);
202510958Sdme@sun.com 		xnbp->xnb_be_status = XNB_STATE_READY;
202610958Sdme@sun.com 		if (xnbp->xnb_fe_status == XNB_STATE_READY)
202710958Sdme@sun.com 			xnb_start_connect(xnbp);
202810958Sdme@sun.com 		mutex_exit(&xnbp->xnb_state_lock);
202910958Sdme@sun.com 
20305084Sjohnlev 		break;
20315084Sjohnlev 
20325084Sjohnlev 	default:
20335084Sjohnlev 		break;
20345084Sjohnlev 	}
20355084Sjohnlev }
20365084Sjohnlev 
20375084Sjohnlev static struct modldrv modldrv = {
20387351Sdme@sun.com 	&mod_miscops, "xnb",
20395084Sjohnlev };
20405084Sjohnlev 
20415084Sjohnlev static struct modlinkage modlinkage = {
20425084Sjohnlev 	MODREV_1, &modldrv, NULL
20435084Sjohnlev };
20445084Sjohnlev 
20455084Sjohnlev int
20465084Sjohnlev _init(void)
20475084Sjohnlev {
20485084Sjohnlev 	int i;
20495084Sjohnlev 
20505084Sjohnlev 	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
20515084Sjohnlev 
205210958Sdme@sun.com 	i = mod_install(&modlinkage);
205310958Sdme@sun.com 	if (i != DDI_SUCCESS)
205410958Sdme@sun.com 		mutex_destroy(&xnb_alloc_page_lock);
20555084Sjohnlev 
20565084Sjohnlev 	return (i);
20575084Sjohnlev }
20585084Sjohnlev 
20595084Sjohnlev int
20605084Sjohnlev _info(struct modinfo *modinfop)
20615084Sjohnlev {
20625084Sjohnlev 	return (mod_info(&modlinkage, modinfop));
20635084Sjohnlev }
20645084Sjohnlev 
20655084Sjohnlev int
20665084Sjohnlev _fini(void)
20675084Sjohnlev {
20685084Sjohnlev 	int i;
20695084Sjohnlev 
20705084Sjohnlev 	i = mod_remove(&modlinkage);
207110958Sdme@sun.com 	if (i == DDI_SUCCESS)
20725084Sjohnlev 		mutex_destroy(&xnb_alloc_page_lock);
207310958Sdme@sun.com 
20745084Sjohnlev 	return (i);
20755084Sjohnlev }
2076