15084Sjohnlev /*
25084Sjohnlev * CDDL HEADER START
35084Sjohnlev *
45084Sjohnlev * The contents of this file are subject to the terms of the
55084Sjohnlev * Common Development and Distribution License (the "License").
65084Sjohnlev * You may not use this file except in compliance with the License.
75084Sjohnlev *
85084Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev * See the License for the specific language governing permissions
115084Sjohnlev * and limitations under the License.
125084Sjohnlev *
135084Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev *
195084Sjohnlev * CDDL HEADER END
205084Sjohnlev */
215084Sjohnlev
225084Sjohnlev /*
2311588Sdavid.edmondson@sun.com * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
245084Sjohnlev * Use is subject to license terms.
255084Sjohnlev */
265084Sjohnlev
275084Sjohnlev #ifdef DEBUG
285084Sjohnlev #define XNB_DEBUG 1
295084Sjohnlev #endif /* DEBUG */
305084Sjohnlev
315084Sjohnlev #include "xnb.h"
325084Sjohnlev
335084Sjohnlev #include <sys/sunddi.h>
345084Sjohnlev #include <sys/sunndi.h>
355084Sjohnlev #include <sys/modctl.h>
365084Sjohnlev #include <sys/conf.h>
375084Sjohnlev #include <sys/mac.h>
3810958Sdme@sun.com #include <sys/mac_impl.h> /* For mac_fix_cksum(). */
395084Sjohnlev #include <sys/dlpi.h>
405084Sjohnlev #include <sys/strsubr.h>
415084Sjohnlev #include <sys/strsun.h>
425741Smrj #include <sys/types.h>
435084Sjohnlev #include <sys/pattr.h>
445084Sjohnlev #include <vm/seg_kmem.h>
455084Sjohnlev #include <vm/hat_i86.h>
465084Sjohnlev #include <xen/sys/xenbus_impl.h>
475084Sjohnlev #include <xen/sys/xendev.h>
485084Sjohnlev #include <sys/balloon_impl.h>
495084Sjohnlev #include <sys/evtchn_impl.h>
505084Sjohnlev #include <sys/gnttab.h>
515262Srscott #include <vm/vm_dep.h>
5210958Sdme@sun.com #include <sys/note.h>
535084Sjohnlev #include <sys/gld.h>
545084Sjohnlev #include <inet/ip.h>
555084Sjohnlev #include <inet/ip_impl.h>
565084Sjohnlev
575084Sjohnlev /*
587615SMax.Zhen@Sun.COM * The terms "transmit" and "receive" are used in alignment with domU,
597615SMax.Zhen@Sun.COM * which means that packets originating from the peer domU are "transmitted"
607615SMax.Zhen@Sun.COM * to other parts of the system, and packets destined for the peer are "received" from them.
615084Sjohnlev */
625084Sjohnlev
635084Sjohnlev /*
6410958Sdme@sun.com * Should we allow guests to manipulate multicast group membership?
655084Sjohnlev */
6610958Sdme@sun.com static boolean_t xnb_multicast_control = B_TRUE;
675084Sjohnlev
685084Sjohnlev static boolean_t xnb_connect_rings(dev_info_t *);
695084Sjohnlev static void xnb_disconnect_rings(dev_info_t *);
705084Sjohnlev static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
715084Sjohnlev void *, void *);
725084Sjohnlev static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
735084Sjohnlev void *, void *);
745084Sjohnlev
757615SMax.Zhen@Sun.COM static int xnb_txbuf_constructor(void *, void *, int);
767615SMax.Zhen@Sun.COM static void xnb_txbuf_destructor(void *, void *);
7710958Sdme@sun.com static void xnb_tx_notify_peer(xnb_t *, boolean_t);
787615SMax.Zhen@Sun.COM static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);
7910958Sdme@sun.com
8010958Sdme@sun.com mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
815741Smrj mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);
825741Smrj
8310958Sdme@sun.com static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
8410958Sdme@sun.com size_t, size_t, size_t, grant_ref_t);
8510958Sdme@sun.com #pragma inline(setup_gop)
8610958Sdme@sun.com static boolean_t is_foreign(void *);
8710958Sdme@sun.com #pragma inline(is_foreign)
885741Smrj
895084Sjohnlev #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
905084Sjohnlev #define INVALID_GRANT_REF ((grant_ref_t)-1)
915084Sjohnlev
925084Sjohnlev static kmutex_t xnb_alloc_page_lock;
935084Sjohnlev
945084Sjohnlev /*
9510958Sdme@sun.com * On a 32-bit PAE system physical and machine addresses are larger
9610958Sdme@sun.com * than 32 bits. ddi_btop() on such systems takes an unsigned long
9710958Sdme@sun.com * argument, and so addresses above 4G are truncated before ddi_btop()
9810958Sdme@sun.com * gets to see them. To avoid this, code the shift operation here.
9910958Sdme@sun.com */
10010958Sdme@sun.com #define xnb_btop(addr) ((addr) >> PAGESHIFT)
10110958Sdme@sun.com
10210958Sdme@sun.com /* DMA attributes for transmit and receive data */
10310958Sdme@sun.com static ddi_dma_attr_t buf_dma_attr = {
10410958Sdme@sun.com DMA_ATTR_V0, /* version of this structure */
10510958Sdme@sun.com 0, /* lowest usable address */
10610958Sdme@sun.com 0xffffffffffffffffULL, /* highest usable address */
10710958Sdme@sun.com 0x7fffffff, /* maximum DMAable byte count */
10810958Sdme@sun.com MMU_PAGESIZE, /* alignment in bytes */
10910958Sdme@sun.com 0x7ff, /* bitmap of burst sizes */
11010958Sdme@sun.com 1, /* minimum transfer */
11110958Sdme@sun.com 0xffffffffU, /* maximum transfer */
11210958Sdme@sun.com 0xffffffffffffffffULL, /* maximum segment length */
11310958Sdme@sun.com 1, /* maximum number of segments */
11410958Sdme@sun.com 1, /* granularity */
11510958Sdme@sun.com 0, /* flags (reserved) */
11610958Sdme@sun.com };
11710958Sdme@sun.com
11810958Sdme@sun.com /* DMA access attributes for data: NOT to be byte swapped. */
11910958Sdme@sun.com static ddi_device_acc_attr_t data_accattr = {
12010958Sdme@sun.com DDI_DEVICE_ATTR_V0,
12110958Sdme@sun.com DDI_NEVERSWAP_ACC,
12210958Sdme@sun.com DDI_STRICTORDER_ACC
12310958Sdme@sun.com };
12410958Sdme@sun.com
12510958Sdme@sun.com /*
1265084Sjohnlev * Statistics.
1275084Sjohnlev */
12811588Sdavid.edmondson@sun.com static const char * const aux_statistics[] = {
1297615SMax.Zhen@Sun.COM "rx_cksum_deferred",
1307615SMax.Zhen@Sun.COM "tx_cksum_no_need",
1317615SMax.Zhen@Sun.COM "rx_rsp_notok",
1325084Sjohnlev "tx_notify_deferred",
1335084Sjohnlev "tx_notify_sent",
1345084Sjohnlev "rx_notify_deferred",
1355084Sjohnlev "rx_notify_sent",
1365084Sjohnlev "tx_too_early",
1375084Sjohnlev "rx_too_early",
1385084Sjohnlev "rx_allocb_failed",
1395741Smrj "tx_allocb_failed",
1407615SMax.Zhen@Sun.COM "rx_foreign_page",
1415084Sjohnlev "mac_full",
1425084Sjohnlev "spurious_intr",
1435084Sjohnlev "allocation_success",
1445084Sjohnlev "allocation_failure",
1455084Sjohnlev "small_allocation_success",
1465084Sjohnlev "small_allocation_failure",
1475741Smrj "other_allocation_failure",
1487615SMax.Zhen@Sun.COM "rx_pageboundary_crossed",
1497615SMax.Zhen@Sun.COM "rx_cpoparea_grown",
1505084Sjohnlev "csum_hardware",
1515084Sjohnlev "csum_software",
15211588Sdavid.edmondson@sun.com "tx_overflow_page",
15311588Sdavid.edmondson@sun.com "tx_unexpected_flags",
1545084Sjohnlev };
1555084Sjohnlev
1565084Sjohnlev static int
1575084Sjohnlev xnb_ks_aux_update(kstat_t *ksp, int flag)
1585084Sjohnlev {
1595084Sjohnlev xnb_t *xnbp;
1605084Sjohnlev kstat_named_t *knp;
1615084Sjohnlev
1625084Sjohnlev if (flag != KSTAT_READ)
1635084Sjohnlev return (EACCES);
1645084Sjohnlev
1655084Sjohnlev xnbp = ksp->ks_private;
1665084Sjohnlev knp = ksp->ks_data;
1675084Sjohnlev
1685084Sjohnlev /*
1695084Sjohnlev * Assignment order should match that of the names in
1705084Sjohnlev * aux_statistics.
1715084Sjohnlev */
1727615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
1737615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
1747615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
1755741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
1765741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
1775741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
1785741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
1795741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
1805741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
1815741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
1825741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
1837615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
1845741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
1855741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
1865741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
1875741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
1885741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
1895741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
1905741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
1917615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
1927615SMax.Zhen@Sun.COM (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
1935741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
1945741Smrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
19511588Sdavid.edmondson@sun.com (knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
19611588Sdavid.edmondson@sun.com (knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;
1975084Sjohnlev
1985084Sjohnlev return (0);
1995084Sjohnlev }
2005084Sjohnlev
2015084Sjohnlev static boolean_t
2025084Sjohnlev xnb_ks_init(xnb_t *xnbp)
2035084Sjohnlev {
2045084Sjohnlev int nstat = sizeof (aux_statistics) /
2055084Sjohnlev sizeof (aux_statistics[0]);
20611588Sdavid.edmondson@sun.com const char * const *cp = aux_statistics;
2075084Sjohnlev kstat_named_t *knp;
2085084Sjohnlev
2095084Sjohnlev /*
2105084Sjohnlev * Create and initialise kstats.
2115084Sjohnlev */
2125741Smrj xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
2135741Smrj ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
2145084Sjohnlev KSTAT_TYPE_NAMED, nstat, 0);
2155741Smrj if (xnbp->xnb_kstat_aux == NULL)
2165084Sjohnlev return (B_FALSE);
2175084Sjohnlev
2185741Smrj xnbp->xnb_kstat_aux->ks_private = xnbp;
2195741Smrj xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;
2205084Sjohnlev
2215741Smrj knp = xnbp->xnb_kstat_aux->ks_data;
2225084Sjohnlev while (nstat > 0) {
2235084Sjohnlev kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
2245084Sjohnlev
2255084Sjohnlev knp++;
2265084Sjohnlev cp++;
2275084Sjohnlev nstat--;
2285084Sjohnlev }
2295084Sjohnlev
2305741Smrj kstat_install(xnbp->xnb_kstat_aux);
2315084Sjohnlev
2325084Sjohnlev return (B_TRUE);
2335084Sjohnlev }
2345084Sjohnlev
2355084Sjohnlev static void
2365084Sjohnlev xnb_ks_free(xnb_t *xnbp)
2375084Sjohnlev {
2385741Smrj kstat_delete(xnbp->xnb_kstat_aux);
2395084Sjohnlev }
2405084Sjohnlev
2415084Sjohnlev /*
24210958Sdme@sun.com * Calculate and insert the transport checksum for an arbitrary packet.
2435084Sjohnlev */
2445084Sjohnlev static mblk_t *
2455084Sjohnlev xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
2465084Sjohnlev {
24710958Sdme@sun.com _NOTE(ARGUNUSED(xnbp));
24810958Sdme@sun.com
2495084Sjohnlev /*
25010958Sdme@sun.com * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
2515084Sjohnlev * because it doesn't cover all of the interesting cases :-(
2525084Sjohnlev */
253*11878SVenu.Iyer@Sun.COM mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
2545084Sjohnlev
2558275SEric Cheng return (mac_fix_cksum(mp));
2565084Sjohnlev }
2575084Sjohnlev
2585084Sjohnlev mblk_t *
2595084Sjohnlev xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
2605084Sjohnlev {
2615084Sjohnlev struct ether_header *ehp;
2625084Sjohnlev uint16_t sap;
2635084Sjohnlev uint32_t offset;
2645084Sjohnlev ipha_t *ipha;
2655084Sjohnlev
2665084Sjohnlev ASSERT(mp->b_next == NULL);
2675084Sjohnlev
2685084Sjohnlev /*
2695084Sjohnlev * Check that the packet is contained in a single mblk. In
27010958Sdme@sun.com * the "from peer" path this is true today, but may change
2715084Sjohnlev * when scatter gather support is added. In the "to peer"
2725084Sjohnlev * path we cannot be sure, but in most cases it will be true
2735084Sjohnlev * (in the xnbo case the packet has come from a MAC device
2745084Sjohnlev * which is unlikely to split packets).
2755084Sjohnlev */
2765084Sjohnlev if (mp->b_cont != NULL)
2775084Sjohnlev goto software;
2785084Sjohnlev
2795084Sjohnlev /*
2805084Sjohnlev * If the MAC has no hardware capability don't do any further
2815084Sjohnlev * checking.
2825084Sjohnlev */
2835084Sjohnlev if (capab == 0)
2845084Sjohnlev goto software;
2855084Sjohnlev
2865084Sjohnlev ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
2875084Sjohnlev ehp = (struct ether_header *)mp->b_rptr;
2885084Sjohnlev
2895084Sjohnlev if (ntohs(ehp->ether_type) == VLAN_TPID) {
2905084Sjohnlev struct ether_vlan_header *evhp;
2915084Sjohnlev
2925084Sjohnlev ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
2935084Sjohnlev evhp = (struct ether_vlan_header *)mp->b_rptr;
2945084Sjohnlev sap = ntohs(evhp->ether_type);
2955084Sjohnlev offset = sizeof (struct ether_vlan_header);
2965084Sjohnlev } else {
2975084Sjohnlev sap = ntohs(ehp->ether_type);
2985084Sjohnlev offset = sizeof (struct ether_header);
2995084Sjohnlev }
3005084Sjohnlev
3015084Sjohnlev /*
3025084Sjohnlev * We only attempt to do IPv4 packets in hardware.
3035084Sjohnlev */
3045084Sjohnlev if (sap != ETHERTYPE_IP)
3055084Sjohnlev goto software;
3065084Sjohnlev
3075084Sjohnlev /*
3085084Sjohnlev * We know that this is an IPv4 packet.
3095084Sjohnlev */
3105084Sjohnlev ipha = (ipha_t *)(mp->b_rptr + offset);
3115084Sjohnlev
3125084Sjohnlev switch (ipha->ipha_protocol) {
3135084Sjohnlev case IPPROTO_TCP:
3147351Sdme@sun.com case IPPROTO_UDP: {
3157351Sdme@sun.com uint32_t start, length, stuff, cksum;
3167351Sdme@sun.com uint16_t *stuffp;
3177351Sdme@sun.com
3185084Sjohnlev /*
3197351Sdme@sun.com * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
3207351Sdme@sun.com * can use full IPv4 and partial checksum offload.
3215084Sjohnlev */
3227351Sdme@sun.com if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
3237351Sdme@sun.com break;
3247351Sdme@sun.com
3257351Sdme@sun.com start = IP_SIMPLE_HDR_LENGTH;
3267351Sdme@sun.com length = ntohs(ipha->ipha_length);
3277351Sdme@sun.com if (ipha->ipha_protocol == IPPROTO_TCP) {
3287351Sdme@sun.com stuff = start + TCP_CHECKSUM_OFFSET;
3297351Sdme@sun.com cksum = IP_TCP_CSUM_COMP;
3307351Sdme@sun.com } else {
3317351Sdme@sun.com stuff = start + UDP_CHECKSUM_OFFSET;
3327351Sdme@sun.com cksum = IP_UDP_CSUM_COMP;
3337351Sdme@sun.com }
3347351Sdme@sun.com stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);
3357351Sdme@sun.com
3367351Sdme@sun.com if (capab & HCKSUM_INET_FULL_V4) {
3377351Sdme@sun.com /*
3387351Sdme@sun.com * Some devices require that the checksum
3397351Sdme@sun.com * field of the packet is zero for full
3407351Sdme@sun.com * offload.
3417351Sdme@sun.com */
3427351Sdme@sun.com *stuffp = 0;
3437351Sdme@sun.com
344*11878SVenu.Iyer@Sun.COM mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
3455084Sjohnlev
3465741Smrj xnbp->xnb_stat_csum_hardware++;
3475084Sjohnlev
3485084Sjohnlev return (mp);
3495084Sjohnlev }
3505084Sjohnlev
3517351Sdme@sun.com if (capab & HCKSUM_INET_PARTIAL) {
3527351Sdme@sun.com if (*stuffp == 0) {
3537351Sdme@sun.com ipaddr_t src, dst;
3547351Sdme@sun.com
3557351Sdme@sun.com /*
3567351Sdme@sun.com * Older Solaris guests don't insert
3577351Sdme@sun.com * the pseudo-header checksum, so we
3587351Sdme@sun.com * calculate it here.
3597351Sdme@sun.com */
3607351Sdme@sun.com src = ipha->ipha_src;
3617351Sdme@sun.com dst = ipha->ipha_dst;
3627351Sdme@sun.com
3637351Sdme@sun.com cksum += (dst >> 16) + (dst & 0xFFFF);
3647351Sdme@sun.com cksum += (src >> 16) + (src & 0xFFFF);
3657351Sdme@sun.com cksum += length - IP_SIMPLE_HDR_LENGTH;
3665084Sjohnlev
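/* Fold the carries (twice, in case the first fold itself carries) so the sum fits in 16 bits. */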
3677351Sdme@sun.com cksum = (cksum >> 16) + (cksum & 0xFFFF);
3687351Sdme@sun.com cksum = (cksum >> 16) + (cksum & 0xFFFF);
3697351Sdme@sun.com
3707351Sdme@sun.com ASSERT(cksum <= 0xFFFF);
3717351Sdme@sun.com
3727351Sdme@sun.com *stuffp = (uint16_t)(cksum ? cksum : ~cksum);
3737351Sdme@sun.com }
3747351Sdme@sun.com
375*11878SVenu.Iyer@Sun.COM mac_hcksum_set(mp, start, stuff, length, 0,
376*11878SVenu.Iyer@Sun.COM HCK_PARTIALCKSUM);
3777351Sdme@sun.com
3787351Sdme@sun.com xnbp->xnb_stat_csum_hardware++;
3797351Sdme@sun.com
3807351Sdme@sun.com return (mp);
3817351Sdme@sun.com }
3827351Sdme@sun.com
3837351Sdme@sun.com /* NOTREACHED */
3845084Sjohnlev break;
3857351Sdme@sun.com }
3865084Sjohnlev
3875084Sjohnlev default:
3885084Sjohnlev /* Use software. */
3895084Sjohnlev break;
3905084Sjohnlev }
3915084Sjohnlev
3925084Sjohnlev software:
3935084Sjohnlev /*
3945084Sjohnlev * We are not able to use any offload so do the whole thing in
3955084Sjohnlev * software.
3965084Sjohnlev */
3975741Smrj xnbp->xnb_stat_csum_software++;
3985084Sjohnlev
3995084Sjohnlev return (xnb_software_csum(xnbp, mp));
4005084Sjohnlev }
4015084Sjohnlev
4025084Sjohnlev int
4035084Sjohnlev xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
4045084Sjohnlev {
4055084Sjohnlev xnb_t *xnbp;
40610958Sdme@sun.com char *xsname;
40710958Sdme@sun.com char cachename[32];
4085084Sjohnlev
4095084Sjohnlev xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);
4105084Sjohnlev
4115741Smrj xnbp->xnb_flavour = flavour;
4125741Smrj xnbp->xnb_flavour_data = flavour_data;
4135741Smrj xnbp->xnb_devinfo = dip;
4145741Smrj xnbp->xnb_evtchn = INVALID_EVTCHN;
4155741Smrj xnbp->xnb_irq = B_FALSE;
4165741Smrj xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
4175741Smrj xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
4185741Smrj xnbp->xnb_connected = B_FALSE;
4195741Smrj xnbp->xnb_hotplugged = B_FALSE;
4205741Smrj xnbp->xnb_detachable = B_FALSE;
4215741Smrj xnbp->xnb_peer = xvdi_get_oeid(dip);
42210958Sdme@sun.com xnbp->xnb_be_status = XNB_STATE_INIT;
42310958Sdme@sun.com xnbp->xnb_fe_status = XNB_STATE_INIT;
4245084Sjohnlev
4257615SMax.Zhen@Sun.COM xnbp->xnb_tx_buf_count = 0;
4265741Smrj
42710958Sdme@sun.com xnbp->xnb_rx_hv_copy = B_FALSE;
42810958Sdme@sun.com xnbp->xnb_multicast_control = B_FALSE;
4295084Sjohnlev
4307615SMax.Zhen@Sun.COM xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
4317615SMax.Zhen@Sun.COM ASSERT(xnbp->xnb_rx_va != NULL);
4325084Sjohnlev
4335741Smrj if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
4345084Sjohnlev != DDI_SUCCESS)
4355084Sjohnlev goto failure;
4365084Sjohnlev
43710958Sdme@sun.com /* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
4387615SMax.Zhen@Sun.COM xnbp->xnb_rx_cpop = NULL;
43910958Sdme@sun.com xnbp->xnb_rx_cpop_count = 0;
4405741Smrj
4415741Smrj mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
4425741Smrj xnbp->xnb_icookie);
4435741Smrj mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
4445741Smrj xnbp->xnb_icookie);
44510958Sdme@sun.com mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
44610958Sdme@sun.com xnbp->xnb_icookie);
4475084Sjohnlev
44810958Sdme@sun.com /* Set driver private pointer now. */
4495084Sjohnlev ddi_set_driver_private(dip, xnbp);
4505084Sjohnlev
45110958Sdme@sun.com (void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
45210958Sdme@sun.com xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
45310958Sdme@sun.com sizeof (xnb_txbuf_t), 0,
45410958Sdme@sun.com xnb_txbuf_constructor, xnb_txbuf_destructor,
45510958Sdme@sun.com NULL, xnbp, NULL, 0);
45610958Sdme@sun.com if (xnbp->xnb_tx_buf_cache == NULL)
45710958Sdme@sun.com goto failure_0;
45810958Sdme@sun.com
4595084Sjohnlev if (!xnb_ks_init(xnbp))
4605741Smrj goto failure_1;
4615084Sjohnlev
4625084Sjohnlev /*
4635084Sjohnlev * Receive notification of changes in the state of the
4645084Sjohnlev * driver in the guest domain.
4655084Sjohnlev */
4667756SMark.Johnson@Sun.COM if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
4677756SMark.Johnson@Sun.COM NULL) != DDI_SUCCESS)
4685741Smrj goto failure_2;
4695084Sjohnlev
4705084Sjohnlev /*
4715084Sjohnlev * Receive notification of hotplug events.
4725084Sjohnlev */
4737756SMark.Johnson@Sun.COM if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
4747756SMark.Johnson@Sun.COM NULL) != DDI_SUCCESS)
4755741Smrj goto failure_2;
4765084Sjohnlev
4775084Sjohnlev xsname = xvdi_get_xsname(dip);
4785084Sjohnlev
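/*
 * Advertise the features that we support to the peer via the
 * xenstore: control of multicast group membership and copied
 * (rather than flipped) receive buffers.
 */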
4795084Sjohnlev if (xenbus_printf(XBT_NULL, xsname,
48010958Sdme@sun.com "feature-multicast-control", "%d",
48110958Sdme@sun.com xnb_multicast_control ? 1 : 0) != 0)
4825741Smrj goto failure_3;
4835741Smrj
4845741Smrj if (xenbus_printf(XBT_NULL, xsname,
48510958Sdme@sun.com "feature-rx-copy", "%d", 1) != 0)
4865741Smrj goto failure_3;
4875741Smrj /*
4885741Smrj * Linux domUs seem to depend on "feature-rx-flip" being 0
4895741Smrj * in addition to "feature-rx-copy" being 1. It seems strange
4905741Smrj * to use four possible states to describe a binary decision,
4915741Smrj * but we might as well play nice.
4925741Smrj */
4935741Smrj if (xenbus_printf(XBT_NULL, xsname,
49410958Sdme@sun.com "feature-rx-flip", "%d", 0) != 0)
4955741Smrj goto failure_3;
4965084Sjohnlev
4975084Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
4985084Sjohnlev (void) xvdi_post_event(dip, XEN_HP_ADD);
4995084Sjohnlev
5005084Sjohnlev return (DDI_SUCCESS);
5015084Sjohnlev
5025741Smrj failure_3:
5035084Sjohnlev xvdi_remove_event_handler(dip, NULL);
5045084Sjohnlev
5055741Smrj failure_2:
5065084Sjohnlev xnb_ks_free(xnbp);
5075084Sjohnlev
5085741Smrj failure_1:
50910958Sdme@sun.com kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
51010958Sdme@sun.com
51110958Sdme@sun.com failure_0:
51210958Sdme@sun.com mutex_destroy(&xnbp->xnb_state_lock);
5135741Smrj mutex_destroy(&xnbp->xnb_rx_lock);
5145741Smrj mutex_destroy(&xnbp->xnb_tx_lock);
5155084Sjohnlev
5165084Sjohnlev failure:
5177615SMax.Zhen@Sun.COM vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
5185084Sjohnlev kmem_free(xnbp, sizeof (*xnbp));
5195084Sjohnlev return (DDI_FAILURE);
5205084Sjohnlev }
5215084Sjohnlev
5225084Sjohnlev void
5235084Sjohnlev xnb_detach(dev_info_t *dip)
5245084Sjohnlev {
5255084Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
5265084Sjohnlev
5275084Sjohnlev ASSERT(xnbp != NULL);
5285741Smrj ASSERT(!xnbp->xnb_connected);
5297615SMax.Zhen@Sun.COM ASSERT(xnbp->xnb_tx_buf_count == 0);
5305084Sjohnlev
5315084Sjohnlev xnb_disconnect_rings(dip);
5325084Sjohnlev
5335084Sjohnlev xvdi_remove_event_handler(dip, NULL);
5345084Sjohnlev
5355084Sjohnlev xnb_ks_free(xnbp);
5365084Sjohnlev
53710958Sdme@sun.com kmem_cache_destroy(xnbp->xnb_tx_buf_cache);
53810958Sdme@sun.com
5395084Sjohnlev ddi_set_driver_private(dip, NULL);
5405084Sjohnlev
54110958Sdme@sun.com mutex_destroy(&xnbp->xnb_state_lock);
54210958Sdme@sun.com mutex_destroy(&xnbp->xnb_rx_lock);
5435741Smrj mutex_destroy(&xnbp->xnb_tx_lock);
5445084Sjohnlev
54510958Sdme@sun.com if (xnbp->xnb_rx_cpop_count > 0)
54610958Sdme@sun.com kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
54710958Sdme@sun.com * xnbp->xnb_rx_cpop_count);
5485741Smrj
5497615SMax.Zhen@Sun.COM ASSERT(xnbp->xnb_rx_va != NULL);
5507615SMax.Zhen@Sun.COM vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
5515084Sjohnlev
5525084Sjohnlev kmem_free(xnbp, sizeof (*xnbp));
5535084Sjohnlev }
5545084Sjohnlev
55510958Sdme@sun.com /*
55610958Sdme@sun.com * Allocate a page from the hypervisor to be flipped to the peer.
55710958Sdme@sun.com *
55810958Sdme@sun.com * Try to get pages in batches to reduce the overhead of calls into
55910958Sdme@sun.com * the balloon driver.
56010958Sdme@sun.com */
5615084Sjohnlev static mfn_t
5625084Sjohnlev xnb_alloc_page(xnb_t *xnbp)
5635084Sjohnlev {
5645084Sjohnlev #define WARNING_RATE_LIMIT 100
5655084Sjohnlev #define BATCH_SIZE 256
5665084Sjohnlev static mfn_t mfns[BATCH_SIZE]; /* common across all instances */
5675084Sjohnlev static int nth = BATCH_SIZE;
5685084Sjohnlev mfn_t mfn;
5695084Sjohnlev
5705084Sjohnlev mutex_enter(&xnb_alloc_page_lock);
5715084Sjohnlev if (nth == BATCH_SIZE) {
5725084Sjohnlev if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
5735741Smrj xnbp->xnb_stat_allocation_failure++;
5745084Sjohnlev mutex_exit(&xnb_alloc_page_lock);
5755084Sjohnlev
5765084Sjohnlev /*
5775084Sjohnlev * Try for a single page in low memory situations.
5785084Sjohnlev */
5795084Sjohnlev if (balloon_alloc_pages(1, &mfn) != 1) {
5805741Smrj if ((xnbp->xnb_stat_small_allocation_failure++
5815741Smrj % WARNING_RATE_LIMIT) == 0)
5825084Sjohnlev cmn_err(CE_WARN, "xnb_alloc_page: "
5835084Sjohnlev "Cannot allocate memory to "
5845084Sjohnlev "transfer packets to peer.");
5855084Sjohnlev return (0);
5865084Sjohnlev } else {
5875741Smrj xnbp->xnb_stat_small_allocation_success++;
5885084Sjohnlev return (mfn);
5895084Sjohnlev }
5905084Sjohnlev }
5915084Sjohnlev
5925084Sjohnlev nth = 0;
5935741Smrj xnbp->xnb_stat_allocation_success++;
5945084Sjohnlev }
5955084Sjohnlev
5965084Sjohnlev mfn = mfns[nth++];
5975084Sjohnlev mutex_exit(&xnb_alloc_page_lock);
5985084Sjohnlev
5995084Sjohnlev ASSERT(mfn != 0);
6005084Sjohnlev
6015084Sjohnlev return (mfn);
6025084Sjohnlev #undef BATCH_SIZE
6035084Sjohnlev #undef WARNING_RATE_LIMIT
6045084Sjohnlev }
6055084Sjohnlev
60610958Sdme@sun.com /*
60710958Sdme@sun.com * Free a page back to the hypervisor.
60810958Sdme@sun.com *
60910958Sdme@sun.com * This happens only in the error path, so batching is not worth the
61010958Sdme@sun.com * complication.
61110958Sdme@sun.com */
6125084Sjohnlev static void
6135084Sjohnlev xnb_free_page(xnb_t *xnbp, mfn_t mfn)
6145084Sjohnlev {
61510958Sdme@sun.com _NOTE(ARGUNUSED(xnbp));
6165084Sjohnlev int r;
6175262Srscott pfn_t pfn;
6185262Srscott
6195262Srscott pfn = xen_assign_pfn(mfn);
6205262Srscott pfnzero(pfn, 0, PAGESIZE);
6215262Srscott xen_release_pfn(pfn);
6225084Sjohnlev
6235084Sjohnlev if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
6245084Sjohnlev cmn_err(CE_WARN, "free_page: cannot decrease memory "
6255084Sjohnlev "reservation (%d): page kept but unusable (mfn = 0x%lx).",
6265084Sjohnlev r, mfn);
6275084Sjohnlev }
6285084Sjohnlev }
6295084Sjohnlev
6305741Smrj /*
63110958Sdme@sun.com * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
63210958Sdme@sun.com * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
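 *
 * The result is the smaller of the number of requests the peer has
 * posted but we have not yet consumed (req_prod - loop) and the
 * number of response slots still available (RING_SIZE - (loop - prod)).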
6335741Smrj */
6345741Smrj #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
6355741Smrj ((((_r)->sring->req_prod - loop) < \
6365741Smrj (RING_SIZE(_r) - (loop - prod))) ? \
6375741Smrj ((_r)->sring->req_prod - loop) : \
6385741Smrj (RING_SIZE(_r) - (loop - prod)))
6395741Smrj
64010958Sdme@sun.com /*
64110958Sdme@sun.com * Pass packets to the peer using page flipping.
64210958Sdme@sun.com */
6435084Sjohnlev mblk_t *
6445084Sjohnlev xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
6455084Sjohnlev {
6465084Sjohnlev mblk_t *free = mp, *prev = NULL;
6475084Sjohnlev size_t len;
6485084Sjohnlev gnttab_transfer_t *gop;
6495084Sjohnlev boolean_t notify;
6505084Sjohnlev RING_IDX loop, prod, end;
6515084Sjohnlev
6525084Sjohnlev /*
6535084Sjohnlev * For each packet the sequence of operations is:
6545084Sjohnlev *
6555084Sjohnlev * 1. get a new page from the hypervisor.
6565084Sjohnlev * 2. get a request slot from the ring.
6575084Sjohnlev * 3. copy the data into the new page.
6585084Sjohnlev * 4. transfer the page to the peer.
6595084Sjohnlev * 5. update the request slot.
6605084Sjohnlev * 6. kick the peer.
6615084Sjohnlev * 7. free mp.
6625084Sjohnlev *
6635084Sjohnlev * In order to reduce the number of hypercalls, we prepare
6645084Sjohnlev * several packets for the peer and perform a single hypercall
6655084Sjohnlev * to transfer them.
6665084Sjohnlev */
6675084Sjohnlev
6687615SMax.Zhen@Sun.COM mutex_enter(&xnbp->xnb_rx_lock);
6695084Sjohnlev
6705084Sjohnlev /*
6715084Sjohnlev * If we are not connected to the peer or have not yet
6725084Sjohnlev * finished hotplug it is too early to pass packets to the
6735084Sjohnlev * peer.
6745084Sjohnlev */
6755741Smrj if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
6767615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
6777615SMax.Zhen@Sun.COM DTRACE_PROBE(flip_rx_too_early);
6787615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_too_early++;
6795084Sjohnlev return (mp);
6805084Sjohnlev }
6815084Sjohnlev
6825741Smrj loop = xnbp->xnb_rx_ring.req_cons;
6835741Smrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
6847615SMax.Zhen@Sun.COM gop = xnbp->xnb_rx_top;
6855084Sjohnlev
6865084Sjohnlev while ((mp != NULL) &&
6875741Smrj XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
6885084Sjohnlev
6895084Sjohnlev mfn_t mfn;
6905084Sjohnlev pfn_t pfn;
6915084Sjohnlev netif_rx_request_t *rxreq;
6925084Sjohnlev netif_rx_response_t *rxresp;
6935084Sjohnlev char *valoop;
6945084Sjohnlev mblk_t *ml;
6955084Sjohnlev uint16_t cksum_flags;
6965084Sjohnlev
6975084Sjohnlev /* 1 */
6985084Sjohnlev if ((mfn = xnb_alloc_page(xnbp)) == 0) {
6997615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_defer++;
7005084Sjohnlev break;
7015084Sjohnlev }
7025084Sjohnlev
7035084Sjohnlev /* 2 */
7045741Smrj rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
7055084Sjohnlev
7065084Sjohnlev #ifdef XNB_DEBUG
7075084Sjohnlev if (!(rxreq->id < NET_RX_RING_SIZE))
7085084Sjohnlev cmn_err(CE_PANIC, "xnb_to_peer: "
7095084Sjohnlev "id %d out of range in request 0x%p",
7105084Sjohnlev rxreq->id, (void *)rxreq);
7115084Sjohnlev #endif /* XNB_DEBUG */
7125084Sjohnlev
7135084Sjohnlev /* Assign a pfn and map the new page at the allocated va. */
7145084Sjohnlev pfn = xen_assign_pfn(mfn);
7157615SMax.Zhen@Sun.COM hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
7165084Sjohnlev pfn, PROT_READ | PROT_WRITE, HAT_LOAD);
7175084Sjohnlev
7185084Sjohnlev /* 3 */
7195084Sjohnlev len = 0;
7208757Sdme@sun.com valoop = xnbp->xnb_rx_va;
7215084Sjohnlev for (ml = mp; ml != NULL; ml = ml->b_cont) {
7225084Sjohnlev size_t chunk = ml->b_wptr - ml->b_rptr;
7235084Sjohnlev
7245084Sjohnlev bcopy(ml->b_rptr, valoop, chunk);
7255084Sjohnlev valoop += chunk;
7265084Sjohnlev len += chunk;
7275084Sjohnlev }
7285084Sjohnlev
7298757Sdme@sun.com ASSERT(len < PAGESIZE);
7305084Sjohnlev
7315084Sjohnlev /* Release the pfn. */
7327615SMax.Zhen@Sun.COM hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
7335084Sjohnlev HAT_UNLOAD_UNMAP);
7345084Sjohnlev xen_release_pfn(pfn);
7355084Sjohnlev
7365084Sjohnlev /* 4 */
7375084Sjohnlev gop->mfn = mfn;
7385741Smrj gop->domid = xnbp->xnb_peer;
7395084Sjohnlev gop->ref = rxreq->gref;
7405084Sjohnlev
7415084Sjohnlev /* 5.1 */
7425741Smrj rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
7438757Sdme@sun.com rxresp->offset = 0;
7445084Sjohnlev rxresp->flags = 0;
7455084Sjohnlev
7465741Smrj cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
7475084Sjohnlev if (cksum_flags != 0)
7487615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_cksum_deferred++;
7495084Sjohnlev rxresp->flags |= cksum_flags;
7505084Sjohnlev
7515741Smrj rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
7525084Sjohnlev rxresp->status = len;
7535084Sjohnlev
7545084Sjohnlev loop++;
7555084Sjohnlev prod++;
7565084Sjohnlev gop++;
7575084Sjohnlev prev = mp;
7585084Sjohnlev mp = mp->b_next;
7595084Sjohnlev }
7605084Sjohnlev
7615084Sjohnlev /*
7625084Sjohnlev * Did we actually do anything?
7635084Sjohnlev */
7645741Smrj if (loop == xnbp->xnb_rx_ring.req_cons) {
7657615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
7665084Sjohnlev return (mp);
7675084Sjohnlev }
7685084Sjohnlev
7695084Sjohnlev end = loop;
7705084Sjohnlev
7715084Sjohnlev /*
7725084Sjohnlev * Unlink the end of the 'done' list from the remainder.
7735084Sjohnlev */
7745084Sjohnlev ASSERT(prev != NULL);
7755084Sjohnlev prev->b_next = NULL;
7765084Sjohnlev
7777615SMax.Zhen@Sun.COM if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
7785741Smrj loop - xnbp->xnb_rx_ring.req_cons) != 0) {
7795084Sjohnlev cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
7805084Sjohnlev }
7815084Sjohnlev
7825741Smrj loop = xnbp->xnb_rx_ring.req_cons;
7835741Smrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
7847615SMax.Zhen@Sun.COM gop = xnbp->xnb_rx_top;
7855084Sjohnlev
7865084Sjohnlev while (loop < end) {
7875084Sjohnlev int16_t status = NETIF_RSP_OKAY;
7885084Sjohnlev
7895084Sjohnlev if (gop->status != 0) {
7905084Sjohnlev status = NETIF_RSP_ERROR;
7915084Sjohnlev
7925084Sjohnlev /*
7935084Sjohnlev * If the status is anything other than
7945084Sjohnlev * GNTST_bad_page then we don't own the page
7955084Sjohnlev * any more, so don't try to give it back.
7965084Sjohnlev */
7975084Sjohnlev if (gop->status != GNTST_bad_page)
7985084Sjohnlev gop->mfn = 0;
7995084Sjohnlev } else {
8005084Sjohnlev /* The page is no longer ours. */
8015084Sjohnlev gop->mfn = 0;
8025084Sjohnlev }
8035084Sjohnlev
8045084Sjohnlev if (gop->mfn != 0)
8055084Sjohnlev /*
8065084Sjohnlev * Give back the page, as we won't be using
8075084Sjohnlev * it.
8085084Sjohnlev */
8095084Sjohnlev xnb_free_page(xnbp, gop->mfn);
8105084Sjohnlev else
8115084Sjohnlev /*
8125084Sjohnlev * We gave away a page, update our accounting
8135084Sjohnlev * now.
8145084Sjohnlev */
8155084Sjohnlev balloon_drv_subtracted(1);
8165084Sjohnlev
8175084Sjohnlev /* 5.2 */
8185084Sjohnlev if (status != NETIF_RSP_OKAY) {
8195741Smrj RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
8205084Sjohnlev status;
8215084Sjohnlev } else {
8227615SMax.Zhen@Sun.COM xnbp->xnb_stat_ipackets++;
8237615SMax.Zhen@Sun.COM xnbp->xnb_stat_rbytes += len;
8245084Sjohnlev }
8255084Sjohnlev
8265084Sjohnlev loop++;
8275084Sjohnlev prod++;
8285084Sjohnlev gop++;
8295084Sjohnlev }
8305084Sjohnlev
8315741Smrj xnbp->xnb_rx_ring.req_cons = loop;
8325741Smrj xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
8335084Sjohnlev
8345084Sjohnlev /* 6 */
8355741Smrj /* LINTED: constant in conditional context */
8365741Smrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
8375084Sjohnlev if (notify) {
8385741Smrj ec_notify_via_evtchn(xnbp->xnb_evtchn);
8397615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_notify_sent++;
8405084Sjohnlev } else {
8417615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_notify_deferred++;
8425084Sjohnlev }
8435084Sjohnlev
8445084Sjohnlev if (mp != NULL)
8457615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_defer++;
8465084Sjohnlev
8477615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
8485084Sjohnlev
8495084Sjohnlev /* Free mblk_t's that we consumed. */
8505084Sjohnlev freemsgchain(free);
8515084Sjohnlev
8525084Sjohnlev return (mp);
8535084Sjohnlev }
8545084Sjohnlev
85510958Sdme@sun.com /* Helper functions for xnb_copy_to_peer(). */
8565741Smrj
8575741Smrj /*
8585741Smrj * Grow the array of copy operation descriptors.
8595741Smrj */
86010958Sdme@sun.com static boolean_t
86110958Sdme@sun.com grow_cpop_area(xnb_t *xnbp)
8625741Smrj {
86310958Sdme@sun.com size_t count;
86410958Sdme@sun.com gnttab_copy_t *new;
8655741Smrj
8667615SMax.Zhen@Sun.COM ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
8675741Smrj
86810958Sdme@sun.com count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;
8695741Smrj
87010958Sdme@sun.com if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
8715741Smrj xnbp->xnb_stat_other_allocation_failure++;
87210958Sdme@sun.com return (B_FALSE);
8735741Smrj }
8745741Smrj
87510958Sdme@sun.com bcopy(xnbp->xnb_rx_cpop, new,
87610958Sdme@sun.com sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
8775741Smrj
87810958Sdme@sun.com kmem_free(xnbp->xnb_rx_cpop,
87910958Sdme@sun.com sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
8805741Smrj
88110958Sdme@sun.com xnbp->xnb_rx_cpop = new;
88210958Sdme@sun.com xnbp->xnb_rx_cpop_count = count;
8835741Smrj
8847615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_cpoparea_grown++;
8855741Smrj
88610958Sdme@sun.com return (B_TRUE);
8875741Smrj }
8885741Smrj
8895741Smrj /*
8905741Smrj * Check whether an address is on a page that's foreign to this domain.
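 * The pfn returned by hat_getpfnum() has the PFN_IS_FOREIGN_MFN bit
 * set for such pages.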
8915741Smrj */
8925741Smrj static boolean_t
8935741Smrj is_foreign(void *addr)
8945741Smrj {
89510958Sdme@sun.com pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
8965741Smrj
89710958Sdme@sun.com return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
8985741Smrj }
8995741Smrj
9005741Smrj /*
9015741Smrj * Insert a newly allocated mblk into a chain, replacing the old one.
9025741Smrj */
9035741Smrj static mblk_t *
9045741Smrj replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
9055741Smrj {
9065741Smrj uint32_t start, stuff, end, value, flags;
9075741Smrj mblk_t *new_mp;
9085741Smrj
9095741Smrj new_mp = copyb(mp);
910*11878SVenu.Iyer@Sun.COM if (new_mp == NULL) {
9115741Smrj cmn_err(CE_PANIC, "replace_msg: cannot alloc new message"
9125741Smrj "for %p, len %lu", (void *) mp, len);
913*11878SVenu.Iyer@Sun.COM }
9145741Smrj
915*11878SVenu.Iyer@Sun.COM mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
916*11878SVenu.Iyer@Sun.COM mac_hcksum_set(new_mp, start, stuff, end, value, flags);
9175741Smrj
9185741Smrj new_mp->b_next = mp->b_next;
9195741Smrj new_mp->b_prev = mp->b_prev;
9205741Smrj new_mp->b_cont = mp->b_cont;
9215741Smrj
9225741Smrj /* Make sure we only overwrite pointers to the mblk being replaced. */
9235741Smrj if (mp_prev != NULL && mp_prev->b_next == mp)
9245741Smrj mp_prev->b_next = new_mp;
9255741Smrj
9265741Smrj if (ml_prev != NULL && ml_prev->b_cont == mp)
9275741Smrj ml_prev->b_cont = new_mp;
9285741Smrj
9295741Smrj mp->b_next = mp->b_prev = mp->b_cont = NULL;
9305741Smrj freemsg(mp);
9315741Smrj
9325741Smrj return (new_mp);
9335741Smrj }
9345741Smrj
9355741Smrj /*
9365741Smrj * Set all the fields in a gnttab_copy_t.
9375741Smrj */
9385741Smrj static void
9395741Smrj setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
9405741Smrj size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
9415741Smrj {
9425741Smrj ASSERT(xnbp != NULL && gp != NULL);
9435741Smrj
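/* The source is a page in this domain, named by machine frame number. */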
9445741Smrj gp->source.offset = s_off;
9455741Smrj gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
9465741Smrj gp->source.domid = DOMID_SELF;
9475741Smrj
9485741Smrj gp->len = (uint16_t)len;
9495741Smrj gp->flags = GNTCOPY_dest_gref;
9505741Smrj gp->status = 0;
9515741Smrj
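/* The destination is a page granted by the peer, named by grant reference. */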
9525741Smrj gp->dest.u.ref = d_ref;
9535741Smrj gp->dest.offset = d_off;
9545741Smrj gp->dest.domid = xnbp->xnb_peer;
9555741Smrj }
9565741Smrj
95710958Sdme@sun.com /*
95810958Sdme@sun.com * Pass packets to the peer using hypervisor copy operations.
95910958Sdme@sun.com */
9605741Smrj mblk_t *
9615741Smrj xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
9625741Smrj {
9635741Smrj mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
9645741Smrj mblk_t *ml, *ml_prev;
9655741Smrj boolean_t notify;
9665741Smrj RING_IDX loop, prod;
9675741Smrj int i;
9685741Smrj
96910958Sdme@sun.com /*
97010958Sdme@sun.com * If the peer does not pre-post buffers for received packets,
97110958Sdme@sun.com * use page flipping to pass packets to it.
97210958Sdme@sun.com */
97310958Sdme@sun.com if (!xnbp->xnb_rx_hv_copy)
9745741Smrj return (xnb_to_peer(xnbp, mp));
9755741Smrj
9765741Smrj /*
9775741Smrj * For each packet the sequence of operations is:
9785741Smrj *
9795741Smrj * 1. get a request slot from the ring.
9805741Smrj * 2. set up data for hypercall (see NOTE below)
9815741Smrj * 3. have the hypervisor copy the data
9825741Smrj * 4. update the request slot.
9835741Smrj * 5. kick the peer.
9845741Smrj *
9855741Smrj * NOTE ad 2.
9865741Smrj * In order to reduce the number of hypercalls, we prepare
98710958Sdme@sun.com * several mblks (mp->b_cont != NULL) for the peer and
98810958Sdme@sun.com * perform a single hypercall to transfer them. We also have
98910958Sdme@sun.com * to set up a separate copy operation for every page.
9905741Smrj *
99110958Sdme@sun.com * If we have more than one packet (mp->b_next != NULL), we do
99210958Sdme@sun.com * this whole dance repeatedly.
9935741Smrj */
9945741Smrj
9957615SMax.Zhen@Sun.COM mutex_enter(&xnbp->xnb_rx_lock);
9965741Smrj
9975741Smrj if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
9987615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
9997615SMax.Zhen@Sun.COM DTRACE_PROBE(copy_rx_too_early);
10007615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_too_early++;
10015741Smrj return (mp);
10025741Smrj }
10035741Smrj
10045741Smrj loop = xnbp->xnb_rx_ring.req_cons;
10055741Smrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
10065741Smrj
10075741Smrj while ((mp != NULL) &&
10085741Smrj XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
10095741Smrj netif_rx_request_t *rxreq;
101010958Sdme@sun.com size_t d_offset, len;
101110958Sdme@sun.com int item_count;
101210958Sdme@sun.com gnttab_copy_t *gop_cp;
10135741Smrj netif_rx_response_t *rxresp;
10145741Smrj uint16_t cksum_flags;
10155741Smrj int16_t status = NETIF_RSP_OKAY;
10165741Smrj
10175741Smrj /* 1 */
10185741Smrj rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
10195741Smrj
10205741Smrj #ifdef XNB_DEBUG
10215741Smrj if (!(rxreq->id < NET_RX_RING_SIZE))
10225741Smrj cmn_err(CE_PANIC, "xnb_copy_to_peer: "
10235741Smrj "id %d out of range in request 0x%p",
10245741Smrj rxreq->id, (void *)rxreq);
10255741Smrj #endif /* XNB_DEBUG */
10265741Smrj
10275741Smrj /* 2 */
10288757Sdme@sun.com d_offset = 0;
10295741Smrj len = 0;
10305741Smrj item_count = 0;
10315741Smrj
10327615SMax.Zhen@Sun.COM gop_cp = xnbp->xnb_rx_cpop;
10335741Smrj
10345741Smrj /*
103510958Sdme@sun.com * We walk the b_cont pointers and set up a
103610958Sdme@sun.com * gnttab_copy_t for each sub-page chunk in each data
103710958Sdme@sun.com * block.
10385741Smrj */
10395741Smrj /* 2a */
10405741Smrj for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
10415741Smrj size_t chunk = ml->b_wptr - ml->b_rptr;
10425741Smrj uchar_t *r_tmp, *rpt_align;
10435741Smrj size_t r_offset;
10445741Smrj
10455741Smrj /*
104610958Sdme@sun.com * The hypervisor will not allow us to
104710958Sdme@sun.com * reference a foreign page (e.g. one
104810958Sdme@sun.com * belonging to another domain) by mfn in the
104910958Sdme@sun.com * copy operation. If the data in this mblk is
105010958Sdme@sun.com * on such a page we must copy the data into a
105110958Sdme@sun.com * local page before initiating the hypervisor
105210958Sdme@sun.com * copy operation.
10535741Smrj */
10545741Smrj if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
10555741Smrj mblk_t *ml_new = replace_msg(ml, chunk,
10565741Smrj mp_prev, ml_prev);
10575741Smrj
10585741Smrj /* The old mblk has been freed; its pointer may be compared but not dereferenced. */
10595741Smrj if (free == ml)
10605741Smrj free = ml_new;
10615741Smrj if (mp == ml)
10625741Smrj mp = ml_new;
10635741Smrj ml = ml_new;
10645741Smrj
10657615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_foreign_page++;
10665741Smrj }
10675741Smrj
10685741Smrj rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
10695741Smrj r_offset = (uint16_t)(ml->b_rptr - rpt_align);
10705741Smrj r_tmp = ml->b_rptr;
10715741Smrj
10725741Smrj if (d_offset + chunk > PAGESIZE)
10735741Smrj cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
10745741Smrj "(svd: %p), ml %p, rpt_align %p, d_offset "
10755741Smrj "(%lu) + chunk (%lu) > PAGESIZE %d!",
10765741Smrj (void *)mp, (void *)saved_mp, (void *)ml,
10775741Smrj (void *)rpt_align,
10785741Smrj d_offset, chunk, (int)PAGESIZE);
10795741Smrj
10805741Smrj while (chunk > 0) {
10815741Smrj size_t part_len;
10825741Smrj
108310958Sdme@sun.com if (item_count == xnbp->xnb_rx_cpop_count) {
108410958Sdme@sun.com if (!grow_cpop_area(xnbp))
10855741Smrj goto failure;
108610958Sdme@sun.com gop_cp = &xnbp->xnb_rx_cpop[item_count];
10875741Smrj }
10885741Smrj /*
10895741Smrj * If our mblk crosses a page boundary, we need
109010958Sdme@sun.com * to do a separate copy for each page.
10915741Smrj */
10925741Smrj if (r_offset + chunk > PAGESIZE) {
10935741Smrj part_len = PAGESIZE - r_offset;
10945741Smrj
10955741Smrj DTRACE_PROBE3(mblk_page_crossed,
10965741Smrj (mblk_t *), ml, int, chunk, int,
10975741Smrj (int)r_offset);
10985741Smrj
10997615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_pagebndry_crossed++;
11005741Smrj } else {
11015741Smrj part_len = chunk;
11025741Smrj }
11035741Smrj
11045741Smrj setup_gop(xnbp, gop_cp, r_tmp, r_offset,
11055741Smrj d_offset, part_len, rxreq->gref);
11065741Smrj
11075741Smrj chunk -= part_len;
11085741Smrj
11095741Smrj len += part_len;
11105741Smrj d_offset += part_len;
11115741Smrj r_tmp += part_len;
11125741Smrj /*
11135741Smrj * The 2nd, 3rd ... last copies will always
11145741Smrj * start at r_tmp, therefore r_offset is 0.
11155741Smrj */
11165741Smrj r_offset = 0;
11175741Smrj gop_cp++;
111810958Sdme@sun.com item_count++;
11195741Smrj }
11205741Smrj ml_prev = ml;
112110958Sdme@sun.com
11225741Smrj DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
11235741Smrj chunk, int, len, int, item_count);
11245741Smrj }
11255741Smrj /* 3 */
11267615SMax.Zhen@Sun.COM if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
11275741Smrj item_count) != 0) {
11285741Smrj cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
11295741Smrj DTRACE_PROBE(HV_granttableopfailed);
11305741Smrj }
11315741Smrj
11325741Smrj /* 4 */
11335741Smrj rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
11348757Sdme@sun.com rxresp->offset = 0;
11355741Smrj
11365741Smrj rxresp->flags = 0;
11375741Smrj
11385741Smrj DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
11395741Smrj (int)rxresp->offset, int, (int)rxresp->flags, int,
11405741Smrj (int)rxresp->status);
11415741Smrj
11425741Smrj cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
11435741Smrj if (cksum_flags != 0)
11447615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_cksum_deferred++;
11455741Smrj rxresp->flags |= cksum_flags;
11465741Smrj
11475741Smrj rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
11485741Smrj rxresp->status = len;
11495741Smrj
11505741Smrj DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
11515741Smrj (int)rxresp->offset, int, (int)rxresp->flags, int,
11525741Smrj (int)rxresp->status);
11535741Smrj
11545741Smrj for (i = 0; i < item_count; i++) {
11557615SMax.Zhen@Sun.COM if (xnbp->xnb_rx_cpop[i].status != 0) {
115610958Sdme@sun.com DTRACE_PROBE2(cpop_status_nonnull, int,
11577615SMax.Zhen@Sun.COM (int)xnbp->xnb_rx_cpop[i].status,
11585741Smrj int, i);
11595741Smrj status = NETIF_RSP_ERROR;
11605741Smrj }
11615741Smrj }
11625741Smrj
11635741Smrj /* 5.2 */
11645741Smrj if (status != NETIF_RSP_OKAY) {
11655741Smrj RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
11665741Smrj status;
11677615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_rsp_notok++;
11685741Smrj } else {
11697615SMax.Zhen@Sun.COM xnbp->xnb_stat_ipackets++;
11707615SMax.Zhen@Sun.COM xnbp->xnb_stat_rbytes += len;
11715741Smrj }
11725741Smrj
11735741Smrj loop++;
11745741Smrj prod++;
11755741Smrj mp_prev = mp;
11765741Smrj mp = mp->b_next;
11775741Smrj }
11785741Smrj failure:
11795741Smrj /*
11805741Smrj * Did we actually do anything?
11815741Smrj */
11825741Smrj if (loop == xnbp->xnb_rx_ring.req_cons) {
11837615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
11845741Smrj return (mp);
11855741Smrj }
11865741Smrj
11875741Smrj /*
11885741Smrj * Unlink the end of the 'done' list from the remainder.
11895741Smrj */
11905741Smrj ASSERT(mp_prev != NULL);
11915741Smrj mp_prev->b_next = NULL;
11925741Smrj
11935741Smrj xnbp->xnb_rx_ring.req_cons = loop;
11945741Smrj xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
11955741Smrj
11965741Smrj /* 6 */
11975741Smrj /* LINTED: constant in conditional context */
11985741Smrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
11995741Smrj if (notify) {
12005741Smrj ec_notify_via_evtchn(xnbp->xnb_evtchn);
12017615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_notify_sent++;
12025741Smrj } else {
12037615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_notify_deferred++;
12045741Smrj }
12055741Smrj
12065741Smrj if (mp != NULL)
12077615SMax.Zhen@Sun.COM xnbp->xnb_stat_rx_defer++;
12085741Smrj
12097615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_rx_lock);
12105741Smrj
12115741Smrj /* Free mblk_t structs we have consumed. */
12125741Smrj freemsgchain(free);
12135741Smrj
12145741Smrj return (mp);
12155741Smrj }
12165741Smrj
12175084Sjohnlev
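/*
 * Push pending transmit responses to the peer and deliver an event
 * channel notification if one is needed (or if `force' is set).
 */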
12185084Sjohnlev static void
121910958Sdme@sun.com xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
12205084Sjohnlev {
12215084Sjohnlev boolean_t notify;
12225084Sjohnlev
12237615SMax.Zhen@Sun.COM ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
12245084Sjohnlev
12255741Smrj /* LINTED: constant in conditional context */
12265741Smrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
122710958Sdme@sun.com if (notify || force) {
12285741Smrj ec_notify_via_evtchn(xnbp->xnb_evtchn);
12297615SMax.Zhen@Sun.COM xnbp->xnb_stat_tx_notify_sent++;
12305084Sjohnlev } else {
12317615SMax.Zhen@Sun.COM xnbp->xnb_stat_tx_notify_deferred++;
12325084Sjohnlev }
12335084Sjohnlev }
12345084Sjohnlev
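/*
 * Queue a transmit response for the request with the given id, carrying
 * the given status. The response is not pushed to the peer here - see
 * xnb_tx_notify_peer().
 */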
12355084Sjohnlev static void
12367615SMax.Zhen@Sun.COM xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
12375084Sjohnlev {
12385084Sjohnlev RING_IDX i;
12395084Sjohnlev netif_tx_response_t *txresp;
12405084Sjohnlev
12417615SMax.Zhen@Sun.COM ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
12425084Sjohnlev
12435741Smrj i = xnbp->xnb_tx_ring.rsp_prod_pvt;
12445084Sjohnlev
12455741Smrj txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
12465084Sjohnlev txresp->id = id;
12475084Sjohnlev txresp->status = status;
12485084Sjohnlev
12495741Smrj xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
12505084Sjohnlev
12515084Sjohnlev /*
12525084Sjohnlev * Note that we don't push the change to the peer here - that
12535084Sjohnlev * is the caller's responsibility.
12545084Sjohnlev */
12555084Sjohnlev }
12565084Sjohnlev
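/*
 * desballoc() free function - return a transmit buffer to the kmem
 * cache once the mblk wrapping it has been freed.
 */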
12575084Sjohnlev static void
125810958Sdme@sun.com xnb_txbuf_recycle(xnb_txbuf_t *txp)
12595084Sjohnlev {
126010958Sdme@sun.com xnb_t *xnbp = txp->xt_xnbp;
12615084Sjohnlev
126210958Sdme@sun.com kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
126310958Sdme@sun.com
126410958Sdme@sun.com xnbp->xnb_tx_buf_outstanding--;
126510958Sdme@sun.com }
12665741Smrj
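/*
 * kmem cache constructor for transmit buffers - allocate a page of
 * DMA-able memory and record its machine frame number and length.
 */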
126710958Sdme@sun.com static int
126810958Sdme@sun.com xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
126910958Sdme@sun.com {
127010958Sdme@sun.com _NOTE(ARGUNUSED(kmflag));
127110958Sdme@sun.com xnb_txbuf_t *txp = buf;
127210958Sdme@sun.com xnb_t *xnbp = arg;
127310958Sdme@sun.com size_t len;
127410958Sdme@sun.com ddi_dma_cookie_t dma_cookie;
127510958Sdme@sun.com uint_t ncookies;
12765084Sjohnlev
127710958Sdme@sun.com txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
127810958Sdme@sun.com txp->xt_free_rtn.free_arg = (caddr_t)txp;
127910958Sdme@sun.com txp->xt_xnbp = xnbp;
128010958Sdme@sun.com txp->xt_next = NULL;
128110958Sdme@sun.com
128210958Sdme@sun.com if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
128310958Sdme@sun.com 0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
128410958Sdme@sun.com goto failure;
128510958Sdme@sun.com
128610958Sdme@sun.com if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
128710958Sdme@sun.com DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
128810958Sdme@sun.com &txp->xt_acc_handle) != DDI_SUCCESS)
128910958Sdme@sun.com goto failure_1;
12905084Sjohnlev
129110958Sdme@sun.com if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
129210958Sdme@sun.com len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
129310958Sdme@sun.com &dma_cookie, &ncookies)
129410958Sdme@sun.com != DDI_DMA_MAPPED)
129510958Sdme@sun.com goto failure_2;
129610958Sdme@sun.com ASSERT(ncookies == 1);
129710958Sdme@sun.com
129810958Sdme@sun.com txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
129910958Sdme@sun.com txp->xt_buflen = dma_cookie.dmac_size;
130010958Sdme@sun.com
130110958Sdme@sun.com DTRACE_PROBE(txbuf_allocated);
130210958Sdme@sun.com
130310958Sdme@sun.com atomic_add_32(&xnbp->xnb_tx_buf_count, 1);
130410958Sdme@sun.com xnbp->xnb_tx_buf_outstanding++;
130510958Sdme@sun.com
130610958Sdme@sun.com return (0);
130710958Sdme@sun.com
130810958Sdme@sun.com failure_2:
130910958Sdme@sun.com ddi_dma_mem_free(&txp->xt_acc_handle);
13105084Sjohnlev
131110958Sdme@sun.com failure_1:
131210958Sdme@sun.com ddi_dma_free_handle(&txp->xt_dma_handle);
131310958Sdme@sun.com
131410958Sdme@sun.com failure:
131510958Sdme@sun.com
131610958Sdme@sun.com return (-1);
131710958Sdme@sun.com }
13185741Smrj
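/*
 * kmem cache destructor for transmit buffers - unbind and release the
 * DMA memory backing the buffer.
 */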
131910958Sdme@sun.com static void
132010958Sdme@sun.com xnb_txbuf_destructor(void *buf, void *arg)
132110958Sdme@sun.com {
132210958Sdme@sun.com xnb_txbuf_t *txp = buf;
132310958Sdme@sun.com xnb_t *xnbp = arg;
132410958Sdme@sun.com
132510958Sdme@sun.com (void) ddi_dma_unbind_handle(txp->xt_dma_handle);
132610958Sdme@sun.com ddi_dma_mem_free(&txp->xt_acc_handle);
132710958Sdme@sun.com ddi_dma_free_handle(&txp->xt_dma_handle);
132810958Sdme@sun.com
132910958Sdme@sun.com atomic_add_32(&xnbp->xnb_tx_buf_count, -1);
13305084Sjohnlev }
13315084Sjohnlev
13325741Smrj /*
133310958Sdme@sun.com * Take packets from the peer and deliver them onward.
13345741Smrj */
13355084Sjohnlev static mblk_t *
13367615SMax.Zhen@Sun.COM xnb_from_peer(xnb_t *xnbp)
13375084Sjohnlev {
13385084Sjohnlev RING_IDX start, end, loop;
133910958Sdme@sun.com gnttab_copy_t *cop;
13407615SMax.Zhen@Sun.COM xnb_txbuf_t **txpp;
13415084Sjohnlev netif_tx_request_t *txreq;
134210958Sdme@sun.com boolean_t work_to_do, need_notify = B_FALSE;
13435084Sjohnlev mblk_t *head, *tail;
134410958Sdme@sun.com int n_data_req, i;
13455084Sjohnlev
134610958Sdme@sun.com ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
13475084Sjohnlev
13485084Sjohnlev head = tail = NULL;
13495084Sjohnlev around:
13505084Sjohnlev
13515741Smrj /* LINTED: constant in conditional context */
13525741Smrj RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
13535084Sjohnlev if (!work_to_do) {
13545084Sjohnlev finished:
135510958Sdme@sun.com xnb_tx_notify_peer(xnbp, need_notify);
135610958Sdme@sun.com
13575084Sjohnlev return (head);
13585084Sjohnlev }
13595084Sjohnlev
13605741Smrj start = xnbp->xnb_tx_ring.req_cons;
13615741Smrj end = xnbp->xnb_tx_ring.sring->req_prod;
13625084Sjohnlev
13637676Sdme@sun.com if ((end - start) > NET_TX_RING_SIZE) {
13647676Sdme@sun.com /*
13657676Sdme@sun.com * This usually indicates that the frontend driver is
13667676Sdme@sun.com * misbehaving, as it's not possible to have more than
13677676Sdme@sun.com * NET_TX_RING_SIZE ring elements in play at any one
13687676Sdme@sun.com * time.
13697676Sdme@sun.com *
13707676Sdme@sun.com * We reset the ring pointers to the state declared by
13717676Sdme@sun.com * the frontend and try to carry on.
13727676Sdme@sun.com */
13737676Sdme@sun.com cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
13747676Sdme@sun.com "items in the ring, resetting and trying to recover.",
13757676Sdme@sun.com xnbp->xnb_peer, (end - start));
13767676Sdme@sun.com
13777676Sdme@sun.com /* LINTED: constant in conditional context */
13787676Sdme@sun.com BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
13797676Sdme@sun.com (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
13807676Sdme@sun.com
13817676Sdme@sun.com goto around;
13827676Sdme@sun.com }
13837676Sdme@sun.com
138410958Sdme@sun.com loop = start;
138510958Sdme@sun.com cop = xnbp->xnb_tx_cop;
138610958Sdme@sun.com txpp = xnbp->xnb_tx_bufp;
138710958Sdme@sun.com n_data_req = 0;
138810958Sdme@sun.com
138910958Sdme@sun.com while (loop < end) {
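/* Flag bits that we know how to handle in a transmit request. */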
139011588Sdavid.edmondson@sun.com static const uint16_t acceptable_flags =
139111588Sdavid.edmondson@sun.com NETTXF_csum_blank |
139211588Sdavid.edmondson@sun.com NETTXF_data_validated |
139311588Sdavid.edmondson@sun.com NETTXF_extra_info;
139411588Sdavid.edmondson@sun.com uint16_t unexpected_flags;
139511588Sdavid.edmondson@sun.com
139610958Sdme@sun.com txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
139710958Sdme@sun.com
139811588Sdavid.edmondson@sun.com unexpected_flags = txreq->flags & ~acceptable_flags;
139911588Sdavid.edmondson@sun.com if (unexpected_flags != 0) {
140011588Sdavid.edmondson@sun.com /*
140111588Sdavid.edmondson@sun.com * The peer used flag bits that we do not
140211588Sdavid.edmondson@sun.com * recognize.
140311588Sdavid.edmondson@sun.com */
140411588Sdavid.edmondson@sun.com cmn_err(CE_WARN, "xnb_from_peer: "
140511588Sdavid.edmondson@sun.com "unexpected flag bits (0x%x) from peer "
140611588Sdavid.edmondson@sun.com "in transmit request",
140711588Sdavid.edmondson@sun.com unexpected_flags);
140811588Sdavid.edmondson@sun.com xnbp->xnb_stat_tx_unexpected_flags++;
140911588Sdavid.edmondson@sun.com
141011588Sdavid.edmondson@sun.com /* Mark this entry as failed. */
141111588Sdavid.edmondson@sun.com xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
141211588Sdavid.edmondson@sun.com need_notify = B_TRUE;
141311588Sdavid.edmondson@sun.com
141411588Sdavid.edmondson@sun.com } else if (txreq->flags & NETTXF_extra_info) {
141510958Sdme@sun.com struct netif_extra_info *erp;
141610958Sdme@sun.com boolean_t status;
141710958Sdme@sun.com
141810958Sdme@sun.com loop++; /* Consume another slot in the ring. */
141910958Sdme@sun.com ASSERT(loop <= end);
142010958Sdme@sun.com
142110958Sdme@sun.com erp = (struct netif_extra_info *)
142210958Sdme@sun.com RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
14235084Sjohnlev
142410958Sdme@sun.com switch (erp->type) {
142510958Sdme@sun.com case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
142610958Sdme@sun.com ASSERT(xnbp->xnb_multicast_control);
142710958Sdme@sun.com status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
142810958Sdme@sun.com &erp->u.mcast.addr);
142910958Sdme@sun.com break;
143010958Sdme@sun.com case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
143110958Sdme@sun.com ASSERT(xnbp->xnb_multicast_control);
143210958Sdme@sun.com status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
143310958Sdme@sun.com &erp->u.mcast.addr);
143410958Sdme@sun.com break;
143510958Sdme@sun.com default:
143610958Sdme@sun.com status = B_FALSE;
143710958Sdme@sun.com cmn_err(CE_WARN, "xnb_from_peer: "
143810958Sdme@sun.com "unknown extra type %d", erp->type);
143910958Sdme@sun.com break;
144010958Sdme@sun.com }
14415084Sjohnlev
144210958Sdme@sun.com xnb_tx_mark_complete(xnbp, txreq->id,
144310958Sdme@sun.com status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
144410958Sdme@sun.com need_notify = B_TRUE;
144511588Sdavid.edmondson@sun.com
144611588Sdavid.edmondson@sun.com } else if ((txreq->offset > PAGESIZE) ||
144711588Sdavid.edmondson@sun.com (txreq->offset + txreq->size > PAGESIZE)) {
144811588Sdavid.edmondson@sun.com /*
144911588Sdavid.edmondson@sun.com * Peer attempted to refer to data beyond the
145011588Sdavid.edmondson@sun.com * end of the granted page.
145111588Sdavid.edmondson@sun.com */
145211588Sdavid.edmondson@sun.com cmn_err(CE_WARN, "xnb_from_peer: "
145311588Sdavid.edmondson@sun.com "attempt to refer beyond the end of granted "
145411588Sdavid.edmondson@sun.com "page in txreq (offset %d, size %d).",
145511588Sdavid.edmondson@sun.com txreq->offset, txreq->size);
145611588Sdavid.edmondson@sun.com xnbp->xnb_stat_tx_overflow_page++;
145711588Sdavid.edmondson@sun.com
145811588Sdavid.edmondson@sun.com /* Mark this entry as failed. */
145911588Sdavid.edmondson@sun.com xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
146011588Sdavid.edmondson@sun.com need_notify = B_TRUE;
146111588Sdavid.edmondson@sun.com
146210958Sdme@sun.com } else {
146310958Sdme@sun.com xnb_txbuf_t *txp;
146410958Sdme@sun.com
146510958Sdme@sun.com txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
146610958Sdme@sun.com KM_NOSLEEP);
146710958Sdme@sun.com if (txp == NULL)
146810958Sdme@sun.com break;
146910958Sdme@sun.com
147010958Sdme@sun.com txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
147110958Sdme@sun.com txp->xt_buflen, 0, &txp->xt_free_rtn);
147210958Sdme@sun.com if (txp->xt_mblk == NULL) {
147310958Sdme@sun.com kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
147410958Sdme@sun.com break;
147510958Sdme@sun.com }
147610958Sdme@sun.com
147710958Sdme@sun.com txp->xt_idx = loop;
147810958Sdme@sun.com txp->xt_id = txreq->id;
14795084Sjohnlev
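			/*
			 * Build a grant-copy operation to pull the
			 * request's data from the peer's granted page
			 * into the local buffer.
			 */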
148010958Sdme@sun.com cop->source.u.ref = txreq->gref;
148110958Sdme@sun.com cop->source.domid = xnbp->xnb_peer;
148210958Sdme@sun.com cop->source.offset = txreq->offset;
148310958Sdme@sun.com
148410958Sdme@sun.com cop->dest.u.gmfn = txp->xt_mfn;
148510958Sdme@sun.com cop->dest.domid = DOMID_SELF;
148610958Sdme@sun.com cop->dest.offset = 0;
14875084Sjohnlev
148810958Sdme@sun.com cop->len = txreq->size;
148910958Sdme@sun.com cop->flags = GNTCOPY_source_gref;
149010958Sdme@sun.com cop->status = 0;
149110958Sdme@sun.com
149210958Sdme@sun.com *txpp = txp;
149310958Sdme@sun.com
149410958Sdme@sun.com txpp++;
149510958Sdme@sun.com cop++;
149610958Sdme@sun.com n_data_req++;
149710958Sdme@sun.com
149810958Sdme@sun.com ASSERT(n_data_req <= NET_TX_RING_SIZE);
149910958Sdme@sun.com }
150010958Sdme@sun.com
150110958Sdme@sun.com loop++;
15025084Sjohnlev }
15035084Sjohnlev
150410958Sdme@sun.com xnbp->xnb_tx_ring.req_cons = loop;
15055084Sjohnlev
150610958Sdme@sun.com if (n_data_req == 0)
150710958Sdme@sun.com goto around;
15085084Sjohnlev
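	/*
	 * Hand the accumulated copy operations to the hypervisor in a
	 * single hypercall.
	 */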
150910958Sdme@sun.com if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
151010958Sdme@sun.com xnbp->xnb_tx_cop, n_data_req) != 0) {
15115084Sjohnlev
151210958Sdme@sun.com cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
15135084Sjohnlev
151410958Sdme@sun.com txpp = xnbp->xnb_tx_bufp;
151510958Sdme@sun.com i = n_data_req;
151610958Sdme@sun.com while (i > 0) {
151710958Sdme@sun.com kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
15187615SMax.Zhen@Sun.COM txpp++;
151910958Sdme@sun.com i--;
15205084Sjohnlev }
15215084Sjohnlev
15225084Sjohnlev goto finished;
15235084Sjohnlev }
15245084Sjohnlev
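	/*
	 * Walk the completed copy operations.  Successful copies become
	 * mblks on the chain to be passed up; failures are reported back
	 * to the peer as errors.
	 */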
152510958Sdme@sun.com txpp = xnbp->xnb_tx_bufp;
152610958Sdme@sun.com cop = xnbp->xnb_tx_cop;
152710958Sdme@sun.com i = n_data_req;
152810958Sdme@sun.com
152910958Sdme@sun.com while (i > 0) {
15307615SMax.Zhen@Sun.COM xnb_txbuf_t *txp = *txpp;
15315084Sjohnlev
153210958Sdme@sun.com txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
15335084Sjohnlev
153410958Sdme@sun.com if (cop->status != 0) {
153510958Sdme@sun.com #ifdef XNB_DEBUG
153610958Sdme@sun.com cmn_err(CE_WARN, "xnb_from_peer: "
153710958Sdme@sun.com "txpp 0x%p failed (%d)",
153810958Sdme@sun.com (void *)*txpp, cop->status);
153910958Sdme@sun.com #endif /* XNB_DEBUG */
154011588Sdavid.edmondson@sun.com xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
154110958Sdme@sun.com freemsg(txp->xt_mblk);
154210958Sdme@sun.com } else {
154310958Sdme@sun.com mblk_t *mp;
15445084Sjohnlev
154510958Sdme@sun.com mp = txp->xt_mblk;
154610958Sdme@sun.com mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
154710958Sdme@sun.com mp->b_wptr += txreq->size;
154810958Sdme@sun.com mp->b_next = NULL;
15495084Sjohnlev
15505084Sjohnlev /*
155110958Sdme@sun.com * If there are checksum flags, process them
155210958Sdme@sun.com * appropriately.
15535084Sjohnlev */
155410958Sdme@sun.com if ((txreq->flags &
15555084Sjohnlev (NETTXF_csum_blank | NETTXF_data_validated))
155610958Sdme@sun.com != 0) {
15575741Smrj mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
15585084Sjohnlev mp, txreq->flags);
15597615SMax.Zhen@Sun.COM xnbp->xnb_stat_tx_cksum_no_need++;
15605084Sjohnlev
156110958Sdme@sun.com txp->xt_mblk = mp;
156210958Sdme@sun.com }
15635084Sjohnlev
15645084Sjohnlev if (head == NULL) {
15655084Sjohnlev ASSERT(tail == NULL);
15665084Sjohnlev head = mp;
15675084Sjohnlev } else {
15685084Sjohnlev ASSERT(tail != NULL);
15695084Sjohnlev tail->b_next = mp;
15705084Sjohnlev }
15715084Sjohnlev tail = mp;
157210958Sdme@sun.com
157310958Sdme@sun.com xnbp->xnb_stat_opackets++;
157410958Sdme@sun.com xnbp->xnb_stat_obytes += txreq->size;
157510958Sdme@sun.com
157611588Sdavid.edmondson@sun.com xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
15775084Sjohnlev }
157810958Sdme@sun.com
157910958Sdme@sun.com txpp++;
158010958Sdme@sun.com cop++;
158110958Sdme@sun.com i--;
15825084Sjohnlev }
15835084Sjohnlev
15845084Sjohnlev goto around;
15855084Sjohnlev /* NOTREACHED */
15865084Sjohnlev }
15875084Sjohnlev
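/*
 * Interrupt handler, raised by the peer to indicate that transmit
 * requests are pending.  Pull the packets from the ring and hand them
 * to the flavour-specific code for onward delivery.
 */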
15885084Sjohnlev static uint_t
15895084Sjohnlev xnb_intr(caddr_t arg)
15905084Sjohnlev {
15915084Sjohnlev xnb_t *xnbp = (xnb_t *)arg;
15925084Sjohnlev mblk_t *mp;
15935084Sjohnlev
15945741Smrj xnbp->xnb_stat_intr++;
15955084Sjohnlev
15967615SMax.Zhen@Sun.COM mutex_enter(&xnbp->xnb_tx_lock);
15975084Sjohnlev
15985741Smrj ASSERT(xnbp->xnb_connected);
15995084Sjohnlev
16007615SMax.Zhen@Sun.COM mp = xnb_from_peer(xnbp);
16015084Sjohnlev
16027615SMax.Zhen@Sun.COM mutex_exit(&xnbp->xnb_tx_lock);
16035084Sjohnlev
16045741Smrj if (!xnbp->xnb_hotplugged) {
16057615SMax.Zhen@Sun.COM xnbp->xnb_stat_tx_too_early++;
16065084Sjohnlev goto fail;
16075084Sjohnlev }
16085084Sjohnlev if (mp == NULL) {
16095741Smrj xnbp->xnb_stat_spurious_intr++;
16105084Sjohnlev goto fail;
16115084Sjohnlev }
16125084Sjohnlev
16137615SMax.Zhen@Sun.COM xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
16145084Sjohnlev
16155084Sjohnlev return (DDI_INTR_CLAIMED);
16165084Sjohnlev
16175084Sjohnlev fail:
16185084Sjohnlev freemsgchain(mp);
16195084Sjohnlev return (DDI_INTR_CLAIMED);
16205084Sjohnlev }
16215084Sjohnlev
162210958Sdme@sun.com /*
162310958Sdme@sun.com * Read our configuration from xenstore.
162410958Sdme@sun.com */
162510958Sdme@sun.com boolean_t
162610958Sdme@sun.com xnb_read_xs_config(xnb_t *xnbp)
162710958Sdme@sun.com {
162810958Sdme@sun.com char *xsname;
162910958Sdme@sun.com char mac[ETHERADDRL * 3];
163010958Sdme@sun.com
163110958Sdme@sun.com xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
163210958Sdme@sun.com
163310958Sdme@sun.com if (xenbus_scanf(XBT_NULL, xsname,
163410958Sdme@sun.com "mac", "%s", mac) != 0) {
163510958Sdme@sun.com 		cmn_err(CE_WARN, "xnb_read_xs_config: "
163610958Sdme@sun.com "cannot read mac address from %s",
163710958Sdme@sun.com xsname);
163810958Sdme@sun.com return (B_FALSE);
163910958Sdme@sun.com }
164010958Sdme@sun.com
164110958Sdme@sun.com if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
164210958Sdme@sun.com cmn_err(CE_WARN,
164310958Sdme@sun.com 		    "xnb_read_xs_config: cannot parse mac address %s",
164410958Sdme@sun.com mac);
164510958Sdme@sun.com return (B_FALSE);
164610958Sdme@sun.com }
164710958Sdme@sun.com
164810958Sdme@sun.com return (B_TRUE);
164910958Sdme@sun.com }
165010958Sdme@sun.com
165110958Sdme@sun.com /*
165210958Sdme@sun.com * Read the configuration of the peer from xenstore.
165310958Sdme@sun.com */
165410958Sdme@sun.com boolean_t
165510958Sdme@sun.com xnb_read_oe_config(xnb_t *xnbp)
165610958Sdme@sun.com {
165710958Sdme@sun.com char *oename;
165810958Sdme@sun.com int i;
165910958Sdme@sun.com
166010958Sdme@sun.com oename = xvdi_get_oename(xnbp->xnb_devinfo);
166110958Sdme@sun.com
166210958Sdme@sun.com if (xenbus_gather(XBT_NULL, oename,
166310958Sdme@sun.com "event-channel", "%u", &xnbp->xnb_fe_evtchn,
166410958Sdme@sun.com "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
166510958Sdme@sun.com "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
166610958Sdme@sun.com NULL) != 0) {
166710958Sdme@sun.com cmn_err(CE_WARN, "xnb_read_oe_config: "
166810958Sdme@sun.com "cannot read other-end details from %s",
166910958Sdme@sun.com oename);
167010958Sdme@sun.com return (B_FALSE);
167110958Sdme@sun.com }
167210958Sdme@sun.com
167310958Sdme@sun.com /*
167410958Sdme@sun.com * Check whether our peer requests receive side hypervisor
167510958Sdme@sun.com * copy.
167610958Sdme@sun.com */
167710958Sdme@sun.com if (xenbus_scanf(XBT_NULL, oename,
167810958Sdme@sun.com "request-rx-copy", "%d", &i) != 0)
167910958Sdme@sun.com i = 0;
168010958Sdme@sun.com if (i != 0)
168110958Sdme@sun.com xnbp->xnb_rx_hv_copy = B_TRUE;
168210958Sdme@sun.com
168310958Sdme@sun.com /*
168410958Sdme@sun.com * Check whether our peer requests multicast_control.
168510958Sdme@sun.com */
168610958Sdme@sun.com if (xenbus_scanf(XBT_NULL, oename,
168710958Sdme@sun.com "request-multicast-control", "%d", &i) != 0)
168810958Sdme@sun.com i = 0;
168910958Sdme@sun.com if (i != 0)
169010958Sdme@sun.com xnbp->xnb_multicast_control = B_TRUE;
169110958Sdme@sun.com
169210958Sdme@sun.com /*
169310958Sdme@sun.com * The Linux backend driver here checks to see if the peer has
169410958Sdme@sun.com * set 'feature-no-csum-offload'. This is used to indicate
169510958Sdme@sun.com * that the guest cannot handle receiving packets without a
169610958Sdme@sun.com * valid checksum. We don't check here, because packets passed
169710958Sdme@sun.com * to the peer _always_ have a valid checksum.
169810958Sdme@sun.com *
169910958Sdme@sun.com * There are three cases:
170010958Sdme@sun.com *
170110958Sdme@sun.com * - the NIC is dedicated: packets from the wire should always
170210958Sdme@sun.com * have a valid checksum. If the hardware validates the
170310958Sdme@sun.com * checksum then the relevant bit will be set in the packet
170410958Sdme@sun.com * attributes and we will inform the peer. It can choose to
170510958Sdme@sun.com * ignore the hardware verification.
170610958Sdme@sun.com *
170710958Sdme@sun.com * - the NIC is shared (VNIC) and a packet originates from the
170810958Sdme@sun.com * wire: this is the same as the case above - the packets
170910958Sdme@sun.com * will have a valid checksum.
171010958Sdme@sun.com *
171110958Sdme@sun.com * - the NIC is shared (VNIC) and a packet originates from the
171210958Sdme@sun.com * host: the MAC layer ensures that all such packets have a
171310958Sdme@sun.com * valid checksum by calculating one if the stack did not.
171410958Sdme@sun.com */
171510958Sdme@sun.com
171610958Sdme@sun.com return (B_TRUE);
171710958Sdme@sun.com }
171810958Sdme@sun.com
171910958Sdme@sun.com void
172010958Sdme@sun.com xnb_start_connect(xnb_t *xnbp)
172110958Sdme@sun.com {
172210958Sdme@sun.com dev_info_t *dip = xnbp->xnb_devinfo;
172310958Sdme@sun.com
172410958Sdme@sun.com if (!xnb_connect_rings(dip)) {
172510958Sdme@sun.com cmn_err(CE_WARN, "xnb_start_connect: "
172610958Sdme@sun.com "cannot connect rings");
172710958Sdme@sun.com goto failed;
172810958Sdme@sun.com }
172910958Sdme@sun.com
173010958Sdme@sun.com if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
173110958Sdme@sun.com cmn_err(CE_WARN, "xnb_start_connect: "
173210958Sdme@sun.com "flavour failed to connect");
173310958Sdme@sun.com goto failed;
173410958Sdme@sun.com }
173510958Sdme@sun.com
173610958Sdme@sun.com (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
173710958Sdme@sun.com return;
173810958Sdme@sun.com
173910958Sdme@sun.com failed:
174010958Sdme@sun.com xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
174110958Sdme@sun.com xnb_disconnect_rings(dip);
174210958Sdme@sun.com (void) xvdi_switch_state(dip, XBT_NULL,
174310958Sdme@sun.com XenbusStateClosed);
174410958Sdme@sun.com (void) xvdi_post_event(dip, XEN_HP_REMOVE);
174510958Sdme@sun.com }
174610958Sdme@sun.com
17475084Sjohnlev static boolean_t
17485084Sjohnlev xnb_connect_rings(dev_info_t *dip)
17495084Sjohnlev {
17505084Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
17515084Sjohnlev struct gnttab_map_grant_ref map_op;
17525084Sjohnlev
17535084Sjohnlev /*
17545084Sjohnlev * Cannot attempt to connect the rings if already connected.
17555084Sjohnlev */
17565741Smrj ASSERT(!xnbp->xnb_connected);
17575084Sjohnlev
17585084Sjohnlev /*
17595084Sjohnlev * 1. allocate a vaddr for the tx page, one for the rx page.
17605084Sjohnlev * 2. call GNTTABOP_map_grant_ref to map the relevant pages
17615084Sjohnlev * into the allocated vaddr (one for tx, one for rx).
17625084Sjohnlev * 3. call EVTCHNOP_bind_interdomain to have the event channel
17635084Sjohnlev * bound to this domain.
17645084Sjohnlev * 4. associate the event channel with an interrupt.
176510958Sdme@sun.com * 5. enable the interrupt.
17665084Sjohnlev */
17675084Sjohnlev
17685084Sjohnlev /* 1.tx */
17695741Smrj xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
17705084Sjohnlev 0, 0, 0, 0, VM_SLEEP);
17715741Smrj ASSERT(xnbp->xnb_tx_ring_addr != NULL);
17725084Sjohnlev
17735084Sjohnlev /* 2.tx */
17745741Smrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
17755084Sjohnlev map_op.flags = GNTMAP_host_map;
17765741Smrj map_op.ref = xnbp->xnb_tx_ring_ref;
17775741Smrj map_op.dom = xnbp->xnb_peer;
17787756SMark.Johnson@Sun.COM hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
17797756SMark.Johnson@Sun.COM if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
17807756SMark.Johnson@Sun.COM map_op.status != 0) {
17815084Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
17825084Sjohnlev goto fail;
17835084Sjohnlev }
17845741Smrj xnbp->xnb_tx_ring_handle = map_op.handle;
17855084Sjohnlev
17865741Smrj /* LINTED: constant in conditional context */
17875741Smrj BACK_RING_INIT(&xnbp->xnb_tx_ring,
17885741Smrj (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
17895084Sjohnlev
17905084Sjohnlev /* 1.rx */
17915741Smrj xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
17925084Sjohnlev 0, 0, 0, 0, VM_SLEEP);
17935741Smrj ASSERT(xnbp->xnb_rx_ring_addr != NULL);
17945084Sjohnlev
17955084Sjohnlev /* 2.rx */
17965741Smrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
17975084Sjohnlev map_op.flags = GNTMAP_host_map;
17985741Smrj map_op.ref = xnbp->xnb_rx_ring_ref;
17995741Smrj map_op.dom = xnbp->xnb_peer;
18007756SMark.Johnson@Sun.COM hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
18017756SMark.Johnson@Sun.COM if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
18027756SMark.Johnson@Sun.COM map_op.status != 0) {
18035084Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
18045084Sjohnlev goto fail;
18055084Sjohnlev }
18065741Smrj xnbp->xnb_rx_ring_handle = map_op.handle;
18075084Sjohnlev
18085741Smrj /* LINTED: constant in conditional context */
18095741Smrj BACK_RING_INIT(&xnbp->xnb_rx_ring,
18105741Smrj (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
18115084Sjohnlev
18125084Sjohnlev /* 3 */
181310958Sdme@sun.com if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
18145084Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: "
18155741Smrj 		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
18165741Smrj xnbp->xnb_evtchn = INVALID_EVTCHN;
18175084Sjohnlev goto fail;
18185084Sjohnlev }
18195741Smrj xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
18205084Sjohnlev
18215084Sjohnlev /*
18225084Sjohnlev * It would be good to set the state to XenbusStateConnected
18235084Sjohnlev * here as well, but then what if ddi_add_intr() failed?
18245084Sjohnlev * Changing the state in the store will be noticed by the peer
18255084Sjohnlev * and cannot be "taken back".
18265084Sjohnlev */
18275741Smrj mutex_enter(&xnbp->xnb_tx_lock);
18285741Smrj mutex_enter(&xnbp->xnb_rx_lock);
18295084Sjohnlev
18305741Smrj xnbp->xnb_connected = B_TRUE;
18315084Sjohnlev
18325741Smrj mutex_exit(&xnbp->xnb_rx_lock);
18335741Smrj mutex_exit(&xnbp->xnb_tx_lock);
18345084Sjohnlev
183510958Sdme@sun.com /* 4, 5 */
18365084Sjohnlev if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
18375084Sjohnlev != DDI_SUCCESS) {
18385084Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
18395084Sjohnlev goto fail;
18405084Sjohnlev }
18415741Smrj xnbp->xnb_irq = B_TRUE;
18425084Sjohnlev
18435084Sjohnlev return (B_TRUE);
18445084Sjohnlev
18455084Sjohnlev fail:
18465741Smrj mutex_enter(&xnbp->xnb_tx_lock);
18475741Smrj mutex_enter(&xnbp->xnb_rx_lock);
18485084Sjohnlev
18495741Smrj xnbp->xnb_connected = B_FALSE;
185010958Sdme@sun.com
18515741Smrj mutex_exit(&xnbp->xnb_rx_lock);
18525741Smrj mutex_exit(&xnbp->xnb_tx_lock);
18535084Sjohnlev
18545084Sjohnlev return (B_FALSE);
18555084Sjohnlev }
18565084Sjohnlev
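/*
 * Tear down the connection to the peer: remove the interrupt handler,
 * free the event channel and unmap and free the transmit and receive
 * ring pages.
 */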
18575084Sjohnlev static void
18585084Sjohnlev xnb_disconnect_rings(dev_info_t *dip)
18595084Sjohnlev {
18605084Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
18615084Sjohnlev
18625741Smrj if (xnbp->xnb_irq) {
18635084Sjohnlev ddi_remove_intr(dip, 0, NULL);
18645741Smrj xnbp->xnb_irq = B_FALSE;
18655084Sjohnlev }
18665084Sjohnlev
18675741Smrj if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
18685084Sjohnlev xvdi_free_evtchn(dip);
18695741Smrj xnbp->xnb_evtchn = INVALID_EVTCHN;
18705084Sjohnlev }
18715084Sjohnlev
18725741Smrj if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
18735084Sjohnlev struct gnttab_unmap_grant_ref unmap_op;
18745084Sjohnlev
18755741Smrj unmap_op.host_addr = (uint64_t)(uintptr_t)
18765741Smrj xnbp->xnb_rx_ring_addr;
18775084Sjohnlev unmap_op.dev_bus_addr = 0;
18785741Smrj unmap_op.handle = xnbp->xnb_rx_ring_handle;
18795084Sjohnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
18805084Sjohnlev &unmap_op, 1) != 0)
18815084Sjohnlev cmn_err(CE_WARN, "xnb_disconnect_rings: "
18825084Sjohnlev "cannot unmap rx-ring page (%d)",
18835084Sjohnlev unmap_op.status);
18845084Sjohnlev
18855741Smrj xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
18865084Sjohnlev }
18875084Sjohnlev
18885741Smrj if (xnbp->xnb_rx_ring_addr != NULL) {
18895741Smrj hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
18905741Smrj vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
18915741Smrj xnbp->xnb_rx_ring_addr = NULL;
18925084Sjohnlev }
18935084Sjohnlev
18945741Smrj if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
18955084Sjohnlev struct gnttab_unmap_grant_ref unmap_op;
18965084Sjohnlev
18975741Smrj unmap_op.host_addr = (uint64_t)(uintptr_t)
18985741Smrj xnbp->xnb_tx_ring_addr;
18995084Sjohnlev unmap_op.dev_bus_addr = 0;
19005741Smrj unmap_op.handle = xnbp->xnb_tx_ring_handle;
19015084Sjohnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
19025084Sjohnlev &unmap_op, 1) != 0)
19035084Sjohnlev cmn_err(CE_WARN, "xnb_disconnect_rings: "
19045084Sjohnlev "cannot unmap tx-ring page (%d)",
19055084Sjohnlev unmap_op.status);
19065084Sjohnlev
19075741Smrj xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
19085084Sjohnlev }
19095084Sjohnlev
19105741Smrj if (xnbp->xnb_tx_ring_addr != NULL) {
19115741Smrj hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
19125741Smrj vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
19135741Smrj xnbp->xnb_tx_ring_addr = NULL;
19145084Sjohnlev }
19155084Sjohnlev }
19165084Sjohnlev
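/*
 * React to a change in the state of the peer (other end), as reported
 * through xenbus.
 */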
19175084Sjohnlev static void
19185084Sjohnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
19195084Sjohnlev void *arg, void *impl_data)
19205084Sjohnlev {
192110958Sdme@sun.com _NOTE(ARGUNUSED(id, arg));
19225084Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
19235084Sjohnlev XenbusState new_state = *(XenbusState *)impl_data;
19245084Sjohnlev
19255084Sjohnlev ASSERT(xnbp != NULL);
19265084Sjohnlev
19275084Sjohnlev switch (new_state) {
19285084Sjohnlev case XenbusStateConnected:
19297005Scz147101 /* spurious state change */
19307005Scz147101 if (xnbp->xnb_connected)
19317005Scz147101 return;
19327005Scz147101
193310958Sdme@sun.com if (!xnb_read_oe_config(xnbp) ||
193410958Sdme@sun.com !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
193510958Sdme@sun.com cmn_err(CE_WARN, "xnb_oe_state_change: "
193610958Sdme@sun.com 			    "cannot read other-end config or connect to peer");
19375084Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL,
19385084Sjohnlev XenbusStateClosed);
19395084Sjohnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE);
194010958Sdme@sun.com
194110958Sdme@sun.com break;
19425084Sjohnlev }
19435084Sjohnlev
194410958Sdme@sun.com
194510958Sdme@sun.com mutex_enter(&xnbp->xnb_state_lock);
194610958Sdme@sun.com xnbp->xnb_fe_status = XNB_STATE_READY;
194710958Sdme@sun.com if (xnbp->xnb_be_status == XNB_STATE_READY)
194810958Sdme@sun.com xnb_start_connect(xnbp);
194910958Sdme@sun.com mutex_exit(&xnbp->xnb_state_lock);
195010958Sdme@sun.com
19515084Sjohnlev /*
19525084Sjohnlev 		 * Now that we've attempted to connect, it's reasonable
19535084Sjohnlev * to allow an attempt to detach.
19545084Sjohnlev */
19555741Smrj xnbp->xnb_detachable = B_TRUE;
19565084Sjohnlev
19575084Sjohnlev break;
19585084Sjohnlev
19595084Sjohnlev case XenbusStateClosing:
19605084Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
19615084Sjohnlev
19625084Sjohnlev break;
19635084Sjohnlev
19645084Sjohnlev case XenbusStateClosed:
19655741Smrj xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
19665084Sjohnlev
19675741Smrj mutex_enter(&xnbp->xnb_tx_lock);
19685741Smrj mutex_enter(&xnbp->xnb_rx_lock);
19695084Sjohnlev
19705084Sjohnlev xnb_disconnect_rings(dip);
19715741Smrj xnbp->xnb_connected = B_FALSE;
19725084Sjohnlev
19735741Smrj mutex_exit(&xnbp->xnb_rx_lock);
19745741Smrj mutex_exit(&xnbp->xnb_tx_lock);
19755084Sjohnlev
19765084Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
19775084Sjohnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE);
19785084Sjohnlev /*
19795084Sjohnlev 		 * In all likelihood this is already set (in the above
19805084Sjohnlev * case), but if the peer never attempted to connect
19815084Sjohnlev * and the domain is destroyed we get here without
19825084Sjohnlev * having been through the case above, so we set it to
19835084Sjohnlev * be sure.
19845084Sjohnlev */
19855741Smrj xnbp->xnb_detachable = B_TRUE;
19865084Sjohnlev
19875084Sjohnlev break;
19885084Sjohnlev
19895084Sjohnlev default:
19905084Sjohnlev break;
19915084Sjohnlev }
19925084Sjohnlev }
19935084Sjohnlev
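/*
 * React to a change in the hotplug state of the device (i.e. the
 * hotplug scripts having run for it).
 */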
19945084Sjohnlev static void
19955084Sjohnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
19965084Sjohnlev void *arg, void *impl_data)
19975084Sjohnlev {
199810958Sdme@sun.com _NOTE(ARGUNUSED(id, arg));
19995084Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
20005084Sjohnlev xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
20015084Sjohnlev
20025084Sjohnlev ASSERT(xnbp != NULL);
20035084Sjohnlev
20045084Sjohnlev switch (state) {
20055084Sjohnlev case Connected:
20067005Scz147101 /* spurious hotplug event */
20077005Scz147101 if (xnbp->xnb_hotplugged)
200810958Sdme@sun.com break;
20097005Scz147101
201010958Sdme@sun.com if (!xnb_read_xs_config(xnbp))
201110958Sdme@sun.com break;
201210958Sdme@sun.com
201310958Sdme@sun.com if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
201410958Sdme@sun.com break;
20155084Sjohnlev
20165741Smrj mutex_enter(&xnbp->xnb_tx_lock);
20175741Smrj mutex_enter(&xnbp->xnb_rx_lock);
20185084Sjohnlev
201910958Sdme@sun.com xnbp->xnb_hotplugged = B_TRUE;
20205084Sjohnlev
20215741Smrj mutex_exit(&xnbp->xnb_rx_lock);
20225741Smrj mutex_exit(&xnbp->xnb_tx_lock);
202310958Sdme@sun.com
202410958Sdme@sun.com mutex_enter(&xnbp->xnb_state_lock);
202510958Sdme@sun.com xnbp->xnb_be_status = XNB_STATE_READY;
202610958Sdme@sun.com if (xnbp->xnb_fe_status == XNB_STATE_READY)
202710958Sdme@sun.com xnb_start_connect(xnbp);
202810958Sdme@sun.com mutex_exit(&xnbp->xnb_state_lock);
202910958Sdme@sun.com
20305084Sjohnlev break;
20315084Sjohnlev
20325084Sjohnlev default:
20335084Sjohnlev break;
20345084Sjohnlev }
20355084Sjohnlev }
20365084Sjohnlev
20375084Sjohnlev static struct modldrv modldrv = {
20387351Sdme@sun.com &mod_miscops, "xnb",
20395084Sjohnlev };
20405084Sjohnlev
20415084Sjohnlev static struct modlinkage modlinkage = {
20425084Sjohnlev MODREV_1, &modldrv, NULL
20435084Sjohnlev };
20445084Sjohnlev
20455084Sjohnlev int
20465084Sjohnlev _init(void)
20475084Sjohnlev {
20485084Sjohnlev int i;
20495084Sjohnlev
20505084Sjohnlev mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
20515084Sjohnlev
205210958Sdme@sun.com i = mod_install(&modlinkage);
205310958Sdme@sun.com if (i != DDI_SUCCESS)
205410958Sdme@sun.com mutex_destroy(&xnb_alloc_page_lock);
20555084Sjohnlev
20565084Sjohnlev return (i);
20575084Sjohnlev }
20585084Sjohnlev
20595084Sjohnlev int
20605084Sjohnlev _info(struct modinfo *modinfop)
20615084Sjohnlev {
20625084Sjohnlev return (mod_info(&modlinkage, modinfop));
20635084Sjohnlev }
20645084Sjohnlev
20655084Sjohnlev int
20665084Sjohnlev _fini(void)
20675084Sjohnlev {
20685084Sjohnlev int i;
20695084Sjohnlev
20705084Sjohnlev i = mod_remove(&modlinkage);
207110958Sdme@sun.com if (i == DDI_SUCCESS)
20725084Sjohnlev mutex_destroy(&xnb_alloc_page_lock);
207310958Sdme@sun.com
20745084Sjohnlev return (i);
20755084Sjohnlev }
2076