xref: /onnv-gate/usr/src/uts/common/io/vnic/vnic_dev.c (revision 5895:f251acdd9bdc)
15084Sjohnlev /*
25084Sjohnlev  * CDDL HEADER START
35084Sjohnlev  *
45084Sjohnlev  * The contents of this file are subject to the terms of the
55084Sjohnlev  * Common Development and Distribution License (the "License").
65084Sjohnlev  * You may not use this file except in compliance with the License.
75084Sjohnlev  *
85084Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev  * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev  * See the License for the specific language governing permissions
115084Sjohnlev  * and limitations under the License.
125084Sjohnlev  *
135084Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev  *
195084Sjohnlev  * CDDL HEADER END
205084Sjohnlev  */
215084Sjohnlev /*
22*5895Syz147064  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
235084Sjohnlev  * Use is subject to license terms.
245084Sjohnlev  */
255084Sjohnlev 
265084Sjohnlev #pragma ident	"%Z%%M%	%I%	%E% SMI"
275084Sjohnlev 
285084Sjohnlev #include <sys/types.h>
295084Sjohnlev #include <sys/sysmacros.h>
305084Sjohnlev #include <sys/conf.h>
315084Sjohnlev #include <sys/cmn_err.h>
325084Sjohnlev #include <sys/list.h>
335084Sjohnlev #include <sys/ksynch.h>
345084Sjohnlev #include <sys/kmem.h>
355084Sjohnlev #include <sys/stream.h>
365084Sjohnlev #include <sys/modctl.h>
375084Sjohnlev #include <sys/ddi.h>
385084Sjohnlev #include <sys/sunddi.h>
395084Sjohnlev #include <sys/atomic.h>
405084Sjohnlev #include <sys/stat.h>
415084Sjohnlev #include <sys/modhash.h>
425084Sjohnlev #include <sys/strsubr.h>
435084Sjohnlev #include <sys/strsun.h>
445084Sjohnlev #include <sys/dlpi.h>
455084Sjohnlev #include <sys/mac.h>
465084Sjohnlev #include <sys/mac_ether.h>
47*5895Syz147064 #include <sys/dls.h>
485084Sjohnlev #include <sys/pattr.h>
495084Sjohnlev #include <sys/vnic.h>
505084Sjohnlev #include <sys/vnic_impl.h>
515084Sjohnlev #include <sys/gld.h>
525084Sjohnlev #include <inet/ip.h>
535084Sjohnlev #include <inet/ip_impl.h>
545084Sjohnlev 
555084Sjohnlev static int vnic_m_start(void *);
565084Sjohnlev static void vnic_m_stop(void *);
575084Sjohnlev static int vnic_m_promisc(void *, boolean_t);
585084Sjohnlev static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
595084Sjohnlev static int vnic_m_unicst(void *, const uint8_t *);
605084Sjohnlev static int vnic_m_stat(void *, uint_t, uint64_t *);
615084Sjohnlev static void vnic_m_resources(void *);
625084Sjohnlev static mblk_t *vnic_m_tx(void *, mblk_t *);
635084Sjohnlev static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
645084Sjohnlev static void vnic_mac_free(vnic_mac_t *);
655084Sjohnlev static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *);
665084Sjohnlev static void vnic_notify_cb(void *, mac_notify_type_t);
675084Sjohnlev static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *);
685084Sjohnlev static mblk_t *vnic_active_tx(void *, mblk_t *);
695084Sjohnlev static int vnic_promisc_set(vnic_t *, boolean_t);
705084Sjohnlev 
715084Sjohnlev static kmem_cache_t	*vnic_cache;
725084Sjohnlev static kmem_cache_t	*vnic_mac_cache;
735084Sjohnlev static krwlock_t	vnic_lock;
745084Sjohnlev static kmutex_t		vnic_mac_lock;
755084Sjohnlev static uint_t		vnic_count;
765084Sjohnlev 
775084Sjohnlev /* hash of VNICs (vnic_t's), keyed by VNIC id */
785084Sjohnlev static mod_hash_t	*vnic_hash;
795084Sjohnlev #define	VNIC_HASHSZ	64
/*
 * Convert a VNIC id into a vnic_hash key.  The argument is parenthesized
 * so that an arbitrary expression can be passed without the casts binding
 * to only part of it.
 */
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)(vnic_id))
815084Sjohnlev 
825084Sjohnlev /*
835084Sjohnlev  * Hash of underlying open MACs (vnic_mac_t's), keyed by the string
845084Sjohnlev  * "<device name><instance number>/<port number>".
855084Sjohnlev  */
865084Sjohnlev static mod_hash_t	*vnic_mac_hash;
875084Sjohnlev #define	VNIC_MAC_HASHSZ	64
885084Sjohnlev 
/*
 * Take a reference on a vnic_mac_t.  The caller must hold vnic_mac_lock.
 * The body is wrapped in do { } while (0) so that the macro followed by
 * a semicolon forms exactly one statement, which keeps it usable as the
 * body of an unbraced if/else.
 */
#define	VNIC_MAC_REFHOLD(va) do {		\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	(va)->va_refs++;			\
	ASSERT((va)->va_refs != 0);		\
} while (0)
945084Sjohnlev 
/*
 * Drop a reference on a vnic_mac_t and free it through vnic_mac_free()
 * when the last reference goes away.  The caller must hold vnic_mac_lock.
 * The body is wrapped in do { } while (0) so that the macro followed by
 * a semicolon forms exactly one statement; this also keeps the inner
 * "if" from capturing an "else" that follows the macro invocation.
 */
#define	VNIC_MAC_REFRELE(va) do {		\
	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
	ASSERT((va)->va_refs != 0);		\
	if (--((va)->va_refs) == 0)		\
		vnic_mac_free(va);		\
} while (0)
1015084Sjohnlev 
1025084Sjohnlev static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1035084Sjohnlev 
1045084Sjohnlev /* used by vnic_walker */
1055084Sjohnlev typedef struct vnic_info_state {
106*5895Syz147064 	datalink_id_t	vs_vnic_id;
107*5895Syz147064 	datalink_id_t	vs_linkid;
1085084Sjohnlev 	boolean_t	vs_vnic_found;
1095084Sjohnlev 	vnic_info_new_vnic_fn_t	vs_new_vnic_fn;
1105084Sjohnlev 	void		*vs_fn_arg;
1115084Sjohnlev 	int		vs_rc;
1125084Sjohnlev } vnic_info_state_t;
1135084Sjohnlev 
1145084Sjohnlev #define	VNIC_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_GETCAPAB)
1155084Sjohnlev 
1165084Sjohnlev static mac_callbacks_t vnic_m_callbacks = {
1175084Sjohnlev 	VNIC_M_CALLBACK_FLAGS,
1185084Sjohnlev 	vnic_m_stat,
1195084Sjohnlev 	vnic_m_start,
1205084Sjohnlev 	vnic_m_stop,
1215084Sjohnlev 	vnic_m_promisc,
1225084Sjohnlev 	vnic_m_multicst,
1235084Sjohnlev 	vnic_m_unicst,
1245084Sjohnlev 	vnic_m_tx,
1255084Sjohnlev 	vnic_m_resources,
1265084Sjohnlev 	NULL,			/* m_ioctl */
1275084Sjohnlev 	vnic_m_capab_get
1285084Sjohnlev };
1295084Sjohnlev 
1305084Sjohnlev /* ARGSUSED */
1315084Sjohnlev static int
1325084Sjohnlev vnic_mac_ctor(void *buf, void *arg, int kmflag)
1335084Sjohnlev {
1345084Sjohnlev 	vnic_mac_t *vnic_mac = buf;
1355084Sjohnlev 
1365084Sjohnlev 	bzero(vnic_mac, sizeof (vnic_mac_t));
1375084Sjohnlev 	rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL);
1385084Sjohnlev 	rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL);
1395084Sjohnlev 
1405084Sjohnlev 	return (0);
1415084Sjohnlev }
1425084Sjohnlev 
1435084Sjohnlev /* ARGSUSED */
1445084Sjohnlev static void
1455084Sjohnlev vnic_mac_dtor(void *buf, void *arg)
1465084Sjohnlev {
1475084Sjohnlev 	vnic_mac_t *vnic_mac = buf;
1485084Sjohnlev 
1495084Sjohnlev 	rw_destroy(&vnic_mac->va_promisc_lock);
1505084Sjohnlev 	rw_destroy(&vnic_mac->va_bcast_grp_lock);
1515084Sjohnlev }
1525084Sjohnlev 
1535084Sjohnlev void
1545084Sjohnlev vnic_dev_init(void)
1555084Sjohnlev {
1565084Sjohnlev 	vnic_cache = kmem_cache_create("vnic_cache",
1575084Sjohnlev 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1585084Sjohnlev 
1595084Sjohnlev 	vnic_mac_cache = kmem_cache_create("vnic_mac_cache",
1605084Sjohnlev 	    sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor,
1615084Sjohnlev 	    NULL, NULL, NULL, 0);
1625084Sjohnlev 
1635084Sjohnlev 	vnic_hash = mod_hash_create_idhash("vnic_hash",
1645084Sjohnlev 	    VNIC_HASHSZ, mod_hash_null_valdtor);
1655084Sjohnlev 
166*5895Syz147064 	vnic_mac_hash = mod_hash_create_idhash("vnic_mac_hash",
1675084Sjohnlev 	    VNIC_MAC_HASHSZ, mod_hash_null_valdtor);
1685084Sjohnlev 
1695084Sjohnlev 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
1705084Sjohnlev 
1715084Sjohnlev 	mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL);
1725084Sjohnlev 
1735084Sjohnlev 	vnic_count = 0;
1745084Sjohnlev }
1755084Sjohnlev 
1765084Sjohnlev void
1775084Sjohnlev vnic_dev_fini(void)
1785084Sjohnlev {
1795084Sjohnlev 	ASSERT(vnic_count == 0);
1805084Sjohnlev 
1815084Sjohnlev 	mutex_destroy(&vnic_mac_lock);
1825084Sjohnlev 	rw_destroy(&vnic_lock);
183*5895Syz147064 	mod_hash_destroy_idhash(vnic_mac_hash);
1845084Sjohnlev 	mod_hash_destroy_idhash(vnic_hash);
1855084Sjohnlev 	kmem_cache_destroy(vnic_mac_cache);
1865084Sjohnlev 	kmem_cache_destroy(vnic_cache);
1875084Sjohnlev }
1885084Sjohnlev 
1895084Sjohnlev uint_t
1905084Sjohnlev vnic_dev_count(void)
1915084Sjohnlev {
1925084Sjohnlev 	return (vnic_count);
1935084Sjohnlev }
1945084Sjohnlev 
1955084Sjohnlev static int
196*5895Syz147064 vnic_mac_open(datalink_id_t linkid, vnic_mac_t **vmp)
1975084Sjohnlev {
1985084Sjohnlev 	int err;
1995084Sjohnlev 	vnic_mac_t *vnic_mac = NULL;
2005084Sjohnlev 	const mac_info_t *mip;
2015084Sjohnlev 
2025084Sjohnlev 	*vmp = NULL;
2035084Sjohnlev 
2045084Sjohnlev 	mutex_enter(&vnic_mac_lock);
2055084Sjohnlev 
206*5895Syz147064 	err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid,
2075084Sjohnlev 	    (mod_hash_val_t *)&vnic_mac);
2085084Sjohnlev 	if (err == 0) {
2095084Sjohnlev 		/* this MAC is already opened, increment reference count */
2105084Sjohnlev 		VNIC_MAC_REFHOLD(vnic_mac);
2115084Sjohnlev 		mutex_exit(&vnic_mac_lock);
2125084Sjohnlev 		*vmp = vnic_mac;
2135084Sjohnlev 		return (0);
2145084Sjohnlev 	}
2155084Sjohnlev 
2165084Sjohnlev 	vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP);
217*5895Syz147064 	if ((err = mac_open_by_linkid(linkid, &vnic_mac->va_mh)) != 0) {
218*5895Syz147064 		vnic_mac->va_mh = NULL;
219*5895Syz147064 		goto bail;
220*5895Syz147064 	}
2215084Sjohnlev 
222*5895Syz147064 	/*
223*5895Syz147064 	 * For now, we do not support VNICs over legacy drivers.  This will
224*5895Syz147064 	 * soon be changed.
225*5895Syz147064 	 */
226*5895Syz147064 	if (mac_is_legacy(vnic_mac->va_mh)) {
227*5895Syz147064 		err = ENOTSUP;
2285084Sjohnlev 		goto bail;
2295084Sjohnlev 	}
2305084Sjohnlev 
2315084Sjohnlev 	/* only ethernet support, for now */
2325084Sjohnlev 	mip = mac_info(vnic_mac->va_mh);
2335084Sjohnlev 	if (mip->mi_media != DL_ETHER) {
2345084Sjohnlev 		err = ENOTSUP;
2355084Sjohnlev 		goto bail;
2365084Sjohnlev 	}
2375084Sjohnlev 	if (mip->mi_media != mip->mi_nativemedia) {
2385084Sjohnlev 		err = ENOTSUP;
2395084Sjohnlev 		goto bail;
2405084Sjohnlev 	}
2415084Sjohnlev 
242*5895Syz147064 	vnic_mac->va_linkid = linkid;
2435084Sjohnlev 
2445084Sjohnlev 	/* add entry to hash table */
245*5895Syz147064 	err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid,
2465084Sjohnlev 	    (mod_hash_val_t)vnic_mac);
2475084Sjohnlev 	ASSERT(err == 0);
2485084Sjohnlev 
2495084Sjohnlev 	/* initialize the flow table associated with lower MAC */
2505084Sjohnlev 	vnic_mac->va_addr_len = ETHERADDRL;
2515084Sjohnlev 	(void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len,
2525084Sjohnlev 	    KM_SLEEP);
2535084Sjohnlev 
2545084Sjohnlev 	vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh);
2555084Sjohnlev 	vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh,
2565084Sjohnlev 	    vnic_notify_cb, vnic_mac);
2575084Sjohnlev 
2585084Sjohnlev 	VNIC_MAC_REFHOLD(vnic_mac);
2595084Sjohnlev 	*vmp = vnic_mac;
2605084Sjohnlev 	mutex_exit(&vnic_mac_lock);
2615084Sjohnlev 	return (0);
2625084Sjohnlev 
2635084Sjohnlev bail:
2645084Sjohnlev 	if (vnic_mac != NULL) {
2655084Sjohnlev 		if (vnic_mac->va_mh != NULL)
2665084Sjohnlev 			mac_close(vnic_mac->va_mh);
2675084Sjohnlev 		kmem_cache_free(vnic_mac_cache, vnic_mac);
2685084Sjohnlev 	}
2695084Sjohnlev 	mutex_exit(&vnic_mac_lock);
2705084Sjohnlev 	return (err);
2715084Sjohnlev }
2725084Sjohnlev 
2735084Sjohnlev /*
2745084Sjohnlev  * Create a new flow for the active MAC client sharing the NIC
2755084Sjohnlev  * with the VNICs. This allows the unicast packets for that NIC
2765084Sjohnlev  * to be classified and passed up to the active MAC client. It
2775084Sjohnlev  * also allows packets sent from a VNIC to the active link to
2785084Sjohnlev  * be classified by the VNIC transmit function and delivered via
2795084Sjohnlev  * the MAC module locally. Returns B_TRUE on success, B_FALSE on
2805084Sjohnlev  * failure.
2815084Sjohnlev  */
2825084Sjohnlev static int
2835084Sjohnlev vnic_init_active_rx(vnic_mac_t *vnic_mac)
2845084Sjohnlev {
2855084Sjohnlev 	uchar_t nic_mac_addr[MAXMACADDRLEN];
2865084Sjohnlev 
2875084Sjohnlev 	if (vnic_mac->va_active_flow != NULL)
2885084Sjohnlev 		return (B_TRUE);
2895084Sjohnlev 
2905084Sjohnlev 	mac_unicst_get(vnic_mac->va_mh, nic_mac_addr);
2915084Sjohnlev 
2925084Sjohnlev 	vnic_mac->va_active_flow = vnic_classifier_flow_create(
2935084Sjohnlev 	    vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP);
2945084Sjohnlev 
2955084Sjohnlev 	vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow,
2965084Sjohnlev 	    (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL);
2975084Sjohnlev 	return (B_TRUE);
2985084Sjohnlev }
2995084Sjohnlev 
3005084Sjohnlev static void
3015084Sjohnlev vnic_fini_active_rx(vnic_mac_t *vnic_mac)
3025084Sjohnlev {
3035084Sjohnlev 	if (vnic_mac->va_active_flow == NULL)
3045084Sjohnlev 		return;
3055084Sjohnlev 
3065084Sjohnlev 	vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow);
3075084Sjohnlev 	vnic_classifier_flow_destroy(vnic_mac->va_active_flow);
3085084Sjohnlev 	vnic_mac->va_active_flow = NULL;
3095084Sjohnlev }
3105084Sjohnlev 
3115084Sjohnlev static void
3125084Sjohnlev vnic_update_active_rx(vnic_mac_t *vnic_mac)
3135084Sjohnlev {
3145084Sjohnlev 	if (vnic_mac->va_active_flow == NULL)
3155084Sjohnlev 		return;
3165084Sjohnlev 
3175084Sjohnlev 	vnic_fini_active_rx(vnic_mac);
3185084Sjohnlev 	(void) vnic_init_active_rx(vnic_mac);
3195084Sjohnlev }
3205084Sjohnlev 
3215084Sjohnlev /*
3225084Sjohnlev  * Copy an mblk, preserving its hardware checksum flags.
3235084Sjohnlev  */
3245084Sjohnlev mblk_t *
3255084Sjohnlev vnic_copymsg_cksum(mblk_t *mp)
3265084Sjohnlev {
3275084Sjohnlev 	mblk_t *mp1;
3285084Sjohnlev 	uint32_t start, stuff, end, value, flags;
3295084Sjohnlev 
3305084Sjohnlev 	mp1 = copymsg(mp);
3315084Sjohnlev 	if (mp1 == NULL)
3325084Sjohnlev 		return (NULL);
3335084Sjohnlev 
3345084Sjohnlev 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
3355084Sjohnlev 	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
3365084Sjohnlev 	    flags, KM_NOSLEEP);
3375084Sjohnlev 
3385084Sjohnlev 	return (mp1);
3395084Sjohnlev }
3405084Sjohnlev 
3415084Sjohnlev /*
3425084Sjohnlev  * Copy an mblk chain, presenting the hardware checksum flags of the
3435084Sjohnlev  * individual mblks.
3445084Sjohnlev  */
3455084Sjohnlev mblk_t *
3465084Sjohnlev vnic_copymsgchain_cksum(mblk_t *mp)
3475084Sjohnlev {
3485084Sjohnlev 	mblk_t *nmp = NULL;
3495084Sjohnlev 	mblk_t **nmpp = &nmp;
3505084Sjohnlev 
3515084Sjohnlev 	for (; mp != NULL; mp = mp->b_next) {
3525084Sjohnlev 		if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) {
3535084Sjohnlev 			freemsgchain(nmp);
3545084Sjohnlev 			return (NULL);
3555084Sjohnlev 		}
3565084Sjohnlev 
3575084Sjohnlev 		nmpp = &((*nmpp)->b_next);
3585084Sjohnlev 	}
3595084Sjohnlev 
3605084Sjohnlev 	return (nmp);
3615084Sjohnlev }
3625084Sjohnlev 
3635084Sjohnlev 
3645084Sjohnlev /*
3655084Sjohnlev  * Process the specified mblk chain for proper handling of hardware
3665084Sjohnlev  * checksum offload. This routine is invoked for loopback VNIC traffic.
3675084Sjohnlev  * The function handles a NULL mblk chain passed as argument.
3685084Sjohnlev  */
3695084Sjohnlev mblk_t *
3705084Sjohnlev vnic_fix_cksum(mblk_t *mp_chain)
3715084Sjohnlev {
3725084Sjohnlev 	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
3735084Sjohnlev 	uint32_t flags, start, stuff, end, value;
3745084Sjohnlev 
3755084Sjohnlev 	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
3765084Sjohnlev 		uint16_t len;
3775084Sjohnlev 		uint32_t offset;
3785084Sjohnlev 		struct ether_header *ehp;
3795084Sjohnlev 		uint16_t sap;
3805084Sjohnlev 
3815084Sjohnlev 		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
3825084Sjohnlev 		    &flags);
3835084Sjohnlev 		if (flags == 0)
3845084Sjohnlev 			continue;
3855084Sjohnlev 
3865084Sjohnlev 		/*
3875084Sjohnlev 		 * Since the processing of checksum offload for loopback
3885084Sjohnlev 		 * traffic requires modification of the packet contents,
3895084Sjohnlev 		 * ensure sure that we are always modifying our own copy.
3905084Sjohnlev 		 */
3915084Sjohnlev 		if (DB_REF(mp) > 1) {
3925084Sjohnlev 			mp1 = copymsg(mp);
3935084Sjohnlev 			if (mp1 == NULL)
3945084Sjohnlev 				continue;
3955084Sjohnlev 			mp1->b_next = mp->b_next;
3965084Sjohnlev 			mp->b_next = NULL;
3975084Sjohnlev 			freemsg(mp);
3985084Sjohnlev 			if (prev != NULL)
3995084Sjohnlev 				prev->b_next = mp1;
4005084Sjohnlev 			else
4015084Sjohnlev 				new_chain = mp1;
4025084Sjohnlev 			mp = mp1;
4035084Sjohnlev 		}
4045084Sjohnlev 
4055084Sjohnlev 		/*
4065084Sjohnlev 		 * Ethernet, and optionally VLAN header.
4075084Sjohnlev 		 */
4085084Sjohnlev 		/*LINTED*/
4095084Sjohnlev 		ehp = (struct ether_header *)mp->b_rptr;
4105084Sjohnlev 		if (ntohs(ehp->ether_type) == VLAN_TPID) {
4115084Sjohnlev 			struct ether_vlan_header *evhp;
4125084Sjohnlev 
4135084Sjohnlev 			ASSERT(MBLKL(mp) >=
4145084Sjohnlev 			    sizeof (struct ether_vlan_header));
4155084Sjohnlev 			/*LINTED*/
4165084Sjohnlev 			evhp = (struct ether_vlan_header *)mp->b_rptr;
4175084Sjohnlev 			sap = ntohs(evhp->ether_type);
4185084Sjohnlev 			offset = sizeof (struct ether_vlan_header);
4195084Sjohnlev 		} else {
4205084Sjohnlev 			sap = ntohs(ehp->ether_type);
4215084Sjohnlev 			offset = sizeof (struct ether_header);
4225084Sjohnlev 		}
4235084Sjohnlev 
4245084Sjohnlev 		if (MBLKL(mp) <= offset) {
4255084Sjohnlev 			offset -= MBLKL(mp);
4265084Sjohnlev 			if (mp->b_cont == NULL) {
4275084Sjohnlev 				/* corrupted packet, skip it */
4285084Sjohnlev 				if (prev != NULL)
4295084Sjohnlev 					prev->b_next = mp->b_next;
4305084Sjohnlev 				else
4315084Sjohnlev 					new_chain = mp->b_next;
4325084Sjohnlev 				mp1 = mp->b_next;
4335084Sjohnlev 				mp->b_next = NULL;
4345084Sjohnlev 				freemsg(mp);
4355084Sjohnlev 				mp = mp1;
4365084Sjohnlev 				continue;
4375084Sjohnlev 			}
4385084Sjohnlev 			mp = mp->b_cont;
4395084Sjohnlev 		}
4405084Sjohnlev 
4415084Sjohnlev 		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
4425084Sjohnlev 			ipha_t *ipha = NULL;
4435084Sjohnlev 
4445084Sjohnlev 			/*
4455084Sjohnlev 			 * In order to compute the full and header
4465084Sjohnlev 			 * checksums, we need to find and parse
4475084Sjohnlev 			 * the IP and/or ULP headers.
4485084Sjohnlev 			 */
4495084Sjohnlev 
4505084Sjohnlev 			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
4515084Sjohnlev 
4525084Sjohnlev 			/*
4535084Sjohnlev 			 * IP header.
4545084Sjohnlev 			 */
4555084Sjohnlev 			if (sap != ETHERTYPE_IP)
4565084Sjohnlev 				continue;
4575084Sjohnlev 
4585084Sjohnlev 			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
4595084Sjohnlev 			/*LINTED*/
4605084Sjohnlev 			ipha = (ipha_t *)(mp->b_rptr + offset);
4615084Sjohnlev 
4625084Sjohnlev 			if (flags & HCK_FULLCKSUM) {
4635084Sjohnlev 				ipaddr_t src, dst;
4645084Sjohnlev 				uint32_t cksum;
4655084Sjohnlev 				uint16_t *up;
4665084Sjohnlev 				uint8_t proto;
4675084Sjohnlev 
4685084Sjohnlev 				/*
4695084Sjohnlev 				 * Pointer to checksum field in ULP header.
4705084Sjohnlev 				 */
4715084Sjohnlev 				proto = ipha->ipha_protocol;
4725084Sjohnlev 				ASSERT(ipha->ipha_version_and_hdr_length ==
4735084Sjohnlev 				    IP_SIMPLE_HDR_VERSION);
4745084Sjohnlev 				if (proto == IPPROTO_TCP) {
4755084Sjohnlev 					/*LINTED*/
4765084Sjohnlev 					up = IPH_TCPH_CHECKSUMP(ipha,
4775084Sjohnlev 					    IP_SIMPLE_HDR_LENGTH);
4785084Sjohnlev 				} else {
4795084Sjohnlev 					ASSERT(proto == IPPROTO_UDP);
4805084Sjohnlev 					/*LINTED*/
4815084Sjohnlev 					up = IPH_UDPH_CHECKSUMP(ipha,
4825084Sjohnlev 					    IP_SIMPLE_HDR_LENGTH);
4835084Sjohnlev 				}
4845084Sjohnlev 
4855084Sjohnlev 				/*
4865084Sjohnlev 				 * Pseudo-header checksum.
4875084Sjohnlev 				 */
4885084Sjohnlev 				src = ipha->ipha_src;
4895084Sjohnlev 				dst = ipha->ipha_dst;
4905084Sjohnlev 				len = ntohs(ipha->ipha_length) -
4915084Sjohnlev 				    IP_SIMPLE_HDR_LENGTH;
4925084Sjohnlev 
4935084Sjohnlev 				cksum = (dst >> 16) + (dst & 0xFFFF) +
4945084Sjohnlev 				    (src >> 16) + (src & 0xFFFF);
4955084Sjohnlev 				cksum += htons(len);
4965084Sjohnlev 
4975084Sjohnlev 				/*
4985084Sjohnlev 				 * The checksum value stored in the packet needs
4995084Sjohnlev 				 * to be correct. Compute it here.
5005084Sjohnlev 				 */
5015084Sjohnlev 				*up = 0;
5025084Sjohnlev 				cksum += (((proto) == IPPROTO_UDP) ?
5035084Sjohnlev 				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
5045084Sjohnlev 				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
5055084Sjohnlev 				    offset, cksum);
5065084Sjohnlev 				*(up) = (uint16_t)(cksum ? cksum : ~cksum);
5075084Sjohnlev 
5085084Sjohnlev 				flags |= HCK_FULLCKSUM_OK;
5095084Sjohnlev 				value = 0xffff;
5105084Sjohnlev 			}
5115084Sjohnlev 
5125084Sjohnlev 			if (flags & HCK_IPV4_HDRCKSUM) {
5135084Sjohnlev 				ASSERT(ipha != NULL);
5145084Sjohnlev 				ipha->ipha_hdr_checksum =
5155084Sjohnlev 				    (uint16_t)ip_csum_hdr(ipha);
5165084Sjohnlev 			}
5175084Sjohnlev 		}
5185084Sjohnlev 
5195084Sjohnlev 		if (flags & HCK_PARTIALCKSUM) {
5205084Sjohnlev 			uint16_t *up, partial, cksum;
5215084Sjohnlev 			uchar_t *ipp; /* ptr to beginning of IP header */
5225084Sjohnlev 
5235084Sjohnlev 			if (mp->b_cont != NULL) {
5245084Sjohnlev 				mblk_t *mp1;
5255084Sjohnlev 
5265084Sjohnlev 				mp1 = msgpullup(mp, offset + end);
5275084Sjohnlev 				if (mp1 == NULL)
5285084Sjohnlev 					continue;
5295084Sjohnlev 				mp1->b_next = mp->b_next;
5305084Sjohnlev 				mp->b_next = NULL;
5315084Sjohnlev 				freemsg(mp);
5325084Sjohnlev 				if (prev != NULL)
5335084Sjohnlev 					prev->b_next = mp1;
5345084Sjohnlev 				else
5355084Sjohnlev 					new_chain = mp1;
5365084Sjohnlev 				mp = mp1;
5375084Sjohnlev 			}
5385084Sjohnlev 
5395084Sjohnlev 			ipp = mp->b_rptr + offset;
5405084Sjohnlev 			/*LINTED*/
5415084Sjohnlev 			up = (uint16_t *)((uchar_t *)ipp + stuff);
5425084Sjohnlev 			partial = *up;
5435084Sjohnlev 			*up = 0;
5445084Sjohnlev 
5455084Sjohnlev 			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
5465084Sjohnlev 			    end - start, partial);
5475084Sjohnlev 			cksum = ~cksum;
5485084Sjohnlev 			*up = cksum ? cksum : ~cksum;
5495084Sjohnlev 
5505084Sjohnlev 			/*
5515084Sjohnlev 			 * Since we already computed the whole checksum,
5525084Sjohnlev 			 * indicate to the stack that it has already
5535084Sjohnlev 			 * been verified by the hardware.
5545084Sjohnlev 			 */
5555084Sjohnlev 			flags &= ~HCK_PARTIALCKSUM;
5565084Sjohnlev 			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
5575084Sjohnlev 			value = 0xffff;
5585084Sjohnlev 		}
5595084Sjohnlev 
5605084Sjohnlev 		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
5615084Sjohnlev 		    value, flags, KM_NOSLEEP);
5625084Sjohnlev 	}
5635084Sjohnlev 
5645084Sjohnlev 	return (new_chain);
5655084Sjohnlev }
5665084Sjohnlev 
5675084Sjohnlev static void
5685084Sjohnlev vnic_mac_close(vnic_mac_t *vnic_mac)
5695084Sjohnlev {
5705084Sjohnlev 	mutex_enter(&vnic_mac_lock);
5715084Sjohnlev 	VNIC_MAC_REFRELE(vnic_mac);
5725084Sjohnlev 	mutex_exit(&vnic_mac_lock);
5735084Sjohnlev }
5745084Sjohnlev 
5755084Sjohnlev static void
5765084Sjohnlev vnic_mac_free(vnic_mac_t *vnic_mac)
5775084Sjohnlev {
5785084Sjohnlev 	mod_hash_val_t val;
5795084Sjohnlev 
5805084Sjohnlev 	ASSERT(MUTEX_HELD(&vnic_mac_lock));
5815084Sjohnlev 	vnic_fini_active_rx(vnic_mac);
5825084Sjohnlev 	mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl);
5835084Sjohnlev 	if (vnic_mac->va_mac_set) {
5845084Sjohnlev 		vnic_mac->va_mac_set = B_FALSE;
5855084Sjohnlev 		mac_vnic_clear(vnic_mac->va_mh);
5865084Sjohnlev 	}
5875084Sjohnlev 	vnic_classifier_flow_tab_fini(vnic_mac);
5885084Sjohnlev 	mac_close(vnic_mac->va_mh);
5895084Sjohnlev 
5905084Sjohnlev 	(void) mod_hash_remove(vnic_mac_hash,
591*5895Syz147064 	    (mod_hash_key_t)(uintptr_t)vnic_mac->va_linkid, &val);
5925084Sjohnlev 	ASSERT(vnic_mac == (vnic_mac_t *)val);
5935084Sjohnlev 
5945084Sjohnlev 	kmem_cache_free(vnic_mac_cache, vnic_mac);
5955084Sjohnlev }
5965084Sjohnlev 
5975084Sjohnlev /*
5985084Sjohnlev  * Initial VNIC receive routine. Invoked for packets that are steered
5995084Sjohnlev  * to a VNIC but the VNIC has not been started yet.
6005084Sjohnlev  */
6015084Sjohnlev /* ARGSUSED */
6025084Sjohnlev static void
6035084Sjohnlev vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain)
6045084Sjohnlev {
6055084Sjohnlev 	vnic_t *vnic = arg1;
6065084Sjohnlev 	mblk_t *mp;
6075084Sjohnlev 
6085084Sjohnlev 	/* update stats */
6095084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = mp->b_next)
6105084Sjohnlev 		vnic->vn_stat_ierrors++;
6115084Sjohnlev 	freemsgchain(mp_chain);
6125084Sjohnlev }
6135084Sjohnlev 
6145084Sjohnlev /*
6155084Sjohnlev  * VNIC receive routine invoked after the classifier for the VNIC
6165084Sjohnlev  * has been initialized and the VNIC has been started.
6175084Sjohnlev  */
6185084Sjohnlev /* ARGSUSED */
6195084Sjohnlev void
6205084Sjohnlev vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain)
6215084Sjohnlev {
6225084Sjohnlev 	vnic_t *vnic = arg1;
6235084Sjohnlev 	mblk_t *mp;
6245084Sjohnlev 
6255084Sjohnlev 	/* update stats */
6265084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
6275084Sjohnlev 		vnic->vn_stat_ipackets++;
6285084Sjohnlev 		vnic->vn_stat_rbytes += msgdsize(mp);
6295084Sjohnlev 	}
6305084Sjohnlev 
6315084Sjohnlev 	/* pass packet up */
6325084Sjohnlev 	mac_rx(vnic->vn_mh, NULL, mp_chain);
6335084Sjohnlev }
6345084Sjohnlev 
6355084Sjohnlev /*
6365084Sjohnlev  * Routine to create a MAC-based VNIC. Adds the passed MAC address
6375084Sjohnlev  * to an unused slot in the NIC if one is available. Otherwise it
6385084Sjohnlev  * sets the NIC in promiscuous mode and assigns the MAC address to
6395084Sjohnlev  * a Rx ring if available or a soft ring.
6405084Sjohnlev  */
6415084Sjohnlev static int
6425084Sjohnlev vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr)
6435084Sjohnlev {
6445084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
6455084Sjohnlev 	int err;
6465084Sjohnlev 
6475638Sdme 	if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr,
6485638Sdme 	    maddr->mma_addrlen) == B_FALSE)
6495638Sdme 		return (EINVAL);
6505638Sdme 
6515084Sjohnlev 	if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS,
6525084Sjohnlev 	    &(vnic->vn_mma_capab))) {
6535084Sjohnlev 		if (vnic->vn_maddr_naddrfree == 0) {
6545084Sjohnlev 			/*
6555084Sjohnlev 			 * No free address slots available.
6565084Sjohnlev 			 * Enable promiscuous mode.
6575084Sjohnlev 			 */
6585084Sjohnlev 			goto set_promisc;
6595084Sjohnlev 		}
6605084Sjohnlev 
6615084Sjohnlev 		err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr);
6625084Sjohnlev 		if (err != 0) {
6635084Sjohnlev 			if (err == ENOSPC) {
6645084Sjohnlev 				/*
6655084Sjohnlev 				 * There was a race to add addresses
6665084Sjohnlev 				 * with other multiple address consumers,
6675084Sjohnlev 				 * and we lost out. Use promisc mode.
6685084Sjohnlev 				 */
6695084Sjohnlev 				goto set_promisc;
6705084Sjohnlev 			}
6715084Sjohnlev 
6725084Sjohnlev 			return (err);
6735084Sjohnlev 		}
6745084Sjohnlev 
6755084Sjohnlev 		vnic->vn_slot_id = maddr->mma_slot;
6765084Sjohnlev 		vnic->vn_multi_mac = B_TRUE;
6775084Sjohnlev 	} else {
6785084Sjohnlev 		/*
6795084Sjohnlev 		 * Either multiple MAC address support is not
6805084Sjohnlev 		 * available or all available addresses have
6815084Sjohnlev 		 * been used up.
6825084Sjohnlev 		 */
6835084Sjohnlev 	set_promisc:
684*5895Syz147064 		if ((err = mac_promisc_set(vnic_mac->va_mh, B_TRUE,
685*5895Syz147064 		    MAC_DEVPROMISC)) != 0) {
6865084Sjohnlev 			return (err);
6875084Sjohnlev 		}
6885084Sjohnlev 
6895084Sjohnlev 		vnic->vn_promisc_mac = B_TRUE;
6905084Sjohnlev 	}
6915084Sjohnlev 	return (err);
6925084Sjohnlev }
6935084Sjohnlev 
6945084Sjohnlev /*
6955084Sjohnlev  * VNIC is getting deleted. Remove the MAC address from the slot.
6965084Sjohnlev  * If promiscuous mode was being used, then unset the promiscuous mode.
6975084Sjohnlev  */
6985084Sjohnlev static int
6995084Sjohnlev vnic_remove_unicstaddr(vnic_t *vnic)
7005084Sjohnlev {
7015084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
7025084Sjohnlev 	int err;
7035084Sjohnlev 
7045084Sjohnlev 	if (vnic->vn_multi_mac) {
7055084Sjohnlev 		ASSERT(vnic->vn_promisc_mac == B_FALSE);
7065084Sjohnlev 		err = vnic->vn_maddr_remove(vnic->vn_maddr_handle,
7075084Sjohnlev 		    vnic->vn_slot_id);
7085084Sjohnlev 		vnic->vn_multi_mac = B_FALSE;
7095084Sjohnlev 	}
7105084Sjohnlev 
7115084Sjohnlev 	if (vnic->vn_promisc_mac) {
7125084Sjohnlev 		ASSERT(vnic->vn_multi_mac == B_FALSE);
7135084Sjohnlev 		err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC);
7145084Sjohnlev 		vnic->vn_promisc_mac = B_FALSE;
7155084Sjohnlev 	}
7165084Sjohnlev 
7175084Sjohnlev 	return (err);
7185084Sjohnlev }
7195084Sjohnlev 
7205084Sjohnlev /*
7215084Sjohnlev  * Create a new VNIC upon request from administrator.
7225084Sjohnlev  * Returns 0 on success, an errno on failure.
7235084Sjohnlev  */
7245084Sjohnlev int
725*5895Syz147064 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, int mac_len,
726*5895Syz147064     uchar_t *mac_addr)
7275084Sjohnlev {
7285084Sjohnlev 	vnic_t *vnic = NULL;
7295084Sjohnlev 	mac_register_t *mac;
7305084Sjohnlev 	int err;
7315084Sjohnlev 	vnic_mac_t *vnic_mac;
7325084Sjohnlev 	const mac_info_t *lower_mac_info;
7335084Sjohnlev 	mac_multi_addr_t maddr;
7345084Sjohnlev 	mac_txinfo_t tx_info;
7355084Sjohnlev 
7365084Sjohnlev 	if (mac_len != ETHERADDRL) {
7375084Sjohnlev 		/* currently only ethernet NICs are supported */
7385084Sjohnlev 		return (EINVAL);
7395084Sjohnlev 	}
7405084Sjohnlev 
7415084Sjohnlev 	rw_enter(&vnic_lock, RW_WRITER);
7425084Sjohnlev 
7435084Sjohnlev 	/* does a VNIC with the same id already exist? */
7445084Sjohnlev 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
7455084Sjohnlev 	    (mod_hash_val_t *)&vnic);
7465084Sjohnlev 	if (err == 0) {
7475084Sjohnlev 		rw_exit(&vnic_lock);
7485084Sjohnlev 		return (EEXIST);
7495084Sjohnlev 	}
7505084Sjohnlev 
7515084Sjohnlev 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
7525084Sjohnlev 	if (vnic == NULL) {
7535084Sjohnlev 		rw_exit(&vnic_lock);
7545084Sjohnlev 		return (ENOMEM);
7555084Sjohnlev 	}
7565084Sjohnlev 
7575084Sjohnlev 	/* open underlying MAC */
758*5895Syz147064 	err = vnic_mac_open(linkid, &vnic_mac);
7595084Sjohnlev 	if (err != 0) {
7605084Sjohnlev 		kmem_cache_free(vnic_cache, vnic);
7615084Sjohnlev 		rw_exit(&vnic_lock);
7625084Sjohnlev 		return (err);
7635084Sjohnlev 	}
7645084Sjohnlev 
7655084Sjohnlev 	bzero(vnic, sizeof (*vnic));
7665084Sjohnlev 	vnic->vn_id = vnic_id;
7675084Sjohnlev 	vnic->vn_vnic_mac = vnic_mac;
7685084Sjohnlev 
7695084Sjohnlev 	vnic->vn_started = B_FALSE;
7705084Sjohnlev 	vnic->vn_promisc = B_FALSE;
7715084Sjohnlev 	vnic->vn_multi_mac = B_FALSE;
7725084Sjohnlev 	vnic->vn_bcast_grp = B_FALSE;
7735084Sjohnlev 
7745084Sjohnlev 	/* set the VNIC MAC address */
7755084Sjohnlev 	maddr.mma_addrlen = mac_len;
7765084Sjohnlev 	maddr.mma_slot = 0;
7775084Sjohnlev 	maddr.mma_flags = 0;
7785084Sjohnlev 	bcopy(mac_addr, maddr.mma_addr, mac_len);
7795084Sjohnlev 	if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0)
7805084Sjohnlev 		goto bail;
7815084Sjohnlev 	bcopy(mac_addr, vnic->vn_addr, mac_len);
7825084Sjohnlev 
7835084Sjohnlev 	/* set the initial VNIC capabilities */
7845084Sjohnlev 	if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM,
7855084Sjohnlev 	    &vnic->vn_hcksum_txflags))
7865084Sjohnlev 		vnic->vn_hcksum_txflags = 0;
7875084Sjohnlev 
7885084Sjohnlev 	/* register with the MAC module */
7895084Sjohnlev 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
7905084Sjohnlev 		goto bail;
7915084Sjohnlev 
7925084Sjohnlev 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
7935084Sjohnlev 	mac->m_driver = vnic;
7945084Sjohnlev 	mac->m_dip = vnic_get_dip();
795*5895Syz147064 	mac->m_instance = (uint_t)-1;
7965084Sjohnlev 	mac->m_src_addr = vnic->vn_addr;
7975084Sjohnlev 	mac->m_callbacks = &vnic_m_callbacks;
7985084Sjohnlev 
7995084Sjohnlev 	lower_mac_info = mac_info(vnic_mac->va_mh);
8005084Sjohnlev 	mac->m_min_sdu = lower_mac_info->mi_sdu_min;
8015084Sjohnlev 	mac->m_max_sdu = lower_mac_info->mi_sdu_max;
8025084Sjohnlev 
803*5895Syz147064 	/*
804*5895Syz147064 	 * As the current margin size of the underlying mac is used to
805*5895Syz147064 	 * determine the margin size of the VNIC itself, request the
806*5895Syz147064 	 * underlying mac not to change to a smaller margin size.
807*5895Syz147064 	 */
808*5895Syz147064 	err = mac_margin_add(vnic_mac->va_mh, &(vnic->vn_margin), B_TRUE);
809*5895Syz147064 	if (err != 0)
810*5895Syz147064 		goto bail;
811*5895Syz147064 	mac->m_margin = vnic->vn_margin;
8125084Sjohnlev 	err = mac_register(mac, &vnic->vn_mh);
8135084Sjohnlev 	mac_free(mac);
814*5895Syz147064 	if (err != 0) {
815*5895Syz147064 		VERIFY(mac_margin_remove(vnic_mac->va_mh,
816*5895Syz147064 		    vnic->vn_margin) == 0);
8175084Sjohnlev 		goto bail;
818*5895Syz147064 	}
819*5895Syz147064 
820*5895Syz147064 	if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) {
821*5895Syz147064 		VERIFY(mac_margin_remove(vnic_mac->va_mh,
822*5895Syz147064 		    vnic->vn_margin) == 0);
823*5895Syz147064 		(void) mac_unregister(vnic->vn_mh);
824*5895Syz147064 		goto bail;
825*5895Syz147064 	}
8265084Sjohnlev 
8275084Sjohnlev 	/* add new VNIC to hash table */
8285084Sjohnlev 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
8295084Sjohnlev 	    (mod_hash_val_t)vnic);
8305084Sjohnlev 	ASSERT(err == 0);
8315084Sjohnlev 	vnic_count++;
8325084Sjohnlev 
8335084Sjohnlev 	rw_exit(&vnic_lock);
8345084Sjohnlev 
8355084Sjohnlev 	/* Create a flow, initialized with the MAC address of the VNIC */
8365084Sjohnlev 	if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr,
8375084Sjohnlev 	    NULL, B_FALSE, KM_SLEEP)) == NULL) {
8385084Sjohnlev 		(void) vnic_dev_delete(vnic_id);
8395084Sjohnlev 		vnic = NULL;
8405084Sjohnlev 		err = ENOMEM;
8415084Sjohnlev 		goto bail_unlocked;
8425084Sjohnlev 	}
8435084Sjohnlev 
8445084Sjohnlev 	vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial,
8455084Sjohnlev 	    vnic, vnic);
8465084Sjohnlev 
8475084Sjohnlev 	/* setup VNIC to receive broadcast packets */
8485084Sjohnlev 	err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST);
8495084Sjohnlev 	if (err != 0) {
8505084Sjohnlev 		(void) vnic_dev_delete(vnic_id);
8515084Sjohnlev 		vnic = NULL;
8525084Sjohnlev 		goto bail_unlocked;
8535084Sjohnlev 	}
8545084Sjohnlev 	vnic->vn_bcast_grp = B_TRUE;
8555084Sjohnlev 
8565084Sjohnlev 	mutex_enter(&vnic_mac_lock);
8575084Sjohnlev 	if (!vnic_mac->va_mac_set) {
8585084Sjohnlev 		/*
8595084Sjohnlev 		 * We want to MAC layer to call the VNIC tx outbound
8605084Sjohnlev 		 * routine, so that local broadcast packets sent by
8615084Sjohnlev 		 * the active interface sharing the underlying NIC (if
8625084Sjohnlev 		 * any), can be broadcast to every VNIC.
8635084Sjohnlev 		 */
8645084Sjohnlev 		tx_info.mt_fn = vnic_active_tx;
8655084Sjohnlev 		tx_info.mt_arg = vnic_mac;
8665084Sjohnlev 		if (!mac_vnic_set(vnic_mac->va_mh, &tx_info,
8675084Sjohnlev 		    vnic_m_capab_get, vnic)) {
8685084Sjohnlev 			mutex_exit(&vnic_mac_lock);
8695084Sjohnlev 			(void) vnic_dev_delete(vnic_id);
8705084Sjohnlev 			vnic = NULL;
8715084Sjohnlev 			err = EBUSY;
8725084Sjohnlev 			goto bail_unlocked;
8735084Sjohnlev 		}
8745084Sjohnlev 		vnic_mac->va_mac_set = B_TRUE;
8755084Sjohnlev 	}
8765084Sjohnlev 	mutex_exit(&vnic_mac_lock);
8775084Sjohnlev 
8785084Sjohnlev 	/* allow passing packets to NIC's active MAC client */
8795084Sjohnlev 	if (!vnic_init_active_rx(vnic_mac)) {
8805084Sjohnlev 		(void) vnic_dev_delete(vnic_id);
8815084Sjohnlev 		vnic = NULL;
8825084Sjohnlev 		err = ENOMEM;
8835084Sjohnlev 		goto bail_unlocked;
8845084Sjohnlev 	}
8855084Sjohnlev 
8865084Sjohnlev 	return (0);
8875084Sjohnlev 
8885084Sjohnlev bail:
8895084Sjohnlev 	(void) vnic_remove_unicstaddr(vnic);
8905084Sjohnlev 	vnic_mac_close(vnic_mac);
8915084Sjohnlev 	rw_exit(&vnic_lock);
8925084Sjohnlev 
8935084Sjohnlev bail_unlocked:
8945084Sjohnlev 	if (vnic != NULL) {
8955084Sjohnlev 		kmem_cache_free(vnic_cache, vnic);
8965084Sjohnlev 	}
8975084Sjohnlev 
8985084Sjohnlev 	return (err);
8995084Sjohnlev }
9005084Sjohnlev 
9015084Sjohnlev /*
9025084Sjohnlev  * Modify the properties of an existing VNIC.
9035084Sjohnlev  */
9045084Sjohnlev /* ARGSUSED */
9055084Sjohnlev int
906*5895Syz147064 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
9075084Sjohnlev     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr)
9085084Sjohnlev {
9095084Sjohnlev 	vnic_t *vnic = NULL;
9105084Sjohnlev 	int rv = 0;
9115084Sjohnlev 	boolean_t notify_mac_addr = B_FALSE;
9125084Sjohnlev 
9135084Sjohnlev 	rw_enter(&vnic_lock, RW_WRITER);
9145084Sjohnlev 
9155084Sjohnlev 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
9165084Sjohnlev 	    (mod_hash_val_t *)&vnic) != 0) {
9175084Sjohnlev 		rw_exit(&vnic_lock);
9185084Sjohnlev 		return (ENOENT);
9195084Sjohnlev 	}
9205084Sjohnlev 
9215084Sjohnlev 	if (modify_mask & VNIC_IOC_MODIFY_ADDR) {
9225084Sjohnlev 		rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr);
9235084Sjohnlev 		if (rv == 0)
9245084Sjohnlev 			notify_mac_addr = B_TRUE;
9255084Sjohnlev 	}
9265084Sjohnlev 
9275084Sjohnlev 	rw_exit(&vnic_lock);
9285084Sjohnlev 
9295084Sjohnlev 	if (notify_mac_addr)
9305084Sjohnlev 		mac_unicst_update(vnic->vn_mh, mac_addr);
9315084Sjohnlev 
9325084Sjohnlev 	return (rv);
9335084Sjohnlev }
9345084Sjohnlev 
9355084Sjohnlev int
936*5895Syz147064 vnic_dev_delete(datalink_id_t vnic_id)
9375084Sjohnlev {
9385084Sjohnlev 	vnic_t *vnic = NULL;
9395084Sjohnlev 	mod_hash_val_t val;
9405084Sjohnlev 	vnic_flow_t *flent;
941*5895Syz147064 	datalink_id_t tmpid;
9425084Sjohnlev 	int rc;
9435702Sdme 	vnic_mac_t *vnic_mac;
9445084Sjohnlev 
9455084Sjohnlev 	rw_enter(&vnic_lock, RW_WRITER);
9465084Sjohnlev 
9475084Sjohnlev 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
9485084Sjohnlev 	    (mod_hash_val_t *)&vnic) != 0) {
9495084Sjohnlev 		rw_exit(&vnic_lock);
9505084Sjohnlev 		return (ENOENT);
9515084Sjohnlev 	}
9525084Sjohnlev 
953*5895Syz147064 	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid)) != 0) {
954*5895Syz147064 		rw_exit(&vnic_lock);
955*5895Syz147064 		return (rc);
956*5895Syz147064 	}
957*5895Syz147064 
958*5895Syz147064 	ASSERT(vnic_id == tmpid);
959*5895Syz147064 
9605084Sjohnlev 	/*
9615084Sjohnlev 	 * We cannot unregister the MAC yet. Unregistering would
9625084Sjohnlev 	 * free up mac_impl_t which should not happen at this time.
9635084Sjohnlev 	 * Packets could be entering vnic_rx() through the
9645084Sjohnlev 	 * flow entry and so mac_impl_t cannot be NULL. So disable
9655084Sjohnlev 	 * mac_impl_t by calling mac_disable(). This will prevent any
9665084Sjohnlev 	 * new claims on mac_impl_t.
9675084Sjohnlev 	 */
9685084Sjohnlev 	if (mac_disable(vnic->vn_mh) != 0) {
969*5895Syz147064 		(void) dls_devnet_create(vnic->vn_mh, vnic_id);
9705084Sjohnlev 		rw_exit(&vnic_lock);
9715084Sjohnlev 		return (EBUSY);
9725084Sjohnlev 	}
9735084Sjohnlev 
9745084Sjohnlev 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
9755084Sjohnlev 	ASSERT(vnic == (vnic_t *)val);
9765084Sjohnlev 
9775084Sjohnlev 	if (vnic->vn_bcast_grp)
9785084Sjohnlev 		(void) vnic_bcast_delete(vnic, vnic_brdcst_mac);
9795084Sjohnlev 
9805084Sjohnlev 	flent = vnic->vn_flow_ent;
9815084Sjohnlev 	if (flent != NULL) {
9825084Sjohnlev 		/*
9835084Sjohnlev 		 * vnic_classifier_flow_destroy() ensures that the
9845084Sjohnlev 		 * flow is no longer used.
9855084Sjohnlev 		 */
9865084Sjohnlev 		vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent);
9875084Sjohnlev 		vnic_classifier_flow_destroy(flent);
9885084Sjohnlev 	}
9895084Sjohnlev 
990*5895Syz147064 	rc = mac_margin_remove(vnic->vn_vnic_mac->va_mh, vnic->vn_margin);
991*5895Syz147064 	ASSERT(rc == 0);
9925084Sjohnlev 	rc = mac_unregister(vnic->vn_mh);
9935084Sjohnlev 	ASSERT(rc == 0);
9945084Sjohnlev 	(void) vnic_remove_unicstaddr(vnic);
9955702Sdme 	vnic_mac = vnic->vn_vnic_mac;
9965084Sjohnlev 	kmem_cache_free(vnic_cache, vnic);
9975084Sjohnlev 	vnic_count--;
9985084Sjohnlev 	rw_exit(&vnic_lock);
9995702Sdme 	vnic_mac_close(vnic_mac);
10005084Sjohnlev 	return (0);
10015084Sjohnlev }
10025084Sjohnlev 
10035084Sjohnlev /*
10045084Sjohnlev  * For the specified packet chain, return a sub-chain to be sent
10055084Sjohnlev  * and the transmit function to be used to send the packet. Also
10065084Sjohnlev  * return a pointer to the sub-chain of packets that should
10075084Sjohnlev  * be re-classified. If the function returns NULL, the packet
10085084Sjohnlev  * should be sent using the underlying NIC.
10095084Sjohnlev  */
10105084Sjohnlev static vnic_flow_t *
10115084Sjohnlev vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest)
10125084Sjohnlev {
10135084Sjohnlev 	vnic_flow_t *flow_ent;
10145084Sjohnlev 
10155084Sjohnlev 	/* one packet at a time */
10165084Sjohnlev 	*mp_chain_rest = mp->b_next;
10175084Sjohnlev 	mp->b_next = NULL;
10185084Sjohnlev 
10195084Sjohnlev 	/* do classification on the packet */
10205084Sjohnlev 	flow_ent = vnic_classifier_get_flow(vnic_mac, mp);
10215084Sjohnlev 
10225084Sjohnlev 	return (flow_ent);
10235084Sjohnlev }
10245084Sjohnlev 
10255084Sjohnlev /*
10265084Sjohnlev  * Send a packet chain to a local VNIC or an active MAC client.
10275084Sjohnlev  */
10285084Sjohnlev static void
10295084Sjohnlev vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain)
10305084Sjohnlev {
10315084Sjohnlev 	mblk_t *mp1;
10325084Sjohnlev 	const vnic_flow_fn_info_t *fn_info;
10335084Sjohnlev 	vnic_t *vnic;
10345084Sjohnlev 
10355084Sjohnlev 	if (!vnic_classifier_is_active(flow_ent) &&
10365084Sjohnlev 	    mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) {
10375084Sjohnlev 		/*
10385084Sjohnlev 		 * If the MAC is in promiscous mode,
10395084Sjohnlev 		 * send a copy of the active client.
10405084Sjohnlev 		 */
10415084Sjohnlev 		if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL)
10425084Sjohnlev 			goto sendit;
10435084Sjohnlev 		if ((mp1 = vnic_fix_cksum(mp1)) == NULL)
10445084Sjohnlev 			goto sendit;
10455084Sjohnlev 		mac_active_rx(vnic_mac->va_mh, NULL, mp1);
10465084Sjohnlev 	}
10475084Sjohnlev sendit:
10485084Sjohnlev 	fn_info = vnic_classifier_get_fn_info(flow_ent);
10495084Sjohnlev 	/*
10505084Sjohnlev 	 * If the vnic to which we would deliver this packet is in
10515084Sjohnlev 	 * promiscuous mode then it already received the packet via
10525084Sjohnlev 	 * vnic_promisc_rx().
10535084Sjohnlev 	 *
10545084Sjohnlev 	 * XXX assumes that ff_arg2 is a vnic_t pointer if it is
10555084Sjohnlev 	 * non-NULL (currently always true).
10565084Sjohnlev 	 */
10575084Sjohnlev 	vnic = (vnic_t *)fn_info->ff_arg2;
10585084Sjohnlev 	if ((vnic != NULL) && vnic->vn_promisc)
10595084Sjohnlev 		freemsg(mp_chain);
10605084Sjohnlev 	else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL)
10615084Sjohnlev 		(fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1);
10625084Sjohnlev }
10635084Sjohnlev 
10645084Sjohnlev /*
10655084Sjohnlev  * This function is invoked when a MAC client needs to send a packet
10665084Sjohnlev  * to a NIC which is shared by VNICs. It is passed to the MAC layer
10675084Sjohnlev  * by a call to mac_vnic_set() when the NIC is opened, and is returned
10685084Sjohnlev  * to MAC clients by mac_tx_get() when VNICs are present.
10695084Sjohnlev  */
10705084Sjohnlev mblk_t *
10715084Sjohnlev vnic_active_tx(void *arg, mblk_t *mp_chain)
10725084Sjohnlev {
10735084Sjohnlev 	vnic_mac_t *vnic_mac = arg;
10745084Sjohnlev 	mblk_t *mp, *extra_mp = NULL;
10755084Sjohnlev 	vnic_flow_t *flow_ent;
10765084Sjohnlev 	void *flow_cookie;
10775084Sjohnlev 	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;
10785084Sjohnlev 
10795084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
10805084Sjohnlev 		mblk_t *next;
10815084Sjohnlev 
10825084Sjohnlev 		next = mp->b_next;
10835084Sjohnlev 		mp->b_next = NULL;
10845084Sjohnlev 
10855084Sjohnlev 		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);
10865084Sjohnlev 
10875084Sjohnlev 		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
10885084Sjohnlev 		ASSERT(extra_mp == NULL);
10895084Sjohnlev 		extra_mp = next;
10905084Sjohnlev 
10915084Sjohnlev 		if (flow_ent != NULL) {
10925084Sjohnlev 			flow_cookie = vnic_classifier_get_client_cookie(
10935084Sjohnlev 			    flow_ent);
10945084Sjohnlev 			if (flow_cookie != NULL) {
10955084Sjohnlev 				/*
10965084Sjohnlev 				 * Send a copy to every VNIC defined on the
10975084Sjohnlev 				 * interface, as well as the underlying MAC.
10985084Sjohnlev 				 */
10995084Sjohnlev 				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
11005084Sjohnlev 			} else {
11015084Sjohnlev 				/*
11025084Sjohnlev 				 * loopback the packet to a local VNIC or
11035084Sjohnlev 				 * an active MAC client.
11045084Sjohnlev 				 */
11055084Sjohnlev 				vnic_local_tx(vnic_mac, flow_ent, mp);
11065084Sjohnlev 			}
11075084Sjohnlev 			VNIC_FLOW_REFRELE(flow_ent);
11085084Sjohnlev 			mp_chain = NULL;
11095084Sjohnlev 		} else {
11105084Sjohnlev 			/*
11115084Sjohnlev 			 * Non-VNIC destination, send via the underlying
11125084Sjohnlev 			 * NIC. In order to avoid a recursive call
11135084Sjohnlev 			 * to this function, we ensured that mtp points
11145084Sjohnlev 			 * to the unerlying NIC transmit function
11155084Sjohnlev 			 * by inilizating through mac_vnic_tx_get().
11165084Sjohnlev 			 */
11175084Sjohnlev 			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
11185084Sjohnlev 			if (mp_chain != NULL)
11195084Sjohnlev 				break;
11205084Sjohnlev 		}
11215084Sjohnlev 	}
11225084Sjohnlev 
11235084Sjohnlev 	if ((mp_chain != NULL) && (extra_mp != NULL)) {
11245084Sjohnlev 		ASSERT(mp_chain->b_next == NULL);
11255084Sjohnlev 		mp_chain->b_next = extra_mp;
11265084Sjohnlev 	}
11275084Sjohnlev 	return (mp_chain);
11285084Sjohnlev }
11295084Sjohnlev 
11305084Sjohnlev /*
11315084Sjohnlev  * VNIC transmit function.
11325084Sjohnlev  */
11335084Sjohnlev mblk_t *
11345084Sjohnlev vnic_m_tx(void *arg, mblk_t *mp_chain)
11355084Sjohnlev {
11365084Sjohnlev 	vnic_t *vnic = arg;
11375084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
11385084Sjohnlev 	mblk_t *mp, *extra_mp = NULL;
11395084Sjohnlev 	vnic_flow_t *flow_ent;
11405084Sjohnlev 	void *flow_cookie;
11415084Sjohnlev 
11425084Sjohnlev 	/*
11435084Sjohnlev 	 * Update stats.
11445084Sjohnlev 	 */
11455084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
11465084Sjohnlev 		vnic->vn_stat_opackets++;
11475084Sjohnlev 		vnic->vn_stat_obytes += msgdsize(mp);
11485084Sjohnlev 	}
11495084Sjohnlev 
11505084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
11515084Sjohnlev 		mblk_t *next;
11525084Sjohnlev 
11535084Sjohnlev 		next = mp->b_next;
11545084Sjohnlev 		mp->b_next = NULL;
11555084Sjohnlev 
11565084Sjohnlev 		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);
11575084Sjohnlev 
11585084Sjohnlev 		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
11595084Sjohnlev 		ASSERT(extra_mp == NULL);
11605084Sjohnlev 		extra_mp = next;
11615084Sjohnlev 
11625084Sjohnlev 		if (flow_ent != NULL) {
11635084Sjohnlev 			flow_cookie = vnic_classifier_get_client_cookie(
11645084Sjohnlev 			    flow_ent);
11655084Sjohnlev 			if (flow_cookie != NULL) {
11665084Sjohnlev 				/*
11675084Sjohnlev 				 * The vnic_bcast_send function expects
11685084Sjohnlev 				 * to receive the sender VNIC as value
11695084Sjohnlev 				 * for arg2.
11705084Sjohnlev 				 */
11715084Sjohnlev 				vnic_bcast_send(flow_cookie, vnic, mp);
11725084Sjohnlev 			} else {
11735084Sjohnlev 				/*
11745084Sjohnlev 				 * loopback the packet to a local VNIC or
11755084Sjohnlev 				 * an active MAC client.
11765084Sjohnlev 				 */
11775084Sjohnlev 				vnic_local_tx(vnic_mac, flow_ent, mp);
11785084Sjohnlev 			}
11795084Sjohnlev 			VNIC_FLOW_REFRELE(flow_ent);
11805084Sjohnlev 			mp_chain = NULL;
11815084Sjohnlev 		} else {
11825084Sjohnlev 			/*
11835084Sjohnlev 			 * Non-local destination, send via the underlying
11845084Sjohnlev 			 * NIC.
11855084Sjohnlev 			 */
11865084Sjohnlev 			const mac_txinfo_t *mtp = vnic->vn_txinfo;
11875084Sjohnlev 			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
11885084Sjohnlev 			if (mp_chain != NULL)
11895084Sjohnlev 				break;
11905084Sjohnlev 		}
11915084Sjohnlev 	}
11925084Sjohnlev 
11935084Sjohnlev 	/* update stats to account for unsent packets */
11945084Sjohnlev 	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
11955084Sjohnlev 		vnic->vn_stat_opackets--;
11965084Sjohnlev 		vnic->vn_stat_obytes -= msgdsize(mp);
11975084Sjohnlev 		vnic->vn_stat_oerrors++;
11985084Sjohnlev 		/*
11995084Sjohnlev 		 * link back in the last portion not counted due to bandwidth
12005084Sjohnlev 		 * control.
12015084Sjohnlev 		 */
12025084Sjohnlev 		if (mp->b_next == NULL) {
12035084Sjohnlev 			mp->b_next = extra_mp;
12045084Sjohnlev 			break;
12055084Sjohnlev 		}
12065084Sjohnlev 	}
12075084Sjohnlev 
12085084Sjohnlev 	return (mp_chain);
12095084Sjohnlev }
12105084Sjohnlev 
/*
 * Resources callback for the VNIC.  Intentionally empty: a VNIC has no
 * resources to advertise, so arg is ignored.
 */
/* ARGSUSED */
static void
vnic_m_resources(void *arg)
{
}
12175084Sjohnlev 
12185084Sjohnlev static int
12195084Sjohnlev vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
12205084Sjohnlev {
12215084Sjohnlev 	vnic_t *vnic = arg;
12225084Sjohnlev 	int rval = 0;
12235084Sjohnlev 
12245084Sjohnlev 	rw_enter(&vnic_lock, RW_READER);
12255084Sjohnlev 
12265084Sjohnlev 	switch (stat) {
12275084Sjohnlev 	case ETHER_STAT_LINK_DUPLEX:
12285084Sjohnlev 		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
12295084Sjohnlev 		    ETHER_STAT_LINK_DUPLEX);
12305084Sjohnlev 		break;
12315084Sjohnlev 	case MAC_STAT_IFSPEED:
12325084Sjohnlev 		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
12335084Sjohnlev 		    MAC_STAT_IFSPEED);
12345084Sjohnlev 		break;
12355084Sjohnlev 	case MAC_STAT_MULTIRCV:
12365084Sjohnlev 		*val = vnic->vn_stat_multircv;
12375084Sjohnlev 		break;
12385084Sjohnlev 	case MAC_STAT_BRDCSTRCV:
12395084Sjohnlev 		*val = vnic->vn_stat_brdcstrcv;
12405084Sjohnlev 		break;
12415084Sjohnlev 	case MAC_STAT_MULTIXMT:
12425084Sjohnlev 		*val = vnic->vn_stat_multixmt;
12435084Sjohnlev 		break;
12445084Sjohnlev 	case MAC_STAT_BRDCSTXMT:
12455084Sjohnlev 		*val = vnic->vn_stat_brdcstxmt;
12465084Sjohnlev 		break;
12475084Sjohnlev 	case MAC_STAT_IERRORS:
12485084Sjohnlev 		*val = vnic->vn_stat_ierrors;
12495084Sjohnlev 		break;
12505084Sjohnlev 	case MAC_STAT_OERRORS:
12515084Sjohnlev 		*val = vnic->vn_stat_oerrors;
12525084Sjohnlev 		break;
12535084Sjohnlev 	case MAC_STAT_RBYTES:
12545084Sjohnlev 		*val = vnic->vn_stat_rbytes;
12555084Sjohnlev 		break;
12565084Sjohnlev 	case MAC_STAT_IPACKETS:
12575084Sjohnlev 		*val = vnic->vn_stat_ipackets;
12585084Sjohnlev 		break;
12595084Sjohnlev 	case MAC_STAT_OBYTES:
12605084Sjohnlev 		*val = vnic->vn_stat_obytes;
12615084Sjohnlev 		break;
12625084Sjohnlev 	case MAC_STAT_OPACKETS:
12635084Sjohnlev 		*val = vnic->vn_stat_opackets;
12645084Sjohnlev 		break;
12655084Sjohnlev 	default:
12665084Sjohnlev 		rval = ENOTSUP;
12675084Sjohnlev 	}
12685084Sjohnlev 
12695084Sjohnlev 	rw_exit(&vnic_lock);
12705084Sjohnlev 	return (rval);
12715084Sjohnlev }
12725084Sjohnlev 
12735084Sjohnlev /*
12745084Sjohnlev  * Return information about the specified capability.
12755084Sjohnlev  */
12765084Sjohnlev /* ARGSUSED */
12775084Sjohnlev static boolean_t
12785084Sjohnlev vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
12795084Sjohnlev {
12805084Sjohnlev 	vnic_t *vnic = arg;
12815084Sjohnlev 
12825084Sjohnlev 	switch (cap) {
12835084Sjohnlev 	case MAC_CAPAB_POLL:
12845084Sjohnlev 		return (B_TRUE);
12855084Sjohnlev 	case MAC_CAPAB_HCKSUM: {
12865084Sjohnlev 		uint32_t *hcksum_txflags = cap_data;
12875084Sjohnlev 
12885084Sjohnlev 		*hcksum_txflags = vnic->vn_hcksum_txflags &
12895084Sjohnlev 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
12905084Sjohnlev 		    HCKSUM_INET_PARTIAL);
12915084Sjohnlev 		break;
12925084Sjohnlev 	}
12935084Sjohnlev 	default:
12945084Sjohnlev 		return (B_FALSE);
12955084Sjohnlev 	}
12965084Sjohnlev 	return (B_TRUE);
12975084Sjohnlev }
12985084Sjohnlev 
12995084Sjohnlev static int
13005084Sjohnlev vnic_m_start(void *arg)
13015084Sjohnlev {
13025084Sjohnlev 	vnic_t *vnic = arg;
13035084Sjohnlev 	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
13045084Sjohnlev 	int rc;
13055084Sjohnlev 
13065084Sjohnlev 	rc = mac_start(lower_mh);
13075084Sjohnlev 	if (rc != 0)
13085084Sjohnlev 		return (rc);
13095084Sjohnlev 
13105084Sjohnlev 	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic);
13115084Sjohnlev 	return (0);
13125084Sjohnlev }
13135084Sjohnlev 
13145084Sjohnlev static void
13155084Sjohnlev vnic_m_stop(void *arg)
13165084Sjohnlev {
13175084Sjohnlev 	vnic_t *vnic = arg;
13185084Sjohnlev 	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
13195084Sjohnlev 
13205084Sjohnlev 	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial,
13215084Sjohnlev 	    vnic, vnic);
13225084Sjohnlev 	mac_stop(lower_mh);
13235084Sjohnlev }
13245084Sjohnlev 
13255084Sjohnlev /* ARGSUSED */
13265084Sjohnlev static int
13275084Sjohnlev vnic_m_promisc(void *arg, boolean_t on)
13285084Sjohnlev {
13295084Sjohnlev 	vnic_t *vnic = arg;
13305084Sjohnlev 
13315084Sjohnlev 	return (vnic_promisc_set(vnic, on));
13325084Sjohnlev }
13335084Sjohnlev 
13345084Sjohnlev static int
13355084Sjohnlev vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
13365084Sjohnlev {
13375084Sjohnlev 	vnic_t *vnic = arg;
13385084Sjohnlev 	int rc = 0;
13395084Sjohnlev 
13405084Sjohnlev 	if (add)
13415084Sjohnlev 		rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST);
13425084Sjohnlev 	else
13435084Sjohnlev 		vnic_bcast_delete(vnic, addrp);
13445084Sjohnlev 
13455084Sjohnlev 	return (rc);
13465084Sjohnlev }
13475084Sjohnlev 
13485084Sjohnlev static int
13495084Sjohnlev vnic_m_unicst(void *arg, const uint8_t *mac_addr)
13505084Sjohnlev {
13515084Sjohnlev 	vnic_t *vnic = arg;
13525084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
13535084Sjohnlev 	int rv;
13545084Sjohnlev 
13555084Sjohnlev 	rw_enter(&vnic_lock, RW_WRITER);
13565084Sjohnlev 	rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len,
13575084Sjohnlev 	    (uchar_t *)mac_addr);
13585084Sjohnlev 	rw_exit(&vnic_lock);
13595084Sjohnlev 
13605084Sjohnlev 	if (rv == 0)
13615084Sjohnlev 		mac_unicst_update(vnic->vn_mh, mac_addr);
13625084Sjohnlev 	return (0);
13635084Sjohnlev }
13645084Sjohnlev 
13655084Sjohnlev int
1366*5895Syz147064 vnic_info(uint_t *nvnics, datalink_id_t vnic_id, datalink_id_t linkid,
1367*5895Syz147064     void *fn_arg, vnic_info_new_vnic_fn_t new_vnic_fn)
13685084Sjohnlev {
13695084Sjohnlev 	vnic_info_state_t state;
13705084Sjohnlev 	int rc = 0;
13715084Sjohnlev 
13725084Sjohnlev 	rw_enter(&vnic_lock, RW_READER);
13735084Sjohnlev 
13745084Sjohnlev 	*nvnics = vnic_count;
13755084Sjohnlev 
13765084Sjohnlev 	bzero(&state, sizeof (state));
13775084Sjohnlev 	state.vs_vnic_id = vnic_id;
1378*5895Syz147064 	state.vs_linkid = linkid;
13795084Sjohnlev 	state.vs_new_vnic_fn = new_vnic_fn;
13805084Sjohnlev 	state.vs_fn_arg = fn_arg;
13815084Sjohnlev 
13825084Sjohnlev 	mod_hash_walk(vnic_hash, vnic_info_walker, &state);
13835084Sjohnlev 
1384*5895Syz147064 	if ((rc = state.vs_rc) == 0 && vnic_id != DATALINK_ALL_LINKID &&
13855733Syz147064 	    !state.vs_vnic_found)
13865084Sjohnlev 		rc = ENOENT;
13875084Sjohnlev 
13885084Sjohnlev 	rw_exit(&vnic_lock);
13895084Sjohnlev 	return (rc);
13905084Sjohnlev }
13915084Sjohnlev 
13925084Sjohnlev /*
13935084Sjohnlev  * Walker invoked when building a list of vnics that must be passed
13945084Sjohnlev  * up to user space.
13955084Sjohnlev  */
13965084Sjohnlev /*ARGSUSED*/
13975084Sjohnlev static uint_t
13985084Sjohnlev vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
13995084Sjohnlev {
14005084Sjohnlev 	vnic_t *vnic;
14015084Sjohnlev 	vnic_info_state_t *state = arg;
14025084Sjohnlev 
14035084Sjohnlev 	if (state->vs_rc != 0)
14045084Sjohnlev 		return (MH_WALK_TERMINATE);	/* terminate walk */
14055084Sjohnlev 
14065084Sjohnlev 	vnic = (vnic_t *)val;
14075084Sjohnlev 
1408*5895Syz147064 	if (state->vs_vnic_id != DATALINK_ALL_LINKID &&
1409*5895Syz147064 	    vnic->vn_id != state->vs_vnic_id) {
14105084Sjohnlev 		goto bail;
1411*5895Syz147064 	}
14125084Sjohnlev 
14135084Sjohnlev 	state->vs_vnic_found = B_TRUE;
14145084Sjohnlev 
14155084Sjohnlev 	state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg,
14165084Sjohnlev 	    vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len,
1417*5895Syz147064 	    vnic->vn_addr, vnic->vn_vnic_mac->va_linkid);
14185084Sjohnlev bail:
14195084Sjohnlev 	return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
14205084Sjohnlev }
14215084Sjohnlev 
14225084Sjohnlev /*
14235084Sjohnlev  * vnic_notify_cb() and vnic_notify_walker() below are used to
14245084Sjohnlev  * process events received from an underlying NIC and, if needed,
14255084Sjohnlev  * forward these events to the VNICs defined on top of that NIC.
14265084Sjohnlev  */
14275084Sjohnlev 
14285084Sjohnlev typedef struct vnic_notify_state {
14295084Sjohnlev 	mac_notify_type_t	vo_type;
14305084Sjohnlev 	vnic_mac_t		*vo_vnic_mac;
14315084Sjohnlev } vnic_notify_state_t;
14325084Sjohnlev 
14335084Sjohnlev /* ARGSUSED */
14345084Sjohnlev static uint_t
14355084Sjohnlev vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
14365084Sjohnlev {
14375084Sjohnlev 	vnic_t *vnic = (vnic_t *)val;
14385084Sjohnlev 	vnic_notify_state_t *state = arg;
14395084Sjohnlev 
14405084Sjohnlev 	/* ignore VNICs that don't use the specified underlying MAC */
14415084Sjohnlev 	if (vnic->vn_vnic_mac != state->vo_vnic_mac)
14425084Sjohnlev 		return (MH_WALK_CONTINUE);
14435084Sjohnlev 
14445084Sjohnlev 	switch (state->vo_type) {
14455084Sjohnlev 	case MAC_NOTE_TX:
14465084Sjohnlev 		mac_tx_update(vnic->vn_mh);
14475084Sjohnlev 		break;
14485084Sjohnlev 	case MAC_NOTE_LINK:
14495084Sjohnlev 		/*
14505084Sjohnlev 		 * The VNIC link state must be up regardless of
14515084Sjohnlev 		 * the link state of the underlying NIC to maintain
14525084Sjohnlev 		 * connectivity between VNICs on the same host.
14535084Sjohnlev 		 */
14545084Sjohnlev 		mac_link_update(vnic->vn_mh, LINK_STATE_UP);
14555084Sjohnlev 		break;
14565084Sjohnlev 	case MAC_NOTE_UNICST:
14575084Sjohnlev 		vnic_update_active_rx(vnic->vn_vnic_mac);
14585084Sjohnlev 		break;
14595084Sjohnlev 	case MAC_NOTE_VNIC:
14605084Sjohnlev 		/* only for clients which share a NIC with a VNIC */
14615084Sjohnlev 		break;
14625084Sjohnlev 	case MAC_NOTE_PROMISC:
14635084Sjohnlev 		mutex_enter(&vnic_mac_lock);
14645084Sjohnlev 		vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get(
14655084Sjohnlev 		    vnic->vn_vnic_mac->va_mh);
14665084Sjohnlev 		mutex_exit(&vnic_mac_lock);
14675084Sjohnlev 		break;
14685084Sjohnlev 	}
14695084Sjohnlev 
14705084Sjohnlev 	return (MH_WALK_CONTINUE);
14715084Sjohnlev }
14725084Sjohnlev 
14735084Sjohnlev static void
14745084Sjohnlev vnic_notify_cb(void *arg, mac_notify_type_t type)
14755084Sjohnlev {
14765084Sjohnlev 	vnic_mac_t *vnic = arg;
14775084Sjohnlev 	vnic_notify_state_t state;
14785084Sjohnlev 
14795084Sjohnlev 	state.vo_type = type;
14805084Sjohnlev 	state.vo_vnic_mac = vnic;
14815084Sjohnlev 
14825084Sjohnlev 	rw_enter(&vnic_lock, RW_READER);
14835084Sjohnlev 	mod_hash_walk(vnic_hash, vnic_notify_walker, &state);
14845084Sjohnlev 	rw_exit(&vnic_lock);
14855084Sjohnlev }
14865084Sjohnlev 
14875084Sjohnlev static int
14885084Sjohnlev vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr)
14895084Sjohnlev {
14905084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
14915084Sjohnlev 	vnic_flow_t *vnic_flow = vnic->vn_flow_ent;
14925084Sjohnlev 
14935084Sjohnlev 	ASSERT(RW_WRITE_HELD(&vnic_lock));
14945084Sjohnlev 
14955084Sjohnlev 	if (mac_len != vnic_mac->va_addr_len)
14965084Sjohnlev 		return (EINVAL);
14975084Sjohnlev 
14985084Sjohnlev 	vnic_classifier_flow_update_addr(vnic_flow, mac_addr);
14995084Sjohnlev 	return (0);
15005084Sjohnlev }
15015084Sjohnlev 
15025084Sjohnlev static int
15035084Sjohnlev vnic_promisc_set(vnic_t *vnic, boolean_t on)
15045084Sjohnlev {
15055084Sjohnlev 	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
15065084Sjohnlev 	int r = -1;
15075084Sjohnlev 
15085084Sjohnlev 	if (vnic->vn_promisc == on)
15095084Sjohnlev 		return (0);
15105084Sjohnlev 
15115084Sjohnlev 	if (on) {
1512*5895Syz147064 		if ((r = mac_promisc_set(vnic_mac->va_mh, B_TRUE,
1513*5895Syz147064 		    MAC_DEVPROMISC)) != 0) {
15145084Sjohnlev 			return (r);
1515*5895Syz147064 		}
15165084Sjohnlev 
15175084Sjohnlev 		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
15185084Sjohnlev 		vnic->vn_promisc_next = vnic_mac->va_promisc;
15195084Sjohnlev 		vnic_mac->va_promisc = vnic;
15205084Sjohnlev 		vnic_mac->va_promisc_gen++;
15215084Sjohnlev 
15225084Sjohnlev 		vnic->vn_promisc = B_TRUE;
15235084Sjohnlev 		rw_exit(&vnic_mac->va_promisc_lock);
15245084Sjohnlev 
15255084Sjohnlev 		return (0);
15265084Sjohnlev 	} else {
15275084Sjohnlev 		vnic_t *loop, *prev = NULL;
15285084Sjohnlev 
15295084Sjohnlev 		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
15305084Sjohnlev 		loop = vnic_mac->va_promisc;
15315084Sjohnlev 
15325084Sjohnlev 		while ((loop != NULL) && (loop != vnic)) {
15335084Sjohnlev 			prev = loop;
15345084Sjohnlev 			loop = loop->vn_promisc_next;
15355084Sjohnlev 		}
15365084Sjohnlev 
15375084Sjohnlev 		if ((loop != NULL) &&
15385084Sjohnlev 		    ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE,
15395084Sjohnlev 		    MAC_DEVPROMISC)) == 0)) {
15405084Sjohnlev 			if (prev != NULL)
15415084Sjohnlev 				prev->vn_promisc_next = loop->vn_promisc_next;
15425084Sjohnlev 			else
15435084Sjohnlev 				vnic_mac->va_promisc = loop->vn_promisc_next;
15445084Sjohnlev 			vnic_mac->va_promisc_gen++;
15455084Sjohnlev 
15465084Sjohnlev 			vnic->vn_promisc = B_FALSE;
15475084Sjohnlev 		}
15485084Sjohnlev 		rw_exit(&vnic_mac->va_promisc_lock);
15495084Sjohnlev 
15505084Sjohnlev 		return (r);
15515084Sjohnlev 	}
15525084Sjohnlev }
15535084Sjohnlev 
15545084Sjohnlev void
15555084Sjohnlev vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp)
15565084Sjohnlev {
15575084Sjohnlev 	vnic_t *loop;
15585084Sjohnlev 	vnic_flow_t *flow;
15595084Sjohnlev 	const vnic_flow_fn_info_t *fn_info;
15605084Sjohnlev 	mac_header_info_t hdr_info;
15615084Sjohnlev 	boolean_t dst_must_match = B_TRUE;
15625084Sjohnlev 
15635084Sjohnlev 	ASSERT(mp->b_next == NULL);
15645084Sjohnlev 
15655084Sjohnlev 	rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
15665084Sjohnlev 	if (vnic_mac->va_promisc == NULL)
15675084Sjohnlev 		goto done;
15685084Sjohnlev 
15695084Sjohnlev 	if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0)
15705084Sjohnlev 		goto done;
15715084Sjohnlev 
15725084Sjohnlev 	/*
15735084Sjohnlev 	 * If this is broadcast or multicast then the destination
15745084Sjohnlev 	 * address need not match for us to deliver it.
15755084Sjohnlev 	 */
15765084Sjohnlev 	if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
15775084Sjohnlev 	    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST))
15785084Sjohnlev 		dst_must_match = B_FALSE;
15795084Sjohnlev 
15805084Sjohnlev 	for (loop = vnic_mac->va_promisc;
15815084Sjohnlev 	    loop != NULL;
15825084Sjohnlev 	    loop = loop->vn_promisc_next) {
15835084Sjohnlev 		if (loop == sender)
15845084Sjohnlev 			continue;
15855084Sjohnlev 
15865084Sjohnlev 		if (dst_must_match &&
15875084Sjohnlev 		    (bcmp(hdr_info.mhi_daddr, loop->vn_addr,
15885084Sjohnlev 		    sizeof (loop->vn_addr)) != 0))
15895084Sjohnlev 			continue;
15905084Sjohnlev 
15915084Sjohnlev 		flow = loop->vn_flow_ent;
15925084Sjohnlev 		ASSERT(flow != NULL);
15935084Sjohnlev 
15945084Sjohnlev 		if (!flow->vf_is_active) {
15955159Sjohnlev 			mblk_t *copy;
15965159Sjohnlev 			uint64_t gen;
15975159Sjohnlev 
15985159Sjohnlev 			if ((copy = vnic_copymsg_cksum(mp)) == NULL)
15995159Sjohnlev 				break;
16005159Sjohnlev 			if ((sender != NULL) &&
16015159Sjohnlev 			    ((copy = vnic_fix_cksum(copy)) == NULL))
16025159Sjohnlev 				break;
16035159Sjohnlev 
16045084Sjohnlev 			VNIC_FLOW_REFHOLD(flow);
16055084Sjohnlev 			gen = vnic_mac->va_promisc_gen;
16065084Sjohnlev 			rw_exit(&vnic_mac->va_promisc_lock);
16075084Sjohnlev 
16085159Sjohnlev 			fn_info = vnic_classifier_get_fn_info(flow);
16095159Sjohnlev 			(fn_info->ff_fn)(fn_info->ff_arg1,
16105159Sjohnlev 			    fn_info->ff_arg2, copy);
16115084Sjohnlev 
16125084Sjohnlev 			VNIC_FLOW_REFRELE(flow);
16135084Sjohnlev 			rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
16145084Sjohnlev 			if (vnic_mac->va_promisc_gen != gen)
16155084Sjohnlev 				break;
16165084Sjohnlev 		}
16175084Sjohnlev 	}
16185084Sjohnlev done:
16195084Sjohnlev 	rw_exit(&vnic_mac->va_promisc_lock);
16205084Sjohnlev }
1621