xref: /onnv-gate/usr/src/uts/common/io/mac/mac_bcast.c (revision 11878:ac93462db6d7)
18275SEric Cheng /*
28275SEric Cheng  * CDDL HEADER START
38275SEric Cheng  *
48275SEric Cheng  * The contents of this file are subject to the terms of the
58275SEric Cheng  * Common Development and Distribution License (the "License").
68275SEric Cheng  * You may not use this file except in compliance with the License.
78275SEric Cheng  *
88275SEric Cheng  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
98275SEric Cheng  * or http://www.opensolaris.org/os/licensing.
108275SEric Cheng  * See the License for the specific language governing permissions
118275SEric Cheng  * and limitations under the License.
128275SEric Cheng  *
138275SEric Cheng  * When distributing Covered Code, include this CDDL HEADER in each
148275SEric Cheng  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
158275SEric Cheng  * If applicable, add the following below this CDDL HEADER, with the
168275SEric Cheng  * fields enclosed by brackets "[]" replaced with your own identifying
178275SEric Cheng  * information: Portions Copyright [yyyy] [name of copyright owner]
188275SEric Cheng  *
198275SEric Cheng  * CDDL HEADER END
208275SEric Cheng  */
218275SEric Cheng /*
22*11878SVenu.Iyer@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
238275SEric Cheng  * Use is subject to license terms.
248275SEric Cheng  */
258275SEric Cheng 
268275SEric Cheng #include <sys/types.h>
278275SEric Cheng #include <sys/sysmacros.h>
288275SEric Cheng #include <sys/conf.h>
298275SEric Cheng #include <sys/cmn_err.h>
308275SEric Cheng #include <sys/list.h>
318275SEric Cheng #include <sys/kmem.h>
328275SEric Cheng #include <sys/stream.h>
338275SEric Cheng #include <sys/modctl.h>
348275SEric Cheng #include <sys/ddi.h>
358275SEric Cheng #include <sys/sunddi.h>
368275SEric Cheng #include <sys/atomic.h>
378275SEric Cheng #include <sys/stat.h>
388275SEric Cheng #include <sys/modhash.h>
398275SEric Cheng #include <sys/strsubr.h>
408275SEric Cheng #include <sys/strsun.h>
418275SEric Cheng #include <sys/sdt.h>
428275SEric Cheng #include <sys/mac.h>
438275SEric Cheng #include <sys/mac_impl.h>
448275SEric Cheng #include <sys/mac_client_impl.h>
458275SEric Cheng #include <sys/mac_client_priv.h>
468275SEric Cheng #include <sys/mac_flow_impl.h>
478275SEric Cheng 
488275SEric Cheng /*
498275SEric Cheng  * Broadcast and multicast traffic must be distributed to the MAC clients
508275SEric Cheng  * that are defined on top of the same MAC. The set of
518275SEric Cheng  * destinations to which a multicast packet must be sent is a subset
528275SEric Cheng  * of all MAC clients defined on top of the MAC. A MAC client can be member
538275SEric Cheng  * of more than one such subset.
548275SEric Cheng  *
 * To accommodate these requirements, we introduce broadcast groups.
 * A broadcast group is associated with a broadcast or multicast
 * address. The members of a broadcast group consist of the MAC clients
 * that should receive copies of packets sent to the address
 * associated with the group, and are defined on top of the
 * same MAC.
618275SEric Cheng  *
628275SEric Cheng  * The broadcast groups defined on top of a MAC are chained,
638275SEric Cheng  * hanging off the mac_impl_t. The broadcast group id's are
648275SEric Cheng  * unique globally (tracked by mac_bcast_id).
658275SEric Cheng  */
668275SEric Cheng 
678275SEric Cheng /*
688275SEric Cheng  * The same MAC client may be added for different <addr,vid> tuple,
698275SEric Cheng  * we maintain a ref count for the number of times it has been added
708275SEric Cheng  * to account for deleting the MAC client from the group.
718275SEric Cheng  */
728275SEric Cheng typedef struct mac_bcast_grp_mcip_s {
738275SEric Cheng 	mac_client_impl_t	*mgb_client;
748275SEric Cheng 	int			mgb_client_ref;
758275SEric Cheng } mac_bcast_grp_mcip_t;
768275SEric Cheng 
778275SEric Cheng typedef struct mac_bcast_grp_s {			/* Protected by */
788275SEric Cheng 	struct mac_bcast_grp_s	*mbg_next;		/* SL */
798275SEric Cheng 	void			*mbg_addr;		/* SL */
808275SEric Cheng 	uint16_t		mbg_vid;		/* SL */
818275SEric Cheng 	mac_impl_t		*mbg_mac_impl;		/* WO */
828275SEric Cheng 	mac_addrtype_t		mbg_addrtype;		/* WO */
838275SEric Cheng 	flow_entry_t		*mbg_flow_ent;		/* WO */
848275SEric Cheng 	mac_bcast_grp_mcip_t	*mbg_clients;		/* mi_rw_lock */
858275SEric Cheng 	uint_t			mbg_nclients;		/* mi_rw_lock */
868275SEric Cheng 	uint_t			mbg_nclients_alloc;	/* SL */
878275SEric Cheng 	uint64_t		mbg_clients_gen;	/* mi_rw_lock */
888275SEric Cheng 	uint32_t		mbg_id;			/* atomic */
898275SEric Cheng } mac_bcast_grp_t;
908275SEric Cheng 
918275SEric Cheng static kmem_cache_t *mac_bcast_grp_cache;
928275SEric Cheng static uint32_t mac_bcast_id = 0;
938275SEric Cheng 
948275SEric Cheng void
mac_bcast_init(void)958275SEric Cheng mac_bcast_init(void)
968275SEric Cheng {
978275SEric Cheng 	mac_bcast_grp_cache = kmem_cache_create("mac_bcast_grp_cache",
988275SEric Cheng 	    sizeof (mac_bcast_grp_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
998275SEric Cheng }
1008275SEric Cheng 
1018275SEric Cheng void
mac_bcast_fini(void)1028275SEric Cheng mac_bcast_fini(void)
1038275SEric Cheng {
1048275SEric Cheng 	kmem_cache_destroy(mac_bcast_grp_cache);
1058275SEric Cheng }
1068275SEric Cheng 
1078275SEric Cheng mac_impl_t *
mac_bcast_grp_mip(void * grp)1088275SEric Cheng mac_bcast_grp_mip(void *grp)
1098275SEric Cheng {
1108275SEric Cheng 	mac_bcast_grp_t *bcast_grp = grp;
1118275SEric Cheng 
1128275SEric Cheng 	return (bcast_grp->mbg_mac_impl);
1138275SEric Cheng }
1148275SEric Cheng 
1158275SEric Cheng /*
1168275SEric Cheng  * Free the specific broadcast group. Invoked when the last reference
1178275SEric Cheng  * to the group is released.
1188275SEric Cheng  */
1198275SEric Cheng void
mac_bcast_grp_free(void * bcast_grp)1208275SEric Cheng mac_bcast_grp_free(void *bcast_grp)
1218275SEric Cheng {
1228275SEric Cheng 	mac_bcast_grp_t	*grp = bcast_grp;
1238275SEric Cheng 	mac_impl_t *mip = grp->mbg_mac_impl;
1248275SEric Cheng 
1258275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1268275SEric Cheng 
1278275SEric Cheng 	ASSERT(grp->mbg_addr != NULL);
1288275SEric Cheng 	kmem_free(grp->mbg_addr, mip->mi_type->mt_addr_length);
1298275SEric Cheng 	kmem_free(grp->mbg_clients,
1308275SEric Cheng 	    grp->mbg_nclients_alloc * sizeof (mac_bcast_grp_mcip_t));
1318275SEric Cheng 	mip->mi_bcast_ngrps--;
1328275SEric Cheng 	kmem_cache_free(mac_bcast_grp_cache, grp);
1338275SEric Cheng }
1348275SEric Cheng 
1358275SEric Cheng /*
1368275SEric Cheng  * arg1: broadcast group
1378275SEric Cheng  * arg2: sender MAC client if it is being sent by a MAC client,
1388275SEric Cheng  * NULL if it was received from the wire.
1398275SEric Cheng  */
1408275SEric Cheng void
mac_bcast_send(void * arg1,void * arg2,mblk_t * mp_chain,boolean_t is_loopback)1418275SEric Cheng mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback)
1428275SEric Cheng {
1438275SEric Cheng 	mac_bcast_grp_t *grp = arg1;
1448275SEric Cheng 	mac_client_impl_t *src_mcip = arg2, *dst_mcip;
1458275SEric Cheng 	mac_impl_t *mip = grp->mbg_mac_impl;
1468275SEric Cheng 	uint64_t gen;
1478275SEric Cheng 	uint_t i;
1488275SEric Cheng 	mblk_t *mp_chain1;
1498275SEric Cheng 	flow_entry_t	*flent;
1508275SEric Cheng 	int err;
1518275SEric Cheng 
1528275SEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_READER);
1538275SEric Cheng 
1548275SEric Cheng 	/*
1558275SEric Cheng 	 * Pass a copy of the mp chain to every MAC client except the sender
1568275SEric Cheng 	 * MAC client, if the packet was not received from the underlying NIC.
1578275SEric Cheng 	 *
1588275SEric Cheng 	 * The broadcast group lock should not be held across calls to
1598275SEric Cheng 	 * the flow's callback function, since the same group could
1608275SEric Cheng 	 * potentially be accessed from the same context. When the lock
1618275SEric Cheng 	 * is reacquired, changes to the broadcast group while the lock
1628275SEric Cheng 	 * was released are caught using a generation counter incremented
1638275SEric Cheng 	 * each time the list of MAC clients associated with the broadcast
1648275SEric Cheng 	 * group is changed.
1658275SEric Cheng 	 */
1668275SEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
1678275SEric Cheng 		dst_mcip = grp->mbg_clients[i].mgb_client;
1688275SEric Cheng 		if (dst_mcip == NULL)
1698275SEric Cheng 			continue;
1708275SEric Cheng 		flent = dst_mcip->mci_flent;
1718275SEric Cheng 		if (flent == NULL || dst_mcip == src_mcip) {
1728275SEric Cheng 			/*
1738275SEric Cheng 			 * Don't send a copy of the packet back to
1748275SEric Cheng 			 * its sender.
1758275SEric Cheng 			 */
1768275SEric Cheng 			continue;
1778275SEric Cheng 		}
1788275SEric Cheng 
1798275SEric Cheng 		/*
1808275SEric Cheng 		 * It is important to hold a reference on the
1818275SEric Cheng 		 * flow_ent here.
1828275SEric Cheng 		 */
1838275SEric Cheng 		if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL)
1848275SEric Cheng 			break;
1858275SEric Cheng 		/*
1868275SEric Cheng 		 * Fix the checksum for packets originating
1878275SEric Cheng 		 * from the local machine.
1888275SEric Cheng 		 */
1898275SEric Cheng 		if ((src_mcip != NULL) &&
1908275SEric Cheng 		    (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL)
1918275SEric Cheng 			break;
1928275SEric Cheng 
1938275SEric Cheng 		FLOW_TRY_REFHOLD(flent, err);
1948275SEric Cheng 		if (err != 0) {
1958275SEric Cheng 			freemsgchain(mp_chain1);
1968275SEric Cheng 			continue;
1978275SEric Cheng 		}
1988275SEric Cheng 
1998275SEric Cheng 		gen = grp->mbg_clients_gen;
2008275SEric Cheng 
2018275SEric Cheng 		rw_exit(&mip->mi_rw_lock);
2028275SEric Cheng 
2038275SEric Cheng 		DTRACE_PROBE4(mac__bcast__send__to, mac_client_impl_t *,
2048275SEric Cheng 		    src_mcip, flow_fn_t, dst_mcip->mci_flent->fe_cb_fn,
2058275SEric Cheng 		    void *, dst_mcip->mci_flent->fe_cb_arg1,
2068275SEric Cheng 		    void *, dst_mcip->mci_flent->fe_cb_arg2);
2078275SEric Cheng 
2088275SEric Cheng 		(dst_mcip->mci_flent->fe_cb_fn)(dst_mcip->mci_flent->fe_cb_arg1,
2098275SEric Cheng 		    dst_mcip->mci_flent->fe_cb_arg2, mp_chain1, is_loopback);
2108275SEric Cheng 		FLOW_REFRELE(flent);
2118275SEric Cheng 
2128275SEric Cheng 		rw_enter(&mip->mi_rw_lock, RW_READER);
2138275SEric Cheng 
2148275SEric Cheng 		/* update stats */
215*11878SVenu.Iyer@Sun.COM 		if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
216*11878SVenu.Iyer@Sun.COM 			MCIP_STAT_UPDATE(dst_mcip, multircv, 1);
217*11878SVenu.Iyer@Sun.COM 			MCIP_STAT_UPDATE(dst_mcip, multircvbytes,
218*11878SVenu.Iyer@Sun.COM 			    msgdsize(mp_chain));
219*11878SVenu.Iyer@Sun.COM 		} else {
220*11878SVenu.Iyer@Sun.COM 			MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1);
221*11878SVenu.Iyer@Sun.COM 			MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes,
222*11878SVenu.Iyer@Sun.COM 			    msgdsize(mp_chain));
223*11878SVenu.Iyer@Sun.COM 		}
2248275SEric Cheng 
2258275SEric Cheng 		if (grp->mbg_clients_gen != gen) {
2268275SEric Cheng 			/*
2278275SEric Cheng 			 * The list of MAC clients associated with the group
2288275SEric Cheng 			 * was changed while the lock was released.
2298275SEric Cheng 			 * Give up on the current packet.
2308275SEric Cheng 			 */
2318275SEric Cheng 			rw_exit(&mip->mi_rw_lock);
2328275SEric Cheng 			freemsgchain(mp_chain);
2338275SEric Cheng 			return;
2348275SEric Cheng 		}
2358275SEric Cheng 	}
2368275SEric Cheng 	rw_exit(&mip->mi_rw_lock);
2378275SEric Cheng 
2388275SEric Cheng 	if (src_mcip != NULL) {
2398275SEric Cheng 		/*
2408275SEric Cheng 		 * The packet was sent from one of the MAC clients,
2418275SEric Cheng 		 * so we need to send a copy of the packet to the
2428275SEric Cheng 		 * underlying NIC so that it can be sent on the wire.
2438275SEric Cheng 		 */
244*11878SVenu.Iyer@Sun.COM 		MCIP_STAT_UPDATE(src_mcip, multixmt, 1);
245*11878SVenu.Iyer@Sun.COM 		MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain));
246*11878SVenu.Iyer@Sun.COM 		MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1);
247*11878SVenu.Iyer@Sun.COM 		MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain));
2488275SEric Cheng 
249*11878SVenu.Iyer@Sun.COM 		MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip);
25010491SRishi.Srivatsavai@Sun.COM 		if (mp_chain != NULL)
25110491SRishi.Srivatsavai@Sun.COM 			freemsgchain(mp_chain);
2528275SEric Cheng 	} else {
2538275SEric Cheng 		freemsgchain(mp_chain);
2548275SEric Cheng 	}
2558275SEric Cheng }
2568275SEric Cheng 
2578275SEric Cheng /*
2588275SEric Cheng  * Add the specified MAC client to the group corresponding to the specified
2598275SEric Cheng  * broadcast or multicast address.
2608275SEric Cheng  * Return 0 on success, or an errno value on failure.
2618275SEric Cheng  */
2628275SEric Cheng int
mac_bcast_add(mac_client_impl_t * mcip,const uint8_t * addr,uint16_t vid,mac_addrtype_t addrtype)2638275SEric Cheng mac_bcast_add(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid,
2648275SEric Cheng     mac_addrtype_t addrtype)
2658275SEric Cheng {
2668275SEric Cheng 	mac_impl_t 		*mip = mcip->mci_mip;
2678275SEric Cheng 	mac_bcast_grp_t		*grp = NULL, **last_grp;
2688275SEric Cheng 	size_t			addr_len = mip->mi_type->mt_addr_length;
2698275SEric Cheng 	int			rc = 0;
2708275SEric Cheng 	int			i, index = -1;
2718833SVenu.Iyer@Sun.COM 	mac_mcast_addrs_t	**prev_mi_addr = NULL;
2728833SVenu.Iyer@Sun.COM 	mac_mcast_addrs_t	**prev_mci_addr = NULL;
2738275SEric Cheng 
2748275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2758275SEric Cheng 
2768275SEric Cheng 	ASSERT(addrtype == MAC_ADDRTYPE_MULTICAST ||
2778275SEric Cheng 	    addrtype == MAC_ADDRTYPE_BROADCAST);
2788275SEric Cheng 
2798833SVenu.Iyer@Sun.COM 	/*
2808833SVenu.Iyer@Sun.COM 	 * Add the MAC client to the list of MAC clients associated
2818833SVenu.Iyer@Sun.COM 	 * with the group.
2828833SVenu.Iyer@Sun.COM 	 */
2838833SVenu.Iyer@Sun.COM 	if (addrtype == MAC_ADDRTYPE_MULTICAST) {
2848833SVenu.Iyer@Sun.COM 		mac_mcast_addrs_t	*maddr;
2858833SVenu.Iyer@Sun.COM 
2868833SVenu.Iyer@Sun.COM 		/*
2878833SVenu.Iyer@Sun.COM 		 * In case of a driver (say aggr), we need this information
2888833SVenu.Iyer@Sun.COM 		 * on a per MAC instance basis.
2898833SVenu.Iyer@Sun.COM 		 */
2908833SVenu.Iyer@Sun.COM 		prev_mi_addr = &mip->mi_mcast_addrs;
2918833SVenu.Iyer@Sun.COM 		for (maddr = *prev_mi_addr; maddr != NULL;
2928833SVenu.Iyer@Sun.COM 		    prev_mi_addr = &maddr->mma_next, maddr = maddr->mma_next) {
2938833SVenu.Iyer@Sun.COM 			if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
2948833SVenu.Iyer@Sun.COM 				break;
2958833SVenu.Iyer@Sun.COM 		}
2968833SVenu.Iyer@Sun.COM 		if (maddr == NULL) {
2978833SVenu.Iyer@Sun.COM 			/*
2988833SVenu.Iyer@Sun.COM 			 * For multicast addresses, have the underlying MAC
2998833SVenu.Iyer@Sun.COM 			 * join the corresponding multicast group.
3008833SVenu.Iyer@Sun.COM 			 */
3018833SVenu.Iyer@Sun.COM 			rc = mip->mi_multicst(mip->mi_driver, B_TRUE, addr);
3028833SVenu.Iyer@Sun.COM 			if (rc != 0)
3038833SVenu.Iyer@Sun.COM 				return (rc);
3048833SVenu.Iyer@Sun.COM 			maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
3058833SVenu.Iyer@Sun.COM 			    KM_SLEEP);
3068833SVenu.Iyer@Sun.COM 			bcopy(addr, maddr->mma_addr, addr_len);
3078833SVenu.Iyer@Sun.COM 			*prev_mi_addr = maddr;
3088833SVenu.Iyer@Sun.COM 		} else {
3098833SVenu.Iyer@Sun.COM 			prev_mi_addr = NULL;
3108833SVenu.Iyer@Sun.COM 		}
3118833SVenu.Iyer@Sun.COM 		maddr->mma_ref++;
3128833SVenu.Iyer@Sun.COM 
3138833SVenu.Iyer@Sun.COM 		/*
3148833SVenu.Iyer@Sun.COM 		 * We maintain a separate list for each MAC client. Get
3158833SVenu.Iyer@Sun.COM 		 * the entry or add, if it is not present.
3168833SVenu.Iyer@Sun.COM 		 */
3178833SVenu.Iyer@Sun.COM 		prev_mci_addr = &mcip->mci_mcast_addrs;
3188833SVenu.Iyer@Sun.COM 		for (maddr = *prev_mci_addr; maddr != NULL;
3198833SVenu.Iyer@Sun.COM 		    prev_mci_addr = &maddr->mma_next, maddr = maddr->mma_next) {
3208833SVenu.Iyer@Sun.COM 			if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
3218833SVenu.Iyer@Sun.COM 				break;
3228833SVenu.Iyer@Sun.COM 		}
3238833SVenu.Iyer@Sun.COM 		if (maddr == NULL) {
3248833SVenu.Iyer@Sun.COM 			maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
3258833SVenu.Iyer@Sun.COM 			    KM_SLEEP);
3268833SVenu.Iyer@Sun.COM 			bcopy(addr, maddr->mma_addr, addr_len);
3278833SVenu.Iyer@Sun.COM 			*prev_mci_addr = maddr;
3288833SVenu.Iyer@Sun.COM 		} else {
3298833SVenu.Iyer@Sun.COM 			prev_mci_addr = NULL;
3308833SVenu.Iyer@Sun.COM 		}
3318833SVenu.Iyer@Sun.COM 		maddr->mma_ref++;
3328833SVenu.Iyer@Sun.COM 	}
3338833SVenu.Iyer@Sun.COM 
3348275SEric Cheng 	/* The list is protected by the perimeter */
3358275SEric Cheng 	last_grp = &mip->mi_bcast_grp;
3368275SEric Cheng 	for (grp = *last_grp; grp != NULL;
3378275SEric Cheng 	    last_grp = &grp->mbg_next, grp = grp->mbg_next) {
3388275SEric Cheng 		if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
3398275SEric Cheng 		    grp->mbg_vid == vid)
3408275SEric Cheng 			break;
3418275SEric Cheng 	}
3428275SEric Cheng 
3438275SEric Cheng 	if (grp == NULL) {
3448275SEric Cheng 		/*
3458275SEric Cheng 		 * The group does not yet exist, create it.
3468275SEric Cheng 		 */
3478275SEric Cheng 		flow_desc_t flow_desc;
3488558SGirish.Moodalbail@Sun.COM 		char flow_name[MAXFLOWNAMELEN];
3498275SEric Cheng 
3508275SEric Cheng 		grp = kmem_cache_alloc(mac_bcast_grp_cache, KM_SLEEP);
3518275SEric Cheng 		bzero(grp, sizeof (mac_bcast_grp_t));
3528275SEric Cheng 		grp->mbg_next = NULL;
3538275SEric Cheng 		grp->mbg_mac_impl = mip;
3548275SEric Cheng 
3558275SEric Cheng 		DTRACE_PROBE1(mac__bcast__add__new__group, mac_bcast_grp_t *,
3568275SEric Cheng 		    grp);
3578275SEric Cheng 
3588275SEric Cheng 		grp->mbg_addr = kmem_zalloc(addr_len, KM_SLEEP);
3598275SEric Cheng 		bcopy(addr, grp->mbg_addr, addr_len);
3608275SEric Cheng 		grp->mbg_addrtype = addrtype;
3618275SEric Cheng 		grp->mbg_vid = vid;
3628275SEric Cheng 
3638275SEric Cheng 		/*
3648275SEric Cheng 		 * Add a new flow to the underlying MAC.
3658275SEric Cheng 		 */
3668275SEric Cheng 		bzero(&flow_desc, sizeof (flow_desc));
3678275SEric Cheng 		bcopy(addr, &flow_desc.fd_dst_mac, addr_len);
3688275SEric Cheng 		flow_desc.fd_mac_len = (uint32_t)addr_len;
3698275SEric Cheng 
3708275SEric Cheng 		flow_desc.fd_mask = FLOW_LINK_DST;
3718275SEric Cheng 		if (vid != 0) {
3728275SEric Cheng 			flow_desc.fd_vid = vid;
3738275SEric Cheng 			flow_desc.fd_mask |= FLOW_LINK_VID;
3748275SEric Cheng 		}
3758275SEric Cheng 
3768275SEric Cheng 		grp->mbg_id = atomic_add_32_nv(&mac_bcast_id, 1);
3778275SEric Cheng 		(void) sprintf(flow_name,
3788275SEric Cheng 		    "mac/%s/mcast%d", mip->mi_name, grp->mbg_id);
3798275SEric Cheng 
3808275SEric Cheng 		rc = mac_flow_create(&flow_desc, NULL, flow_name,
3818275SEric Cheng 		    grp, FLOW_MCAST, &grp->mbg_flow_ent);
3828275SEric Cheng 		if (rc != 0) {
3838275SEric Cheng 			kmem_free(grp->mbg_addr, addr_len);
3848275SEric Cheng 			kmem_cache_free(mac_bcast_grp_cache, grp);
3858833SVenu.Iyer@Sun.COM 			goto fail;
3868275SEric Cheng 		}
3878275SEric Cheng 		grp->mbg_flow_ent->fe_mbg = grp;
3888275SEric Cheng 		mip->mi_bcast_ngrps++;
3898275SEric Cheng 
3908275SEric Cheng 		/*
3918275SEric Cheng 		 * Initial creation reference on the flow. This is released
3928275SEric Cheng 		 * in the corresponding delete action i_mac_bcast_delete()
3938275SEric Cheng 		 */
3948275SEric Cheng 		FLOW_REFHOLD(grp->mbg_flow_ent);
3958275SEric Cheng 
3968275SEric Cheng 		/*
3978275SEric Cheng 		 * When the multicast and broadcast packet is received
3988275SEric Cheng 		 * by the underlying NIC, mac_rx_classify() will invoke
3998275SEric Cheng 		 * mac_bcast_send() with arg2=NULL, which will cause
4008275SEric Cheng 		 * mac_bcast_send() to send a copy of the packet(s)
4018275SEric Cheng 		 * to every MAC client opened on top of the underlying MAC.
4028275SEric Cheng 		 *
4038275SEric Cheng 		 * When the mac_bcast_send() function is invoked from
4048275SEric Cheng 		 * the transmit path of a MAC client, it will specify the
4058275SEric Cheng 		 * transmitting MAC client as the arg2 value, which will
4068275SEric Cheng 		 * allow mac_bcast_send() to skip that MAC client and not
4078275SEric Cheng 		 * send it a copy of the packet.
4088275SEric Cheng 		 *
4098275SEric Cheng 		 * We program the classifier to dispatch matching broadcast
4108275SEric Cheng 		 * packets to mac_bcast_send().
4118275SEric Cheng 		 */
4128275SEric Cheng 
4138275SEric Cheng 		grp->mbg_flow_ent->fe_cb_fn = mac_bcast_send;
4148275SEric Cheng 		grp->mbg_flow_ent->fe_cb_arg1 = grp;
4158275SEric Cheng 		grp->mbg_flow_ent->fe_cb_arg2 = NULL;
4168275SEric Cheng 
4178275SEric Cheng 		rc = mac_flow_add(mip->mi_flow_tab, grp->mbg_flow_ent);
4188275SEric Cheng 		if (rc != 0) {
4198275SEric Cheng 			FLOW_FINAL_REFRELE(grp->mbg_flow_ent);
4208833SVenu.Iyer@Sun.COM 			goto fail;
4218275SEric Cheng 		}
4228275SEric Cheng 
4238275SEric Cheng 		*last_grp = grp;
4248275SEric Cheng 	}
4258275SEric Cheng 
4268275SEric Cheng 	ASSERT(grp->mbg_addrtype == addrtype);
4278275SEric Cheng 
4288275SEric Cheng 	/*
4298275SEric Cheng 	 * Add the MAC client to the list of MAC clients associated
4308275SEric Cheng 	 * with the group.
4318275SEric Cheng 	 */
4328275SEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
4338275SEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
4348275SEric Cheng 		/*
4358275SEric Cheng 		 * The MAC client was already added, say when we have
4368275SEric Cheng 		 * different unicast addresses with the same vid.
4378275SEric Cheng 		 * Just increment the ref and we are done.
4388275SEric Cheng 		 */
4398275SEric Cheng 		if (grp->mbg_clients[i].mgb_client == mcip) {
4408275SEric Cheng 			grp->mbg_clients[i].mgb_client_ref++;
4418833SVenu.Iyer@Sun.COM 			rw_exit(&mip->mi_rw_lock);
4428833SVenu.Iyer@Sun.COM 			return (0);
4438275SEric Cheng 		} else if (grp->mbg_clients[i].mgb_client == NULL &&
4448275SEric Cheng 		    index == -1) {
4458275SEric Cheng 			index = i;
4468275SEric Cheng 		}
4478275SEric Cheng 	}
4488275SEric Cheng 	if (grp->mbg_nclients_alloc == grp->mbg_nclients) {
4498275SEric Cheng 		mac_bcast_grp_mcip_t	*new_clients;
4508275SEric Cheng 		uint_t			new_size = grp->mbg_nclients+1;
4518275SEric Cheng 
4528275SEric Cheng 		new_clients = kmem_zalloc(new_size *
4538275SEric Cheng 		    sizeof (mac_bcast_grp_mcip_t), KM_SLEEP);
4548275SEric Cheng 
4558275SEric Cheng 		if (grp->mbg_nclients > 0) {
4568275SEric Cheng 			ASSERT(grp->mbg_clients != NULL);
4578275SEric Cheng 			bcopy(grp->mbg_clients, new_clients, grp->mbg_nclients *
4588275SEric Cheng 			    sizeof (mac_bcast_grp_mcip_t));
4598275SEric Cheng 			kmem_free(grp->mbg_clients, grp->mbg_nclients *
4608275SEric Cheng 			    sizeof (mac_bcast_grp_mcip_t));
4618275SEric Cheng 		}
4628275SEric Cheng 
4638275SEric Cheng 		grp->mbg_clients = new_clients;
4648275SEric Cheng 		grp->mbg_nclients_alloc = new_size;
4658275SEric Cheng 		index = new_size - 1;
4668275SEric Cheng 	}
4678275SEric Cheng 
4688275SEric Cheng 	ASSERT(index != -1);
4698275SEric Cheng 	grp->mbg_clients[index].mgb_client = mcip;
4708275SEric Cheng 	grp->mbg_clients[index].mgb_client_ref = 1;
4718275SEric Cheng 	grp->mbg_nclients++;
4728275SEric Cheng 	/*
4738275SEric Cheng 	 * Since we're adding to the list of MAC clients using that group,
4748275SEric Cheng 	 * kick the generation count, which will allow mac_bcast_send()
4758275SEric Cheng 	 * to detect that condition after re-acquiring the lock.
4768275SEric Cheng 	 */
4778275SEric Cheng 	grp->mbg_clients_gen++;
4788275SEric Cheng 	rw_exit(&mip->mi_rw_lock);
4798833SVenu.Iyer@Sun.COM 	return (0);
4808275SEric Cheng 
4818833SVenu.Iyer@Sun.COM fail:
4828833SVenu.Iyer@Sun.COM 	if (prev_mi_addr != NULL) {
4838833SVenu.Iyer@Sun.COM 		kmem_free(*prev_mi_addr, sizeof (mac_mcast_addrs_t));
4848833SVenu.Iyer@Sun.COM 		*prev_mi_addr = NULL;
4858833SVenu.Iyer@Sun.COM 		(void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
4868833SVenu.Iyer@Sun.COM 	}
4878833SVenu.Iyer@Sun.COM 	if (prev_mci_addr != NULL) {
4888833SVenu.Iyer@Sun.COM 		kmem_free(*prev_mci_addr, sizeof (mac_mcast_addrs_t));
4898833SVenu.Iyer@Sun.COM 		*prev_mci_addr = NULL;
4908833SVenu.Iyer@Sun.COM 	}
4918833SVenu.Iyer@Sun.COM 	return (rc);
4928275SEric Cheng }
4938275SEric Cheng 
4948275SEric Cheng /*
4958275SEric Cheng  * Remove the specified MAC client from the group corresponding to
4968275SEric Cheng  * the specific broadcast or multicast address.
4978275SEric Cheng  *
4988275SEric Cheng  * Note: mac_bcast_delete() calls  mac_remove_flow() which
4998275SEric Cheng  * will call cv_wait for fe_refcnt to drop to 0. So this function
5008275SEric Cheng  * should not be called from interrupt or STREAMS context.
5018275SEric Cheng  */
5028275SEric Cheng void
mac_bcast_delete(mac_client_impl_t * mcip,const uint8_t * addr,uint16_t vid)5038275SEric Cheng mac_bcast_delete(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid)
5048275SEric Cheng {
5058275SEric Cheng 	mac_impl_t *mip = mcip->mci_mip;
5068275SEric Cheng 	mac_bcast_grp_t *grp = NULL, **prev;
5078275SEric Cheng 	size_t addr_len = mip->mi_type->mt_addr_length;
5088275SEric Cheng 	flow_entry_t *flent;
5098275SEric Cheng 	uint_t i;
5108275SEric Cheng 	mac_mcast_addrs_t	*maddr = NULL;
5118275SEric Cheng 	mac_mcast_addrs_t	**mprev;
5128275SEric Cheng 
5138275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5148275SEric Cheng 
5158275SEric Cheng 	/* find the broadcast group. The list is protected by the perimeter */
5168275SEric Cheng 	prev = &mip->mi_bcast_grp;
5178275SEric Cheng 	for (grp = mip->mi_bcast_grp; grp != NULL; prev = &grp->mbg_next,
5188275SEric Cheng 	    grp = grp->mbg_next) {
5198275SEric Cheng 		if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
5208275SEric Cheng 		    grp->mbg_vid == vid)
5218275SEric Cheng 			break;
5228275SEric Cheng 	}
5238275SEric Cheng 	ASSERT(grp != NULL);
5248275SEric Cheng 
5258275SEric Cheng 	/*
5268275SEric Cheng 	 * Remove the MAC client from the list of MAC clients associated
5278275SEric Cheng 	 * with that broadcast group.
5288275SEric Cheng 	 *
5298275SEric Cheng 	 * We mark the mbg_clients[] location corresponding to the removed MAC
5308275SEric Cheng 	 * client NULL and reuse that location when we add a new MAC client.
5318275SEric Cheng 	 */
5328275SEric Cheng 
5338275SEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
5348275SEric Cheng 
5358275SEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
5368275SEric Cheng 		if (grp->mbg_clients[i].mgb_client == mcip)
5378275SEric Cheng 			break;
5388275SEric Cheng 	}
5398275SEric Cheng 
5408275SEric Cheng 	ASSERT(i < grp->mbg_nclients_alloc);
5418275SEric Cheng 	/*
5428275SEric Cheng 	 * If there are more references to this MAC client, then we let
5438275SEric Cheng 	 * it remain till it goes to 0.
5448275SEric Cheng 	 */
5458275SEric Cheng 	if (--grp->mbg_clients[i].mgb_client_ref > 0)
5468275SEric Cheng 		goto update_maddr;
5478275SEric Cheng 
5488275SEric Cheng 	grp->mbg_clients[i].mgb_client = NULL;
5498275SEric Cheng 	grp->mbg_clients[i].mgb_client_ref = 0;
5508275SEric Cheng 
5518275SEric Cheng 	/*
5528275SEric Cheng 	 * Since we're removing from the list of MAC clients using that group,
5538275SEric Cheng 	 * kick the generation count, which will allow mac_bcast_send()
5548275SEric Cheng 	 * to detect that condition.
5558275SEric Cheng 	 */
5568275SEric Cheng 	grp->mbg_clients_gen++;
5578275SEric Cheng 
5588275SEric Cheng 	if (--grp->mbg_nclients == 0) {
5598275SEric Cheng 		/*
5608275SEric Cheng 		 * The last MAC client of the group was just removed.
5618275SEric Cheng 		 * Unlink the current group from the list of groups
5628275SEric Cheng 		 * defined on top of the underlying NIC. The group
5638275SEric Cheng 		 * structure will stay around until the last reference
5648275SEric Cheng 		 * is dropped.
5658275SEric Cheng 		 */
5668275SEric Cheng 		*prev = grp->mbg_next;
5678275SEric Cheng 	}
5688275SEric Cheng update_maddr:
5698833SVenu.Iyer@Sun.COM 	rw_exit(&mip->mi_rw_lock);
5708833SVenu.Iyer@Sun.COM 
5718275SEric Cheng 	if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
5728275SEric Cheng 		mprev = &mcip->mci_mcast_addrs;
5738275SEric Cheng 		for (maddr = mcip->mci_mcast_addrs; maddr != NULL;
5748275SEric Cheng 		    mprev = &maddr->mma_next, maddr = maddr->mma_next) {
5758275SEric Cheng 			if (bcmp(grp->mbg_addr, maddr->mma_addr,
5768275SEric Cheng 			    mip->mi_type->mt_addr_length) == 0)
5778275SEric Cheng 				break;
5788275SEric Cheng 		}
5798275SEric Cheng 		ASSERT(maddr != NULL);
5808275SEric Cheng 		if (--maddr->mma_ref == 0) {
5818275SEric Cheng 			*mprev = maddr->mma_next;
5828275SEric Cheng 			maddr->mma_next = NULL;
5838275SEric Cheng 			kmem_free(maddr, sizeof (mac_mcast_addrs_t));
5848275SEric Cheng 		}
5858275SEric Cheng 
5868275SEric Cheng 		mprev = &mip->mi_mcast_addrs;
5878275SEric Cheng 		for (maddr = mip->mi_mcast_addrs; maddr != NULL;
5888275SEric Cheng 		    mprev = &maddr->mma_next, maddr = maddr->mma_next) {
5898275SEric Cheng 			if (bcmp(grp->mbg_addr, maddr->mma_addr,
5908275SEric Cheng 			    mip->mi_type->mt_addr_length) == 0)
5918275SEric Cheng 				break;
5928275SEric Cheng 		}
5938275SEric Cheng 		ASSERT(maddr != NULL);
5948275SEric Cheng 		if (--maddr->mma_ref == 0) {
5958833SVenu.Iyer@Sun.COM 			(void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
5968275SEric Cheng 			*mprev = maddr->mma_next;
5978275SEric Cheng 			maddr->mma_next = NULL;
5988275SEric Cheng 			kmem_free(maddr, sizeof (mac_mcast_addrs_t));
5998275SEric Cheng 		}
6008275SEric Cheng 	}
6018275SEric Cheng 
6028275SEric Cheng 	/*
6038275SEric Cheng 	 * If the group itself is being removed, remove the
6048275SEric Cheng 	 * corresponding flow from the underlying NIC.
6058275SEric Cheng 	 */
6068275SEric Cheng 	flent = grp->mbg_flow_ent;
6078275SEric Cheng 	if (grp->mbg_nclients == 0) {
6088275SEric Cheng 		mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
6098275SEric Cheng 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
6108275SEric Cheng 		FLOW_FINAL_REFRELE(flent);
6118275SEric Cheng 	}
6128275SEric Cheng }
6138275SEric Cheng 
6148275SEric Cheng /*
6158275SEric Cheng  * This will be called by a driver, such as aggr, when a port is added/removed
6168275SEric Cheng  * to add/remove the port to/from all the multcast addresses for that aggr.
6178275SEric Cheng  */
6188275SEric Cheng void
mac_bcast_refresh(mac_impl_t * mip,mac_multicst_t refresh_fn,void * arg,boolean_t add)6198275SEric Cheng mac_bcast_refresh(mac_impl_t *mip, mac_multicst_t refresh_fn, void *arg,
6208275SEric Cheng     boolean_t add)
6218275SEric Cheng {
6228275SEric Cheng 	mac_mcast_addrs_t *grp, *next;
6238275SEric Cheng 
6248275SEric Cheng 	ASSERT(refresh_fn != NULL);
6258275SEric Cheng 
6268275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
6278275SEric Cheng 
6288275SEric Cheng 	/*
6298275SEric Cheng 	 * Walk the multicast address list and call the refresh function for
6308275SEric Cheng 	 * each address.
6318275SEric Cheng 	 */
6328275SEric Cheng 
6338275SEric Cheng 	for (grp = mip->mi_mcast_addrs; grp != NULL; grp = next) {
6348275SEric Cheng 		/*
6358275SEric Cheng 		 * Save the next pointer just in case the refresh
6368275SEric Cheng 		 * function's action causes the group entry to be
6378275SEric Cheng 		 * freed.
6388275SEric Cheng 		 * We won't be adding to this list as part of the
6398275SEric Cheng 		 * refresh.
6408275SEric Cheng 		 */
6418275SEric Cheng 		next = grp->mma_next;
6428275SEric Cheng 		refresh_fn(arg, add, grp->mma_addr);
6438275SEric Cheng 	}
6448275SEric Cheng }
6458275SEric Cheng 
6468275SEric Cheng /*
6478275SEric Cheng  * Walk the MAC client's multicast address list and add/remove the addr/vid
6488275SEric Cheng  * ('arg' is 'flent') to all the addresses.
6498275SEric Cheng  */
6508275SEric Cheng void
mac_client_bcast_refresh(mac_client_impl_t * mcip,mac_multicst_t refresh_fn,void * arg,boolean_t add)6518275SEric Cheng mac_client_bcast_refresh(mac_client_impl_t *mcip, mac_multicst_t refresh_fn,
6528275SEric Cheng     void *arg, boolean_t add)
6538275SEric Cheng {
6548275SEric Cheng 	mac_mcast_addrs_t *grp, *next;
6558275SEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
6568275SEric Cheng 
6578275SEric Cheng 	ASSERT(refresh_fn != NULL);
6588275SEric Cheng 
6598275SEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
6608275SEric Cheng 	/*
6618275SEric Cheng 	 * Walk the multicast address list and call the refresh function for
6628275SEric Cheng 	 * each address.
6638275SEric Cheng 	 * Broadcast addresses are not added or removed through the multicast
6648275SEric Cheng 	 * entry points, so don't include them as part of the refresh.
6658275SEric Cheng 	 */
6668275SEric Cheng 	for (grp = mcip->mci_mcast_addrs; grp != NULL; grp = next) {
6678275SEric Cheng 		/*
6688275SEric Cheng 		 * Save the next pointer just in case the refresh
6698275SEric Cheng 		 * function's action causes the group entry to be
6708275SEric Cheng 		 * freed.
6718275SEric Cheng 		 * We won't be adding to this list as part of the
6728275SEric Cheng 		 * refresh.
6738275SEric Cheng 		 */
6748275SEric Cheng 		next = grp->mma_next;
6758275SEric Cheng 		refresh_fn(arg, add, grp->mma_addr);
6768275SEric Cheng 	}
6778275SEric Cheng }
678