xref: /onnv-gate/usr/src/uts/common/io/dls/dls_link.c (revision 2760:38f12e308f6d)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51502Sericheng  * Common Development and Distribution License (the "License").
61502Sericheng  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
221502Sericheng  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * Data-Link Services Module
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include	<sys/types.h>
330Sstevel@tonic-gate #include	<sys/stream.h>
340Sstevel@tonic-gate #include	<sys/strsun.h>
350Sstevel@tonic-gate #include	<sys/strsubr.h>
360Sstevel@tonic-gate #include	<sys/sysmacros.h>
370Sstevel@tonic-gate #include	<sys/atomic.h>
38269Sericheng #include	<sys/modhash.h>
390Sstevel@tonic-gate #include	<sys/dlpi.h>
400Sstevel@tonic-gate #include	<sys/ethernet.h>
410Sstevel@tonic-gate #include	<sys/byteorder.h>
420Sstevel@tonic-gate #include	<sys/vlan.h>
430Sstevel@tonic-gate #include	<sys/mac.h>
440Sstevel@tonic-gate #include	<sys/sdt.h>
450Sstevel@tonic-gate 
460Sstevel@tonic-gate #include	<sys/dls.h>
470Sstevel@tonic-gate #include	<sys/dld_impl.h>
480Sstevel@tonic-gate #include	<sys/dls_impl.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate static kmem_cache_t	*i_dls_link_cachep;
51269Sericheng static mod_hash_t	*i_dls_link_hash;
52269Sericheng static uint_t		i_dls_link_count;
53269Sericheng static krwlock_t	i_dls_link_lock;
540Sstevel@tonic-gate 
550Sstevel@tonic-gate #define		LINK_HASHSZ	67	/* prime */
560Sstevel@tonic-gate #define		IMPL_HASHSZ	67	/* prime */
570Sstevel@tonic-gate 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier.
 * The SAP is shifted left by VLAN_ID_SIZE bits and the VLAN identifier,
 * masked with VLAN_ID_MASK, is OR-ed into the low bits.  The mask is
 * parenthesized explicitly so that the expansion does not depend on the
 * relative precedence of '&' and '|'.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
640Sstevel@tonic-gate 
/*
 * Recover the DLSAP value from a hash key: truncate the key to 32 bits and
 * discard the low VLAN_ID_SIZE bits that hold the VLAN identifier.
 */
#define	KEY_SAP(_key)	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
700Sstevel@tonic-gate 
/*
 * Strip trailing padding from message p.  A pktsize of zero means the
 * packet size is unknown and the message is left alone.  Otherwise, if the
 * message holds more data than pktsize, adjmsg() is called with the
 * (negative) difference to trim the excess.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. in an unbraced if/else), and every use of an
 * argument is parenthesized.
 */
#define	DLS_STRIP_PADDING(pktsize, p) do {				\
	if ((pktsize) != 0) {						\
		ssize_t delta = (pktsize) - msgdsize(p);		\
									\
		if (delta < 0)						\
			(void) adjmsg((p), delta);			\
	}								\
} while (0)
792311Sseb 
800Sstevel@tonic-gate /*
810Sstevel@tonic-gate  * Private functions.
820Sstevel@tonic-gate  */
830Sstevel@tonic-gate 
840Sstevel@tonic-gate /*ARGSUSED*/
850Sstevel@tonic-gate static int
860Sstevel@tonic-gate i_dls_link_constructor(void *buf, void *arg, int kmflag)
870Sstevel@tonic-gate {
880Sstevel@tonic-gate 	dls_link_t	*dlp = buf;
890Sstevel@tonic-gate 	char		name[MAXNAMELEN];
900Sstevel@tonic-gate 
910Sstevel@tonic-gate 	bzero(buf, sizeof (dls_link_t));
920Sstevel@tonic-gate 
93269Sericheng 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
94269Sericheng 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
95269Sericheng 	    mod_hash_null_valdtor);
960Sstevel@tonic-gate 
970Sstevel@tonic-gate 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
9856Smeem 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
99269Sericheng 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
1000Sstevel@tonic-gate 	return (0);
1010Sstevel@tonic-gate }
1020Sstevel@tonic-gate 
1030Sstevel@tonic-gate /*ARGSUSED*/
1040Sstevel@tonic-gate static void
1050Sstevel@tonic-gate i_dls_link_destructor(void *buf, void *arg)
1060Sstevel@tonic-gate {
1070Sstevel@tonic-gate 	dls_link_t	*dlp = buf;
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate 	ASSERT(dlp->dl_ref == 0);
1100Sstevel@tonic-gate 	ASSERT(dlp->dl_mh == NULL);
1110Sstevel@tonic-gate 	ASSERT(dlp->dl_unknowns == 0);
1120Sstevel@tonic-gate 
113269Sericheng 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
114269Sericheng 	dlp->dl_impl_hash = NULL;
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 	mutex_destroy(&dlp->dl_lock);
11756Smeem 	mutex_destroy(&dlp->dl_promisc_lock);
118269Sericheng 	rw_destroy(&dlp->dl_impl_lock);
1190Sstevel@tonic-gate }
1200Sstevel@tonic-gate 
/*
 * Prepare the packet mp for delivery:
 *
 * - Parse its MAC header into *mhip with dls_link_header_info(); a
 *   non-zero result is stored in err and the packet is left untouched.
 * - Strip the padding and skip over the header.  If the first mblk is
 *   shorter than the MAC header, the whole message is first pulled up
 *   into a single mblk with msgpullup(); the original message is freed,
 *   mp is updated to point at the new one and its b_next linkage is
 *   preserved.  If the pullup fails, err is set to EINVAL and mp is left
 *   pointing at the original (already padding-stripped) message.
 *
 * mp must be a modifiable lvalue.  This is a macro for performance
 * reasons.  It is wrapped in do { } while (0) so that it behaves as a
 * single statement, and its locals carry a dpp_ prefix so that they cannot
 * shadow or capture variables of the caller (several callers have their
 * own 'nextp').
 */
#define	DLS_PREPARE_PKT(dlp, mp, mhip, err) do {			\
	mblk_t *dpp_nextp = (mp)->b_next;				\
									\
	if (((err) = dls_link_header_info((dlp), (mp), (mhip))) == 0) {	\
		DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));		\
		if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {		\
			mblk_t *dpp_newmp;				\
									\
			dpp_newmp = msgpullup((mp), -1);		\
			if (dpp_newmp == NULL) {			\
				(err) = EINVAL;				\
			} else {					\
				freemsg((mp));				\
				(mp) = dpp_newmp;			\
				(mp)->b_next = dpp_nextp;		\
				(mp)->b_rptr += (mhip)->mhi_hdrsize;	\
			}						\
		} else {						\
			(mp)->b_rptr += (mhip)->mhi_hdrsize;		\
		}							\
	}								\
} while (0)
150*2760Sdg199075 
151*2760Sdg199075 /*
1522311Sseb  * Truncate the chain starting at mp such that all packets in the chain
153*2760Sdg199075  * have identical source and destination addresses, saps, and tag types
154*2760Sdg199075  * (see below).  It returns a pointer to the mblk following the chain,
155*2760Sdg199075  * NULL if there is no further packet following the processed chain.
156*2760Sdg199075  * The countp argument is set to the number of valid packets in the chain.
157*2760Sdg199075  * Note that the whole MAC header (including the VLAN tag if any) in each
158*2760Sdg199075  * packet will be stripped.
1592311Sseb  */
1600Sstevel@tonic-gate static mblk_t *
161*2760Sdg199075 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
162*2760Sdg199075     uint_t *countp)
1630Sstevel@tonic-gate {
164*2760Sdg199075 	mblk_t		*prevp;
165*2760Sdg199075 	uint_t		npacket = 1;
1662311Sseb 	size_t		addr_size = dlp->dl_mip->mi_addr_length;
167*2760Sdg199075 	uint16_t	vid = VLAN_ID(mhip->mhi_tci);
168*2760Sdg199075 	uint16_t	pri = VLAN_PRI(mhip->mhi_tci);
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	/*
1710Sstevel@tonic-gate 	 * Compare with subsequent headers until we find one that has
1721502Sericheng 	 * differing header information. After checking each packet
1731502Sericheng 	 * strip padding and skip over the header.
1740Sstevel@tonic-gate 	 */
175*2760Sdg199075 	for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
1762311Sseb 		mac_header_info_t cmhi;
177*2760Sdg199075 		uint16_t cvid, cpri;
178*2760Sdg199075 		int err;
1792311Sseb 
180*2760Sdg199075 		DLS_PREPARE_PKT(dlp, mp, &cmhi, err);
181*2760Sdg199075 		if (err != 0)
1820Sstevel@tonic-gate 			break;
1832311Sseb 
184*2760Sdg199075 		prevp->b_next = mp;
185*2760Sdg199075 
1862311Sseb 		/*
1872311Sseb 		 * The source, destination, sap, and vlan id must all match
1882311Sseb 		 * in a given subchain.
1892311Sseb 		 */
1902311Sseb 		if (memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
1912311Sseb 		    memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
1922311Sseb 		    mhip->mhi_bindsap != cmhi.mhi_bindsap) {
193*2760Sdg199075 			/*
194*2760Sdg199075 			 * Note that we don't need to restore the padding.
195*2760Sdg199075 			 */
196*2760Sdg199075 			mp->b_rptr -= cmhi.mhi_hdrsize;
1972311Sseb 			break;
1982311Sseb 		}
1992311Sseb 
200*2760Sdg199075 		cvid = VLAN_ID(cmhi.mhi_tci);
201*2760Sdg199075 		cpri = VLAN_PRI(cmhi.mhi_tci);
2022311Sseb 
203*2760Sdg199075 		/*
204*2760Sdg199075 		 * There are several types of packets. Packets don't match
205*2760Sdg199075 		 * if they are classified to different type or if they are
206*2760Sdg199075 		 * VLAN packets but belong to different VLANs:
207*2760Sdg199075 		 *
208*2760Sdg199075 		 * packet type		tagged		vid		pri
209*2760Sdg199075 		 * ---------------------------------------------------------
210*2760Sdg199075 		 * untagged		No		zero		zero
211*2760Sdg199075 		 * VLAN packets		Yes		non-zero	-
212*2760Sdg199075 		 * priority tagged	Yes		zero		non-zero
213*2760Sdg199075 		 * 0 tagged		Yes		zero		zero
214*2760Sdg199075 		 */
215*2760Sdg199075 		if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
216*2760Sdg199075 		    (vid != cvid) || ((vid == VLAN_ID_NONE) &&
217*2760Sdg199075 		    (((pri == 0) && (cpri != 0)) ||
218*2760Sdg199075 		    ((pri != 0) && (cpri == 0))))) {
219*2760Sdg199075 			mp->b_rptr -= cmhi.mhi_hdrsize;
220*2760Sdg199075 			break;
221*2760Sdg199075 		}
222*2760Sdg199075 
2230Sstevel@tonic-gate 		npacket++;
2240Sstevel@tonic-gate 	}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate 	/*
2270Sstevel@tonic-gate 	 * Break the chain at this point and return a pointer to the next
2280Sstevel@tonic-gate 	 * sub-chain.
2290Sstevel@tonic-gate 	 */
230*2760Sdg199075 	prevp->b_next = NULL;
2310Sstevel@tonic-gate 	*countp = npacket;
232*2760Sdg199075 	return (mp);
2330Sstevel@tonic-gate }
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate static void
236269Sericheng i_dls_head_hold(dls_head_t *dhp)
237269Sericheng {
238269Sericheng 	atomic_inc_32(&dhp->dh_ref);
239269Sericheng }
240269Sericheng 
241269Sericheng static void
242269Sericheng i_dls_head_rele(dls_head_t *dhp)
243269Sericheng {
244269Sericheng 	atomic_dec_32(&dhp->dh_ref);
245269Sericheng }
246269Sericheng 
247269Sericheng static dls_head_t *
248269Sericheng i_dls_head_alloc(mod_hash_key_t key)
249269Sericheng {
250269Sericheng 	dls_head_t	*dhp;
251269Sericheng 
252269Sericheng 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
253269Sericheng 	dhp->dh_key = key;
254269Sericheng 	return (dhp);
255269Sericheng }
256269Sericheng 
257269Sericheng static void
258269Sericheng i_dls_head_free(dls_head_t *dhp)
259269Sericheng {
260269Sericheng 	ASSERT(dhp->dh_ref == 0);
261269Sericheng 	kmem_free(dhp, sizeof (dls_head_t));
262269Sericheng }
263269Sericheng 
264*2760Sdg199075 /*
265*2760Sdg199075  * Try to send mp up to the streams of the given sap and vid. Return B_TRUE
266*2760Sdg199075  * if this message is sent to any streams.
267*2760Sdg199075  * Note that this function will copy the message chain and the original
268*2760Sdg199075  * mp will remain valid after this function
269*2760Sdg199075  */
270*2760Sdg199075 static uint_t
271*2760Sdg199075 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
272*2760Sdg199075     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap, uint16_t vid,
273*2760Sdg199075     boolean_t (*acceptfunc)())
274*2760Sdg199075 {
275*2760Sdg199075 	mod_hash_t	*hash = dlp->dl_impl_hash;
276*2760Sdg199075 	mod_hash_key_t	key;
277*2760Sdg199075 	dls_head_t	*dhp;
278*2760Sdg199075 	dls_impl_t	*dip;
279*2760Sdg199075 	mblk_t		*nmp;
280*2760Sdg199075 	dls_rx_t	di_rx;
281*2760Sdg199075 	void		*di_rx_arg;
282*2760Sdg199075 	uint_t		naccepted = 0;
283*2760Sdg199075 
284*2760Sdg199075 	/*
285*2760Sdg199075 	 * Construct a hash key from the VLAN identifier and the
286*2760Sdg199075 	 * DLSAP that represents dls_impl_t in promiscuous mode.
287*2760Sdg199075 	 */
288*2760Sdg199075 	key = MAKE_KEY(sap, vid);
289*2760Sdg199075 
290*2760Sdg199075 	/*
291*2760Sdg199075 	 * Search the hash table for dls_impl_t eligible to receive
292*2760Sdg199075 	 * a packet chain for this DLSAP/VLAN combination.
293*2760Sdg199075 	 */
294*2760Sdg199075 	rw_enter(&dlp->dl_impl_lock, RW_READER);
295*2760Sdg199075 	if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
296*2760Sdg199075 		rw_exit(&dlp->dl_impl_lock);
297*2760Sdg199075 		return (B_FALSE);
298*2760Sdg199075 	}
299*2760Sdg199075 	i_dls_head_hold(dhp);
300*2760Sdg199075 	rw_exit(&dlp->dl_impl_lock);
301*2760Sdg199075 
302*2760Sdg199075 	/*
303*2760Sdg199075 	 * Find dls_impl_t that will accept the sub-chain.
304*2760Sdg199075 	 */
305*2760Sdg199075 	for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
306*2760Sdg199075 		if (!acceptfunc(dip, mhip, &di_rx, &di_rx_arg))
307*2760Sdg199075 			continue;
308*2760Sdg199075 
309*2760Sdg199075 		/*
310*2760Sdg199075 		 * We have at least one acceptor.
311*2760Sdg199075 		 */
312*2760Sdg199075 		naccepted ++;
313*2760Sdg199075 
314*2760Sdg199075 		/*
315*2760Sdg199075 		 * There will normally be at least more dls_impl_t
316*2760Sdg199075 		 * (since we've yet to check for non-promiscuous
317*2760Sdg199075 		 * dls_impl_t) so dup the sub-chain.
318*2760Sdg199075 		 */
319*2760Sdg199075 		if ((nmp = copymsgchain(mp)) != NULL)
320*2760Sdg199075 			di_rx(di_rx_arg, mrh, nmp, mhip);
321*2760Sdg199075 	}
322*2760Sdg199075 
323*2760Sdg199075 	/*
324*2760Sdg199075 	 * Release the hold on the dls_impl_t chain now that we have
325*2760Sdg199075 	 * finished walking it.
326*2760Sdg199075 	 */
327*2760Sdg199075 	i_dls_head_rele(dhp);
328*2760Sdg199075 	return (naccepted);
329*2760Sdg199075 }
330*2760Sdg199075 
331269Sericheng static void
3322311Sseb i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
3330Sstevel@tonic-gate {
3340Sstevel@tonic-gate 	dls_link_t			*dlp = arg;
335269Sericheng 	mod_hash_t			*hash = dlp->dl_impl_hash;
3360Sstevel@tonic-gate 	mblk_t				*nextp;
3372311Sseb 	mac_header_info_t		mhi;
338269Sericheng 	dls_head_t			*dhp;
3390Sstevel@tonic-gate 	dls_impl_t			*dip;
3400Sstevel@tonic-gate 	dls_impl_t			*ndip;
3410Sstevel@tonic-gate 	mblk_t				*nmp;
342269Sericheng 	mod_hash_key_t			key;
3430Sstevel@tonic-gate 	uint_t				npacket;
3440Sstevel@tonic-gate 	boolean_t			accepted;
345449Sericheng 	dls_rx_t			di_rx, ndi_rx;
346449Sericheng 	void				*di_rx_arg, *ndi_rx_arg;
347*2760Sdg199075 	uint16_t			vid;
348*2760Sdg199075 	int				err;
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	/*
3510Sstevel@tonic-gate 	 * Walk the packet chain.
3520Sstevel@tonic-gate 	 */
353*2760Sdg199075 	for (; mp != NULL; mp = nextp) {
3540Sstevel@tonic-gate 		/*
3550Sstevel@tonic-gate 		 * Wipe the accepted state.
3560Sstevel@tonic-gate 		 */
3570Sstevel@tonic-gate 		accepted = B_FALSE;
3580Sstevel@tonic-gate 
359*2760Sdg199075 		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
360*2760Sdg199075 		if (err != 0) {
361*2760Sdg199075 			atomic_add_32(&(dlp->dl_unknowns), 1);
362*2760Sdg199075 			nextp = mp->b_next;
363*2760Sdg199075 			freemsg(mp);
364*2760Sdg199075 			continue;
365*2760Sdg199075 		}
366*2760Sdg199075 
3670Sstevel@tonic-gate 		/*
3680Sstevel@tonic-gate 		 * Grab the longest sub-chain we can process as a single
3690Sstevel@tonic-gate 		 * unit.
3700Sstevel@tonic-gate 		 */
371*2760Sdg199075 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
372*2760Sdg199075 		ASSERT(npacket != 0);
3730Sstevel@tonic-gate 
374*2760Sdg199075 		vid = VLAN_ID(mhi.mhi_tci);
375*2760Sdg199075 
376*2760Sdg199075 		if (mhi.mhi_istagged) {
3772311Sseb 			/*
378*2760Sdg199075 			 * If it is tagged traffic, send it upstream to
379*2760Sdg199075 			 * all dls_impl_t which are attached to the physical
380*2760Sdg199075 			 * link and bound to SAP 0x8100.
3812311Sseb 			 */
382*2760Sdg199075 			if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
383*2760Sdg199075 			    ETHERTYPE_VLAN, VLAN_ID_NONE, dls_accept) > 0) {
384*2760Sdg199075 				accepted = B_TRUE;
385*2760Sdg199075 			}
386*2760Sdg199075 
387*2760Sdg199075 			/*
388*2760Sdg199075 			 * Don't pass the packets up if they are tagged
389*2760Sdg199075 			 * packets and:
390*2760Sdg199075 			 *  - their VID and priority are both zero (invalid
391*2760Sdg199075 			 *    packets).
392*2760Sdg199075 			 *  - their sap is ETHERTYPE_VLAN and their VID is
393*2760Sdg199075 			 *    zero as they have already been sent upstreams.
394*2760Sdg199075 			 */
395*2760Sdg199075 			if ((vid == VLAN_ID_NONE &&
396*2760Sdg199075 			    VLAN_PRI(mhi.mhi_tci) == 0) ||
397*2760Sdg199075 			    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
398*2760Sdg199075 			    vid == VLAN_ID_NONE)) {
399*2760Sdg199075 				freemsgchain(mp);
400*2760Sdg199075 				goto loop;
401*2760Sdg199075 			}
4022311Sseb 		}
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate 		/*
4050Sstevel@tonic-gate 		 * Construct a hash key from the VLAN identifier and the
4060Sstevel@tonic-gate 		 * DLSAP.
4070Sstevel@tonic-gate 		 */
4082311Sseb 		key = MAKE_KEY(mhi.mhi_bindsap, vid);
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 		/*
4110Sstevel@tonic-gate 		 * Search the has table for dls_impl_t eligible to receive
4120Sstevel@tonic-gate 		 * a packet chain for this DLSAP/VLAN combination.
4130Sstevel@tonic-gate 		 */
414269Sericheng 		rw_enter(&dlp->dl_impl_lock, RW_READER);
415269Sericheng 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
416269Sericheng 			rw_exit(&dlp->dl_impl_lock);
4170Sstevel@tonic-gate 			freemsgchain(mp);
4180Sstevel@tonic-gate 			goto loop;
4190Sstevel@tonic-gate 		}
420269Sericheng 		i_dls_head_hold(dhp);
421269Sericheng 		rw_exit(&dlp->dl_impl_lock);
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 		/*
4240Sstevel@tonic-gate 		 * Find the first dls_impl_t that will accept the sub-chain.
4250Sstevel@tonic-gate 		 */
426269Sericheng 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
4272311Sseb 			if (dls_accept(dip, &mhi, &di_rx, &di_rx_arg))
4280Sstevel@tonic-gate 				break;
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate 		/*
4310Sstevel@tonic-gate 		 * If we did not find any dls_impl_t willing to accept the
4320Sstevel@tonic-gate 		 * sub-chain then throw it away.
4330Sstevel@tonic-gate 		 */
4340Sstevel@tonic-gate 		if (dip == NULL) {
435269Sericheng 			i_dls_head_rele(dhp);
4360Sstevel@tonic-gate 			freemsgchain(mp);
4370Sstevel@tonic-gate 			goto loop;
4380Sstevel@tonic-gate 		}
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 		/*
4410Sstevel@tonic-gate 		 * We have at least one acceptor.
4420Sstevel@tonic-gate 		 */
4430Sstevel@tonic-gate 		accepted = B_TRUE;
4440Sstevel@tonic-gate 		for (;;) {
4450Sstevel@tonic-gate 			/*
4460Sstevel@tonic-gate 			 * Find the next dls_impl_t that will accept the
4470Sstevel@tonic-gate 			 * sub-chain.
4480Sstevel@tonic-gate 			 */
4490Sstevel@tonic-gate 			for (ndip = dip->di_nextp; ndip != NULL;
4500Sstevel@tonic-gate 			    ndip = ndip->di_nextp)
4512311Sseb 				if (dls_accept(ndip, &mhi, &ndi_rx,
452449Sericheng 				    &ndi_rx_arg))
4530Sstevel@tonic-gate 					break;
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 			/*
4560Sstevel@tonic-gate 			 * If there are no more dls_impl_t that are willing
4570Sstevel@tonic-gate 			 * to accept the sub-chain then we don't need to dup
4580Sstevel@tonic-gate 			 * it before handing it to the current one.
4590Sstevel@tonic-gate 			 */
4600Sstevel@tonic-gate 			if (ndip == NULL) {
461*2760Sdg199075 				di_rx(di_rx_arg, mrh, mp, &mhi);
4620Sstevel@tonic-gate 
4630Sstevel@tonic-gate 				/*
4640Sstevel@tonic-gate 				 * Since there are no more dls_impl_t, we're
4650Sstevel@tonic-gate 				 * done.
4660Sstevel@tonic-gate 				 */
4670Sstevel@tonic-gate 				break;
4680Sstevel@tonic-gate 			}
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 			/*
4710Sstevel@tonic-gate 			 * There are more dls_impl_t so dup the sub-chain.
4720Sstevel@tonic-gate 			 */
4730Sstevel@tonic-gate 			if ((nmp = copymsgchain(mp)) != NULL)
474*2760Sdg199075 				di_rx(di_rx_arg, mrh, nmp, &mhi);
4750Sstevel@tonic-gate 
4760Sstevel@tonic-gate 			dip = ndip;
477449Sericheng 			di_rx = ndi_rx;
478449Sericheng 			di_rx_arg = ndi_rx_arg;
4790Sstevel@tonic-gate 		}
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 		/*
4820Sstevel@tonic-gate 		 * Release the hold on the dls_impl_t chain now that we have
4830Sstevel@tonic-gate 		 * finished walking it.
4840Sstevel@tonic-gate 		 */
485269Sericheng 		i_dls_head_rele(dhp);
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate loop:
4880Sstevel@tonic-gate 		/*
4890Sstevel@tonic-gate 		 * If there were no acceptors then add the packet count to the
4900Sstevel@tonic-gate 		 * 'unknown' count.
4910Sstevel@tonic-gate 		 */
4920Sstevel@tonic-gate 		if (!accepted)
4930Sstevel@tonic-gate 			atomic_add_32(&(dlp->dl_unknowns), npacket);
4940Sstevel@tonic-gate 	}
4950Sstevel@tonic-gate }
4960Sstevel@tonic-gate 
497*2760Sdg199075 /*
498*2760Sdg199075  * Try to send mp up to the DLS_SAP_PROMISC listeners. Return B_TRUE if this
499*2760Sdg199075  * message is sent to any streams.
500*2760Sdg199075  */
501*2760Sdg199075 static uint_t
502*2760Sdg199075 i_dls_link_rx_common_promisc(dls_link_t *dlp, mac_resource_handle_t mrh,
503*2760Sdg199075     mac_header_info_t *mhip, mblk_t *mp, uint16_t vid,
504*2760Sdg199075     boolean_t (*acceptfunc)())
505*2760Sdg199075 {
506*2760Sdg199075 	uint_t naccepted;
507*2760Sdg199075 
508*2760Sdg199075 	naccepted = i_dls_link_rx_func(dlp, mrh, mhip, mp, DLS_SAP_PROMISC,
509*2760Sdg199075 	    vid, acceptfunc);
510*2760Sdg199075 
511*2760Sdg199075 	if (vid != VLAN_ID_NONE) {
512*2760Sdg199075 		naccepted += i_dls_link_rx_func(dlp, mrh, mhip, mp,
513*2760Sdg199075 		    DLS_SAP_PROMISC, VLAN_ID_NONE, acceptfunc);
514*2760Sdg199075 	}
515*2760Sdg199075 	return (naccepted);
516*2760Sdg199075 }
517*2760Sdg199075 
5180Sstevel@tonic-gate static void
519*2760Sdg199075 i_dls_link_rx_common(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
520*2760Sdg199075     boolean_t (*acceptfunc)())
5210Sstevel@tonic-gate {
5220Sstevel@tonic-gate 	dls_link_t			*dlp = arg;
523269Sericheng 	mod_hash_t			*hash = dlp->dl_impl_hash;
5240Sstevel@tonic-gate 	mblk_t				*nextp;
5252311Sseb 	mac_header_info_t		mhi;
526*2760Sdg199075 	uint16_t			vid, vidkey, pri;
527269Sericheng 	dls_head_t			*dhp;
5280Sstevel@tonic-gate 	dls_impl_t			*dip;
5290Sstevel@tonic-gate 	mblk_t				*nmp;
530269Sericheng 	mod_hash_key_t			key;
5310Sstevel@tonic-gate 	uint_t				npacket;
532*2760Sdg199075 	uint32_t			sap;
5330Sstevel@tonic-gate 	boolean_t			accepted;
534*2760Sdg199075 	dls_rx_t			di_rx, fdi_rx;
535*2760Sdg199075 	void				*di_rx_arg, *fdi_rx_arg;
536*2760Sdg199075 	boolean_t			pass2;
537*2760Sdg199075 	int				err;
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 	/*
5400Sstevel@tonic-gate 	 * Walk the packet chain.
5410Sstevel@tonic-gate 	 */
542*2760Sdg199075 	for (; mp != NULL; mp = nextp) {
5430Sstevel@tonic-gate 		/*
544*2760Sdg199075 		 * Wipe the accepted state and the receive information of
545*2760Sdg199075 		 * the first eligible dls_impl_t.
5460Sstevel@tonic-gate 		 */
5470Sstevel@tonic-gate 		accepted = B_FALSE;
548*2760Sdg199075 		pass2 = B_FALSE;
549*2760Sdg199075 		fdi_rx = NULL;
550*2760Sdg199075 		fdi_rx_arg = NULL;
551*2760Sdg199075 
552*2760Sdg199075 		DLS_PREPARE_PKT(dlp, mp, &mhi, err);
553*2760Sdg199075 		if (err != 0) {
554*2760Sdg199075 			if (acceptfunc == dls_accept)
555*2760Sdg199075 				atomic_add_32(&(dlp->dl_unknowns), 1);
556*2760Sdg199075 			nextp = mp->b_next;
557*2760Sdg199075 			freemsg(mp);
558*2760Sdg199075 			continue;
559*2760Sdg199075 		}
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate 		/*
5620Sstevel@tonic-gate 		 * Grab the longest sub-chain we can process as a single
5630Sstevel@tonic-gate 		 * unit.
5640Sstevel@tonic-gate 		 */
565*2760Sdg199075 		nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
566*2760Sdg199075 		ASSERT(npacket != 0);
567*2760Sdg199075 
568*2760Sdg199075 		vid = VLAN_ID(mhi.mhi_tci);
569*2760Sdg199075 		pri = VLAN_PRI(mhi.mhi_tci);
570*2760Sdg199075 
571*2760Sdg199075 		vidkey = vid;
5722311Sseb 
573*2760Sdg199075 		/*
574*2760Sdg199075 		 * Note that we need to first send to the dls_impl_t
575*2760Sdg199075 		 * in promiscuous mode in order to avoid the packet reordering
576*2760Sdg199075 		 * when snooping.
577*2760Sdg199075 		 */
578*2760Sdg199075 		if (i_dls_link_rx_common_promisc(dlp, mrh, &mhi, mp, vidkey,
579*2760Sdg199075 		    acceptfunc) > 0) {
580*2760Sdg199075 			accepted = B_TRUE;
5812311Sseb 		}
5820Sstevel@tonic-gate 
5830Sstevel@tonic-gate 		/*
584*2760Sdg199075 		 * Non promisc case. Two passes:
585*2760Sdg199075 		 *   1. send tagged packets to ETHERTYPE_VLAN listeners
586*2760Sdg199075 		 *   2. send packets to listeners bound to the specific SAP.
5870Sstevel@tonic-gate 		 */
588*2760Sdg199075 		if (mhi.mhi_istagged) {
589*2760Sdg199075 			vidkey = VLAN_ID_NONE;
590*2760Sdg199075 			sap = ETHERTYPE_VLAN;
591*2760Sdg199075 		} else {
592*2760Sdg199075 			goto non_promisc_loop;
5930Sstevel@tonic-gate 		}
5940Sstevel@tonic-gate non_promisc:
5950Sstevel@tonic-gate 		/*
5960Sstevel@tonic-gate 		 * Construct a hash key from the VLAN identifier and the
5970Sstevel@tonic-gate 		 * DLSAP.
5980Sstevel@tonic-gate 		 */
599*2760Sdg199075 		key = MAKE_KEY(sap, vidkey);
6000Sstevel@tonic-gate 
6010Sstevel@tonic-gate 		/*
6020Sstevel@tonic-gate 		 * Search the has table for dls_impl_t eligible to receive
6030Sstevel@tonic-gate 		 * a packet chain for this DLSAP/VLAN combination.
6040Sstevel@tonic-gate 		 */
605269Sericheng 		rw_enter(&dlp->dl_impl_lock, RW_READER);
606269Sericheng 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
607269Sericheng 			rw_exit(&dlp->dl_impl_lock);
608*2760Sdg199075 			goto non_promisc_loop;
6090Sstevel@tonic-gate 		}
610269Sericheng 		i_dls_head_hold(dhp);
611269Sericheng 		rw_exit(&dlp->dl_impl_lock);
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate 		/*
6140Sstevel@tonic-gate 		 * Find the first dls_impl_t that will accept the sub-chain.
6150Sstevel@tonic-gate 		 */
616*2760Sdg199075 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
617*2760Sdg199075 			if (!acceptfunc(dip, &mhi, &di_rx, &di_rx_arg))
618*2760Sdg199075 				continue;
6190Sstevel@tonic-gate 
620*2760Sdg199075 			accepted = B_TRUE;
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 			/*
623*2760Sdg199075 			 * To avoid the extra copymsgchain(), if this
624*2760Sdg199075 			 * is the first eligible dls_impl_t, remember required
625*2760Sdg199075 			 * information and send up the message afterwards.
6260Sstevel@tonic-gate 			 */
627*2760Sdg199075 			if (fdi_rx == NULL) {
628*2760Sdg199075 				fdi_rx = di_rx;
629*2760Sdg199075 				fdi_rx_arg = di_rx_arg;
630*2760Sdg199075 				continue;
6310Sstevel@tonic-gate 			}
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate 			if ((nmp = copymsgchain(mp)) != NULL)
634*2760Sdg199075 				di_rx(di_rx_arg, mrh, nmp, &mhi);
6350Sstevel@tonic-gate 		}
6360Sstevel@tonic-gate 
6370Sstevel@tonic-gate 		/*
6380Sstevel@tonic-gate 		 * Release the hold on the dls_impl_t chain now that we have
6390Sstevel@tonic-gate 		 * finished walking it.
6400Sstevel@tonic-gate 		 */
641269Sericheng 		i_dls_head_rele(dhp);
6420Sstevel@tonic-gate 
643*2760Sdg199075 non_promisc_loop:
644*2760Sdg199075 		/*
645*2760Sdg199075 		 * Don't pass the packets up again if:
646*2760Sdg199075 		 * - First pass is done and the packets are tagged and their:
647*2760Sdg199075 		 *	- VID and priority are both zero (invalid packets).
648*2760Sdg199075 		 *	- their sap is ETHERTYPE_VLAN and their VID is zero
649*2760Sdg199075 		 *	  (they have already been sent upstreams).
650*2760Sdg199075 		 *  - Second pass is done:
651*2760Sdg199075 		 */
652*2760Sdg199075 		if (pass2 || (mhi.mhi_istagged &&
653*2760Sdg199075 		    ((vid == VLAN_ID_NONE && pri == 0) ||
654*2760Sdg199075 		    (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
655*2760Sdg199075 		    vid == VLAN_ID_NONE)))) {
656*2760Sdg199075 			/*
657*2760Sdg199075 			 * Send the message up to the first eligible dls_impl_t.
658*2760Sdg199075 			 */
659*2760Sdg199075 			if (fdi_rx != NULL)
660*2760Sdg199075 				fdi_rx(fdi_rx_arg, mrh, mp, &mhi);
661*2760Sdg199075 			else
662*2760Sdg199075 				freemsgchain(mp);
663*2760Sdg199075 		} else {
664*2760Sdg199075 			vidkey = vid;
665*2760Sdg199075 			sap = mhi.mhi_bindsap;
666*2760Sdg199075 			pass2 = B_TRUE;
667*2760Sdg199075 			goto non_promisc;
668*2760Sdg199075 		}
669*2760Sdg199075 
6700Sstevel@tonic-gate 		/*
6710Sstevel@tonic-gate 		 * If there were no acceptors then add the packet count to the
6720Sstevel@tonic-gate 		 * 'unknown' count.
6730Sstevel@tonic-gate 		 */
674*2760Sdg199075 		if (!accepted && (acceptfunc == dls_accept))
6750Sstevel@tonic-gate 			atomic_add_32(&(dlp->dl_unknowns), npacket);
676*2760Sdg199075 	}
677*2760Sdg199075 }
6780Sstevel@tonic-gate 
679*2760Sdg199075 static void
680*2760Sdg199075 i_dls_link_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
681*2760Sdg199075 {
682*2760Sdg199075 	i_dls_link_rx_common(arg, mrh, mp, dls_accept);
6830Sstevel@tonic-gate }
6840Sstevel@tonic-gate 
6850Sstevel@tonic-gate static void
6862311Sseb i_dls_link_txloop(void *arg, mblk_t *mp)
6870Sstevel@tonic-gate {
688*2760Sdg199075 	i_dls_link_rx_common(arg, NULL, mp, dls_accept_loopback);
6890Sstevel@tonic-gate }
6900Sstevel@tonic-gate 
691269Sericheng /*ARGSUSED*/
692269Sericheng static uint_t
693269Sericheng i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
6940Sstevel@tonic-gate {
6950Sstevel@tonic-gate 	boolean_t	*promiscp = arg;
6960Sstevel@tonic-gate 	uint32_t	sap = KEY_SAP(key);
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	if (sap == DLS_SAP_PROMISC) {
6990Sstevel@tonic-gate 		*promiscp = B_TRUE;
700269Sericheng 		return (MH_WALK_TERMINATE);
7010Sstevel@tonic-gate 	}
7020Sstevel@tonic-gate 
703269Sericheng 	return (MH_WALK_CONTINUE);
7040Sstevel@tonic-gate }
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate static int
7072311Sseb i_dls_link_create(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
7080Sstevel@tonic-gate {
7090Sstevel@tonic-gate 	dls_link_t		*dlp;
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 	/*
7120Sstevel@tonic-gate 	 * Allocate a new dls_link_t structure.
7130Sstevel@tonic-gate 	 */
7140Sstevel@tonic-gate 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	/*
7170Sstevel@tonic-gate 	 * Name the dls_link_t after the MAC interface it represents.
7180Sstevel@tonic-gate 	 */
7192311Sseb 	(void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
7202311Sseb 	dlp->dl_ddi_instance = ddi_instance;
7210Sstevel@tonic-gate 
7220Sstevel@tonic-gate 	/*
7230Sstevel@tonic-gate 	 * Set the packet loopback function for use when the MAC is in
7240Sstevel@tonic-gate 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
7250Sstevel@tonic-gate 	 */
7262311Sseb 	dlp->dl_txloop = i_dls_link_txloop;
7270Sstevel@tonic-gate 	dlp->dl_npromisc = 0;
7280Sstevel@tonic-gate 	dlp->dl_mth = NULL;
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	*dlpp = dlp;
7310Sstevel@tonic-gate 	return (0);
7320Sstevel@tonic-gate }
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate static void
7350Sstevel@tonic-gate i_dls_link_destroy(dls_link_t *dlp)
7360Sstevel@tonic-gate {
7370Sstevel@tonic-gate 	ASSERT(dlp->dl_npromisc == 0);
7380Sstevel@tonic-gate 	ASSERT(dlp->dl_nactive == 0);
7390Sstevel@tonic-gate 	ASSERT(dlp->dl_mth == NULL);
7400Sstevel@tonic-gate 	ASSERT(dlp->dl_macref == 0);
7410Sstevel@tonic-gate 	ASSERT(dlp->dl_mh == NULL);
7420Sstevel@tonic-gate 	ASSERT(dlp->dl_mip == NULL);
743269Sericheng 	ASSERT(dlp->dl_impl_count == 0);
744269Sericheng 	ASSERT(dlp->dl_mrh == NULL);
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	/*
7470Sstevel@tonic-gate 	 * Free the structure back to the cache.
7480Sstevel@tonic-gate 	 */
7490Sstevel@tonic-gate 	dlp->dl_unknowns = 0;
7500Sstevel@tonic-gate 	kmem_cache_free(i_dls_link_cachep, dlp);
7510Sstevel@tonic-gate }
7520Sstevel@tonic-gate 
7530Sstevel@tonic-gate /*
7540Sstevel@tonic-gate  * Module initialization functions.
7550Sstevel@tonic-gate  */
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate void
7580Sstevel@tonic-gate dls_link_init(void)
7590Sstevel@tonic-gate {
7600Sstevel@tonic-gate 	/*
7610Sstevel@tonic-gate 	 * Create a kmem_cache of dls_link_t structures.
7620Sstevel@tonic-gate 	 */
7630Sstevel@tonic-gate 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
7640Sstevel@tonic-gate 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
7650Sstevel@tonic-gate 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
7660Sstevel@tonic-gate 	ASSERT(i_dls_link_cachep != NULL);
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	/*
769269Sericheng 	 * Create a dls_link_t hash table and associated lock.
7700Sstevel@tonic-gate 	 */
771269Sericheng 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
772269Sericheng 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
773269Sericheng 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
774269Sericheng 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
775269Sericheng 	i_dls_link_count = 0;
7760Sstevel@tonic-gate }
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate int
7790Sstevel@tonic-gate dls_link_fini(void)
7800Sstevel@tonic-gate {
781269Sericheng 	if (i_dls_link_count > 0)
782269Sericheng 		return (EBUSY);
7830Sstevel@tonic-gate 
7840Sstevel@tonic-gate 	/*
7850Sstevel@tonic-gate 	 * Destroy the kmem_cache.
7860Sstevel@tonic-gate 	 */
7870Sstevel@tonic-gate 	kmem_cache_destroy(i_dls_link_cachep);
788269Sericheng 
789269Sericheng 	/*
790269Sericheng 	 * Destroy the hash table and associated lock.
791269Sericheng 	 */
792269Sericheng 	mod_hash_destroy_hash(i_dls_link_hash);
793269Sericheng 	rw_destroy(&i_dls_link_lock);
7940Sstevel@tonic-gate 	return (0);
7950Sstevel@tonic-gate }
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate /*
7980Sstevel@tonic-gate  * Exported functions.
7990Sstevel@tonic-gate  */
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate int
8022311Sseb dls_link_hold(const char *name, uint_t ddi_instance, dls_link_t **dlpp)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate 	dls_link_t		*dlp;
8050Sstevel@tonic-gate 	int			err;
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 	/*
8080Sstevel@tonic-gate 	 * Look up a dls_link_t corresponding to the given mac_handle_t
809269Sericheng 	 * in the global hash table. We need to hold i_dls_link_lock in
810269Sericheng 	 * order to atomically find and insert a dls_link_t into the
811269Sericheng 	 * hash table.
8120Sstevel@tonic-gate 	 */
813269Sericheng 	rw_enter(&i_dls_link_lock, RW_WRITER);
814269Sericheng 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
815269Sericheng 	    (mod_hash_val_t *)&dlp)) == 0)
8160Sstevel@tonic-gate 		goto done;
817269Sericheng 
818269Sericheng 	ASSERT(err == MH_ERR_NOTFOUND);
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	/*
8210Sstevel@tonic-gate 	 * We didn't find anything so we need to create one.
8220Sstevel@tonic-gate 	 */
8232311Sseb 	if ((err = i_dls_link_create(name, ddi_instance, &dlp)) != 0) {
824269Sericheng 		rw_exit(&i_dls_link_lock);
8250Sstevel@tonic-gate 		return (err);
8260Sstevel@tonic-gate 	}
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 	/*
829269Sericheng 	 * Insert the dls_link_t.
8300Sstevel@tonic-gate 	 */
8312311Sseb 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)name,
832269Sericheng 	    (mod_hash_val_t)dlp);
8330Sstevel@tonic-gate 	ASSERT(err == 0);
8340Sstevel@tonic-gate 
835269Sericheng 	i_dls_link_count++;
836269Sericheng 	ASSERT(i_dls_link_count != 0);
837269Sericheng 
8380Sstevel@tonic-gate done:
8390Sstevel@tonic-gate 	/*
8400Sstevel@tonic-gate 	 * Bump the reference count and hand back the reference.
8410Sstevel@tonic-gate 	 */
8420Sstevel@tonic-gate 	dlp->dl_ref++;
8430Sstevel@tonic-gate 	*dlpp = dlp;
844269Sericheng 	rw_exit(&i_dls_link_lock);
845269Sericheng 	return (0);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate 
8480Sstevel@tonic-gate void
8490Sstevel@tonic-gate dls_link_rele(dls_link_t *dlp)
8500Sstevel@tonic-gate {
851269Sericheng 	mod_hash_val_t	val;
8520Sstevel@tonic-gate 
853269Sericheng 	rw_enter(&i_dls_link_lock, RW_WRITER);
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	/*
8560Sstevel@tonic-gate 	 * Check if there are any more references.
8570Sstevel@tonic-gate 	 */
8580Sstevel@tonic-gate 	if (--dlp->dl_ref != 0) {
8590Sstevel@tonic-gate 		/*
8600Sstevel@tonic-gate 		 * There are more references so there's nothing more to do.
8610Sstevel@tonic-gate 		 */
8620Sstevel@tonic-gate 		goto done;
8630Sstevel@tonic-gate 	}
8640Sstevel@tonic-gate 
865269Sericheng 	(void) mod_hash_remove(i_dls_link_hash,
866269Sericheng 	    (mod_hash_key_t)dlp->dl_name, &val);
867269Sericheng 	ASSERT(dlp == (dls_link_t *)val);
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	/*
8700Sstevel@tonic-gate 	 * Destroy the dls_link_t.
8710Sstevel@tonic-gate 	 */
8720Sstevel@tonic-gate 	i_dls_link_destroy(dlp);
873269Sericheng 	ASSERT(i_dls_link_count > 0);
874269Sericheng 	i_dls_link_count--;
8750Sstevel@tonic-gate done:
876269Sericheng 	rw_exit(&i_dls_link_lock);
8770Sstevel@tonic-gate }
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate int
8800Sstevel@tonic-gate dls_mac_hold(dls_link_t *dlp)
8810Sstevel@tonic-gate {
8820Sstevel@tonic-gate 	int err = 0;
8830Sstevel@tonic-gate 
8840Sstevel@tonic-gate 	mutex_enter(&dlp->dl_lock);
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
8870Sstevel@tonic-gate 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate 	if (dlp->dl_macref == 0) {
8900Sstevel@tonic-gate 		/*
8910Sstevel@tonic-gate 		 * First reference; hold open the MAC interface.
8920Sstevel@tonic-gate 		 */
8932311Sseb 		err = mac_open(dlp->dl_name, dlp->dl_ddi_instance, &dlp->dl_mh);
8940Sstevel@tonic-gate 		if (err != 0)
8950Sstevel@tonic-gate 			goto done;
8960Sstevel@tonic-gate 
8970Sstevel@tonic-gate 		dlp->dl_mip = mac_info(dlp->dl_mh);
8980Sstevel@tonic-gate 	}
8990Sstevel@tonic-gate 
9000Sstevel@tonic-gate 	dlp->dl_macref++;
9010Sstevel@tonic-gate done:
9020Sstevel@tonic-gate 	mutex_exit(&dlp->dl_lock);
9030Sstevel@tonic-gate 	return (err);
9040Sstevel@tonic-gate }
9050Sstevel@tonic-gate 
9060Sstevel@tonic-gate void
9070Sstevel@tonic-gate dls_mac_rele(dls_link_t *dlp)
9080Sstevel@tonic-gate {
9090Sstevel@tonic-gate 	mutex_enter(&dlp->dl_lock);
9100Sstevel@tonic-gate 	ASSERT(dlp->dl_mh != NULL);
9110Sstevel@tonic-gate 
9120Sstevel@tonic-gate 	if (--dlp->dl_macref == 0) {
9130Sstevel@tonic-gate 		mac_close(dlp->dl_mh);
9140Sstevel@tonic-gate 		dlp->dl_mh = NULL;
9150Sstevel@tonic-gate 		dlp->dl_mip = NULL;
9160Sstevel@tonic-gate 	}
9170Sstevel@tonic-gate 	mutex_exit(&dlp->dl_lock);
9180Sstevel@tonic-gate }
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate void
9210Sstevel@tonic-gate dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
9220Sstevel@tonic-gate {
9230Sstevel@tonic-gate 	dls_vlan_t	*dvp = dip->di_dvp;
924269Sericheng 	mod_hash_t	*hash = dlp->dl_impl_hash;
925269Sericheng 	mod_hash_key_t	key;
926269Sericheng 	dls_head_t	*dhp;
9270Sstevel@tonic-gate 	dls_impl_t	*p;
9280Sstevel@tonic-gate 	mac_rx_t	rx;
9290Sstevel@tonic-gate 	int		err;
930269Sericheng 	boolean_t	promisc = B_FALSE;
9310Sstevel@tonic-gate 
9320Sstevel@tonic-gate 	/*
9332311Sseb 	 * Generate a hash key based on the sap and the VLAN id.
9340Sstevel@tonic-gate 	 */
9350Sstevel@tonic-gate 	key = MAKE_KEY(sap, dvp->dv_id);
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate 	/*
9380Sstevel@tonic-gate 	 * We need dl_lock here because we want to be able to walk
9390Sstevel@tonic-gate 	 * the hash table *and* set the mac rx func atomically. if
9400Sstevel@tonic-gate 	 * these two operations are separate, someone else could
941269Sericheng 	 * insert/remove dls_impl_t from the hash table after we
942269Sericheng 	 * drop the hash lock and this could cause our chosen rx
943269Sericheng 	 * func to be incorrect. note that we cannot call mac_rx_add
944269Sericheng 	 * when holding the hash lock because this can cause deadlock.
9450Sstevel@tonic-gate 	 */
9460Sstevel@tonic-gate 	mutex_enter(&dlp->dl_lock);
9470Sstevel@tonic-gate 
9480Sstevel@tonic-gate 	/*
949269Sericheng 	 * Search the table for a list head with this key.
9500Sstevel@tonic-gate 	 */
951269Sericheng 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
9520Sstevel@tonic-gate 
953269Sericheng 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
954269Sericheng 		ASSERT(err == MH_ERR_NOTFOUND);
9550Sstevel@tonic-gate 
956269Sericheng 		dhp = i_dls_head_alloc(key);
957269Sericheng 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
958269Sericheng 		ASSERT(err == 0);
9590Sstevel@tonic-gate 	}
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate 	/*
962269Sericheng 	 * Add the dls_impl_t to the head of the list.
963269Sericheng 	 */
964269Sericheng 	ASSERT(dip->di_nextp == NULL);
965269Sericheng 	p = dhp->dh_list;
966269Sericheng 	dip->di_nextp = p;
967269Sericheng 	dhp->dh_list = dip;
968269Sericheng 
969269Sericheng 	/*
970269Sericheng 	 * Save a pointer to the list head.
971269Sericheng 	 */
972269Sericheng 	dip->di_headp = dhp;
973269Sericheng 	dlp->dl_impl_count++;
974269Sericheng 
975269Sericheng 	/*
976269Sericheng 	 * Walk the bound dls_impl_t to see if there are any
977269Sericheng 	 * in promiscuous 'all sap' mode.
9780Sstevel@tonic-gate 	 */
979269Sericheng 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
980269Sericheng 	rw_exit(&dlp->dl_impl_lock);
981269Sericheng 
982269Sericheng 	/*
983269Sericheng 	 * If there are then we need to use a receive routine
984269Sericheng 	 * which will route packets to those dls_impl_t as well
985269Sericheng 	 * as ones bound to the  DLSAP of the packet.
986269Sericheng 	 */
987269Sericheng 	if (promisc)
9882311Sseb 		rx = i_dls_link_rx_promisc;
989269Sericheng 	else
9902311Sseb 		rx = i_dls_link_rx;
991269Sericheng 
992269Sericheng 	/* Replace the existing receive function if there is one. */
993269Sericheng 	if (dlp->dl_mrh != NULL)
994269Sericheng 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
995269Sericheng 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
996269Sericheng 	mutex_exit(&dlp->dl_lock);
997269Sericheng }
998269Sericheng 
999269Sericheng void
1000269Sericheng dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1001269Sericheng {
1002269Sericheng 	mod_hash_t	*hash = dlp->dl_impl_hash;
1003269Sericheng 	dls_impl_t	**pp;
1004269Sericheng 	dls_impl_t	*p;
1005269Sericheng 	dls_head_t	*dhp;
1006269Sericheng 	mac_rx_t	rx;
10070Sstevel@tonic-gate 
10080Sstevel@tonic-gate 	/*
1009269Sericheng 	 * We need dl_lock here because we want to be able to walk
1010269Sericheng 	 * the hash table *and* set the mac rx func atomically. if
1011269Sericheng 	 * these two operations are separate, someone else could
1012269Sericheng 	 * insert/remove dls_impl_t from the hash table after we
1013269Sericheng 	 * drop the hash lock and this could cause our chosen rx
1014269Sericheng 	 * func to be incorrect. note that we cannot call mac_rx_add
1015269Sericheng 	 * when holding the hash lock because this can cause deadlock.
10160Sstevel@tonic-gate 	 */
1017269Sericheng 	mutex_enter(&dlp->dl_lock);
1018269Sericheng 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
10190Sstevel@tonic-gate 
1020269Sericheng 	/*
1021269Sericheng 	 * Poll the hash table entry until all references have been dropped.
1022269Sericheng 	 * We need to drop all locks before sleeping because we don't want
1023269Sericheng 	 * the interrupt handler to block. We set di_removing here to
1024269Sericheng 	 * tell the receive callbacks not to pass up packets anymore.
1025269Sericheng 	 * This is only a hint to quicken the decrease of the refcnt so
1026269Sericheng 	 * the assignment need not be protected by any lock.
1027269Sericheng 	 */
1028269Sericheng 	dhp = dip->di_headp;
1029269Sericheng 	dip->di_removing = B_TRUE;
1030269Sericheng 	while (dhp->dh_ref != 0) {
1031269Sericheng 		rw_exit(&dlp->dl_impl_lock);
1032269Sericheng 		mutex_exit(&dlp->dl_lock);
1033269Sericheng 		delay(drv_usectohz(1000));	/* 1ms delay */
1034269Sericheng 		mutex_enter(&dlp->dl_lock);
1035269Sericheng 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1036269Sericheng 	}
10370Sstevel@tonic-gate 
10380Sstevel@tonic-gate 	/*
1039269Sericheng 	 * Walk the list and remove the dls_impl_t.
10400Sstevel@tonic-gate 	 */
1041269Sericheng 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1042269Sericheng 		if (p == dip)
1043269Sericheng 			break;
1044269Sericheng 	}
1045269Sericheng 	ASSERT(p != NULL);
1046269Sericheng 	*pp = p->di_nextp;
1047269Sericheng 	p->di_nextp = NULL;
1048269Sericheng 
1049269Sericheng 	ASSERT(dlp->dl_impl_count > 0);
1050269Sericheng 	dlp->dl_impl_count--;
10510Sstevel@tonic-gate 
1052269Sericheng 	if (dhp->dh_list == NULL) {
1053269Sericheng 		mod_hash_val_t	val = NULL;
1054269Sericheng 
1055269Sericheng 		/*
1056269Sericheng 		 * The list is empty so remove the hash table entry.
1057269Sericheng 		 */
1058269Sericheng 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1059269Sericheng 		ASSERT(dhp == (dls_head_t *)val);
1060269Sericheng 		i_dls_head_free(dhp);
1061269Sericheng 	}
1062269Sericheng 	dip->di_removing = B_FALSE;
1063269Sericheng 
10640Sstevel@tonic-gate 	/*
1065269Sericheng 	 * If there are no dls_impl_t then there's no need to register a
1066269Sericheng 	 * receive function with the mac.
10670Sstevel@tonic-gate 	 */
1068269Sericheng 	if (dlp->dl_impl_count == 0) {
1069269Sericheng 		rw_exit(&dlp->dl_impl_lock);
1070269Sericheng 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1071269Sericheng 		dlp->dl_mrh = NULL;
10720Sstevel@tonic-gate 	} else {
10730Sstevel@tonic-gate 		boolean_t promisc = B_FALSE;
10740Sstevel@tonic-gate 
10750Sstevel@tonic-gate 		/*
10760Sstevel@tonic-gate 		 * Walk the bound dls_impl_t to see if there are any
10770Sstevel@tonic-gate 		 * in promiscuous 'all sap' mode.
10780Sstevel@tonic-gate 		 */
1079269Sericheng 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1080269Sericheng 		rw_exit(&dlp->dl_impl_lock);
10810Sstevel@tonic-gate 
10820Sstevel@tonic-gate 		/*
10830Sstevel@tonic-gate 		 * If there are then we need to use a receive routine
10840Sstevel@tonic-gate 		 * which will route packets to those dls_impl_t as well
10850Sstevel@tonic-gate 		 * as ones bound to the  DLSAP of the packet.
10860Sstevel@tonic-gate 		 */
10870Sstevel@tonic-gate 		if (promisc)
10882311Sseb 			rx = i_dls_link_rx_promisc;
10890Sstevel@tonic-gate 		else
10902311Sseb 			rx = i_dls_link_rx;
10910Sstevel@tonic-gate 
10920Sstevel@tonic-gate 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
10930Sstevel@tonic-gate 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
10940Sstevel@tonic-gate 	}
10950Sstevel@tonic-gate 	mutex_exit(&dlp->dl_lock);
10960Sstevel@tonic-gate }
10972311Sseb 
10982311Sseb int
1099*2760Sdg199075 dls_link_header_info(dls_link_t *dlp, mblk_t *mp, mac_header_info_t *mhip)
11002311Sseb {
11012311Sseb 	boolean_t	is_ethernet = (dlp->dl_mip->mi_media == DL_ETHER);
11022311Sseb 	int		err = 0;
11032311Sseb 
1104*2760Sdg199075 	/*
1105*2760Sdg199075 	 * Packets should always be at least 16 bit aligned.
1106*2760Sdg199075 	 */
1107*2760Sdg199075 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
1108*2760Sdg199075 
11092311Sseb 	if ((err = mac_header_info(dlp->dl_mh, mp, mhip)) != 0)
11102311Sseb 		return (err);
11112311Sseb 
11122311Sseb 	/*
11132311Sseb 	 * If this is a VLAN-tagged Ethernet packet, then the SAP in the
1114*2760Sdg199075 	 * mac_header_info_t as returned by mac_header_info() is
1115*2760Sdg199075 	 * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
11162311Sseb 	 */
1117*2760Sdg199075 	if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
11182311Sseb 		struct ether_vlan_header *evhp;
11192311Sseb 		uint16_t sap;
1120*2760Sdg199075 		mblk_t *tmp = NULL;
1121*2760Sdg199075 		size_t size;
11222311Sseb 
1123*2760Sdg199075 		size = sizeof (struct ether_vlan_header);
1124*2760Sdg199075 		if (MBLKL(mp) < size) {
1125*2760Sdg199075 			/*
1126*2760Sdg199075 			 * Pullup the message in order to get the MAC header
1127*2760Sdg199075 			 * infomation. Note that this is a read-only function,
1128*2760Sdg199075 			 * we keep the input packet intact.
1129*2760Sdg199075 			 */
1130*2760Sdg199075 			if ((tmp = msgpullup(mp, size)) == NULL)
1131*2760Sdg199075 				return (EINVAL);
1132*2760Sdg199075 
1133*2760Sdg199075 			mp = tmp;
1134*2760Sdg199075 		}
11352311Sseb 		evhp = (struct ether_vlan_header *)mp->b_rptr;
11362311Sseb 		sap = ntohs(evhp->ether_type);
11372311Sseb 		(void) mac_sap_verify(dlp->dl_mh, sap, &mhip->mhi_bindsap);
11382311Sseb 		mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
1139*2760Sdg199075 		mhip->mhi_tci = ntohs(evhp->ether_tci);
1140*2760Sdg199075 		mhip->mhi_istagged = B_TRUE;
1141*2760Sdg199075 		freemsg(tmp);
1142*2760Sdg199075 
1143*2760Sdg199075 		if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
1144*2760Sdg199075 			return (EINVAL);
1145*2760Sdg199075 	} else {
1146*2760Sdg199075 		mhip->mhi_istagged = B_FALSE;
1147*2760Sdg199075 		mhip->mhi_tci = 0;
11482311Sseb 	}
11492311Sseb 	return (0);
11502311Sseb }
1151