xref: /onnv-gate/usr/src/uts/common/io/mac/mac_util.c (revision 11878:ac93462db6d7)
18275SEric Cheng /*
28275SEric Cheng  * CDDL HEADER START
38275SEric Cheng  *
48275SEric Cheng  * The contents of this file are subject to the terms of the
58275SEric Cheng  * Common Development and Distribution License (the "License").
68275SEric Cheng  * You may not use this file except in compliance with the License.
78275SEric Cheng  *
88275SEric Cheng  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
98275SEric Cheng  * or http://www.opensolaris.org/os/licensing.
108275SEric Cheng  * See the License for the specific language governing permissions
118275SEric Cheng  * and limitations under the License.
128275SEric Cheng  *
138275SEric Cheng  * When distributing Covered Code, include this CDDL HEADER in each
148275SEric Cheng  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
158275SEric Cheng  * If applicable, add the following below this CDDL HEADER, with the
168275SEric Cheng  * fields enclosed by brackets "[]" replaced with your own identifying
178275SEric Cheng  * information: Portions Copyright [yyyy] [name of copyright owner]
188275SEric Cheng  *
198275SEric Cheng  * CDDL HEADER END
208275SEric Cheng  */
218275SEric Cheng /*
2211528SBaban.Kenkre@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
238275SEric Cheng  * Use is subject to license terms.
248275SEric Cheng  */
258275SEric Cheng 
268275SEric Cheng /*
278275SEric Cheng  * MAC Services Module - misc utilities
288275SEric Cheng  */
298275SEric Cheng 
308275SEric Cheng #include <sys/types.h>
318275SEric Cheng #include <sys/mac.h>
328275SEric Cheng #include <sys/mac_impl.h>
338275SEric Cheng #include <sys/mac_client_priv.h>
348275SEric Cheng #include <sys/mac_client_impl.h>
358275SEric Cheng #include <sys/mac_soft_ring.h>
368275SEric Cheng #include <sys/strsubr.h>
378275SEric Cheng #include <sys/strsun.h>
388275SEric Cheng #include <sys/vlan.h>
398275SEric Cheng #include <sys/pattr.h>
408275SEric Cheng #include <sys/pci_tools.h>
418275SEric Cheng #include <inet/ip.h>
428275SEric Cheng #include <inet/ip_impl.h>
438275SEric Cheng #include <inet/ip6.h>
448275SEric Cheng #include <sys/vtrace.h>
458275SEric Cheng #include <sys/dlpi.h>
468275SEric Cheng #include <sys/sunndi.h>
478833SVenu.Iyer@Sun.COM #include <inet/ipsec_impl.h>
488833SVenu.Iyer@Sun.COM #include <inet/sadb.h>
498833SVenu.Iyer@Sun.COM #include <inet/ipsecesp.h>
508833SVenu.Iyer@Sun.COM #include <inet/ipsecah.h>
518275SEric Cheng 
528275SEric Cheng /*
538275SEric Cheng  * Copy an mblk, preserving its hardware checksum flags.
548275SEric Cheng  */
558275SEric Cheng static mblk_t *
568275SEric Cheng mac_copymsg_cksum(mblk_t *mp)
578275SEric Cheng {
588275SEric Cheng 	mblk_t *mp1;
598275SEric Cheng 	uint32_t start, stuff, end, value, flags;
608275SEric Cheng 
618275SEric Cheng 	mp1 = copymsg(mp);
628275SEric Cheng 	if (mp1 == NULL)
638275SEric Cheng 		return (NULL);
648275SEric Cheng 
658275SEric Cheng 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
668275SEric Cheng 	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
678275SEric Cheng 	    flags, KM_NOSLEEP);
688275SEric Cheng 
698275SEric Cheng 	return (mp1);
708275SEric Cheng }
718275SEric Cheng 
728275SEric Cheng /*
738275SEric Cheng  * Copy an mblk chain, presenting the hardware checksum flags of the
748275SEric Cheng  * individual mblks.
758275SEric Cheng  */
768275SEric Cheng mblk_t *
778275SEric Cheng mac_copymsgchain_cksum(mblk_t *mp)
788275SEric Cheng {
798275SEric Cheng 	mblk_t *nmp = NULL;
808275SEric Cheng 	mblk_t **nmpp = &nmp;
818275SEric Cheng 
828275SEric Cheng 	for (; mp != NULL; mp = mp->b_next) {
838275SEric Cheng 		if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) {
848275SEric Cheng 			freemsgchain(nmp);
858275SEric Cheng 			return (NULL);
868275SEric Cheng 		}
878275SEric Cheng 
888275SEric Cheng 		nmpp = &((*nmpp)->b_next);
898275SEric Cheng 	}
908275SEric Cheng 
918275SEric Cheng 	return (nmp);
928275SEric Cheng }
938275SEric Cheng 
948275SEric Cheng /*
958275SEric Cheng  * Process the specified mblk chain for proper handling of hardware
968275SEric Cheng  * checksum offload. This routine is invoked for loopback traffic
978275SEric Cheng  * between MAC clients.
988275SEric Cheng  * The function handles a NULL mblk chain passed as argument.
998275SEric Cheng  */
1008275SEric Cheng mblk_t *
1018275SEric Cheng mac_fix_cksum(mblk_t *mp_chain)
1028275SEric Cheng {
1038275SEric Cheng 	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
1048275SEric Cheng 	uint32_t flags, start, stuff, end, value;
1058275SEric Cheng 
1068275SEric Cheng 	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
1078275SEric Cheng 		uint16_t len;
1088275SEric Cheng 		uint32_t offset;
1098275SEric Cheng 		struct ether_header *ehp;
1108275SEric Cheng 		uint16_t sap;
1118275SEric Cheng 
1128275SEric Cheng 		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
1138275SEric Cheng 		    &flags);
1148275SEric Cheng 		if (flags == 0)
1158275SEric Cheng 			continue;
1168275SEric Cheng 
1178275SEric Cheng 		/*
1188275SEric Cheng 		 * Since the processing of checksum offload for loopback
1198275SEric Cheng 		 * traffic requires modification of the packet contents,
1208275SEric Cheng 		 * ensure sure that we are always modifying our own copy.
1218275SEric Cheng 		 */
1228275SEric Cheng 		if (DB_REF(mp) > 1) {
1238275SEric Cheng 			mp1 = copymsg(mp);
1248275SEric Cheng 			if (mp1 == NULL)
1258275SEric Cheng 				continue;
1268275SEric Cheng 			mp1->b_next = mp->b_next;
1278275SEric Cheng 			mp->b_next = NULL;
1288275SEric Cheng 			freemsg(mp);
1298275SEric Cheng 			if (prev != NULL)
1308275SEric Cheng 				prev->b_next = mp1;
1318275SEric Cheng 			else
1328275SEric Cheng 				new_chain = mp1;
1338275SEric Cheng 			mp = mp1;
1348275SEric Cheng 		}
1358275SEric Cheng 
1368275SEric Cheng 		/*
1378275SEric Cheng 		 * Ethernet, and optionally VLAN header.
1388275SEric Cheng 		 */
1398275SEric Cheng 		/* LINTED: improper alignment cast */
1408275SEric Cheng 		ehp = (struct ether_header *)mp->b_rptr;
1418275SEric Cheng 		if (ntohs(ehp->ether_type) == VLAN_TPID) {
1428275SEric Cheng 			struct ether_vlan_header *evhp;
1438275SEric Cheng 
1448275SEric Cheng 			ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
1458275SEric Cheng 			/* LINTED: improper alignment cast */
1468275SEric Cheng 			evhp = (struct ether_vlan_header *)mp->b_rptr;
1478275SEric Cheng 			sap = ntohs(evhp->ether_type);
1488275SEric Cheng 			offset = sizeof (struct ether_vlan_header);
1498275SEric Cheng 		} else {
1508275SEric Cheng 			sap = ntohs(ehp->ether_type);
1518275SEric Cheng 			offset = sizeof (struct ether_header);
1528275SEric Cheng 		}
1538275SEric Cheng 
1548275SEric Cheng 		if (MBLKL(mp) <= offset) {
1558275SEric Cheng 			offset -= MBLKL(mp);
1568275SEric Cheng 			if (mp->b_cont == NULL) {
1578275SEric Cheng 				/* corrupted packet, skip it */
1588275SEric Cheng 				if (prev != NULL)
1598275SEric Cheng 					prev->b_next = mp->b_next;
1608275SEric Cheng 				else
1618275SEric Cheng 					new_chain = mp->b_next;
1628275SEric Cheng 				mp1 = mp->b_next;
1638275SEric Cheng 				mp->b_next = NULL;
1648275SEric Cheng 				freemsg(mp);
1658275SEric Cheng 				mp = mp1;
1668275SEric Cheng 				continue;
1678275SEric Cheng 			}
1688275SEric Cheng 			mp = mp->b_cont;
1698275SEric Cheng 		}
1708275SEric Cheng 
1718275SEric Cheng 		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
1728275SEric Cheng 			ipha_t *ipha = NULL;
1738275SEric Cheng 
1748275SEric Cheng 			/*
1758275SEric Cheng 			 * In order to compute the full and header
1768275SEric Cheng 			 * checksums, we need to find and parse
1778275SEric Cheng 			 * the IP and/or ULP headers.
1788275SEric Cheng 			 */
1798275SEric Cheng 
1808275SEric Cheng 			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
1818275SEric Cheng 
1828275SEric Cheng 			/*
1838275SEric Cheng 			 * IP header.
1848275SEric Cheng 			 */
1858275SEric Cheng 			if (sap != ETHERTYPE_IP)
1868275SEric Cheng 				continue;
1878275SEric Cheng 
1888275SEric Cheng 			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
1898275SEric Cheng 			/* LINTED: improper alignment cast */
1908275SEric Cheng 			ipha = (ipha_t *)(mp->b_rptr + offset);
1918275SEric Cheng 
1928275SEric Cheng 			if (flags & HCK_FULLCKSUM) {
1938275SEric Cheng 				ipaddr_t src, dst;
1948275SEric Cheng 				uint32_t cksum;
1958275SEric Cheng 				uint16_t *up;
1968275SEric Cheng 				uint8_t proto;
1978275SEric Cheng 
1988275SEric Cheng 				/*
1998275SEric Cheng 				 * Pointer to checksum field in ULP header.
2008275SEric Cheng 				 */
2018275SEric Cheng 				proto = ipha->ipha_protocol;
2028275SEric Cheng 				ASSERT(ipha->ipha_version_and_hdr_length ==
2038275SEric Cheng 				    IP_SIMPLE_HDR_VERSION);
20411588Sdavid.edmondson@sun.com 
20511588Sdavid.edmondson@sun.com 				switch (proto) {
20611588Sdavid.edmondson@sun.com 				case IPPROTO_TCP:
2078275SEric Cheng 					/* LINTED: improper alignment cast */
2088275SEric Cheng 					up = IPH_TCPH_CHECKSUMP(ipha,
2098275SEric Cheng 					    IP_SIMPLE_HDR_LENGTH);
21011588Sdavid.edmondson@sun.com 					break;
21111588Sdavid.edmondson@sun.com 
21211588Sdavid.edmondson@sun.com 				case IPPROTO_UDP:
2138275SEric Cheng 					/* LINTED: improper alignment cast */
2148275SEric Cheng 					up = IPH_UDPH_CHECKSUMP(ipha,
2158275SEric Cheng 					    IP_SIMPLE_HDR_LENGTH);
21611588Sdavid.edmondson@sun.com 					break;
21711588Sdavid.edmondson@sun.com 
21811588Sdavid.edmondson@sun.com 				default:
21911588Sdavid.edmondson@sun.com 					cmn_err(CE_WARN, "mac_fix_cksum: "
22011588Sdavid.edmondson@sun.com 					    "unexpected protocol: %d", proto);
22111588Sdavid.edmondson@sun.com 					continue;
2228275SEric Cheng 				}
2238275SEric Cheng 
2248275SEric Cheng 				/*
2258275SEric Cheng 				 * Pseudo-header checksum.
2268275SEric Cheng 				 */
2278275SEric Cheng 				src = ipha->ipha_src;
2288275SEric Cheng 				dst = ipha->ipha_dst;
2298275SEric Cheng 				len = ntohs(ipha->ipha_length) -
2308275SEric Cheng 				    IP_SIMPLE_HDR_LENGTH;
2318275SEric Cheng 
2328275SEric Cheng 				cksum = (dst >> 16) + (dst & 0xFFFF) +
2338275SEric Cheng 				    (src >> 16) + (src & 0xFFFF);
2348275SEric Cheng 				cksum += htons(len);
2358275SEric Cheng 
2368275SEric Cheng 				/*
2378275SEric Cheng 				 * The checksum value stored in the packet needs
2388275SEric Cheng 				 * to be correct. Compute it here.
2398275SEric Cheng 				 */
2408275SEric Cheng 				*up = 0;
2418275SEric Cheng 				cksum += (((proto) == IPPROTO_UDP) ?
2428275SEric Cheng 				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
2438275SEric Cheng 				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
2448275SEric Cheng 				    offset, cksum);
2458275SEric Cheng 				*(up) = (uint16_t)(cksum ? cksum : ~cksum);
2468275SEric Cheng 
247*11878SVenu.Iyer@Sun.COM 				/*
248*11878SVenu.Iyer@Sun.COM 				 * Flag the packet so that it appears
249*11878SVenu.Iyer@Sun.COM 				 * that the checksum has already been
250*11878SVenu.Iyer@Sun.COM 				 * verified by the hardware.
251*11878SVenu.Iyer@Sun.COM 				 */
252*11878SVenu.Iyer@Sun.COM 				flags &= ~HCK_FULLCKSUM;
2538275SEric Cheng 				flags |= HCK_FULLCKSUM_OK;
254*11878SVenu.Iyer@Sun.COM 				value = 0;
2558275SEric Cheng 			}
2568275SEric Cheng 
2578275SEric Cheng 			if (flags & HCK_IPV4_HDRCKSUM) {
2588275SEric Cheng 				ASSERT(ipha != NULL);
2598275SEric Cheng 				ipha->ipha_hdr_checksum =
2608275SEric Cheng 				    (uint16_t)ip_csum_hdr(ipha);
261*11878SVenu.Iyer@Sun.COM 				flags &= ~HCK_IPV4_HDRCKSUM;
262*11878SVenu.Iyer@Sun.COM 				flags |= HCK_IPV4_HDRCKSUM_OK;
263*11878SVenu.Iyer@Sun.COM 
2648275SEric Cheng 			}
2658275SEric Cheng 		}
2668275SEric Cheng 
2678275SEric Cheng 		if (flags & HCK_PARTIALCKSUM) {
2688275SEric Cheng 			uint16_t *up, partial, cksum;
2698275SEric Cheng 			uchar_t *ipp; /* ptr to beginning of IP header */
2708275SEric Cheng 
2718275SEric Cheng 			if (mp->b_cont != NULL) {
2728275SEric Cheng 				mblk_t *mp1;
2738275SEric Cheng 
2748275SEric Cheng 				mp1 = msgpullup(mp, offset + end);
2758275SEric Cheng 				if (mp1 == NULL)
2768275SEric Cheng 					continue;
2778275SEric Cheng 				mp1->b_next = mp->b_next;
2788275SEric Cheng 				mp->b_next = NULL;
2798275SEric Cheng 				freemsg(mp);
2808275SEric Cheng 				if (prev != NULL)
2818275SEric Cheng 					prev->b_next = mp1;
2828275SEric Cheng 				else
2838275SEric Cheng 					new_chain = mp1;
2848275SEric Cheng 				mp = mp1;
2858275SEric Cheng 			}
2868275SEric Cheng 
2878275SEric Cheng 			ipp = mp->b_rptr + offset;
2888275SEric Cheng 			/* LINTED: cast may result in improper alignment */
2898275SEric Cheng 			up = (uint16_t *)((uchar_t *)ipp + stuff);
2908275SEric Cheng 			partial = *up;
2918275SEric Cheng 			*up = 0;
2928275SEric Cheng 
2938275SEric Cheng 			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
2948275SEric Cheng 			    end - start, partial);
2958275SEric Cheng 			cksum = ~cksum;
2968275SEric Cheng 			*up = cksum ? cksum : ~cksum;
2978275SEric Cheng 
2988275SEric Cheng 			/*
2998275SEric Cheng 			 * Since we already computed the whole checksum,
3008275SEric Cheng 			 * indicate to the stack that it has already
3018275SEric Cheng 			 * been verified by the hardware.
3028275SEric Cheng 			 */
3038275SEric Cheng 			flags &= ~HCK_PARTIALCKSUM;
304*11878SVenu.Iyer@Sun.COM 			flags |= HCK_FULLCKSUM_OK;
305*11878SVenu.Iyer@Sun.COM 			value = 0;
3068275SEric Cheng 		}
3078275SEric Cheng 
3088275SEric Cheng 		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
3098275SEric Cheng 		    value, flags, KM_NOSLEEP);
3108275SEric Cheng 	}
3118275SEric Cheng 
3128275SEric Cheng 	return (new_chain);
3138275SEric Cheng }
3148275SEric Cheng 
3158275SEric Cheng /*
3168275SEric Cheng  * Add VLAN tag to the specified mblk.
3178275SEric Cheng  */
3188275SEric Cheng mblk_t *
3198275SEric Cheng mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid)
3208275SEric Cheng {
3218275SEric Cheng 	mblk_t *hmp;
3228275SEric Cheng 	struct ether_vlan_header *evhp;
3238275SEric Cheng 	struct ether_header *ehp;
3248275SEric Cheng 	uint32_t start, stuff, end, value, flags;
3258275SEric Cheng 
3268275SEric Cheng 	ASSERT(pri != 0 || vid != 0);
3278275SEric Cheng 
3288275SEric Cheng 	/*
3298275SEric Cheng 	 * Allocate an mblk for the new tagged ethernet header,
3308275SEric Cheng 	 * and copy the MAC addresses and ethertype from the
3318275SEric Cheng 	 * original header.
3328275SEric Cheng 	 */
3338275SEric Cheng 
3348275SEric Cheng 	hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
3358275SEric Cheng 	if (hmp == NULL) {
3368275SEric Cheng 		freemsg(mp);
3378275SEric Cheng 		return (NULL);
3388275SEric Cheng 	}
3398275SEric Cheng 
3408275SEric Cheng 	evhp = (struct ether_vlan_header *)hmp->b_rptr;
3418275SEric Cheng 	ehp = (struct ether_header *)mp->b_rptr;
3428275SEric Cheng 
3438275SEric Cheng 	bcopy(ehp, evhp, (ETHERADDRL * 2));
3448275SEric Cheng 	evhp->ether_type = ehp->ether_type;
3458275SEric Cheng 	evhp->ether_tpid = htons(ETHERTYPE_VLAN);
3468275SEric Cheng 
3478275SEric Cheng 	hmp->b_wptr += sizeof (struct ether_vlan_header);
3488275SEric Cheng 	mp->b_rptr += sizeof (struct ether_header);
3498275SEric Cheng 
3508275SEric Cheng 	/*
3518275SEric Cheng 	 * Free the original message if it's now empty. Link the
3528275SEric Cheng 	 * rest of messages to the header message.
3538275SEric Cheng 	 */
3548275SEric Cheng 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
3558275SEric Cheng 	(void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags,
3568275SEric Cheng 	    KM_NOSLEEP);
3578275SEric Cheng 	if (MBLKL(mp) == 0) {
3588275SEric Cheng 		hmp->b_cont = mp->b_cont;
3598275SEric Cheng 		freeb(mp);
3608275SEric Cheng 	} else {
3618275SEric Cheng 		hmp->b_cont = mp;
3628275SEric Cheng 	}
3638275SEric Cheng 	ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header));
3648275SEric Cheng 
3658275SEric Cheng 	/*
3668275SEric Cheng 	 * Initialize the new TCI (Tag Control Information).
3678275SEric Cheng 	 */
3688275SEric Cheng 	evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid));
3698275SEric Cheng 
3708275SEric Cheng 	return (hmp);
3718275SEric Cheng }
3728275SEric Cheng 
3738275SEric Cheng /*
3748275SEric Cheng  * Adds a VLAN tag with the specified VID and priority to each mblk of
3758275SEric Cheng  * the specified chain.
3768275SEric Cheng  */
3778275SEric Cheng mblk_t *
3788275SEric Cheng mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid)
3798275SEric Cheng {
3808275SEric Cheng 	mblk_t *next_mp, **prev, *mp;
3818275SEric Cheng 
3828275SEric Cheng 	mp = mp_chain;
3838275SEric Cheng 	prev = &mp_chain;
3848275SEric Cheng 
3858275SEric Cheng 	while (mp != NULL) {
3868275SEric Cheng 		next_mp = mp->b_next;
3878275SEric Cheng 		mp->b_next = NULL;
3888275SEric Cheng 		if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) {
3898275SEric Cheng 			freemsgchain(next_mp);
3908275SEric Cheng 			break;
3918275SEric Cheng 		}
3928275SEric Cheng 		*prev = mp;
3938275SEric Cheng 		prev = &mp->b_next;
3948275SEric Cheng 		mp = mp->b_next = next_mp;
3958275SEric Cheng 	}
3968275SEric Cheng 
3978275SEric Cheng 	return (mp_chain);
3988275SEric Cheng }
3998275SEric Cheng 
4008275SEric Cheng /*
4018275SEric Cheng  * Strip VLAN tag
4028275SEric Cheng  */
4038275SEric Cheng mblk_t *
4048275SEric Cheng mac_strip_vlan_tag(mblk_t *mp)
4058275SEric Cheng {
4068275SEric Cheng 	mblk_t *newmp;
4078275SEric Cheng 	struct ether_vlan_header *evhp;
4088275SEric Cheng 
4098275SEric Cheng 	evhp = (struct ether_vlan_header *)mp->b_rptr;
4108275SEric Cheng 	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
4118275SEric Cheng 		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
4128275SEric Cheng 
4138275SEric Cheng 		if (DB_REF(mp) > 1) {
4148275SEric Cheng 			newmp = copymsg(mp);
4158275SEric Cheng 			if (newmp == NULL)
4168275SEric Cheng 				return (NULL);
4178275SEric Cheng 			freemsg(mp);
4188275SEric Cheng 			mp = newmp;
4198275SEric Cheng 		}
4208275SEric Cheng 
4218275SEric Cheng 		evhp = (struct ether_vlan_header *)mp->b_rptr;
4228275SEric Cheng 
4238275SEric Cheng 		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
4248275SEric Cheng 		mp->b_rptr += VLAN_TAGSZ;
4258275SEric Cheng 	}
4268275SEric Cheng 	return (mp);
4278275SEric Cheng }
4288275SEric Cheng 
4298275SEric Cheng /*
4308275SEric Cheng  * Strip VLAN tag from each mblk of the chain.
4318275SEric Cheng  */
4328275SEric Cheng mblk_t *
4338275SEric Cheng mac_strip_vlan_tag_chain(mblk_t *mp_chain)
4348275SEric Cheng {
4358275SEric Cheng 	mblk_t *mp, *next_mp, **prev;
4368275SEric Cheng 
4378275SEric Cheng 	mp = mp_chain;
4388275SEric Cheng 	prev = &mp_chain;
4398275SEric Cheng 
4408275SEric Cheng 	while (mp != NULL) {
4418275SEric Cheng 		next_mp = mp->b_next;
4428275SEric Cheng 		mp->b_next = NULL;
4438275SEric Cheng 		if ((mp = mac_strip_vlan_tag(mp)) == NULL) {
4448275SEric Cheng 			freemsgchain(next_mp);
4458275SEric Cheng 			break;
4468275SEric Cheng 		}
4478275SEric Cheng 		*prev = mp;
4488275SEric Cheng 		prev = &mp->b_next;
4498275SEric Cheng 		mp = mp->b_next = next_mp;
4508275SEric Cheng 	}
4518275SEric Cheng 
4528275SEric Cheng 	return (mp_chain);
4538275SEric Cheng }
4548275SEric Cheng 
4558275SEric Cheng /*
4568275SEric Cheng  * Default callback function. Used when the datapath is not yet initialized.
4578275SEric Cheng  */
4588275SEric Cheng /* ARGSUSED */
4598275SEric Cheng void
4608275SEric Cheng mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp,
4618275SEric Cheng     boolean_t loopback)
4628275SEric Cheng {
4638275SEric Cheng 	mblk_t	*mp1 = mp;
4648275SEric Cheng 
4658275SEric Cheng 	while (mp1 != NULL) {
4668275SEric Cheng 		mp1->b_prev = NULL;
4678275SEric Cheng 		mp1->b_queue = NULL;
4688275SEric Cheng 		mp1 = mp1->b_next;
4698275SEric Cheng 	}
4708275SEric Cheng 	freemsgchain(mp);
4718275SEric Cheng }
4728275SEric Cheng 
4738275SEric Cheng /*
4748275SEric Cheng  * Determines the IPv6 header length accounting for all the optional IPv6
4758275SEric Cheng  * headers (hop-by-hop, destination, routing and fragment). The header length
4768275SEric Cheng  * and next header value (a transport header) is captured.
4778275SEric Cheng  *
4788275SEric Cheng  * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
4798275SEric Cheng  * returns B_TRUE.
4808275SEric Cheng  */
4818275SEric Cheng boolean_t
482*11878SVenu.Iyer@Sun.COM mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length,
483*11878SVenu.Iyer@Sun.COM     uint8_t *next_hdr, ip6_frag_t **fragp)
4848275SEric Cheng {
4858275SEric Cheng 	uint16_t length;
4868275SEric Cheng 	uint_t	ehdrlen;
4878275SEric Cheng 	uint8_t *whereptr;
4888275SEric Cheng 	uint8_t *nexthdrp;
4898275SEric Cheng 	ip6_dest_t *desthdr;
4908275SEric Cheng 	ip6_rthdr_t *rthdr;
4918275SEric Cheng 	ip6_frag_t *fraghdr;
4928275SEric Cheng 
4938275SEric Cheng 	if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
4948275SEric Cheng 		return (B_FALSE);
49511042SErik.Nordmark@Sun.COM 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
4968275SEric Cheng 	length = IPV6_HDR_LEN;
4978275SEric Cheng 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
4988275SEric Cheng 
499*11878SVenu.Iyer@Sun.COM 	if (fragp != NULL)
500*11878SVenu.Iyer@Sun.COM 		*fragp = NULL;
50111528SBaban.Kenkre@Sun.COM 
5028275SEric Cheng 	nexthdrp = &ip6h->ip6_nxt;
5038275SEric Cheng 	while (whereptr < endptr) {
5048275SEric Cheng 		/* Is there enough left for len + nexthdr? */
5058275SEric Cheng 		if (whereptr + MIN_EHDR_LEN > endptr)
5068275SEric Cheng 			break;
5078275SEric Cheng 
5088275SEric Cheng 		switch (*nexthdrp) {
5098275SEric Cheng 		case IPPROTO_HOPOPTS:
5108275SEric Cheng 		case IPPROTO_DSTOPTS:
5118275SEric Cheng 			/* Assumes the headers are identical for hbh and dst */
5128275SEric Cheng 			desthdr = (ip6_dest_t *)whereptr;
5138275SEric Cheng 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
5148275SEric Cheng 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
5158275SEric Cheng 				return (B_FALSE);
5168275SEric Cheng 			nexthdrp = &desthdr->ip6d_nxt;
5178275SEric Cheng 			break;
5188275SEric Cheng 		case IPPROTO_ROUTING:
5198275SEric Cheng 			rthdr = (ip6_rthdr_t *)whereptr;
5208275SEric Cheng 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
5218275SEric Cheng 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
5228275SEric Cheng 				return (B_FALSE);
5238275SEric Cheng 			nexthdrp = &rthdr->ip6r_nxt;
5248275SEric Cheng 			break;
5258275SEric Cheng 		case IPPROTO_FRAGMENT:
5268275SEric Cheng 			fraghdr = (ip6_frag_t *)whereptr;
5278275SEric Cheng 			ehdrlen = sizeof (ip6_frag_t);
5288275SEric Cheng 			if ((uchar_t *)&fraghdr[1] > endptr)
5298275SEric Cheng 				return (B_FALSE);
5308275SEric Cheng 			nexthdrp = &fraghdr->ip6f_nxt;
531*11878SVenu.Iyer@Sun.COM 			if (fragp != NULL)
532*11878SVenu.Iyer@Sun.COM 				*fragp = fraghdr;
5338275SEric Cheng 			break;
5348275SEric Cheng 		case IPPROTO_NONE:
5358275SEric Cheng 			/* No next header means we're finished */
5368275SEric Cheng 		default:
5378275SEric Cheng 			*hdr_length = length;
5388275SEric Cheng 			*next_hdr = *nexthdrp;
5398275SEric Cheng 			return (B_TRUE);
5408275SEric Cheng 		}
5418275SEric Cheng 		length += ehdrlen;
5428275SEric Cheng 		whereptr += ehdrlen;
5438275SEric Cheng 		*hdr_length = length;
5448275SEric Cheng 		*next_hdr = *nexthdrp;
5458275SEric Cheng 	}
5468275SEric Cheng 	switch (*nexthdrp) {
5478275SEric Cheng 	case IPPROTO_HOPOPTS:
5488275SEric Cheng 	case IPPROTO_DSTOPTS:
5498275SEric Cheng 	case IPPROTO_ROUTING:
5508275SEric Cheng 	case IPPROTO_FRAGMENT:
5518275SEric Cheng 		/*
5528275SEric Cheng 		 * If any know extension headers are still to be processed,
5538275SEric Cheng 		 * the packet's malformed (or at least all the IP header(s) are
5548275SEric Cheng 		 * not in the same mblk - and that should never happen.
5558275SEric Cheng 		 */
5568275SEric Cheng 		return (B_FALSE);
5578275SEric Cheng 
5588275SEric Cheng 	default:
5598275SEric Cheng 		/*
5608275SEric Cheng 		 * If we get here, we know that all of the IP headers were in
5618275SEric Cheng 		 * the same mblk, even if the ULP header is in the next mblk.
5628275SEric Cheng 		 */
5638275SEric Cheng 		*hdr_length = length;
5648275SEric Cheng 		*next_hdr = *nexthdrp;
5658275SEric Cheng 		return (B_TRUE);
5668275SEric Cheng 	}
5678275SEric Cheng }
5688275SEric Cheng 
569*11878SVenu.Iyer@Sun.COM /*
570*11878SVenu.Iyer@Sun.COM  * The following set of routines are there to take care of interrupt
571*11878SVenu.Iyer@Sun.COM  * re-targeting for legacy (fixed) interrupts. Some older versions
572*11878SVenu.Iyer@Sun.COM  * of the popular NICs like e1000g do not support MSI-X interrupts
573*11878SVenu.Iyer@Sun.COM  * and they reserve fixed interrupts for RX/TX rings. To re-target
574*11878SVenu.Iyer@Sun.COM  * these interrupts, PCITOOL ioctls need to be used.
575*11878SVenu.Iyer@Sun.COM  */
5768275SEric Cheng typedef struct mac_dladm_intr {
5778275SEric Cheng 	int	ino;
5788275SEric Cheng 	int	cpu_id;
5798275SEric Cheng 	char	driver_path[MAXPATHLEN];
5808275SEric Cheng 	char	nexus_path[MAXPATHLEN];
5818275SEric Cheng } mac_dladm_intr_t;
5828275SEric Cheng 
5838275SEric Cheng /* Bind the interrupt to cpu_num */
5848275SEric Cheng static int
5858275SEric Cheng mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int ino)
5868275SEric Cheng {
5878275SEric Cheng 	pcitool_intr_set_t	iset;
5888275SEric Cheng 	int			err;
5898275SEric Cheng 
5908275SEric Cheng 	iset.ino = ino;
5918275SEric Cheng 	iset.cpu_id = cpu_num;
5928275SEric Cheng 	iset.user_version = PCITOOL_VERSION;
5938275SEric Cheng 	err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL,
5948275SEric Cheng 	    kcred, NULL);
5958275SEric Cheng 
5968275SEric Cheng 	return (err);
5978275SEric Cheng }
5988275SEric Cheng 
5998275SEric Cheng /*
6008275SEric Cheng  * Search interrupt information. iget is filled in with the info to search
6018275SEric Cheng  */
6028275SEric Cheng static boolean_t
6038275SEric Cheng mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln)
6048275SEric Cheng {
6058275SEric Cheng 	int	i;
6068275SEric Cheng 	char	driver_path[2 * MAXPATHLEN];
6078275SEric Cheng 
6088275SEric Cheng 	for (i = 0; i < iget_p->num_devs; i++) {
6098275SEric Cheng 		(void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN);
6108275SEric Cheng 		(void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN,
6118275SEric Cheng 		    ":%s%d", iget_p->dev[i].driver_name,
6128275SEric Cheng 		    iget_p->dev[i].dev_inst);
6138275SEric Cheng 		/* Match the device path for the device path */
6148275SEric Cheng 		if (strcmp(driver_path, dln->driver_path) == 0) {
6158275SEric Cheng 			dln->ino = iget_p->ino;
6168275SEric Cheng 			dln->cpu_id = iget_p->cpu_id;
6178275SEric Cheng 			return (B_TRUE);
6188275SEric Cheng 		}
6198275SEric Cheng 	}
6208275SEric Cheng 	return (B_FALSE);
6218275SEric Cheng }
6228275SEric Cheng 
6238275SEric Cheng /*
6248275SEric Cheng  * Get information about ino, i.e. if this is the interrupt for our
6258275SEric Cheng  * device and where it is bound etc.
6268275SEric Cheng  */
6278275SEric Cheng static boolean_t
6288275SEric Cheng mac_get_single_intr(ldi_handle_t lh, int ino, mac_dladm_intr_t *dln)
6298275SEric Cheng {
6308275SEric Cheng 	pcitool_intr_get_t	*iget_p;
6318275SEric Cheng 	int			ipsz;
6328275SEric Cheng 	int			nipsz;
6338275SEric Cheng 	int			err;
6348275SEric Cheng 	uint8_t			inum;
6358275SEric Cheng 
6368275SEric Cheng 	/*
6378275SEric Cheng 	 * Check if SLEEP is OK, i.e if could come here in response to
6388275SEric Cheng 	 * changing the fanout due to some callback from the driver, say
6398275SEric Cheng 	 * link speed changes.
6408275SEric Cheng 	 */
6418275SEric Cheng 	ipsz = PCITOOL_IGET_SIZE(0);
6428275SEric Cheng 	iget_p = kmem_zalloc(ipsz, KM_SLEEP);
6438275SEric Cheng 
6448275SEric Cheng 	iget_p->num_devs_ret = 0;
6458275SEric Cheng 	iget_p->user_version = PCITOOL_VERSION;
6468275SEric Cheng 	iget_p->ino = ino;
6478275SEric Cheng 
6488275SEric Cheng 	err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
6498275SEric Cheng 	    FKIOCTL, kcred, NULL);
6508275SEric Cheng 	if (err != 0) {
6518275SEric Cheng 		kmem_free(iget_p, ipsz);
6528275SEric Cheng 		return (B_FALSE);
6538275SEric Cheng 	}
6548275SEric Cheng 	if (iget_p->num_devs == 0) {
6558275SEric Cheng 		kmem_free(iget_p, ipsz);
6568275SEric Cheng 		return (B_FALSE);
6578275SEric Cheng 	}
6588275SEric Cheng 	inum = iget_p->num_devs;
6598275SEric Cheng 	if (iget_p->num_devs_ret < iget_p->num_devs) {
6608275SEric Cheng 		/* Reallocate */
6618275SEric Cheng 		nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs);
6628275SEric Cheng 
6638275SEric Cheng 		kmem_free(iget_p, ipsz);
6648275SEric Cheng 		ipsz = nipsz;
6658275SEric Cheng 		iget_p = kmem_zalloc(ipsz, KM_SLEEP);
6668275SEric Cheng 
6678275SEric Cheng 		iget_p->num_devs_ret = inum;
6688275SEric Cheng 		iget_p->ino = ino;
6698275SEric Cheng 		iget_p->user_version = PCITOOL_VERSION;
6708275SEric Cheng 		err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
6718275SEric Cheng 		    FKIOCTL, kcred, NULL);
6728275SEric Cheng 		if (err != 0) {
6738275SEric Cheng 			kmem_free(iget_p, ipsz);
6748275SEric Cheng 			return (B_FALSE);
6758275SEric Cheng 		}
6768275SEric Cheng 		/* defensive */
6778275SEric Cheng 		if (iget_p->num_devs != iget_p->num_devs_ret) {
6788275SEric Cheng 			kmem_free(iget_p, ipsz);
6798275SEric Cheng 			return (B_FALSE);
6808275SEric Cheng 		}
6818275SEric Cheng 	}
6828275SEric Cheng 
6838275SEric Cheng 	if (mac_search_intrinfo(iget_p, dln)) {
6848275SEric Cheng 		kmem_free(iget_p, ipsz);
6858275SEric Cheng 		return (B_TRUE);
6868275SEric Cheng 	}
6878275SEric Cheng 	kmem_free(iget_p, ipsz);
6888275SEric Cheng 	return (B_FALSE);
6898275SEric Cheng }
6908275SEric Cheng 
6918275SEric Cheng /*
6928275SEric Cheng  * Get the interrupts and check each one to see if it is for our device.
6938275SEric Cheng  */
6948275SEric Cheng static int
6958275SEric Cheng mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid)
6968275SEric Cheng {
6978275SEric Cheng 	pcitool_intr_info_t	intr_info;
6988275SEric Cheng 	int			err;
6998275SEric Cheng 	int			ino;
7008275SEric Cheng 
7018275SEric Cheng 	err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info,
7028275SEric Cheng 	    FKIOCTL, kcred, NULL);
7038275SEric Cheng 	if (err != 0)
7048275SEric Cheng 		return (-1);
7058275SEric Cheng 
7068275SEric Cheng 	for (ino = 0; ino < intr_info.num_intr; ino++) {
7078275SEric Cheng 		if (mac_get_single_intr(lh, ino, dln)) {
7088275SEric Cheng 			if (dln->cpu_id == cpuid)
7098275SEric Cheng 				return (0);
7108275SEric Cheng 			return (1);
7118275SEric Cheng 		}
7128275SEric Cheng 	}
7138275SEric Cheng 	return (-1);
7148275SEric Cheng }
7158275SEric Cheng 
7168275SEric Cheng /*
7178275SEric Cheng  * Obtain the nexus parent node info. for mdip.
7188275SEric Cheng  */
7198275SEric Cheng static dev_info_t *
7208275SEric Cheng mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln)
7218275SEric Cheng {
7228275SEric Cheng 	struct dev_info		*tdip = (struct dev_info *)mdip;
7238275SEric Cheng 	struct ddi_minor_data	*minordata;
7248275SEric Cheng 	int			circ;
7258275SEric Cheng 	dev_info_t		*pdip;
7268275SEric Cheng 	char			pathname[MAXPATHLEN];
7278275SEric Cheng 
7288275SEric Cheng 	while (tdip != NULL) {
7299359SEric Cheng 		/*
7309359SEric Cheng 		 * The netboot code could call this function while walking the
7319359SEric Cheng 		 * device tree so we need to use ndi_devi_tryenter() here to
7329359SEric Cheng 		 * avoid deadlock.
7339359SEric Cheng 		 */
7349359SEric Cheng 		if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0)
7359359SEric Cheng 			break;
7369359SEric Cheng 
7378275SEric Cheng 		for (minordata = tdip->devi_minor; minordata != NULL;
7388275SEric Cheng 		    minordata = minordata->next) {
7398275SEric Cheng 			if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL,
7408275SEric Cheng 			    strlen(DDI_NT_INTRCTL)) == 0) {
7418275SEric Cheng 				pdip = minordata->dip;
7428275SEric Cheng 				(void) ddi_pathname(pdip, pathname);
7438275SEric Cheng 				(void) snprintf(dln->nexus_path, MAXPATHLEN,
7448275SEric Cheng 				    "/devices%s:intr", pathname);
7458275SEric Cheng 				(void) ddi_pathname_minor(minordata, pathname);
7468275SEric Cheng 				ndi_devi_exit((dev_info_t *)tdip, circ);
7478275SEric Cheng 				return (pdip);
7488275SEric Cheng 			}
7498275SEric Cheng 		}
7508275SEric Cheng 		ndi_devi_exit((dev_info_t *)tdip, circ);
7518275SEric Cheng 		tdip = tdip->devi_parent;
7528275SEric Cheng 	}
7538275SEric Cheng 	return (NULL);
7548275SEric Cheng }
7558275SEric Cheng 
7568275SEric Cheng /*
7578275SEric Cheng  * For a primary MAC client, if the user has set a list or CPUs or
7588275SEric Cheng  * we have obtained it implicitly, we try to retarget the interrupt
7598275SEric Cheng  * for that device on one of the CPUs in the list.
7608275SEric Cheng  * We assign the interrupt to the same CPU as the poll thread.
7618275SEric Cheng  */
7628275SEric Cheng static boolean_t
7638275SEric Cheng mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid)
7648275SEric Cheng {
7658275SEric Cheng 	ldi_handle_t		lh = NULL;
7668275SEric Cheng 	ldi_ident_t		li = NULL;
7678275SEric Cheng 	int			err;
7688275SEric Cheng 	int			ret;
7698275SEric Cheng 	mac_dladm_intr_t	dln;
7708275SEric Cheng 	dev_info_t		*dip;
7718275SEric Cheng 	struct ddi_minor_data	*minordata;
7728275SEric Cheng 
7738275SEric Cheng 	dln.nexus_path[0] = '\0';
7748275SEric Cheng 	dln.driver_path[0] = '\0';
7758275SEric Cheng 
7768275SEric Cheng 	minordata = ((struct dev_info *)mdip)->devi_minor;
7778275SEric Cheng 	while (minordata != NULL) {
7788275SEric Cheng 		if (minordata->type == DDM_MINOR)
7798275SEric Cheng 			break;
7808275SEric Cheng 		minordata = minordata->next;
7818275SEric Cheng 	}
7828275SEric Cheng 	if (minordata == NULL)
7838275SEric Cheng 		return (B_FALSE);
7848275SEric Cheng 
7858275SEric Cheng 	(void) ddi_pathname_minor(minordata, dln.driver_path);
7868275SEric Cheng 
7878275SEric Cheng 	dip = mac_get_nexus_node(mdip, &dln);
7888275SEric Cheng 	/* defensive */
7898275SEric Cheng 	if (dip == NULL)
7908275SEric Cheng 		return (B_FALSE);
7918275SEric Cheng 
7928275SEric Cheng 	err = ldi_ident_from_major(ddi_driver_major(dip), &li);
7938275SEric Cheng 	if (err != 0)
7948275SEric Cheng 		return (B_FALSE);
7958275SEric Cheng 
7968275SEric Cheng 	err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li);
7978275SEric Cheng 	if (err != 0)
7988275SEric Cheng 		return (B_FALSE);
7998275SEric Cheng 
8008275SEric Cheng 	ret = mac_validate_intr(lh, &dln, cpuid);
8018275SEric Cheng 	if (ret < 0) {
8028275SEric Cheng 		(void) ldi_close(lh, FREAD|FWRITE, kcred);
8038275SEric Cheng 		return (B_FALSE);
8048275SEric Cheng 	}
8058275SEric Cheng 	/* cmn_note? */
8068275SEric Cheng 	if (ret != 0)
8078275SEric Cheng 		if ((err = (mac_set_intr(lh, cpuid, dln.ino))) != 0) {
8088275SEric Cheng 			(void) ldi_close(lh, FREAD|FWRITE, kcred);
8098275SEric Cheng 			return (B_FALSE);
8108275SEric Cheng 		}
8118275SEric Cheng 	(void) ldi_close(lh, FREAD|FWRITE, kcred);
8128275SEric Cheng 	return (B_TRUE);
8138275SEric Cheng }
8148275SEric Cheng 
8158275SEric Cheng void
8168275SEric Cheng mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid)
8178275SEric Cheng {
8188275SEric Cheng 	dev_info_t		*mdip = (dev_info_t *)arg;
8198275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
8208275SEric Cheng 	mac_resource_props_t	*mrp;
8218275SEric Cheng 	mac_perim_handle_t	mph;
822*11878SVenu.Iyer@Sun.COM 	flow_entry_t		*flent = mcip->mci_flent;
823*11878SVenu.Iyer@Sun.COM 	mac_soft_ring_set_t	*rx_srs;
824*11878SVenu.Iyer@Sun.COM 	mac_cpus_t		*srs_cpu;
8258275SEric Cheng 
826*11878SVenu.Iyer@Sun.COM 	if (!mac_check_interrupt_binding(mdip, cpuid))
827*11878SVenu.Iyer@Sun.COM 		cpuid = -1;
8288275SEric Cheng 	mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph);
8298275SEric Cheng 	mrp = MCIP_RESOURCE_PROPS(mcip);
830*11878SVenu.Iyer@Sun.COM 	mrp->mrp_rx_intr_cpu = cpuid;
831*11878SVenu.Iyer@Sun.COM 	if (flent != NULL && flent->fe_rx_srs_cnt == 2) {
832*11878SVenu.Iyer@Sun.COM 		rx_srs = flent->fe_rx_srs[1];
833*11878SVenu.Iyer@Sun.COM 		srs_cpu = &rx_srs->srs_cpu;
834*11878SVenu.Iyer@Sun.COM 		srs_cpu->mc_rx_intr_cpu = cpuid;
835*11878SVenu.Iyer@Sun.COM 	}
8368275SEric Cheng 	mac_perim_exit(mph);
8378275SEric Cheng }
8388275SEric Cheng 
8398275SEric Cheng int32_t
8408275SEric Cheng mac_client_intr_cpu(mac_client_handle_t mch)
8418275SEric Cheng {
8428275SEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
8438275SEric Cheng 	mac_cpus_t		*srs_cpu;
8448275SEric Cheng 	mac_soft_ring_set_t	*rx_srs;
8458275SEric Cheng 	flow_entry_t		*flent = mcip->mci_flent;
8468275SEric Cheng 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
847*11878SVenu.Iyer@Sun.COM 	mac_ring_t		*ring;
848*11878SVenu.Iyer@Sun.COM 	mac_intr_t		*mintr;
8498275SEric Cheng 
8508275SEric Cheng 	/*
8518275SEric Cheng 	 * Check if we need to retarget the interrupt. We do this only
8528275SEric Cheng 	 * for the primary MAC client. We do this if we have the only
853*11878SVenu.Iyer@Sun.COM 	 * exclusive ring in the group.
8548275SEric Cheng 	 */
8558275SEric Cheng 	if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) {
8568275SEric Cheng 		rx_srs = flent->fe_rx_srs[1];
8578275SEric Cheng 		srs_cpu = &rx_srs->srs_cpu;
858*11878SVenu.Iyer@Sun.COM 		ring = rx_srs->srs_ring;
859*11878SVenu.Iyer@Sun.COM 		mintr = &ring->mr_info.mri_intr;
860*11878SVenu.Iyer@Sun.COM 		/*
861*11878SVenu.Iyer@Sun.COM 		 * If ddi_handle is present or the poll CPU is
862*11878SVenu.Iyer@Sun.COM 		 * already bound to the interrupt CPU, return -1.
863*11878SVenu.Iyer@Sun.COM 		 */
864*11878SVenu.Iyer@Sun.COM 		if (mintr->mi_ddi_handle != NULL ||
865*11878SVenu.Iyer@Sun.COM 		    ((mrp->mrp_ncpus != 0) &&
866*11878SVenu.Iyer@Sun.COM 		    (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) {
8678275SEric Cheng 			return (-1);
868*11878SVenu.Iyer@Sun.COM 		}
869*11878SVenu.Iyer@Sun.COM 		return (srs_cpu->mc_rx_pollid);
8708275SEric Cheng 	}
8718275SEric Cheng 	return (-1);
8728275SEric Cheng }
8738275SEric Cheng 
8748275SEric Cheng void *
8758275SEric Cheng mac_get_devinfo(mac_handle_t mh)
8768275SEric Cheng {
8778275SEric Cheng 	mac_impl_t	*mip = (mac_impl_t *)mh;
8788275SEric Cheng 
8798275SEric Cheng 	return ((void *)mip->mi_dip);
8808275SEric Cheng }
8818833SVenu.Iyer@Sun.COM 
/*
 * XOR-fold the first 2, 4 or 6 bytes addressed by x into a single value.
 * x is evaluated more than once, so it must not have side effects.
 */
#define	PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
#define	PKT_HASH_4BYTES(x) (PKT_HASH_2BYTES(x) ^ (x)[2] ^ (x)[3])
#define	PKT_HASH_MAC(x) (PKT_HASH_4BYTES(x) ^ (x)[4] ^ (x)[5])
8858833SVenu.Iyer@Sun.COM 
8868833SVenu.Iyer@Sun.COM uint64_t
8878833SVenu.Iyer@Sun.COM mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound)
8888833SVenu.Iyer@Sun.COM {
8898833SVenu.Iyer@Sun.COM 	struct ether_header *ehp;
8908833SVenu.Iyer@Sun.COM 	uint64_t hash = 0;
8918833SVenu.Iyer@Sun.COM 	uint16_t sap;
8928833SVenu.Iyer@Sun.COM 	uint_t skip_len;
8938833SVenu.Iyer@Sun.COM 	uint8_t proto;
89411528SBaban.Kenkre@Sun.COM 	boolean_t ip_fragmented;
8958833SVenu.Iyer@Sun.COM 
8968833SVenu.Iyer@Sun.COM 	/*
8978833SVenu.Iyer@Sun.COM 	 * We may want to have one of these per MAC type plugin in the
8988833SVenu.Iyer@Sun.COM 	 * future. For now supports only ethernet.
8998833SVenu.Iyer@Sun.COM 	 */
9008833SVenu.Iyer@Sun.COM 	if (media != DL_ETHER)
9018833SVenu.Iyer@Sun.COM 		return (0L);
9028833SVenu.Iyer@Sun.COM 
9038833SVenu.Iyer@Sun.COM 	/* for now we support only outbound packets */
9048833SVenu.Iyer@Sun.COM 	ASSERT(is_outbound);
9058833SVenu.Iyer@Sun.COM 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
9068833SVenu.Iyer@Sun.COM 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
9078833SVenu.Iyer@Sun.COM 
9088833SVenu.Iyer@Sun.COM 	/* compute L2 hash */
9098833SVenu.Iyer@Sun.COM 
9108833SVenu.Iyer@Sun.COM 	ehp = (struct ether_header *)mp->b_rptr;
9118833SVenu.Iyer@Sun.COM 
9128833SVenu.Iyer@Sun.COM 	if ((policy & MAC_PKT_HASH_L2) != 0) {
9138833SVenu.Iyer@Sun.COM 		uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
9148833SVenu.Iyer@Sun.COM 		uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
9158833SVenu.Iyer@Sun.COM 		hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
9168833SVenu.Iyer@Sun.COM 		policy &= ~MAC_PKT_HASH_L2;
9178833SVenu.Iyer@Sun.COM 	}
9188833SVenu.Iyer@Sun.COM 
9198833SVenu.Iyer@Sun.COM 	if (policy == 0)
9208833SVenu.Iyer@Sun.COM 		goto done;
9218833SVenu.Iyer@Sun.COM 
9228833SVenu.Iyer@Sun.COM 	/* skip ethernet header */
9238833SVenu.Iyer@Sun.COM 
9248833SVenu.Iyer@Sun.COM 	sap = ntohs(ehp->ether_type);
9258833SVenu.Iyer@Sun.COM 	if (sap == ETHERTYPE_VLAN) {
9268833SVenu.Iyer@Sun.COM 		struct ether_vlan_header *evhp;
9278833SVenu.Iyer@Sun.COM 		mblk_t *newmp = NULL;
9288833SVenu.Iyer@Sun.COM 
9298833SVenu.Iyer@Sun.COM 		skip_len = sizeof (struct ether_vlan_header);
9308833SVenu.Iyer@Sun.COM 		if (MBLKL(mp) < skip_len) {
9318833SVenu.Iyer@Sun.COM 			/* the vlan tag is the payload, pull up first */
9328833SVenu.Iyer@Sun.COM 			newmp = msgpullup(mp, -1);
9338833SVenu.Iyer@Sun.COM 			if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
9348833SVenu.Iyer@Sun.COM 				goto done;
9358833SVenu.Iyer@Sun.COM 			}
9368833SVenu.Iyer@Sun.COM 			evhp = (struct ether_vlan_header *)newmp->b_rptr;
9378833SVenu.Iyer@Sun.COM 		} else {
9388833SVenu.Iyer@Sun.COM 			evhp = (struct ether_vlan_header *)mp->b_rptr;
9398833SVenu.Iyer@Sun.COM 		}
9408833SVenu.Iyer@Sun.COM 
9418833SVenu.Iyer@Sun.COM 		sap = ntohs(evhp->ether_type);
9428833SVenu.Iyer@Sun.COM 		freemsg(newmp);
9438833SVenu.Iyer@Sun.COM 	} else {
9448833SVenu.Iyer@Sun.COM 		skip_len = sizeof (struct ether_header);
9458833SVenu.Iyer@Sun.COM 	}
9468833SVenu.Iyer@Sun.COM 
9478833SVenu.Iyer@Sun.COM 	/* if ethernet header is in its own mblk, skip it */
9488833SVenu.Iyer@Sun.COM 	if (MBLKL(mp) <= skip_len) {
9498833SVenu.Iyer@Sun.COM 		skip_len -= MBLKL(mp);
9508833SVenu.Iyer@Sun.COM 		mp = mp->b_cont;
9518833SVenu.Iyer@Sun.COM 		if (mp == NULL)
9528833SVenu.Iyer@Sun.COM 			goto done;
9538833SVenu.Iyer@Sun.COM 	}
9548833SVenu.Iyer@Sun.COM 
9558833SVenu.Iyer@Sun.COM 	sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
9568833SVenu.Iyer@Sun.COM 
9578833SVenu.Iyer@Sun.COM 	/* compute IP src/dst addresses hash and skip IPv{4,6} header */
9588833SVenu.Iyer@Sun.COM 
9598833SVenu.Iyer@Sun.COM 	switch (sap) {
9608833SVenu.Iyer@Sun.COM 	case ETHERTYPE_IP: {
9618833SVenu.Iyer@Sun.COM 		ipha_t *iphp;
9628833SVenu.Iyer@Sun.COM 
9638833SVenu.Iyer@Sun.COM 		/*
9648833SVenu.Iyer@Sun.COM 		 * If the header is not aligned or the header doesn't fit
9658833SVenu.Iyer@Sun.COM 		 * in the mblk, bail now. Note that this may cause packets
9668833SVenu.Iyer@Sun.COM 		 * reordering.
9678833SVenu.Iyer@Sun.COM 		 */
9688833SVenu.Iyer@Sun.COM 		iphp = (ipha_t *)(mp->b_rptr + skip_len);
9698833SVenu.Iyer@Sun.COM 		if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
9708833SVenu.Iyer@Sun.COM 		    !OK_32PTR((char *)iphp))
9718833SVenu.Iyer@Sun.COM 			goto done;
9728833SVenu.Iyer@Sun.COM 
9738833SVenu.Iyer@Sun.COM 		proto = iphp->ipha_protocol;
9748833SVenu.Iyer@Sun.COM 		skip_len += IPH_HDR_LENGTH(iphp);
9758833SVenu.Iyer@Sun.COM 
97611528SBaban.Kenkre@Sun.COM 		/* Check if the packet is fragmented. */
97711528SBaban.Kenkre@Sun.COM 		ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
97811528SBaban.Kenkre@Sun.COM 		    IPH_OFFSET;
97911528SBaban.Kenkre@Sun.COM 
98011528SBaban.Kenkre@Sun.COM 		/*
98111528SBaban.Kenkre@Sun.COM 		 * For fragmented packets, use addresses in addition to
98211528SBaban.Kenkre@Sun.COM 		 * the frag_id to generate the hash inorder to get
98311528SBaban.Kenkre@Sun.COM 		 * better distribution.
98411528SBaban.Kenkre@Sun.COM 		 */
98511528SBaban.Kenkre@Sun.COM 		if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) {
9868833SVenu.Iyer@Sun.COM 			uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
9878833SVenu.Iyer@Sun.COM 			uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
9888833SVenu.Iyer@Sun.COM 
9898833SVenu.Iyer@Sun.COM 			hash ^= (PKT_HASH_4BYTES(ip_src) ^
9908833SVenu.Iyer@Sun.COM 			    PKT_HASH_4BYTES(ip_dst));
9918833SVenu.Iyer@Sun.COM 			policy &= ~MAC_PKT_HASH_L3;
9928833SVenu.Iyer@Sun.COM 		}
99311528SBaban.Kenkre@Sun.COM 
99411528SBaban.Kenkre@Sun.COM 		if (ip_fragmented) {
99511528SBaban.Kenkre@Sun.COM 			uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
99611528SBaban.Kenkre@Sun.COM 			hash ^= PKT_HASH_2BYTES(identp);
99711528SBaban.Kenkre@Sun.COM 			goto done;
99811528SBaban.Kenkre@Sun.COM 		}
9998833SVenu.Iyer@Sun.COM 		break;
10008833SVenu.Iyer@Sun.COM 	}
10018833SVenu.Iyer@Sun.COM 	case ETHERTYPE_IPV6: {
10028833SVenu.Iyer@Sun.COM 		ip6_t *ip6hp;
1003*11878SVenu.Iyer@Sun.COM 		ip6_frag_t *frag = NULL;
10048833SVenu.Iyer@Sun.COM 		uint16_t hdr_length;
10058833SVenu.Iyer@Sun.COM 
10068833SVenu.Iyer@Sun.COM 		/*
10078833SVenu.Iyer@Sun.COM 		 * If the header is not aligned or the header doesn't fit
10088833SVenu.Iyer@Sun.COM 		 * in the mblk, bail now. Note that this may cause packets
10098833SVenu.Iyer@Sun.COM 		 * reordering.
10108833SVenu.Iyer@Sun.COM 		 */
10118833SVenu.Iyer@Sun.COM 
10128833SVenu.Iyer@Sun.COM 		ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
10138833SVenu.Iyer@Sun.COM 		if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
10148833SVenu.Iyer@Sun.COM 		    !OK_32PTR((char *)ip6hp))
10158833SVenu.Iyer@Sun.COM 			goto done;
10168833SVenu.Iyer@Sun.COM 
1017*11878SVenu.Iyer@Sun.COM 		if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length,
1018*11878SVenu.Iyer@Sun.COM 		    &proto, &frag))
10198833SVenu.Iyer@Sun.COM 			goto done;
10208833SVenu.Iyer@Sun.COM 		skip_len += hdr_length;
10218833SVenu.Iyer@Sun.COM 
102211528SBaban.Kenkre@Sun.COM 		/*
102311528SBaban.Kenkre@Sun.COM 		 * For fragmented packets, use addresses in addition to
102411528SBaban.Kenkre@Sun.COM 		 * the frag_id to generate the hash inorder to get
102511528SBaban.Kenkre@Sun.COM 		 * better distribution.
102611528SBaban.Kenkre@Sun.COM 		 */
1027*11878SVenu.Iyer@Sun.COM 		if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) {
10288833SVenu.Iyer@Sun.COM 			uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
10298833SVenu.Iyer@Sun.COM 			uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
10308833SVenu.Iyer@Sun.COM 
10318833SVenu.Iyer@Sun.COM 			hash ^= (PKT_HASH_4BYTES(ip_src) ^
10328833SVenu.Iyer@Sun.COM 			    PKT_HASH_4BYTES(ip_dst));
10338833SVenu.Iyer@Sun.COM 			policy &= ~MAC_PKT_HASH_L3;
10348833SVenu.Iyer@Sun.COM 		}
103511528SBaban.Kenkre@Sun.COM 
1036*11878SVenu.Iyer@Sun.COM 		if (frag != NULL) {
1037*11878SVenu.Iyer@Sun.COM 			uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
103811528SBaban.Kenkre@Sun.COM 			hash ^= PKT_HASH_4BYTES(identp);
103911528SBaban.Kenkre@Sun.COM 			goto done;
104011528SBaban.Kenkre@Sun.COM 		}
10418833SVenu.Iyer@Sun.COM 		break;
10428833SVenu.Iyer@Sun.COM 	}
10438833SVenu.Iyer@Sun.COM 	default:
10448833SVenu.Iyer@Sun.COM 		goto done;
10458833SVenu.Iyer@Sun.COM 	}
10468833SVenu.Iyer@Sun.COM 
10478833SVenu.Iyer@Sun.COM 	if (policy == 0)
10488833SVenu.Iyer@Sun.COM 		goto done;
10498833SVenu.Iyer@Sun.COM 
10508833SVenu.Iyer@Sun.COM 	/* if ip header is in its own mblk, skip it */
10518833SVenu.Iyer@Sun.COM 	if (MBLKL(mp) <= skip_len) {
10528833SVenu.Iyer@Sun.COM 		skip_len -= MBLKL(mp);
10538833SVenu.Iyer@Sun.COM 		mp = mp->b_cont;
10548833SVenu.Iyer@Sun.COM 		if (mp == NULL)
10558833SVenu.Iyer@Sun.COM 			goto done;
10568833SVenu.Iyer@Sun.COM 	}
10578833SVenu.Iyer@Sun.COM 
10588833SVenu.Iyer@Sun.COM 	/* parse ULP header */
10598833SVenu.Iyer@Sun.COM again:
10608833SVenu.Iyer@Sun.COM 	switch (proto) {
10618833SVenu.Iyer@Sun.COM 	case IPPROTO_TCP:
10628833SVenu.Iyer@Sun.COM 	case IPPROTO_UDP:
10638833SVenu.Iyer@Sun.COM 	case IPPROTO_ESP:
10648833SVenu.Iyer@Sun.COM 	case IPPROTO_SCTP:
10658833SVenu.Iyer@Sun.COM 		/*
10668833SVenu.Iyer@Sun.COM 		 * These Internet Protocols are intentionally designed
10678833SVenu.Iyer@Sun.COM 		 * for hashing from the git-go.  Port numbers are in the first
10688833SVenu.Iyer@Sun.COM 		 * word for transports, SPI is first for ESP.
10698833SVenu.Iyer@Sun.COM 		 */
10708833SVenu.Iyer@Sun.COM 		if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
10718833SVenu.Iyer@Sun.COM 			goto done;
10728833SVenu.Iyer@Sun.COM 		hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
10738833SVenu.Iyer@Sun.COM 		break;
10748833SVenu.Iyer@Sun.COM 
10758833SVenu.Iyer@Sun.COM 	case IPPROTO_AH: {
10768833SVenu.Iyer@Sun.COM 		ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
10778833SVenu.Iyer@Sun.COM 		uint_t ah_length = AH_TOTAL_LEN(ah);
10788833SVenu.Iyer@Sun.COM 
10798833SVenu.Iyer@Sun.COM 		if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
10808833SVenu.Iyer@Sun.COM 			goto done;
10818833SVenu.Iyer@Sun.COM 
10828833SVenu.Iyer@Sun.COM 		proto = ah->ah_nexthdr;
10838833SVenu.Iyer@Sun.COM 		skip_len += ah_length;
10848833SVenu.Iyer@Sun.COM 
10858833SVenu.Iyer@Sun.COM 		/* if AH header is in its own mblk, skip it */
10868833SVenu.Iyer@Sun.COM 		if (MBLKL(mp) <= skip_len) {
10878833SVenu.Iyer@Sun.COM 			skip_len -= MBLKL(mp);
10888833SVenu.Iyer@Sun.COM 			mp = mp->b_cont;
10898833SVenu.Iyer@Sun.COM 			if (mp == NULL)
10908833SVenu.Iyer@Sun.COM 				goto done;
10918833SVenu.Iyer@Sun.COM 		}
10928833SVenu.Iyer@Sun.COM 
10938833SVenu.Iyer@Sun.COM 		goto again;
10948833SVenu.Iyer@Sun.COM 	}
10958833SVenu.Iyer@Sun.COM 	}
10968833SVenu.Iyer@Sun.COM 
10978833SVenu.Iyer@Sun.COM done:
10988833SVenu.Iyer@Sun.COM 	return (hash);
10998833SVenu.Iyer@Sun.COM }
1100