18275SEric Cheng /* 28275SEric Cheng * CDDL HEADER START 38275SEric Cheng * 48275SEric Cheng * The contents of this file are subject to the terms of the 58275SEric Cheng * Common Development and Distribution License (the "License"). 68275SEric Cheng * You may not use this file except in compliance with the License. 78275SEric Cheng * 88275SEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 98275SEric Cheng * or http://www.opensolaris.org/os/licensing. 108275SEric Cheng * See the License for the specific language governing permissions 118275SEric Cheng * and limitations under the License. 128275SEric Cheng * 138275SEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 148275SEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 158275SEric Cheng * If applicable, add the following below this CDDL HEADER, with the 168275SEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 178275SEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 188275SEric Cheng * 198275SEric Cheng * CDDL HEADER END 208275SEric Cheng */ 218275SEric Cheng /* 22*12683SJimmy.Vetayases@oracle.com * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 238275SEric Cheng */ 248275SEric Cheng 258275SEric Cheng /* 268275SEric Cheng * MAC Services Module - misc utilities 278275SEric Cheng */ 288275SEric Cheng 298275SEric Cheng #include <sys/types.h> 308275SEric Cheng #include <sys/mac.h> 318275SEric Cheng #include <sys/mac_impl.h> 328275SEric Cheng #include <sys/mac_client_priv.h> 338275SEric Cheng #include <sys/mac_client_impl.h> 348275SEric Cheng #include <sys/mac_soft_ring.h> 358275SEric Cheng #include <sys/strsubr.h> 368275SEric Cheng #include <sys/strsun.h> 378275SEric Cheng #include <sys/vlan.h> 388275SEric Cheng #include <sys/pattr.h> 398275SEric Cheng #include <sys/pci_tools.h> 408275SEric Cheng #include <inet/ip.h> 418275SEric Cheng #include <inet/ip_impl.h> 428275SEric Cheng #include <inet/ip6.h> 438275SEric Cheng #include <sys/vtrace.h> 448275SEric Cheng #include <sys/dlpi.h> 458275SEric Cheng #include <sys/sunndi.h> 468833SVenu.Iyer@Sun.COM #include <inet/ipsec_impl.h> 478833SVenu.Iyer@Sun.COM #include <inet/sadb.h> 488833SVenu.Iyer@Sun.COM #include <inet/ipsecesp.h> 498833SVenu.Iyer@Sun.COM #include <inet/ipsecah.h> 508275SEric Cheng 518275SEric Cheng /* 528275SEric Cheng * Copy an mblk, preserving its hardware checksum flags. 538275SEric Cheng */ 548275SEric Cheng static mblk_t * 558275SEric Cheng mac_copymsg_cksum(mblk_t *mp) 568275SEric Cheng { 578275SEric Cheng mblk_t *mp1; 588275SEric Cheng uint32_t start, stuff, end, value, flags; 598275SEric Cheng 608275SEric Cheng mp1 = copymsg(mp); 618275SEric Cheng if (mp1 == NULL) 628275SEric Cheng return (NULL); 638275SEric Cheng 648275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 658275SEric Cheng (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 668275SEric Cheng flags, KM_NOSLEEP); 678275SEric Cheng 688275SEric Cheng return (mp1); 698275SEric Cheng } 708275SEric Cheng 718275SEric Cheng /* 728275SEric Cheng * Copy an mblk chain, presenting the hardware checksum flags of the 738275SEric Cheng * individual mblks. 748275SEric Cheng */ 758275SEric Cheng mblk_t * 768275SEric Cheng mac_copymsgchain_cksum(mblk_t *mp) 778275SEric Cheng { 788275SEric Cheng mblk_t *nmp = NULL; 798275SEric Cheng mblk_t **nmpp = &nmp; 808275SEric Cheng 818275SEric Cheng for (; mp != NULL; mp = mp->b_next) { 828275SEric Cheng if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { 838275SEric Cheng freemsgchain(nmp); 848275SEric Cheng return (NULL); 858275SEric Cheng } 868275SEric Cheng 878275SEric Cheng nmpp = &((*nmpp)->b_next); 888275SEric Cheng } 898275SEric Cheng 908275SEric Cheng return (nmp); 918275SEric Cheng } 928275SEric Cheng 938275SEric Cheng /* 948275SEric Cheng * Process the specified mblk chain for proper handling of hardware 958275SEric Cheng * checksum offload. This routine is invoked for loopback traffic 968275SEric Cheng * between MAC clients. 978275SEric Cheng * The function handles a NULL mblk chain passed as argument. 988275SEric Cheng */ 998275SEric Cheng mblk_t * 1008275SEric Cheng mac_fix_cksum(mblk_t *mp_chain) 1018275SEric Cheng { 1028275SEric Cheng mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 1038275SEric Cheng uint32_t flags, start, stuff, end, value; 1048275SEric Cheng 1058275SEric Cheng for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 1068275SEric Cheng uint16_t len; 1078275SEric Cheng uint32_t offset; 1088275SEric Cheng struct ether_header *ehp; 1098275SEric Cheng uint16_t sap; 1108275SEric Cheng 1118275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 1128275SEric Cheng &flags); 1138275SEric Cheng if (flags == 0) 1148275SEric Cheng continue; 1158275SEric Cheng 1168275SEric Cheng /* 1178275SEric Cheng * Since the processing of checksum offload for loopback 1188275SEric Cheng * traffic requires modification of the packet contents, 1198275SEric Cheng * ensure sure that we are always modifying our own copy. 1208275SEric Cheng */ 1218275SEric Cheng if (DB_REF(mp) > 1) { 1228275SEric Cheng mp1 = copymsg(mp); 1238275SEric Cheng if (mp1 == NULL) 1248275SEric Cheng continue; 1258275SEric Cheng mp1->b_next = mp->b_next; 1268275SEric Cheng mp->b_next = NULL; 1278275SEric Cheng freemsg(mp); 1288275SEric Cheng if (prev != NULL) 1298275SEric Cheng prev->b_next = mp1; 1308275SEric Cheng else 1318275SEric Cheng new_chain = mp1; 1328275SEric Cheng mp = mp1; 1338275SEric Cheng } 1348275SEric Cheng 1358275SEric Cheng /* 1368275SEric Cheng * Ethernet, and optionally VLAN header. 1378275SEric Cheng */ 1388275SEric Cheng /* LINTED: improper alignment cast */ 1398275SEric Cheng ehp = (struct ether_header *)mp->b_rptr; 1408275SEric Cheng if (ntohs(ehp->ether_type) == VLAN_TPID) { 1418275SEric Cheng struct ether_vlan_header *evhp; 1428275SEric Cheng 1438275SEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 1448275SEric Cheng /* LINTED: improper alignment cast */ 1458275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 1468275SEric Cheng sap = ntohs(evhp->ether_type); 1478275SEric Cheng offset = sizeof (struct ether_vlan_header); 1488275SEric Cheng } else { 1498275SEric Cheng sap = ntohs(ehp->ether_type); 1508275SEric Cheng offset = sizeof (struct ether_header); 1518275SEric Cheng } 1528275SEric Cheng 1538275SEric Cheng if (MBLKL(mp) <= offset) { 1548275SEric Cheng offset -= MBLKL(mp); 1558275SEric Cheng if (mp->b_cont == NULL) { 1568275SEric Cheng /* corrupted packet, skip it */ 1578275SEric Cheng if (prev != NULL) 1588275SEric Cheng prev->b_next = mp->b_next; 1598275SEric Cheng else 1608275SEric Cheng new_chain = mp->b_next; 1618275SEric Cheng mp1 = mp->b_next; 1628275SEric Cheng mp->b_next = NULL; 1638275SEric Cheng freemsg(mp); 1648275SEric Cheng mp = mp1; 1658275SEric Cheng continue; 1668275SEric Cheng } 1678275SEric Cheng mp = mp->b_cont; 1688275SEric Cheng } 1698275SEric Cheng 1708275SEric Cheng if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 1718275SEric Cheng ipha_t *ipha = NULL; 1728275SEric Cheng 1738275SEric Cheng /* 1748275SEric Cheng * In order to compute the full and header 1758275SEric Cheng * checksums, we need to find and parse 1768275SEric Cheng * the IP and/or ULP headers. 1778275SEric Cheng */ 1788275SEric Cheng 1798275SEric Cheng sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 1808275SEric Cheng 1818275SEric Cheng /* 1828275SEric Cheng * IP header. 1838275SEric Cheng */ 1848275SEric Cheng if (sap != ETHERTYPE_IP) 1858275SEric Cheng continue; 1868275SEric Cheng 1878275SEric Cheng ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 1888275SEric Cheng /* LINTED: improper alignment cast */ 1898275SEric Cheng ipha = (ipha_t *)(mp->b_rptr + offset); 1908275SEric Cheng 1918275SEric Cheng if (flags & HCK_FULLCKSUM) { 1928275SEric Cheng ipaddr_t src, dst; 1938275SEric Cheng uint32_t cksum; 1948275SEric Cheng uint16_t *up; 1958275SEric Cheng uint8_t proto; 1968275SEric Cheng 1978275SEric Cheng /* 1988275SEric Cheng * Pointer to checksum field in ULP header. 1998275SEric Cheng */ 2008275SEric Cheng proto = ipha->ipha_protocol; 2018275SEric Cheng ASSERT(ipha->ipha_version_and_hdr_length == 2028275SEric Cheng IP_SIMPLE_HDR_VERSION); 20311588Sdavid.edmondson@sun.com 20411588Sdavid.edmondson@sun.com switch (proto) { 20511588Sdavid.edmondson@sun.com case IPPROTO_TCP: 2068275SEric Cheng /* LINTED: improper alignment cast */ 2078275SEric Cheng up = IPH_TCPH_CHECKSUMP(ipha, 2088275SEric Cheng IP_SIMPLE_HDR_LENGTH); 20911588Sdavid.edmondson@sun.com break; 21011588Sdavid.edmondson@sun.com 21111588Sdavid.edmondson@sun.com case IPPROTO_UDP: 2128275SEric Cheng /* LINTED: improper alignment cast */ 2138275SEric Cheng up = IPH_UDPH_CHECKSUMP(ipha, 2148275SEric Cheng IP_SIMPLE_HDR_LENGTH); 21511588Sdavid.edmondson@sun.com break; 21611588Sdavid.edmondson@sun.com 21711588Sdavid.edmondson@sun.com default: 21811588Sdavid.edmondson@sun.com cmn_err(CE_WARN, "mac_fix_cksum: " 21911588Sdavid.edmondson@sun.com "unexpected protocol: %d", proto); 22011588Sdavid.edmondson@sun.com continue; 2218275SEric Cheng } 2228275SEric Cheng 2238275SEric Cheng /* 2248275SEric Cheng * Pseudo-header checksum. 2258275SEric Cheng */ 2268275SEric Cheng src = ipha->ipha_src; 2278275SEric Cheng dst = ipha->ipha_dst; 2288275SEric Cheng len = ntohs(ipha->ipha_length) - 2298275SEric Cheng IP_SIMPLE_HDR_LENGTH; 2308275SEric Cheng 2318275SEric Cheng cksum = (dst >> 16) + (dst & 0xFFFF) + 2328275SEric Cheng (src >> 16) + (src & 0xFFFF); 2338275SEric Cheng cksum += htons(len); 2348275SEric Cheng 2358275SEric Cheng /* 2368275SEric Cheng * The checksum value stored in the packet needs 2378275SEric Cheng * to be correct. Compute it here. 2388275SEric Cheng */ 2398275SEric Cheng *up = 0; 2408275SEric Cheng cksum += (((proto) == IPPROTO_UDP) ? 2418275SEric Cheng IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 2428275SEric Cheng cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 2438275SEric Cheng offset, cksum); 2448275SEric Cheng *(up) = (uint16_t)(cksum ? cksum : ~cksum); 2458275SEric Cheng 24611878SVenu.Iyer@Sun.COM /* 24711878SVenu.Iyer@Sun.COM * Flag the packet so that it appears 24811878SVenu.Iyer@Sun.COM * that the checksum has already been 24911878SVenu.Iyer@Sun.COM * verified by the hardware. 25011878SVenu.Iyer@Sun.COM */ 25111878SVenu.Iyer@Sun.COM flags &= ~HCK_FULLCKSUM; 2528275SEric Cheng flags |= HCK_FULLCKSUM_OK; 25311878SVenu.Iyer@Sun.COM value = 0; 2548275SEric Cheng } 2558275SEric Cheng 2568275SEric Cheng if (flags & HCK_IPV4_HDRCKSUM) { 2578275SEric Cheng ASSERT(ipha != NULL); 2588275SEric Cheng ipha->ipha_hdr_checksum = 2598275SEric Cheng (uint16_t)ip_csum_hdr(ipha); 26011878SVenu.Iyer@Sun.COM flags &= ~HCK_IPV4_HDRCKSUM; 26111878SVenu.Iyer@Sun.COM flags |= HCK_IPV4_HDRCKSUM_OK; 26211878SVenu.Iyer@Sun.COM 2638275SEric Cheng } 2648275SEric Cheng } 2658275SEric Cheng 2668275SEric Cheng if (flags & HCK_PARTIALCKSUM) { 2678275SEric Cheng uint16_t *up, partial, cksum; 2688275SEric Cheng uchar_t *ipp; /* ptr to beginning of IP header */ 2698275SEric Cheng 2708275SEric Cheng if (mp->b_cont != NULL) { 2718275SEric Cheng mblk_t *mp1; 2728275SEric Cheng 2738275SEric Cheng mp1 = msgpullup(mp, offset + end); 2748275SEric Cheng if (mp1 == NULL) 2758275SEric Cheng continue; 2768275SEric Cheng mp1->b_next = mp->b_next; 2778275SEric Cheng mp->b_next = NULL; 2788275SEric Cheng freemsg(mp); 2798275SEric Cheng if (prev != NULL) 2808275SEric Cheng prev->b_next = mp1; 2818275SEric Cheng else 2828275SEric Cheng new_chain = mp1; 2838275SEric Cheng mp = mp1; 2848275SEric Cheng } 2858275SEric Cheng 2868275SEric Cheng ipp = mp->b_rptr + offset; 2878275SEric Cheng /* LINTED: cast may result in improper alignment */ 2888275SEric Cheng up = (uint16_t *)((uchar_t *)ipp + stuff); 2898275SEric Cheng partial = *up; 2908275SEric Cheng *up = 0; 2918275SEric Cheng 2928275SEric Cheng cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 2938275SEric Cheng end - start, partial); 2948275SEric Cheng cksum = ~cksum; 2958275SEric Cheng *up = cksum ? cksum : ~cksum; 2968275SEric Cheng 2978275SEric Cheng /* 2988275SEric Cheng * Since we already computed the whole checksum, 2998275SEric Cheng * indicate to the stack that it has already 3008275SEric Cheng * been verified by the hardware. 3018275SEric Cheng */ 3028275SEric Cheng flags &= ~HCK_PARTIALCKSUM; 30311878SVenu.Iyer@Sun.COM flags |= HCK_FULLCKSUM_OK; 30411878SVenu.Iyer@Sun.COM value = 0; 3058275SEric Cheng } 3068275SEric Cheng 3078275SEric Cheng (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 3088275SEric Cheng value, flags, KM_NOSLEEP); 3098275SEric Cheng } 3108275SEric Cheng 3118275SEric Cheng return (new_chain); 3128275SEric Cheng } 3138275SEric Cheng 3148275SEric Cheng /* 3158275SEric Cheng * Add VLAN tag to the specified mblk. 3168275SEric Cheng */ 3178275SEric Cheng mblk_t * 3188275SEric Cheng mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) 3198275SEric Cheng { 3208275SEric Cheng mblk_t *hmp; 3218275SEric Cheng struct ether_vlan_header *evhp; 3228275SEric Cheng struct ether_header *ehp; 3238275SEric Cheng uint32_t start, stuff, end, value, flags; 3248275SEric Cheng 3258275SEric Cheng ASSERT(pri != 0 || vid != 0); 3268275SEric Cheng 3278275SEric Cheng /* 3288275SEric Cheng * Allocate an mblk for the new tagged ethernet header, 3298275SEric Cheng * and copy the MAC addresses and ethertype from the 3308275SEric Cheng * original header. 3318275SEric Cheng */ 3328275SEric Cheng 3338275SEric Cheng hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 3348275SEric Cheng if (hmp == NULL) { 3358275SEric Cheng freemsg(mp); 3368275SEric Cheng return (NULL); 3378275SEric Cheng } 3388275SEric Cheng 3398275SEric Cheng evhp = (struct ether_vlan_header *)hmp->b_rptr; 3408275SEric Cheng ehp = (struct ether_header *)mp->b_rptr; 3418275SEric Cheng 3428275SEric Cheng bcopy(ehp, evhp, (ETHERADDRL * 2)); 3438275SEric Cheng evhp->ether_type = ehp->ether_type; 3448275SEric Cheng evhp->ether_tpid = htons(ETHERTYPE_VLAN); 3458275SEric Cheng 3468275SEric Cheng hmp->b_wptr += sizeof (struct ether_vlan_header); 3478275SEric Cheng mp->b_rptr += sizeof (struct ether_header); 3488275SEric Cheng 3498275SEric Cheng /* 3508275SEric Cheng * Free the original message if it's now empty. Link the 3518275SEric Cheng * rest of messages to the header message. 3528275SEric Cheng */ 3538275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 3548275SEric Cheng (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, 3558275SEric Cheng KM_NOSLEEP); 3568275SEric Cheng if (MBLKL(mp) == 0) { 3578275SEric Cheng hmp->b_cont = mp->b_cont; 3588275SEric Cheng freeb(mp); 3598275SEric Cheng } else { 3608275SEric Cheng hmp->b_cont = mp; 3618275SEric Cheng } 3628275SEric Cheng ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); 3638275SEric Cheng 3648275SEric Cheng /* 3658275SEric Cheng * Initialize the new TCI (Tag Control Information). 3668275SEric Cheng */ 3678275SEric Cheng evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); 3688275SEric Cheng 3698275SEric Cheng return (hmp); 3708275SEric Cheng } 3718275SEric Cheng 3728275SEric Cheng /* 3738275SEric Cheng * Adds a VLAN tag with the specified VID and priority to each mblk of 3748275SEric Cheng * the specified chain. 3758275SEric Cheng */ 3768275SEric Cheng mblk_t * 3778275SEric Cheng mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) 3788275SEric Cheng { 3798275SEric Cheng mblk_t *next_mp, **prev, *mp; 3808275SEric Cheng 3818275SEric Cheng mp = mp_chain; 3828275SEric Cheng prev = &mp_chain; 3838275SEric Cheng 3848275SEric Cheng while (mp != NULL) { 3858275SEric Cheng next_mp = mp->b_next; 3868275SEric Cheng mp->b_next = NULL; 3878275SEric Cheng if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { 3888275SEric Cheng freemsgchain(next_mp); 3898275SEric Cheng break; 3908275SEric Cheng } 3918275SEric Cheng *prev = mp; 3928275SEric Cheng prev = &mp->b_next; 3938275SEric Cheng mp = mp->b_next = next_mp; 3948275SEric Cheng } 3958275SEric Cheng 3968275SEric Cheng return (mp_chain); 3978275SEric Cheng } 3988275SEric Cheng 3998275SEric Cheng /* 4008275SEric Cheng * Strip VLAN tag 4018275SEric Cheng */ 4028275SEric Cheng mblk_t * 4038275SEric Cheng mac_strip_vlan_tag(mblk_t *mp) 4048275SEric Cheng { 4058275SEric Cheng mblk_t *newmp; 4068275SEric Cheng struct ether_vlan_header *evhp; 4078275SEric Cheng 4088275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 4098275SEric Cheng if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 4108275SEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 4118275SEric Cheng 4128275SEric Cheng if (DB_REF(mp) > 1) { 4138275SEric Cheng newmp = copymsg(mp); 4148275SEric Cheng if (newmp == NULL) 4158275SEric Cheng return (NULL); 4168275SEric Cheng freemsg(mp); 4178275SEric Cheng mp = newmp; 4188275SEric Cheng } 4198275SEric Cheng 4208275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 4218275SEric Cheng 4228275SEric Cheng ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 4238275SEric Cheng mp->b_rptr += VLAN_TAGSZ; 4248275SEric Cheng } 4258275SEric Cheng return (mp); 4268275SEric Cheng } 4278275SEric Cheng 4288275SEric Cheng /* 4298275SEric Cheng * Strip VLAN tag from each mblk of the chain. 4308275SEric Cheng */ 4318275SEric Cheng mblk_t * 4328275SEric Cheng mac_strip_vlan_tag_chain(mblk_t *mp_chain) 4338275SEric Cheng { 4348275SEric Cheng mblk_t *mp, *next_mp, **prev; 4358275SEric Cheng 4368275SEric Cheng mp = mp_chain; 4378275SEric Cheng prev = &mp_chain; 4388275SEric Cheng 4398275SEric Cheng while (mp != NULL) { 4408275SEric Cheng next_mp = mp->b_next; 4418275SEric Cheng mp->b_next = NULL; 4428275SEric Cheng if ((mp = mac_strip_vlan_tag(mp)) == NULL) { 4438275SEric Cheng freemsgchain(next_mp); 4448275SEric Cheng break; 4458275SEric Cheng } 4468275SEric Cheng *prev = mp; 4478275SEric Cheng prev = &mp->b_next; 4488275SEric Cheng mp = mp->b_next = next_mp; 4498275SEric Cheng } 4508275SEric Cheng 4518275SEric Cheng return (mp_chain); 4528275SEric Cheng } 4538275SEric Cheng 4548275SEric Cheng /* 4558275SEric Cheng * Default callback function. Used when the datapath is not yet initialized. 4568275SEric Cheng */ 4578275SEric Cheng /* ARGSUSED */ 4588275SEric Cheng void 4598275SEric Cheng mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, 4608275SEric Cheng boolean_t loopback) 4618275SEric Cheng { 4628275SEric Cheng mblk_t *mp1 = mp; 4638275SEric Cheng 4648275SEric Cheng while (mp1 != NULL) { 4658275SEric Cheng mp1->b_prev = NULL; 4668275SEric Cheng mp1->b_queue = NULL; 4678275SEric Cheng mp1 = mp1->b_next; 4688275SEric Cheng } 4698275SEric Cheng freemsgchain(mp); 4708275SEric Cheng } 4718275SEric Cheng 4728275SEric Cheng /* 4738275SEric Cheng * Determines the IPv6 header length accounting for all the optional IPv6 4748275SEric Cheng * headers (hop-by-hop, destination, routing and fragment). The header length 4758275SEric Cheng * and next header value (a transport header) is captured. 4768275SEric Cheng * 4778275SEric Cheng * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 4788275SEric Cheng * returns B_TRUE. 4798275SEric Cheng */ 4808275SEric Cheng boolean_t 48111878SVenu.Iyer@Sun.COM mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, 48211878SVenu.Iyer@Sun.COM uint8_t *next_hdr, ip6_frag_t **fragp) 4838275SEric Cheng { 4848275SEric Cheng uint16_t length; 4858275SEric Cheng uint_t ehdrlen; 4868275SEric Cheng uint8_t *whereptr; 4878275SEric Cheng uint8_t *nexthdrp; 4888275SEric Cheng ip6_dest_t *desthdr; 4898275SEric Cheng ip6_rthdr_t *rthdr; 4908275SEric Cheng ip6_frag_t *fraghdr; 4918275SEric Cheng 4928275SEric Cheng if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 4938275SEric Cheng return (B_FALSE); 49411042SErik.Nordmark@Sun.COM ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 4958275SEric Cheng length = IPV6_HDR_LEN; 4968275SEric Cheng whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4978275SEric Cheng 49811878SVenu.Iyer@Sun.COM if (fragp != NULL) 49911878SVenu.Iyer@Sun.COM *fragp = NULL; 50011528SBaban.Kenkre@Sun.COM 5018275SEric Cheng nexthdrp = &ip6h->ip6_nxt; 5028275SEric Cheng while (whereptr < endptr) { 5038275SEric Cheng /* Is there enough left for len + nexthdr? */ 5048275SEric Cheng if (whereptr + MIN_EHDR_LEN > endptr) 5058275SEric Cheng break; 5068275SEric Cheng 5078275SEric Cheng switch (*nexthdrp) { 5088275SEric Cheng case IPPROTO_HOPOPTS: 5098275SEric Cheng case IPPROTO_DSTOPTS: 5108275SEric Cheng /* Assumes the headers are identical for hbh and dst */ 5118275SEric Cheng desthdr = (ip6_dest_t *)whereptr; 5128275SEric Cheng ehdrlen = 8 * (desthdr->ip6d_len + 1); 5138275SEric Cheng if ((uchar_t *)desthdr + ehdrlen > endptr) 5148275SEric Cheng return (B_FALSE); 5158275SEric Cheng nexthdrp = &desthdr->ip6d_nxt; 5168275SEric Cheng break; 5178275SEric Cheng case IPPROTO_ROUTING: 5188275SEric Cheng rthdr = (ip6_rthdr_t *)whereptr; 5198275SEric Cheng ehdrlen = 8 * (rthdr->ip6r_len + 1); 5208275SEric Cheng if ((uchar_t *)rthdr + ehdrlen > endptr) 5218275SEric Cheng return (B_FALSE); 5228275SEric Cheng nexthdrp = &rthdr->ip6r_nxt; 5238275SEric Cheng break; 5248275SEric Cheng case IPPROTO_FRAGMENT: 5258275SEric Cheng fraghdr = (ip6_frag_t *)whereptr; 5268275SEric Cheng ehdrlen = sizeof (ip6_frag_t); 5278275SEric Cheng if ((uchar_t *)&fraghdr[1] > endptr) 5288275SEric Cheng return (B_FALSE); 5298275SEric Cheng nexthdrp = &fraghdr->ip6f_nxt; 53011878SVenu.Iyer@Sun.COM if (fragp != NULL) 53111878SVenu.Iyer@Sun.COM *fragp = fraghdr; 5328275SEric Cheng break; 5338275SEric Cheng case IPPROTO_NONE: 5348275SEric Cheng /* No next header means we're finished */ 5358275SEric Cheng default: 5368275SEric Cheng *hdr_length = length; 5378275SEric Cheng *next_hdr = *nexthdrp; 5388275SEric Cheng return (B_TRUE); 5398275SEric Cheng } 5408275SEric Cheng length += ehdrlen; 5418275SEric Cheng whereptr += ehdrlen; 5428275SEric Cheng *hdr_length = length; 5438275SEric Cheng *next_hdr = *nexthdrp; 5448275SEric Cheng } 5458275SEric Cheng switch (*nexthdrp) { 5468275SEric Cheng case IPPROTO_HOPOPTS: 5478275SEric Cheng case IPPROTO_DSTOPTS: 5488275SEric Cheng case IPPROTO_ROUTING: 5498275SEric Cheng case IPPROTO_FRAGMENT: 5508275SEric Cheng /* 5518275SEric Cheng * If any know extension headers are still to be processed, 5528275SEric Cheng * the packet's malformed (or at least all the IP header(s) are 5538275SEric Cheng * not in the same mblk - and that should never happen. 5548275SEric Cheng */ 5558275SEric Cheng return (B_FALSE); 5568275SEric Cheng 5578275SEric Cheng default: 5588275SEric Cheng /* 5598275SEric Cheng * If we get here, we know that all of the IP headers were in 5608275SEric Cheng * the same mblk, even if the ULP header is in the next mblk. 5618275SEric Cheng */ 5628275SEric Cheng *hdr_length = length; 5638275SEric Cheng *next_hdr = *nexthdrp; 5648275SEric Cheng return (B_TRUE); 5658275SEric Cheng } 5668275SEric Cheng } 5678275SEric Cheng 56811878SVenu.Iyer@Sun.COM /* 56911878SVenu.Iyer@Sun.COM * The following set of routines are there to take care of interrupt 57011878SVenu.Iyer@Sun.COM * re-targeting for legacy (fixed) interrupts. Some older versions 57111878SVenu.Iyer@Sun.COM * of the popular NICs like e1000g do not support MSI-X interrupts 57211878SVenu.Iyer@Sun.COM * and they reserve fixed interrupts for RX/TX rings. To re-target 57311878SVenu.Iyer@Sun.COM * these interrupts, PCITOOL ioctls need to be used. 57411878SVenu.Iyer@Sun.COM */ 5758275SEric Cheng typedef struct mac_dladm_intr { 5768275SEric Cheng int ino; 5778275SEric Cheng int cpu_id; 5788275SEric Cheng char driver_path[MAXPATHLEN]; 5798275SEric Cheng char nexus_path[MAXPATHLEN]; 5808275SEric Cheng } mac_dladm_intr_t; 5818275SEric Cheng 5828275SEric Cheng /* Bind the interrupt to cpu_num */ 5838275SEric Cheng static int 584*12683SJimmy.Vetayases@oracle.com mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino) 5858275SEric Cheng { 5868275SEric Cheng pcitool_intr_set_t iset; 5878275SEric Cheng int err; 5888275SEric Cheng 589*12683SJimmy.Vetayases@oracle.com iset.old_cpu = oldcpuid; 5908275SEric Cheng iset.ino = ino; 5918275SEric Cheng iset.cpu_id = cpu_num; 5928275SEric Cheng iset.user_version = PCITOOL_VERSION; 5938275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, 5948275SEric Cheng kcred, NULL); 5958275SEric Cheng 5968275SEric Cheng return (err); 5978275SEric Cheng } 5988275SEric Cheng 5998275SEric Cheng /* 6008275SEric Cheng * Search interrupt information. iget is filled in with the info to search 6018275SEric Cheng */ 6028275SEric Cheng static boolean_t 6038275SEric Cheng mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) 6048275SEric Cheng { 6058275SEric Cheng int i; 6068275SEric Cheng char driver_path[2 * MAXPATHLEN]; 6078275SEric Cheng 6088275SEric Cheng for (i = 0; i < iget_p->num_devs; i++) { 6098275SEric Cheng (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); 6108275SEric Cheng (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, 6118275SEric Cheng ":%s%d", iget_p->dev[i].driver_name, 6128275SEric Cheng iget_p->dev[i].dev_inst); 6138275SEric Cheng /* Match the device path for the device path */ 6148275SEric Cheng if (strcmp(driver_path, dln->driver_path) == 0) { 6158275SEric Cheng dln->ino = iget_p->ino; 6168275SEric Cheng dln->cpu_id = iget_p->cpu_id; 6178275SEric Cheng return (B_TRUE); 6188275SEric Cheng } 6198275SEric Cheng } 6208275SEric Cheng return (B_FALSE); 6218275SEric Cheng } 6228275SEric Cheng 6238275SEric Cheng /* 6248275SEric Cheng * Get information about ino, i.e. if this is the interrupt for our 6258275SEric Cheng * device and where it is bound etc. 6268275SEric Cheng */ 6278275SEric Cheng static boolean_t 628*12683SJimmy.Vetayases@oracle.com mac_get_single_intr(ldi_handle_t lh, int oldcpuid, int ino, 629*12683SJimmy.Vetayases@oracle.com mac_dladm_intr_t *dln) 6308275SEric Cheng { 6318275SEric Cheng pcitool_intr_get_t *iget_p; 6328275SEric Cheng int ipsz; 6338275SEric Cheng int nipsz; 6348275SEric Cheng int err; 6358275SEric Cheng uint8_t inum; 6368275SEric Cheng 6378275SEric Cheng /* 6388275SEric Cheng * Check if SLEEP is OK, i.e if could come here in response to 6398275SEric Cheng * changing the fanout due to some callback from the driver, say 6408275SEric Cheng * link speed changes. 6418275SEric Cheng */ 6428275SEric Cheng ipsz = PCITOOL_IGET_SIZE(0); 6438275SEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 6448275SEric Cheng 6458275SEric Cheng iget_p->num_devs_ret = 0; 6468275SEric Cheng iget_p->user_version = PCITOOL_VERSION; 647*12683SJimmy.Vetayases@oracle.com iget_p->cpu_id = oldcpuid; 6488275SEric Cheng iget_p->ino = ino; 6498275SEric Cheng 6508275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 6518275SEric Cheng FKIOCTL, kcred, NULL); 6528275SEric Cheng if (err != 0) { 6538275SEric Cheng kmem_free(iget_p, ipsz); 6548275SEric Cheng return (B_FALSE); 6558275SEric Cheng } 6568275SEric Cheng if (iget_p->num_devs == 0) { 6578275SEric Cheng kmem_free(iget_p, ipsz); 6588275SEric Cheng return (B_FALSE); 6598275SEric Cheng } 6608275SEric Cheng inum = iget_p->num_devs; 6618275SEric Cheng if (iget_p->num_devs_ret < iget_p->num_devs) { 6628275SEric Cheng /* Reallocate */ 6638275SEric Cheng nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); 6648275SEric Cheng 6658275SEric Cheng kmem_free(iget_p, ipsz); 6668275SEric Cheng ipsz = nipsz; 6678275SEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 6688275SEric Cheng 6698275SEric Cheng iget_p->num_devs_ret = inum; 670*12683SJimmy.Vetayases@oracle.com iget_p->cpu_id = oldcpuid; 6718275SEric Cheng iget_p->ino = ino; 6728275SEric Cheng iget_p->user_version = PCITOOL_VERSION; 6738275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 6748275SEric Cheng FKIOCTL, kcred, NULL); 6758275SEric Cheng if (err != 0) { 6768275SEric Cheng kmem_free(iget_p, ipsz); 6778275SEric Cheng return (B_FALSE); 6788275SEric Cheng } 6798275SEric Cheng /* defensive */ 6808275SEric Cheng if (iget_p->num_devs != iget_p->num_devs_ret) { 6818275SEric Cheng kmem_free(iget_p, ipsz); 6828275SEric Cheng return (B_FALSE); 6838275SEric Cheng } 6848275SEric Cheng } 6858275SEric Cheng 6868275SEric Cheng if (mac_search_intrinfo(iget_p, dln)) { 6878275SEric Cheng kmem_free(iget_p, ipsz); 6888275SEric Cheng return (B_TRUE); 6898275SEric Cheng } 6908275SEric Cheng kmem_free(iget_p, ipsz); 6918275SEric Cheng return (B_FALSE); 6928275SEric Cheng } 6938275SEric Cheng 6948275SEric Cheng /* 6958275SEric Cheng * Get the interrupts and check each one to see if it is for our device. 6968275SEric Cheng */ 6978275SEric Cheng static int 6988275SEric Cheng mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) 6998275SEric Cheng { 7008275SEric Cheng pcitool_intr_info_t intr_info; 7018275SEric Cheng int err; 7028275SEric Cheng int ino; 703*12683SJimmy.Vetayases@oracle.com int oldcpuid; 7048275SEric Cheng 7058275SEric Cheng err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, 7068275SEric Cheng FKIOCTL, kcred, NULL); 7078275SEric Cheng if (err != 0) 7088275SEric Cheng return (-1); 7098275SEric Cheng 710*12683SJimmy.Vetayases@oracle.com for (oldcpuid = 0; oldcpuid < intr_info.num_cpu; oldcpuid++) { 711*12683SJimmy.Vetayases@oracle.com for (ino = 0; ino < intr_info.num_intr; ino++) { 712*12683SJimmy.Vetayases@oracle.com if (mac_get_single_intr(lh, oldcpuid, ino, dln)) { 713*12683SJimmy.Vetayases@oracle.com if (dln->cpu_id == cpuid) 714*12683SJimmy.Vetayases@oracle.com return (0); 715*12683SJimmy.Vetayases@oracle.com return (1); 716*12683SJimmy.Vetayases@oracle.com } 7178275SEric Cheng } 7188275SEric Cheng } 7198275SEric Cheng return (-1); 7208275SEric Cheng } 7218275SEric Cheng 7228275SEric Cheng /* 7238275SEric Cheng * Obtain the nexus parent node info. for mdip. 7248275SEric Cheng */ 7258275SEric Cheng static dev_info_t * 7268275SEric Cheng mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) 7278275SEric Cheng { 7288275SEric Cheng struct dev_info *tdip = (struct dev_info *)mdip; 7298275SEric Cheng struct ddi_minor_data *minordata; 7308275SEric Cheng int circ; 7318275SEric Cheng dev_info_t *pdip; 7328275SEric Cheng char pathname[MAXPATHLEN]; 7338275SEric Cheng 7348275SEric Cheng while (tdip != NULL) { 7359359SEric Cheng /* 7369359SEric Cheng * The netboot code could call this function while walking the 7379359SEric Cheng * device tree so we need to use ndi_devi_tryenter() here to 7389359SEric Cheng * avoid deadlock. 7399359SEric Cheng */ 7409359SEric Cheng if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0) 7419359SEric Cheng break; 7429359SEric Cheng 7438275SEric Cheng for (minordata = tdip->devi_minor; minordata != NULL; 7448275SEric Cheng minordata = minordata->next) { 7458275SEric Cheng if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, 7468275SEric Cheng strlen(DDI_NT_INTRCTL)) == 0) { 7478275SEric Cheng pdip = minordata->dip; 7488275SEric Cheng (void) ddi_pathname(pdip, pathname); 7498275SEric Cheng (void) snprintf(dln->nexus_path, MAXPATHLEN, 7508275SEric Cheng "/devices%s:intr", pathname); 7518275SEric Cheng (void) ddi_pathname_minor(minordata, pathname); 7528275SEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 7538275SEric Cheng return (pdip); 7548275SEric Cheng } 7558275SEric Cheng } 7568275SEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 7578275SEric Cheng tdip = tdip->devi_parent; 7588275SEric Cheng } 7598275SEric Cheng return (NULL); 7608275SEric Cheng } 7618275SEric Cheng 7628275SEric Cheng /* 7638275SEric Cheng * For a primary MAC client, if the user has set a list or CPUs or 7648275SEric Cheng * we have obtained it implicitly, we try to retarget the interrupt 7658275SEric Cheng * for that device on one of the CPUs in the list. 7668275SEric Cheng * We assign the interrupt to the same CPU as the poll thread. 7678275SEric Cheng */ 7688275SEric Cheng static boolean_t 7698275SEric Cheng mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) 7708275SEric Cheng { 7718275SEric Cheng ldi_handle_t lh = NULL; 7728275SEric Cheng ldi_ident_t li = NULL; 7738275SEric Cheng int err; 7748275SEric Cheng int ret; 7758275SEric Cheng mac_dladm_intr_t dln; 7768275SEric Cheng dev_info_t *dip; 7778275SEric Cheng struct ddi_minor_data *minordata; 7788275SEric Cheng 7798275SEric Cheng dln.nexus_path[0] = '\0'; 7808275SEric Cheng dln.driver_path[0] = '\0'; 7818275SEric Cheng 7828275SEric Cheng minordata = ((struct dev_info *)mdip)->devi_minor; 7838275SEric Cheng while (minordata != NULL) { 7848275SEric Cheng if (minordata->type == DDM_MINOR) 7858275SEric Cheng break; 7868275SEric Cheng minordata = minordata->next; 7878275SEric Cheng } 7888275SEric Cheng if (minordata == NULL) 7898275SEric Cheng return (B_FALSE); 7908275SEric Cheng 7918275SEric Cheng (void) ddi_pathname_minor(minordata, dln.driver_path); 7928275SEric Cheng 7938275SEric Cheng dip = mac_get_nexus_node(mdip, &dln); 7948275SEric Cheng /* defensive */ 7958275SEric Cheng if (dip == NULL) 7968275SEric Cheng return (B_FALSE); 7978275SEric Cheng 7988275SEric Cheng err = ldi_ident_from_major(ddi_driver_major(dip), &li); 7998275SEric Cheng if (err != 0) 8008275SEric Cheng return (B_FALSE); 8018275SEric Cheng 8028275SEric Cheng err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); 8038275SEric Cheng if (err != 0) 8048275SEric Cheng return (B_FALSE); 8058275SEric Cheng 8068275SEric Cheng ret = mac_validate_intr(lh, &dln, cpuid); 8078275SEric Cheng if (ret < 0) { 8088275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 8098275SEric Cheng return (B_FALSE); 8108275SEric Cheng } 8118275SEric Cheng /* cmn_note? */ 8128275SEric Cheng if (ret != 0) 813*12683SJimmy.Vetayases@oracle.com if ((err = (mac_set_intr(lh, cpuid, dln.cpu_id, dln.ino))) 814*12683SJimmy.Vetayases@oracle.com != 0) { 8158275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 8168275SEric Cheng return (B_FALSE); 8178275SEric Cheng } 8188275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 8198275SEric Cheng return (B_TRUE); 8208275SEric Cheng } 8218275SEric Cheng 8228275SEric Cheng void 8238275SEric Cheng mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) 8248275SEric Cheng { 8258275SEric Cheng dev_info_t *mdip = (dev_info_t *)arg; 8268275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 8278275SEric Cheng mac_resource_props_t *mrp; 8288275SEric Cheng mac_perim_handle_t mph; 82911878SVenu.Iyer@Sun.COM flow_entry_t *flent = mcip->mci_flent; 83011878SVenu.Iyer@Sun.COM mac_soft_ring_set_t *rx_srs; 83111878SVenu.Iyer@Sun.COM mac_cpus_t *srs_cpu; 8328275SEric Cheng 83311878SVenu.Iyer@Sun.COM if (!mac_check_interrupt_binding(mdip, cpuid)) 83411878SVenu.Iyer@Sun.COM cpuid = -1; 8358275SEric Cheng mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); 8368275SEric Cheng mrp = MCIP_RESOURCE_PROPS(mcip); 83711878SVenu.Iyer@Sun.COM mrp->mrp_rx_intr_cpu = cpuid; 83811878SVenu.Iyer@Sun.COM if (flent != NULL && flent->fe_rx_srs_cnt == 2) { 83911878SVenu.Iyer@Sun.COM rx_srs = flent->fe_rx_srs[1]; 84011878SVenu.Iyer@Sun.COM srs_cpu = &rx_srs->srs_cpu; 84111878SVenu.Iyer@Sun.COM srs_cpu->mc_rx_intr_cpu = cpuid; 84211878SVenu.Iyer@Sun.COM } 8438275SEric Cheng mac_perim_exit(mph); 8448275SEric Cheng } 8458275SEric Cheng 8468275SEric Cheng int32_t 8478275SEric Cheng mac_client_intr_cpu(mac_client_handle_t mch) 8488275SEric Cheng { 8498275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 8508275SEric Cheng mac_cpus_t *srs_cpu; 8518275SEric Cheng mac_soft_ring_set_t *rx_srs; 8528275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 8538275SEric Cheng mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 85411878SVenu.Iyer@Sun.COM mac_ring_t *ring; 85511878SVenu.Iyer@Sun.COM mac_intr_t *mintr; 8568275SEric Cheng 8578275SEric Cheng /* 8588275SEric Cheng * Check if we need to retarget the interrupt. We do this only 8598275SEric Cheng * for the primary MAC client. We do this if we have the only 86011878SVenu.Iyer@Sun.COM * exclusive ring in the group. 8618275SEric Cheng */ 8628275SEric Cheng if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { 8638275SEric Cheng rx_srs = flent->fe_rx_srs[1]; 8648275SEric Cheng srs_cpu = &rx_srs->srs_cpu; 86511878SVenu.Iyer@Sun.COM ring = rx_srs->srs_ring; 86611878SVenu.Iyer@Sun.COM mintr = &ring->mr_info.mri_intr; 86711878SVenu.Iyer@Sun.COM /* 86811878SVenu.Iyer@Sun.COM * If ddi_handle is present or the poll CPU is 86911878SVenu.Iyer@Sun.COM * already bound to the interrupt CPU, return -1. 87011878SVenu.Iyer@Sun.COM */ 87111878SVenu.Iyer@Sun.COM if (mintr->mi_ddi_handle != NULL || 87211878SVenu.Iyer@Sun.COM ((mrp->mrp_ncpus != 0) && 87311878SVenu.Iyer@Sun.COM (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { 8748275SEric Cheng return (-1); 87511878SVenu.Iyer@Sun.COM } 87611878SVenu.Iyer@Sun.COM return (srs_cpu->mc_rx_pollid); 8778275SEric Cheng } 8788275SEric Cheng return (-1); 8798275SEric Cheng } 8808275SEric Cheng 8818275SEric Cheng void * 8828275SEric Cheng mac_get_devinfo(mac_handle_t mh) 8838275SEric Cheng { 8848275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 8858275SEric Cheng 8868275SEric Cheng return ((void *)mip->mi_dip); 8878275SEric Cheng } 8888833SVenu.Iyer@Sun.COM 88911528SBaban.Kenkre@Sun.COM #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) 8908833SVenu.Iyer@Sun.COM #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 8918833SVenu.Iyer@Sun.COM #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 8928833SVenu.Iyer@Sun.COM 8938833SVenu.Iyer@Sun.COM uint64_t 8948833SVenu.Iyer@Sun.COM mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) 8958833SVenu.Iyer@Sun.COM { 8968833SVenu.Iyer@Sun.COM struct ether_header *ehp; 8978833SVenu.Iyer@Sun.COM uint64_t hash = 0; 8988833SVenu.Iyer@Sun.COM uint16_t sap; 8998833SVenu.Iyer@Sun.COM uint_t skip_len; 9008833SVenu.Iyer@Sun.COM uint8_t proto; 90111528SBaban.Kenkre@Sun.COM boolean_t ip_fragmented; 9028833SVenu.Iyer@Sun.COM 9038833SVenu.Iyer@Sun.COM /* 9048833SVenu.Iyer@Sun.COM * We may want to have one of these per MAC type plugin in the 9058833SVenu.Iyer@Sun.COM * future. For now supports only ethernet. 9068833SVenu.Iyer@Sun.COM */ 9078833SVenu.Iyer@Sun.COM if (media != DL_ETHER) 9088833SVenu.Iyer@Sun.COM return (0L); 9098833SVenu.Iyer@Sun.COM 9108833SVenu.Iyer@Sun.COM /* for now we support only outbound packets */ 9118833SVenu.Iyer@Sun.COM ASSERT(is_outbound); 9128833SVenu.Iyer@Sun.COM ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 9138833SVenu.Iyer@Sun.COM ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 9148833SVenu.Iyer@Sun.COM 9158833SVenu.Iyer@Sun.COM /* compute L2 hash */ 9168833SVenu.Iyer@Sun.COM 9178833SVenu.Iyer@Sun.COM ehp = (struct ether_header *)mp->b_rptr; 9188833SVenu.Iyer@Sun.COM 9198833SVenu.Iyer@Sun.COM if ((policy & MAC_PKT_HASH_L2) != 0) { 9208833SVenu.Iyer@Sun.COM uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 9218833SVenu.Iyer@Sun.COM uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 9228833SVenu.Iyer@Sun.COM hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); 9238833SVenu.Iyer@Sun.COM policy &= ~MAC_PKT_HASH_L2; 9248833SVenu.Iyer@Sun.COM } 9258833SVenu.Iyer@Sun.COM 9268833SVenu.Iyer@Sun.COM if (policy == 0) 9278833SVenu.Iyer@Sun.COM goto done; 9288833SVenu.Iyer@Sun.COM 9298833SVenu.Iyer@Sun.COM /* skip ethernet header */ 9308833SVenu.Iyer@Sun.COM 9318833SVenu.Iyer@Sun.COM sap = ntohs(ehp->ether_type); 9328833SVenu.Iyer@Sun.COM if (sap == ETHERTYPE_VLAN) { 9338833SVenu.Iyer@Sun.COM struct ether_vlan_header *evhp; 9348833SVenu.Iyer@Sun.COM mblk_t *newmp = NULL; 9358833SVenu.Iyer@Sun.COM 9368833SVenu.Iyer@Sun.COM skip_len = sizeof (struct ether_vlan_header); 9378833SVenu.Iyer@Sun.COM if (MBLKL(mp) < skip_len) { 9388833SVenu.Iyer@Sun.COM /* the vlan tag is the payload, pull up first */ 9398833SVenu.Iyer@Sun.COM newmp = msgpullup(mp, -1); 9408833SVenu.Iyer@Sun.COM if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 9418833SVenu.Iyer@Sun.COM goto done; 9428833SVenu.Iyer@Sun.COM } 9438833SVenu.Iyer@Sun.COM evhp = (struct ether_vlan_header *)newmp->b_rptr; 9448833SVenu.Iyer@Sun.COM } else { 9458833SVenu.Iyer@Sun.COM evhp = (struct ether_vlan_header *)mp->b_rptr; 9468833SVenu.Iyer@Sun.COM } 9478833SVenu.Iyer@Sun.COM 9488833SVenu.Iyer@Sun.COM sap = ntohs(evhp->ether_type); 9498833SVenu.Iyer@Sun.COM freemsg(newmp); 9508833SVenu.Iyer@Sun.COM } else { 9518833SVenu.Iyer@Sun.COM skip_len = sizeof (struct ether_header); 9528833SVenu.Iyer@Sun.COM } 9538833SVenu.Iyer@Sun.COM 9548833SVenu.Iyer@Sun.COM /* if ethernet header is in its own mblk, skip it */ 9558833SVenu.Iyer@Sun.COM if (MBLKL(mp) <= skip_len) { 9568833SVenu.Iyer@Sun.COM skip_len -= MBLKL(mp); 9578833SVenu.Iyer@Sun.COM mp = mp->b_cont; 9588833SVenu.Iyer@Sun.COM if (mp == NULL) 9598833SVenu.Iyer@Sun.COM goto done; 9608833SVenu.Iyer@Sun.COM } 9618833SVenu.Iyer@Sun.COM 9628833SVenu.Iyer@Sun.COM sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 9638833SVenu.Iyer@Sun.COM 9648833SVenu.Iyer@Sun.COM /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 9658833SVenu.Iyer@Sun.COM 9668833SVenu.Iyer@Sun.COM switch (sap) { 9678833SVenu.Iyer@Sun.COM case ETHERTYPE_IP: { 9688833SVenu.Iyer@Sun.COM ipha_t *iphp; 9698833SVenu.Iyer@Sun.COM 9708833SVenu.Iyer@Sun.COM /* 9718833SVenu.Iyer@Sun.COM * If the header is not aligned or the header doesn't fit 9728833SVenu.Iyer@Sun.COM * in the mblk, bail now. Note that this may cause packets 9738833SVenu.Iyer@Sun.COM * reordering. 9748833SVenu.Iyer@Sun.COM */ 9758833SVenu.Iyer@Sun.COM iphp = (ipha_t *)(mp->b_rptr + skip_len); 9768833SVenu.Iyer@Sun.COM if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || 9778833SVenu.Iyer@Sun.COM !OK_32PTR((char *)iphp)) 9788833SVenu.Iyer@Sun.COM goto done; 9798833SVenu.Iyer@Sun.COM 9808833SVenu.Iyer@Sun.COM proto = iphp->ipha_protocol; 9818833SVenu.Iyer@Sun.COM skip_len += IPH_HDR_LENGTH(iphp); 9828833SVenu.Iyer@Sun.COM 98311528SBaban.Kenkre@Sun.COM /* Check if the packet is fragmented. */ 98411528SBaban.Kenkre@Sun.COM ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & 98511528SBaban.Kenkre@Sun.COM IPH_OFFSET; 98611528SBaban.Kenkre@Sun.COM 98711528SBaban.Kenkre@Sun.COM /* 98811528SBaban.Kenkre@Sun.COM * For fragmented packets, use addresses in addition to 98911528SBaban.Kenkre@Sun.COM * the frag_id to generate the hash inorder to get 99011528SBaban.Kenkre@Sun.COM * better distribution. 99111528SBaban.Kenkre@Sun.COM */ 99211528SBaban.Kenkre@Sun.COM if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { 9938833SVenu.Iyer@Sun.COM uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 9948833SVenu.Iyer@Sun.COM uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 9958833SVenu.Iyer@Sun.COM 9968833SVenu.Iyer@Sun.COM hash ^= (PKT_HASH_4BYTES(ip_src) ^ 9978833SVenu.Iyer@Sun.COM PKT_HASH_4BYTES(ip_dst)); 9988833SVenu.Iyer@Sun.COM policy &= ~MAC_PKT_HASH_L3; 9998833SVenu.Iyer@Sun.COM } 100011528SBaban.Kenkre@Sun.COM 100111528SBaban.Kenkre@Sun.COM if (ip_fragmented) { 100211528SBaban.Kenkre@Sun.COM uint8_t *identp = (uint8_t *)&iphp->ipha_ident; 100311528SBaban.Kenkre@Sun.COM hash ^= PKT_HASH_2BYTES(identp); 100411528SBaban.Kenkre@Sun.COM goto done; 100511528SBaban.Kenkre@Sun.COM } 10068833SVenu.Iyer@Sun.COM break; 10078833SVenu.Iyer@Sun.COM } 10088833SVenu.Iyer@Sun.COM case ETHERTYPE_IPV6: { 10098833SVenu.Iyer@Sun.COM ip6_t *ip6hp; 101011878SVenu.Iyer@Sun.COM ip6_frag_t *frag = NULL; 10118833SVenu.Iyer@Sun.COM uint16_t hdr_length; 10128833SVenu.Iyer@Sun.COM 10138833SVenu.Iyer@Sun.COM /* 10148833SVenu.Iyer@Sun.COM * If the header is not aligned or the header doesn't fit 10158833SVenu.Iyer@Sun.COM * in the mblk, bail now. Note that this may cause packets 10168833SVenu.Iyer@Sun.COM * reordering. 10178833SVenu.Iyer@Sun.COM */ 10188833SVenu.Iyer@Sun.COM 10198833SVenu.Iyer@Sun.COM ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 10208833SVenu.Iyer@Sun.COM if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || 10218833SVenu.Iyer@Sun.COM !OK_32PTR((char *)ip6hp)) 10228833SVenu.Iyer@Sun.COM goto done; 10238833SVenu.Iyer@Sun.COM 102411878SVenu.Iyer@Sun.COM if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, 102511878SVenu.Iyer@Sun.COM &proto, &frag)) 10268833SVenu.Iyer@Sun.COM goto done; 10278833SVenu.Iyer@Sun.COM skip_len += hdr_length; 10288833SVenu.Iyer@Sun.COM 102911528SBaban.Kenkre@Sun.COM /* 103011528SBaban.Kenkre@Sun.COM * For fragmented packets, use addresses in addition to 103111528SBaban.Kenkre@Sun.COM * the frag_id to generate the hash inorder to get 103211528SBaban.Kenkre@Sun.COM * better distribution. 103311528SBaban.Kenkre@Sun.COM */ 103411878SVenu.Iyer@Sun.COM if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { 10358833SVenu.Iyer@Sun.COM uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 10368833SVenu.Iyer@Sun.COM uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 10378833SVenu.Iyer@Sun.COM 10388833SVenu.Iyer@Sun.COM hash ^= (PKT_HASH_4BYTES(ip_src) ^ 10398833SVenu.Iyer@Sun.COM PKT_HASH_4BYTES(ip_dst)); 10408833SVenu.Iyer@Sun.COM policy &= ~MAC_PKT_HASH_L3; 10418833SVenu.Iyer@Sun.COM } 104211528SBaban.Kenkre@Sun.COM 104311878SVenu.Iyer@Sun.COM if (frag != NULL) { 104411878SVenu.Iyer@Sun.COM uint8_t *identp = (uint8_t *)&frag->ip6f_ident; 104511528SBaban.Kenkre@Sun.COM hash ^= PKT_HASH_4BYTES(identp); 104611528SBaban.Kenkre@Sun.COM goto done; 104711528SBaban.Kenkre@Sun.COM } 10488833SVenu.Iyer@Sun.COM break; 10498833SVenu.Iyer@Sun.COM } 10508833SVenu.Iyer@Sun.COM default: 10518833SVenu.Iyer@Sun.COM goto done; 10528833SVenu.Iyer@Sun.COM } 10538833SVenu.Iyer@Sun.COM 10548833SVenu.Iyer@Sun.COM if (policy == 0) 10558833SVenu.Iyer@Sun.COM goto done; 10568833SVenu.Iyer@Sun.COM 10578833SVenu.Iyer@Sun.COM /* if ip header is in its own mblk, skip it */ 10588833SVenu.Iyer@Sun.COM if (MBLKL(mp) <= skip_len) { 10598833SVenu.Iyer@Sun.COM skip_len -= MBLKL(mp); 10608833SVenu.Iyer@Sun.COM mp = mp->b_cont; 10618833SVenu.Iyer@Sun.COM if (mp == NULL) 10628833SVenu.Iyer@Sun.COM goto done; 10638833SVenu.Iyer@Sun.COM } 10648833SVenu.Iyer@Sun.COM 10658833SVenu.Iyer@Sun.COM /* parse ULP header */ 10668833SVenu.Iyer@Sun.COM again: 10678833SVenu.Iyer@Sun.COM switch (proto) { 10688833SVenu.Iyer@Sun.COM case IPPROTO_TCP: 10698833SVenu.Iyer@Sun.COM case IPPROTO_UDP: 10708833SVenu.Iyer@Sun.COM case IPPROTO_ESP: 10718833SVenu.Iyer@Sun.COM case IPPROTO_SCTP: 10728833SVenu.Iyer@Sun.COM /* 10738833SVenu.Iyer@Sun.COM * These Internet Protocols are intentionally designed 10748833SVenu.Iyer@Sun.COM * for hashing from the git-go. Port numbers are in the first 10758833SVenu.Iyer@Sun.COM * word for transports, SPI is first for ESP. 10768833SVenu.Iyer@Sun.COM */ 10778833SVenu.Iyer@Sun.COM if (mp->b_rptr + skip_len + 4 > mp->b_wptr) 10788833SVenu.Iyer@Sun.COM goto done; 10798833SVenu.Iyer@Sun.COM hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); 10808833SVenu.Iyer@Sun.COM break; 10818833SVenu.Iyer@Sun.COM 10828833SVenu.Iyer@Sun.COM case IPPROTO_AH: { 10838833SVenu.Iyer@Sun.COM ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 10848833SVenu.Iyer@Sun.COM uint_t ah_length = AH_TOTAL_LEN(ah); 10858833SVenu.Iyer@Sun.COM 10868833SVenu.Iyer@Sun.COM if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) 10878833SVenu.Iyer@Sun.COM goto done; 10888833SVenu.Iyer@Sun.COM 10898833SVenu.Iyer@Sun.COM proto = ah->ah_nexthdr; 10908833SVenu.Iyer@Sun.COM skip_len += ah_length; 10918833SVenu.Iyer@Sun.COM 10928833SVenu.Iyer@Sun.COM /* if AH header is in its own mblk, skip it */ 10938833SVenu.Iyer@Sun.COM if (MBLKL(mp) <= skip_len) { 10948833SVenu.Iyer@Sun.COM skip_len -= MBLKL(mp); 10958833SVenu.Iyer@Sun.COM mp = mp->b_cont; 10968833SVenu.Iyer@Sun.COM if (mp == NULL) 10978833SVenu.Iyer@Sun.COM goto done; 10988833SVenu.Iyer@Sun.COM } 10998833SVenu.Iyer@Sun.COM 11008833SVenu.Iyer@Sun.COM goto again; 11018833SVenu.Iyer@Sun.COM } 11028833SVenu.Iyer@Sun.COM } 11038833SVenu.Iyer@Sun.COM 11048833SVenu.Iyer@Sun.COM done: 11058833SVenu.Iyer@Sun.COM return (hash); 11068833SVenu.Iyer@Sun.COM } 1107