1*8275SEric Cheng /* 2*8275SEric Cheng * CDDL HEADER START 3*8275SEric Cheng * 4*8275SEric Cheng * The contents of this file are subject to the terms of the 5*8275SEric Cheng * Common Development and Distribution License (the "License"). 6*8275SEric Cheng * You may not use this file except in compliance with the License. 7*8275SEric Cheng * 8*8275SEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*8275SEric Cheng * or http://www.opensolaris.org/os/licensing. 10*8275SEric Cheng * See the License for the specific language governing permissions 11*8275SEric Cheng * and limitations under the License. 12*8275SEric Cheng * 13*8275SEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 14*8275SEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*8275SEric Cheng * If applicable, add the following below this CDDL HEADER, with the 16*8275SEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 17*8275SEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 18*8275SEric Cheng * 19*8275SEric Cheng * CDDL HEADER END 20*8275SEric Cheng */ 21*8275SEric Cheng /* 22*8275SEric Cheng * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23*8275SEric Cheng * Use is subject to license terms. 24*8275SEric Cheng */ 25*8275SEric Cheng 26*8275SEric Cheng /* 27*8275SEric Cheng * MAC Services Module - misc utilities 28*8275SEric Cheng */ 29*8275SEric Cheng 30*8275SEric Cheng #include <sys/types.h> 31*8275SEric Cheng #include <sys/mac.h> 32*8275SEric Cheng #include <sys/mac_impl.h> 33*8275SEric Cheng #include <sys/mac_client_priv.h> 34*8275SEric Cheng #include <sys/mac_client_impl.h> 35*8275SEric Cheng #include <sys/mac_soft_ring.h> 36*8275SEric Cheng #include <sys/strsubr.h> 37*8275SEric Cheng #include <sys/strsun.h> 38*8275SEric Cheng #include <sys/vlan.h> 39*8275SEric Cheng #include <sys/pattr.h> 40*8275SEric Cheng #include <sys/pci_tools.h> 41*8275SEric Cheng #include <inet/ip.h> 42*8275SEric Cheng #include <inet/ip_impl.h> 43*8275SEric Cheng #include <inet/ip6.h> 44*8275SEric Cheng #include <sys/vtrace.h> 45*8275SEric Cheng #include <sys/dlpi.h> 46*8275SEric Cheng #include <sys/sunndi.h> 47*8275SEric Cheng 48*8275SEric Cheng /* 49*8275SEric Cheng * Copy an mblk, preserving its hardware checksum flags. 50*8275SEric Cheng */ 51*8275SEric Cheng static mblk_t * 52*8275SEric Cheng mac_copymsg_cksum(mblk_t *mp) 53*8275SEric Cheng { 54*8275SEric Cheng mblk_t *mp1; 55*8275SEric Cheng uint32_t start, stuff, end, value, flags; 56*8275SEric Cheng 57*8275SEric Cheng mp1 = copymsg(mp); 58*8275SEric Cheng if (mp1 == NULL) 59*8275SEric Cheng return (NULL); 60*8275SEric Cheng 61*8275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 62*8275SEric Cheng (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 63*8275SEric Cheng flags, KM_NOSLEEP); 64*8275SEric Cheng 65*8275SEric Cheng return (mp1); 66*8275SEric Cheng } 67*8275SEric Cheng 68*8275SEric Cheng /* 69*8275SEric Cheng * Copy an mblk chain, presenting the hardware checksum flags of the 70*8275SEric Cheng * individual mblks. 71*8275SEric Cheng */ 72*8275SEric Cheng mblk_t * 73*8275SEric Cheng mac_copymsgchain_cksum(mblk_t *mp) 74*8275SEric Cheng { 75*8275SEric Cheng mblk_t *nmp = NULL; 76*8275SEric Cheng mblk_t **nmpp = &nmp; 77*8275SEric Cheng 78*8275SEric Cheng for (; mp != NULL; mp = mp->b_next) { 79*8275SEric Cheng if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { 80*8275SEric Cheng freemsgchain(nmp); 81*8275SEric Cheng return (NULL); 82*8275SEric Cheng } 83*8275SEric Cheng 84*8275SEric Cheng nmpp = &((*nmpp)->b_next); 85*8275SEric Cheng } 86*8275SEric Cheng 87*8275SEric Cheng return (nmp); 88*8275SEric Cheng } 89*8275SEric Cheng 90*8275SEric Cheng /* 91*8275SEric Cheng * Process the specified mblk chain for proper handling of hardware 92*8275SEric Cheng * checksum offload. This routine is invoked for loopback traffic 93*8275SEric Cheng * between MAC clients. 94*8275SEric Cheng * The function handles a NULL mblk chain passed as argument. 95*8275SEric Cheng */ 96*8275SEric Cheng mblk_t * 97*8275SEric Cheng mac_fix_cksum(mblk_t *mp_chain) 98*8275SEric Cheng { 99*8275SEric Cheng mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 100*8275SEric Cheng uint32_t flags, start, stuff, end, value; 101*8275SEric Cheng 102*8275SEric Cheng for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 103*8275SEric Cheng uint16_t len; 104*8275SEric Cheng uint32_t offset; 105*8275SEric Cheng struct ether_header *ehp; 106*8275SEric Cheng uint16_t sap; 107*8275SEric Cheng 108*8275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 109*8275SEric Cheng &flags); 110*8275SEric Cheng if (flags == 0) 111*8275SEric Cheng continue; 112*8275SEric Cheng 113*8275SEric Cheng /* 114*8275SEric Cheng * Since the processing of checksum offload for loopback 115*8275SEric Cheng * traffic requires modification of the packet contents, 116*8275SEric Cheng * ensure sure that we are always modifying our own copy. 117*8275SEric Cheng */ 118*8275SEric Cheng if (DB_REF(mp) > 1) { 119*8275SEric Cheng mp1 = copymsg(mp); 120*8275SEric Cheng if (mp1 == NULL) 121*8275SEric Cheng continue; 122*8275SEric Cheng mp1->b_next = mp->b_next; 123*8275SEric Cheng mp->b_next = NULL; 124*8275SEric Cheng freemsg(mp); 125*8275SEric Cheng if (prev != NULL) 126*8275SEric Cheng prev->b_next = mp1; 127*8275SEric Cheng else 128*8275SEric Cheng new_chain = mp1; 129*8275SEric Cheng mp = mp1; 130*8275SEric Cheng } 131*8275SEric Cheng 132*8275SEric Cheng /* 133*8275SEric Cheng * Ethernet, and optionally VLAN header. 134*8275SEric Cheng */ 135*8275SEric Cheng /* LINTED: improper alignment cast */ 136*8275SEric Cheng ehp = (struct ether_header *)mp->b_rptr; 137*8275SEric Cheng if (ntohs(ehp->ether_type) == VLAN_TPID) { 138*8275SEric Cheng struct ether_vlan_header *evhp; 139*8275SEric Cheng 140*8275SEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 141*8275SEric Cheng /* LINTED: improper alignment cast */ 142*8275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 143*8275SEric Cheng sap = ntohs(evhp->ether_type); 144*8275SEric Cheng offset = sizeof (struct ether_vlan_header); 145*8275SEric Cheng } else { 146*8275SEric Cheng sap = ntohs(ehp->ether_type); 147*8275SEric Cheng offset = sizeof (struct ether_header); 148*8275SEric Cheng } 149*8275SEric Cheng 150*8275SEric Cheng if (MBLKL(mp) <= offset) { 151*8275SEric Cheng offset -= MBLKL(mp); 152*8275SEric Cheng if (mp->b_cont == NULL) { 153*8275SEric Cheng /* corrupted packet, skip it */ 154*8275SEric Cheng if (prev != NULL) 155*8275SEric Cheng prev->b_next = mp->b_next; 156*8275SEric Cheng else 157*8275SEric Cheng new_chain = mp->b_next; 158*8275SEric Cheng mp1 = mp->b_next; 159*8275SEric Cheng mp->b_next = NULL; 160*8275SEric Cheng freemsg(mp); 161*8275SEric Cheng mp = mp1; 162*8275SEric Cheng continue; 163*8275SEric Cheng } 164*8275SEric Cheng mp = mp->b_cont; 165*8275SEric Cheng } 166*8275SEric Cheng 167*8275SEric Cheng if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 168*8275SEric Cheng ipha_t *ipha = NULL; 169*8275SEric Cheng 170*8275SEric Cheng /* 171*8275SEric Cheng * In order to compute the full and header 172*8275SEric Cheng * checksums, we need to find and parse 173*8275SEric Cheng * the IP and/or ULP headers. 174*8275SEric Cheng */ 175*8275SEric Cheng 176*8275SEric Cheng sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 177*8275SEric Cheng 178*8275SEric Cheng /* 179*8275SEric Cheng * IP header. 180*8275SEric Cheng */ 181*8275SEric Cheng if (sap != ETHERTYPE_IP) 182*8275SEric Cheng continue; 183*8275SEric Cheng 184*8275SEric Cheng ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 185*8275SEric Cheng /* LINTED: improper alignment cast */ 186*8275SEric Cheng ipha = (ipha_t *)(mp->b_rptr + offset); 187*8275SEric Cheng 188*8275SEric Cheng if (flags & HCK_FULLCKSUM) { 189*8275SEric Cheng ipaddr_t src, dst; 190*8275SEric Cheng uint32_t cksum; 191*8275SEric Cheng uint16_t *up; 192*8275SEric Cheng uint8_t proto; 193*8275SEric Cheng 194*8275SEric Cheng /* 195*8275SEric Cheng * Pointer to checksum field in ULP header. 196*8275SEric Cheng */ 197*8275SEric Cheng proto = ipha->ipha_protocol; 198*8275SEric Cheng ASSERT(ipha->ipha_version_and_hdr_length == 199*8275SEric Cheng IP_SIMPLE_HDR_VERSION); 200*8275SEric Cheng if (proto == IPPROTO_TCP) { 201*8275SEric Cheng /* LINTED: improper alignment cast */ 202*8275SEric Cheng up = IPH_TCPH_CHECKSUMP(ipha, 203*8275SEric Cheng IP_SIMPLE_HDR_LENGTH); 204*8275SEric Cheng } else { 205*8275SEric Cheng ASSERT(proto == IPPROTO_UDP); 206*8275SEric Cheng /* LINTED: improper alignment cast */ 207*8275SEric Cheng up = IPH_UDPH_CHECKSUMP(ipha, 208*8275SEric Cheng IP_SIMPLE_HDR_LENGTH); 209*8275SEric Cheng } 210*8275SEric Cheng 211*8275SEric Cheng /* 212*8275SEric Cheng * Pseudo-header checksum. 213*8275SEric Cheng */ 214*8275SEric Cheng src = ipha->ipha_src; 215*8275SEric Cheng dst = ipha->ipha_dst; 216*8275SEric Cheng len = ntohs(ipha->ipha_length) - 217*8275SEric Cheng IP_SIMPLE_HDR_LENGTH; 218*8275SEric Cheng 219*8275SEric Cheng cksum = (dst >> 16) + (dst & 0xFFFF) + 220*8275SEric Cheng (src >> 16) + (src & 0xFFFF); 221*8275SEric Cheng cksum += htons(len); 222*8275SEric Cheng 223*8275SEric Cheng /* 224*8275SEric Cheng * The checksum value stored in the packet needs 225*8275SEric Cheng * to be correct. Compute it here. 226*8275SEric Cheng */ 227*8275SEric Cheng *up = 0; 228*8275SEric Cheng cksum += (((proto) == IPPROTO_UDP) ? 229*8275SEric Cheng IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 230*8275SEric Cheng cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 231*8275SEric Cheng offset, cksum); 232*8275SEric Cheng *(up) = (uint16_t)(cksum ? cksum : ~cksum); 233*8275SEric Cheng 234*8275SEric Cheng flags |= HCK_FULLCKSUM_OK; 235*8275SEric Cheng value = 0xffff; 236*8275SEric Cheng } 237*8275SEric Cheng 238*8275SEric Cheng if (flags & HCK_IPV4_HDRCKSUM) { 239*8275SEric Cheng ASSERT(ipha != NULL); 240*8275SEric Cheng ipha->ipha_hdr_checksum = 241*8275SEric Cheng (uint16_t)ip_csum_hdr(ipha); 242*8275SEric Cheng } 243*8275SEric Cheng } 244*8275SEric Cheng 245*8275SEric Cheng if (flags & HCK_PARTIALCKSUM) { 246*8275SEric Cheng uint16_t *up, partial, cksum; 247*8275SEric Cheng uchar_t *ipp; /* ptr to beginning of IP header */ 248*8275SEric Cheng 249*8275SEric Cheng if (mp->b_cont != NULL) { 250*8275SEric Cheng mblk_t *mp1; 251*8275SEric Cheng 252*8275SEric Cheng mp1 = msgpullup(mp, offset + end); 253*8275SEric Cheng if (mp1 == NULL) 254*8275SEric Cheng continue; 255*8275SEric Cheng mp1->b_next = mp->b_next; 256*8275SEric Cheng mp->b_next = NULL; 257*8275SEric Cheng freemsg(mp); 258*8275SEric Cheng if (prev != NULL) 259*8275SEric Cheng prev->b_next = mp1; 260*8275SEric Cheng else 261*8275SEric Cheng new_chain = mp1; 262*8275SEric Cheng mp = mp1; 263*8275SEric Cheng } 264*8275SEric Cheng 265*8275SEric Cheng ipp = mp->b_rptr + offset; 266*8275SEric Cheng /* LINTED: cast may result in improper alignment */ 267*8275SEric Cheng up = (uint16_t *)((uchar_t *)ipp + stuff); 268*8275SEric Cheng partial = *up; 269*8275SEric Cheng *up = 0; 270*8275SEric Cheng 271*8275SEric Cheng cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 272*8275SEric Cheng end - start, partial); 273*8275SEric Cheng cksum = ~cksum; 274*8275SEric Cheng *up = cksum ? cksum : ~cksum; 275*8275SEric Cheng 276*8275SEric Cheng /* 277*8275SEric Cheng * Since we already computed the whole checksum, 278*8275SEric Cheng * indicate to the stack that it has already 279*8275SEric Cheng * been verified by the hardware. 280*8275SEric Cheng */ 281*8275SEric Cheng flags &= ~HCK_PARTIALCKSUM; 282*8275SEric Cheng flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 283*8275SEric Cheng value = 0xffff; 284*8275SEric Cheng } 285*8275SEric Cheng 286*8275SEric Cheng (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 287*8275SEric Cheng value, flags, KM_NOSLEEP); 288*8275SEric Cheng } 289*8275SEric Cheng 290*8275SEric Cheng return (new_chain); 291*8275SEric Cheng } 292*8275SEric Cheng 293*8275SEric Cheng /* 294*8275SEric Cheng * Add VLAN tag to the specified mblk. 295*8275SEric Cheng */ 296*8275SEric Cheng mblk_t * 297*8275SEric Cheng mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) 298*8275SEric Cheng { 299*8275SEric Cheng mblk_t *hmp; 300*8275SEric Cheng struct ether_vlan_header *evhp; 301*8275SEric Cheng struct ether_header *ehp; 302*8275SEric Cheng uint32_t start, stuff, end, value, flags; 303*8275SEric Cheng 304*8275SEric Cheng ASSERT(pri != 0 || vid != 0); 305*8275SEric Cheng 306*8275SEric Cheng /* 307*8275SEric Cheng * Allocate an mblk for the new tagged ethernet header, 308*8275SEric Cheng * and copy the MAC addresses and ethertype from the 309*8275SEric Cheng * original header. 310*8275SEric Cheng */ 311*8275SEric Cheng 312*8275SEric Cheng hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 313*8275SEric Cheng if (hmp == NULL) { 314*8275SEric Cheng freemsg(mp); 315*8275SEric Cheng return (NULL); 316*8275SEric Cheng } 317*8275SEric Cheng 318*8275SEric Cheng evhp = (struct ether_vlan_header *)hmp->b_rptr; 319*8275SEric Cheng ehp = (struct ether_header *)mp->b_rptr; 320*8275SEric Cheng 321*8275SEric Cheng bcopy(ehp, evhp, (ETHERADDRL * 2)); 322*8275SEric Cheng evhp->ether_type = ehp->ether_type; 323*8275SEric Cheng evhp->ether_tpid = htons(ETHERTYPE_VLAN); 324*8275SEric Cheng 325*8275SEric Cheng hmp->b_wptr += sizeof (struct ether_vlan_header); 326*8275SEric Cheng mp->b_rptr += sizeof (struct ether_header); 327*8275SEric Cheng 328*8275SEric Cheng /* 329*8275SEric Cheng * Free the original message if it's now empty. Link the 330*8275SEric Cheng * rest of messages to the header message. 331*8275SEric Cheng */ 332*8275SEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 333*8275SEric Cheng (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, 334*8275SEric Cheng KM_NOSLEEP); 335*8275SEric Cheng if (MBLKL(mp) == 0) { 336*8275SEric Cheng hmp->b_cont = mp->b_cont; 337*8275SEric Cheng freeb(mp); 338*8275SEric Cheng } else { 339*8275SEric Cheng hmp->b_cont = mp; 340*8275SEric Cheng } 341*8275SEric Cheng ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); 342*8275SEric Cheng 343*8275SEric Cheng /* 344*8275SEric Cheng * Initialize the new TCI (Tag Control Information). 345*8275SEric Cheng */ 346*8275SEric Cheng evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); 347*8275SEric Cheng 348*8275SEric Cheng return (hmp); 349*8275SEric Cheng } 350*8275SEric Cheng 351*8275SEric Cheng /* 352*8275SEric Cheng * Adds a VLAN tag with the specified VID and priority to each mblk of 353*8275SEric Cheng * the specified chain. 354*8275SEric Cheng */ 355*8275SEric Cheng mblk_t * 356*8275SEric Cheng mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) 357*8275SEric Cheng { 358*8275SEric Cheng mblk_t *next_mp, **prev, *mp; 359*8275SEric Cheng 360*8275SEric Cheng mp = mp_chain; 361*8275SEric Cheng prev = &mp_chain; 362*8275SEric Cheng 363*8275SEric Cheng while (mp != NULL) { 364*8275SEric Cheng next_mp = mp->b_next; 365*8275SEric Cheng mp->b_next = NULL; 366*8275SEric Cheng if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { 367*8275SEric Cheng freemsgchain(next_mp); 368*8275SEric Cheng break; 369*8275SEric Cheng } 370*8275SEric Cheng *prev = mp; 371*8275SEric Cheng prev = &mp->b_next; 372*8275SEric Cheng mp = mp->b_next = next_mp; 373*8275SEric Cheng } 374*8275SEric Cheng 375*8275SEric Cheng return (mp_chain); 376*8275SEric Cheng } 377*8275SEric Cheng 378*8275SEric Cheng /* 379*8275SEric Cheng * Strip VLAN tag 380*8275SEric Cheng */ 381*8275SEric Cheng mblk_t * 382*8275SEric Cheng mac_strip_vlan_tag(mblk_t *mp) 383*8275SEric Cheng { 384*8275SEric Cheng mblk_t *newmp; 385*8275SEric Cheng struct ether_vlan_header *evhp; 386*8275SEric Cheng 387*8275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 388*8275SEric Cheng if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 389*8275SEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 390*8275SEric Cheng 391*8275SEric Cheng if (DB_REF(mp) > 1) { 392*8275SEric Cheng newmp = copymsg(mp); 393*8275SEric Cheng if (newmp == NULL) 394*8275SEric Cheng return (NULL); 395*8275SEric Cheng freemsg(mp); 396*8275SEric Cheng mp = newmp; 397*8275SEric Cheng } 398*8275SEric Cheng 399*8275SEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 400*8275SEric Cheng 401*8275SEric Cheng ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 402*8275SEric Cheng mp->b_rptr += VLAN_TAGSZ; 403*8275SEric Cheng } 404*8275SEric Cheng return (mp); 405*8275SEric Cheng } 406*8275SEric Cheng 407*8275SEric Cheng /* 408*8275SEric Cheng * Strip VLAN tag from each mblk of the chain. 409*8275SEric Cheng */ 410*8275SEric Cheng mblk_t * 411*8275SEric Cheng mac_strip_vlan_tag_chain(mblk_t *mp_chain) 412*8275SEric Cheng { 413*8275SEric Cheng mblk_t *mp, *next_mp, **prev; 414*8275SEric Cheng 415*8275SEric Cheng mp = mp_chain; 416*8275SEric Cheng prev = &mp_chain; 417*8275SEric Cheng 418*8275SEric Cheng while (mp != NULL) { 419*8275SEric Cheng next_mp = mp->b_next; 420*8275SEric Cheng mp->b_next = NULL; 421*8275SEric Cheng if ((mp = mac_strip_vlan_tag(mp)) == NULL) { 422*8275SEric Cheng freemsgchain(next_mp); 423*8275SEric Cheng break; 424*8275SEric Cheng } 425*8275SEric Cheng *prev = mp; 426*8275SEric Cheng prev = &mp->b_next; 427*8275SEric Cheng mp = mp->b_next = next_mp; 428*8275SEric Cheng } 429*8275SEric Cheng 430*8275SEric Cheng return (mp_chain); 431*8275SEric Cheng } 432*8275SEric Cheng 433*8275SEric Cheng /* 434*8275SEric Cheng * Default callback function. Used when the datapath is not yet initialized. 435*8275SEric Cheng */ 436*8275SEric Cheng /* ARGSUSED */ 437*8275SEric Cheng void 438*8275SEric Cheng mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, 439*8275SEric Cheng boolean_t loopback) 440*8275SEric Cheng { 441*8275SEric Cheng mblk_t *mp1 = mp; 442*8275SEric Cheng 443*8275SEric Cheng while (mp1 != NULL) { 444*8275SEric Cheng mp1->b_prev = NULL; 445*8275SEric Cheng mp1->b_queue = NULL; 446*8275SEric Cheng mp1 = mp1->b_next; 447*8275SEric Cheng } 448*8275SEric Cheng freemsgchain(mp); 449*8275SEric Cheng } 450*8275SEric Cheng 451*8275SEric Cheng /* 452*8275SEric Cheng * Determines the IPv6 header length accounting for all the optional IPv6 453*8275SEric Cheng * headers (hop-by-hop, destination, routing and fragment). The header length 454*8275SEric Cheng * and next header value (a transport header) is captured. 455*8275SEric Cheng * 456*8275SEric Cheng * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 457*8275SEric Cheng * returns B_TRUE. 458*8275SEric Cheng */ 459*8275SEric Cheng boolean_t 460*8275SEric Cheng mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length, 461*8275SEric Cheng uint8_t *next_hdr) 462*8275SEric Cheng { 463*8275SEric Cheng uint16_t length; 464*8275SEric Cheng uint_t ehdrlen; 465*8275SEric Cheng uint8_t *whereptr; 466*8275SEric Cheng uint8_t *endptr; 467*8275SEric Cheng uint8_t *nexthdrp; 468*8275SEric Cheng ip6_dest_t *desthdr; 469*8275SEric Cheng ip6_rthdr_t *rthdr; 470*8275SEric Cheng ip6_frag_t *fraghdr; 471*8275SEric Cheng 472*8275SEric Cheng endptr = mp->b_wptr; 473*8275SEric Cheng if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 474*8275SEric Cheng return (B_FALSE); 475*8275SEric Cheng ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 476*8275SEric Cheng length = IPV6_HDR_LEN; 477*8275SEric Cheng whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 478*8275SEric Cheng 479*8275SEric Cheng nexthdrp = &ip6h->ip6_nxt; 480*8275SEric Cheng while (whereptr < endptr) { 481*8275SEric Cheng /* Is there enough left for len + nexthdr? */ 482*8275SEric Cheng if (whereptr + MIN_EHDR_LEN > endptr) 483*8275SEric Cheng break; 484*8275SEric Cheng 485*8275SEric Cheng switch (*nexthdrp) { 486*8275SEric Cheng case IPPROTO_HOPOPTS: 487*8275SEric Cheng case IPPROTO_DSTOPTS: 488*8275SEric Cheng /* Assumes the headers are identical for hbh and dst */ 489*8275SEric Cheng desthdr = (ip6_dest_t *)whereptr; 490*8275SEric Cheng ehdrlen = 8 * (desthdr->ip6d_len + 1); 491*8275SEric Cheng if ((uchar_t *)desthdr + ehdrlen > endptr) 492*8275SEric Cheng return (B_FALSE); 493*8275SEric Cheng nexthdrp = &desthdr->ip6d_nxt; 494*8275SEric Cheng break; 495*8275SEric Cheng case IPPROTO_ROUTING: 496*8275SEric Cheng rthdr = (ip6_rthdr_t *)whereptr; 497*8275SEric Cheng ehdrlen = 8 * (rthdr->ip6r_len + 1); 498*8275SEric Cheng if ((uchar_t *)rthdr + ehdrlen > endptr) 499*8275SEric Cheng return (B_FALSE); 500*8275SEric Cheng nexthdrp = &rthdr->ip6r_nxt; 501*8275SEric Cheng break; 502*8275SEric Cheng case IPPROTO_FRAGMENT: 503*8275SEric Cheng fraghdr = (ip6_frag_t *)whereptr; 504*8275SEric Cheng ehdrlen = sizeof (ip6_frag_t); 505*8275SEric Cheng if ((uchar_t *)&fraghdr[1] > endptr) 506*8275SEric Cheng return (B_FALSE); 507*8275SEric Cheng nexthdrp = &fraghdr->ip6f_nxt; 508*8275SEric Cheng break; 509*8275SEric Cheng case IPPROTO_NONE: 510*8275SEric Cheng /* No next header means we're finished */ 511*8275SEric Cheng default: 512*8275SEric Cheng *hdr_length = length; 513*8275SEric Cheng *next_hdr = *nexthdrp; 514*8275SEric Cheng return (B_TRUE); 515*8275SEric Cheng } 516*8275SEric Cheng length += ehdrlen; 517*8275SEric Cheng whereptr += ehdrlen; 518*8275SEric Cheng *hdr_length = length; 519*8275SEric Cheng *next_hdr = *nexthdrp; 520*8275SEric Cheng } 521*8275SEric Cheng switch (*nexthdrp) { 522*8275SEric Cheng case IPPROTO_HOPOPTS: 523*8275SEric Cheng case IPPROTO_DSTOPTS: 524*8275SEric Cheng case IPPROTO_ROUTING: 525*8275SEric Cheng case IPPROTO_FRAGMENT: 526*8275SEric Cheng /* 527*8275SEric Cheng * If any know extension headers are still to be processed, 528*8275SEric Cheng * the packet's malformed (or at least all the IP header(s) are 529*8275SEric Cheng * not in the same mblk - and that should never happen. 530*8275SEric Cheng */ 531*8275SEric Cheng return (B_FALSE); 532*8275SEric Cheng 533*8275SEric Cheng default: 534*8275SEric Cheng /* 535*8275SEric Cheng * If we get here, we know that all of the IP headers were in 536*8275SEric Cheng * the same mblk, even if the ULP header is in the next mblk. 537*8275SEric Cheng */ 538*8275SEric Cheng *hdr_length = length; 539*8275SEric Cheng *next_hdr = *nexthdrp; 540*8275SEric Cheng return (B_TRUE); 541*8275SEric Cheng } 542*8275SEric Cheng } 543*8275SEric Cheng 544*8275SEric Cheng typedef struct mac_dladm_intr { 545*8275SEric Cheng int ino; 546*8275SEric Cheng int cpu_id; 547*8275SEric Cheng char driver_path[MAXPATHLEN]; 548*8275SEric Cheng char nexus_path[MAXPATHLEN]; 549*8275SEric Cheng } mac_dladm_intr_t; 550*8275SEric Cheng 551*8275SEric Cheng /* Bind the interrupt to cpu_num */ 552*8275SEric Cheng static int 553*8275SEric Cheng mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int ino) 554*8275SEric Cheng { 555*8275SEric Cheng pcitool_intr_set_t iset; 556*8275SEric Cheng int err; 557*8275SEric Cheng 558*8275SEric Cheng iset.ino = ino; 559*8275SEric Cheng iset.cpu_id = cpu_num; 560*8275SEric Cheng iset.user_version = PCITOOL_VERSION; 561*8275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, 562*8275SEric Cheng kcred, NULL); 563*8275SEric Cheng 564*8275SEric Cheng return (err); 565*8275SEric Cheng } 566*8275SEric Cheng 567*8275SEric Cheng /* 568*8275SEric Cheng * Search interrupt information. iget is filled in with the info to search 569*8275SEric Cheng */ 570*8275SEric Cheng static boolean_t 571*8275SEric Cheng mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) 572*8275SEric Cheng { 573*8275SEric Cheng int i; 574*8275SEric Cheng char driver_path[2 * MAXPATHLEN]; 575*8275SEric Cheng 576*8275SEric Cheng for (i = 0; i < iget_p->num_devs; i++) { 577*8275SEric Cheng (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); 578*8275SEric Cheng (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, 579*8275SEric Cheng ":%s%d", iget_p->dev[i].driver_name, 580*8275SEric Cheng iget_p->dev[i].dev_inst); 581*8275SEric Cheng /* Match the device path for the device path */ 582*8275SEric Cheng if (strcmp(driver_path, dln->driver_path) == 0) { 583*8275SEric Cheng dln->ino = iget_p->ino; 584*8275SEric Cheng dln->cpu_id = iget_p->cpu_id; 585*8275SEric Cheng return (B_TRUE); 586*8275SEric Cheng } 587*8275SEric Cheng } 588*8275SEric Cheng return (B_FALSE); 589*8275SEric Cheng } 590*8275SEric Cheng 591*8275SEric Cheng /* 592*8275SEric Cheng * Get information about ino, i.e. if this is the interrupt for our 593*8275SEric Cheng * device and where it is bound etc. 594*8275SEric Cheng */ 595*8275SEric Cheng static boolean_t 596*8275SEric Cheng mac_get_single_intr(ldi_handle_t lh, int ino, mac_dladm_intr_t *dln) 597*8275SEric Cheng { 598*8275SEric Cheng pcitool_intr_get_t *iget_p; 599*8275SEric Cheng int ipsz; 600*8275SEric Cheng int nipsz; 601*8275SEric Cheng int err; 602*8275SEric Cheng uint8_t inum; 603*8275SEric Cheng 604*8275SEric Cheng /* 605*8275SEric Cheng * Check if SLEEP is OK, i.e if could come here in response to 606*8275SEric Cheng * changing the fanout due to some callback from the driver, say 607*8275SEric Cheng * link speed changes. 608*8275SEric Cheng */ 609*8275SEric Cheng ipsz = PCITOOL_IGET_SIZE(0); 610*8275SEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 611*8275SEric Cheng 612*8275SEric Cheng iget_p->num_devs_ret = 0; 613*8275SEric Cheng iget_p->user_version = PCITOOL_VERSION; 614*8275SEric Cheng iget_p->ino = ino; 615*8275SEric Cheng 616*8275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 617*8275SEric Cheng FKIOCTL, kcred, NULL); 618*8275SEric Cheng if (err != 0) { 619*8275SEric Cheng kmem_free(iget_p, ipsz); 620*8275SEric Cheng return (B_FALSE); 621*8275SEric Cheng } 622*8275SEric Cheng if (iget_p->num_devs == 0) { 623*8275SEric Cheng kmem_free(iget_p, ipsz); 624*8275SEric Cheng return (B_FALSE); 625*8275SEric Cheng } 626*8275SEric Cheng inum = iget_p->num_devs; 627*8275SEric Cheng if (iget_p->num_devs_ret < iget_p->num_devs) { 628*8275SEric Cheng /* Reallocate */ 629*8275SEric Cheng nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); 630*8275SEric Cheng 631*8275SEric Cheng kmem_free(iget_p, ipsz); 632*8275SEric Cheng ipsz = nipsz; 633*8275SEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 634*8275SEric Cheng 635*8275SEric Cheng iget_p->num_devs_ret = inum; 636*8275SEric Cheng iget_p->ino = ino; 637*8275SEric Cheng iget_p->user_version = PCITOOL_VERSION; 638*8275SEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 639*8275SEric Cheng FKIOCTL, kcred, NULL); 640*8275SEric Cheng if (err != 0) { 641*8275SEric Cheng kmem_free(iget_p, ipsz); 642*8275SEric Cheng return (B_FALSE); 643*8275SEric Cheng } 644*8275SEric Cheng /* defensive */ 645*8275SEric Cheng if (iget_p->num_devs != iget_p->num_devs_ret) { 646*8275SEric Cheng kmem_free(iget_p, ipsz); 647*8275SEric Cheng return (B_FALSE); 648*8275SEric Cheng } 649*8275SEric Cheng } 650*8275SEric Cheng 651*8275SEric Cheng if (mac_search_intrinfo(iget_p, dln)) { 652*8275SEric Cheng kmem_free(iget_p, ipsz); 653*8275SEric Cheng return (B_TRUE); 654*8275SEric Cheng } 655*8275SEric Cheng kmem_free(iget_p, ipsz); 656*8275SEric Cheng return (B_FALSE); 657*8275SEric Cheng } 658*8275SEric Cheng 659*8275SEric Cheng /* 660*8275SEric Cheng * Get the interrupts and check each one to see if it is for our device. 661*8275SEric Cheng */ 662*8275SEric Cheng static int 663*8275SEric Cheng mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) 664*8275SEric Cheng { 665*8275SEric Cheng pcitool_intr_info_t intr_info; 666*8275SEric Cheng int err; 667*8275SEric Cheng int ino; 668*8275SEric Cheng 669*8275SEric Cheng err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, 670*8275SEric Cheng FKIOCTL, kcred, NULL); 671*8275SEric Cheng if (err != 0) 672*8275SEric Cheng return (-1); 673*8275SEric Cheng 674*8275SEric Cheng for (ino = 0; ino < intr_info.num_intr; ino++) { 675*8275SEric Cheng if (mac_get_single_intr(lh, ino, dln)) { 676*8275SEric Cheng if (dln->cpu_id == cpuid) 677*8275SEric Cheng return (0); 678*8275SEric Cheng return (1); 679*8275SEric Cheng } 680*8275SEric Cheng } 681*8275SEric Cheng return (-1); 682*8275SEric Cheng } 683*8275SEric Cheng 684*8275SEric Cheng /* 685*8275SEric Cheng * Obtain the nexus parent node info. for mdip. 686*8275SEric Cheng */ 687*8275SEric Cheng static dev_info_t * 688*8275SEric Cheng mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) 689*8275SEric Cheng { 690*8275SEric Cheng struct dev_info *tdip = (struct dev_info *)mdip; 691*8275SEric Cheng struct ddi_minor_data *minordata; 692*8275SEric Cheng int circ; 693*8275SEric Cheng dev_info_t *pdip; 694*8275SEric Cheng char pathname[MAXPATHLEN]; 695*8275SEric Cheng 696*8275SEric Cheng while (tdip != NULL) { 697*8275SEric Cheng ndi_devi_enter((dev_info_t *)tdip, &circ); 698*8275SEric Cheng for (minordata = tdip->devi_minor; minordata != NULL; 699*8275SEric Cheng minordata = minordata->next) { 700*8275SEric Cheng if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, 701*8275SEric Cheng strlen(DDI_NT_INTRCTL)) == 0) { 702*8275SEric Cheng pdip = minordata->dip; 703*8275SEric Cheng (void) ddi_pathname(pdip, pathname); 704*8275SEric Cheng (void) snprintf(dln->nexus_path, MAXPATHLEN, 705*8275SEric Cheng "/devices%s:intr", pathname); 706*8275SEric Cheng (void) ddi_pathname_minor(minordata, pathname); 707*8275SEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 708*8275SEric Cheng return (pdip); 709*8275SEric Cheng } 710*8275SEric Cheng } 711*8275SEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 712*8275SEric Cheng tdip = tdip->devi_parent; 713*8275SEric Cheng } 714*8275SEric Cheng return (NULL); 715*8275SEric Cheng } 716*8275SEric Cheng 717*8275SEric Cheng /* 718*8275SEric Cheng * For a primary MAC client, if the user has set a list or CPUs or 719*8275SEric Cheng * we have obtained it implicitly, we try to retarget the interrupt 720*8275SEric Cheng * for that device on one of the CPUs in the list. 721*8275SEric Cheng * We assign the interrupt to the same CPU as the poll thread. 722*8275SEric Cheng */ 723*8275SEric Cheng static boolean_t 724*8275SEric Cheng mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) 725*8275SEric Cheng { 726*8275SEric Cheng ldi_handle_t lh = NULL; 727*8275SEric Cheng ldi_ident_t li = NULL; 728*8275SEric Cheng int err; 729*8275SEric Cheng int ret; 730*8275SEric Cheng mac_dladm_intr_t dln; 731*8275SEric Cheng dev_info_t *dip; 732*8275SEric Cheng struct ddi_minor_data *minordata; 733*8275SEric Cheng 734*8275SEric Cheng dln.nexus_path[0] = '\0'; 735*8275SEric Cheng dln.driver_path[0] = '\0'; 736*8275SEric Cheng 737*8275SEric Cheng minordata = ((struct dev_info *)mdip)->devi_minor; 738*8275SEric Cheng while (minordata != NULL) { 739*8275SEric Cheng if (minordata->type == DDM_MINOR) 740*8275SEric Cheng break; 741*8275SEric Cheng minordata = minordata->next; 742*8275SEric Cheng } 743*8275SEric Cheng if (minordata == NULL) 744*8275SEric Cheng return (B_FALSE); 745*8275SEric Cheng 746*8275SEric Cheng (void) ddi_pathname_minor(minordata, dln.driver_path); 747*8275SEric Cheng 748*8275SEric Cheng dip = mac_get_nexus_node(mdip, &dln); 749*8275SEric Cheng /* defensive */ 750*8275SEric Cheng if (dip == NULL) 751*8275SEric Cheng return (B_FALSE); 752*8275SEric Cheng 753*8275SEric Cheng err = ldi_ident_from_major(ddi_driver_major(dip), &li); 754*8275SEric Cheng if (err != 0) 755*8275SEric Cheng return (B_FALSE); 756*8275SEric Cheng 757*8275SEric Cheng err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); 758*8275SEric Cheng if (err != 0) 759*8275SEric Cheng return (B_FALSE); 760*8275SEric Cheng 761*8275SEric Cheng ret = mac_validate_intr(lh, &dln, cpuid); 762*8275SEric Cheng if (ret < 0) { 763*8275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 764*8275SEric Cheng return (B_FALSE); 765*8275SEric Cheng } 766*8275SEric Cheng /* cmn_note? */ 767*8275SEric Cheng if (ret != 0) 768*8275SEric Cheng if ((err = (mac_set_intr(lh, cpuid, dln.ino))) != 0) { 769*8275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 770*8275SEric Cheng return (B_FALSE); 771*8275SEric Cheng } 772*8275SEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 773*8275SEric Cheng return (B_TRUE); 774*8275SEric Cheng } 775*8275SEric Cheng 776*8275SEric Cheng void 777*8275SEric Cheng mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) 778*8275SEric Cheng { 779*8275SEric Cheng dev_info_t *mdip = (dev_info_t *)arg; 780*8275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 781*8275SEric Cheng mac_resource_props_t *mrp; 782*8275SEric Cheng mac_perim_handle_t mph; 783*8275SEric Cheng 784*8275SEric Cheng if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid)) 785*8275SEric Cheng return; 786*8275SEric Cheng 787*8275SEric Cheng mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); 788*8275SEric Cheng mrp = MCIP_RESOURCE_PROPS(mcip); 789*8275SEric Cheng mrp->mrp_intr_cpu = cpuid; 790*8275SEric Cheng mac_perim_exit(mph); 791*8275SEric Cheng } 792*8275SEric Cheng 793*8275SEric Cheng int32_t 794*8275SEric Cheng mac_client_intr_cpu(mac_client_handle_t mch) 795*8275SEric Cheng { 796*8275SEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 797*8275SEric Cheng mac_cpus_t *srs_cpu; 798*8275SEric Cheng mac_soft_ring_set_t *rx_srs; 799*8275SEric Cheng flow_entry_t *flent = mcip->mci_flent; 800*8275SEric Cheng mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 801*8275SEric Cheng 802*8275SEric Cheng /* 803*8275SEric Cheng * Check if we need to retarget the interrupt. We do this only 804*8275SEric Cheng * for the primary MAC client. We do this if we have the only 805*8275SEric Cheng * exclusive ring in the group. 806*8275SEric Cheng */ 807*8275SEric Cheng if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { 808*8275SEric Cheng rx_srs = flent->fe_rx_srs[1]; 809*8275SEric Cheng srs_cpu = &rx_srs->srs_cpu; 810*8275SEric Cheng if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid) 811*8275SEric Cheng return (-1); 812*8275SEric Cheng return (srs_cpu->mc_pollid); 813*8275SEric Cheng } 814*8275SEric Cheng return (-1); 815*8275SEric Cheng } 816*8275SEric Cheng 817*8275SEric Cheng void * 818*8275SEric Cheng mac_get_devinfo(mac_handle_t mh) 819*8275SEric Cheng { 820*8275SEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 821*8275SEric Cheng 822*8275SEric Cheng return ((void *)mip->mi_dip); 823*8275SEric Cheng } 824