15084Sjohnlev /* 25084Sjohnlev * CDDL HEADER START 35084Sjohnlev * 45084Sjohnlev * The contents of this file are subject to the terms of the 55084Sjohnlev * Common Development and Distribution License (the "License"). 65084Sjohnlev * You may not use this file except in compliance with the License. 75084Sjohnlev * 85084Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95084Sjohnlev * or http://www.opensolaris.org/os/licensing. 105084Sjohnlev * See the License for the specific language governing permissions 115084Sjohnlev * and limitations under the License. 125084Sjohnlev * 135084Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 145084Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155084Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 165084Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 175084Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 185084Sjohnlev * 195084Sjohnlev * CDDL HEADER END 205084Sjohnlev */ 215084Sjohnlev /* 22*5895Syz147064 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 235084Sjohnlev * Use is subject to license terms. 245084Sjohnlev */ 255084Sjohnlev 265084Sjohnlev #pragma ident "%Z%%M% %I% %E% SMI" 275084Sjohnlev 285084Sjohnlev #include <sys/types.h> 295084Sjohnlev #include <sys/sysmacros.h> 305084Sjohnlev #include <sys/conf.h> 315084Sjohnlev #include <sys/cmn_err.h> 325084Sjohnlev #include <sys/list.h> 335084Sjohnlev #include <sys/ksynch.h> 345084Sjohnlev #include <sys/kmem.h> 355084Sjohnlev #include <sys/stream.h> 365084Sjohnlev #include <sys/modctl.h> 375084Sjohnlev #include <sys/ddi.h> 385084Sjohnlev #include <sys/sunddi.h> 395084Sjohnlev #include <sys/atomic.h> 405084Sjohnlev #include <sys/stat.h> 415084Sjohnlev #include <sys/modhash.h> 425084Sjohnlev #include <sys/strsubr.h> 435084Sjohnlev #include <sys/strsun.h> 445084Sjohnlev #include <sys/dlpi.h> 455084Sjohnlev #include <sys/mac.h> 465084Sjohnlev #include <sys/mac_ether.h> 47*5895Syz147064 #include <sys/dls.h> 485084Sjohnlev #include <sys/pattr.h> 495084Sjohnlev #include <sys/vnic.h> 505084Sjohnlev #include <sys/vnic_impl.h> 515084Sjohnlev #include <sys/gld.h> 525084Sjohnlev #include <inet/ip.h> 535084Sjohnlev #include <inet/ip_impl.h> 545084Sjohnlev 555084Sjohnlev static int vnic_m_start(void *); 565084Sjohnlev static void vnic_m_stop(void *); 575084Sjohnlev static int vnic_m_promisc(void *, boolean_t); 585084Sjohnlev static int vnic_m_multicst(void *, boolean_t, const uint8_t *); 595084Sjohnlev static int vnic_m_unicst(void *, const uint8_t *); 605084Sjohnlev static int vnic_m_stat(void *, uint_t, uint64_t *); 615084Sjohnlev static void vnic_m_resources(void *); 625084Sjohnlev static mblk_t *vnic_m_tx(void *, mblk_t *); 635084Sjohnlev static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *); 645084Sjohnlev static void vnic_mac_free(vnic_mac_t *); 655084Sjohnlev static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *); 665084Sjohnlev static void vnic_notify_cb(void *, mac_notify_type_t); 675084Sjohnlev static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *); 685084Sjohnlev static mblk_t *vnic_active_tx(void *, mblk_t *); 695084Sjohnlev static int vnic_promisc_set(vnic_t *, boolean_t); 705084Sjohnlev 715084Sjohnlev static kmem_cache_t *vnic_cache; 725084Sjohnlev static kmem_cache_t *vnic_mac_cache; 735084Sjohnlev static krwlock_t vnic_lock; 745084Sjohnlev static kmutex_t vnic_mac_lock; 755084Sjohnlev static uint_t vnic_count; 765084Sjohnlev 775084Sjohnlev /* hash of VNICs (vnic_t's), keyed by VNIC id */ 785084Sjohnlev static mod_hash_t *vnic_hash; 795084Sjohnlev #define VNIC_HASHSZ 64 805084Sjohnlev #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id) 815084Sjohnlev 825084Sjohnlev /* 835084Sjohnlev * Hash of underlying open MACs (vnic_mac_t's), keyed by the string 845084Sjohnlev * "<device name><instance number>/<port number>". 855084Sjohnlev */ 865084Sjohnlev static mod_hash_t *vnic_mac_hash; 875084Sjohnlev #define VNIC_MAC_HASHSZ 64 885084Sjohnlev 895084Sjohnlev #define VNIC_MAC_REFHOLD(va) { \ 905084Sjohnlev ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 915084Sjohnlev (va)->va_refs++; \ 925084Sjohnlev ASSERT((va)->va_refs != 0); \ 935084Sjohnlev } 945084Sjohnlev 955084Sjohnlev #define VNIC_MAC_REFRELE(va) { \ 965084Sjohnlev ASSERT(MUTEX_HELD(&vnic_mac_lock)); \ 975084Sjohnlev ASSERT((va)->va_refs != 0); \ 985084Sjohnlev if (--((va)->va_refs) == 0) \ 995084Sjohnlev vnic_mac_free(va); \ 1005084Sjohnlev } 1015084Sjohnlev 1025084Sjohnlev static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 1035084Sjohnlev 1045084Sjohnlev /* used by vnic_walker */ 1055084Sjohnlev typedef struct vnic_info_state { 106*5895Syz147064 datalink_id_t vs_vnic_id; 107*5895Syz147064 datalink_id_t vs_linkid; 1085084Sjohnlev boolean_t vs_vnic_found; 1095084Sjohnlev vnic_info_new_vnic_fn_t vs_new_vnic_fn; 1105084Sjohnlev void *vs_fn_arg; 1115084Sjohnlev int vs_rc; 1125084Sjohnlev } vnic_info_state_t; 1135084Sjohnlev 1145084Sjohnlev #define VNIC_M_CALLBACK_FLAGS (MC_RESOURCES | MC_GETCAPAB) 1155084Sjohnlev 1165084Sjohnlev static mac_callbacks_t vnic_m_callbacks = { 1175084Sjohnlev VNIC_M_CALLBACK_FLAGS, 1185084Sjohnlev vnic_m_stat, 1195084Sjohnlev vnic_m_start, 1205084Sjohnlev vnic_m_stop, 1215084Sjohnlev vnic_m_promisc, 1225084Sjohnlev vnic_m_multicst, 1235084Sjohnlev vnic_m_unicst, 1245084Sjohnlev vnic_m_tx, 1255084Sjohnlev vnic_m_resources, 1265084Sjohnlev NULL, /* m_ioctl */ 1275084Sjohnlev vnic_m_capab_get 1285084Sjohnlev }; 1295084Sjohnlev 1305084Sjohnlev /* ARGSUSED */ 1315084Sjohnlev static int 1325084Sjohnlev vnic_mac_ctor(void *buf, void *arg, int kmflag) 1335084Sjohnlev { 1345084Sjohnlev vnic_mac_t *vnic_mac = buf; 1355084Sjohnlev 1365084Sjohnlev bzero(vnic_mac, sizeof (vnic_mac_t)); 1375084Sjohnlev rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL); 1385084Sjohnlev rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL); 1395084Sjohnlev 1405084Sjohnlev return (0); 1415084Sjohnlev } 1425084Sjohnlev 1435084Sjohnlev /* ARGSUSED */ 1445084Sjohnlev static void 1455084Sjohnlev vnic_mac_dtor(void *buf, void *arg) 1465084Sjohnlev { 1475084Sjohnlev vnic_mac_t *vnic_mac = buf; 1485084Sjohnlev 1495084Sjohnlev rw_destroy(&vnic_mac->va_promisc_lock); 1505084Sjohnlev rw_destroy(&vnic_mac->va_bcast_grp_lock); 1515084Sjohnlev } 1525084Sjohnlev 1535084Sjohnlev void 1545084Sjohnlev vnic_dev_init(void) 1555084Sjohnlev { 1565084Sjohnlev vnic_cache = kmem_cache_create("vnic_cache", 1575084Sjohnlev sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1585084Sjohnlev 1595084Sjohnlev vnic_mac_cache = kmem_cache_create("vnic_mac_cache", 1605084Sjohnlev sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor, 1615084Sjohnlev NULL, NULL, NULL, 0); 1625084Sjohnlev 1635084Sjohnlev vnic_hash = mod_hash_create_idhash("vnic_hash", 1645084Sjohnlev VNIC_HASHSZ, mod_hash_null_valdtor); 1655084Sjohnlev 166*5895Syz147064 vnic_mac_hash = mod_hash_create_idhash("vnic_mac_hash", 1675084Sjohnlev VNIC_MAC_HASHSZ, mod_hash_null_valdtor); 1685084Sjohnlev 1695084Sjohnlev rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL); 1705084Sjohnlev 1715084Sjohnlev mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL); 1725084Sjohnlev 1735084Sjohnlev vnic_count = 0; 1745084Sjohnlev } 1755084Sjohnlev 1765084Sjohnlev void 1775084Sjohnlev vnic_dev_fini(void) 1785084Sjohnlev { 1795084Sjohnlev ASSERT(vnic_count == 0); 1805084Sjohnlev 1815084Sjohnlev mutex_destroy(&vnic_mac_lock); 1825084Sjohnlev rw_destroy(&vnic_lock); 183*5895Syz147064 mod_hash_destroy_idhash(vnic_mac_hash); 1845084Sjohnlev mod_hash_destroy_idhash(vnic_hash); 1855084Sjohnlev kmem_cache_destroy(vnic_mac_cache); 1865084Sjohnlev kmem_cache_destroy(vnic_cache); 1875084Sjohnlev } 1885084Sjohnlev 1895084Sjohnlev uint_t 1905084Sjohnlev vnic_dev_count(void) 1915084Sjohnlev { 1925084Sjohnlev return (vnic_count); 1935084Sjohnlev } 1945084Sjohnlev 1955084Sjohnlev static int 196*5895Syz147064 vnic_mac_open(datalink_id_t linkid, vnic_mac_t **vmp) 1975084Sjohnlev { 1985084Sjohnlev int err; 1995084Sjohnlev vnic_mac_t *vnic_mac = NULL; 2005084Sjohnlev const mac_info_t *mip; 2015084Sjohnlev 2025084Sjohnlev *vmp = NULL; 2035084Sjohnlev 2045084Sjohnlev mutex_enter(&vnic_mac_lock); 2055084Sjohnlev 206*5895Syz147064 err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, 2075084Sjohnlev (mod_hash_val_t *)&vnic_mac); 2085084Sjohnlev if (err == 0) { 2095084Sjohnlev /* this MAC is already opened, increment reference count */ 2105084Sjohnlev VNIC_MAC_REFHOLD(vnic_mac); 2115084Sjohnlev mutex_exit(&vnic_mac_lock); 2125084Sjohnlev *vmp = vnic_mac; 2135084Sjohnlev return (0); 2145084Sjohnlev } 2155084Sjohnlev 2165084Sjohnlev vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP); 217*5895Syz147064 if ((err = mac_open_by_linkid(linkid, &vnic_mac->va_mh)) != 0) { 218*5895Syz147064 vnic_mac->va_mh = NULL; 219*5895Syz147064 goto bail; 220*5895Syz147064 } 2215084Sjohnlev 222*5895Syz147064 /* 223*5895Syz147064 * For now, we do not support VNICs over legacy drivers. This will 224*5895Syz147064 * soon be changed. 225*5895Syz147064 */ 226*5895Syz147064 if (mac_is_legacy(vnic_mac->va_mh)) { 227*5895Syz147064 err = ENOTSUP; 2285084Sjohnlev goto bail; 2295084Sjohnlev } 2305084Sjohnlev 2315084Sjohnlev /* only ethernet support, for now */ 2325084Sjohnlev mip = mac_info(vnic_mac->va_mh); 2335084Sjohnlev if (mip->mi_media != DL_ETHER) { 2345084Sjohnlev err = ENOTSUP; 2355084Sjohnlev goto bail; 2365084Sjohnlev } 2375084Sjohnlev if (mip->mi_media != mip->mi_nativemedia) { 2385084Sjohnlev err = ENOTSUP; 2395084Sjohnlev goto bail; 2405084Sjohnlev } 2415084Sjohnlev 242*5895Syz147064 vnic_mac->va_linkid = linkid; 2435084Sjohnlev 2445084Sjohnlev /* add entry to hash table */ 245*5895Syz147064 err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)(uintptr_t)linkid, 2465084Sjohnlev (mod_hash_val_t)vnic_mac); 2475084Sjohnlev ASSERT(err == 0); 2485084Sjohnlev 2495084Sjohnlev /* initialize the flow table associated with lower MAC */ 2505084Sjohnlev vnic_mac->va_addr_len = ETHERADDRL; 2515084Sjohnlev (void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len, 2525084Sjohnlev KM_SLEEP); 2535084Sjohnlev 2545084Sjohnlev vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh); 2555084Sjohnlev vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh, 2565084Sjohnlev vnic_notify_cb, vnic_mac); 2575084Sjohnlev 2585084Sjohnlev VNIC_MAC_REFHOLD(vnic_mac); 2595084Sjohnlev *vmp = vnic_mac; 2605084Sjohnlev mutex_exit(&vnic_mac_lock); 2615084Sjohnlev return (0); 2625084Sjohnlev 2635084Sjohnlev bail: 2645084Sjohnlev if (vnic_mac != NULL) { 2655084Sjohnlev if (vnic_mac->va_mh != NULL) 2665084Sjohnlev mac_close(vnic_mac->va_mh); 2675084Sjohnlev kmem_cache_free(vnic_mac_cache, vnic_mac); 2685084Sjohnlev } 2695084Sjohnlev mutex_exit(&vnic_mac_lock); 2705084Sjohnlev return (err); 2715084Sjohnlev } 2725084Sjohnlev 2735084Sjohnlev /* 2745084Sjohnlev * Create a new flow for the active MAC client sharing the NIC 2755084Sjohnlev * with the VNICs. This allows the unicast packets for that NIC 2765084Sjohnlev * to be classified and passed up to the active MAC client. It 2775084Sjohnlev * also allows packets sent from a VNIC to the active link to 2785084Sjohnlev * be classified by the VNIC transmit function and delivered via 2795084Sjohnlev * the MAC module locally. Returns B_TRUE on success, B_FALSE on 2805084Sjohnlev * failure. 2815084Sjohnlev */ 2825084Sjohnlev static int 2835084Sjohnlev vnic_init_active_rx(vnic_mac_t *vnic_mac) 2845084Sjohnlev { 2855084Sjohnlev uchar_t nic_mac_addr[MAXMACADDRLEN]; 2865084Sjohnlev 2875084Sjohnlev if (vnic_mac->va_active_flow != NULL) 2885084Sjohnlev return (B_TRUE); 2895084Sjohnlev 2905084Sjohnlev mac_unicst_get(vnic_mac->va_mh, nic_mac_addr); 2915084Sjohnlev 2925084Sjohnlev vnic_mac->va_active_flow = vnic_classifier_flow_create( 2935084Sjohnlev vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP); 2945084Sjohnlev 2955084Sjohnlev vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow, 2965084Sjohnlev (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL); 2975084Sjohnlev return (B_TRUE); 2985084Sjohnlev } 2995084Sjohnlev 3005084Sjohnlev static void 3015084Sjohnlev vnic_fini_active_rx(vnic_mac_t *vnic_mac) 3025084Sjohnlev { 3035084Sjohnlev if (vnic_mac->va_active_flow == NULL) 3045084Sjohnlev return; 3055084Sjohnlev 3065084Sjohnlev vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow); 3075084Sjohnlev vnic_classifier_flow_destroy(vnic_mac->va_active_flow); 3085084Sjohnlev vnic_mac->va_active_flow = NULL; 3095084Sjohnlev } 3105084Sjohnlev 3115084Sjohnlev static void 3125084Sjohnlev vnic_update_active_rx(vnic_mac_t *vnic_mac) 3135084Sjohnlev { 3145084Sjohnlev if (vnic_mac->va_active_flow == NULL) 3155084Sjohnlev return; 3165084Sjohnlev 3175084Sjohnlev vnic_fini_active_rx(vnic_mac); 3185084Sjohnlev (void) vnic_init_active_rx(vnic_mac); 3195084Sjohnlev } 3205084Sjohnlev 3215084Sjohnlev /* 3225084Sjohnlev * Copy an mblk, preserving its hardware checksum flags. 3235084Sjohnlev */ 3245084Sjohnlev mblk_t * 3255084Sjohnlev vnic_copymsg_cksum(mblk_t *mp) 3265084Sjohnlev { 3275084Sjohnlev mblk_t *mp1; 3285084Sjohnlev uint32_t start, stuff, end, value, flags; 3295084Sjohnlev 3305084Sjohnlev mp1 = copymsg(mp); 3315084Sjohnlev if (mp1 == NULL) 3325084Sjohnlev return (NULL); 3335084Sjohnlev 3345084Sjohnlev hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 3355084Sjohnlev (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 3365084Sjohnlev flags, KM_NOSLEEP); 3375084Sjohnlev 3385084Sjohnlev return (mp1); 3395084Sjohnlev } 3405084Sjohnlev 3415084Sjohnlev /* 3425084Sjohnlev * Copy an mblk chain, presenting the hardware checksum flags of the 3435084Sjohnlev * individual mblks. 3445084Sjohnlev */ 3455084Sjohnlev mblk_t * 3465084Sjohnlev vnic_copymsgchain_cksum(mblk_t *mp) 3475084Sjohnlev { 3485084Sjohnlev mblk_t *nmp = NULL; 3495084Sjohnlev mblk_t **nmpp = &nmp; 3505084Sjohnlev 3515084Sjohnlev for (; mp != NULL; mp = mp->b_next) { 3525084Sjohnlev if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) { 3535084Sjohnlev freemsgchain(nmp); 3545084Sjohnlev return (NULL); 3555084Sjohnlev } 3565084Sjohnlev 3575084Sjohnlev nmpp = &((*nmpp)->b_next); 3585084Sjohnlev } 3595084Sjohnlev 3605084Sjohnlev return (nmp); 3615084Sjohnlev } 3625084Sjohnlev 3635084Sjohnlev 3645084Sjohnlev /* 3655084Sjohnlev * Process the specified mblk chain for proper handling of hardware 3665084Sjohnlev * checksum offload. This routine is invoked for loopback VNIC traffic. 3675084Sjohnlev * The function handles a NULL mblk chain passed as argument. 3685084Sjohnlev */ 3695084Sjohnlev mblk_t * 3705084Sjohnlev vnic_fix_cksum(mblk_t *mp_chain) 3715084Sjohnlev { 3725084Sjohnlev mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 3735084Sjohnlev uint32_t flags, start, stuff, end, value; 3745084Sjohnlev 3755084Sjohnlev for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 3765084Sjohnlev uint16_t len; 3775084Sjohnlev uint32_t offset; 3785084Sjohnlev struct ether_header *ehp; 3795084Sjohnlev uint16_t sap; 3805084Sjohnlev 3815084Sjohnlev hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 3825084Sjohnlev &flags); 3835084Sjohnlev if (flags == 0) 3845084Sjohnlev continue; 3855084Sjohnlev 3865084Sjohnlev /* 3875084Sjohnlev * Since the processing of checksum offload for loopback 3885084Sjohnlev * traffic requires modification of the packet contents, 3895084Sjohnlev * ensure sure that we are always modifying our own copy. 3905084Sjohnlev */ 3915084Sjohnlev if (DB_REF(mp) > 1) { 3925084Sjohnlev mp1 = copymsg(mp); 3935084Sjohnlev if (mp1 == NULL) 3945084Sjohnlev continue; 3955084Sjohnlev mp1->b_next = mp->b_next; 3965084Sjohnlev mp->b_next = NULL; 3975084Sjohnlev freemsg(mp); 3985084Sjohnlev if (prev != NULL) 3995084Sjohnlev prev->b_next = mp1; 4005084Sjohnlev else 4015084Sjohnlev new_chain = mp1; 4025084Sjohnlev mp = mp1; 4035084Sjohnlev } 4045084Sjohnlev 4055084Sjohnlev /* 4065084Sjohnlev * Ethernet, and optionally VLAN header. 4075084Sjohnlev */ 4085084Sjohnlev /*LINTED*/ 4095084Sjohnlev ehp = (struct ether_header *)mp->b_rptr; 4105084Sjohnlev if (ntohs(ehp->ether_type) == VLAN_TPID) { 4115084Sjohnlev struct ether_vlan_header *evhp; 4125084Sjohnlev 4135084Sjohnlev ASSERT(MBLKL(mp) >= 4145084Sjohnlev sizeof (struct ether_vlan_header)); 4155084Sjohnlev /*LINTED*/ 4165084Sjohnlev evhp = (struct ether_vlan_header *)mp->b_rptr; 4175084Sjohnlev sap = ntohs(evhp->ether_type); 4185084Sjohnlev offset = sizeof (struct ether_vlan_header); 4195084Sjohnlev } else { 4205084Sjohnlev sap = ntohs(ehp->ether_type); 4215084Sjohnlev offset = sizeof (struct ether_header); 4225084Sjohnlev } 4235084Sjohnlev 4245084Sjohnlev if (MBLKL(mp) <= offset) { 4255084Sjohnlev offset -= MBLKL(mp); 4265084Sjohnlev if (mp->b_cont == NULL) { 4275084Sjohnlev /* corrupted packet, skip it */ 4285084Sjohnlev if (prev != NULL) 4295084Sjohnlev prev->b_next = mp->b_next; 4305084Sjohnlev else 4315084Sjohnlev new_chain = mp->b_next; 4325084Sjohnlev mp1 = mp->b_next; 4335084Sjohnlev mp->b_next = NULL; 4345084Sjohnlev freemsg(mp); 4355084Sjohnlev mp = mp1; 4365084Sjohnlev continue; 4375084Sjohnlev } 4385084Sjohnlev mp = mp->b_cont; 4395084Sjohnlev } 4405084Sjohnlev 4415084Sjohnlev if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 4425084Sjohnlev ipha_t *ipha = NULL; 4435084Sjohnlev 4445084Sjohnlev /* 4455084Sjohnlev * In order to compute the full and header 4465084Sjohnlev * checksums, we need to find and parse 4475084Sjohnlev * the IP and/or ULP headers. 4485084Sjohnlev */ 4495084Sjohnlev 4505084Sjohnlev sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 4515084Sjohnlev 4525084Sjohnlev /* 4535084Sjohnlev * IP header. 4545084Sjohnlev */ 4555084Sjohnlev if (sap != ETHERTYPE_IP) 4565084Sjohnlev continue; 4575084Sjohnlev 4585084Sjohnlev ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 4595084Sjohnlev /*LINTED*/ 4605084Sjohnlev ipha = (ipha_t *)(mp->b_rptr + offset); 4615084Sjohnlev 4625084Sjohnlev if (flags & HCK_FULLCKSUM) { 4635084Sjohnlev ipaddr_t src, dst; 4645084Sjohnlev uint32_t cksum; 4655084Sjohnlev uint16_t *up; 4665084Sjohnlev uint8_t proto; 4675084Sjohnlev 4685084Sjohnlev /* 4695084Sjohnlev * Pointer to checksum field in ULP header. 4705084Sjohnlev */ 4715084Sjohnlev proto = ipha->ipha_protocol; 4725084Sjohnlev ASSERT(ipha->ipha_version_and_hdr_length == 4735084Sjohnlev IP_SIMPLE_HDR_VERSION); 4745084Sjohnlev if (proto == IPPROTO_TCP) { 4755084Sjohnlev /*LINTED*/ 4765084Sjohnlev up = IPH_TCPH_CHECKSUMP(ipha, 4775084Sjohnlev IP_SIMPLE_HDR_LENGTH); 4785084Sjohnlev } else { 4795084Sjohnlev ASSERT(proto == IPPROTO_UDP); 4805084Sjohnlev /*LINTED*/ 4815084Sjohnlev up = IPH_UDPH_CHECKSUMP(ipha, 4825084Sjohnlev IP_SIMPLE_HDR_LENGTH); 4835084Sjohnlev } 4845084Sjohnlev 4855084Sjohnlev /* 4865084Sjohnlev * Pseudo-header checksum. 4875084Sjohnlev */ 4885084Sjohnlev src = ipha->ipha_src; 4895084Sjohnlev dst = ipha->ipha_dst; 4905084Sjohnlev len = ntohs(ipha->ipha_length) - 4915084Sjohnlev IP_SIMPLE_HDR_LENGTH; 4925084Sjohnlev 4935084Sjohnlev cksum = (dst >> 16) + (dst & 0xFFFF) + 4945084Sjohnlev (src >> 16) + (src & 0xFFFF); 4955084Sjohnlev cksum += htons(len); 4965084Sjohnlev 4975084Sjohnlev /* 4985084Sjohnlev * The checksum value stored in the packet needs 4995084Sjohnlev * to be correct. Compute it here. 5005084Sjohnlev */ 5015084Sjohnlev *up = 0; 5025084Sjohnlev cksum += (((proto) == IPPROTO_UDP) ? 5035084Sjohnlev IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 5045084Sjohnlev cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 5055084Sjohnlev offset, cksum); 5065084Sjohnlev *(up) = (uint16_t)(cksum ? cksum : ~cksum); 5075084Sjohnlev 5085084Sjohnlev flags |= HCK_FULLCKSUM_OK; 5095084Sjohnlev value = 0xffff; 5105084Sjohnlev } 5115084Sjohnlev 5125084Sjohnlev if (flags & HCK_IPV4_HDRCKSUM) { 5135084Sjohnlev ASSERT(ipha != NULL); 5145084Sjohnlev ipha->ipha_hdr_checksum = 5155084Sjohnlev (uint16_t)ip_csum_hdr(ipha); 5165084Sjohnlev } 5175084Sjohnlev } 5185084Sjohnlev 5195084Sjohnlev if (flags & HCK_PARTIALCKSUM) { 5205084Sjohnlev uint16_t *up, partial, cksum; 5215084Sjohnlev uchar_t *ipp; /* ptr to beginning of IP header */ 5225084Sjohnlev 5235084Sjohnlev if (mp->b_cont != NULL) { 5245084Sjohnlev mblk_t *mp1; 5255084Sjohnlev 5265084Sjohnlev mp1 = msgpullup(mp, offset + end); 5275084Sjohnlev if (mp1 == NULL) 5285084Sjohnlev continue; 5295084Sjohnlev mp1->b_next = mp->b_next; 5305084Sjohnlev mp->b_next = NULL; 5315084Sjohnlev freemsg(mp); 5325084Sjohnlev if (prev != NULL) 5335084Sjohnlev prev->b_next = mp1; 5345084Sjohnlev else 5355084Sjohnlev new_chain = mp1; 5365084Sjohnlev mp = mp1; 5375084Sjohnlev } 5385084Sjohnlev 5395084Sjohnlev ipp = mp->b_rptr + offset; 5405084Sjohnlev /*LINTED*/ 5415084Sjohnlev up = (uint16_t *)((uchar_t *)ipp + stuff); 5425084Sjohnlev partial = *up; 5435084Sjohnlev *up = 0; 5445084Sjohnlev 5455084Sjohnlev cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 5465084Sjohnlev end - start, partial); 5475084Sjohnlev cksum = ~cksum; 5485084Sjohnlev *up = cksum ? cksum : ~cksum; 5495084Sjohnlev 5505084Sjohnlev /* 5515084Sjohnlev * Since we already computed the whole checksum, 5525084Sjohnlev * indicate to the stack that it has already 5535084Sjohnlev * been verified by the hardware. 5545084Sjohnlev */ 5555084Sjohnlev flags &= ~HCK_PARTIALCKSUM; 5565084Sjohnlev flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK); 5575084Sjohnlev value = 0xffff; 5585084Sjohnlev } 5595084Sjohnlev 5605084Sjohnlev (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 5615084Sjohnlev value, flags, KM_NOSLEEP); 5625084Sjohnlev } 5635084Sjohnlev 5645084Sjohnlev return (new_chain); 5655084Sjohnlev } 5665084Sjohnlev 5675084Sjohnlev static void 5685084Sjohnlev vnic_mac_close(vnic_mac_t *vnic_mac) 5695084Sjohnlev { 5705084Sjohnlev mutex_enter(&vnic_mac_lock); 5715084Sjohnlev VNIC_MAC_REFRELE(vnic_mac); 5725084Sjohnlev mutex_exit(&vnic_mac_lock); 5735084Sjohnlev } 5745084Sjohnlev 5755084Sjohnlev static void 5765084Sjohnlev vnic_mac_free(vnic_mac_t *vnic_mac) 5775084Sjohnlev { 5785084Sjohnlev mod_hash_val_t val; 5795084Sjohnlev 5805084Sjohnlev ASSERT(MUTEX_HELD(&vnic_mac_lock)); 5815084Sjohnlev vnic_fini_active_rx(vnic_mac); 5825084Sjohnlev mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl); 5835084Sjohnlev if (vnic_mac->va_mac_set) { 5845084Sjohnlev vnic_mac->va_mac_set = B_FALSE; 5855084Sjohnlev mac_vnic_clear(vnic_mac->va_mh); 5865084Sjohnlev } 5875084Sjohnlev vnic_classifier_flow_tab_fini(vnic_mac); 5885084Sjohnlev mac_close(vnic_mac->va_mh); 5895084Sjohnlev 5905084Sjohnlev (void) mod_hash_remove(vnic_mac_hash, 591*5895Syz147064 (mod_hash_key_t)(uintptr_t)vnic_mac->va_linkid, &val); 5925084Sjohnlev ASSERT(vnic_mac == (vnic_mac_t *)val); 5935084Sjohnlev 5945084Sjohnlev kmem_cache_free(vnic_mac_cache, vnic_mac); 5955084Sjohnlev } 5965084Sjohnlev 5975084Sjohnlev /* 5985084Sjohnlev * Initial VNIC receive routine. Invoked for packets that are steered 5995084Sjohnlev * to a VNIC but the VNIC has not been started yet. 6005084Sjohnlev */ 6015084Sjohnlev /* ARGSUSED */ 6025084Sjohnlev static void 6035084Sjohnlev vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain) 6045084Sjohnlev { 6055084Sjohnlev vnic_t *vnic = arg1; 6065084Sjohnlev mblk_t *mp; 6075084Sjohnlev 6085084Sjohnlev /* update stats */ 6095084Sjohnlev for (mp = mp_chain; mp != NULL; mp = mp->b_next) 6105084Sjohnlev vnic->vn_stat_ierrors++; 6115084Sjohnlev freemsgchain(mp_chain); 6125084Sjohnlev } 6135084Sjohnlev 6145084Sjohnlev /* 6155084Sjohnlev * VNIC receive routine invoked after the classifier for the VNIC 6165084Sjohnlev * has been initialized and the VNIC has been started. 6175084Sjohnlev */ 6185084Sjohnlev /* ARGSUSED */ 6195084Sjohnlev void 6205084Sjohnlev vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain) 6215084Sjohnlev { 6225084Sjohnlev vnic_t *vnic = arg1; 6235084Sjohnlev mblk_t *mp; 6245084Sjohnlev 6255084Sjohnlev /* update stats */ 6265084Sjohnlev for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 6275084Sjohnlev vnic->vn_stat_ipackets++; 6285084Sjohnlev vnic->vn_stat_rbytes += msgdsize(mp); 6295084Sjohnlev } 6305084Sjohnlev 6315084Sjohnlev /* pass packet up */ 6325084Sjohnlev mac_rx(vnic->vn_mh, NULL, mp_chain); 6335084Sjohnlev } 6345084Sjohnlev 6355084Sjohnlev /* 6365084Sjohnlev * Routine to create a MAC-based VNIC. Adds the passed MAC address 6375084Sjohnlev * to an unused slot in the NIC if one is available. Otherwise it 6385084Sjohnlev * sets the NIC in promiscuous mode and assigns the MAC address to 6395084Sjohnlev * a Rx ring if available or a soft ring. 6405084Sjohnlev */ 6415084Sjohnlev static int 6425084Sjohnlev vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr) 6435084Sjohnlev { 6445084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 6455084Sjohnlev int err; 6465084Sjohnlev 6475638Sdme if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr, 6485638Sdme maddr->mma_addrlen) == B_FALSE) 6495638Sdme return (EINVAL); 6505638Sdme 6515084Sjohnlev if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS, 6525084Sjohnlev &(vnic->vn_mma_capab))) { 6535084Sjohnlev if (vnic->vn_maddr_naddrfree == 0) { 6545084Sjohnlev /* 6555084Sjohnlev * No free address slots available. 6565084Sjohnlev * Enable promiscuous mode. 6575084Sjohnlev */ 6585084Sjohnlev goto set_promisc; 6595084Sjohnlev } 6605084Sjohnlev 6615084Sjohnlev err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr); 6625084Sjohnlev if (err != 0) { 6635084Sjohnlev if (err == ENOSPC) { 6645084Sjohnlev /* 6655084Sjohnlev * There was a race to add addresses 6665084Sjohnlev * with other multiple address consumers, 6675084Sjohnlev * and we lost out. Use promisc mode. 6685084Sjohnlev */ 6695084Sjohnlev goto set_promisc; 6705084Sjohnlev } 6715084Sjohnlev 6725084Sjohnlev return (err); 6735084Sjohnlev } 6745084Sjohnlev 6755084Sjohnlev vnic->vn_slot_id = maddr->mma_slot; 6765084Sjohnlev vnic->vn_multi_mac = B_TRUE; 6775084Sjohnlev } else { 6785084Sjohnlev /* 6795084Sjohnlev * Either multiple MAC address support is not 6805084Sjohnlev * available or all available addresses have 6815084Sjohnlev * been used up. 6825084Sjohnlev */ 6835084Sjohnlev set_promisc: 684*5895Syz147064 if ((err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, 685*5895Syz147064 MAC_DEVPROMISC)) != 0) { 6865084Sjohnlev return (err); 6875084Sjohnlev } 6885084Sjohnlev 6895084Sjohnlev vnic->vn_promisc_mac = B_TRUE; 6905084Sjohnlev } 6915084Sjohnlev return (err); 6925084Sjohnlev } 6935084Sjohnlev 6945084Sjohnlev /* 6955084Sjohnlev * VNIC is getting deleted. Remove the MAC address from the slot. 6965084Sjohnlev * If promiscuous mode was being used, then unset the promiscuous mode. 6975084Sjohnlev */ 6985084Sjohnlev static int 6995084Sjohnlev vnic_remove_unicstaddr(vnic_t *vnic) 7005084Sjohnlev { 7015084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 7025084Sjohnlev int err; 7035084Sjohnlev 7045084Sjohnlev if (vnic->vn_multi_mac) { 7055084Sjohnlev ASSERT(vnic->vn_promisc_mac == B_FALSE); 7065084Sjohnlev err = vnic->vn_maddr_remove(vnic->vn_maddr_handle, 7075084Sjohnlev vnic->vn_slot_id); 7085084Sjohnlev vnic->vn_multi_mac = B_FALSE; 7095084Sjohnlev } 7105084Sjohnlev 7115084Sjohnlev if (vnic->vn_promisc_mac) { 7125084Sjohnlev ASSERT(vnic->vn_multi_mac == B_FALSE); 7135084Sjohnlev err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC); 7145084Sjohnlev vnic->vn_promisc_mac = B_FALSE; 7155084Sjohnlev } 7165084Sjohnlev 7175084Sjohnlev return (err); 7185084Sjohnlev } 7195084Sjohnlev 7205084Sjohnlev /* 7215084Sjohnlev * Create a new VNIC upon request from administrator. 7225084Sjohnlev * Returns 0 on success, an errno on failure. 7235084Sjohnlev */ 7245084Sjohnlev int 725*5895Syz147064 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid, int mac_len, 726*5895Syz147064 uchar_t *mac_addr) 7275084Sjohnlev { 7285084Sjohnlev vnic_t *vnic = NULL; 7295084Sjohnlev mac_register_t *mac; 7305084Sjohnlev int err; 7315084Sjohnlev vnic_mac_t *vnic_mac; 7325084Sjohnlev const mac_info_t *lower_mac_info; 7335084Sjohnlev mac_multi_addr_t maddr; 7345084Sjohnlev mac_txinfo_t tx_info; 7355084Sjohnlev 7365084Sjohnlev if (mac_len != ETHERADDRL) { 7375084Sjohnlev /* currently only ethernet NICs are supported */ 7385084Sjohnlev return (EINVAL); 7395084Sjohnlev } 7405084Sjohnlev 7415084Sjohnlev rw_enter(&vnic_lock, RW_WRITER); 7425084Sjohnlev 7435084Sjohnlev /* does a VNIC with the same id already exist? */ 7445084Sjohnlev err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 7455084Sjohnlev (mod_hash_val_t *)&vnic); 7465084Sjohnlev if (err == 0) { 7475084Sjohnlev rw_exit(&vnic_lock); 7485084Sjohnlev return (EEXIST); 7495084Sjohnlev } 7505084Sjohnlev 7515084Sjohnlev vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP); 7525084Sjohnlev if (vnic == NULL) { 7535084Sjohnlev rw_exit(&vnic_lock); 7545084Sjohnlev return (ENOMEM); 7555084Sjohnlev } 7565084Sjohnlev 7575084Sjohnlev /* open underlying MAC */ 758*5895Syz147064 err = vnic_mac_open(linkid, &vnic_mac); 7595084Sjohnlev if (err != 0) { 7605084Sjohnlev kmem_cache_free(vnic_cache, vnic); 7615084Sjohnlev rw_exit(&vnic_lock); 7625084Sjohnlev return (err); 7635084Sjohnlev } 7645084Sjohnlev 7655084Sjohnlev bzero(vnic, sizeof (*vnic)); 7665084Sjohnlev vnic->vn_id = vnic_id; 7675084Sjohnlev vnic->vn_vnic_mac = vnic_mac; 7685084Sjohnlev 7695084Sjohnlev vnic->vn_started = B_FALSE; 7705084Sjohnlev vnic->vn_promisc = B_FALSE; 7715084Sjohnlev vnic->vn_multi_mac = B_FALSE; 7725084Sjohnlev vnic->vn_bcast_grp = B_FALSE; 7735084Sjohnlev 7745084Sjohnlev /* set the VNIC MAC address */ 7755084Sjohnlev maddr.mma_addrlen = mac_len; 7765084Sjohnlev maddr.mma_slot = 0; 7775084Sjohnlev maddr.mma_flags = 0; 7785084Sjohnlev bcopy(mac_addr, maddr.mma_addr, mac_len); 7795084Sjohnlev if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0) 7805084Sjohnlev goto bail; 7815084Sjohnlev bcopy(mac_addr, vnic->vn_addr, mac_len); 7825084Sjohnlev 7835084Sjohnlev /* set the initial VNIC capabilities */ 7845084Sjohnlev if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM, 7855084Sjohnlev &vnic->vn_hcksum_txflags)) 7865084Sjohnlev vnic->vn_hcksum_txflags = 0; 7875084Sjohnlev 7885084Sjohnlev /* register with the MAC module */ 7895084Sjohnlev if ((mac = mac_alloc(MAC_VERSION)) == NULL) 7905084Sjohnlev goto bail; 7915084Sjohnlev 7925084Sjohnlev mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 7935084Sjohnlev mac->m_driver = vnic; 7945084Sjohnlev mac->m_dip = vnic_get_dip(); 795*5895Syz147064 mac->m_instance = (uint_t)-1; 7965084Sjohnlev mac->m_src_addr = vnic->vn_addr; 7975084Sjohnlev mac->m_callbacks = &vnic_m_callbacks; 7985084Sjohnlev 7995084Sjohnlev lower_mac_info = mac_info(vnic_mac->va_mh); 8005084Sjohnlev mac->m_min_sdu = lower_mac_info->mi_sdu_min; 8015084Sjohnlev mac->m_max_sdu = lower_mac_info->mi_sdu_max; 8025084Sjohnlev 803*5895Syz147064 /* 804*5895Syz147064 * As the current margin size of the underlying mac is used to 805*5895Syz147064 * determine the margin size of the VNIC itself, request the 806*5895Syz147064 * underlying mac not to change to a smaller margin size. 807*5895Syz147064 */ 808*5895Syz147064 err = mac_margin_add(vnic_mac->va_mh, &(vnic->vn_margin), B_TRUE); 809*5895Syz147064 if (err != 0) 810*5895Syz147064 goto bail; 811*5895Syz147064 mac->m_margin = vnic->vn_margin; 8125084Sjohnlev err = mac_register(mac, &vnic->vn_mh); 8135084Sjohnlev mac_free(mac); 814*5895Syz147064 if (err != 0) { 815*5895Syz147064 VERIFY(mac_margin_remove(vnic_mac->va_mh, 816*5895Syz147064 vnic->vn_margin) == 0); 8175084Sjohnlev goto bail; 818*5895Syz147064 } 819*5895Syz147064 820*5895Syz147064 if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) { 821*5895Syz147064 VERIFY(mac_margin_remove(vnic_mac->va_mh, 822*5895Syz147064 vnic->vn_margin) == 0); 823*5895Syz147064 (void) mac_unregister(vnic->vn_mh); 824*5895Syz147064 goto bail; 825*5895Syz147064 } 8265084Sjohnlev 8275084Sjohnlev /* add new VNIC to hash table */ 8285084Sjohnlev err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id), 8295084Sjohnlev (mod_hash_val_t)vnic); 8305084Sjohnlev ASSERT(err == 0); 8315084Sjohnlev vnic_count++; 8325084Sjohnlev 8335084Sjohnlev rw_exit(&vnic_lock); 8345084Sjohnlev 8355084Sjohnlev /* Create a flow, initialized with the MAC address of the VNIC */ 8365084Sjohnlev if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr, 8375084Sjohnlev NULL, B_FALSE, KM_SLEEP)) == NULL) { 8385084Sjohnlev (void) vnic_dev_delete(vnic_id); 8395084Sjohnlev vnic = NULL; 8405084Sjohnlev err = ENOMEM; 8415084Sjohnlev goto bail_unlocked; 8425084Sjohnlev } 8435084Sjohnlev 8445084Sjohnlev vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial, 8455084Sjohnlev vnic, vnic); 8465084Sjohnlev 8475084Sjohnlev /* setup VNIC to receive broadcast packets */ 8485084Sjohnlev err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST); 8495084Sjohnlev if (err != 0) { 8505084Sjohnlev (void) vnic_dev_delete(vnic_id); 8515084Sjohnlev vnic = NULL; 8525084Sjohnlev goto bail_unlocked; 8535084Sjohnlev } 8545084Sjohnlev vnic->vn_bcast_grp = B_TRUE; 8555084Sjohnlev 8565084Sjohnlev mutex_enter(&vnic_mac_lock); 8575084Sjohnlev if (!vnic_mac->va_mac_set) { 8585084Sjohnlev /* 8595084Sjohnlev * We want to MAC layer to call the VNIC tx outbound 8605084Sjohnlev * routine, so that local broadcast packets sent by 8615084Sjohnlev * the active interface sharing the underlying NIC (if 8625084Sjohnlev * any), can be broadcast to every VNIC. 8635084Sjohnlev */ 8645084Sjohnlev tx_info.mt_fn = vnic_active_tx; 8655084Sjohnlev tx_info.mt_arg = vnic_mac; 8665084Sjohnlev if (!mac_vnic_set(vnic_mac->va_mh, &tx_info, 8675084Sjohnlev vnic_m_capab_get, vnic)) { 8685084Sjohnlev mutex_exit(&vnic_mac_lock); 8695084Sjohnlev (void) vnic_dev_delete(vnic_id); 8705084Sjohnlev vnic = NULL; 8715084Sjohnlev err = EBUSY; 8725084Sjohnlev goto bail_unlocked; 8735084Sjohnlev } 8745084Sjohnlev vnic_mac->va_mac_set = B_TRUE; 8755084Sjohnlev } 8765084Sjohnlev mutex_exit(&vnic_mac_lock); 8775084Sjohnlev 8785084Sjohnlev /* allow passing packets to NIC's active MAC client */ 8795084Sjohnlev if (!vnic_init_active_rx(vnic_mac)) { 8805084Sjohnlev (void) vnic_dev_delete(vnic_id); 8815084Sjohnlev vnic = NULL; 8825084Sjohnlev err = ENOMEM; 8835084Sjohnlev goto bail_unlocked; 8845084Sjohnlev } 8855084Sjohnlev 8865084Sjohnlev return (0); 8875084Sjohnlev 8885084Sjohnlev bail: 8895084Sjohnlev (void) vnic_remove_unicstaddr(vnic); 8905084Sjohnlev vnic_mac_close(vnic_mac); 8915084Sjohnlev rw_exit(&vnic_lock); 8925084Sjohnlev 8935084Sjohnlev bail_unlocked: 8945084Sjohnlev if (vnic != NULL) { 8955084Sjohnlev kmem_cache_free(vnic_cache, vnic); 8965084Sjohnlev } 8975084Sjohnlev 8985084Sjohnlev return (err); 8995084Sjohnlev } 9005084Sjohnlev 9015084Sjohnlev /* 9025084Sjohnlev * Modify the properties of an existing VNIC. 9035084Sjohnlev */ 9045084Sjohnlev /* ARGSUSED */ 9055084Sjohnlev int 906*5895Syz147064 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask, 9075084Sjohnlev vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr) 9085084Sjohnlev { 9095084Sjohnlev vnic_t *vnic = NULL; 9105084Sjohnlev int rv = 0; 9115084Sjohnlev boolean_t notify_mac_addr = B_FALSE; 9125084Sjohnlev 9135084Sjohnlev rw_enter(&vnic_lock, RW_WRITER); 9145084Sjohnlev 9155084Sjohnlev if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 9165084Sjohnlev (mod_hash_val_t *)&vnic) != 0) { 9175084Sjohnlev rw_exit(&vnic_lock); 9185084Sjohnlev return (ENOENT); 9195084Sjohnlev } 9205084Sjohnlev 9215084Sjohnlev if (modify_mask & VNIC_IOC_MODIFY_ADDR) { 9225084Sjohnlev rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr); 9235084Sjohnlev if (rv == 0) 9245084Sjohnlev notify_mac_addr = B_TRUE; 9255084Sjohnlev } 9265084Sjohnlev 9275084Sjohnlev rw_exit(&vnic_lock); 9285084Sjohnlev 9295084Sjohnlev if (notify_mac_addr) 9305084Sjohnlev mac_unicst_update(vnic->vn_mh, mac_addr); 9315084Sjohnlev 9325084Sjohnlev return (rv); 9335084Sjohnlev } 9345084Sjohnlev 9355084Sjohnlev int 936*5895Syz147064 vnic_dev_delete(datalink_id_t vnic_id) 9375084Sjohnlev { 9385084Sjohnlev vnic_t *vnic = NULL; 9395084Sjohnlev mod_hash_val_t val; 9405084Sjohnlev vnic_flow_t *flent; 941*5895Syz147064 datalink_id_t tmpid; 9425084Sjohnlev int rc; 9435702Sdme vnic_mac_t *vnic_mac; 9445084Sjohnlev 9455084Sjohnlev rw_enter(&vnic_lock, RW_WRITER); 9465084Sjohnlev 9475084Sjohnlev if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id), 9485084Sjohnlev (mod_hash_val_t *)&vnic) != 0) { 9495084Sjohnlev rw_exit(&vnic_lock); 9505084Sjohnlev return (ENOENT); 9515084Sjohnlev } 9525084Sjohnlev 953*5895Syz147064 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid)) != 0) { 954*5895Syz147064 rw_exit(&vnic_lock); 955*5895Syz147064 return (rc); 956*5895Syz147064 } 957*5895Syz147064 958*5895Syz147064 ASSERT(vnic_id == tmpid); 959*5895Syz147064 9605084Sjohnlev /* 9615084Sjohnlev * We cannot unregister the MAC yet. Unregistering would 9625084Sjohnlev * free up mac_impl_t which should not happen at this time. 9635084Sjohnlev * Packets could be entering vnic_rx() through the 9645084Sjohnlev * flow entry and so mac_impl_t cannot be NULL. So disable 9655084Sjohnlev * mac_impl_t by calling mac_disable(). This will prevent any 9665084Sjohnlev * new claims on mac_impl_t. 9675084Sjohnlev */ 9685084Sjohnlev if (mac_disable(vnic->vn_mh) != 0) { 969*5895Syz147064 (void) dls_devnet_create(vnic->vn_mh, vnic_id); 9705084Sjohnlev rw_exit(&vnic_lock); 9715084Sjohnlev return (EBUSY); 9725084Sjohnlev } 9735084Sjohnlev 9745084Sjohnlev (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val); 9755084Sjohnlev ASSERT(vnic == (vnic_t *)val); 9765084Sjohnlev 9775084Sjohnlev if (vnic->vn_bcast_grp) 9785084Sjohnlev (void) vnic_bcast_delete(vnic, vnic_brdcst_mac); 9795084Sjohnlev 9805084Sjohnlev flent = vnic->vn_flow_ent; 9815084Sjohnlev if (flent != NULL) { 9825084Sjohnlev /* 9835084Sjohnlev * vnic_classifier_flow_destroy() ensures that the 9845084Sjohnlev * flow is no longer used. 9855084Sjohnlev */ 9865084Sjohnlev vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent); 9875084Sjohnlev vnic_classifier_flow_destroy(flent); 9885084Sjohnlev } 9895084Sjohnlev 990*5895Syz147064 rc = mac_margin_remove(vnic->vn_vnic_mac->va_mh, vnic->vn_margin); 991*5895Syz147064 ASSERT(rc == 0); 9925084Sjohnlev rc = mac_unregister(vnic->vn_mh); 9935084Sjohnlev ASSERT(rc == 0); 9945084Sjohnlev (void) vnic_remove_unicstaddr(vnic); 9955702Sdme vnic_mac = vnic->vn_vnic_mac; 9965084Sjohnlev kmem_cache_free(vnic_cache, vnic); 9975084Sjohnlev vnic_count--; 9985084Sjohnlev rw_exit(&vnic_lock); 9995702Sdme vnic_mac_close(vnic_mac); 10005084Sjohnlev return (0); 10015084Sjohnlev } 10025084Sjohnlev 10035084Sjohnlev /* 10045084Sjohnlev * For the specified packet chain, return a sub-chain to be sent 10055084Sjohnlev * and the transmit function to be used to send the packet. Also 10065084Sjohnlev * return a pointer to the sub-chain of packets that should 10075084Sjohnlev * be re-classified. If the function returns NULL, the packet 10085084Sjohnlev * should be sent using the underlying NIC. 10095084Sjohnlev */ 10105084Sjohnlev static vnic_flow_t * 10115084Sjohnlev vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest) 10125084Sjohnlev { 10135084Sjohnlev vnic_flow_t *flow_ent; 10145084Sjohnlev 10155084Sjohnlev /* one packet at a time */ 10165084Sjohnlev *mp_chain_rest = mp->b_next; 10175084Sjohnlev mp->b_next = NULL; 10185084Sjohnlev 10195084Sjohnlev /* do classification on the packet */ 10205084Sjohnlev flow_ent = vnic_classifier_get_flow(vnic_mac, mp); 10215084Sjohnlev 10225084Sjohnlev return (flow_ent); 10235084Sjohnlev } 10245084Sjohnlev 10255084Sjohnlev /* 10265084Sjohnlev * Send a packet chain to a local VNIC or an active MAC client. 10275084Sjohnlev */ 10285084Sjohnlev static void 10295084Sjohnlev vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain) 10305084Sjohnlev { 10315084Sjohnlev mblk_t *mp1; 10325084Sjohnlev const vnic_flow_fn_info_t *fn_info; 10335084Sjohnlev vnic_t *vnic; 10345084Sjohnlev 10355084Sjohnlev if (!vnic_classifier_is_active(flow_ent) && 10365084Sjohnlev mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) { 10375084Sjohnlev /* 10385084Sjohnlev * If the MAC is in promiscous mode, 10395084Sjohnlev * send a copy of the active client. 10405084Sjohnlev */ 10415084Sjohnlev if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL) 10425084Sjohnlev goto sendit; 10435084Sjohnlev if ((mp1 = vnic_fix_cksum(mp1)) == NULL) 10445084Sjohnlev goto sendit; 10455084Sjohnlev mac_active_rx(vnic_mac->va_mh, NULL, mp1); 10465084Sjohnlev } 10475084Sjohnlev sendit: 10485084Sjohnlev fn_info = vnic_classifier_get_fn_info(flow_ent); 10495084Sjohnlev /* 10505084Sjohnlev * If the vnic to which we would deliver this packet is in 10515084Sjohnlev * promiscuous mode then it already received the packet via 10525084Sjohnlev * vnic_promisc_rx(). 10535084Sjohnlev * 10545084Sjohnlev * XXX assumes that ff_arg2 is a vnic_t pointer if it is 10555084Sjohnlev * non-NULL (currently always true). 10565084Sjohnlev */ 10575084Sjohnlev vnic = (vnic_t *)fn_info->ff_arg2; 10585084Sjohnlev if ((vnic != NULL) && vnic->vn_promisc) 10595084Sjohnlev freemsg(mp_chain); 10605084Sjohnlev else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL) 10615084Sjohnlev (fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1); 10625084Sjohnlev } 10635084Sjohnlev 10645084Sjohnlev /* 10655084Sjohnlev * This function is invoked when a MAC client needs to send a packet 10665084Sjohnlev * to a NIC which is shared by VNICs. It is passed to the MAC layer 10675084Sjohnlev * by a call to mac_vnic_set() when the NIC is opened, and is returned 10685084Sjohnlev * to MAC clients by mac_tx_get() when VNICs are present. 10695084Sjohnlev */ 10705084Sjohnlev mblk_t * 10715084Sjohnlev vnic_active_tx(void *arg, mblk_t *mp_chain) 10725084Sjohnlev { 10735084Sjohnlev vnic_mac_t *vnic_mac = arg; 10745084Sjohnlev mblk_t *mp, *extra_mp = NULL; 10755084Sjohnlev vnic_flow_t *flow_ent; 10765084Sjohnlev void *flow_cookie; 10775084Sjohnlev const mac_txinfo_t *mtp = vnic_mac->va_txinfo; 10785084Sjohnlev 10795084Sjohnlev for (mp = mp_chain; mp != NULL; mp = extra_mp) { 10805084Sjohnlev mblk_t *next; 10815084Sjohnlev 10825084Sjohnlev next = mp->b_next; 10835084Sjohnlev mp->b_next = NULL; 10845084Sjohnlev 10855084Sjohnlev vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp); 10865084Sjohnlev 10875084Sjohnlev flow_ent = vnic_classify(vnic_mac, mp, &extra_mp); 10885084Sjohnlev ASSERT(extra_mp == NULL); 10895084Sjohnlev extra_mp = next; 10905084Sjohnlev 10915084Sjohnlev if (flow_ent != NULL) { 10925084Sjohnlev flow_cookie = vnic_classifier_get_client_cookie( 10935084Sjohnlev flow_ent); 10945084Sjohnlev if (flow_cookie != NULL) { 10955084Sjohnlev /* 10965084Sjohnlev * Send a copy to every VNIC defined on the 10975084Sjohnlev * interface, as well as the underlying MAC. 10985084Sjohnlev */ 10995084Sjohnlev vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp); 11005084Sjohnlev } else { 11015084Sjohnlev /* 11025084Sjohnlev * loopback the packet to a local VNIC or 11035084Sjohnlev * an active MAC client. 11045084Sjohnlev */ 11055084Sjohnlev vnic_local_tx(vnic_mac, flow_ent, mp); 11065084Sjohnlev } 11075084Sjohnlev VNIC_FLOW_REFRELE(flow_ent); 11085084Sjohnlev mp_chain = NULL; 11095084Sjohnlev } else { 11105084Sjohnlev /* 11115084Sjohnlev * Non-VNIC destination, send via the underlying 11125084Sjohnlev * NIC. In order to avoid a recursive call 11135084Sjohnlev * to this function, we ensured that mtp points 11145084Sjohnlev * to the unerlying NIC transmit function 11155084Sjohnlev * by inilizating through mac_vnic_tx_get(). 11165084Sjohnlev */ 11175084Sjohnlev mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 11185084Sjohnlev if (mp_chain != NULL) 11195084Sjohnlev break; 11205084Sjohnlev } 11215084Sjohnlev } 11225084Sjohnlev 11235084Sjohnlev if ((mp_chain != NULL) && (extra_mp != NULL)) { 11245084Sjohnlev ASSERT(mp_chain->b_next == NULL); 11255084Sjohnlev mp_chain->b_next = extra_mp; 11265084Sjohnlev } 11275084Sjohnlev return (mp_chain); 11285084Sjohnlev } 11295084Sjohnlev 11305084Sjohnlev /* 11315084Sjohnlev * VNIC transmit function. 11325084Sjohnlev */ 11335084Sjohnlev mblk_t * 11345084Sjohnlev vnic_m_tx(void *arg, mblk_t *mp_chain) 11355084Sjohnlev { 11365084Sjohnlev vnic_t *vnic = arg; 11375084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 11385084Sjohnlev mblk_t *mp, *extra_mp = NULL; 11395084Sjohnlev vnic_flow_t *flow_ent; 11405084Sjohnlev void *flow_cookie; 11415084Sjohnlev 11425084Sjohnlev /* 11435084Sjohnlev * Update stats. 11445084Sjohnlev */ 11455084Sjohnlev for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 11465084Sjohnlev vnic->vn_stat_opackets++; 11475084Sjohnlev vnic->vn_stat_obytes += msgdsize(mp); 11485084Sjohnlev } 11495084Sjohnlev 11505084Sjohnlev for (mp = mp_chain; mp != NULL; mp = extra_mp) { 11515084Sjohnlev mblk_t *next; 11525084Sjohnlev 11535084Sjohnlev next = mp->b_next; 11545084Sjohnlev mp->b_next = NULL; 11555084Sjohnlev 11565084Sjohnlev vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp); 11575084Sjohnlev 11585084Sjohnlev flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp); 11595084Sjohnlev ASSERT(extra_mp == NULL); 11605084Sjohnlev extra_mp = next; 11615084Sjohnlev 11625084Sjohnlev if (flow_ent != NULL) { 11635084Sjohnlev flow_cookie = vnic_classifier_get_client_cookie( 11645084Sjohnlev flow_ent); 11655084Sjohnlev if (flow_cookie != NULL) { 11665084Sjohnlev /* 11675084Sjohnlev * The vnic_bcast_send function expects 11685084Sjohnlev * to receive the sender VNIC as value 11695084Sjohnlev * for arg2. 11705084Sjohnlev */ 11715084Sjohnlev vnic_bcast_send(flow_cookie, vnic, mp); 11725084Sjohnlev } else { 11735084Sjohnlev /* 11745084Sjohnlev * loopback the packet to a local VNIC or 11755084Sjohnlev * an active MAC client. 11765084Sjohnlev */ 11775084Sjohnlev vnic_local_tx(vnic_mac, flow_ent, mp); 11785084Sjohnlev } 11795084Sjohnlev VNIC_FLOW_REFRELE(flow_ent); 11805084Sjohnlev mp_chain = NULL; 11815084Sjohnlev } else { 11825084Sjohnlev /* 11835084Sjohnlev * Non-local destination, send via the underlying 11845084Sjohnlev * NIC. 11855084Sjohnlev */ 11865084Sjohnlev const mac_txinfo_t *mtp = vnic->vn_txinfo; 11875084Sjohnlev mp_chain = mtp->mt_fn(mtp->mt_arg, mp); 11885084Sjohnlev if (mp_chain != NULL) 11895084Sjohnlev break; 11905084Sjohnlev } 11915084Sjohnlev } 11925084Sjohnlev 11935084Sjohnlev /* update stats to account for unsent packets */ 11945084Sjohnlev for (mp = mp_chain; mp != NULL; mp = mp->b_next) { 11955084Sjohnlev vnic->vn_stat_opackets--; 11965084Sjohnlev vnic->vn_stat_obytes -= msgdsize(mp); 11975084Sjohnlev vnic->vn_stat_oerrors++; 11985084Sjohnlev /* 11995084Sjohnlev * link back in the last portion not counted due to bandwidth 12005084Sjohnlev * control. 12015084Sjohnlev */ 12025084Sjohnlev if (mp->b_next == NULL) { 12035084Sjohnlev mp->b_next = extra_mp; 12045084Sjohnlev break; 12055084Sjohnlev } 12065084Sjohnlev } 12075084Sjohnlev 12085084Sjohnlev return (mp_chain); 12095084Sjohnlev } 12105084Sjohnlev 12115084Sjohnlev /* ARGSUSED */ 12125084Sjohnlev static void 12135084Sjohnlev vnic_m_resources(void *arg) 12145084Sjohnlev { 12155084Sjohnlev /* no resources to advertise */ 12165084Sjohnlev } 12175084Sjohnlev 12185084Sjohnlev static int 12195084Sjohnlev vnic_m_stat(void *arg, uint_t stat, uint64_t *val) 12205084Sjohnlev { 12215084Sjohnlev vnic_t *vnic = arg; 12225084Sjohnlev int rval = 0; 12235084Sjohnlev 12245084Sjohnlev rw_enter(&vnic_lock, RW_READER); 12255084Sjohnlev 12265084Sjohnlev switch (stat) { 12275084Sjohnlev case ETHER_STAT_LINK_DUPLEX: 12285084Sjohnlev *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 12295084Sjohnlev ETHER_STAT_LINK_DUPLEX); 12305084Sjohnlev break; 12315084Sjohnlev case MAC_STAT_IFSPEED: 12325084Sjohnlev *val = mac_stat_get(vnic->vn_vnic_mac->va_mh, 12335084Sjohnlev MAC_STAT_IFSPEED); 12345084Sjohnlev break; 12355084Sjohnlev case MAC_STAT_MULTIRCV: 12365084Sjohnlev *val = vnic->vn_stat_multircv; 12375084Sjohnlev break; 12385084Sjohnlev case MAC_STAT_BRDCSTRCV: 12395084Sjohnlev *val = vnic->vn_stat_brdcstrcv; 12405084Sjohnlev break; 12415084Sjohnlev case MAC_STAT_MULTIXMT: 12425084Sjohnlev *val = vnic->vn_stat_multixmt; 12435084Sjohnlev break; 12445084Sjohnlev case MAC_STAT_BRDCSTXMT: 12455084Sjohnlev *val = vnic->vn_stat_brdcstxmt; 12465084Sjohnlev break; 12475084Sjohnlev case MAC_STAT_IERRORS: 12485084Sjohnlev *val = vnic->vn_stat_ierrors; 12495084Sjohnlev break; 12505084Sjohnlev case MAC_STAT_OERRORS: 12515084Sjohnlev *val = vnic->vn_stat_oerrors; 12525084Sjohnlev break; 12535084Sjohnlev case MAC_STAT_RBYTES: 12545084Sjohnlev *val = vnic->vn_stat_rbytes; 12555084Sjohnlev break; 12565084Sjohnlev case MAC_STAT_IPACKETS: 12575084Sjohnlev *val = vnic->vn_stat_ipackets; 12585084Sjohnlev break; 12595084Sjohnlev case MAC_STAT_OBYTES: 12605084Sjohnlev *val = vnic->vn_stat_obytes; 12615084Sjohnlev break; 12625084Sjohnlev case MAC_STAT_OPACKETS: 12635084Sjohnlev *val = vnic->vn_stat_opackets; 12645084Sjohnlev break; 12655084Sjohnlev default: 12665084Sjohnlev rval = ENOTSUP; 12675084Sjohnlev } 12685084Sjohnlev 12695084Sjohnlev rw_exit(&vnic_lock); 12705084Sjohnlev return (rval); 12715084Sjohnlev } 12725084Sjohnlev 12735084Sjohnlev /* 12745084Sjohnlev * Return information about the specified capability. 12755084Sjohnlev */ 12765084Sjohnlev /* ARGSUSED */ 12775084Sjohnlev static boolean_t 12785084Sjohnlev vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 12795084Sjohnlev { 12805084Sjohnlev vnic_t *vnic = arg; 12815084Sjohnlev 12825084Sjohnlev switch (cap) { 12835084Sjohnlev case MAC_CAPAB_POLL: 12845084Sjohnlev return (B_TRUE); 12855084Sjohnlev case MAC_CAPAB_HCKSUM: { 12865084Sjohnlev uint32_t *hcksum_txflags = cap_data; 12875084Sjohnlev 12885084Sjohnlev *hcksum_txflags = vnic->vn_hcksum_txflags & 12895084Sjohnlev (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM | 12905084Sjohnlev HCKSUM_INET_PARTIAL); 12915084Sjohnlev break; 12925084Sjohnlev } 12935084Sjohnlev default: 12945084Sjohnlev return (B_FALSE); 12955084Sjohnlev } 12965084Sjohnlev return (B_TRUE); 12975084Sjohnlev } 12985084Sjohnlev 12995084Sjohnlev static int 13005084Sjohnlev vnic_m_start(void *arg) 13015084Sjohnlev { 13025084Sjohnlev vnic_t *vnic = arg; 13035084Sjohnlev mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 13045084Sjohnlev int rc; 13055084Sjohnlev 13065084Sjohnlev rc = mac_start(lower_mh); 13075084Sjohnlev if (rc != 0) 13085084Sjohnlev return (rc); 13095084Sjohnlev 13105084Sjohnlev vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic); 13115084Sjohnlev return (0); 13125084Sjohnlev } 13135084Sjohnlev 13145084Sjohnlev static void 13155084Sjohnlev vnic_m_stop(void *arg) 13165084Sjohnlev { 13175084Sjohnlev vnic_t *vnic = arg; 13185084Sjohnlev mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh; 13195084Sjohnlev 13205084Sjohnlev vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial, 13215084Sjohnlev vnic, vnic); 13225084Sjohnlev mac_stop(lower_mh); 13235084Sjohnlev } 13245084Sjohnlev 13255084Sjohnlev /* ARGSUSED */ 13265084Sjohnlev static int 13275084Sjohnlev vnic_m_promisc(void *arg, boolean_t on) 13285084Sjohnlev { 13295084Sjohnlev vnic_t *vnic = arg; 13305084Sjohnlev 13315084Sjohnlev return (vnic_promisc_set(vnic, on)); 13325084Sjohnlev } 13335084Sjohnlev 13345084Sjohnlev static int 13355084Sjohnlev vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 13365084Sjohnlev { 13375084Sjohnlev vnic_t *vnic = arg; 13385084Sjohnlev int rc = 0; 13395084Sjohnlev 13405084Sjohnlev if (add) 13415084Sjohnlev rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST); 13425084Sjohnlev else 13435084Sjohnlev vnic_bcast_delete(vnic, addrp); 13445084Sjohnlev 13455084Sjohnlev return (rc); 13465084Sjohnlev } 13475084Sjohnlev 13485084Sjohnlev static int 13495084Sjohnlev vnic_m_unicst(void *arg, const uint8_t *mac_addr) 13505084Sjohnlev { 13515084Sjohnlev vnic_t *vnic = arg; 13525084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 13535084Sjohnlev int rv; 13545084Sjohnlev 13555084Sjohnlev rw_enter(&vnic_lock, RW_WRITER); 13565084Sjohnlev rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len, 13575084Sjohnlev (uchar_t *)mac_addr); 13585084Sjohnlev rw_exit(&vnic_lock); 13595084Sjohnlev 13605084Sjohnlev if (rv == 0) 13615084Sjohnlev mac_unicst_update(vnic->vn_mh, mac_addr); 13625084Sjohnlev return (0); 13635084Sjohnlev } 13645084Sjohnlev 13655084Sjohnlev int 1366*5895Syz147064 vnic_info(uint_t *nvnics, datalink_id_t vnic_id, datalink_id_t linkid, 1367*5895Syz147064 void *fn_arg, vnic_info_new_vnic_fn_t new_vnic_fn) 13685084Sjohnlev { 13695084Sjohnlev vnic_info_state_t state; 13705084Sjohnlev int rc = 0; 13715084Sjohnlev 13725084Sjohnlev rw_enter(&vnic_lock, RW_READER); 13735084Sjohnlev 13745084Sjohnlev *nvnics = vnic_count; 13755084Sjohnlev 13765084Sjohnlev bzero(&state, sizeof (state)); 13775084Sjohnlev state.vs_vnic_id = vnic_id; 1378*5895Syz147064 state.vs_linkid = linkid; 13795084Sjohnlev state.vs_new_vnic_fn = new_vnic_fn; 13805084Sjohnlev state.vs_fn_arg = fn_arg; 13815084Sjohnlev 13825084Sjohnlev mod_hash_walk(vnic_hash, vnic_info_walker, &state); 13835084Sjohnlev 1384*5895Syz147064 if ((rc = state.vs_rc) == 0 && vnic_id != DATALINK_ALL_LINKID && 13855733Syz147064 !state.vs_vnic_found) 13865084Sjohnlev rc = ENOENT; 13875084Sjohnlev 13885084Sjohnlev rw_exit(&vnic_lock); 13895084Sjohnlev return (rc); 13905084Sjohnlev } 13915084Sjohnlev 13925084Sjohnlev /* 13935084Sjohnlev * Walker invoked when building a list of vnics that must be passed 13945084Sjohnlev * up to user space. 13955084Sjohnlev */ 13965084Sjohnlev /*ARGSUSED*/ 13975084Sjohnlev static uint_t 13985084Sjohnlev vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 13995084Sjohnlev { 14005084Sjohnlev vnic_t *vnic; 14015084Sjohnlev vnic_info_state_t *state = arg; 14025084Sjohnlev 14035084Sjohnlev if (state->vs_rc != 0) 14045084Sjohnlev return (MH_WALK_TERMINATE); /* terminate walk */ 14055084Sjohnlev 14065084Sjohnlev vnic = (vnic_t *)val; 14075084Sjohnlev 1408*5895Syz147064 if (state->vs_vnic_id != DATALINK_ALL_LINKID && 1409*5895Syz147064 vnic->vn_id != state->vs_vnic_id) { 14105084Sjohnlev goto bail; 1411*5895Syz147064 } 14125084Sjohnlev 14135084Sjohnlev state->vs_vnic_found = B_TRUE; 14145084Sjohnlev 14155084Sjohnlev state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg, 14165084Sjohnlev vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len, 1417*5895Syz147064 vnic->vn_addr, vnic->vn_vnic_mac->va_linkid); 14185084Sjohnlev bail: 14195084Sjohnlev return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 14205084Sjohnlev } 14215084Sjohnlev 14225084Sjohnlev /* 14235084Sjohnlev * vnic_notify_cb() and vnic_notify_walker() below are used to 14245084Sjohnlev * process events received from an underlying NIC and, if needed, 14255084Sjohnlev * forward these events to the VNICs defined on top of that NIC. 14265084Sjohnlev */ 14275084Sjohnlev 14285084Sjohnlev typedef struct vnic_notify_state { 14295084Sjohnlev mac_notify_type_t vo_type; 14305084Sjohnlev vnic_mac_t *vo_vnic_mac; 14315084Sjohnlev } vnic_notify_state_t; 14325084Sjohnlev 14335084Sjohnlev /* ARGSUSED */ 14345084Sjohnlev static uint_t 14355084Sjohnlev vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 14365084Sjohnlev { 14375084Sjohnlev vnic_t *vnic = (vnic_t *)val; 14385084Sjohnlev vnic_notify_state_t *state = arg; 14395084Sjohnlev 14405084Sjohnlev /* ignore VNICs that don't use the specified underlying MAC */ 14415084Sjohnlev if (vnic->vn_vnic_mac != state->vo_vnic_mac) 14425084Sjohnlev return (MH_WALK_CONTINUE); 14435084Sjohnlev 14445084Sjohnlev switch (state->vo_type) { 14455084Sjohnlev case MAC_NOTE_TX: 14465084Sjohnlev mac_tx_update(vnic->vn_mh); 14475084Sjohnlev break; 14485084Sjohnlev case MAC_NOTE_LINK: 14495084Sjohnlev /* 14505084Sjohnlev * The VNIC link state must be up regardless of 14515084Sjohnlev * the link state of the underlying NIC to maintain 14525084Sjohnlev * connectivity between VNICs on the same host. 14535084Sjohnlev */ 14545084Sjohnlev mac_link_update(vnic->vn_mh, LINK_STATE_UP); 14555084Sjohnlev break; 14565084Sjohnlev case MAC_NOTE_UNICST: 14575084Sjohnlev vnic_update_active_rx(vnic->vn_vnic_mac); 14585084Sjohnlev break; 14595084Sjohnlev case MAC_NOTE_VNIC: 14605084Sjohnlev /* only for clients which share a NIC with a VNIC */ 14615084Sjohnlev break; 14625084Sjohnlev case MAC_NOTE_PROMISC: 14635084Sjohnlev mutex_enter(&vnic_mac_lock); 14645084Sjohnlev vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get( 14655084Sjohnlev vnic->vn_vnic_mac->va_mh); 14665084Sjohnlev mutex_exit(&vnic_mac_lock); 14675084Sjohnlev break; 14685084Sjohnlev } 14695084Sjohnlev 14705084Sjohnlev return (MH_WALK_CONTINUE); 14715084Sjohnlev } 14725084Sjohnlev 14735084Sjohnlev static void 14745084Sjohnlev vnic_notify_cb(void *arg, mac_notify_type_t type) 14755084Sjohnlev { 14765084Sjohnlev vnic_mac_t *vnic = arg; 14775084Sjohnlev vnic_notify_state_t state; 14785084Sjohnlev 14795084Sjohnlev state.vo_type = type; 14805084Sjohnlev state.vo_vnic_mac = vnic; 14815084Sjohnlev 14825084Sjohnlev rw_enter(&vnic_lock, RW_READER); 14835084Sjohnlev mod_hash_walk(vnic_hash, vnic_notify_walker, &state); 14845084Sjohnlev rw_exit(&vnic_lock); 14855084Sjohnlev } 14865084Sjohnlev 14875084Sjohnlev static int 14885084Sjohnlev vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr) 14895084Sjohnlev { 14905084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 14915084Sjohnlev vnic_flow_t *vnic_flow = vnic->vn_flow_ent; 14925084Sjohnlev 14935084Sjohnlev ASSERT(RW_WRITE_HELD(&vnic_lock)); 14945084Sjohnlev 14955084Sjohnlev if (mac_len != vnic_mac->va_addr_len) 14965084Sjohnlev return (EINVAL); 14975084Sjohnlev 14985084Sjohnlev vnic_classifier_flow_update_addr(vnic_flow, mac_addr); 14995084Sjohnlev return (0); 15005084Sjohnlev } 15015084Sjohnlev 15025084Sjohnlev static int 15035084Sjohnlev vnic_promisc_set(vnic_t *vnic, boolean_t on) 15045084Sjohnlev { 15055084Sjohnlev vnic_mac_t *vnic_mac = vnic->vn_vnic_mac; 15065084Sjohnlev int r = -1; 15075084Sjohnlev 15085084Sjohnlev if (vnic->vn_promisc == on) 15095084Sjohnlev return (0); 15105084Sjohnlev 15115084Sjohnlev if (on) { 1512*5895Syz147064 if ((r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, 1513*5895Syz147064 MAC_DEVPROMISC)) != 0) { 15145084Sjohnlev return (r); 1515*5895Syz147064 } 15165084Sjohnlev 15175084Sjohnlev rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 15185084Sjohnlev vnic->vn_promisc_next = vnic_mac->va_promisc; 15195084Sjohnlev vnic_mac->va_promisc = vnic; 15205084Sjohnlev vnic_mac->va_promisc_gen++; 15215084Sjohnlev 15225084Sjohnlev vnic->vn_promisc = B_TRUE; 15235084Sjohnlev rw_exit(&vnic_mac->va_promisc_lock); 15245084Sjohnlev 15255084Sjohnlev return (0); 15265084Sjohnlev } else { 15275084Sjohnlev vnic_t *loop, *prev = NULL; 15285084Sjohnlev 15295084Sjohnlev rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER); 15305084Sjohnlev loop = vnic_mac->va_promisc; 15315084Sjohnlev 15325084Sjohnlev while ((loop != NULL) && (loop != vnic)) { 15335084Sjohnlev prev = loop; 15345084Sjohnlev loop = loop->vn_promisc_next; 15355084Sjohnlev } 15365084Sjohnlev 15375084Sjohnlev if ((loop != NULL) && 15385084Sjohnlev ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE, 15395084Sjohnlev MAC_DEVPROMISC)) == 0)) { 15405084Sjohnlev if (prev != NULL) 15415084Sjohnlev prev->vn_promisc_next = loop->vn_promisc_next; 15425084Sjohnlev else 15435084Sjohnlev vnic_mac->va_promisc = loop->vn_promisc_next; 15445084Sjohnlev vnic_mac->va_promisc_gen++; 15455084Sjohnlev 15465084Sjohnlev vnic->vn_promisc = B_FALSE; 15475084Sjohnlev } 15485084Sjohnlev rw_exit(&vnic_mac->va_promisc_lock); 15495084Sjohnlev 15505084Sjohnlev return (r); 15515084Sjohnlev } 15525084Sjohnlev } 15535084Sjohnlev 15545084Sjohnlev void 15555084Sjohnlev vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp) 15565084Sjohnlev { 15575084Sjohnlev vnic_t *loop; 15585084Sjohnlev vnic_flow_t *flow; 15595084Sjohnlev const vnic_flow_fn_info_t *fn_info; 15605084Sjohnlev mac_header_info_t hdr_info; 15615084Sjohnlev boolean_t dst_must_match = B_TRUE; 15625084Sjohnlev 15635084Sjohnlev ASSERT(mp->b_next == NULL); 15645084Sjohnlev 15655084Sjohnlev rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 15665084Sjohnlev if (vnic_mac->va_promisc == NULL) 15675084Sjohnlev goto done; 15685084Sjohnlev 15695084Sjohnlev if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0) 15705084Sjohnlev goto done; 15715084Sjohnlev 15725084Sjohnlev /* 15735084Sjohnlev * If this is broadcast or multicast then the destination 15745084Sjohnlev * address need not match for us to deliver it. 15755084Sjohnlev */ 15765084Sjohnlev if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || 15775084Sjohnlev (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) 15785084Sjohnlev dst_must_match = B_FALSE; 15795084Sjohnlev 15805084Sjohnlev for (loop = vnic_mac->va_promisc; 15815084Sjohnlev loop != NULL; 15825084Sjohnlev loop = loop->vn_promisc_next) { 15835084Sjohnlev if (loop == sender) 15845084Sjohnlev continue; 15855084Sjohnlev 15865084Sjohnlev if (dst_must_match && 15875084Sjohnlev (bcmp(hdr_info.mhi_daddr, loop->vn_addr, 15885084Sjohnlev sizeof (loop->vn_addr)) != 0)) 15895084Sjohnlev continue; 15905084Sjohnlev 15915084Sjohnlev flow = loop->vn_flow_ent; 15925084Sjohnlev ASSERT(flow != NULL); 15935084Sjohnlev 15945084Sjohnlev if (!flow->vf_is_active) { 15955159Sjohnlev mblk_t *copy; 15965159Sjohnlev uint64_t gen; 15975159Sjohnlev 15985159Sjohnlev if ((copy = vnic_copymsg_cksum(mp)) == NULL) 15995159Sjohnlev break; 16005159Sjohnlev if ((sender != NULL) && 16015159Sjohnlev ((copy = vnic_fix_cksum(copy)) == NULL)) 16025159Sjohnlev break; 16035159Sjohnlev 16045084Sjohnlev VNIC_FLOW_REFHOLD(flow); 16055084Sjohnlev gen = vnic_mac->va_promisc_gen; 16065084Sjohnlev rw_exit(&vnic_mac->va_promisc_lock); 16075084Sjohnlev 16085159Sjohnlev fn_info = vnic_classifier_get_fn_info(flow); 16095159Sjohnlev (fn_info->ff_fn)(fn_info->ff_arg1, 16105159Sjohnlev fn_info->ff_arg2, copy); 16115084Sjohnlev 16125084Sjohnlev VNIC_FLOW_REFRELE(flow); 16135084Sjohnlev rw_enter(&vnic_mac->va_promisc_lock, RW_READER); 16145084Sjohnlev if (vnic_mac->va_promisc_gen != gen) 16155084Sjohnlev break; 16165084Sjohnlev } 16175084Sjohnlev } 16185084Sjohnlev done: 16195084Sjohnlev rw_exit(&vnic_mac->va_promisc_lock); 16205084Sjohnlev } 1621