15373Sraghuram /* 25373Sraghuram * CDDL HEADER START 35373Sraghuram * 45373Sraghuram * The contents of this file are subject to the terms of the 55373Sraghuram * Common Development and Distribution License (the "License"). 65373Sraghuram * You may not use this file except in compliance with the License. 75373Sraghuram * 85373Sraghuram * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95373Sraghuram * or http://www.opensolaris.org/os/licensing. 105373Sraghuram * See the License for the specific language governing permissions 115373Sraghuram * and limitations under the License. 125373Sraghuram * 135373Sraghuram * When distributing Covered Code, include this CDDL HEADER in each 145373Sraghuram * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155373Sraghuram * If applicable, add the following below this CDDL HEADER, with the 165373Sraghuram * fields enclosed by brackets "[]" replaced with your own identifying 175373Sraghuram * information: Portions Copyright [yyyy] [name of copyright owner] 185373Sraghuram * 195373Sraghuram * CDDL HEADER END 205373Sraghuram */ 215373Sraghuram 225373Sraghuram /* 23*5935Ssb155480 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 245373Sraghuram * Use is subject to license terms. 255373Sraghuram */ 265373Sraghuram 275373Sraghuram #pragma ident "%Z%%M% %I% %E% SMI" 285373Sraghuram 295373Sraghuram #include <sys/types.h> 305373Sraghuram #include <sys/errno.h> 315373Sraghuram #include <sys/debug.h> 325373Sraghuram #include <sys/time.h> 335373Sraghuram #include <sys/sysmacros.h> 345373Sraghuram #include <sys/systm.h> 355373Sraghuram #include <sys/user.h> 365373Sraghuram #include <sys/stropts.h> 375373Sraghuram #include <sys/stream.h> 385373Sraghuram #include <sys/strlog.h> 395373Sraghuram #include <sys/strsubr.h> 405373Sraghuram #include <sys/cmn_err.h> 415373Sraghuram #include <sys/cpu.h> 425373Sraghuram #include <sys/kmem.h> 435373Sraghuram #include <sys/conf.h> 445373Sraghuram #include <sys/ddi.h> 455373Sraghuram #include <sys/sunddi.h> 465373Sraghuram #include <sys/ksynch.h> 475373Sraghuram #include <sys/stat.h> 485373Sraghuram #include <sys/kstat.h> 495373Sraghuram #include <sys/vtrace.h> 505373Sraghuram #include <sys/strsun.h> 515373Sraghuram #include <sys/dlpi.h> 525373Sraghuram #include <sys/ethernet.h> 535373Sraghuram #include <net/if.h> 545373Sraghuram #include <sys/varargs.h> 555373Sraghuram #include <sys/machsystm.h> 565373Sraghuram #include <sys/modctl.h> 575373Sraghuram #include <sys/modhash.h> 585373Sraghuram #include <sys/mac.h> 595373Sraghuram #include <sys/mac_ether.h> 605373Sraghuram #include <sys/taskq.h> 615373Sraghuram #include <sys/note.h> 625373Sraghuram #include <sys/mach_descrip.h> 635373Sraghuram #include <sys/mac.h> 645373Sraghuram #include <sys/mdeg.h> 655373Sraghuram #include <sys/ldc.h> 665373Sraghuram #include <sys/vsw_fdb.h> 675373Sraghuram #include <sys/vsw.h> 685373Sraghuram #include <sys/vio_mailbox.h> 695373Sraghuram #include <sys/vnet_mailbox.h> 705373Sraghuram #include <sys/vnet_common.h> 715373Sraghuram #include <sys/vio_util.h> 725373Sraghuram #include <sys/sdt.h> 735373Sraghuram #include <sys/atomic.h> 745373Sraghuram 755373Sraghuram /* Switching setup routines */ 765373Sraghuram void vsw_setup_switching_timeout(void *arg); 775373Sraghuram void vsw_stop_switching_timeout(vsw_t *vswp); 785373Sraghuram int vsw_setup_switching(vsw_t *); 795373Sraghuram static int vsw_setup_layer2(vsw_t *); 805373Sraghuram static int vsw_setup_layer3(vsw_t *); 815373Sraghuram 825373Sraghuram /* Switching/data transmit routines */ 835373Sraghuram static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 845373Sraghuram vsw_port_t *port, mac_resource_handle_t); 855373Sraghuram static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 865373Sraghuram vsw_port_t *port, mac_resource_handle_t); 87*5935Ssb155480 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 88*5935Ssb155480 int caller, vsw_port_t *port); 89*5935Ssb155480 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 905373Sraghuram int caller, vsw_port_t *port); 915373Sraghuram 925373Sraghuram /* Forwarding database (FDB) routines */ 935373Sraghuram static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *); 945373Sraghuram int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 955373Sraghuram void vsw_del_mcst_port(vsw_port_t *); 965373Sraghuram int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 975373Sraghuram int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 985373Sraghuram void vsw_del_mcst_vsw(vsw_t *); 995373Sraghuram int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port); 1005373Sraghuram int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port); 1015373Sraghuram 1025373Sraghuram /* Support functions */ 1035373Sraghuram static mblk_t *vsw_dupmsgchain(mblk_t *mp); 104*5935Ssb155480 static uint32_t vsw_get_same_dest_list(struct ether_header *ehp, 1055373Sraghuram mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 1065373Sraghuram 1075373Sraghuram 1085373Sraghuram /* 1095373Sraghuram * Functions imported from other files. 1105373Sraghuram */ 1115373Sraghuram extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 1125373Sraghuram extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 1135373Sraghuram extern int vsw_mac_open(vsw_t *vswp); 1145373Sraghuram extern void vsw_mac_close(vsw_t *vswp); 115*5935Ssb155480 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 116*5935Ssb155480 mblk_t *mp, vsw_macrx_flags_t flags); 1175373Sraghuram extern void vsw_set_addrs(vsw_t *vswp); 1185373Sraghuram extern int vsw_get_hw_maddr(vsw_t *); 1195373Sraghuram extern int vsw_mac_attach(vsw_t *vswp); 120*5935Ssb155480 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, 121*5935Ssb155480 uint32_t count); 1225373Sraghuram 1235373Sraghuram /* 1245373Sraghuram * Tunables used in this file. 1255373Sraghuram */ 1265373Sraghuram extern int vsw_setup_switching_delay; 1275373Sraghuram 1285373Sraghuram 1295373Sraghuram /* 1305373Sraghuram * Timeout routine to setup switching mode: 1315373Sraghuram * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop() 1325373Sraghuram * initially. If it fails and the error is EAGAIN, then this timeout handler 1335373Sraghuram * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried 1345373Sraghuram * until we successfully finish it; or the returned error is not EAGAIN. 1355373Sraghuram */ 1365373Sraghuram void 1375373Sraghuram vsw_setup_switching_timeout(void *arg) 1385373Sraghuram { 1395373Sraghuram vsw_t *vswp = (vsw_t *)arg; 1405373Sraghuram int rv; 1415373Sraghuram 1425373Sraghuram if (vswp->swtmout_enabled == B_FALSE) 1435373Sraghuram return; 1445373Sraghuram 1455373Sraghuram rv = vsw_setup_switching(vswp); 1465373Sraghuram 1475373Sraghuram if (rv == 0) { 1485373Sraghuram /* 1495373Sraghuram * Successfully setup switching mode. 1505373Sraghuram * Program unicst, mcst addrs of vsw 1515373Sraghuram * interface and ports in the physdev. 1525373Sraghuram */ 1535373Sraghuram vsw_set_addrs(vswp); 1545373Sraghuram } 1555373Sraghuram 1565373Sraghuram mutex_enter(&vswp->swtmout_lock); 1575373Sraghuram 1585373Sraghuram if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 1595373Sraghuram /* 1605373Sraghuram * Reschedule timeout() if the error is EAGAIN and the 1615373Sraghuram * timeout is still enabled. For errors other than EAGAIN, 1625373Sraghuram * we simply return without rescheduling timeout(). 1635373Sraghuram */ 1645373Sraghuram vswp->swtmout_id = 1655373Sraghuram timeout(vsw_setup_switching_timeout, vswp, 1665373Sraghuram (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 1675373Sraghuram goto exit; 1685373Sraghuram } 1695373Sraghuram 1705373Sraghuram /* timeout handler completed */ 1715373Sraghuram vswp->swtmout_enabled = B_FALSE; 1725373Sraghuram vswp->swtmout_id = 0; 1735373Sraghuram 1745373Sraghuram exit: 1755373Sraghuram mutex_exit(&vswp->swtmout_lock); 1765373Sraghuram } 1775373Sraghuram 1785373Sraghuram /* 1795373Sraghuram * Cancel the timeout handler to setup switching mode. 1805373Sraghuram */ 1815373Sraghuram void 1825373Sraghuram vsw_stop_switching_timeout(vsw_t *vswp) 1835373Sraghuram { 1845373Sraghuram timeout_id_t tid; 1855373Sraghuram 1865373Sraghuram mutex_enter(&vswp->swtmout_lock); 1875373Sraghuram 1885373Sraghuram tid = vswp->swtmout_id; 1895373Sraghuram 1905373Sraghuram if (tid != 0) { 1915373Sraghuram /* signal timeout handler to stop */ 1925373Sraghuram vswp->swtmout_enabled = B_FALSE; 1935373Sraghuram vswp->swtmout_id = 0; 1945373Sraghuram mutex_exit(&vswp->swtmout_lock); 1955373Sraghuram 1965373Sraghuram (void) untimeout(tid); 1975373Sraghuram } else { 1985373Sraghuram mutex_exit(&vswp->swtmout_lock); 1995373Sraghuram } 2005373Sraghuram 2015373Sraghuram (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 2025373Sraghuram 2035373Sraghuram mutex_enter(&vswp->mac_lock); 2045373Sraghuram vswp->mac_open_retries = 0; 2055373Sraghuram mutex_exit(&vswp->mac_lock); 2065373Sraghuram } 2075373Sraghuram 2085373Sraghuram /* 2095373Sraghuram * Setup the required switching mode. 2105373Sraghuram * This routine is invoked from vsw_attach() or vsw_update_md_prop() 2115373Sraghuram * initially. If it fails and the error is EAGAIN, then a timeout handler 2125373Sraghuram * is started to retry vsw_setup_switching(), until it successfully finishes; 2135373Sraghuram * or the returned error is not EAGAIN. 2145373Sraghuram * 2155373Sraghuram * Returns: 2165373Sraghuram * 0 on success. 2175373Sraghuram * EAGAIN if retry is needed. 2185373Sraghuram * 1 on all other failures. 2195373Sraghuram */ 2205373Sraghuram int 2215373Sraghuram vsw_setup_switching(vsw_t *vswp) 2225373Sraghuram { 2235373Sraghuram int i, rv = 1; 2245373Sraghuram 2255373Sraghuram D1(vswp, "%s: enter", __func__); 2265373Sraghuram 2275373Sraghuram /* 2285373Sraghuram * Select best switching mode. 2295373Sraghuram * Note that we start from the saved smode_idx. This is done as 2305373Sraghuram * this routine can be called from the timeout handler to retry 2315373Sraghuram * setting up a specific mode. Currently only the function which 2325373Sraghuram * sets up layer2/promisc mode returns EAGAIN if the underlying 2335373Sraghuram * physical device is not available yet, causing retries. 2345373Sraghuram */ 2355373Sraghuram for (i = vswp->smode_idx; i < vswp->smode_num; i++) { 2365373Sraghuram vswp->smode_idx = i; 2375373Sraghuram switch (vswp->smode[i]) { 2385373Sraghuram case VSW_LAYER2: 2395373Sraghuram case VSW_LAYER2_PROMISC: 2405373Sraghuram rv = vsw_setup_layer2(vswp); 2415373Sraghuram break; 2425373Sraghuram 2435373Sraghuram case VSW_LAYER3: 2445373Sraghuram rv = vsw_setup_layer3(vswp); 2455373Sraghuram break; 2465373Sraghuram 2475373Sraghuram default: 2485373Sraghuram DERR(vswp, "unknown switch mode"); 2495373Sraghuram break; 2505373Sraghuram } 2515373Sraghuram 2525373Sraghuram if ((rv == 0) || (rv == EAGAIN)) 2535373Sraghuram break; 2545373Sraghuram 2555373Sraghuram /* all other errors(rv != 0): continue & select the next mode */ 2565373Sraghuram rv = 1; 2575373Sraghuram } 2585373Sraghuram 2595373Sraghuram if (rv && (rv != EAGAIN)) { 2605373Sraghuram cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 2615373Sraghuram "switching mode", vswp->instance); 2625373Sraghuram } else if (rv == 0) { 2635373Sraghuram (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 2645373Sraghuram } 2655373Sraghuram 2665373Sraghuram D2(vswp, "%s: Operating in mode %d", __func__, 2675373Sraghuram vswp->smode[vswp->smode_idx]); 2685373Sraghuram 2695373Sraghuram D1(vswp, "%s: exit", __func__); 2705373Sraghuram 2715373Sraghuram return (rv); 2725373Sraghuram } 2735373Sraghuram 2745373Sraghuram /* 2755373Sraghuram * Setup for layer 2 switching. 2765373Sraghuram * 2775373Sraghuram * Returns: 2785373Sraghuram * 0 on success. 2795373Sraghuram * EAGAIN if retry is needed. 2805373Sraghuram * EIO on all other failures. 2815373Sraghuram */ 2825373Sraghuram static int 2835373Sraghuram vsw_setup_layer2(vsw_t *vswp) 2845373Sraghuram { 2855373Sraghuram int rv; 2865373Sraghuram 2875373Sraghuram D1(vswp, "%s: enter", __func__); 2885373Sraghuram 2895373Sraghuram vswp->vsw_switch_frame = vsw_switch_l2_frame; 2905373Sraghuram 2915373Sraghuram rv = strlen(vswp->physname); 2925373Sraghuram if (rv == 0) { 2935373Sraghuram /* 2945373Sraghuram * Physical device name is NULL, which is 2955373Sraghuram * required for layer 2. 2965373Sraghuram */ 2975373Sraghuram cmn_err(CE_WARN, "!vsw%d: no physical device name specified", 2985373Sraghuram vswp->instance); 2995373Sraghuram return (EIO); 3005373Sraghuram } 3015373Sraghuram 3025373Sraghuram mutex_enter(&vswp->mac_lock); 3035373Sraghuram 3045373Sraghuram rv = vsw_mac_open(vswp); 3055373Sraghuram if (rv != 0) { 3065373Sraghuram if (rv != EAGAIN) { 3075373Sraghuram cmn_err(CE_WARN, "!vsw%d: Unable to open physical " 3085373Sraghuram "device: %s\n", vswp->instance, vswp->physname); 3095373Sraghuram } 3105373Sraghuram mutex_exit(&vswp->mac_lock); 3115373Sraghuram return (rv); 3125373Sraghuram } 3135373Sraghuram 3145373Sraghuram if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) { 3155373Sraghuram /* 3165373Sraghuram * Verify that underlying device can support multiple 3175373Sraghuram * unicast mac addresses. 3185373Sraghuram */ 3195373Sraghuram rv = vsw_get_hw_maddr(vswp); 3205373Sraghuram if (rv != 0) { 3215373Sraghuram cmn_err(CE_WARN, "!vsw%d: Unable to setup " 3225373Sraghuram "layer2 switching", vswp->instance); 3235373Sraghuram goto exit_error; 3245373Sraghuram } 3255373Sraghuram } 3265373Sraghuram 3275373Sraghuram /* 3285373Sraghuram * Attempt to link into the MAC layer so we can get 3295373Sraghuram * and send packets out over the physical adapter. 3305373Sraghuram */ 3315373Sraghuram rv = vsw_mac_attach(vswp); 3325373Sraghuram if (rv != 0) { 3335373Sraghuram /* 3345373Sraghuram * Registration with the MAC layer has failed, 3355373Sraghuram * so return error so that can fall back to next 3365373Sraghuram * prefered switching method. 3375373Sraghuram */ 3385373Sraghuram cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: " 3395373Sraghuram "%s\n", vswp->instance, vswp->physname); 3405373Sraghuram goto exit_error; 3415373Sraghuram } 3425373Sraghuram 3435373Sraghuram D1(vswp, "%s: exit", __func__); 3445373Sraghuram 3455373Sraghuram mutex_exit(&vswp->mac_lock); 3465373Sraghuram return (0); 3475373Sraghuram 3485373Sraghuram exit_error: 3495373Sraghuram vsw_mac_close(vswp); 3505373Sraghuram mutex_exit(&vswp->mac_lock); 3515373Sraghuram return (EIO); 3525373Sraghuram } 3535373Sraghuram 3545373Sraghuram static int 3555373Sraghuram vsw_setup_layer3(vsw_t *vswp) 3565373Sraghuram { 3575373Sraghuram D1(vswp, "%s: enter", __func__); 3585373Sraghuram 3595373Sraghuram D2(vswp, "%s: operating in layer 3 mode", __func__); 3605373Sraghuram vswp->vsw_switch_frame = vsw_switch_l3_frame; 3615373Sraghuram 3625373Sraghuram D1(vswp, "%s: exit", __func__); 3635373Sraghuram 3645373Sraghuram return (0); 3655373Sraghuram } 3665373Sraghuram 3675373Sraghuram /* 3685373Sraghuram * Switch the given ethernet frame when operating in layer 2 mode. 3695373Sraghuram * 3705373Sraghuram * vswp: pointer to the vsw instance 3715373Sraghuram * mp: pointer to chain of ethernet frame(s) to be switched 3725373Sraghuram * caller: identifies the source of this frame as: 3735373Sraghuram * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 3745373Sraghuram * 2. VSW_PHYSDEV - the physical ethernet device 3755373Sraghuram * 3. VSW_LOCALDEV - vsw configured as a virtual interface 3765373Sraghuram * arg: argument provided by the caller. 3775373Sraghuram * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 3785373Sraghuram * 2. for PHYSDEV - NULL 3795373Sraghuram * 3. for LOCALDEV - pointer to to this vsw_t(self) 3805373Sraghuram */ 3815373Sraghuram void 3825373Sraghuram vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 3835373Sraghuram vsw_port_t *arg, mac_resource_handle_t mrh) 3845373Sraghuram { 3855373Sraghuram struct ether_header *ehp; 3865373Sraghuram vsw_port_t *port = NULL; 3875373Sraghuram mblk_t *bp, *ret_m; 3885373Sraghuram mblk_t *mpt = NULL; 389*5935Ssb155480 uint32_t count; 3905373Sraghuram vsw_port_list_t *plist = &vswp->plist; 3915373Sraghuram 3925373Sraghuram D1(vswp, "%s: enter (caller %d)", __func__, caller); 3935373Sraghuram 3945373Sraghuram /* 3955373Sraghuram * PERF: rather than breaking up the chain here, scan it 3965373Sraghuram * to find all mblks heading to same destination and then 3975373Sraghuram * pass that sub-chain to the lower transmit functions. 3985373Sraghuram */ 3995373Sraghuram 4005373Sraghuram /* process the chain of packets */ 4015373Sraghuram bp = mp; 4025373Sraghuram while (bp) { 4035373Sraghuram ehp = (struct ether_header *)bp->b_rptr; 404*5935Ssb155480 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 405*5935Ssb155480 ASSERT(count != 0); 4065373Sraghuram 4075373Sraghuram D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 4085373Sraghuram __func__, MBLKSIZE(mp), MBLKL(mp)); 4095373Sraghuram 4105373Sraghuram if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 4115373Sraghuram /* 4125373Sraghuram * If destination is VSW_LOCALDEV (vsw as an eth 4135373Sraghuram * interface) and if the device is up & running, 4145373Sraghuram * send the packet up the stack on this host. 4155373Sraghuram * If the virtual interface is down, drop the packet. 4165373Sraghuram */ 4175373Sraghuram if (caller != VSW_LOCALDEV) { 418*5935Ssb155480 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 4195373Sraghuram } else { 4205373Sraghuram freemsgchain(mp); 4215373Sraghuram } 4225373Sraghuram continue; 4235373Sraghuram } 4245373Sraghuram 4255373Sraghuram READ_ENTER(&plist->lockrw); 4265373Sraghuram port = vsw_lookup_fdb(vswp, ehp); 4275373Sraghuram if (port) { 4285373Sraghuram /* 4295373Sraghuram * Mark the port as in-use before releasing the lockrw. 4305373Sraghuram */ 4315373Sraghuram VSW_PORT_REFHOLD(port); 4325373Sraghuram RW_EXIT(&plist->lockrw); 4335373Sraghuram 4345373Sraghuram /* 4355373Sraghuram * If plumbed and in promisc mode then copy msg 4365373Sraghuram * and send up the stack. 4375373Sraghuram */ 438*5935Ssb155480 vsw_mac_rx(vswp, mrh, mp, 439*5935Ssb155480 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 4405373Sraghuram 4415373Sraghuram /* 4425373Sraghuram * If the destination is in FDB, the packet 4435373Sraghuram * should be forwarded to the correponding 4445373Sraghuram * vsw_port (connected to a vnet device - 4455373Sraghuram * VSW_VNETPORT) 4465373Sraghuram */ 447*5935Ssb155480 (void) vsw_portsend(port, mp, mpt, count); 4485373Sraghuram 4495373Sraghuram /* 4505373Sraghuram * Decrement use count in port. 4515373Sraghuram */ 4525373Sraghuram VSW_PORT_REFRELE(port); 4535373Sraghuram } else { 4545373Sraghuram RW_EXIT(&plist->lockrw); 4555373Sraghuram /* 4565373Sraghuram * Destination not in FDB. 4575373Sraghuram * 4585373Sraghuram * If the destination is broadcast or 4595373Sraghuram * multicast forward the packet to all 4605373Sraghuram * (VNETPORTs, PHYSDEV, LOCALDEV), 4615373Sraghuram * except the caller. 4625373Sraghuram */ 4635373Sraghuram if (IS_BROADCAST(ehp)) { 464*5935Ssb155480 D2(vswp, "%s: BROADCAST pkt", __func__); 465*5935Ssb155480 (void) vsw_forward_all(vswp, mp, caller, arg); 4665373Sraghuram } else if (IS_MULTICAST(ehp)) { 467*5935Ssb155480 D2(vswp, "%s: MULTICAST pkt", __func__); 468*5935Ssb155480 (void) vsw_forward_grp(vswp, mp, caller, arg); 4695373Sraghuram } else { 4705373Sraghuram /* 4715373Sraghuram * If the destination is unicast, and came 4725373Sraghuram * from either a logical network device or 4735373Sraghuram * the switch itself when it is plumbed, then 4745373Sraghuram * send it out on the physical device and also 4755373Sraghuram * up the stack if the logical interface is 4765373Sraghuram * in promiscious mode. 4775373Sraghuram * 4785373Sraghuram * NOTE: The assumption here is that if we 4795373Sraghuram * cannot find the destination in our fdb, its 4805373Sraghuram * a unicast address, and came from either a 4815373Sraghuram * vnet or down the stack (when plumbed) it 4825373Sraghuram * must be destinded for an ethernet device 4835373Sraghuram * outside our ldoms. 4845373Sraghuram */ 4855373Sraghuram if (caller == VSW_VNETPORT) { 4865373Sraghuram /* promisc check copy etc */ 487*5935Ssb155480 vsw_mac_rx(vswp, mrh, mp, 4885373Sraghuram VSW_MACRX_PROMISC | 4895373Sraghuram VSW_MACRX_COPYMSG); 4905373Sraghuram 4915373Sraghuram if ((ret_m = vsw_tx_msg(vswp, mp)) 4925373Sraghuram != NULL) { 4935373Sraghuram DERR(vswp, "%s: drop mblks to " 4945373Sraghuram "phys dev", __func__); 4955373Sraghuram freemsgchain(ret_m); 4965373Sraghuram } 4975373Sraghuram 4985373Sraghuram } else if (caller == VSW_PHYSDEV) { 4995373Sraghuram /* 5005373Sraghuram * Pkt seen because card in promisc 5015373Sraghuram * mode. Send up stack if plumbed in 5025373Sraghuram * promisc mode, else drop it. 5035373Sraghuram */ 504*5935Ssb155480 vsw_mac_rx(vswp, mrh, mp, 5055373Sraghuram VSW_MACRX_PROMISC | 5065373Sraghuram VSW_MACRX_FREEMSG); 5075373Sraghuram 5085373Sraghuram } else if (caller == VSW_LOCALDEV) { 5095373Sraghuram /* 5105373Sraghuram * Pkt came down the stack, send out 5115373Sraghuram * over physical device. 5125373Sraghuram */ 5135373Sraghuram if ((ret_m = vsw_tx_msg(vswp, mp)) 5145373Sraghuram != NULL) { 5155373Sraghuram DERR(vswp, "%s: drop mblks to " 5165373Sraghuram "phys dev", __func__); 5175373Sraghuram freemsgchain(ret_m); 5185373Sraghuram } 5195373Sraghuram } 5205373Sraghuram } 5215373Sraghuram } 5225373Sraghuram } 5235373Sraghuram D1(vswp, "%s: exit\n", __func__); 5245373Sraghuram } 5255373Sraghuram 5265373Sraghuram /* 5275373Sraghuram * Switch ethernet frame when in layer 3 mode (i.e. using IP 5285373Sraghuram * layer to do the routing). 5295373Sraghuram * 5305373Sraghuram * There is a large amount of overlap between this function and 5315373Sraghuram * vsw_switch_l2_frame. At some stage we need to revisit and refactor 5325373Sraghuram * both these functions. 5335373Sraghuram */ 5345373Sraghuram void 5355373Sraghuram vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 5365373Sraghuram vsw_port_t *arg, mac_resource_handle_t mrh) 5375373Sraghuram { 5385373Sraghuram struct ether_header *ehp; 5395373Sraghuram vsw_port_t *port = NULL; 5405373Sraghuram mblk_t *bp = NULL; 5415373Sraghuram mblk_t *mpt; 542*5935Ssb155480 uint32_t count; 5435373Sraghuram vsw_port_list_t *plist = &vswp->plist; 5445373Sraghuram 5455373Sraghuram D1(vswp, "%s: enter (caller %d)", __func__, caller); 5465373Sraghuram 5475373Sraghuram /* 5485373Sraghuram * In layer 3 mode should only ever be switching packets 5495373Sraghuram * between IP layer and vnet devices. So make sure thats 5505373Sraghuram * who is invoking us. 5515373Sraghuram */ 5525373Sraghuram if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 5535373Sraghuram DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 5545373Sraghuram freemsgchain(mp); 5555373Sraghuram return; 5565373Sraghuram } 5575373Sraghuram 5585373Sraghuram /* process the chain of packets */ 5595373Sraghuram bp = mp; 5605373Sraghuram while (bp) { 5615373Sraghuram ehp = (struct ether_header *)bp->b_rptr; 562*5935Ssb155480 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 563*5935Ssb155480 ASSERT(count != 0); 5645373Sraghuram 5655373Sraghuram D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 5665373Sraghuram __func__, MBLKSIZE(mp), MBLKL(mp)); 5675373Sraghuram 5685373Sraghuram READ_ENTER(&plist->lockrw); 5695373Sraghuram port = vsw_lookup_fdb(vswp, ehp); 5705373Sraghuram if (port) { 5715373Sraghuram /* 5725373Sraghuram * Mark the port as in-use before releasing the lockrw. 5735373Sraghuram */ 5745373Sraghuram VSW_PORT_REFHOLD(port); 5755373Sraghuram RW_EXIT(&plist->lockrw); 5765373Sraghuram 5775373Sraghuram D2(vswp, "%s: sending to target port", __func__); 578*5935Ssb155480 (void) vsw_portsend(port, mp, mpt, count); 5795373Sraghuram 5805373Sraghuram /* 5815373Sraghuram * Decrement ref count. 5825373Sraghuram */ 5835373Sraghuram VSW_PORT_REFRELE(port); 5845373Sraghuram } else { 5855373Sraghuram RW_EXIT(&plist->lockrw); 5865373Sraghuram /* 5875373Sraghuram * Destination not in FDB 5885373Sraghuram * 5895373Sraghuram * If the destination is broadcast or 5905373Sraghuram * multicast forward the packet to all 5915373Sraghuram * (VNETPORTs, PHYSDEV, LOCALDEV), 5925373Sraghuram * except the caller. 5935373Sraghuram */ 5945373Sraghuram if (IS_BROADCAST(ehp)) { 5955373Sraghuram D2(vswp, "%s: BROADCAST pkt", __func__); 596*5935Ssb155480 (void) vsw_forward_all(vswp, mp, caller, arg); 5975373Sraghuram } else if (IS_MULTICAST(ehp)) { 5985373Sraghuram D2(vswp, "%s: MULTICAST pkt", __func__); 599*5935Ssb155480 (void) vsw_forward_grp(vswp, mp, caller, arg); 6005373Sraghuram } else { 6015373Sraghuram /* 6025373Sraghuram * Unicast pkt from vnet that we don't have 6035373Sraghuram * an FDB entry for, so must be destinded for 6045373Sraghuram * the outside world. Attempt to send up to the 6055373Sraghuram * IP layer to allow it to deal with it. 6065373Sraghuram */ 6075373Sraghuram if (caller == VSW_VNETPORT) { 608*5935Ssb155480 vsw_mac_rx(vswp, mrh, 609*5935Ssb155480 mp, VSW_MACRX_FREEMSG); 6105373Sraghuram } 6115373Sraghuram } 6125373Sraghuram } 6135373Sraghuram } 6145373Sraghuram 6155373Sraghuram D1(vswp, "%s: exit", __func__); 6165373Sraghuram } 6175373Sraghuram 6185373Sraghuram /* 6195373Sraghuram * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 6205373Sraghuram * except the caller (port on which frame arrived). 6215373Sraghuram */ 6225373Sraghuram static int 623*5935Ssb155480 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 6245373Sraghuram { 6255373Sraghuram vsw_port_list_t *plist = &vswp->plist; 6265373Sraghuram vsw_port_t *portp; 6275373Sraghuram mblk_t *nmp = NULL; 6285373Sraghuram mblk_t *ret_m = NULL; 6295373Sraghuram int skip_port = 0; 6305373Sraghuram 6315373Sraghuram D1(vswp, "vsw_forward_all: enter\n"); 6325373Sraghuram 6335373Sraghuram /* 6345373Sraghuram * Broadcast message from inside ldoms so send to outside 6355373Sraghuram * world if in either of layer 2 modes. 6365373Sraghuram */ 6375373Sraghuram if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 6385373Sraghuram (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 6395373Sraghuram ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 6405373Sraghuram 6415373Sraghuram nmp = vsw_dupmsgchain(mp); 6425373Sraghuram if (nmp) { 6435373Sraghuram if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 6445373Sraghuram DERR(vswp, "%s: dropping pkt(s) " 6455373Sraghuram "consisting of %ld bytes of data for" 6465373Sraghuram " physical device", __func__, MBLKL(ret_m)); 6475373Sraghuram freemsgchain(ret_m); 6485373Sraghuram } 6495373Sraghuram } 6505373Sraghuram } 6515373Sraghuram 6525373Sraghuram if (caller == VSW_VNETPORT) 6535373Sraghuram skip_port = 1; 6545373Sraghuram 6555373Sraghuram /* 6565373Sraghuram * Broadcast message from other vnet (layer 2 or 3) or outside 6575373Sraghuram * world (layer 2 only), send up stack if plumbed. 6585373Sraghuram */ 6595373Sraghuram if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 660*5935Ssb155480 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 6615373Sraghuram } 6625373Sraghuram 6635373Sraghuram /* send it to all VNETPORTs */ 6645373Sraghuram READ_ENTER(&plist->lockrw); 6655373Sraghuram for (portp = plist->head; portp != NULL; portp = portp->p_next) { 6665373Sraghuram D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 6675373Sraghuram /* 6685373Sraghuram * Caution ! - don't reorder these two checks as arg 6695373Sraghuram * will be NULL if the caller is PHYSDEV. skip_port is 6705373Sraghuram * only set if caller is VNETPORT. 6715373Sraghuram */ 6725373Sraghuram if ((skip_port) && (portp == arg)) { 6735373Sraghuram continue; 6745373Sraghuram } else { 6755373Sraghuram nmp = vsw_dupmsgchain(mp); 6765373Sraghuram if (nmp) { 677*5935Ssb155480 mblk_t *mpt = nmp; 678*5935Ssb155480 uint32_t count = 1; 6795373Sraghuram 6805373Sraghuram /* Find tail */ 6815373Sraghuram while (mpt->b_next != NULL) { 6825373Sraghuram mpt = mpt->b_next; 683*5935Ssb155480 count++; 6845373Sraghuram } 6855373Sraghuram /* 6865373Sraghuram * The plist->lockrw is protecting the 6875373Sraghuram * portp from getting destroyed here. 6885373Sraghuram * So, no ref_cnt is incremented here. 6895373Sraghuram */ 690*5935Ssb155480 (void) vsw_portsend(portp, nmp, mpt, count); 6915373Sraghuram } else { 6925373Sraghuram DERR(vswp, "vsw_forward_all: nmp NULL"); 6935373Sraghuram } 6945373Sraghuram } 6955373Sraghuram } 6965373Sraghuram RW_EXIT(&plist->lockrw); 6975373Sraghuram 6985373Sraghuram freemsgchain(mp); 6995373Sraghuram 7005373Sraghuram D1(vswp, "vsw_forward_all: exit\n"); 7015373Sraghuram return (0); 7025373Sraghuram } 7035373Sraghuram 7045373Sraghuram /* 7055373Sraghuram * Forward pkts to any devices or interfaces which have registered 7065373Sraghuram * an interest in them (i.e. multicast groups). 7075373Sraghuram */ 7085373Sraghuram static int 709*5935Ssb155480 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 7105373Sraghuram { 7115373Sraghuram struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 7125373Sraghuram mfdb_ent_t *entp = NULL; 7135373Sraghuram mfdb_ent_t *tpp = NULL; 7145373Sraghuram vsw_port_t *port; 7155373Sraghuram uint64_t key = 0; 7165373Sraghuram mblk_t *nmp = NULL; 7175373Sraghuram mblk_t *ret_m = NULL; 7185373Sraghuram boolean_t check_if = B_TRUE; 7195373Sraghuram 7205373Sraghuram /* 7215373Sraghuram * Convert address to hash table key 7225373Sraghuram */ 7235373Sraghuram KEY_HASH(key, ehp->ether_dhost); 7245373Sraghuram 7255373Sraghuram D1(vswp, "%s: key 0x%llx", __func__, key); 7265373Sraghuram 7275373Sraghuram /* 7285373Sraghuram * If pkt came from either a vnet or down the stack (if we are 7295373Sraghuram * plumbed) and we are in layer 2 mode, then we send the pkt out 7305373Sraghuram * over the physical adapter, and then check to see if any other 7315373Sraghuram * vnets are interested in it. 7325373Sraghuram */ 7335373Sraghuram if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 7345373Sraghuram (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 7355373Sraghuram ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 7365373Sraghuram nmp = vsw_dupmsgchain(mp); 7375373Sraghuram if (nmp) { 7385373Sraghuram if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 7395373Sraghuram DERR(vswp, "%s: dropping pkt(s) consisting of " 7405373Sraghuram "%ld bytes of data for physical device", 7415373Sraghuram __func__, MBLKL(ret_m)); 7425373Sraghuram freemsgchain(ret_m); 7435373Sraghuram } 7445373Sraghuram } 7455373Sraghuram } 7465373Sraghuram 7475373Sraghuram READ_ENTER(&vswp->mfdbrw); 7485373Sraghuram if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 7495373Sraghuram (mod_hash_val_t *)&entp) != 0) { 7505373Sraghuram D3(vswp, "%s: no table entry found for addr 0x%llx", 7515373Sraghuram __func__, key); 7525373Sraghuram } else { 7535373Sraghuram /* 7545373Sraghuram * Send to list of devices associated with this address... 7555373Sraghuram */ 7565373Sraghuram for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 7575373Sraghuram 7585373Sraghuram /* dont send to ourselves */ 7595373Sraghuram if ((caller == VSW_VNETPORT) && 7605373Sraghuram (tpp->d_addr == (void *)arg)) { 7615373Sraghuram port = (vsw_port_t *)tpp->d_addr; 7625373Sraghuram D3(vswp, "%s: not sending to ourselves" 7635373Sraghuram " : port %d", __func__, port->p_instance); 7645373Sraghuram continue; 7655373Sraghuram 7665373Sraghuram } else if ((caller == VSW_LOCALDEV) && 7675373Sraghuram (tpp->d_type == VSW_LOCALDEV)) { 768*5935Ssb155480 D2(vswp, "%s: not sending back up stack", 7695373Sraghuram __func__); 7705373Sraghuram continue; 7715373Sraghuram } 7725373Sraghuram 7735373Sraghuram if (tpp->d_type == VSW_VNETPORT) { 7745373Sraghuram port = (vsw_port_t *)tpp->d_addr; 7755373Sraghuram D3(vswp, "%s: sending to port %ld for addr " 7765373Sraghuram "0x%llx", __func__, port->p_instance, key); 7775373Sraghuram 7785373Sraghuram nmp = vsw_dupmsgchain(mp); 7795373Sraghuram if (nmp) { 780*5935Ssb155480 mblk_t *mpt = nmp; 781*5935Ssb155480 uint32_t count = 1; 7825373Sraghuram 7835373Sraghuram /* Find tail */ 7845373Sraghuram while (mpt->b_next != NULL) { 7855373Sraghuram mpt = mpt->b_next; 786*5935Ssb155480 count++; 7875373Sraghuram } 7885373Sraghuram /* 7895373Sraghuram * The vswp->mfdbrw is protecting the 7905373Sraghuram * portp from getting destroyed here. 7915373Sraghuram * So, no ref_cnt is incremented here. 7925373Sraghuram */ 793*5935Ssb155480 (void) vsw_portsend(port, nmp, mpt, 794*5935Ssb155480 count); 7955373Sraghuram } 7965373Sraghuram } else { 797*5935Ssb155480 vsw_mac_rx(vswp, NULL, 798*5935Ssb155480 mp, VSW_MACRX_COPYMSG); 799*5935Ssb155480 D2(vswp, "%s: sending up stack" 8005373Sraghuram " for addr 0x%llx", __func__, key); 8015373Sraghuram check_if = B_FALSE; 8025373Sraghuram } 8035373Sraghuram } 8045373Sraghuram } 8055373Sraghuram 8065373Sraghuram RW_EXIT(&vswp->mfdbrw); 8075373Sraghuram 8085373Sraghuram /* 8095373Sraghuram * If the pkt came from either a vnet or from physical device, 8105373Sraghuram * and if we havent already sent the pkt up the stack then we 8115373Sraghuram * check now if we can/should (i.e. the interface is plumbed 8125373Sraghuram * and in promisc mode). 8135373Sraghuram */ 8145373Sraghuram if ((check_if) && 8155373Sraghuram ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 816*5935Ssb155480 vsw_mac_rx(vswp, NULL, mp, 8175373Sraghuram VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 8185373Sraghuram } 8195373Sraghuram 8205373Sraghuram freemsgchain(mp); 8215373Sraghuram 8225373Sraghuram D1(vswp, "%s: exit", __func__); 8235373Sraghuram 8245373Sraghuram return (0); 8255373Sraghuram } 8265373Sraghuram 8275373Sraghuram /* 8285373Sraghuram * Add an entry into FDB, for the given mac address and port_id. 8295373Sraghuram * Returns 0 on success, 1 on failure. 8305373Sraghuram * 8315373Sraghuram * Lock protecting FDB must be held by calling process. 8325373Sraghuram */ 8335373Sraghuram int 8345373Sraghuram vsw_add_fdb(vsw_t *vswp, vsw_port_t *port) 8355373Sraghuram { 8365373Sraghuram uint64_t addr = 0; 8375373Sraghuram 8385373Sraghuram D1(vswp, "%s: enter", __func__); 8395373Sraghuram 8405373Sraghuram KEY_HASH(addr, port->p_macaddr); 8415373Sraghuram 8425373Sraghuram D2(vswp, "%s: key = 0x%llx", __func__, addr); 8435373Sraghuram 8445373Sraghuram /* 8455373Sraghuram * Note: duplicate keys will be rejected by mod_hash. 8465373Sraghuram */ 8475373Sraghuram if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr, 8485373Sraghuram (mod_hash_val_t)port) != 0) { 8495373Sraghuram DERR(vswp, "%s: unable to add entry into fdb.", __func__); 8505373Sraghuram return (1); 8515373Sraghuram } 8525373Sraghuram 8535373Sraghuram D1(vswp, "%s: exit", __func__); 8545373Sraghuram return (0); 8555373Sraghuram } 8565373Sraghuram 8575373Sraghuram /* 8585373Sraghuram * Remove an entry from FDB. 8595373Sraghuram * Returns 0 on success, 1 on failure. 8605373Sraghuram */ 8615373Sraghuram int 8625373Sraghuram vsw_del_fdb(vsw_t *vswp, vsw_port_t *port) 8635373Sraghuram { 8645373Sraghuram uint64_t addr = 0; 8655373Sraghuram 8665373Sraghuram D1(vswp, "%s: enter", __func__); 8675373Sraghuram 8685373Sraghuram KEY_HASH(addr, port->p_macaddr); 8695373Sraghuram 8705373Sraghuram D2(vswp, "%s: key = 0x%llx", __func__, addr); 8715373Sraghuram 8725373Sraghuram (void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr); 8735373Sraghuram 8745373Sraghuram D1(vswp, "%s: enter", __func__); 8755373Sraghuram 8765373Sraghuram return (0); 8775373Sraghuram } 8785373Sraghuram 8795373Sraghuram /* 8805373Sraghuram * Search fdb for a given mac address. 8815373Sraghuram * Returns pointer to the entry if found, else returns NULL. 8825373Sraghuram */ 8835373Sraghuram static vsw_port_t * 8845373Sraghuram vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp) 8855373Sraghuram { 8865373Sraghuram uint64_t key = 0; 8875373Sraghuram vsw_port_t *port = NULL; 8885373Sraghuram 8895373Sraghuram D1(vswp, "%s: enter", __func__); 8905373Sraghuram 8915373Sraghuram KEY_HASH(key, ehp->ether_dhost); 8925373Sraghuram 8935373Sraghuram D2(vswp, "%s: key = 0x%llx", __func__, key); 8945373Sraghuram 8955373Sraghuram if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key, 8965373Sraghuram (mod_hash_val_t *)&port) != 0) { 8975373Sraghuram D2(vswp, "%s: no port found", __func__); 8985373Sraghuram return (NULL); 8995373Sraghuram } 9005373Sraghuram 9015373Sraghuram D1(vswp, "%s: exit", __func__); 9025373Sraghuram 9035373Sraghuram return (port); 9045373Sraghuram } 9055373Sraghuram 9065373Sraghuram /* 9075373Sraghuram * Add or remove multicast address(es). 9085373Sraghuram * 9095373Sraghuram * Returns 0 on success, 1 on failure. 9105373Sraghuram */ 9115373Sraghuram int 9125373Sraghuram vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 9135373Sraghuram { 9145373Sraghuram mcst_addr_t *mcst_p = NULL; 9155373Sraghuram vsw_t *vswp = port->p_vswp; 9165373Sraghuram uint64_t addr = 0x0; 9175373Sraghuram int i; 9185373Sraghuram 9195373Sraghuram D1(vswp, "%s: enter", __func__); 9205373Sraghuram 9215373Sraghuram D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 9225373Sraghuram 9235373Sraghuram for (i = 0; i < mcst_pkt->count; i++) { 9245373Sraghuram /* 9255373Sraghuram * Convert address into form that can be used 9265373Sraghuram * as hash table key. 9275373Sraghuram */ 9285373Sraghuram KEY_HASH(addr, mcst_pkt->mca[i]); 9295373Sraghuram 9305373Sraghuram /* 9315373Sraghuram * Add or delete the specified address/port combination. 9325373Sraghuram */ 9335373Sraghuram if (mcst_pkt->set == 0x1) { 9345373Sraghuram D3(vswp, "%s: adding multicast address 0x%llx for " 9355373Sraghuram "port %ld", __func__, addr, port->p_instance); 9365373Sraghuram if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 9375373Sraghuram /* 9385373Sraghuram * Update the list of multicast 9395373Sraghuram * addresses contained within the 9405373Sraghuram * port structure to include this new 9415373Sraghuram * one. 9425373Sraghuram */ 9435373Sraghuram mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 9445373Sraghuram KM_NOSLEEP); 9455373Sraghuram if (mcst_p == NULL) { 9465373Sraghuram DERR(vswp, "%s: unable to alloc mem", 9475373Sraghuram __func__); 9485373Sraghuram (void) vsw_del_mcst(vswp, 9495373Sraghuram VSW_VNETPORT, addr, port); 9505373Sraghuram return (1); 9515373Sraghuram } 9525373Sraghuram 9535373Sraghuram mcst_p->nextp = NULL; 9545373Sraghuram mcst_p->addr = addr; 9555373Sraghuram ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 9565373Sraghuram 9575373Sraghuram /* 9585373Sraghuram * Program the address into HW. If the addr 9595373Sraghuram * has already been programmed then the MAC 9605373Sraghuram * just increments a ref counter (which is 9615373Sraghuram * used when the address is being deleted) 9625373Sraghuram */ 9635373Sraghuram mutex_enter(&vswp->mac_lock); 9645373Sraghuram if (vswp->mh != NULL) { 9655373Sraghuram if (mac_multicst_add(vswp->mh, 9665373Sraghuram (uchar_t *)&mcst_pkt->mca[i])) { 9675373Sraghuram mutex_exit(&vswp->mac_lock); 9685373Sraghuram cmn_err(CE_WARN, "!vsw%d: " 9695373Sraghuram "unable to add multicast " 9705373Sraghuram "address: %s\n", 9715373Sraghuram vswp->instance, 9725373Sraghuram ether_sprintf((void *) 9735373Sraghuram &mcst_p->mca)); 9745373Sraghuram (void) vsw_del_mcst(vswp, 9755373Sraghuram VSW_VNETPORT, addr, port); 9765373Sraghuram kmem_free(mcst_p, 9775373Sraghuram sizeof (*mcst_p)); 9785373Sraghuram return (1); 9795373Sraghuram } 9805373Sraghuram mcst_p->mac_added = B_TRUE; 9815373Sraghuram } 9825373Sraghuram mutex_exit(&vswp->mac_lock); 9835373Sraghuram 9845373Sraghuram mutex_enter(&port->mca_lock); 9855373Sraghuram mcst_p->nextp = port->mcap; 9865373Sraghuram port->mcap = mcst_p; 9875373Sraghuram mutex_exit(&port->mca_lock); 9885373Sraghuram 9895373Sraghuram } else { 9905373Sraghuram DERR(vswp, "%s: error adding multicast " 9915373Sraghuram "address 0x%llx for port %ld", 9925373Sraghuram __func__, addr, port->p_instance); 9935373Sraghuram return (1); 9945373Sraghuram } 9955373Sraghuram } else { 9965373Sraghuram /* 9975373Sraghuram * Delete an entry from the multicast hash 9985373Sraghuram * table and update the address list 9995373Sraghuram * appropriately. 10005373Sraghuram */ 10015373Sraghuram if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 10025373Sraghuram D3(vswp, "%s: deleting multicast address " 10035373Sraghuram "0x%llx for port %ld", __func__, addr, 10045373Sraghuram port->p_instance); 10055373Sraghuram 10065373Sraghuram mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 10075373Sraghuram ASSERT(mcst_p != NULL); 10085373Sraghuram 10095373Sraghuram /* 10105373Sraghuram * Remove the address from HW. The address 10115373Sraghuram * will actually only be removed once the ref 10125373Sraghuram * count within the MAC layer has dropped to 10135373Sraghuram * zero. I.e. we can safely call this fn even 10145373Sraghuram * if other ports are interested in this 10155373Sraghuram * address. 10165373Sraghuram */ 10175373Sraghuram mutex_enter(&vswp->mac_lock); 10185373Sraghuram if (vswp->mh != NULL && mcst_p->mac_added) { 10195373Sraghuram if (mac_multicst_remove(vswp->mh, 10205373Sraghuram (uchar_t *)&mcst_pkt->mca[i])) { 10215373Sraghuram mutex_exit(&vswp->mac_lock); 10225373Sraghuram cmn_err(CE_WARN, "!vsw%d: " 10235373Sraghuram "unable to remove mcast " 10245373Sraghuram "address: %s\n", 10255373Sraghuram vswp->instance, 10265373Sraghuram ether_sprintf((void *) 10275373Sraghuram &mcst_p->mca)); 10285373Sraghuram kmem_free(mcst_p, 10295373Sraghuram sizeof (*mcst_p)); 10305373Sraghuram return (1); 10315373Sraghuram } 10325373Sraghuram mcst_p->mac_added = B_FALSE; 10335373Sraghuram } 10345373Sraghuram mutex_exit(&vswp->mac_lock); 10355373Sraghuram kmem_free(mcst_p, sizeof (*mcst_p)); 10365373Sraghuram 10375373Sraghuram } else { 10385373Sraghuram DERR(vswp, "%s: error deleting multicast " 10395373Sraghuram "addr 0x%llx for port %ld", 10405373Sraghuram __func__, addr, port->p_instance); 10415373Sraghuram return (1); 10425373Sraghuram } 10435373Sraghuram } 10445373Sraghuram } 10455373Sraghuram D1(vswp, "%s: exit", __func__); 10465373Sraghuram return (0); 10475373Sraghuram } 10485373Sraghuram 10495373Sraghuram /* 10505373Sraghuram * Add a new multicast entry. 10515373Sraghuram * 10525373Sraghuram * Search hash table based on address. If match found then 10535373Sraghuram * update associated val (which is chain of ports), otherwise 10545373Sraghuram * create new key/val (addr/port) pair and insert into table. 10555373Sraghuram */ 10565373Sraghuram int 10575373Sraghuram vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 10585373Sraghuram { 10595373Sraghuram int dup = 0; 10605373Sraghuram int rv = 0; 10615373Sraghuram mfdb_ent_t *ment = NULL; 10625373Sraghuram mfdb_ent_t *tmp_ent = NULL; 10635373Sraghuram mfdb_ent_t *new_ent = NULL; 10645373Sraghuram void *tgt = NULL; 10655373Sraghuram 10665373Sraghuram if (devtype == VSW_VNETPORT) { 10675373Sraghuram /* 10685373Sraghuram * Being invoked from a vnet. 10695373Sraghuram */ 10705373Sraghuram ASSERT(arg != NULL); 10715373Sraghuram tgt = arg; 10725373Sraghuram D2(NULL, "%s: port %d : address 0x%llx", __func__, 10735373Sraghuram ((vsw_port_t *)arg)->p_instance, addr); 10745373Sraghuram } else { 10755373Sraghuram /* 10765373Sraghuram * We are being invoked via the m_multicst mac entry 10775373Sraghuram * point. 10785373Sraghuram */ 10795373Sraghuram D2(NULL, "%s: address 0x%llx", __func__, addr); 10805373Sraghuram tgt = (void *)vswp; 10815373Sraghuram } 10825373Sraghuram 10835373Sraghuram WRITE_ENTER(&vswp->mfdbrw); 10845373Sraghuram if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 10855373Sraghuram (mod_hash_val_t *)&ment) != 0) { 10865373Sraghuram 10875373Sraghuram /* address not currently in table */ 10885373Sraghuram ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 10895373Sraghuram ment->d_addr = (void *)tgt; 10905373Sraghuram ment->d_type = devtype; 10915373Sraghuram ment->nextp = NULL; 10925373Sraghuram 10935373Sraghuram if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 10945373Sraghuram (mod_hash_val_t)ment) != 0) { 10955373Sraghuram DERR(vswp, "%s: hash table insertion failed", __func__); 10965373Sraghuram kmem_free(ment, sizeof (mfdb_ent_t)); 10975373Sraghuram rv = 1; 10985373Sraghuram } else { 10995373Sraghuram D2(vswp, "%s: added initial entry for 0x%llx to " 11005373Sraghuram "table", __func__, addr); 11015373Sraghuram } 11025373Sraghuram } else { 11035373Sraghuram /* 11045373Sraghuram * Address in table. Check to see if specified port 11055373Sraghuram * is already associated with the address. If not add 11065373Sraghuram * it now. 11075373Sraghuram */ 11085373Sraghuram tmp_ent = ment; 11095373Sraghuram while (tmp_ent != NULL) { 11105373Sraghuram if (tmp_ent->d_addr == (void *)tgt) { 11115373Sraghuram if (devtype == VSW_VNETPORT) { 11125373Sraghuram DERR(vswp, "%s: duplicate port entry " 11135373Sraghuram "found for portid %ld and key " 11145373Sraghuram "0x%llx", __func__, 11155373Sraghuram ((vsw_port_t *)arg)->p_instance, 11165373Sraghuram addr); 11175373Sraghuram } else { 11185373Sraghuram DERR(vswp, "%s: duplicate entry found" 11195373Sraghuram "for key 0x%llx", __func__, addr); 11205373Sraghuram } 11215373Sraghuram rv = 1; 11225373Sraghuram dup = 1; 11235373Sraghuram break; 11245373Sraghuram } 11255373Sraghuram tmp_ent = tmp_ent->nextp; 11265373Sraghuram } 11275373Sraghuram 11285373Sraghuram /* 11295373Sraghuram * Port not on list so add it to end now. 11305373Sraghuram */ 11315373Sraghuram if (0 == dup) { 11325373Sraghuram D2(vswp, "%s: added entry for 0x%llx to table", 11335373Sraghuram __func__, addr); 11345373Sraghuram new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 11355373Sraghuram new_ent->d_addr = (void *)tgt; 11365373Sraghuram new_ent->d_type = devtype; 11375373Sraghuram new_ent->nextp = NULL; 11385373Sraghuram 11395373Sraghuram tmp_ent = ment; 11405373Sraghuram while (tmp_ent->nextp != NULL) 11415373Sraghuram tmp_ent = tmp_ent->nextp; 11425373Sraghuram 11435373Sraghuram tmp_ent->nextp = new_ent; 11445373Sraghuram } 11455373Sraghuram } 11465373Sraghuram 11475373Sraghuram RW_EXIT(&vswp->mfdbrw); 11485373Sraghuram return (rv); 11495373Sraghuram } 11505373Sraghuram 11515373Sraghuram /* 11525373Sraghuram * Remove a multicast entry from the hashtable. 11535373Sraghuram * 11545373Sraghuram * Search hash table based on address. If match found, scan 11555373Sraghuram * list of ports associated with address. If specified port 11565373Sraghuram * found remove it from list. 11575373Sraghuram */ 11585373Sraghuram int 11595373Sraghuram vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 11605373Sraghuram { 11615373Sraghuram mfdb_ent_t *ment = NULL; 11625373Sraghuram mfdb_ent_t *curr_p, *prev_p; 11635373Sraghuram void *tgt = NULL; 11645373Sraghuram 11655373Sraghuram D1(vswp, "%s: enter", __func__); 11665373Sraghuram 11675373Sraghuram if (devtype == VSW_VNETPORT) { 11685373Sraghuram tgt = (vsw_port_t *)arg; 11695373Sraghuram D2(vswp, "%s: removing port %d from mFDB for address" 11705373Sraghuram " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 11715373Sraghuram } else { 11725373Sraghuram D2(vswp, "%s: removing entry", __func__); 11735373Sraghuram tgt = (void *)vswp; 11745373Sraghuram } 11755373Sraghuram 11765373Sraghuram WRITE_ENTER(&vswp->mfdbrw); 11775373Sraghuram if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 11785373Sraghuram (mod_hash_val_t *)&ment) != 0) { 11795373Sraghuram D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 11805373Sraghuram RW_EXIT(&vswp->mfdbrw); 11815373Sraghuram return (1); 11825373Sraghuram } 11835373Sraghuram 11845373Sraghuram prev_p = curr_p = ment; 11855373Sraghuram 11865373Sraghuram while (curr_p != NULL) { 11875373Sraghuram if (curr_p->d_addr == (void *)tgt) { 11885373Sraghuram if (devtype == VSW_VNETPORT) { 11895373Sraghuram D2(vswp, "%s: port %d found", __func__, 11905373Sraghuram ((vsw_port_t *)tgt)->p_instance); 11915373Sraghuram } else { 11925373Sraghuram D2(vswp, "%s: instance found", __func__); 11935373Sraghuram } 11945373Sraghuram 11955373Sraghuram if (prev_p == curr_p) { 11965373Sraghuram /* 11975373Sraghuram * head of list, if no other element is in 11985373Sraghuram * list then destroy this entry, otherwise 11995373Sraghuram * just replace it with updated value. 12005373Sraghuram */ 12015373Sraghuram ment = curr_p->nextp; 12025373Sraghuram if (ment == NULL) { 12035373Sraghuram (void) mod_hash_destroy(vswp->mfdb, 12045373Sraghuram (mod_hash_val_t)addr); 12055373Sraghuram } else { 12065373Sraghuram (void) mod_hash_replace(vswp->mfdb, 12075373Sraghuram (mod_hash_key_t)addr, 12085373Sraghuram (mod_hash_val_t)ment); 12095373Sraghuram } 12105373Sraghuram } else { 12115373Sraghuram /* 12125373Sraghuram * Not head of list, no need to do 12135373Sraghuram * replacement, just adjust list pointers. 12145373Sraghuram */ 12155373Sraghuram prev_p->nextp = curr_p->nextp; 12165373Sraghuram } 12175373Sraghuram break; 12185373Sraghuram } 12195373Sraghuram 12205373Sraghuram prev_p = curr_p; 12215373Sraghuram curr_p = curr_p->nextp; 12225373Sraghuram } 12235373Sraghuram 12245373Sraghuram RW_EXIT(&vswp->mfdbrw); 12255373Sraghuram 12265373Sraghuram D1(vswp, "%s: exit", __func__); 12275373Sraghuram 12285373Sraghuram if (curr_p == NULL) 12295373Sraghuram return (1); 12305373Sraghuram kmem_free(curr_p, sizeof (mfdb_ent_t)); 12315373Sraghuram return (0); 12325373Sraghuram } 12335373Sraghuram 12345373Sraghuram /* 12355373Sraghuram * Port is being deleted, but has registered an interest in one 12365373Sraghuram * or more multicast groups. Using the list of addresses maintained 12375373Sraghuram * within the port structure find the appropriate entry in the hash 12385373Sraghuram * table and remove this port from the list of interested ports. 12395373Sraghuram */ 12405373Sraghuram void 12415373Sraghuram vsw_del_mcst_port(vsw_port_t *port) 12425373Sraghuram { 12435373Sraghuram mcst_addr_t *mcap = NULL; 12445373Sraghuram vsw_t *vswp = port->p_vswp; 12455373Sraghuram 12465373Sraghuram D1(vswp, "%s: enter", __func__); 12475373Sraghuram 12485373Sraghuram mutex_enter(&port->mca_lock); 12495373Sraghuram 12505373Sraghuram while ((mcap = port->mcap) != NULL) { 12515373Sraghuram 12525373Sraghuram port->mcap = mcap->nextp; 12535373Sraghuram 12545373Sraghuram mutex_exit(&port->mca_lock); 12555373Sraghuram 12565373Sraghuram (void) vsw_del_mcst(vswp, VSW_VNETPORT, 12575373Sraghuram mcap->addr, port); 12585373Sraghuram 12595373Sraghuram /* 12605373Sraghuram * Remove the address from HW. The address 12615373Sraghuram * will actually only be removed once the ref 12625373Sraghuram * count within the MAC layer has dropped to 12635373Sraghuram * zero. I.e. we can safely call this fn even 12645373Sraghuram * if other ports are interested in this 12655373Sraghuram * address. 12665373Sraghuram */ 12675373Sraghuram mutex_enter(&vswp->mac_lock); 12685373Sraghuram if (vswp->mh != NULL && mcap->mac_added) { 12695373Sraghuram (void) mac_multicst_remove(vswp->mh, 12705373Sraghuram (uchar_t *)&mcap->mca); 12715373Sraghuram } 12725373Sraghuram mutex_exit(&vswp->mac_lock); 12735373Sraghuram 12745373Sraghuram kmem_free(mcap, sizeof (*mcap)); 12755373Sraghuram 12765373Sraghuram mutex_enter(&port->mca_lock); 12775373Sraghuram 12785373Sraghuram } 12795373Sraghuram 12805373Sraghuram mutex_exit(&port->mca_lock); 12815373Sraghuram 12825373Sraghuram D1(vswp, "%s: exit", __func__); 12835373Sraghuram } 12845373Sraghuram 12855373Sraghuram /* 12865373Sraghuram * This vsw instance is detaching, but has registered an interest in one 12875373Sraghuram * or more multicast groups. Using the list of addresses maintained 12885373Sraghuram * within the vsw structure find the appropriate entry in the hash 12895373Sraghuram * table and remove this instance from the list of interested ports. 12905373Sraghuram */ 12915373Sraghuram void 12925373Sraghuram vsw_del_mcst_vsw(vsw_t *vswp) 12935373Sraghuram { 12945373Sraghuram mcst_addr_t *next_p = NULL; 12955373Sraghuram 12965373Sraghuram D1(vswp, "%s: enter", __func__); 12975373Sraghuram 12985373Sraghuram mutex_enter(&vswp->mca_lock); 12995373Sraghuram 13005373Sraghuram while (vswp->mcap != NULL) { 13015373Sraghuram DERR(vswp, "%s: deleting addr 0x%llx", 13025373Sraghuram __func__, vswp->mcap->addr); 13035373Sraghuram (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 13045373Sraghuram 13055373Sraghuram next_p = vswp->mcap->nextp; 13065373Sraghuram kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 13075373Sraghuram vswp->mcap = next_p; 13085373Sraghuram } 13095373Sraghuram 13105373Sraghuram vswp->mcap = NULL; 13115373Sraghuram mutex_exit(&vswp->mca_lock); 13125373Sraghuram 13135373Sraghuram D1(vswp, "%s: exit", __func__); 13145373Sraghuram } 13155373Sraghuram 1316*5935Ssb155480 static uint32_t 13175373Sraghuram vsw_get_same_dest_list(struct ether_header *ehp, 13185373Sraghuram mblk_t **rhead, mblk_t **rtail, mblk_t **mpp) 13195373Sraghuram { 1320*5935Ssb155480 uint32_t count = 0; 1321*5935Ssb155480 mblk_t *bp; 1322*5935Ssb155480 mblk_t *nbp; 1323*5935Ssb155480 mblk_t *head = NULL; 1324*5935Ssb155480 mblk_t *tail = NULL; 1325*5935Ssb155480 mblk_t *prev = NULL; 1326*5935Ssb155480 struct ether_header *behp; 13275373Sraghuram 13285373Sraghuram /* process the chain of packets */ 13295373Sraghuram bp = *mpp; 13305373Sraghuram while (bp) { 13315373Sraghuram nbp = bp->b_next; 13325373Sraghuram behp = (struct ether_header *)bp->b_rptr; 13335373Sraghuram bp->b_prev = NULL; 13345373Sraghuram if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 13355373Sraghuram if (prev == NULL) { 13365373Sraghuram *mpp = nbp; 13375373Sraghuram } else { 13385373Sraghuram prev->b_next = nbp; 13395373Sraghuram } 13405373Sraghuram bp->b_next = NULL; 13415373Sraghuram if (head == NULL) { 13425373Sraghuram head = tail = bp; 13435373Sraghuram } else { 13445373Sraghuram tail->b_next = bp; 13455373Sraghuram tail = bp; 13465373Sraghuram } 13475373Sraghuram count++; 13485373Sraghuram } else { 13495373Sraghuram prev = bp; 13505373Sraghuram } 13515373Sraghuram bp = nbp; 13525373Sraghuram } 13535373Sraghuram *rhead = head; 13545373Sraghuram *rtail = tail; 13555373Sraghuram DTRACE_PROBE1(vsw_same_dest, int, count); 13565373Sraghuram return (count); 13575373Sraghuram } 13585373Sraghuram 13595373Sraghuram static mblk_t * 13605373Sraghuram vsw_dupmsgchain(mblk_t *mp) 13615373Sraghuram { 13625373Sraghuram mblk_t *nmp = NULL; 13635373Sraghuram mblk_t **nmpp = &nmp; 13645373Sraghuram 13655373Sraghuram for (; mp != NULL; mp = mp->b_next) { 13665373Sraghuram if ((*nmpp = dupmsg(mp)) == NULL) { 13675373Sraghuram freemsgchain(nmp); 13685373Sraghuram return (NULL); 13695373Sraghuram } 13705373Sraghuram 13715373Sraghuram nmpp = &((*nmpp)->b_next); 13725373Sraghuram } 13735373Sraghuram 13745373Sraghuram return (nmp); 13755373Sraghuram } 1376