10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51804Sericheng * Common Development and Distribution License (the "License"). 61804Sericheng * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*8833SVenu.Iyer@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* 270Sstevel@tonic-gate * IEEE 802.3ad Link Aggregation - Send code. 280Sstevel@tonic-gate * 290Sstevel@tonic-gate * Implements the Distributor function. 300Sstevel@tonic-gate */ 310Sstevel@tonic-gate 320Sstevel@tonic-gate #include <sys/conf.h> 330Sstevel@tonic-gate #include <sys/modctl.h> 340Sstevel@tonic-gate #include <sys/sunddi.h> 350Sstevel@tonic-gate #include <sys/vlan.h> 360Sstevel@tonic-gate #include <sys/strsun.h> 370Sstevel@tonic-gate #include <sys/strsubr.h> 38*8833SVenu.Iyer@Sun.COM #include <sys/dlpi.h> 390Sstevel@tonic-gate 400Sstevel@tonic-gate #include <inet/common.h> 410Sstevel@tonic-gate #include <inet/led.h> 420Sstevel@tonic-gate #include <inet/ip.h> 430Sstevel@tonic-gate #include <inet/ip6.h> 440Sstevel@tonic-gate #include <inet/tcp.h> 450Sstevel@tonic-gate #include <netinet/udp.h> 460Sstevel@tonic-gate 470Sstevel@tonic-gate #include <sys/aggr.h> 480Sstevel@tonic-gate #include <sys/aggr_impl.h> 490Sstevel@tonic-gate 500Sstevel@tonic-gate /* 510Sstevel@tonic-gate * Update the TX load balancing policy of the specified group. 520Sstevel@tonic-gate */ 530Sstevel@tonic-gate void 540Sstevel@tonic-gate aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 550Sstevel@tonic-gate { 56*8833SVenu.Iyer@Sun.COM uint8_t mac_policy = 0; 57*8833SVenu.Iyer@Sun.COM 588275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 590Sstevel@tonic-gate 60*8833SVenu.Iyer@Sun.COM if ((policy & AGGR_POLICY_L2) != 0) 61*8833SVenu.Iyer@Sun.COM mac_policy |= MAC_PKT_HASH_L2; 62*8833SVenu.Iyer@Sun.COM if ((policy & AGGR_POLICY_L3) != 0) 63*8833SVenu.Iyer@Sun.COM mac_policy |= MAC_PKT_HASH_L3; 64*8833SVenu.Iyer@Sun.COM if ((policy & AGGR_POLICY_L4) != 0) 65*8833SVenu.Iyer@Sun.COM mac_policy |= MAC_PKT_HASH_L4; 66*8833SVenu.Iyer@Sun.COM 670Sstevel@tonic-gate grp->lg_tx_policy = policy; 68*8833SVenu.Iyer@Sun.COM grp->lg_mac_tx_policy = mac_policy; 690Sstevel@tonic-gate } 700Sstevel@tonic-gate 710Sstevel@tonic-gate /* 720Sstevel@tonic-gate * Send function invoked by the MAC service module. 730Sstevel@tonic-gate */ 740Sstevel@tonic-gate mblk_t * 750Sstevel@tonic-gate aggr_m_tx(void *arg, mblk_t *mp) 760Sstevel@tonic-gate { 770Sstevel@tonic-gate aggr_grp_t *grp = arg; 780Sstevel@tonic-gate aggr_port_t *port; 790Sstevel@tonic-gate mblk_t *nextp; 808275SEric Cheng mac_tx_cookie_t cookie; 818275SEric Cheng uint64_t hash; 828275SEric Cheng void *mytx_handle; 830Sstevel@tonic-gate 840Sstevel@tonic-gate for (;;) { 858275SEric Cheng rw_enter(&grp->lg_tx_lock, RW_READER); 861804Sericheng if (grp->lg_ntx_ports == 0) { 871804Sericheng /* 881804Sericheng * We could have returned from aggr_m_start() before 891804Sericheng * the ports were actually attached. Drop the chain. 901804Sericheng */ 918275SEric Cheng rw_exit(&grp->lg_tx_lock); 921804Sericheng freemsgchain(mp); 931804Sericheng return (NULL); 941804Sericheng } 958275SEric Cheng 960Sstevel@tonic-gate nextp = mp->b_next; 970Sstevel@tonic-gate mp->b_next = NULL; 980Sstevel@tonic-gate 99*8833SVenu.Iyer@Sun.COM hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, 100*8833SVenu.Iyer@Sun.COM B_TRUE); 1018275SEric Cheng port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; 1020Sstevel@tonic-gate 10356Smeem /* 1048275SEric Cheng * Bump the active Tx ref count so that the port won't 1058275SEric Cheng * be deleted. The reference count will be dropped in mac_tx(). 10656Smeem */ 1078275SEric Cheng mytx_handle = mac_tx_hold(port->lp_mch); 1088275SEric Cheng rw_exit(&grp->lg_tx_lock); 1098275SEric Cheng 1108275SEric Cheng if (mytx_handle == NULL) { 1118275SEric Cheng /* 1128275SEric Cheng * The port is quiesced. 1138275SEric Cheng */ 1148275SEric Cheng freemsg(mp); 1158275SEric Cheng } else { 116*8833SVenu.Iyer@Sun.COM mblk_t *ret_mp = NULL; 1177802SRamesh.K@Sun.COM 1188275SEric Cheng /* 1198275SEric Cheng * It is fine that the port state changes now. 1208275SEric Cheng * Set MAC_TX_NO_HOLD to inform mac_tx() not to bump 1218275SEric Cheng * the active Tx ref again. Use hash as the hint so 1228275SEric Cheng * to direct traffic to different TX rings. Note below 1238275SEric Cheng * bit operation is needed to get the most benefit 1248275SEric Cheng * from the mac_tx() hash algorithm. 1258275SEric Cheng */ 1268275SEric Cheng hash = (hash << 24 | hash << 16 | hash); 1278275SEric Cheng hash = (hash << 32 | hash); 1288275SEric Cheng cookie = mac_tx(port->lp_mch, mp, (uintptr_t)hash, 1298275SEric Cheng MAC_TX_NO_ENQUEUE | MAC_TX_NO_HOLD, &ret_mp); 1308275SEric Cheng 1318275SEric Cheng mac_tx_rele(port->lp_mch, mytx_handle); 1328275SEric Cheng 1338275SEric Cheng if (cookie != NULL) { 1348275SEric Cheng ret_mp->b_next = nextp; 1358275SEric Cheng mp = ret_mp; 1368275SEric Cheng break; 1378275SEric Cheng } 1380Sstevel@tonic-gate } 1390Sstevel@tonic-gate 1400Sstevel@tonic-gate if ((mp = nextp) == NULL) 1411804Sericheng break; 1420Sstevel@tonic-gate } 1430Sstevel@tonic-gate return (mp); 1440Sstevel@tonic-gate } 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate /* 1470Sstevel@tonic-gate * Enable sending on the specified port. 1480Sstevel@tonic-gate */ 1490Sstevel@tonic-gate void 1500Sstevel@tonic-gate aggr_send_port_enable(aggr_port_t *port) 1510Sstevel@tonic-gate { 1520Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp; 1530Sstevel@tonic-gate 1548275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1558275SEric Cheng 1560Sstevel@tonic-gate if (port->lp_tx_enabled || (port->lp_state != 1570Sstevel@tonic-gate AGGR_PORT_STATE_ATTACHED)) { 1580Sstevel@tonic-gate /* already enabled or port not yet attached */ 1590Sstevel@tonic-gate return; 1600Sstevel@tonic-gate } 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate /* 1630Sstevel@tonic-gate * Add to group's array of tx ports. 1640Sstevel@tonic-gate */ 1658275SEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER); 1660Sstevel@tonic-gate if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 1670Sstevel@tonic-gate /* current array too small */ 1680Sstevel@tonic-gate aggr_port_t **new_ports; 1690Sstevel@tonic-gate uint_t new_size; 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate new_size = grp->lg_ntx_ports+1; 1720Sstevel@tonic-gate new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 1730Sstevel@tonic-gate KM_SLEEP); 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate if (grp->lg_tx_ports_size > 0) { 1760Sstevel@tonic-gate ASSERT(grp->lg_tx_ports != NULL); 1770Sstevel@tonic-gate bcopy(grp->lg_tx_ports, new_ports, 1780Sstevel@tonic-gate grp->lg_ntx_ports * sizeof (aggr_port_t *)); 1790Sstevel@tonic-gate kmem_free(grp->lg_tx_ports, 1800Sstevel@tonic-gate grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 1810Sstevel@tonic-gate } 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate grp->lg_tx_ports = new_ports; 1840Sstevel@tonic-gate grp->lg_tx_ports_size = new_size; 1850Sstevel@tonic-gate } 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 1880Sstevel@tonic-gate port->lp_tx_idx = grp->lg_ntx_ports-1; 1898275SEric Cheng rw_exit(&grp->lg_tx_lock); 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate port->lp_tx_enabled = B_TRUE; 1920Sstevel@tonic-gate } 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate /* 1950Sstevel@tonic-gate * Disable sending from the specified port. 1960Sstevel@tonic-gate */ 1970Sstevel@tonic-gate void 1980Sstevel@tonic-gate aggr_send_port_disable(aggr_port_t *port) 1990Sstevel@tonic-gate { 2000Sstevel@tonic-gate uint_t idx, ntx; 2010Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp; 2020Sstevel@tonic-gate 2038275SEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2048275SEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate if (!port->lp_tx_enabled) { 2070Sstevel@tonic-gate /* not yet enabled */ 2080Sstevel@tonic-gate return; 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate 2118275SEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER); 2120Sstevel@tonic-gate idx = port->lp_tx_idx; 2130Sstevel@tonic-gate ntx = grp->lg_ntx_ports; 2140Sstevel@tonic-gate ASSERT(idx < ntx); 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate /* remove from array of attached ports */ 2170Sstevel@tonic-gate if (idx == (ntx - 1)) { 2180Sstevel@tonic-gate grp->lg_tx_ports[idx] = NULL; 2190Sstevel@tonic-gate } else { 2200Sstevel@tonic-gate /* not the last entry, replace with last one */ 2210Sstevel@tonic-gate aggr_port_t *victim; 2220Sstevel@tonic-gate 2230Sstevel@tonic-gate victim = grp->lg_tx_ports[ntx - 1]; 2240Sstevel@tonic-gate grp->lg_tx_ports[ntx - 1] = NULL; 2250Sstevel@tonic-gate victim->lp_tx_idx = idx; 2260Sstevel@tonic-gate grp->lg_tx_ports[idx] = victim; 2270Sstevel@tonic-gate } 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate port->lp_tx_idx = 0; 2300Sstevel@tonic-gate grp->lg_ntx_ports--; 2318275SEric Cheng rw_exit(&grp->lg_tx_lock); 2320Sstevel@tonic-gate 2330Sstevel@tonic-gate port->lp_tx_enabled = B_FALSE; 2340Sstevel@tonic-gate } 235