xref: /onnv-gate/usr/src/uts/common/io/aggr/aggr_send.c (revision 8833:8adf20bc60e3)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51804Sericheng  * Common Development and Distribution License (the "License").
61804Sericheng  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*8833SVenu.Iyer@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * IEEE 802.3ad Link Aggregation - Send code.
280Sstevel@tonic-gate  *
290Sstevel@tonic-gate  * Implements the Distributor function.
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/conf.h>
330Sstevel@tonic-gate #include <sys/modctl.h>
340Sstevel@tonic-gate #include <sys/sunddi.h>
350Sstevel@tonic-gate #include <sys/vlan.h>
360Sstevel@tonic-gate #include <sys/strsun.h>
370Sstevel@tonic-gate #include <sys/strsubr.h>
38*8833SVenu.Iyer@Sun.COM #include <sys/dlpi.h>
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #include <inet/common.h>
410Sstevel@tonic-gate #include <inet/led.h>
420Sstevel@tonic-gate #include <inet/ip.h>
430Sstevel@tonic-gate #include <inet/ip6.h>
440Sstevel@tonic-gate #include <inet/tcp.h>
450Sstevel@tonic-gate #include <netinet/udp.h>
460Sstevel@tonic-gate 
470Sstevel@tonic-gate #include <sys/aggr.h>
480Sstevel@tonic-gate #include <sys/aggr_impl.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate /*
510Sstevel@tonic-gate  * Update the TX load balancing policy of the specified group.
520Sstevel@tonic-gate  */
530Sstevel@tonic-gate void
540Sstevel@tonic-gate aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy)
550Sstevel@tonic-gate {
56*8833SVenu.Iyer@Sun.COM 	uint8_t mac_policy = 0;
57*8833SVenu.Iyer@Sun.COM 
588275SEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
590Sstevel@tonic-gate 
60*8833SVenu.Iyer@Sun.COM 	if ((policy & AGGR_POLICY_L2) != 0)
61*8833SVenu.Iyer@Sun.COM 		mac_policy |= MAC_PKT_HASH_L2;
62*8833SVenu.Iyer@Sun.COM 	if ((policy & AGGR_POLICY_L3) != 0)
63*8833SVenu.Iyer@Sun.COM 		mac_policy |= MAC_PKT_HASH_L3;
64*8833SVenu.Iyer@Sun.COM 	if ((policy & AGGR_POLICY_L4) != 0)
65*8833SVenu.Iyer@Sun.COM 		mac_policy |= MAC_PKT_HASH_L4;
66*8833SVenu.Iyer@Sun.COM 
670Sstevel@tonic-gate 	grp->lg_tx_policy = policy;
68*8833SVenu.Iyer@Sun.COM 	grp->lg_mac_tx_policy = mac_policy;
690Sstevel@tonic-gate }
700Sstevel@tonic-gate 
710Sstevel@tonic-gate /*
720Sstevel@tonic-gate  * Send function invoked by the MAC service module.
730Sstevel@tonic-gate  */
740Sstevel@tonic-gate mblk_t *
750Sstevel@tonic-gate aggr_m_tx(void *arg, mblk_t *mp)
760Sstevel@tonic-gate {
770Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
780Sstevel@tonic-gate 	aggr_port_t *port;
790Sstevel@tonic-gate 	mblk_t *nextp;
808275SEric Cheng 	mac_tx_cookie_t	cookie;
818275SEric Cheng 	uint64_t hash;
828275SEric Cheng 	void	*mytx_handle;
830Sstevel@tonic-gate 
840Sstevel@tonic-gate 	for (;;) {
858275SEric Cheng 		rw_enter(&grp->lg_tx_lock, RW_READER);
861804Sericheng 		if (grp->lg_ntx_ports == 0) {
871804Sericheng 			/*
881804Sericheng 			 * We could have returned from aggr_m_start() before
891804Sericheng 			 * the ports were actually attached. Drop the chain.
901804Sericheng 			 */
918275SEric Cheng 			rw_exit(&grp->lg_tx_lock);
921804Sericheng 			freemsgchain(mp);
931804Sericheng 			return (NULL);
941804Sericheng 		}
958275SEric Cheng 
960Sstevel@tonic-gate 		nextp = mp->b_next;
970Sstevel@tonic-gate 		mp->b_next = NULL;
980Sstevel@tonic-gate 
99*8833SVenu.Iyer@Sun.COM 		hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy,
100*8833SVenu.Iyer@Sun.COM 		    B_TRUE);
1018275SEric Cheng 		port = grp->lg_tx_ports[hash % grp->lg_ntx_ports];
1020Sstevel@tonic-gate 
10356Smeem 		/*
1048275SEric Cheng 		 * Bump the active Tx ref count so that the port won't
1058275SEric Cheng 		 * be deleted. The reference count will be dropped in mac_tx().
10656Smeem 		 */
1078275SEric Cheng 		mytx_handle = mac_tx_hold(port->lp_mch);
1088275SEric Cheng 		rw_exit(&grp->lg_tx_lock);
1098275SEric Cheng 
1108275SEric Cheng 		if (mytx_handle == NULL) {
1118275SEric Cheng 			/*
1128275SEric Cheng 			 * The port is quiesced.
1138275SEric Cheng 			 */
1148275SEric Cheng 			freemsg(mp);
1158275SEric Cheng 		} else {
116*8833SVenu.Iyer@Sun.COM 			mblk_t	*ret_mp = NULL;
1177802SRamesh.K@Sun.COM 
1188275SEric Cheng 			/*
1198275SEric Cheng 			 * It is fine that the port state changes now.
1208275SEric Cheng 			 * Set MAC_TX_NO_HOLD to inform mac_tx() not to bump
1218275SEric Cheng 			 * the active Tx ref again. Use hash as the hint so
1228275SEric Cheng 			 * to direct traffic to different TX rings. Note below
1238275SEric Cheng 			 * bit operation is needed to get the most benefit
1248275SEric Cheng 			 * from the mac_tx() hash algorithm.
1258275SEric Cheng 			 */
1268275SEric Cheng 			hash = (hash << 24 | hash << 16 | hash);
1278275SEric Cheng 			hash = (hash << 32 | hash);
1288275SEric Cheng 			cookie = mac_tx(port->lp_mch, mp, (uintptr_t)hash,
1298275SEric Cheng 			    MAC_TX_NO_ENQUEUE | MAC_TX_NO_HOLD, &ret_mp);
1308275SEric Cheng 
1318275SEric Cheng 			mac_tx_rele(port->lp_mch, mytx_handle);
1328275SEric Cheng 
1338275SEric Cheng 			if (cookie != NULL) {
1348275SEric Cheng 				ret_mp->b_next = nextp;
1358275SEric Cheng 				mp = ret_mp;
1368275SEric Cheng 				break;
1378275SEric Cheng 			}
1380Sstevel@tonic-gate 		}
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate 		if ((mp = nextp) == NULL)
1411804Sericheng 			break;
1420Sstevel@tonic-gate 	}
1430Sstevel@tonic-gate 	return (mp);
1440Sstevel@tonic-gate }
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate /*
1470Sstevel@tonic-gate  * Enable sending on the specified port.
1480Sstevel@tonic-gate  */
1490Sstevel@tonic-gate void
1500Sstevel@tonic-gate aggr_send_port_enable(aggr_port_t *port)
1510Sstevel@tonic-gate {
1520Sstevel@tonic-gate 	aggr_grp_t *grp = port->lp_grp;
1530Sstevel@tonic-gate 
1548275SEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1558275SEric Cheng 
1560Sstevel@tonic-gate 	if (port->lp_tx_enabled || (port->lp_state !=
1570Sstevel@tonic-gate 	    AGGR_PORT_STATE_ATTACHED)) {
1580Sstevel@tonic-gate 		/* already enabled or port not yet attached */
1590Sstevel@tonic-gate 		return;
1600Sstevel@tonic-gate 	}
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate 	/*
1630Sstevel@tonic-gate 	 * Add to group's array of tx ports.
1640Sstevel@tonic-gate 	 */
1658275SEric Cheng 	rw_enter(&grp->lg_tx_lock, RW_WRITER);
1660Sstevel@tonic-gate 	if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) {
1670Sstevel@tonic-gate 		/* current array too small */
1680Sstevel@tonic-gate 		aggr_port_t **new_ports;
1690Sstevel@tonic-gate 		uint_t new_size;
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 		new_size = grp->lg_ntx_ports+1;
1720Sstevel@tonic-gate 		new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *),
1730Sstevel@tonic-gate 		    KM_SLEEP);
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 		if (grp->lg_tx_ports_size > 0) {
1760Sstevel@tonic-gate 			ASSERT(grp->lg_tx_ports != NULL);
1770Sstevel@tonic-gate 			bcopy(grp->lg_tx_ports, new_ports,
1780Sstevel@tonic-gate 			    grp->lg_ntx_ports * sizeof (aggr_port_t *));
1790Sstevel@tonic-gate 			kmem_free(grp->lg_tx_ports,
1800Sstevel@tonic-gate 			    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
1810Sstevel@tonic-gate 		}
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 		grp->lg_tx_ports = new_ports;
1840Sstevel@tonic-gate 		grp->lg_tx_ports_size = new_size;
1850Sstevel@tonic-gate 	}
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate 	grp->lg_tx_ports[grp->lg_ntx_ports++] = port;
1880Sstevel@tonic-gate 	port->lp_tx_idx = grp->lg_ntx_ports-1;
1898275SEric Cheng 	rw_exit(&grp->lg_tx_lock);
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate 	port->lp_tx_enabled = B_TRUE;
1920Sstevel@tonic-gate }
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate /*
1950Sstevel@tonic-gate  * Disable sending from the specified port.
1960Sstevel@tonic-gate  */
1970Sstevel@tonic-gate void
1980Sstevel@tonic-gate aggr_send_port_disable(aggr_port_t *port)
1990Sstevel@tonic-gate {
2000Sstevel@tonic-gate 	uint_t idx, ntx;
2010Sstevel@tonic-gate 	aggr_grp_t *grp = port->lp_grp;
2020Sstevel@tonic-gate 
2038275SEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2048275SEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate 	if (!port->lp_tx_enabled) {
2070Sstevel@tonic-gate 		/* not yet enabled */
2080Sstevel@tonic-gate 		return;
2090Sstevel@tonic-gate 	}
2100Sstevel@tonic-gate 
2118275SEric Cheng 	rw_enter(&grp->lg_tx_lock, RW_WRITER);
2120Sstevel@tonic-gate 	idx = port->lp_tx_idx;
2130Sstevel@tonic-gate 	ntx = grp->lg_ntx_ports;
2140Sstevel@tonic-gate 	ASSERT(idx < ntx);
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	/* remove from array of attached ports */
2170Sstevel@tonic-gate 	if (idx == (ntx - 1)) {
2180Sstevel@tonic-gate 		grp->lg_tx_ports[idx] = NULL;
2190Sstevel@tonic-gate 	} else {
2200Sstevel@tonic-gate 		/* not the last entry, replace with last one */
2210Sstevel@tonic-gate 		aggr_port_t *victim;
2220Sstevel@tonic-gate 
2230Sstevel@tonic-gate 		victim = grp->lg_tx_ports[ntx - 1];
2240Sstevel@tonic-gate 		grp->lg_tx_ports[ntx - 1] = NULL;
2250Sstevel@tonic-gate 		victim->lp_tx_idx = idx;
2260Sstevel@tonic-gate 		grp->lg_tx_ports[idx] = victim;
2270Sstevel@tonic-gate 	}
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 	port->lp_tx_idx = 0;
2300Sstevel@tonic-gate 	grp->lg_ntx_ports--;
2318275SEric Cheng 	rw_exit(&grp->lg_tx_lock);
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	port->lp_tx_enabled = B_FALSE;
2340Sstevel@tonic-gate }
235