xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c (revision 10489:180acaca223b)
13302Sagiri /*
23302Sagiri  * CDDL HEADER START
33302Sagiri  *
43302Sagiri  * The contents of this file are subject to the terms of the
53302Sagiri  * Common Development and Distribution License (the "License").
63302Sagiri  * You may not use this file except in compliance with the License.
73302Sagiri  *
83302Sagiri  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93302Sagiri  * or http://www.opensolaris.org/os/licensing.
103302Sagiri  * See the License for the specific language governing permissions
113302Sagiri  * and limitations under the License.
123302Sagiri  *
133302Sagiri  * When distributing Covered Code, include this CDDL HEADER in each
143302Sagiri  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153302Sagiri  * If applicable, add the following below this CDDL HEADER, with the
163302Sagiri  * fields enclosed by brackets "[]" replaced with your own identifying
173302Sagiri  * information: Portions Copyright [yyyy] [name of copyright owner]
183302Sagiri  *
193302Sagiri  * CDDL HEADER END
203302Sagiri  */
213302Sagiri /*
22*10489SGiri.Adari@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
233302Sagiri  * Use is subject to license terms.
243302Sagiri  */
253302Sagiri /*
263302Sagiri  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
273302Sagiri  *
283302Sagiri  * This software is available to you under a choice of one of two
293302Sagiri  * licenses.  You may choose to be licensed under the terms of the GNU
303302Sagiri  * General Public License (GPL) Version 2, available from the file
313302Sagiri  * COPYING in the main directory of this source tree, or the
323302Sagiri  * OpenIB.org BSD license below:
333302Sagiri  *
343302Sagiri  *     Redistribution and use in source and binary forms, with or
353302Sagiri  *     without modification, are permitted provided that the following
363302Sagiri  *     conditions are met:
373302Sagiri  *
383302Sagiri  *	- Redistributions of source code must retain the above
393302Sagiri  *	  copyright notice, this list of conditions and the following
403302Sagiri  *	  disclaimer.
413302Sagiri  *
423302Sagiri  *	- Redistributions in binary form must reproduce the above
433302Sagiri  *	  copyright notice, this list of conditions and the following
443302Sagiri  *	  disclaimer in the documentation and/or other materials
453302Sagiri  *	  provided with the distribution.
463302Sagiri  *
473302Sagiri  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
483302Sagiri  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
493302Sagiri  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
503302Sagiri  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
513302Sagiri  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
523302Sagiri  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
533302Sagiri  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
543302Sagiri  * SOFTWARE.
553302Sagiri  *
563302Sagiri  */
573302Sagiri /*
583302Sagiri  * Sun elects to include this software in Sun product
593302Sagiri  * under the OpenIB BSD license.
603302Sagiri  *
613302Sagiri  *
623302Sagiri  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
633302Sagiri  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
643302Sagiri  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
653302Sagiri  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
663302Sagiri  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
673302Sagiri  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
683302Sagiri  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
693302Sagiri  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
703302Sagiri  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
713302Sagiri  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
723302Sagiri  * POSSIBILITY OF SUCH DAMAGE.
733302Sagiri  */
743302Sagiri 
753302Sagiri #include <sys/stream.h>
763302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
773302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
783302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
793302Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
803302Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
813302Sagiri #include <sys/zone.h>
823302Sagiri 
833302Sagiri #define	RDS_POLL_CQ_IN_2TICKS	1
843302Sagiri 
853302Sagiri /*
863302Sagiri  * This File contains the endpoint related calls
873302Sagiri  */
883302Sagiri 
893302Sagiri extern boolean_t rds_islocal(ipaddr_t addr);
903302Sagiri extern uint_t rds_wc_signal;
913302Sagiri 
924467Sagiri #define	RDS_LOOPBACK	0
934467Sagiri #define	RDS_LOCAL	1
944467Sagiri #define	RDS_REMOTE	2
954467Sagiri 
964703Shiremath #define	IBT_IPADDR	1
974703Shiremath 
983302Sagiri static uint8_t
rds_is_port_marked(rds_session_t * sp,in_port_t port,uint_t qualifier)994467Sagiri rds_is_port_marked(rds_session_t *sp, in_port_t port, uint_t qualifier)
1003302Sagiri {
1013302Sagiri 	uint8_t	ret;
1023302Sagiri 
1034467Sagiri 	switch (qualifier) {
1044467Sagiri 	case RDS_LOOPBACK: /* loopback */
1054467Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_READER);
1064467Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
1074467Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1084467Sagiri 		break;
1094467Sagiri 
1104467Sagiri 	case RDS_LOCAL: /* Session local */
1114467Sagiri 		ASSERT(sp != NULL);
1124467Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_READER);
1134467Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1144467Sagiri 		rw_exit(&sp->session_local_portmap_lock);
1154467Sagiri 		break;
1164467Sagiri 
1174467Sagiri 	case RDS_REMOTE: /* Session remote */
1184467Sagiri 		ASSERT(sp != NULL);
1194467Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_READER);
1204467Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
1214467Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
1224467Sagiri 		break;
1233302Sagiri 	}
1243302Sagiri 
1253302Sagiri 	return (ret);
1263302Sagiri }
1273302Sagiri 
1283302Sagiri static uint8_t
rds_check_n_mark_port(rds_session_t * sp,in_port_t port,uint_t qualifier)1294467Sagiri rds_check_n_mark_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
1303302Sagiri {
1313302Sagiri 	uint8_t	ret;
1323302Sagiri 
1334467Sagiri 	switch (qualifier) {
1344467Sagiri 	case RDS_LOOPBACK: /* loopback */
1354467Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
1364467Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
1374467Sagiri 		if (!ret) {
1384467Sagiri 			/* port is not marked, mark it */
1394467Sagiri 			rds_loopback_portmap[port/8] =
1404467Sagiri 			    rds_loopback_portmap[port/8] | (1 << (port % 8));
1414467Sagiri 		}
1424467Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1434467Sagiri 		break;
1444467Sagiri 
1454467Sagiri 	case RDS_LOCAL: /* Session local */
1464467Sagiri 		ASSERT(sp != NULL);
1474467Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
1484467Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1493302Sagiri 		if (!ret) {
1503302Sagiri 			/* port is not marked, mark it */
1514467Sagiri 			sp->session_local_portmap[port/8] =
1524467Sagiri 			    sp->session_local_portmap[port/8] |
1534467Sagiri 			    (1 << (port % 8));
1543302Sagiri 		}
1554467Sagiri 		rw_exit(&sp->session_local_portmap_lock);
1564467Sagiri 		break;
1574467Sagiri 
1584467Sagiri 	case RDS_REMOTE: /* Session remote */
1594467Sagiri 		ASSERT(sp != NULL);
1604467Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
1614467Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
1623302Sagiri 		if (!ret) {
1633302Sagiri 			/* port is not marked, mark it */
1644467Sagiri 			sp->session_remote_portmap[port/8] =
1654467Sagiri 			    sp->session_remote_portmap[port/8] |
1664467Sagiri 			    (1 << (port % 8));
1673302Sagiri 		}
1684467Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
1694467Sagiri 		break;
1703302Sagiri 	}
1713302Sagiri 
1723302Sagiri 	return (ret);
1733302Sagiri }
1743302Sagiri 
1753302Sagiri static uint8_t
rds_check_n_unmark_port(rds_session_t * sp,in_port_t port,uint_t qualifier)1764467Sagiri rds_check_n_unmark_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
1773302Sagiri {
1783302Sagiri 	uint8_t	ret;
1793302Sagiri 
1804467Sagiri 	switch (qualifier) {
1814467Sagiri 	case RDS_LOOPBACK: /* loopback */
1824467Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
1834467Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
1844467Sagiri 		if (ret) {
1854467Sagiri 			/* port is marked, unmark it */
1864467Sagiri 			rds_loopback_portmap[port/8] =
1874467Sagiri 			    rds_loopback_portmap[port/8] & ~(1 << (port % 8));
1884467Sagiri 		}
1894467Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1904467Sagiri 		break;
1914467Sagiri 
1924467Sagiri 	case RDS_LOCAL: /* Session local */
1934467Sagiri 		ASSERT(sp != NULL);
1944467Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
1954467Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1963302Sagiri 		if (ret) {
1973302Sagiri 			/* port is marked, unmark it */
1984467Sagiri 			sp->session_local_portmap[port/8] =
1994467Sagiri 			    sp->session_local_portmap[port/8] &
2004467Sagiri 			    ~(1 << (port % 8));
2013302Sagiri 		}
2024467Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2034467Sagiri 		break;
2044467Sagiri 
2054467Sagiri 	case RDS_REMOTE: /* Session remote */
2064467Sagiri 		ASSERT(sp != NULL);
2074467Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2084467Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
2093302Sagiri 		if (ret) {
2103302Sagiri 			/* port is marked, unmark it */
2114467Sagiri 			sp->session_remote_portmap[port/8] =
2124467Sagiri 			    sp->session_remote_portmap[port/8] &
2134467Sagiri 			    ~(1 << (port % 8));
2143302Sagiri 		}
2154467Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2164467Sagiri 		break;
2173302Sagiri 	}
2183302Sagiri 
2193302Sagiri 	return (ret);
2203302Sagiri }
2213302Sagiri 
2223302Sagiri static void
rds_mark_all_ports(rds_session_t * sp,uint_t qualifier)2234467Sagiri rds_mark_all_ports(rds_session_t *sp, uint_t qualifier)
2243302Sagiri {
2254467Sagiri 	switch (qualifier) {
2264467Sagiri 	case RDS_LOOPBACK: /* loopback */
2274467Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
2284467Sagiri 		(void) memset(rds_loopback_portmap, 0xFF, RDS_PORT_MAP_SIZE);
2294467Sagiri 		rw_exit(&rds_loopback_portmap_lock);
2304467Sagiri 		break;
2314467Sagiri 
2324467Sagiri 	case RDS_LOCAL: /* Session local */
2334467Sagiri 		ASSERT(sp != NULL);
2344467Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
2354467Sagiri 		(void) memset(sp->session_local_portmap, 0xFF,
2364467Sagiri 		    RDS_PORT_MAP_SIZE);
2374467Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2384467Sagiri 		break;
2394467Sagiri 
2404467Sagiri 	case RDS_REMOTE: /* Session remote */
2414467Sagiri 		ASSERT(sp != NULL);
2424467Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2434467Sagiri 		(void) memset(sp->session_remote_portmap, 0xFF,
2444467Sagiri 		    RDS_PORT_MAP_SIZE);
2454467Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2464467Sagiri 		break;
2473302Sagiri 	}
2483302Sagiri }
2493302Sagiri 
2503302Sagiri static void
rds_unmark_all_ports(rds_session_t * sp,uint_t qualifier)2514467Sagiri rds_unmark_all_ports(rds_session_t *sp, uint_t qualifier)
2523302Sagiri {
2534467Sagiri 	switch (qualifier) {
2544467Sagiri 	case RDS_LOOPBACK: /* loopback */
2554467Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
2564467Sagiri 		bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
2574467Sagiri 		rw_exit(&rds_loopback_portmap_lock);
2584467Sagiri 		break;
2594467Sagiri 
2604467Sagiri 	case RDS_LOCAL: /* Session local */
2614467Sagiri 		ASSERT(sp != NULL);
2624467Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
2634467Sagiri 		bzero(sp->session_local_portmap, RDS_PORT_MAP_SIZE);
2644467Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2654467Sagiri 		break;
2664467Sagiri 
2674467Sagiri 	case RDS_REMOTE: /* Session remote */
2684467Sagiri 		ASSERT(sp != NULL);
2694467Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2704467Sagiri 		bzero(sp->session_remote_portmap, RDS_PORT_MAP_SIZE);
2714467Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2724467Sagiri 		break;
2733302Sagiri 	}
2743302Sagiri }
2753302Sagiri 
2766702Sagiri static boolean_t
rds_add_session(rds_session_t * sp,boolean_t locked)2773302Sagiri rds_add_session(rds_session_t *sp, boolean_t locked)
2783302Sagiri {
2796702Sagiri 	boolean_t retval = B_TRUE;
2806702Sagiri 
2813302Sagiri 	RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp);
2823302Sagiri 
2833302Sagiri 	if (!locked) {
2843302Sagiri 		rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER);
2853302Sagiri 	}
2863302Sagiri 
2876702Sagiri 	/* Don't allow more sessions than configured in rdsib.conf */
2886702Sagiri 	if (rdsib_statep->rds_nsessions >= (MaxNodes - 1)) {
2896702Sagiri 		RDS_DPRINTF1("rds_add_session", "Max session limit reached");
2906702Sagiri 		retval = B_FALSE;
2916702Sagiri 	} else {
2926702Sagiri 		sp->session_nextp = rdsib_statep->rds_sessionlistp;
2936702Sagiri 		rdsib_statep->rds_sessionlistp = sp;
2946702Sagiri 		rdsib_statep->rds_nsessions++;
2956702Sagiri 		RDS_INCR_SESS();
2966702Sagiri 	}
2973302Sagiri 
2983302Sagiri 	if (!locked) {
2993302Sagiri 		rw_exit(&rdsib_statep->rds_sessionlock);
3003302Sagiri 	}
3013302Sagiri 
3023302Sagiri 	RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp);
3036702Sagiri 
3046702Sagiri 	return (retval);
3053302Sagiri }
3063302Sagiri 
3073302Sagiri /* Session lookup based on destination IP or destination node guid */
3083302Sagiri rds_session_t *
rds_session_lkup(rds_state_t * statep,ipaddr_t remoteip,ib_guid_t node_guid)3093302Sagiri rds_session_lkup(rds_state_t *statep, ipaddr_t remoteip, ib_guid_t node_guid)
3103302Sagiri {
3113302Sagiri 	rds_session_t	*sp;
3123302Sagiri 
3133302Sagiri 	RDS_DPRINTF4("rds_session_lkup", "Enter: 0x%p 0x%x 0x%llx", statep,
3143302Sagiri 	    remoteip, node_guid);
3153302Sagiri 
3163302Sagiri 	/* A read/write lock is expected, will panic if none of them are held */
3173302Sagiri 	ASSERT(rw_lock_held(&statep->rds_sessionlock));
3183302Sagiri 	sp = statep->rds_sessionlistp;
3193302Sagiri 	while (sp) {
3204467Sagiri 		if ((sp->session_remip == remoteip) || ((node_guid != 0) &&
3214467Sagiri 		    (sp->session_rgid.gid_guid == node_guid))) {
3223302Sagiri 			break;
3233302Sagiri 		}
3243302Sagiri 
3253302Sagiri 		sp = sp->session_nextp;
3263302Sagiri 	}
3273302Sagiri 
3283302Sagiri 	RDS_DPRINTF4("rds_session_lkup", "Return: SP(%p)", sp);
3293302Sagiri 
3303302Sagiri 	return (sp);
3313302Sagiri }
3323302Sagiri 
3335342Sagiri boolean_t
rds_session_lkup_by_sp(rds_session_t * sp)3345342Sagiri rds_session_lkup_by_sp(rds_session_t *sp)
3355342Sagiri {
3365342Sagiri 	rds_session_t *sessionp;
3375342Sagiri 
3385342Sagiri 	RDS_DPRINTF4("rds_session_lkup_by_sp", "Enter: 0x%p", sp);
3395342Sagiri 
3405342Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
3415342Sagiri 	sessionp = rdsib_statep->rds_sessionlistp;
3425342Sagiri 	while (sessionp) {
3435342Sagiri 		if (sessionp == sp) {
3445342Sagiri 			rw_exit(&rdsib_statep->rds_sessionlock);
3455342Sagiri 			return (B_TRUE);
3465342Sagiri 		}
3475342Sagiri 
3485342Sagiri 		sessionp = sessionp->session_nextp;
3495342Sagiri 	}
3505342Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
3515342Sagiri 
3525342Sagiri 	return (B_FALSE);
3535342Sagiri }
3545342Sagiri 
3553302Sagiri static void
rds_ep_fini(rds_ep_t * ep)3563302Sagiri rds_ep_fini(rds_ep_t *ep)
3573302Sagiri {
3583302Sagiri 	RDS_DPRINTF3("rds_ep_fini", "Enter: EP(%p) type: %d", ep, ep->ep_type);
3593302Sagiri 
3603302Sagiri 	/* free send pool */
3613302Sagiri 	rds_free_send_pool(ep);
3623302Sagiri 
3633302Sagiri 	/* free recv pool */
3643302Sagiri 	rds_free_recv_pool(ep);
3653302Sagiri 
3664467Sagiri 	mutex_enter(&ep->ep_lock);
3674467Sagiri 	ep->ep_hca_guid = 0;
3684467Sagiri 	mutex_exit(&ep->ep_lock);
3694467Sagiri 
3703302Sagiri 	RDS_DPRINTF3("rds_ep_fini", "Return EP(%p)", ep);
3713302Sagiri }
3723302Sagiri 
3733302Sagiri /* Assumes SP write lock is held */
3743302Sagiri int
rds_ep_init(rds_ep_t * ep,ib_guid_t hca_guid)3754467Sagiri rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid)
3763302Sagiri {
3773302Sagiri 	uint_t		ret;
3783302Sagiri 
3793302Sagiri 	RDS_DPRINTF3("rds_ep_init", "Enter: EP(%p) Type: %d", ep, ep->ep_type);
3803302Sagiri 
3813302Sagiri 	/* send pool */
3824467Sagiri 	ret = rds_init_send_pool(ep, hca_guid);
3833302Sagiri 	if (ret != 0) {
3843302Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): rds_init_send_pool failed: %d",
3853302Sagiri 		    ep, ret);
3863302Sagiri 		return (-1);
3873302Sagiri 	}
3883302Sagiri 
3893302Sagiri 	/* recv pool */
3903302Sagiri 	ret = rds_init_recv_pool(ep);
3913302Sagiri 	if (ret != 0) {
3923302Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): rds_init_recv_pool failed: %d",
3933302Sagiri 		    ep, ret);
3943302Sagiri 		rds_free_send_pool(ep);
3953302Sagiri 		return (-1);
3963302Sagiri 	}
3973302Sagiri 
3983302Sagiri 	/* reset the ep state */
3993302Sagiri 	mutex_enter(&ep->ep_lock);
4003302Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
4014467Sagiri 	ep->ep_hca_guid = hca_guid;
4023302Sagiri 	ep->ep_lbufid = NULL;
4033302Sagiri 	ep->ep_rbufid = NULL;
4043302Sagiri 	ep->ep_segfbp = NULL;
4053302Sagiri 	ep->ep_seglbp = NULL;
4063302Sagiri 
4073302Sagiri 	/* Initialize the WR to send acknowledgements */
4083302Sagiri 	ep->ep_ackwr.wr_id = RDS_RDMAW_WRID;
4093302Sagiri 	ep->ep_ackwr.wr_flags = IBT_WR_SEND_SOLICIT;
4103302Sagiri 	ep->ep_ackwr.wr_trans = IBT_RC_SRV;
4113302Sagiri 	ep->ep_ackwr.wr_opcode = IBT_WRC_RDMAW;
4123302Sagiri 	ep->ep_ackwr.wr_nds = 1;
4133302Sagiri 	ep->ep_ackwr.wr_sgl = &ep->ep_ackds;
4143302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = NULL;
4153302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = 0;
4163302Sagiri 	mutex_exit(&ep->ep_lock);
4173302Sagiri 
4183302Sagiri 	RDS_DPRINTF3("rds_ep_init", "Return: EP(%p) type: %d", ep, ep->ep_type);
4193302Sagiri 
4203302Sagiri 	return (0);
4213302Sagiri }
4223302Sagiri 
4234154Sagiri static int
rds_ep_reinit(rds_ep_t * ep,ib_guid_t hca_guid)4244154Sagiri rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid)
4254154Sagiri {
4264154Sagiri 	int	ret;
4274154Sagiri 
4284154Sagiri 	RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d",
4294154Sagiri 	    ep, ep->ep_type);
4304154Sagiri 
4314154Sagiri 	/* Re-initialize send pool */
4324154Sagiri 	ret = rds_reinit_send_pool(ep, hca_guid);
4334154Sagiri 	if (ret != 0) {
4344154Sagiri 		RDS_DPRINTF2("rds_ep_reinit",
4354154Sagiri 		    "EP(%p): rds_reinit_send_pool failed: %d", ep, ret);
4364154Sagiri 		return (-1);
4374154Sagiri 	}
4384154Sagiri 
4394154Sagiri 	/* free all the receive buffers in the pool */
4404154Sagiri 	rds_free_recv_pool(ep);
4414154Sagiri 
4424154Sagiri 	RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d",
4434154Sagiri 	    ep, ep->ep_type);
4444154Sagiri 
4454154Sagiri 	return (0);
4464154Sagiri }
4474154Sagiri 
4483302Sagiri void
rds_session_fini(rds_session_t * sp)4493302Sagiri rds_session_fini(rds_session_t *sp)
4503302Sagiri {
4513302Sagiri 	RDS_DPRINTF2("rds_session_fini", "Enter: SP(0x%p)", sp);
4523302Sagiri 
4533302Sagiri 	rds_ep_fini(&sp->session_dataep);
4543302Sagiri 	rds_ep_fini(&sp->session_ctrlep);
4553302Sagiri 
4563302Sagiri 	RDS_DPRINTF2("rds_session_fini", "Return: SP(0x%p)", sp);
4573302Sagiri }
4583302Sagiri 
4593302Sagiri /*
4603302Sagiri  * Allocate and initialize the resources needed for the control and
4613302Sagiri  * data channels
4623302Sagiri  */
4633302Sagiri int
rds_session_init(rds_session_t * sp)4643302Sagiri rds_session_init(rds_session_t *sp)
4653302Sagiri {
4663302Sagiri 	int		ret;
4674467Sagiri 	rds_hca_t	*hcap;
4684467Sagiri 	ib_guid_t	hca_guid;
4693302Sagiri 
4703302Sagiri 	RDS_DPRINTF2("rds_session_init", "Enter: SP(0x%p)", sp);
4713302Sagiri 
4723302Sagiri 	/* CALLED WITH SESSION WRITE LOCK */
4733302Sagiri 
4744467Sagiri 	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
4754467Sagiri 	if (hcap == NULL) {
4766702Sagiri 		RDS_DPRINTF2("rds_session_init", "SGID is on an uninitialized "
4774467Sagiri 		    "HCA: %llx", sp->session_lgid.gid_guid);
4784467Sagiri 		return (-1);
4794467Sagiri 	}
4804467Sagiri 
4814467Sagiri 	hca_guid = hcap->hca_guid;
4828082SRamaswamy.Tummala@Sun.COM 	sp->session_hca_guid = hca_guid;
4834467Sagiri 
4843302Sagiri 	/* allocate and initialize the ctrl channel */
4854467Sagiri 	ret = rds_ep_init(&sp->session_ctrlep, hca_guid);
4863302Sagiri 	if (ret != 0) {
4873302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p): Ctrl EP(%p) initialization "
4883302Sagiri 		    "failed", sp, &sp->session_ctrlep);
4893302Sagiri 		return (-1);
4903302Sagiri 	}
4913302Sagiri 
4923302Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p)", sp, &sp->session_ctrlep);
4933302Sagiri 
4943302Sagiri 	/* allocate and initialize the data channel */
4954467Sagiri 	ret = rds_ep_init(&sp->session_dataep, hca_guid);
4963302Sagiri 	if (ret != 0) {
4973302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p): Data EP(%p) initialization "
4983302Sagiri 		    "failed", sp, &sp->session_dataep);
4993302Sagiri 		rds_ep_fini(&sp->session_ctrlep);
5003302Sagiri 		return (-1);
5013302Sagiri 	}
5023302Sagiri 
5034467Sagiri 	/* Clear the portmaps */
5044467Sagiri 	rds_unmark_all_ports(sp, RDS_LOCAL);
5054467Sagiri 	rds_unmark_all_ports(sp, RDS_REMOTE);
5064467Sagiri 
5073302Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p)", sp, &sp->session_dataep);
5083302Sagiri 
5093302Sagiri 	RDS_DPRINTF2("rds_session_init", "Return");
5103302Sagiri 
5113302Sagiri 	return (0);
5123302Sagiri }
5133302Sagiri 
5144154Sagiri /*
5154154Sagiri  * This should be called before moving a session from ERROR state to
5164154Sagiri  * INIT state. This will update the HCA keys incase the session has moved from
5174154Sagiri  * one HCA to another.
5184154Sagiri  */
5194154Sagiri int
rds_session_reinit(rds_session_t * sp,ib_gid_t lgid)5204154Sagiri rds_session_reinit(rds_session_t *sp, ib_gid_t lgid)
5214154Sagiri {
5224154Sagiri 	rds_hca_t	*hcap, *hcap1;
5234154Sagiri 	int		ret;
5244154Sagiri 
525*10489SGiri.Adari@Sun.COM 	RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p) - state: %d",
526*10489SGiri.Adari@Sun.COM 	    sp, sp->session_state);
5274154Sagiri 
5284154Sagiri 	/* CALLED WITH SESSION WRITE LOCK */
5294154Sagiri 
5306438Sagiri 	/* Clear the portmaps */
5316438Sagiri 	rds_unmark_all_ports(sp, RDS_LOCAL);
5326438Sagiri 	rds_unmark_all_ports(sp, RDS_REMOTE);
5336438Sagiri 
534*10489SGiri.Adari@Sun.COM 	/* This should not happen but just a safe guard */
535*10489SGiri.Adari@Sun.COM 	if (sp->session_dataep.ep_ack_addr == NULL) {
536*10489SGiri.Adari@Sun.COM 		RDS_DPRINTF2("rds_session_reinit",
537*10489SGiri.Adari@Sun.COM 		    "ERROR: Unexpected: SP(0x%p) - state: %d",
538*10489SGiri.Adari@Sun.COM 		    sp, sp->session_state);
539*10489SGiri.Adari@Sun.COM 		return (-1);
540*10489SGiri.Adari@Sun.COM 	}
541*10489SGiri.Adari@Sun.COM 
5426438Sagiri 	/* make the last buffer as the acknowledged */
5436438Sagiri 	*(uintptr_t *)sp->session_dataep.ep_ack_addr =
5446438Sagiri 	    (uintptr_t)sp->session_dataep.ep_sndpool.pool_tailp;
5456438Sagiri 
5464154Sagiri 	hcap = rds_gid_to_hcap(rdsib_statep, lgid);
5474154Sagiri 	if (hcap == NULL) {
5486702Sagiri 		RDS_DPRINTF2("rds_session_reinit", "SGID is on an "
5494154Sagiri 		    "uninitialized HCA: %llx", lgid.gid_guid);
5504154Sagiri 		return (-1);
5514154Sagiri 	}
5524154Sagiri 
5534154Sagiri 	hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
5544154Sagiri 	if (hcap1 == NULL) {
5556702Sagiri 		RDS_DPRINTF2("rds_session_reinit", "Seems like HCA %llx "
5564154Sagiri 		    "is unplugged", sp->session_lgid.gid_guid);
5574154Sagiri 	} else if (hcap->hca_guid == hcap1->hca_guid) {
5584154Sagiri 		/*
5594154Sagiri 		 * No action is needed as the session did not move across
5604154Sagiri 		 * HCAs
5614154Sagiri 		 */
5624154Sagiri 		RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA");
5634154Sagiri 		return (0);
5644154Sagiri 	}
5654154Sagiri 
5664154Sagiri 	RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs");
5674154Sagiri 
5688082SRamaswamy.Tummala@Sun.COM 	sp->session_hca_guid = hcap->hca_guid;
5698082SRamaswamy.Tummala@Sun.COM 
5704154Sagiri 	/* re-initialize the control channel */
5714154Sagiri 	ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid);
5724154Sagiri 	if (ret != 0) {
5734154Sagiri 		RDS_DPRINTF2("rds_session_reinit",
5744154Sagiri 		    "SP(%p): Ctrl EP(%p) re-initialization failed",
5754154Sagiri 		    sp, &sp->session_ctrlep);
5764154Sagiri 		return (-1);
5774154Sagiri 	}
5784154Sagiri 
5794154Sagiri 	RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)",
5804154Sagiri 	    sp, &sp->session_ctrlep);
5814154Sagiri 
5824154Sagiri 	/* re-initialize the data channel */
5834154Sagiri 	ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid);
5844154Sagiri 	if (ret != 0) {
5854154Sagiri 		RDS_DPRINTF2("rds_session_reinit",
5864154Sagiri 		    "SP(%p): Data EP(%p) re-initialization failed",
5874154Sagiri 		    sp, &sp->session_dataep);
5884154Sagiri 		return (-1);
5894154Sagiri 	}
5904154Sagiri 
5914154Sagiri 	RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)",
5924154Sagiri 	    sp, &sp->session_dataep);
5934154Sagiri 
5944154Sagiri 	sp->session_lgid = lgid;
5954154Sagiri 
5964154Sagiri 	RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp);
5974154Sagiri 
5984154Sagiri 	return (0);
5994154Sagiri }
6004154Sagiri 
6013302Sagiri static int
rds_session_connect(rds_session_t * sp)6023302Sagiri rds_session_connect(rds_session_t *sp)
6033302Sagiri {
6043302Sagiri 	ibt_channel_hdl_t	ctrlchan, datachan;
6053302Sagiri 	rds_ep_t		*ep;
6063302Sagiri 	int			ret;
6073302Sagiri 
6083302Sagiri 	RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp);
6093302Sagiri 
6104703Shiremath 	sp->session_pinfo.pi_sid = rdsib_statep->rds_service_id;
6113302Sagiri 
6123302Sagiri 	/* Override the packet life time based on the conf file */
6133302Sagiri 	if (IBPktLifeTime != 0) {
6144703Shiremath 		sp->session_pinfo.pi_prim_cep_path.cep_cm_opaque1 =
6154703Shiremath 		    IBPktLifeTime;
6163302Sagiri 	}
6173302Sagiri 
6183302Sagiri 	/* Session type may change if we run into peer-to-peer case. */
6193302Sagiri 	rw_enter(&sp->session_lock, RW_READER);
6203302Sagiri 	if (sp->session_type == RDS_SESSION_PASSIVE) {
6213302Sagiri 		RDS_DPRINTF2("rds_session_connect", "SP(%p) is no longer the "
6223302Sagiri 		    "active end", sp);
6233302Sagiri 		rw_exit(&sp->session_lock);
6243302Sagiri 		return (0); /* return success */
6253302Sagiri 	}
6263302Sagiri 	rw_exit(&sp->session_lock);
6273302Sagiri 
6283302Sagiri 	/* connect the data ep first */
6293302Sagiri 	ep = &sp->session_dataep;
6303302Sagiri 	mutex_enter(&ep->ep_lock);
6313302Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
6323302Sagiri 		ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING;
6333302Sagiri 		mutex_exit(&ep->ep_lock);
6344703Shiremath 		ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING,
6354703Shiremath 		    &datachan);
6363302Sagiri 		if (ret != IBT_SUCCESS) {
6373302Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel "
6384154Sagiri 			    "failed: %d", ep, ret);
6393302Sagiri 			return (-1);
6403302Sagiri 		}
6413302Sagiri 		sp->session_dataep.ep_chanhdl = datachan;
6423302Sagiri 	} else {
6433302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p) is in "
6443302Sagiri 		    "unexpected state: %d", sp, ep, ep->ep_state);
6453302Sagiri 		mutex_exit(&ep->ep_lock);
6463302Sagiri 		return (-1);
6473302Sagiri 	}
6483302Sagiri 
6493302Sagiri 	RDS_DPRINTF3(LABEL, "SP(%p) EP(%p): Data channel is connected",
6503302Sagiri 	    sp, ep);
6513302Sagiri 
6523302Sagiri 	ep = &sp->session_ctrlep;
6533302Sagiri 	mutex_enter(&ep->ep_lock);
6543302Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
6553302Sagiri 		ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING;
6563302Sagiri 		mutex_exit(&ep->ep_lock);
6574703Shiremath 		ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING,
6584703Shiremath 		    &ctrlchan);
6593302Sagiri 		if (ret != IBT_SUCCESS) {
6603302Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel "
6613302Sagiri 			    "failed: %d", ep, ret);
6623302Sagiri 			return (-1);
6633302Sagiri 		}
6643302Sagiri 		sp->session_ctrlep.ep_chanhdl = ctrlchan;
6653302Sagiri 	} else {
6663302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p) is in "
6673302Sagiri 		    "unexpected state: %d", sp, ep, ep->ep_state);
6683302Sagiri 		mutex_exit(&ep->ep_lock);
6693302Sagiri 		return (-1);
6703302Sagiri 	}
6713302Sagiri 
6724154Sagiri 	RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED",
6734154Sagiri 	    sp, sp->session_myip, sp->session_remip);
6744154Sagiri 
6753302Sagiri 	RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp);
6763302Sagiri 
6773302Sagiri 	return (0);
6783302Sagiri }
6793302Sagiri 
6803302Sagiri /*
6813302Sagiri  * Can be called with or without session_lock.
6823302Sagiri  */
6833302Sagiri void
rds_session_close(rds_session_t * sp,ibt_execution_mode_t mode,uint_t wait)6843302Sagiri rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait)
6853302Sagiri {
6863302Sagiri 	rds_ep_t		*ep;
6873302Sagiri 
6883302Sagiri 	RDS_DPRINTF2("rds_session_close", "SP(%p) State: %d", sp,
6893302Sagiri 	    sp->session_state);
6903302Sagiri 
6913302Sagiri 	ep = &sp->session_dataep;
6923302Sagiri 	RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state);
6933302Sagiri 
6943302Sagiri 	/* wait until the SQ is empty before closing */
6956438Sagiri 	if (wait != 0) {
6966438Sagiri 		(void) rds_is_sendq_empty(ep, wait);
6976438Sagiri 	}
6983302Sagiri 
6993302Sagiri 	mutex_enter(&ep->ep_lock);
7003302Sagiri 	while (ep->ep_state == RDS_EP_STATE_CLOSING) {
7013302Sagiri 		mutex_exit(&ep->ep_lock);
7023302Sagiri 		delay(drv_usectohz(300000));
7033302Sagiri 		mutex_enter(&ep->ep_lock);
7043302Sagiri 	}
7053302Sagiri 
7063302Sagiri 	if (ep->ep_state == RDS_EP_STATE_CONNECTED) {
7073302Sagiri 		ep->ep_state = RDS_EP_STATE_CLOSING;
7083302Sagiri 		mutex_exit(&ep->ep_lock);
7093302Sagiri 		(void) rds_close_rc_channel(ep->ep_chanhdl, mode);
7106438Sagiri 		if (wait == 0) {
7116438Sagiri 			/* make sure all WCs are flushed before proceeding */
7126438Sagiri 			(void) rds_is_sendq_empty(ep, 1);
7136438Sagiri 		}
7143302Sagiri 		mutex_enter(&ep->ep_lock);
7153302Sagiri 	}
7163302Sagiri 	rds_ep_free_rc_channel(ep);
7173302Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
7183302Sagiri 	ep->ep_segfbp = NULL;
7193302Sagiri 	ep->ep_seglbp = NULL;
7203302Sagiri 	mutex_exit(&ep->ep_lock);
7213302Sagiri 
7223302Sagiri 	ep = &sp->session_ctrlep;
7233302Sagiri 	RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state);
7243302Sagiri 
7253302Sagiri 	/* wait until the SQ is empty before closing */
7266438Sagiri 	if (wait != 0) {
7276438Sagiri 		(void) rds_is_sendq_empty(ep, wait);
7286438Sagiri 	}
7293302Sagiri 
7303302Sagiri 	mutex_enter(&ep->ep_lock);
7313302Sagiri 	while (ep->ep_state == RDS_EP_STATE_CLOSING) {
7323302Sagiri 		mutex_exit(&ep->ep_lock);
7333302Sagiri 		delay(drv_usectohz(300000));
7343302Sagiri 		mutex_enter(&ep->ep_lock);
7353302Sagiri 	}
7363302Sagiri 
7373302Sagiri 	if (ep->ep_state == RDS_EP_STATE_CONNECTED) {
7384703Shiremath 		ep->ep_state = RDS_EP_STATE_CLOSING;
7393302Sagiri 		mutex_exit(&ep->ep_lock);
7403302Sagiri 		(void) rds_close_rc_channel(ep->ep_chanhdl, mode);
7416438Sagiri 		if (wait == 0) {
7426438Sagiri 			/* make sure all WCs are flushed before proceeding */
7436438Sagiri 			(void) rds_is_sendq_empty(ep, 1);
7446438Sagiri 		}
7453302Sagiri 		mutex_enter(&ep->ep_lock);
7463302Sagiri 	}
7473302Sagiri 	rds_ep_free_rc_channel(ep);
7483302Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
7493302Sagiri 	ep->ep_segfbp = NULL;
7503302Sagiri 	ep->ep_seglbp = NULL;
7513302Sagiri 	mutex_exit(&ep->ep_lock);
7523302Sagiri 
7533302Sagiri 	RDS_DPRINTF2("rds_session_close", "Return (%p)", sp);
7543302Sagiri }
7553302Sagiri 
7563302Sagiri /* Free the session */
7573302Sagiri static void
rds_destroy_session(rds_session_t * sp)7583302Sagiri rds_destroy_session(rds_session_t *sp)
7593302Sagiri {
7603302Sagiri 	rds_ep_t	*ep;
7613302Sagiri 	rds_bufpool_t	*pool;
7623302Sagiri 
7633302Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
7643302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FAILED) ||
7653302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI) ||
7663302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING));
7673302Sagiri 
7683302Sagiri 	rw_enter(&sp->session_lock, RW_READER);
7693302Sagiri 	RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d", sp,
7703302Sagiri 	    sp->session_state);
7713302Sagiri 	while (!((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
7723302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FAILED) ||
7733302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI))) {
7743302Sagiri 		rw_exit(&sp->session_lock);
7753302Sagiri 		delay(drv_usectohz(1000000));
7763302Sagiri 		rw_enter(&sp->session_lock, RW_READER);
7773302Sagiri 		RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d WAITING "
7783302Sagiri 		    "ON SESSION", sp, sp->session_state);
7793302Sagiri 	}
7803302Sagiri 	rw_exit(&sp->session_lock);
7813302Sagiri 
7823302Sagiri 	/* data channel */
7833302Sagiri 	ep = &sp->session_dataep;
7843302Sagiri 
7853302Sagiri 	/* send pool locks */
7863302Sagiri 	pool = &ep->ep_sndpool;
7873302Sagiri 	cv_destroy(&pool->pool_cv);
7883302Sagiri 	mutex_destroy(&pool->pool_lock);
7893302Sagiri 
7903302Sagiri 	/* recv pool locks */
7913302Sagiri 	pool = &ep->ep_rcvpool;
7923302Sagiri 	cv_destroy(&pool->pool_cv);
7933302Sagiri 	mutex_destroy(&pool->pool_lock);
7943302Sagiri 	mutex_destroy(&ep->ep_recvqp.qp_lock);
7953302Sagiri 
7963302Sagiri 	/* control channel */
7973302Sagiri 	ep = &sp->session_ctrlep;
7983302Sagiri 
7993302Sagiri 	/* send pool locks */
8003302Sagiri 	pool = &ep->ep_sndpool;
8013302Sagiri 	cv_destroy(&pool->pool_cv);
8023302Sagiri 	mutex_destroy(&pool->pool_lock);
8033302Sagiri 
8043302Sagiri 	/* recv pool locks */
8053302Sagiri 	pool = &ep->ep_rcvpool;
8063302Sagiri 	cv_destroy(&pool->pool_cv);
8073302Sagiri 	mutex_destroy(&pool->pool_lock);
8083302Sagiri 	mutex_destroy(&ep->ep_recvqp.qp_lock);
8093302Sagiri 
8103302Sagiri 	/* session */
8113302Sagiri 	rw_destroy(&sp->session_lock);
8124467Sagiri 	rw_destroy(&sp->session_local_portmap_lock);
8134467Sagiri 	rw_destroy(&sp->session_remote_portmap_lock);
8143302Sagiri 
8153302Sagiri 	/* free the session */
8163302Sagiri 	kmem_free(sp, sizeof (rds_session_t));
8173302Sagiri 
8183302Sagiri 	RDS_DPRINTF2("rds_destroy_session", "SP(%p) Return", sp);
8193302Sagiri }
8203302Sagiri 
8213302Sagiri /* This is called on the taskq thread */
8228082SRamaswamy.Tummala@Sun.COM void
rds_failover_session(void * arg)8233302Sagiri rds_failover_session(void *arg)
8243302Sagiri {
8253302Sagiri 	rds_session_t	*sp = (rds_session_t *)arg;
8263302Sagiri 	ib_gid_t	lgid, rgid;
8273302Sagiri 	ipaddr_t	myip, remip;
8283302Sagiri 	int		ret, cnt = 0;
8298082SRamaswamy.Tummala@Sun.COM 	uint8_t		sp_state;
8303302Sagiri 
8313302Sagiri 	RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp);
8323302Sagiri 
8335342Sagiri 	/* Make sure the session is still alive */
8345342Sagiri 	if (rds_session_lkup_by_sp(sp) == B_FALSE) {
8355342Sagiri 		RDS_DPRINTF2("rds_failover_session",
8365342Sagiri 		    "Return: SP(%p) not ALIVE", sp);
8375342Sagiri 		return;
8385342Sagiri 	}
8395342Sagiri 
8403302Sagiri 	RDS_INCR_FAILOVERS();
8413302Sagiri 
8423302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
8433302Sagiri 	if (sp->session_type != RDS_SESSION_ACTIVE) {
8443302Sagiri 		/*
8453302Sagiri 		 * The remote side must have seen the error and initiated
8463302Sagiri 		 * a re-connect.
8473302Sagiri 		 */
8483302Sagiri 		RDS_DPRINTF2("rds_failover_session",
8493302Sagiri 		    "SP(%p) has become passive", sp);
8503302Sagiri 		rw_exit(&sp->session_lock);
8513302Sagiri 		return;
8523302Sagiri 	}
8534467Sagiri 	sp->session_failover = 1;
8548082SRamaswamy.Tummala@Sun.COM 	sp_state = sp->session_state;
8553302Sagiri 	rw_exit(&sp->session_lock);
8563302Sagiri 
8573302Sagiri 	/*
8583302Sagiri 	 * The session is in ERROR state but close both channels
8593302Sagiri 	 * for a clean start.
8603302Sagiri 	 */
8618082SRamaswamy.Tummala@Sun.COM 	if (sp_state == RDS_SESSION_STATE_ERROR) {
8628082SRamaswamy.Tummala@Sun.COM 		rds_session_close(sp, IBT_BLOCKING, 1);
8638082SRamaswamy.Tummala@Sun.COM 	}
8643302Sagiri 
8653302Sagiri 	/* wait 1 sec before re-connecting */
8663302Sagiri 	delay(drv_usectohz(1000000));
8673302Sagiri 
8683302Sagiri 	do {
8694703Shiremath 		ibt_ip_path_attr_t	ipattr;
8704703Shiremath 		ibt_ip_addr_t		dstip;
8714703Shiremath 
8723302Sagiri 		/* The ipaddr should be in the network order */
8733302Sagiri 		myip = sp->session_myip;
8743302Sagiri 		remip = sp->session_remip;
8753302Sagiri 		ret = rds_sc_path_lookup(&myip, &remip);
8763302Sagiri 		if (ret == 0) {
8773302Sagiri 			RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)",
8783302Sagiri 			    myip, remip);
8793302Sagiri 		}
8803302Sagiri 		/* check if we have (new) path from the source to destination */
8814467Sagiri 		lgid.gid_prefix = 0;
8824467Sagiri 		lgid.gid_guid = 0;
8834467Sagiri 		rgid.gid_prefix = 0;
8844467Sagiri 		rgid.gid_guid = 0;
8854703Shiremath 
8864703Shiremath 		bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
8874703Shiremath 		dstip.family = AF_INET;
8887919SBill.Taylor@Sun.COM 		dstip.un.ip4addr = remip;
8894703Shiremath 		ipattr.ipa_dst_ip = &dstip;
8904703Shiremath 		ipattr.ipa_src_ip.family = AF_INET;
8917919SBill.Taylor@Sun.COM 		ipattr.ipa_src_ip.un.ip4addr = myip;
8924703Shiremath 		ipattr.ipa_ndst = 1;
8934703Shiremath 		ipattr.ipa_max_paths = 1;
8944703Shiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
8954703Shiremath 		    myip, remip);
8964703Shiremath 		ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
8974703Shiremath 		    IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, NULL, NULL);
8984703Shiremath 		if (ret == IBT_SUCCESS) {
8994703Shiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
9004703Shiremath 			lgid = sp->session_pinfo.
9014703Shiremath 			    pi_prim_cep_path.cep_adds_vect.av_sgid;
9024703Shiremath 			rgid = sp->session_pinfo.
9034703Shiremath 			    pi_prim_cep_path.cep_adds_vect.av_dgid;
9043302Sagiri 			break;
9053302Sagiri 		}
9063302Sagiri 
9076702Sagiri 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret);
9084467Sagiri 
9093302Sagiri 		/* wait 1 sec before re-trying */
9103302Sagiri 		delay(drv_usectohz(1000000));
9113302Sagiri 		cnt++;
9124467Sagiri 	} while (cnt < 5);
9133302Sagiri 
9144703Shiremath 	if (ret != IBT_SUCCESS) {
9153302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
9163302Sagiri 		if (sp->session_type == RDS_SESSION_ACTIVE) {
9173302Sagiri 			rds_session_fini(sp);
9183302Sagiri 			sp->session_state = RDS_SESSION_STATE_FAILED;
9194467Sagiri 			sp->session_failover = 0;
9204154Sagiri 			RDS_DPRINTF3("rds_failover_session",
9214154Sagiri 			    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
9223302Sagiri 		} else {
9233302Sagiri 			RDS_DPRINTF2("rds_failover_session",
9243302Sagiri 			    "SP(%p) has become passive", sp);
9253302Sagiri 		}
9263302Sagiri 		rw_exit(&sp->session_lock);
9273302Sagiri 		return;
9283302Sagiri 	}
9293302Sagiri 
9303302Sagiri 	RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
9313302Sagiri 	    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
9323302Sagiri 	    rgid.gid_guid);
9333302Sagiri 
9343302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
9353302Sagiri 	if (sp->session_type != RDS_SESSION_ACTIVE) {
9363302Sagiri 		/*
9373302Sagiri 		 * The remote side must have seen the error and initiated
9383302Sagiri 		 * a re-connect.
9393302Sagiri 		 */
9403302Sagiri 		RDS_DPRINTF2("rds_failover_session",
9413302Sagiri 		    "SP(%p) has become passive", sp);
9423302Sagiri 		rw_exit(&sp->session_lock);
9433302Sagiri 		return;
9443302Sagiri 	}
9453302Sagiri 
9463302Sagiri 	/* move the session to init state */
9474154Sagiri 	ret = rds_session_reinit(sp, lgid);
9483302Sagiri 	sp->session_lgid = lgid;
9493302Sagiri 	sp->session_rgid = rgid;
9504154Sagiri 	if (ret != 0) {
9514154Sagiri 		rds_session_fini(sp);
9524154Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
9534467Sagiri 		sp->session_failover = 0;
9544154Sagiri 		RDS_DPRINTF3("rds_failover_session",
9554154Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
9564154Sagiri 		rw_exit(&sp->session_lock);
9574154Sagiri 		return;
9584154Sagiri 	} else {
9594154Sagiri 		sp->session_state = RDS_SESSION_STATE_INIT;
9604154Sagiri 		RDS_DPRINTF3("rds_failover_session",
9614154Sagiri 		    "SP(%p) State RDS_SESSION_STATE_INIT", sp);
9624154Sagiri 	}
9633302Sagiri 	rw_exit(&sp->session_lock);
9643302Sagiri 
9653302Sagiri 	rds_session_open(sp);
9663302Sagiri 
9673302Sagiri 	RDS_DPRINTF2("rds_failover_session", "Return: (%p)", sp);
9683302Sagiri }
9693302Sagiri 
9703302Sagiri void
rds_handle_send_error(rds_ep_t * ep)9713302Sagiri rds_handle_send_error(rds_ep_t *ep)
9723302Sagiri {
9733302Sagiri 	if (rds_is_sendq_empty(ep, 0)) {
9743302Sagiri 		/* Session should already be in ERROR, try to reconnect */
9753302Sagiri 		RDS_DPRINTF2("rds_handle_send_error",
9763302Sagiri 		    "Dispatching taskq to failover SP(%p)", ep->ep_sp);
9773302Sagiri 		(void) ddi_taskq_dispatch(rds_taskq, rds_failover_session,
9783302Sagiri 		    (void *)ep->ep_sp, DDI_SLEEP);
9793302Sagiri 	}
9803302Sagiri }
9813302Sagiri 
9823302Sagiri /*
9833302Sagiri  * Called in the CM handler on the passive side
9843302Sagiri  * Called on a taskq thread.
9853302Sagiri  */
9863302Sagiri void
rds_cleanup_passive_session(void * arg)9873302Sagiri rds_cleanup_passive_session(void *arg)
9883302Sagiri {
9893302Sagiri 	rds_session_t	*sp = arg;
9903302Sagiri 
9913302Sagiri 	RDS_DPRINTF2("rds_cleanup_passive_session", "SP(%p) State: %d", sp,
9923302Sagiri 	    sp->session_state);
9933302Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
9943302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_ERROR));
9953302Sagiri 
9963302Sagiri 	rds_session_close(sp, IBT_BLOCKING, 1);
9973302Sagiri 
9983302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
9993302Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CLOSED) {
10003302Sagiri 		rds_session_fini(sp);
10013302Sagiri 		sp->session_state = RDS_SESSION_STATE_FINI;
10024467Sagiri 		sp->session_failover = 0;
10033302Sagiri 		RDS_DPRINTF3("rds_cleanup_passive_session",
10043302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
10053302Sagiri 	} else if (sp->session_state == RDS_SESSION_STATE_ERROR) {
10063302Sagiri 		rds_session_fini(sp);
10073302Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
10084467Sagiri 		sp->session_failover = 0;
10093302Sagiri 		RDS_DPRINTF3("rds_cleanup_passive_session",
10103302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
10113302Sagiri 	}
10123302Sagiri 	rw_exit(&sp->session_lock);
10133302Sagiri 
10143302Sagiri 	RDS_DPRINTF2("rds_cleanup_passive_session", "Return: SP (%p)", sp);
10153302Sagiri }
10163302Sagiri 
10173302Sagiri /*
10183302Sagiri  * Called by the CM handler on the passive side
10193302Sagiri  * Called with WRITE lock on the session
10203302Sagiri  */
10213302Sagiri void
rds_passive_session_fini(rds_session_t * sp)10223302Sagiri rds_passive_session_fini(rds_session_t *sp)
10233302Sagiri {
10243302Sagiri 	rds_ep_t	*ep;
10253302Sagiri 
10263302Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "SP(%p) State: %d", sp,
10273302Sagiri 	    sp->session_state);
10283302Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
10293302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_ERROR));
10303302Sagiri 
10313302Sagiri 	/* clean the data channel */
10323302Sagiri 	ep = &sp->session_dataep;
10333302Sagiri 	(void) rds_is_sendq_empty(ep, 1);
10343302Sagiri 	mutex_enter(&ep->ep_lock);
10353302Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep,
10363302Sagiri 	    ep->ep_state);
10373302Sagiri 	rds_ep_free_rc_channel(ep);
10383302Sagiri 	mutex_exit(&ep->ep_lock);
10393302Sagiri 
10403302Sagiri 	/* clean the control channel */
10413302Sagiri 	ep = &sp->session_ctrlep;
10423302Sagiri 	(void) rds_is_sendq_empty(ep, 1);
10433302Sagiri 	mutex_enter(&ep->ep_lock);
10443302Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep,
10453302Sagiri 	    ep->ep_state);
10463302Sagiri 	rds_ep_free_rc_channel(ep);
10473302Sagiri 	mutex_exit(&ep->ep_lock);
10483302Sagiri 
10493302Sagiri 	rds_session_fini(sp);
10504467Sagiri 	sp->session_failover = 0;
10513302Sagiri 
10523302Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp);
10533302Sagiri }
10543302Sagiri 
10558082SRamaswamy.Tummala@Sun.COM void
rds_close_this_session(rds_session_t * sp,uint8_t wait)10568082SRamaswamy.Tummala@Sun.COM rds_close_this_session(rds_session_t *sp, uint8_t wait)
10578082SRamaswamy.Tummala@Sun.COM {
10588082SRamaswamy.Tummala@Sun.COM 	switch (sp->session_state) {
10598082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_CONNECTED:
10608082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING;
10618082SRamaswamy.Tummala@Sun.COM 		rw_exit(&sp->session_lock);
10628082SRamaswamy.Tummala@Sun.COM 
10638082SRamaswamy.Tummala@Sun.COM 		rds_session_close(sp, IBT_BLOCKING, wait);
10648082SRamaswamy.Tummala@Sun.COM 
10658082SRamaswamy.Tummala@Sun.COM 		rw_enter(&sp->session_lock, RW_WRITER);
10668082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_CLOSED;
10678082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF3("rds_close_sessions",
10688082SRamaswamy.Tummala@Sun.COM 		    "SP(%p) State RDS_SESSION_STATE_CLOSED", sp);
10698082SRamaswamy.Tummala@Sun.COM 		rds_session_fini(sp);
10708082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_FINI;
10718082SRamaswamy.Tummala@Sun.COM 		sp->session_failover = 0;
10728082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF3("rds_close_sessions",
10738082SRamaswamy.Tummala@Sun.COM 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
10748082SRamaswamy.Tummala@Sun.COM 		break;
10758082SRamaswamy.Tummala@Sun.COM 
10768082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_ERROR:
10778082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
10788082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_INIT:
10798082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING;
10808082SRamaswamy.Tummala@Sun.COM 		rw_exit(&sp->session_lock);
10818082SRamaswamy.Tummala@Sun.COM 
10828082SRamaswamy.Tummala@Sun.COM 		rds_session_close(sp, IBT_BLOCKING, wait);
10838082SRamaswamy.Tummala@Sun.COM 
10848082SRamaswamy.Tummala@Sun.COM 		rw_enter(&sp->session_lock, RW_WRITER);
10858082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_CLOSED;
10868082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF3("rds_close_sessions",
10878082SRamaswamy.Tummala@Sun.COM 		    "SP(%p) State RDS_SESSION_STATE_CLOSED", sp);
10888082SRamaswamy.Tummala@Sun.COM 		/* FALLTHRU */
10898082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_CLOSED:
10908082SRamaswamy.Tummala@Sun.COM 		rds_session_fini(sp);
10918082SRamaswamy.Tummala@Sun.COM 		sp->session_state = RDS_SESSION_STATE_FINI;
10928082SRamaswamy.Tummala@Sun.COM 		sp->session_failover = 0;
10938082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF3("rds_close_sessions",
10948082SRamaswamy.Tummala@Sun.COM 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
10958082SRamaswamy.Tummala@Sun.COM 		break;
10968082SRamaswamy.Tummala@Sun.COM 	}
10978082SRamaswamy.Tummala@Sun.COM }
10988082SRamaswamy.Tummala@Sun.COM 
10993302Sagiri /*
11003302Sagiri  * Can be called:
11013302Sagiri  * 1. on driver detach
11023302Sagiri  * 2. on taskq thread
11033302Sagiri  * arg is always NULL
11043302Sagiri  */
11053302Sagiri /* ARGSUSED */
11063302Sagiri void
rds_close_sessions(void * arg)11073302Sagiri rds_close_sessions(void *arg)
11083302Sagiri {
11093302Sagiri 	rds_session_t *sp, *spnextp;
11103302Sagiri 
11113302Sagiri 	RDS_DPRINTF2("rds_close_sessions", "Enter");
11123302Sagiri 
11133302Sagiri 	/* wait until all the buffers are freed by the sockets */
11143302Sagiri 	while (RDS_GET_RXPKTS_PEND() != 0) {
11153302Sagiri 		/* wait one second and try again */
11163302Sagiri 		RDS_DPRINTF2("rds_close_sessions", "waiting on "
11173302Sagiri 		    "pending packets", RDS_GET_RXPKTS_PEND());
11183302Sagiri 		delay(drv_usectohz(1000000));
11193302Sagiri 	}
11203302Sagiri 	RDS_DPRINTF2("rds_close_sessions", "No more RX packets pending");
11213302Sagiri 
11223302Sagiri 	/* close all the sessions */
11233302Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER);
11243302Sagiri 	sp = rdsib_statep->rds_sessionlistp;
11253302Sagiri 	while (sp) {
11263302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
11273302Sagiri 		RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp,
11283302Sagiri 		    sp->session_state);
11298082SRamaswamy.Tummala@Sun.COM 		rds_close_this_session(sp, 2);
11303302Sagiri 		rw_exit(&sp->session_lock);
11313302Sagiri 		sp = sp->session_nextp;
11323302Sagiri 	}
11333302Sagiri 
11343302Sagiri 	sp = rdsib_statep->rds_sessionlistp;
11353302Sagiri 	rdsib_statep->rds_sessionlistp = NULL;
11363302Sagiri 	rdsib_statep->rds_nsessions = 0;
11373302Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
11383302Sagiri 
11393302Sagiri 	while (sp) {
11403302Sagiri 		spnextp = sp->session_nextp;
11413302Sagiri 		rds_destroy_session(sp);
11423302Sagiri 		RDS_DECR_SESS();
11433302Sagiri 		sp = spnextp;
11443302Sagiri 	}
11453302Sagiri 
11463302Sagiri 	/* free the global pool */
11473302Sagiri 	rds_free_recv_caches(rdsib_statep);
11483302Sagiri 
11493302Sagiri 	RDS_DPRINTF2("rds_close_sessions", "Return");
11503302Sagiri }
11513302Sagiri 
11523302Sagiri void
rds_session_open(rds_session_t * sp)11533302Sagiri rds_session_open(rds_session_t *sp)
11543302Sagiri {
11553302Sagiri 	int		ret;
11563302Sagiri 
11573302Sagiri 	RDS_DPRINTF2("rds_session_open", "Enter SP(%p)", sp);
11583302Sagiri 
11593302Sagiri 	ret = rds_session_connect(sp);
11603302Sagiri 	if (ret == -1) {
11613302Sagiri 		/*
11623302Sagiri 		 * may be the session has become passive due to
11633302Sagiri 		 * hitting peer-to-peer case
11643302Sagiri 		 */
11653302Sagiri 		rw_enter(&sp->session_lock, RW_READER);
11663302Sagiri 		if (sp->session_type == RDS_SESSION_PASSIVE) {
11673302Sagiri 			RDS_DPRINTF2("rds_session_open", "SP(%p) "
11683302Sagiri 			    "has become passive from active", sp);
11693302Sagiri 			rw_exit(&sp->session_lock);
11703302Sagiri 			return;
11713302Sagiri 		}
11723302Sagiri 
11733302Sagiri 		/* get the lock for writing */
11743302Sagiri 		rw_exit(&sp->session_lock);
11753302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
11763302Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
11773302Sagiri 		RDS_DPRINTF3("rds_session_open",
11783302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
11793302Sagiri 		rw_exit(&sp->session_lock);
11803302Sagiri 
11813302Sagiri 		/* Connect request failed */
11823302Sagiri 		rds_session_close(sp, IBT_BLOCKING, 1);
11833302Sagiri 
11843302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
11853302Sagiri 		rds_session_fini(sp);
11863302Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
11874467Sagiri 		sp->session_failover = 0;
11883302Sagiri 		RDS_DPRINTF3("rds_session_open",
11893302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
11903302Sagiri 		rw_exit(&sp->session_lock);
11913302Sagiri 
11923302Sagiri 		return;
11933302Sagiri 	}
11943302Sagiri 
11953302Sagiri 	RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp);
11963302Sagiri }
11973302Sagiri 
11983302Sagiri /*
11993302Sagiri  * Creates a session and inserts it into the list of sessions. The session
12003302Sagiri  * state would be CREATED.
12013302Sagiri  * Return Values:
12023302Sagiri  *	EWOULDBLOCK
12033302Sagiri  */
12043302Sagiri rds_session_t *
rds_session_create(rds_state_t * statep,ipaddr_t localip,ipaddr_t remip,ibt_cm_req_rcv_t * reqp,uint8_t type)12053302Sagiri rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip,
12063302Sagiri     ibt_cm_req_rcv_t *reqp, uint8_t type)
12073302Sagiri {
12083302Sagiri 	ib_gid_t	lgid, rgid;
12093302Sagiri 	rds_session_t	*newp, *oldp;
12103302Sagiri 	rds_ep_t	*dataep, *ctrlep;
12113302Sagiri 	rds_bufpool_t	*pool;
12123302Sagiri 	int		ret;
12133302Sagiri 
12146438Sagiri 	RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x, type: %d",
12156438Sagiri 	    statep, localip, remip, type);
12163302Sagiri 
12176702Sagiri 	/* Check if there is space for a new session */
12186702Sagiri 	rw_enter(&statep->rds_sessionlock, RW_READER);
12196702Sagiri 	if (statep->rds_nsessions >= (MaxNodes - 1)) {
12206702Sagiri 		rw_exit(&statep->rds_sessionlock);
12216702Sagiri 		RDS_DPRINTF1("rds_session_create", "No More Sessions allowed");
12226702Sagiri 		return (NULL);
12236702Sagiri 	}
12246702Sagiri 	rw_exit(&statep->rds_sessionlock);
12256702Sagiri 
12263302Sagiri 	/* Allocate and initialize global buffer pool */
12273302Sagiri 	ret = rds_init_recv_caches(statep);
12283302Sagiri 	if (ret != 0) {
12293302Sagiri 		RDS_DPRINTF2(LABEL, "Buffer Cache Initialization failed");
12303302Sagiri 		return (NULL);
12313302Sagiri 	}
12323302Sagiri 
12333302Sagiri 	/* enough memory for session (includes 2 endpoints) */
12343302Sagiri 	newp = kmem_zalloc(sizeof (rds_session_t), KM_SLEEP);
12353302Sagiri 
12363302Sagiri 	newp->session_remip = remip;
12373302Sagiri 	newp->session_myip = localip;
12383302Sagiri 	newp->session_type = type;
12393302Sagiri 	newp->session_state = RDS_SESSION_STATE_CREATED;
12403302Sagiri 	RDS_DPRINTF3("rds_session_create",
12413302Sagiri 	    "SP(%p) State RDS_SESSION_STATE_CREATED", newp);
12423302Sagiri 	rw_init(&newp->session_lock, NULL, RW_DRIVER, NULL);
12434467Sagiri 	rw_init(&newp->session_local_portmap_lock, NULL, RW_DRIVER, NULL);
12444467Sagiri 	rw_init(&newp->session_remote_portmap_lock, NULL, RW_DRIVER, NULL);
12453302Sagiri 
12463302Sagiri 	/* Initialize data endpoint */
12473302Sagiri 	dataep = &newp->session_dataep;
12483302Sagiri 	dataep->ep_remip = newp->session_remip;
12493302Sagiri 	dataep->ep_myip = newp->session_myip;
12503302Sagiri 	dataep->ep_state = RDS_EP_STATE_UNCONNECTED;
12513302Sagiri 	dataep->ep_sp = newp;
12523302Sagiri 	dataep->ep_type = RDS_EP_TYPE_DATA;
12533302Sagiri 	mutex_init(&dataep->ep_lock, NULL, MUTEX_DRIVER, NULL);
12543302Sagiri 
12553302Sagiri 	/* Initialize send pool locks */
12563302Sagiri 	pool = &dataep->ep_sndpool;
12573302Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
12583302Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
12593302Sagiri 
12603302Sagiri 	/* Initialize recv pool locks */
12613302Sagiri 	pool = &dataep->ep_rcvpool;
12623302Sagiri 	mutex_init(&dataep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL);
12633302Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
12643302Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
12653302Sagiri 
12663302Sagiri 	/* Initialize control endpoint */
12673302Sagiri 	ctrlep = &newp->session_ctrlep;
12683302Sagiri 	ctrlep->ep_remip = newp->session_remip;
12693302Sagiri 	ctrlep->ep_myip = newp->session_myip;
12703302Sagiri 	ctrlep->ep_state = RDS_EP_STATE_UNCONNECTED;
12713302Sagiri 	ctrlep->ep_sp = newp;
12723302Sagiri 	ctrlep->ep_type = RDS_EP_TYPE_CTRL;
12733302Sagiri 	mutex_init(&ctrlep->ep_lock, NULL, MUTEX_DRIVER, NULL);
12743302Sagiri 
12753302Sagiri 	/* Initialize send pool locks */
12763302Sagiri 	pool = &ctrlep->ep_sndpool;
12773302Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
12783302Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
12793302Sagiri 
12803302Sagiri 	/* Initialize recv pool locks */
12813302Sagiri 	pool = &ctrlep->ep_rcvpool;
12823302Sagiri 	mutex_init(&ctrlep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL);
12833302Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
12843302Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
12853302Sagiri 
12863302Sagiri 	/* lkup if there is already a session */
12873302Sagiri 	rw_enter(&statep->rds_sessionlock, RW_WRITER);
12883302Sagiri 	oldp = rds_session_lkup(statep, remip, 0);
12893302Sagiri 	if (oldp != NULL) {
12903302Sagiri 		/* A session to this destination exists */
12913302Sagiri 		rw_exit(&statep->rds_sessionlock);
12923302Sagiri 		rw_destroy(&newp->session_lock);
12934467Sagiri 		rw_destroy(&newp->session_local_portmap_lock);
12944467Sagiri 		rw_destroy(&newp->session_remote_portmap_lock);
12953302Sagiri 		mutex_destroy(&dataep->ep_lock);
12963302Sagiri 		mutex_destroy(&ctrlep->ep_lock);
12973302Sagiri 		kmem_free(newp, sizeof (rds_session_t));
12983302Sagiri 		return (NULL);
12993302Sagiri 	}
13003302Sagiri 
13013302Sagiri 	/* Insert this session into the list */
13026702Sagiri 	if (rds_add_session(newp, B_TRUE) != B_TRUE) {
13036702Sagiri 		/* No room to add this session */
13046702Sagiri 		rw_exit(&statep->rds_sessionlock);
13056702Sagiri 		rw_destroy(&newp->session_lock);
13066702Sagiri 		rw_destroy(&newp->session_local_portmap_lock);
13076702Sagiri 		rw_destroy(&newp->session_remote_portmap_lock);
13086702Sagiri 		mutex_destroy(&dataep->ep_lock);
13096702Sagiri 		mutex_destroy(&ctrlep->ep_lock);
13106702Sagiri 		kmem_free(newp, sizeof (rds_session_t));
13116702Sagiri 		return (NULL);
13126702Sagiri 	}
13133302Sagiri 
13143302Sagiri 	/* unlock the session list */
13153302Sagiri 	rw_exit(&statep->rds_sessionlock);
13163302Sagiri 
13173302Sagiri 	if (type == RDS_SESSION_ACTIVE) {
13186438Sagiri 		ipaddr_t		localip1, remip1;
13194703Shiremath 		ibt_ip_path_attr_t	ipattr;
13204703Shiremath 		ibt_ip_addr_t		dstip;
13213302Sagiri 
13223302Sagiri 		/* The ipaddr should be in the network order */
13233302Sagiri 		localip1 = localip;
13243302Sagiri 		remip1 = remip;
13253302Sagiri 		ret = rds_sc_path_lookup(&localip1, &remip1);
13263302Sagiri 		if (ret == 0) {
13273302Sagiri 			RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)",
13283302Sagiri 			    localip, remip);
13293302Sagiri 		}
13303302Sagiri 
13313302Sagiri 		/* Get the gids for the source and destination ip addrs */
13324467Sagiri 		lgid.gid_prefix = 0;
13334467Sagiri 		lgid.gid_guid = 0;
13344467Sagiri 		rgid.gid_prefix = 0;
13354467Sagiri 		rgid.gid_guid = 0;
13364703Shiremath 
13374703Shiremath 		bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
13384703Shiremath 		dstip.family = AF_INET;
13397919SBill.Taylor@Sun.COM 		dstip.un.ip4addr = remip1;
13404703Shiremath 		ipattr.ipa_dst_ip = &dstip;
13414703Shiremath 		ipattr.ipa_src_ip.family = AF_INET;
13427919SBill.Taylor@Sun.COM 		ipattr.ipa_src_ip.un.ip4addr = localip1;
13434703Shiremath 		ipattr.ipa_ndst = 1;
13444703Shiremath 		ipattr.ipa_max_paths = 1;
13454703Shiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
13464703Shiremath 		    localip1, remip1);
13474703Shiremath 		ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
13484703Shiremath 		    IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo,
13494703Shiremath 		    NULL, NULL);
13504703Shiremath 		if (ret != IBT_SUCCESS) {
13516702Sagiri 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d "
13524467Sagiri 			    "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix,
13534467Sagiri 			    lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid);
13544467Sagiri 
13553302Sagiri 			RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED);
13563302Sagiri 			return (NULL);
13573302Sagiri 		}
13584703Shiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
13594703Shiremath 		lgid =
13604703Shiremath 		    newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_sgid;
13614703Shiremath 		rgid =
13624703Shiremath 		    newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_dgid;
13633302Sagiri 
13643302Sagiri 		RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
13653302Sagiri 		    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
13663302Sagiri 		    rgid.gid_guid);
13673302Sagiri 	}
13683302Sagiri 
13693302Sagiri 	rw_enter(&newp->session_lock, RW_WRITER);
13703302Sagiri 	/* check for peer-to-peer case */
13713302Sagiri 	if (type == newp->session_type) {
13723302Sagiri 		/* no peer-to-peer case */
13733302Sagiri 		if (type == RDS_SESSION_ACTIVE) {
13743302Sagiri 			newp->session_lgid = lgid;
13753302Sagiri 			newp->session_rgid = rgid;
13763302Sagiri 		} else {
13773302Sagiri 			/* rgid is requester gid & lgid is receiver gid */
13783302Sagiri 			newp->session_rgid = reqp->req_prim_addr.av_dgid;
13793302Sagiri 			newp->session_lgid = reqp->req_prim_addr.av_sgid;
13803302Sagiri 		}
13813302Sagiri 	}
13823302Sagiri 	rw_exit(&newp->session_lock);
13833302Sagiri 
13843302Sagiri 	RDS_DPRINTF2("rds_session_create", "Return SP(%p)", newp);
13853302Sagiri 
13863302Sagiri 	return (newp);
13873302Sagiri }
13883302Sagiri 
13893302Sagiri void
rds_handle_close_session_request(void * arg)13908082SRamaswamy.Tummala@Sun.COM rds_handle_close_session_request(void *arg)
13918082SRamaswamy.Tummala@Sun.COM {
13928082SRamaswamy.Tummala@Sun.COM 	rds_session_t	*sp = (rds_session_t *)arg;
13938082SRamaswamy.Tummala@Sun.COM 
13948082SRamaswamy.Tummala@Sun.COM 	RDS_DPRINTF2("rds_handle_close_session_request",
13958082SRamaswamy.Tummala@Sun.COM 	    "Enter: Closing this Session (%p)", sp);
13968082SRamaswamy.Tummala@Sun.COM 
13978082SRamaswamy.Tummala@Sun.COM 	rw_enter(&sp->session_lock, RW_WRITER);
13988082SRamaswamy.Tummala@Sun.COM 	RDS_DPRINTF2("rds_handle_close_session_request",
13998082SRamaswamy.Tummala@Sun.COM 	    "SP(%p) State: %d", sp, sp->session_state);
14008082SRamaswamy.Tummala@Sun.COM 	rds_close_this_session(sp, 2);
14018082SRamaswamy.Tummala@Sun.COM 	rw_exit(&sp->session_lock);
14028082SRamaswamy.Tummala@Sun.COM 
14038082SRamaswamy.Tummala@Sun.COM 	RDS_DPRINTF2("rds_handle_close_session_request", "Return SP(%p)", sp);
14048082SRamaswamy.Tummala@Sun.COM }
14058082SRamaswamy.Tummala@Sun.COM 
14068082SRamaswamy.Tummala@Sun.COM void
rds_handle_control_message(rds_session_t * sp,rds_ctrl_pkt_t * cpkt)14073302Sagiri rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt)
14083302Sagiri {
14093302Sagiri 	RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d "
14103302Sagiri 	    "port: %d", sp, cpkt->rcp_code, cpkt->rcp_port);
14113302Sagiri 
14123302Sagiri 	switch (cpkt->rcp_code) {
14133302Sagiri 	case RDS_CTRL_CODE_STALL:
14143302Sagiri 		RDS_INCR_STALLS_RCVD();
14154467Sagiri 		(void) rds_check_n_mark_port(sp, cpkt->rcp_port, RDS_REMOTE);
14163302Sagiri 		break;
14173302Sagiri 	case RDS_CTRL_CODE_UNSTALL:
14183302Sagiri 		RDS_INCR_UNSTALLS_RCVD();
14194467Sagiri 		(void) rds_check_n_unmark_port(sp, cpkt->rcp_port, RDS_REMOTE);
14203302Sagiri 		break;
14213302Sagiri 	case RDS_CTRL_CODE_STALL_PORTS:
14224467Sagiri 		rds_mark_all_ports(sp, RDS_REMOTE);
14233302Sagiri 		break;
14243302Sagiri 	case RDS_CTRL_CODE_UNSTALL_PORTS:
14254467Sagiri 		rds_unmark_all_ports(sp, RDS_REMOTE);
14263302Sagiri 		break;
14273302Sagiri 	case RDS_CTRL_CODE_HEARTBEAT:
14283302Sagiri 		break;
14298082SRamaswamy.Tummala@Sun.COM 	case RDS_CTRL_CODE_CLOSE_SESSION:
14308082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF2("rds_handle_control_message",
14318082SRamaswamy.Tummala@Sun.COM 		    "SP(%p) Remote Requested to close this session", sp);
14328082SRamaswamy.Tummala@Sun.COM 		(void) ddi_taskq_dispatch(rds_taskq,
14338082SRamaswamy.Tummala@Sun.COM 		    rds_handle_close_session_request, (void *)sp, DDI_SLEEP);
14348082SRamaswamy.Tummala@Sun.COM 		break;
14353302Sagiri 	default:
14363302Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d",
14373302Sagiri 		    cpkt->rcp_code);
14383302Sagiri 		break;
14393302Sagiri 	}
14403302Sagiri 
14413302Sagiri 	RDS_DPRINTF4("rds_handle_control_message", "Return");
14423302Sagiri }
14433302Sagiri 
14444467Sagiri int
rds_post_control_message(rds_session_t * sp,uint8_t code,in_port_t port)14454467Sagiri rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port)
14463302Sagiri {
14473302Sagiri 	ibt_send_wr_t	wr;
14483302Sagiri 	rds_ep_t	*ep;
14493302Sagiri 	rds_buf_t	*bp;
14503302Sagiri 	rds_ctrl_pkt_t	*cp;
14513302Sagiri 	int		ret;
14523302Sagiri 
14533302Sagiri 	RDS_DPRINTF4("rds_post_control_message", "Enter: SP(%p) Code: %d "
14544467Sagiri 	    "Port: %d", sp, code, port);
14553302Sagiri 
14563302Sagiri 	ep = &sp->session_ctrlep;
14573302Sagiri 
14583302Sagiri 	bp = rds_get_send_buf(ep, 1);
14593302Sagiri 	if (bp == NULL) {
14603302Sagiri 		RDS_DPRINTF2(LABEL, "No buffers available to send control "
14614467Sagiri 		    "message: SP(%p) Code: %d Port: %d", sp, code,
14624467Sagiri 		    port);
14634467Sagiri 		return (-1);
14643302Sagiri 	}
14653302Sagiri 
14663302Sagiri 	cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
14674467Sagiri 	cp->rcp_code = code;
14684467Sagiri 	cp->rcp_port = port;
14693302Sagiri 	bp->buf_ds.ds_len = RDS_CTRLPKT_SIZE;
14703302Sagiri 
14713302Sagiri 	wr.wr_id = (uintptr_t)bp;
14723302Sagiri 	wr.wr_flags = IBT_WR_SEND_SOLICIT;
14733302Sagiri 	wr.wr_trans = IBT_RC_SRV;
14743302Sagiri 	wr.wr_opcode = IBT_WRC_SEND;
14753302Sagiri 	wr.wr_nds = 1;
14763302Sagiri 	wr.wr_sgl = &bp->buf_ds;
14773302Sagiri 	RDS_DPRINTF5(LABEL, "ds_va %p ds_len %d ds_lkey 0x%llx",
14783302Sagiri 	    bp->buf_ds.ds_va, bp->buf_ds.ds_len, bp->buf_ds.ds_key);
14793302Sagiri 	ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL);
14803302Sagiri 	if (ret != IBT_SUCCESS) {
14813302Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
14823302Sagiri 		    "%d", ep, ret);
14833302Sagiri 		bp->buf_state = RDS_SNDBUF_FREE;
14843302Sagiri 		rds_free_send_buf(ep, bp, NULL, 1, B_FALSE);
14854467Sagiri 		return (-1);
14863302Sagiri 	}
14873302Sagiri 
14883302Sagiri 	RDS_DPRINTF4("rds_post_control_message", "Return SP(%p) Code: %d "
14894467Sagiri 	    "Port: %d", sp, code, port);
14904467Sagiri 
14914467Sagiri 	return (0);
14923302Sagiri }
14933302Sagiri 
14943302Sagiri void
rds_stall_port(rds_session_t * sp,in_port_t port,uint_t qualifier)14954467Sagiri rds_stall_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
14963302Sagiri {
14974467Sagiri 	int		ret;
14984467Sagiri 
14994467Sagiri 	RDS_DPRINTF4("rds_stall_port", "Enter: SP(%p) Port %d", sp, port);
15004467Sagiri 
15014467Sagiri 	RDS_INCR_STALLS_TRIGGERED();
15024467Sagiri 
15034467Sagiri 	if (!rds_check_n_mark_port(sp, port, qualifier)) {
15044467Sagiri 
15054467Sagiri 		if (sp != NULL) {
15064467Sagiri 			ret = rds_post_control_message(sp,
15074467Sagiri 			    RDS_CTRL_CODE_STALL, port);
15084467Sagiri 			if (ret != 0) {
15094467Sagiri 				(void) rds_check_n_unmark_port(sp, port,
15104467Sagiri 				    qualifier);
15114467Sagiri 				return;
15124467Sagiri 			}
15134467Sagiri 			RDS_INCR_STALLS_SENT();
15144467Sagiri 		}
15154467Sagiri 	} else {
15164467Sagiri 		RDS_DPRINTF3(LABEL,
15174467Sagiri 		    "Port %d is already in stall state", port);
15184467Sagiri 	}
15194467Sagiri 
15204467Sagiri 	RDS_DPRINTF4("rds_stall_port", "Return: SP(%p) Port %d", sp, port);
15214467Sagiri }
15224467Sagiri 
15234467Sagiri void
rds_resume_port(in_port_t port)15244467Sagiri rds_resume_port(in_port_t port)
15254467Sagiri {
15263302Sagiri 	rds_session_t	*sp;
15273302Sagiri 	uint_t		ix;
15284467Sagiri 	int		ret;
15293302Sagiri 
15304467Sagiri 	RDS_DPRINTF4("rds_resume_port", "Enter: Port %d", port);
15313302Sagiri 
15324467Sagiri 	RDS_INCR_UNSTALLS_TRIGGERED();
15333302Sagiri 
15344467Sagiri 	/* resume loopback traffic */
15354467Sagiri 	(void) rds_check_n_unmark_port(NULL, port, RDS_LOOPBACK);
15364467Sagiri 
15374467Sagiri 	/* send unstall messages to resume the remote traffic */
15383302Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
15393302Sagiri 
15403302Sagiri 	sp = rdsib_statep->rds_sessionlistp;
15413302Sagiri 	for (ix = 0; ix < rdsib_statep->rds_nsessions; ix++) {
15423302Sagiri 		ASSERT(sp != NULL);
15434467Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_CONNECTED) &&
15444467Sagiri 		    (rds_check_n_unmark_port(sp, port, RDS_LOCAL))) {
15454467Sagiri 				ret = rds_post_control_message(sp,
15464467Sagiri 				    RDS_CTRL_CODE_UNSTALL, port);
15474467Sagiri 				if (ret != 0) {
15484467Sagiri 					(void) rds_check_n_mark_port(sp, port,
15494467Sagiri 					    RDS_LOCAL);
15504467Sagiri 				} else {
15514467Sagiri 					RDS_INCR_UNSTALLS_SENT();
15524467Sagiri 				}
15533302Sagiri 		}
15543302Sagiri 
15553302Sagiri 		sp = sp->session_nextp;
15563302Sagiri 	}
15573302Sagiri 
15583302Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
15593302Sagiri 
15603302Sagiri 	RDS_DPRINTF4("rds_resume_port", "Return: Port %d", port);
15613302Sagiri }
15623302Sagiri 
15633302Sagiri static int
rds_build_n_post_msg(rds_ep_t * ep,uio_t * uiop,in_port_t sendport,in_port_t recvport)15643302Sagiri rds_build_n_post_msg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport,
15653302Sagiri     in_port_t recvport)
15663302Sagiri {
15673302Sagiri 	ibt_send_wr_t	*wrp, wr;
15683302Sagiri 	rds_buf_t	*bp, *bp1;
15693302Sagiri 	rds_data_hdr_t	*pktp;
15703302Sagiri 	uint32_t	msgsize, npkts, residual, pktno, ix;
15713302Sagiri 	int		ret;
15723302Sagiri 
15733302Sagiri 	RDS_DPRINTF4("rds_build_n_post_msg", "Enter: EP(%p) UIOP(%p)",
15743302Sagiri 	    ep, uiop);
15753302Sagiri 
15763302Sagiri 	/* how many pkts are needed to carry this msg */
15773302Sagiri 	msgsize = uiop->uio_resid;
15783302Sagiri 	npkts = ((msgsize - 1) / UserBufferSize) + 1;
15793302Sagiri 	residual = ((msgsize - 1) % UserBufferSize) + 1;
15803302Sagiri 
15813302Sagiri 	RDS_DPRINTF5(LABEL, "EP(%p) UIOP(%p) msg size: %d npkts: %d", ep, uiop,
15823302Sagiri 	    msgsize, npkts);
15833302Sagiri 
15843302Sagiri 	/* Get the buffers needed to post this message */
15853302Sagiri 	bp = rds_get_send_buf(ep, npkts);
15863302Sagiri 	if (bp == NULL) {
15873302Sagiri 		RDS_INCR_ENOBUFS();
15883302Sagiri 		return (ENOBUFS);
15893302Sagiri 	}
15903302Sagiri 
15913302Sagiri 	if (npkts > 1) {
15923302Sagiri 		/*
15933302Sagiri 		 * multi-pkt messages are posted at the same time as a list
15943302Sagiri 		 * of WRs
15953302Sagiri 		 */
15963302Sagiri 		wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) *
15973302Sagiri 		    npkts, KM_SLEEP);
15983302Sagiri 	}
15993302Sagiri 
16003302Sagiri 
16013302Sagiri 	pktno = 0;
16023302Sagiri 	bp1 = bp;
16033302Sagiri 	do {
16043302Sagiri 		/* prepare the header */
16053302Sagiri 		pktp = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va;
16063302Sagiri 		pktp->dh_datalen = UserBufferSize;
16073302Sagiri 		pktp->dh_npkts = npkts - pktno;
16083302Sagiri 		pktp->dh_psn = pktno;
16093302Sagiri 		pktp->dh_sendport = sendport;
16103302Sagiri 		pktp->dh_recvport = recvport;
16113302Sagiri 		bp1->buf_ds.ds_len = RdsPktSize;
16123302Sagiri 
16133302Sagiri 		/* copy the data */
16143302Sagiri 		ret = uiomove((uint8_t *)pktp + RDS_DATA_HDR_SZ,
16153302Sagiri 		    UserBufferSize, UIO_WRITE, uiop);
16163302Sagiri 		if (ret != 0) {
16173302Sagiri 			break;
16183302Sagiri 		}
16193302Sagiri 
16203302Sagiri 		if (uiop->uio_resid == 0) {
16213302Sagiri 			pktp->dh_datalen = residual;
16223302Sagiri 			bp1->buf_ds.ds_len = residual + RDS_DATA_HDR_SZ;
16233302Sagiri 			break;
16243302Sagiri 		}
16253302Sagiri 		pktno++;
16263302Sagiri 		bp1 = bp1->buf_nextp;
16273302Sagiri 	} while (uiop->uio_resid);
16283302Sagiri 
16293302Sagiri 	if (ret) {
16303302Sagiri 		/* uiomove failed */
16313302Sagiri 		RDS_DPRINTF2("rds_build_n_post_msg", "UIO(%p) Move FAILED: %d",
16323302Sagiri 		    uiop, ret);
16333302Sagiri 		if (npkts > 1) {
16343302Sagiri 			kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
16353302Sagiri 		}
16363302Sagiri 		rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE);
16373302Sagiri 		return (ret);
16383302Sagiri 	}
16393302Sagiri 
16403302Sagiri 	if (npkts > 1) {
16413302Sagiri 		/* multi-pkt message */
16423302Sagiri 		RDS_DPRINTF5(LABEL, "EP(%p) Sending Multiple Packets", ep);
16433302Sagiri 
16443302Sagiri 		bp1 = bp;
16453302Sagiri 		for (ix = 0; ix < npkts; ix++) {
16463302Sagiri 			wrp[ix].wr_id = (uintptr_t)bp1;
16473302Sagiri 			wrp[ix].wr_flags = IBT_WR_NO_FLAGS;
16483302Sagiri 			wrp[ix].wr_trans = IBT_RC_SRV;
16493302Sagiri 			wrp[ix].wr_opcode = IBT_WRC_SEND;
16503302Sagiri 			wrp[ix].wr_nds = 1;
16513302Sagiri 			wrp[ix].wr_sgl = &bp1->buf_ds;
16523302Sagiri 			bp1 = bp1->buf_nextp;
16533302Sagiri 		}
16543302Sagiri 		wrp[npkts - 1].wr_flags = IBT_WR_SEND_SOLICIT;
16553302Sagiri 
16563302Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, wrp, npkts, &ix);
16573302Sagiri 		if (ret != IBT_SUCCESS) {
16583302Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
16593302Sagiri 			    "%d for %d pkts", ep, ret, npkts);
16603302Sagiri 			rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE);
16613302Sagiri 			kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
16623302Sagiri 			return (ret);
16633302Sagiri 		}
16643302Sagiri 
16653302Sagiri 		kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
16663302Sagiri 	} else {
16673302Sagiri 		/* single pkt */
16683302Sagiri 		RDS_DPRINTF5(LABEL, "EP(%p) Sending Single Packet", ep);
16693302Sagiri 		wr.wr_id = (uintptr_t)bp;
16703302Sagiri 		wr.wr_flags = IBT_WR_SEND_SOLICIT;
16713302Sagiri 		wr.wr_trans = IBT_RC_SRV;
16723302Sagiri 		wr.wr_opcode = IBT_WRC_SEND;
16733302Sagiri 		wr.wr_nds = 1;
16743302Sagiri 		wr.wr_sgl = &bp->buf_ds;
16753302Sagiri 		RDS_DPRINTF5(LABEL, "ds_va %p ds_key 0x%llx ds_len %d ",
16763302Sagiri 		    bp->buf_ds.ds_va, bp->buf_ds.ds_key, bp->buf_ds.ds_len);
16773302Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL);
16783302Sagiri 		if (ret != IBT_SUCCESS) {
16793302Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
16803302Sagiri 			    "%d", ep, ret);
16813302Sagiri 			rds_free_send_buf(ep, bp, NULL, 1, B_FALSE);
16823302Sagiri 			return (ret);
16833302Sagiri 		}
16843302Sagiri 	}
16853302Sagiri 
16863302Sagiri 	RDS_INCR_TXPKTS(npkts);
16873302Sagiri 	RDS_INCR_TXBYTES(msgsize);
16883302Sagiri 
16893302Sagiri 	RDS_DPRINTF4("rds_build_n_post_msg", "Return: EP(%p) UIOP(%p)",
16903302Sagiri 	    ep, uiop);
16913302Sagiri 
16923302Sagiri 	return (0);
16933302Sagiri }
16943302Sagiri 
16953302Sagiri static int
rds_deliver_loopback_msg(uio_t * uiop,ipaddr_t recvip,ipaddr_t sendip,in_port_t recvport,in_port_t sendport,zoneid_t zoneid)16963302Sagiri rds_deliver_loopback_msg(uio_t *uiop, ipaddr_t recvip, ipaddr_t sendip,
16973302Sagiri     in_port_t recvport, in_port_t sendport, zoneid_t zoneid)
16983302Sagiri {
16993302Sagiri 	mblk_t		*mp;
17003302Sagiri 	int		ret;
17013302Sagiri 
17023302Sagiri 	RDS_DPRINTF4("rds_deliver_loopback_msg", "Enter");
17033302Sagiri 
17043302Sagiri 	RDS_DPRINTF3(LABEL, "Loopback message: sendport: "
17053302Sagiri 	    "%d to recvport: %d", sendport, recvport);
17063302Sagiri 
17073302Sagiri 	mp = allocb(uiop->uio_resid, BPRI_MED);
17083302Sagiri 	if (mp == NULL) {
17093302Sagiri 		RDS_DPRINTF2(LABEL, "allocb failed, size: %d\n",
17103302Sagiri 		    uiop->uio_resid);
17113302Sagiri 		return (ENOSPC);
17123302Sagiri 	}
17133302Sagiri 	mp->b_wptr = mp->b_rptr + uiop->uio_resid;
17143302Sagiri 
17153302Sagiri 	ret = uiomove(mp->b_rptr, uiop->uio_resid, UIO_WRITE, uiop);
17163302Sagiri 	if (ret) {
17173302Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: uiomove returned: %d", ret);
17183302Sagiri 		freeb(mp);
17193302Sagiri 		return (ret);
17203302Sagiri 	}
17213302Sagiri 
17223302Sagiri 	ret = rds_deliver_new_msg(mp, recvip, sendip, recvport, sendport,
17233302Sagiri 	    zoneid);
17243302Sagiri 	if (ret != 0) {
17253302Sagiri 		if (ret == ENOSPC) {
17263302Sagiri 			/*
17273302Sagiri 			 * The message is delivered but cannot take more,
17284467Sagiri 			 * stop further loopback traffic to this port
17293302Sagiri 			 */
17304467Sagiri 			RDS_DPRINTF3("rds_deliver_loopback_msg",
17314467Sagiri 			    "Port %d NO SPACE", recvport);
17324467Sagiri 			rds_stall_port(NULL, recvport, RDS_LOOPBACK);
17333302Sagiri 		} else {
17343302Sagiri 			RDS_DPRINTF2(LABEL, "Loopback message: port %d -> "
17353302Sagiri 			    "port %d failed: %d", sendport, recvport, ret);
17363302Sagiri 			return (ret);
17373302Sagiri 		}
17383302Sagiri 	}
17393302Sagiri 
17403302Sagiri 	RDS_DPRINTF4("rds_deliver_loopback_msg", "Return");
17413302Sagiri 	return (0);
17423302Sagiri }
17433302Sagiri 
17443302Sagiri static void
rds_resend_messages(void * arg)17453302Sagiri rds_resend_messages(void *arg)
17463302Sagiri {
17473302Sagiri 	rds_session_t	*sp = (rds_session_t *)arg;
17483302Sagiri 	rds_ep_t	*ep;
17493302Sagiri 	rds_bufpool_t	*spool;
17503302Sagiri 	rds_buf_t	*bp, *endp, *tmp;
17513302Sagiri 	ibt_send_wr_t	*wrp;
17523302Sagiri 	uint_t		nwr = 0, ix, jx;
17533302Sagiri 	int		ret;
17543302Sagiri 
17553302Sagiri 	RDS_DPRINTF2("rds_resend_messages", "Enter: SP(%p)", sp);
17563302Sagiri 
17573302Sagiri 	ep = &sp->session_dataep;
17583302Sagiri 
17593302Sagiri 	spool = &ep->ep_sndpool;
17603302Sagiri 	mutex_enter(&spool->pool_lock);
17613302Sagiri 
17623302Sagiri 	ASSERT(spool->pool_nfree == spool->pool_nbuffers);
17633302Sagiri 
17643302Sagiri 	if (ep->ep_lbufid == NULL) {
17653302Sagiri 		RDS_DPRINTF2("rds_resend_messages",
17663302Sagiri 		    "SP(%p) Remote session is cleaned up ", sp);
17673302Sagiri 		/*
17683302Sagiri 		 * The remote end cleaned up its session. There may be loss
17693302Sagiri 		 * of messages. Mark all buffers as acknowledged.
17703302Sagiri 		 */
17713302Sagiri 		tmp = spool->pool_tailp;
17723302Sagiri 	} else {
17733302Sagiri 		tmp = (rds_buf_t *)ep->ep_lbufid;
17743302Sagiri 		RDS_DPRINTF2("rds_resend_messages",
17753302Sagiri 		    "SP(%p) Last successful BP(%p) ", sp, tmp);
17763302Sagiri 	}
17773302Sagiri 
17783302Sagiri 	endp = spool->pool_tailp;
17793302Sagiri 	bp = spool->pool_headp;
17803302Sagiri 	jx = 0;
17813302Sagiri 	while ((bp != NULL) && (bp != tmp)) {
17823302Sagiri 		bp->buf_state = RDS_SNDBUF_FREE;
17833302Sagiri 		jx++;
17843302Sagiri 		bp = bp->buf_nextp;
17853302Sagiri 	}
17863302Sagiri 
17873302Sagiri 	if (bp == NULL) {
17883302Sagiri 		mutex_exit(&spool->pool_lock);
17893302Sagiri 		RDS_DPRINTF2("rds_resend_messages", "Alert: lbufid(%p) is not "
17903302Sagiri 		    "found in the list", tmp);
17913302Sagiri 
17923302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
17933302Sagiri 		if (sp->session_state == RDS_SESSION_STATE_INIT) {
17943302Sagiri 			sp->session_state = RDS_SESSION_STATE_CONNECTED;
17953302Sagiri 		} else {
17963302Sagiri 			RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d "
17973302Sagiri 			    "Expected State: %d", sp, sp->session_state,
17983302Sagiri 			    RDS_SESSION_STATE_CONNECTED);
17993302Sagiri 		}
18004467Sagiri 		sp->session_failover = 0;
18013302Sagiri 		rw_exit(&sp->session_lock);
18023302Sagiri 		return;
18033302Sagiri 	}
18043302Sagiri 
18053302Sagiri 	/* Found the match */
18063302Sagiri 	bp->buf_state = RDS_SNDBUF_FREE;
18073302Sagiri 	jx++;
18083302Sagiri 
18093302Sagiri 	spool->pool_tailp = bp;
18103302Sagiri 	bp = bp->buf_nextp;
18113302Sagiri 	spool->pool_tailp->buf_nextp = NULL;
18123302Sagiri 	nwr = spool->pool_nfree - jx;
18133302Sagiri 	spool->pool_nfree = jx;
18143302Sagiri 	mutex_exit(&spool->pool_lock);
18153302Sagiri 
18163302Sagiri 	RDS_DPRINTF2("rds_resend_messages", "SP(%p): Number of "
18173302Sagiri 	    "bufs (BP %p) to re-send: %d", sp, bp, nwr);
18183302Sagiri 
18193302Sagiri 	if (bp) {
18203302Sagiri 		wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 100,
18213302Sagiri 		    KM_SLEEP);
18223302Sagiri 
18233302Sagiri 		while (nwr) {
18243302Sagiri 			jx = (nwr > 100) ? 100 : nwr;
18253302Sagiri 
18263302Sagiri 			tmp = bp;
18273302Sagiri 			for (ix = 0; ix < jx; ix++) {
18283302Sagiri 				bp->buf_state = RDS_SNDBUF_PENDING;
18293302Sagiri 				wrp[ix].wr_id = (uintptr_t)bp;
18303302Sagiri 				wrp[ix].wr_flags = IBT_WR_SEND_SOLICIT;
18313302Sagiri 				wrp[ix].wr_trans = IBT_RC_SRV;
18323302Sagiri 				wrp[ix].wr_opcode = IBT_WRC_SEND;
18333302Sagiri 				wrp[ix].wr_nds = 1;
18343302Sagiri 				wrp[ix].wr_sgl = &bp->buf_ds;
18353302Sagiri 				bp = bp->buf_nextp;
18363302Sagiri 			}
18373302Sagiri 
18383302Sagiri 			ret = ibt_post_send(ep->ep_chanhdl, wrp, jx, &ix);
18393302Sagiri 			if (ret != IBT_SUCCESS) {
18403302Sagiri 				RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send "
18413302Sagiri 				    "failed: %d for % pkts", ep, ret, jx);
18423302Sagiri 				break;
18433302Sagiri 			}
18443302Sagiri 
18453302Sagiri 			mutex_enter(&spool->pool_lock);
18463302Sagiri 			spool->pool_nbusy += jx;
18473302Sagiri 			mutex_exit(&spool->pool_lock);
18483302Sagiri 
18493302Sagiri 			nwr -= jx;
18503302Sagiri 		}
18513302Sagiri 
18523302Sagiri 		kmem_free(wrp, sizeof (ibt_send_wr_t) * 100);
18533302Sagiri 
18543302Sagiri 		if (nwr != 0) {
18553302Sagiri 
18563302Sagiri 			/*
18573302Sagiri 			 * An error while failover is in progress. Some WRs are
18583302Sagiri 			 * posted while other remain. If any of the posted WRs
18593302Sagiri 			 * complete in error then they would dispatch a taskq to
18603302Sagiri 			 * do a failover. Getting the session lock will prevent
18613302Sagiri 			 * the taskq to wait until we are done here.
18623302Sagiri 			 */
18633302Sagiri 			rw_enter(&sp->session_lock, RW_READER);
18643302Sagiri 
18653302Sagiri 			/*
18663302Sagiri 			 * Wait until all the previous WRs are completed and
18673302Sagiri 			 * then queue the remaining, otherwise the order of
18683302Sagiri 			 * the messages may change.
18693302Sagiri 			 */
18703302Sagiri 			(void) rds_is_sendq_empty(ep, 1);
18713302Sagiri 
18723302Sagiri 			/* free the remaining buffers */
18733302Sagiri 			rds_free_send_buf(ep, tmp, endp, nwr, B_FALSE);
18743302Sagiri 
18753302Sagiri 			rw_exit(&sp->session_lock);
18763302Sagiri 			return;
18773302Sagiri 		}
18783302Sagiri 	}
18793302Sagiri 
18803302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
18813302Sagiri 	if (sp->session_state == RDS_SESSION_STATE_INIT) {
18823302Sagiri 		sp->session_state = RDS_SESSION_STATE_CONNECTED;
18833302Sagiri 	} else {
18843302Sagiri 		RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d "
18853302Sagiri 		    "Expected State: %d", sp, sp->session_state,
18863302Sagiri 		    RDS_SESSION_STATE_CONNECTED);
18873302Sagiri 	}
18884467Sagiri 	sp->session_failover = 0;
18893302Sagiri 	rw_exit(&sp->session_lock);
18903302Sagiri 
18913302Sagiri 	RDS_DPRINTF2("rds_resend_messages", "Return: SP(%p)", sp);
18923302Sagiri }
18933302Sagiri 
18943302Sagiri /*
18953302Sagiri  * This is called when a channel is connected. Transition the session to
18963302Sagiri  * CONNECTED state iff both channels are connected.
18973302Sagiri  */
18983302Sagiri void
rds_session_active(rds_session_t * sp)18993302Sagiri rds_session_active(rds_session_t *sp)
19003302Sagiri {
19013302Sagiri 	rds_ep_t	*ep;
19023302Sagiri 	uint_t		failover;
19033302Sagiri 
19043302Sagiri 	RDS_DPRINTF2("rds_session_active", "Enter: 0x%p", sp);
19053302Sagiri 
19063302Sagiri 	rw_enter(&sp->session_lock, RW_READER);
19073302Sagiri 
19083302Sagiri 	failover = sp->session_failover;
19093302Sagiri 
19103302Sagiri 	/*
19113302Sagiri 	 * we establish the data channel first, so check the control channel
19123302Sagiri 	 * first but make sure it is initialized.
19133302Sagiri 	 */
19143302Sagiri 	ep = &sp->session_ctrlep;
19153302Sagiri 	mutex_enter(&ep->ep_lock);
19163302Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
19173302Sagiri 		/* the session is not ready yet */
19183302Sagiri 		mutex_exit(&ep->ep_lock);
19193302Sagiri 		rw_exit(&sp->session_lock);
19203302Sagiri 		return;
19213302Sagiri 	}
19223302Sagiri 	mutex_exit(&ep->ep_lock);
19233302Sagiri 
19243302Sagiri 	/* control channel is connected, check the data channel */
19253302Sagiri 	ep = &sp->session_dataep;
19263302Sagiri 	mutex_enter(&ep->ep_lock);
19273302Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
19283302Sagiri 		/* data channel is not yet connected */
19293302Sagiri 		mutex_exit(&ep->ep_lock);
19303302Sagiri 		rw_exit(&sp->session_lock);
19313302Sagiri 		return;
19323302Sagiri 	}
19333302Sagiri 	mutex_exit(&ep->ep_lock);
19343302Sagiri 
19353302Sagiri 	if (failover) {
19363302Sagiri 		rw_exit(&sp->session_lock);
19373302Sagiri 
19383302Sagiri 		/*
19393302Sagiri 		 * The session has failed over. Previous msgs have to be
19403302Sagiri 		 * re-sent before the session is moved to the connected
19413302Sagiri 		 * state.
19423302Sagiri 		 */
19433302Sagiri 		RDS_DPRINTF2("rds_session_active", "SP(%p) Dispatching taskq "
19443302Sagiri 		    "to re-send messages", sp);
19453302Sagiri 		(void) ddi_taskq_dispatch(rds_taskq,
19463302Sagiri 		    rds_resend_messages, (void *)sp, DDI_SLEEP);
19473302Sagiri 		return;
19483302Sagiri 	}
19493302Sagiri 
19503302Sagiri 	/* the session is ready */
19513302Sagiri 	sp->session_state = RDS_SESSION_STATE_CONNECTED;
19523302Sagiri 	RDS_DPRINTF3("rds_session_active",
19533302Sagiri 	    "SP(%p) State RDS_SESSION_STATE_CONNECTED", sp);
19543302Sagiri 
19553302Sagiri 	rw_exit(&sp->session_lock);
19563302Sagiri 
19573302Sagiri 	RDS_DPRINTF2("rds_session_active", "Return: SP(%p) is CONNECTED", sp);
19583302Sagiri }
19593302Sagiri 
19603302Sagiri static int
rds_ep_sendmsg(rds_ep_t * ep,uio_t * uiop,in_port_t sendport,in_port_t recvport)19613302Sagiri rds_ep_sendmsg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport,
19623302Sagiri     in_port_t recvport)
19633302Sagiri {
19643302Sagiri 	int	ret;
19653302Sagiri 
19663302Sagiri 	RDS_DPRINTF4("rds_ep_sendmsg", "Enter: EP(%p) sendport: %d recvport: "
19673302Sagiri 	    "%d", ep, sendport, recvport);
19683302Sagiri 
19694467Sagiri 	/* make sure the remote port is not stalled */
19704467Sagiri 	if (rds_is_port_marked(ep->ep_sp, recvport, RDS_REMOTE)) {
19713302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Port:%d is in stall state",
19723302Sagiri 		    ep->ep_sp, recvport);
19733302Sagiri 		RDS_INCR_EWOULDBLOCK();
19743302Sagiri 		ret = ENOMEM;
19753302Sagiri 	} else {
19763302Sagiri 		ret = rds_build_n_post_msg(ep, uiop, sendport, recvport);
19773302Sagiri 	}
19783302Sagiri 
19793302Sagiri 	RDS_DPRINTF4("rds_ep_sendmsg", "Return: EP(%p)", ep);
19803302Sagiri 
19813302Sagiri 	return (ret);
19823302Sagiri }
19833302Sagiri 
19843302Sagiri /* Send a message to a destination socket */
19853302Sagiri int
rds_sendmsg(uio_t * uiop,ipaddr_t sendip,ipaddr_t recvip,in_port_t sendport,in_port_t recvport,zoneid_t zoneid)19863302Sagiri rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport,
19873302Sagiri     in_port_t recvport, zoneid_t zoneid)
19883302Sagiri {
19893302Sagiri 	rds_session_t	*sp;
19903302Sagiri 	ib_gid_t	lgid, rgid;
19913302Sagiri 	int		ret;
19923302Sagiri 
19933302Sagiri 	RDS_DPRINTF4("rds_sendmsg", "Enter: uiop: 0x%p, srcIP: 0x%x destIP: "
19943302Sagiri 	    "0x%x sndport: %d recvport: %d", uiop, sendip, recvip,
19953302Sagiri 	    sendport, recvport);
19963302Sagiri 
19973302Sagiri 	/* If msg length is 0, just return success */
19983302Sagiri 	if (uiop->uio_resid == 0) {
19993302Sagiri 		RDS_DPRINTF2("rds_sendmsg", "Zero sized message");
20003302Sagiri 		return (0);
20013302Sagiri 	}
20023302Sagiri 
20033302Sagiri 	/* Is there a session to the destination? */
20043302Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
20053302Sagiri 	sp = rds_session_lkup(rdsib_statep, recvip, 0);
20063302Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
20073302Sagiri 
20083302Sagiri 	/* Is this a loopback message? */
20093302Sagiri 	if ((sp == NULL) && (rds_islocal(recvip))) {
20103302Sagiri 		/* make sure the port is not stalled */
20114467Sagiri 		if (rds_is_port_marked(NULL, recvport, RDS_LOOPBACK)) {
20123302Sagiri 			RDS_DPRINTF2(LABEL, "Local Port:%d is in stall state",
20133302Sagiri 			    recvport);
20143302Sagiri 			RDS_INCR_EWOULDBLOCK();
20153302Sagiri 			return (ENOMEM);
20163302Sagiri 		}
20173302Sagiri 		ret = rds_deliver_loopback_msg(uiop, recvip, sendip, recvport,
20183302Sagiri 		    sendport, zoneid);
20193302Sagiri 		return (ret);
20203302Sagiri 	}
20213302Sagiri 
20223302Sagiri 	/* Not a loopback message */
20233302Sagiri 	if (sp == NULL) {
20243302Sagiri 		/* There is no session to the destination, create one. */
20253302Sagiri 		RDS_DPRINTF3(LABEL, "There is no session to the destination "
20263302Sagiri 		    "IP: 0x%x", recvip);
20273302Sagiri 		sp = rds_session_create(rdsib_statep, sendip, recvip, NULL,
20283302Sagiri 		    RDS_SESSION_ACTIVE);
20293302Sagiri 		if (sp != NULL) {
20303302Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
20313302Sagiri 			if (sp->session_type == RDS_SESSION_ACTIVE) {
20323302Sagiri 				ret = rds_session_init(sp);
20333302Sagiri 				if (ret != 0) {
20343302Sagiri 					RDS_DPRINTF2("rds_sendmsg",
20353302Sagiri 					    "SP(%p): rds_session_init failed",
20363302Sagiri 					    sp);
20373302Sagiri 					sp->session_state =
20383302Sagiri 					    RDS_SESSION_STATE_FAILED;
20393302Sagiri 					RDS_DPRINTF3("rds_sendmsg",
20403302Sagiri 					    "SP(%p) State "
20413302Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
20423302Sagiri 					rw_exit(&sp->session_lock);
20433302Sagiri 					return (EFAULT);
20443302Sagiri 				}
20453302Sagiri 				sp->session_state = RDS_SESSION_STATE_INIT;
20463302Sagiri 				RDS_DPRINTF3("rds_sendmsg",
20473302Sagiri 				    "SP(%p) State "
20483302Sagiri 				    "RDS_SESSION_STATE_INIT", sp);
20493302Sagiri 				rw_exit(&sp->session_lock);
20503302Sagiri 				rds_session_open(sp);
20513302Sagiri 			} else {
20523302Sagiri 				rw_exit(&sp->session_lock);
20533302Sagiri 			}
20543302Sagiri 		} else {
20553302Sagiri 			/* Is a session created for this destination */
20563302Sagiri 			rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
20573302Sagiri 			sp = rds_session_lkup(rdsib_statep, recvip, 0);
20583302Sagiri 			rw_exit(&rdsib_statep->rds_sessionlock);
20593302Sagiri 			if (sp == NULL) {
20603302Sagiri 				return (EFAULT);
20613302Sagiri 			}
20623302Sagiri 		}
20633302Sagiri 	}
20643302Sagiri 
20653302Sagiri 	/* There is a session to the destination */
20663302Sagiri 	rw_enter(&sp->session_lock, RW_READER);
20673302Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CONNECTED) {
20683302Sagiri 		rw_exit(&sp->session_lock);
20693302Sagiri 
20703302Sagiri 		ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport,
20713302Sagiri 		    recvport);
20723302Sagiri 		return (ret);
20733302Sagiri 	} else if ((sp->session_state == RDS_SESSION_STATE_FAILED) ||
20743302Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI)) {
20753302Sagiri 		ipaddr_t sendip1, recvip1;
20763302Sagiri 
20773302Sagiri 		RDS_DPRINTF3("rds_sendmsg", "SP(%p) is not connected, State: "
20786438Sagiri 		    "%d", sp, sp->session_state);
20793302Sagiri 		rw_exit(&sp->session_lock);
20803302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
20813302Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_FAILED) ||
20823302Sagiri 		    (sp->session_state == RDS_SESSION_STATE_FINI)) {
20834703Shiremath 			ibt_ip_path_attr_t	ipattr;
20844703Shiremath 			ibt_ip_addr_t		dstip;
20854703Shiremath 
20863302Sagiri 			sp->session_state = RDS_SESSION_STATE_CREATED;
20873302Sagiri 			sp->session_type = RDS_SESSION_ACTIVE;
20883302Sagiri 			RDS_DPRINTF3("rds_sendmsg", "SP(%p) State "
20893302Sagiri 			    "RDS_SESSION_STATE_CREATED", sp);
20903302Sagiri 			rw_exit(&sp->session_lock);
20913302Sagiri 
20923302Sagiri 
20933302Sagiri 			/* The ipaddr should be in the network order */
20943302Sagiri 			sendip1 = sendip;
20953302Sagiri 			recvip1 = recvip;
20963302Sagiri 			ret = rds_sc_path_lookup(&sendip1, &recvip1);
20973302Sagiri 			if (ret == 0) {
20983302Sagiri 				RDS_DPRINTF2(LABEL, "Path not found "
20993302Sagiri 				    "(0x%x 0x%x)", sendip1, recvip1);
21003302Sagiri 			}
21013302Sagiri 
21023302Sagiri 			/* Resolve the IP addresses */
21034467Sagiri 			lgid.gid_prefix = 0;
21044467Sagiri 			lgid.gid_guid = 0;
21054467Sagiri 			rgid.gid_prefix = 0;
21064467Sagiri 			rgid.gid_guid = 0;
21074703Shiremath 
21084703Shiremath 			bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
21094703Shiremath 			dstip.family = AF_INET;
21107919SBill.Taylor@Sun.COM 			dstip.un.ip4addr = recvip1;
21114703Shiremath 			ipattr.ipa_dst_ip = &dstip;
21124703Shiremath 			ipattr.ipa_src_ip.family = AF_INET;
21137919SBill.Taylor@Sun.COM 			ipattr.ipa_src_ip.un.ip4addr = sendip1;
21144703Shiremath 			ipattr.ipa_ndst = 1;
21154703Shiremath 			ipattr.ipa_max_paths = 1;
21164703Shiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
21174703Shiremath 			    sendip1, recvip1);
21184703Shiremath 			ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
21194703Shiremath 			    IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo,
21204703Shiremath 			    NULL, NULL);
21214703Shiremath 			if (ret != IBT_SUCCESS) {
21226702Sagiri 				RDS_DPRINTF2("rds_sendmsg",
21234703Shiremath 				    "ibt_get_ip_paths failed, ret: %d ", ret);
21244467Sagiri 
21253302Sagiri 				rw_enter(&sp->session_lock, RW_WRITER);
21263302Sagiri 				if (sp->session_type == RDS_SESSION_ACTIVE) {
21273302Sagiri 					sp->session_state =
21283302Sagiri 					    RDS_SESSION_STATE_FAILED;
21293302Sagiri 					RDS_DPRINTF3("rds_sendmsg",
21303302Sagiri 					    "SP(%p) State "
21313302Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
21323302Sagiri 					rw_exit(&sp->session_lock);
21333302Sagiri 					return (EFAULT);
21343302Sagiri 				} else {
21353302Sagiri 					rw_exit(&sp->session_lock);
21363302Sagiri 					return (ENOMEM);
21373302Sagiri 				}
21383302Sagiri 			}
21394703Shiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
21404703Shiremath 			lgid = sp->session_pinfo.
21414703Shiremath 			    pi_prim_cep_path.cep_adds_vect.av_sgid;
21424703Shiremath 			rgid = sp->session_pinfo.
21434703Shiremath 			    pi_prim_cep_path.cep_adds_vect.av_dgid;
21443302Sagiri 
21453302Sagiri 			RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
21463302Sagiri 			    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
21473302Sagiri 			    rgid.gid_guid);
21483302Sagiri 
21493302Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
21503302Sagiri 			if (sp->session_type == RDS_SESSION_ACTIVE) {
21513302Sagiri 				sp->session_lgid = lgid;
21523302Sagiri 				sp->session_rgid = rgid;
21533302Sagiri 				ret = rds_session_init(sp);
21543302Sagiri 				if (ret != 0) {
21553302Sagiri 					RDS_DPRINTF2("rds_sendmsg",
21563302Sagiri 					    "SP(%p): rds_session_init failed",
21573302Sagiri 					    sp);
21583302Sagiri 					sp->session_state =
21593302Sagiri 					    RDS_SESSION_STATE_FAILED;
21603302Sagiri 					RDS_DPRINTF3("rds_sendmsg",
21613302Sagiri 					    "SP(%p) State "
21623302Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
21633302Sagiri 					rw_exit(&sp->session_lock);
21643302Sagiri 					return (EFAULT);
21653302Sagiri 				}
21663302Sagiri 				sp->session_state = RDS_SESSION_STATE_INIT;
21673302Sagiri 				rw_exit(&sp->session_lock);
21683302Sagiri 
21693302Sagiri 				rds_session_open(sp);
21703302Sagiri 
21713302Sagiri 			} else {
21724467Sagiri 				RDS_DPRINTF2("rds_sendmsg",
21734467Sagiri 				    "SP(%p): type changed to %d",
21744467Sagiri 				    sp, sp->session_type);
21753302Sagiri 				rw_exit(&sp->session_lock);
21763302Sagiri 				return (ENOMEM);
21773302Sagiri 			}
21783302Sagiri 		} else {
21794467Sagiri 			RDS_DPRINTF2("rds_sendmsg",
21804467Sagiri 			    "SP(%p): Session state %d changed",
21813302Sagiri 			    sp, sp->session_state);
21823302Sagiri 			rw_exit(&sp->session_lock);
21833302Sagiri 			return (ENOMEM);
21843302Sagiri 		}
21853302Sagiri 	} else {
21868082SRamaswamy.Tummala@Sun.COM 		RDS_DPRINTF4("rds_sendmsg", "SP(%p): Session is in %d state",
21873302Sagiri 		    sp, sp->session_state);
21883302Sagiri 		rw_exit(&sp->session_lock);
21893302Sagiri 		return (ENOMEM);
21903302Sagiri 	}
21913302Sagiri 
21923302Sagiri 	rw_enter(&sp->session_lock, RW_READER);
21933302Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CONNECTED) {
21943302Sagiri 		rw_exit(&sp->session_lock);
21953302Sagiri 
21963302Sagiri 		ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport,
21973302Sagiri 		    recvport);
21983302Sagiri 	} else {
21994467Sagiri 		RDS_DPRINTF2("rds_sendmsg", "SP(%p): state(%d) not connected",
22003302Sagiri 		    sp, sp->session_state);
22013302Sagiri 		rw_exit(&sp->session_lock);
22023302Sagiri 	}
22033302Sagiri 
22043302Sagiri 	RDS_DPRINTF4("rds_sendmsg", "Return: SP(%p) ret: %d", sp, ret);
22053302Sagiri 
22063302Sagiri 	return (ret);
22073302Sagiri }
22083302Sagiri 
22093302Sagiri /* Note: This is called on the CQ handler thread */
22103302Sagiri void
rds_received_msg(rds_ep_t * ep,rds_buf_t * bp)22113302Sagiri rds_received_msg(rds_ep_t *ep, rds_buf_t *bp)
22123302Sagiri {
22133302Sagiri 	mblk_t		*mp, *mp1;
22143302Sagiri 	rds_data_hdr_t	*pktp, *pktp1;
22153302Sagiri 	uint8_t		*datap;
22163302Sagiri 	rds_buf_t	*bp1;
22173302Sagiri 	rds_bufpool_t	*rpool;
22183302Sagiri 	uint_t		npkts, ix;
22193302Sagiri 	int		ret;
22203302Sagiri 
22213302Sagiri 	RDS_DPRINTF4("rds_received_msg", "Enter: EP(%p)", ep);
22223302Sagiri 
22233302Sagiri 	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
22243302Sagiri 	datap = ((uint8_t *)(uintptr_t)bp->buf_ds.ds_va) + RDS_DATA_HDR_SZ;
22253302Sagiri 	npkts = pktp->dh_npkts;
22263302Sagiri 
22273302Sagiri 	/* increment rx pending here */
22283302Sagiri 	rpool = &ep->ep_rcvpool;
22293302Sagiri 	mutex_enter(&rpool->pool_lock);
22303302Sagiri 	rpool->pool_nbusy += npkts;
22313302Sagiri 	mutex_exit(&rpool->pool_lock);
22323302Sagiri 
22333302Sagiri 	/* this will get freed by sockfs */
22343302Sagiri 	mp = esballoc(datap, pktp->dh_datalen, BPRI_HI, &bp->buf_frtn);
22353302Sagiri 	if (mp == NULL) {
22363302Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed",
22373302Sagiri 		    ep, bp);
22383302Sagiri 		rds_free_recv_buf(bp, npkts);
22393302Sagiri 		return;
22403302Sagiri 	}
22413302Sagiri 	mp->b_wptr = datap + pktp->dh_datalen;
22423302Sagiri 	mp->b_datap->db_type = M_DATA;
22433302Sagiri 
22443302Sagiri 	mp1 = mp;
22453302Sagiri 	bp1 = bp->buf_nextp;
22463302Sagiri 	while (bp1 != NULL) {
22473302Sagiri 		pktp1 = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va;
22483302Sagiri 		datap = ((uint8_t *)(uintptr_t)bp1->buf_ds.ds_va) +
22493302Sagiri 		    RDS_DATA_HDR_SZ;
22503302Sagiri 
22513302Sagiri 		mp1->b_cont = esballoc(datap, pktp1->dh_datalen,
22523302Sagiri 		    BPRI_HI, &bp1->buf_frtn);
22533302Sagiri 		if (mp1->b_cont == NULL) {
22543302Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed",
22553302Sagiri 			    ep, bp1);
22563302Sagiri 			freemsg(mp);
22573302Sagiri 			rds_free_recv_buf(bp1, pktp1->dh_npkts);
22583302Sagiri 			return;
22593302Sagiri 		}
22603302Sagiri 		mp1 = mp1->b_cont;
22613302Sagiri 		mp1->b_wptr = datap + pktp1->dh_datalen;
22623302Sagiri 		mp1->b_datap->db_type = M_DATA;
22633302Sagiri 
22643302Sagiri 		bp1 = bp1->buf_nextp;
22653302Sagiri 	}
22663302Sagiri 
22673302Sagiri 	RDS_INCR_RXPKTS_PEND(npkts);
22683302Sagiri 	RDS_INCR_RXPKTS(npkts);
22693302Sagiri 	RDS_INCR_RXBYTES(msgdsize(mp));
22703302Sagiri 
22713302Sagiri 	RDS_DPRINTF5(LABEL, "Deliver Message: sendIP: 0x%x recvIP: 0x%x "
22723302Sagiri 	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
22733302Sagiri 	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
22743302Sagiri 	    npkts, pktp->dh_psn);
22753302Sagiri 
22763302Sagiri 	/* store the last buffer id, no lock needed */
22773302Sagiri 	if (npkts > 1) {
22783302Sagiri 		ep->ep_rbufid = pktp1->dh_bufid;
22793302Sagiri 	} else {
22803302Sagiri 		ep->ep_rbufid = pktp->dh_bufid;
22813302Sagiri 	}
22823302Sagiri 
22833302Sagiri 	ret = rds_deliver_new_msg(mp, ep->ep_myip, ep->ep_remip,
22843302Sagiri 	    pktp->dh_recvport, pktp->dh_sendport, ALL_ZONES);
22853302Sagiri 	if (ret != 0) {
22863302Sagiri 		if (ret == ENOSPC) {
22873302Sagiri 			/*
22883302Sagiri 			 * The message is delivered but cannot take more,
22894467Sagiri 			 * stop further remote messages coming to this port
22903302Sagiri 			 */
22914467Sagiri 			RDS_DPRINTF3("rds_received_msg", "Port %d NO SPACE",
22923302Sagiri 			    pktp->dh_recvport);
22934467Sagiri 			rds_stall_port(ep->ep_sp, pktp->dh_recvport, RDS_LOCAL);
22943302Sagiri 		} else {
22956702Sagiri 			RDS_DPRINTF2(LABEL, "rds_deliver_new_msg returned: %d",
22963302Sagiri 			    ret);
22973302Sagiri 		}
22983302Sagiri 	}
22993302Sagiri 
23003302Sagiri 	mutex_enter(&ep->ep_lock);
23016438Sagiri 	/* The first message can come in before the conn est event */
23026438Sagiri 	if ((ep->ep_rdmacnt == 0) && (ep->ep_state == RDS_EP_STATE_CONNECTED)) {
23033302Sagiri 		ep->ep_rdmacnt++;
23043302Sagiri 		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
23053302Sagiri 		mutex_exit(&ep->ep_lock);
23063302Sagiri 
23073302Sagiri 		/* send acknowledgement */
23083302Sagiri 		RDS_INCR_TXACKS();
23093302Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
23103302Sagiri 		if (ret != IBT_SUCCESS) {
23116702Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send for "
23123302Sagiri 			    "acknowledgement failed: %d, SQ depth: %d",
23133302Sagiri 			    ep, ret, ep->ep_sndpool.pool_nbusy);
23143302Sagiri 			mutex_enter(&ep->ep_lock);
23153302Sagiri 			ep->ep_rdmacnt--;
23163302Sagiri 			mutex_exit(&ep->ep_lock);
23173302Sagiri 		}
23183302Sagiri 	} else {
23193302Sagiri 		/* no room to send acknowledgement */
23203302Sagiri 		mutex_exit(&ep->ep_lock);
23213302Sagiri 	}
23223302Sagiri 
23233302Sagiri 	RDS_DPRINTF4("rds_received_msg", "Return: EP(%p)", ep);
23243302Sagiri }
2325