xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c (revision 10489:180acaca223b)
13302Sagiri /*
23302Sagiri  * CDDL HEADER START
33302Sagiri  *
43302Sagiri  * The contents of this file are subject to the terms of the
53302Sagiri  * Common Development and Distribution License (the "License").
63302Sagiri  * You may not use this file except in compliance with the License.
73302Sagiri  *
83302Sagiri  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93302Sagiri  * or http://www.opensolaris.org/os/licensing.
103302Sagiri  * See the License for the specific language governing permissions
113302Sagiri  * and limitations under the License.
123302Sagiri  *
133302Sagiri  * When distributing Covered Code, include this CDDL HEADER in each
143302Sagiri  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153302Sagiri  * If applicable, add the following below this CDDL HEADER, with the
163302Sagiri  * fields enclosed by brackets "[]" replaced with your own identifying
173302Sagiri  * information: Portions Copyright [yyyy] [name of copyright owner]
183302Sagiri  *
193302Sagiri  * CDDL HEADER END
203302Sagiri  */
213302Sagiri /*
22*10489SGiri.Adari@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
233302Sagiri  * Use is subject to license terms.
243302Sagiri  */
253302Sagiri /*
263302Sagiri  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
273302Sagiri  *
283302Sagiri  * This software is available to you under a choice of one of two
293302Sagiri  * licenses.  You may choose to be licensed under the terms of the GNU
303302Sagiri  * General Public License (GPL) Version 2, available from the file
313302Sagiri  * COPYING in the main directory of this source tree, or the
323302Sagiri  * OpenIB.org BSD license below:
333302Sagiri  *
343302Sagiri  *     Redistribution and use in source and binary forms, with or
353302Sagiri  *     without modification, are permitted provided that the following
363302Sagiri  *     conditions are met:
373302Sagiri  *
383302Sagiri  *	- Redistributions of source code must retain the above
393302Sagiri  *	  copyright notice, this list of conditions and the following
403302Sagiri  *	  disclaimer.
413302Sagiri  *
423302Sagiri  *	- Redistributions in binary form must reproduce the above
433302Sagiri  *	  copyright notice, this list of conditions and the following
443302Sagiri  *	  disclaimer in the documentation and/or other materials
453302Sagiri  *	  provided with the distribution.
463302Sagiri  *
473302Sagiri  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
483302Sagiri  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
493302Sagiri  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
503302Sagiri  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
513302Sagiri  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
523302Sagiri  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
533302Sagiri  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
543302Sagiri  * SOFTWARE.
553302Sagiri  *
563302Sagiri  */
573302Sagiri /*
583302Sagiri  * Sun elects to include this software in Sun product
593302Sagiri  * under the OpenIB BSD license.
603302Sagiri  *
613302Sagiri  *
623302Sagiri  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
633302Sagiri  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
643302Sagiri  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
653302Sagiri  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
663302Sagiri  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
673302Sagiri  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
683302Sagiri  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
693302Sagiri  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
703302Sagiri  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
713302Sagiri  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
723302Sagiri  * POSSIBILITY OF SUCH DAMAGE.
733302Sagiri  */
743302Sagiri 
753302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
763302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
773302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
783302Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
793302Sagiri 
/*
 * This file contains CM related work:
 *
 * Service registration/deregistration
 * Path lookup
 * CM connection callbacks
 * CM active and passive connection establishment
 * Connection failover
 */
893302Sagiri 
/*
 * Shorthand for the IPv4 source/destination address members; used in this
 * file as ipcm_info.SRCIP and ipcm_info.DSTIP on an ibt_ip_cm_info_t.
 */
#define	SRCIP	src_addr.un.ip4addr
#define	DSTIP	dst_addr.un.ip4addr
924804Sagiri 
933302Sagiri /*
943302Sagiri  * Handle an incoming CM REQ
953302Sagiri  */
963302Sagiri /* ARGSUSED */
973302Sagiri static ibt_cm_status_t
rds_handle_cm_req(rds_state_t * statep,ibt_cm_event_t * evp,ibt_cm_return_args_t * rargsp,void * rcmp,ibt_priv_data_len_t rcmp_len)983302Sagiri rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
993302Sagiri     ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
1003302Sagiri {
1013302Sagiri 	ibt_cm_req_rcv_t	*reqp;
1023302Sagiri 	ib_gid_t		lgid, rgid;
1033302Sagiri 	rds_cm_private_data_t	cmp;
1043302Sagiri 	rds_session_t		*sp;
1053302Sagiri 	rds_ep_t		*ep;
1063302Sagiri 	ibt_channel_hdl_t	chanhdl;
1074804Sagiri 	ibt_ip_cm_info_t	ipcm_info;
108*10489SGiri.Adari@Sun.COM 	uint8_t			save_state, save_type;
1093302Sagiri 	int			ret;
1103302Sagiri 
1113302Sagiri 	RDS_DPRINTF2("rds_handle_cm_req", "Enter");
1123302Sagiri 
1133302Sagiri 	reqp = &evp->cm_event.req;
1143302Sagiri 	rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
1153302Sagiri 	lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
1163302Sagiri 
1173302Sagiri 	RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
1183302Sagiri 	    rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
1193302Sagiri 
1203302Sagiri 	/*
1213302Sagiri 	 * CM private data brings IP information
1223302Sagiri 	 * Private data received is a stream of bytes and may not be properly
1233302Sagiri 	 * aligned. So, bcopy the data onto the stack before accessing it.
1243302Sagiri 	 */
1253302Sagiri 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
1263302Sagiri 	    sizeof (rds_cm_private_data_t));
1273302Sagiri 
1284804Sagiri 	/* extract the CM IP info */
1294804Sagiri 	ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data,
1304804Sagiri 	    &ipcm_info);
1314804Sagiri 	if (ret != IBT_SUCCESS) {
1324804Sagiri 		RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d",
1334804Sagiri 		    ret);
1344804Sagiri 		return (IBT_CM_REJECT);
1354804Sagiri 	}
1364804Sagiri 
1374804Sagiri 	RDS_DPRINTF2("rds_handle_cm_req",
1384804Sagiri 	    "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
1397919SBill.Taylor@Sun.COM 	    ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype);
1403302Sagiri 
1413302Sagiri 	if (cmp.cmp_version != RDS_VERSION) {
1426702Sagiri 		RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
1433302Sagiri 		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
1443302Sagiri 		return (IBT_CM_REJECT);
1453302Sagiri 	}
1463302Sagiri 
1474804Sagiri 	/* RDS supports V4 addresses only */
1484804Sagiri 	if ((ipcm_info.src_addr.family != AF_INET) ||
1494804Sagiri 	    (ipcm_info.dst_addr.family != AF_INET)) {
1504804Sagiri 		RDS_DPRINTF2(LABEL, "Unsupported Address Family: "
1514804Sagiri 		    "src: %d dst: %d", ipcm_info.src_addr.family,
1524804Sagiri 		    ipcm_info.dst_addr.family);
1534804Sagiri 		return (IBT_CM_REJECT);
1544804Sagiri 	}
1554804Sagiri 
1563302Sagiri 	if (cmp.cmp_arch != RDS_THIS_ARCH) {
1573302Sagiri 		RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
1583302Sagiri 		    cmp.cmp_arch, RDS_THIS_ARCH);
1593302Sagiri 		return (IBT_CM_REJECT);
1603302Sagiri 	}
1613302Sagiri 
1623302Sagiri 	if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
1633302Sagiri 	    (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
1643302Sagiri 		RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
1653302Sagiri 		return (IBT_CM_REJECT);
1663302Sagiri 	}
1673302Sagiri 
1683302Sagiri 	/* user_buffer_size should be same on all nodes */
1693302Sagiri 	if (cmp.cmp_user_buffer_size != UserBufferSize) {
1703302Sagiri 		RDS_DPRINTF2(LABEL,
1713302Sagiri 		    "UserBufferSize Mismatch, this node: %d remote node: %d",
1723302Sagiri 		    UserBufferSize, cmp.cmp_user_buffer_size);
1733302Sagiri 		return (IBT_CM_REJECT);
1743302Sagiri 	}
1753302Sagiri 
1764154Sagiri 	/*
1774154Sagiri 	 * RDS needs more time to process a failover REQ so send an MRA.
1784154Sagiri 	 * Otherwise, the remote may retry the REQ and fail the connection.
1794154Sagiri 	 */
1804154Sagiri 	if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) {
1814154Sagiri 		RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA");
1824154Sagiri 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
1834154Sagiri 		    10000000 /* 10 sec */, NULL, 0);
1844154Sagiri 	}
1854154Sagiri 
1863302Sagiri 	/* Is there a session to the destination node? */
1873302Sagiri 	rw_enter(&statep->rds_sessionlock, RW_READER);
1887919SBill.Taylor@Sun.COM 	sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid);
1893302Sagiri 	rw_exit(&statep->rds_sessionlock);
1903302Sagiri 
1913302Sagiri 	if (sp == NULL) {
1923302Sagiri 		/*
1933302Sagiri 		 * currently there is no session to the destination
1943302Sagiri 		 * remote ip in the private data is the local ip and vice
1953302Sagiri 		 * versa
1963302Sagiri 		 */
1977919SBill.Taylor@Sun.COM 		sp = rds_session_create(statep, ipcm_info.DSTIP,
1987919SBill.Taylor@Sun.COM 		    ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE);
1993302Sagiri 		if (sp == NULL) {
2003302Sagiri 			/* Check the list anyway. */
2013302Sagiri 			rw_enter(&statep->rds_sessionlock, RW_READER);
2027919SBill.Taylor@Sun.COM 			sp = rds_session_lkup(statep, ipcm_info.SRCIP,
2033302Sagiri 			    rgid.gid_guid);
2043302Sagiri 			rw_exit(&statep->rds_sessionlock);
2053302Sagiri 			if (sp == NULL) {
2063302Sagiri 				/*
2073302Sagiri 				 * The only way this can fail is due to lack
2083302Sagiri 				 * of kernel resources
2093302Sagiri 				 */
2103302Sagiri 				return (IBT_CM_REJECT);
2113302Sagiri 			}
2123302Sagiri 		}
2133302Sagiri 	}
2143302Sagiri 
2153302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
2163302Sagiri 
2173302Sagiri 	/* catch peer-to-peer case as soon as possible */
2184467Sagiri 	if ((sp->session_state == RDS_SESSION_STATE_CREATED) ||
2194467Sagiri 	    (sp->session_state == RDS_SESSION_STATE_INIT)) {
2203302Sagiri 		/* Check possible peer-to-peer case here */
2213302Sagiri 		if (sp->session_type != RDS_SESSION_PASSIVE) {
2224467Sagiri 			RDS_DPRINTF2("rds_handle_cm_req",
2234467Sagiri 			    "SP(%p) Peer-peer connection handling", sp);
2243302Sagiri 			if (lgid.gid_guid > rgid.gid_guid) {
2253302Sagiri 				/* this node is active so reject this request */
2263302Sagiri 				rw_exit(&sp->session_lock);
2273302Sagiri 				return (IBT_CM_REJECT);
2283302Sagiri 			} else {
2293302Sagiri 				/* this node is passive, change the session */
2303302Sagiri 				sp->session_type = RDS_SESSION_PASSIVE;
2313302Sagiri 				sp->session_lgid = lgid;
2323302Sagiri 				sp->session_rgid = rgid;
2333302Sagiri 			}
2343302Sagiri 		}
2353302Sagiri 	}
2363302Sagiri 
2373302Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
238*10489SGiri.Adari@Sun.COM 	save_state = sp->session_state;
239*10489SGiri.Adari@Sun.COM 	save_type = sp->session_type;
2403302Sagiri 
2413302Sagiri 	switch (sp->session_state) {
2423302Sagiri 	case RDS_SESSION_STATE_CONNECTED:
2433302Sagiri 		RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
2443302Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
2453302Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
2463302Sagiri 		    "RDS_SESSION_STATE_ERROR", sp);
2473302Sagiri 
2483302Sagiri 		/* FALLTHRU */
2493302Sagiri 	case RDS_SESSION_STATE_ERROR:
2503302Sagiri 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
251*10489SGiri.Adari@Sun.COM 		/*
252*10489SGiri.Adari@Sun.COM 		 * Some other thread must be processing this session,
253*10489SGiri.Adari@Sun.COM 		 * this thread must wait until the other thread finishes.
254*10489SGiri.Adari@Sun.COM 		 */
2553302Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
2563302Sagiri 		rw_exit(&sp->session_lock);
2573302Sagiri 
2586438Sagiri 		/* Handling this will take some time, so send an MRA */
2596438Sagiri 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
2606438Sagiri 		    10000000 /* 10 sec */, NULL, 0);
2616438Sagiri 
2626438Sagiri 		/*
2636438Sagiri 		 * Any pending completions don't get flushed until the channel
2646438Sagiri 		 * is closed. So, passing 0 here will not wait for pending
2656438Sagiri 		 * completions in rds_session_close before closing the channel
2666438Sagiri 		 */
2676438Sagiri 		rds_session_close(sp, IBT_NOCALLBACKS, 0);
2683302Sagiri 
2693302Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
270*10489SGiri.Adari@Sun.COM 
271*10489SGiri.Adari@Sun.COM 		/*
272*10489SGiri.Adari@Sun.COM 		 * If the session was in ERROR, then either a failover thread
273*10489SGiri.Adari@Sun.COM 		 * or event_failure thread would be processing this session.
274*10489SGiri.Adari@Sun.COM 		 * This thread should wait for event_failure thread to
275*10489SGiri.Adari@Sun.COM 		 * complete. This need not wait for failover thread.
276*10489SGiri.Adari@Sun.COM 		 */
277*10489SGiri.Adari@Sun.COM 		if ((save_state != RDS_SESSION_STATE_CONNECTED) &&
278*10489SGiri.Adari@Sun.COM 		    (save_type == RDS_SESSION_PASSIVE)) {
279*10489SGiri.Adari@Sun.COM 				/*
280*10489SGiri.Adari@Sun.COM 				 * The other thread is event_failure thread,
281*10489SGiri.Adari@Sun.COM 				 * wait until it finishes.
282*10489SGiri.Adari@Sun.COM 				 */
283*10489SGiri.Adari@Sun.COM 				while (!((sp->session_state ==
284*10489SGiri.Adari@Sun.COM 				    RDS_SESSION_STATE_FAILED) ||
285*10489SGiri.Adari@Sun.COM 				    (sp->session_state ==
286*10489SGiri.Adari@Sun.COM 				    RDS_SESSION_STATE_FINI))) {
287*10489SGiri.Adari@Sun.COM 					rw_exit(&sp->session_lock);
288*10489SGiri.Adari@Sun.COM 					delay(drv_usectohz(1000000));
289*10489SGiri.Adari@Sun.COM 					rw_enter(&sp->session_lock, RW_WRITER);
290*10489SGiri.Adari@Sun.COM 				}
2914154Sagiri 		}
2923302Sagiri 
293*10489SGiri.Adari@Sun.COM 		/* move the session to init state */
294*10489SGiri.Adari@Sun.COM 		if ((sp->session_state == RDS_SESSION_STATE_ERROR) ||
295*10489SGiri.Adari@Sun.COM 		    (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)) {
296*10489SGiri.Adari@Sun.COM 			ret = rds_session_reinit(sp, lgid);
297*10489SGiri.Adari@Sun.COM 			sp->session_myip = ipcm_info.DSTIP;
298*10489SGiri.Adari@Sun.COM 			sp->session_lgid = lgid;
299*10489SGiri.Adari@Sun.COM 			sp->session_rgid = rgid;
300*10489SGiri.Adari@Sun.COM 			if (ret != 0) {
301*10489SGiri.Adari@Sun.COM 				rds_session_fini(sp);
302*10489SGiri.Adari@Sun.COM 				sp->session_state = RDS_SESSION_STATE_FAILED;
303*10489SGiri.Adari@Sun.COM 				RDS_DPRINTF3("rds_handle_cm_req",
304*10489SGiri.Adari@Sun.COM 				    "SP(%p) State RDS_SESSION_STATE_FAILED",
305*10489SGiri.Adari@Sun.COM 				    sp);
306*10489SGiri.Adari@Sun.COM 				rw_exit(&sp->session_lock);
307*10489SGiri.Adari@Sun.COM 				return (IBT_CM_REJECT);
308*10489SGiri.Adari@Sun.COM 			} else {
309*10489SGiri.Adari@Sun.COM 				sp->session_state = RDS_SESSION_STATE_INIT;
310*10489SGiri.Adari@Sun.COM 				RDS_DPRINTF3("rds_handle_cm_req",
311*10489SGiri.Adari@Sun.COM 				    "SP(%p) State RDS_SESSION_STATE_INIT", sp);
312*10489SGiri.Adari@Sun.COM 			}
313*10489SGiri.Adari@Sun.COM 
314*10489SGiri.Adari@Sun.COM 			if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
315*10489SGiri.Adari@Sun.COM 				ep = &sp->session_ctrlep;
316*10489SGiri.Adari@Sun.COM 			} else {
317*10489SGiri.Adari@Sun.COM 				ep = &sp->session_dataep;
318*10489SGiri.Adari@Sun.COM 			}
319*10489SGiri.Adari@Sun.COM 			break;
3203302Sagiri 		}
321*10489SGiri.Adari@Sun.COM 
322*10489SGiri.Adari@Sun.COM 		/* FALLTHRU */
3233302Sagiri 	case RDS_SESSION_STATE_CREATED:
3243302Sagiri 	case RDS_SESSION_STATE_FAILED:
3253302Sagiri 	case RDS_SESSION_STATE_FINI:
3263302Sagiri 		/*
3273302Sagiri 		 * Initialize both channels, we accept this connection
3283302Sagiri 		 * only if both channels are initialized
3293302Sagiri 		 */
3304467Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
3314467Sagiri 		sp->session_lgid = lgid;
3324467Sagiri 		sp->session_rgid = rgid;
3333302Sagiri 		sp->session_state = RDS_SESSION_STATE_CREATED;
3343302Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
3353302Sagiri 		    "RDS_SESSION_STATE_CREATED", sp);
3363302Sagiri 		ret = rds_session_init(sp);
3373302Sagiri 		if (ret != 0) {
3383302Sagiri 			/* Seems like there are not enough resources */
3393302Sagiri 			sp->session_state = RDS_SESSION_STATE_FAILED;
3403302Sagiri 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
3413302Sagiri 			    "RDS_SESSION_STATE_FAILED", sp);
3423302Sagiri 			rw_exit(&sp->session_lock);
3433302Sagiri 			return (IBT_CM_REJECT);
3443302Sagiri 		}
3453302Sagiri 		sp->session_state = RDS_SESSION_STATE_INIT;
3463302Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
3473302Sagiri 		    "RDS_SESSION_STATE_INIT", sp);
3483302Sagiri 
3493302Sagiri 		/* FALLTHRU */
3503302Sagiri 	case RDS_SESSION_STATE_INIT:
3514467Sagiri 		/*
3524467Sagiri 		 * When re-using an existing session, make sure the
3534467Sagiri 		 * session is still through the same HCA. Otherwise, the
3544467Sagiri 		 * memory registrations have to moved to the new HCA.
3554467Sagiri 		 */
3564467Sagiri 		if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) {
3574467Sagiri 			if (sp->session_lgid.gid_guid != lgid.gid_guid) {
3584467Sagiri 				RDS_DPRINTF2("rds_handle_cm_req",
3594467Sagiri 				    "Existing Session but different gid "
3604467Sagiri 				    "existing: 0x%llx, new: 0x%llx, "
3614467Sagiri 				    "sending an MRA",
3624467Sagiri 				    sp->session_lgid.gid_guid, lgid.gid_guid);
3634467Sagiri 				(void) ibt_cm_delay(IBT_CM_DELAY_REQ,
3644467Sagiri 				    evp->cm_session_id, 10000000 /* 10 sec */,
3654467Sagiri 				    NULL, 0);
3664467Sagiri 				ret = rds_session_reinit(sp, lgid);
3674467Sagiri 				if (ret != 0) {
3684467Sagiri 					rds_session_fini(sp);
3694467Sagiri 					sp->session_state =
3704467Sagiri 					    RDS_SESSION_STATE_FAILED;
3714467Sagiri 					sp->session_failover = 0;
3724467Sagiri 					RDS_DPRINTF3("rds_failover_session",
3734467Sagiri 					    "SP(%p) State "
3744467Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
3754467Sagiri 					rw_exit(&sp->session_lock);
3764467Sagiri 					return (IBT_CM_REJECT);
3774467Sagiri 				}
3784467Sagiri 			}
3794467Sagiri 			ep = &sp->session_dataep;
3804467Sagiri 		} else {
3813302Sagiri 			ep = &sp->session_ctrlep;
3823302Sagiri 		}
3833302Sagiri 
3843302Sagiri 		break;
3853302Sagiri 	default:
3863302Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
3873302Sagiri 		    "state: %d", sp, sp->session_state);
3883302Sagiri 		rw_exit(&sp->session_lock);
3893302Sagiri 		return (IBT_CM_REJECT);
3903302Sagiri 	}
3913302Sagiri 
3924467Sagiri 	sp->session_failover = 0; /* reset any previous value */
3933302Sagiri 	if (cmp.cmp_failover) {
3943302Sagiri 		RDS_DPRINTF2("rds_handle_cm_req",
3953302Sagiri 		    "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
3964467Sagiri 		sp->session_failover = 1;
3973302Sagiri 	}
3983302Sagiri 
3993302Sagiri 	mutex_enter(&ep->ep_lock);
4003302Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
4013302Sagiri 		ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
4023302Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
4033302Sagiri 		rw_exit(&sp->session_lock);
4043302Sagiri 	} else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
4053302Sagiri 		rw_exit(&sp->session_lock);
4063302Sagiri 		/*
4073302Sagiri 		 * Peer to peer connection. There is an active
4083302Sagiri 		 * connection pending on this ep. The one with
4093302Sagiri 		 * greater port guid becomes active and the
4103302Sagiri 		 * other becomes passive.
4113302Sagiri 		 */
4124467Sagiri 		RDS_DPRINTF2("rds_handle_cm_req",
4134467Sagiri 		    "EP(%p) Peer-peer connection handling", ep);
4143302Sagiri 		if (lgid.gid_guid > rgid.gid_guid) {
4153302Sagiri 			/* this node is active so reject this request */
4163302Sagiri 			mutex_exit(&ep->ep_lock);
4173302Sagiri 			RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
4183302Sagiri 			    "Rejecting passive in favor of active", sp, ep);
4193302Sagiri 			return (IBT_CM_REJECT);
4203302Sagiri 		} else {
4213302Sagiri 			/*
4223302Sagiri 			 * This session is not the active end, change it
4233302Sagiri 			 * to passive end.
4243302Sagiri 			 */
4253302Sagiri 			ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
4263302Sagiri 
4273302Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
4283302Sagiri 			sp->session_type = RDS_SESSION_PASSIVE;
4293302Sagiri 			sp->session_lgid = lgid;
4303302Sagiri 			sp->session_rgid = rgid;
4313302Sagiri 			rw_exit(&sp->session_lock);
4323302Sagiri 		}
4333302Sagiri 	} else {
4343302Sagiri 		rw_exit(&sp->session_lock);
4353302Sagiri 	}
4363302Sagiri 
4373302Sagiri 	ep->ep_lbufid = cmp.cmp_last_bufid;
4383302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
4393302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
4403302Sagiri 	cmp.cmp_last_bufid = ep->ep_rbufid;
4413302Sagiri 	cmp.cmp_ack_addr = ep->ep_ack_addr;
4423302Sagiri 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
4433302Sagiri 	mutex_exit(&ep->ep_lock);
4443302Sagiri 
4453302Sagiri 	/* continue with accepting the connection request for this channel */
4463302Sagiri 	chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
4473302Sagiri 	if (chanhdl == NULL) {
4483302Sagiri 		mutex_enter(&ep->ep_lock);
4493302Sagiri 		ep->ep_state = RDS_EP_STATE_UNCONNECTED;
4503302Sagiri 		mutex_exit(&ep->ep_lock);
4513302Sagiri 		return (IBT_CM_REJECT);
4523302Sagiri 	}
4533302Sagiri 
4543302Sagiri 	/* pre-post recv buffers in the RQ */
4553302Sagiri 	rds_post_recv_buf((void *)chanhdl);
4563302Sagiri 
4573302Sagiri 	rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
4583302Sagiri 	bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
4593302Sagiri 	rargsp->cm_ret.rep.cm_channel = chanhdl;
4603302Sagiri 	rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
4613302Sagiri 	rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
4623302Sagiri 	rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
4633302Sagiri 
4643302Sagiri 	RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
4653302Sagiri 	    sp, ep, chanhdl);
4663302Sagiri 
4673302Sagiri 	return (IBT_CM_ACCEPT);
4683302Sagiri }
4693302Sagiri 
4703302Sagiri /*
4713302Sagiri  * Handle an incoming CM REP
4723302Sagiri  * Pre-post recv buffers for the QP
4733302Sagiri  */
4743302Sagiri /* ARGSUSED */
4753302Sagiri static ibt_cm_status_t
rds_handle_cm_rep(ibt_cm_event_t * evp,ibt_cm_return_args_t * rargsp,void * rcmp,ibt_priv_data_len_t rcmp_len)4763302Sagiri rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
4773302Sagiri     void *rcmp, ibt_priv_data_len_t rcmp_len)
4783302Sagiri {
4793302Sagiri 	rds_ep_t	*ep;
4803302Sagiri 	rds_cm_private_data_t	cmp;
4813302Sagiri 
4823302Sagiri 	RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
4833302Sagiri 
4843302Sagiri 	/* pre-post recv buffers in the RQ */
4853302Sagiri 	rds_post_recv_buf((void *)evp->cm_channel);
4863302Sagiri 
4873302Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
4883302Sagiri 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
4893302Sagiri 	    sizeof (rds_cm_private_data_t));
4903302Sagiri 	ep->ep_lbufid = cmp.cmp_last_bufid;
4913302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
4923302Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
4933302Sagiri 
4943302Sagiri 	rargsp->cm_ret_len = 0;
4953302Sagiri 
4963302Sagiri 	RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
4973302Sagiri 
4983302Sagiri 	return (IBT_CM_ACCEPT);
4993302Sagiri }
5003302Sagiri 
5013302Sagiri /*
5023302Sagiri  * Handle CONN EST
5033302Sagiri  */
5043302Sagiri static ibt_cm_status_t
rds_handle_cm_conn_est(ibt_cm_event_t * evp)5053302Sagiri rds_handle_cm_conn_est(ibt_cm_event_t *evp)
5063302Sagiri {
5073302Sagiri 	rds_session_t	*sp;
5083302Sagiri 	rds_ep_t	*ep;
5093302Sagiri 
5103302Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
5113302Sagiri 
5123302Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
5133302Sagiri 	    ep->ep_state);
5143302Sagiri 
5153302Sagiri 	mutex_enter(&ep->ep_lock);
5163302Sagiri 	ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
5173302Sagiri 	    (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
5183302Sagiri 	ep->ep_state = RDS_EP_STATE_CONNECTED;
5193302Sagiri 	ep->ep_chanhdl = evp->cm_channel;
5203302Sagiri 	sp = ep->ep_sp;
5213302Sagiri 	mutex_exit(&ep->ep_lock);
5223302Sagiri 
5233302Sagiri 	(void) rds_session_active(sp);
5243302Sagiri 
5253302Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
5263302Sagiri 	return (IBT_CM_ACCEPT);
5273302Sagiri }
5283302Sagiri 
5293302Sagiri /*
5303302Sagiri  * Handle CONN CLOSED
5313302Sagiri  */
5323302Sagiri static ibt_cm_status_t
rds_handle_cm_conn_closed(ibt_cm_event_t * evp)5333302Sagiri rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
5343302Sagiri {
5353302Sagiri 	rds_ep_t	*ep;
5363302Sagiri 	rds_session_t	*sp;
5373302Sagiri 
5383302Sagiri 	/* Catch DREQs but ignore DREPs */
5393302Sagiri 	if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
5403302Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
5413302Sagiri 		    "Ignoring Event: %d received", evp->cm_event.closed);
5423302Sagiri 		return (IBT_CM_ACCEPT);
5433302Sagiri 	}
5443302Sagiri 
5453302Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
5463302Sagiri 	sp = ep->ep_sp;
5476438Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter",
5486438Sagiri 	    ep, evp->cm_channel);
5493302Sagiri 
5503302Sagiri 	mutex_enter(&ep->ep_lock);
5513302Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
5523302Sagiri 		/* Ignore this DREQ */
5533302Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
5543302Sagiri 		    "EP(%p) not connected, state: %d", ep, ep->ep_state);
5553302Sagiri 		mutex_exit(&ep->ep_lock);
5563302Sagiri 		return (IBT_CM_ACCEPT);
5573302Sagiri 	}
5583302Sagiri 	ep->ep_state = RDS_EP_STATE_CLOSING;
5593302Sagiri 	mutex_exit(&ep->ep_lock);
5603302Sagiri 
5613302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
5623302Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
5633302Sagiri 	    sp->session_state);
5643302Sagiri 
5653302Sagiri 	switch (sp->session_state) {
5663302Sagiri 	case RDS_SESSION_STATE_CONNECTED:
5678082SRamaswamy.Tummala@Sun.COM 	case RDS_SESSION_STATE_HCA_CLOSING:
5683302Sagiri 		sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
5693302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
5703302Sagiri 		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
5713302Sagiri 		break;
5723302Sagiri 
5733302Sagiri 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
5743302Sagiri 		sp->session_state = RDS_SESSION_STATE_CLOSED;
5753302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
5763302Sagiri 		    "RDS_SESSION_STATE_CLOSED", sp);
5773302Sagiri 		rds_passive_session_fini(sp);
5783302Sagiri 		sp->session_state = RDS_SESSION_STATE_FINI;
5793302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
5803302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
5813302Sagiri 		break;
5823302Sagiri 
5833302Sagiri 	case RDS_SESSION_STATE_ACTIVE_CLOSING:
5843302Sagiri 	case RDS_SESSION_STATE_ERROR:
5853302Sagiri 	case RDS_SESSION_STATE_CLOSED:
5863302Sagiri 		break;
5873302Sagiri 
5883302Sagiri 	case RDS_SESSION_STATE_INIT:
5893302Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
5903302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
5913302Sagiri 		    "RDS_SESSION_STATE_ERROR", sp);
5923302Sagiri 		rds_passive_session_fini(sp);
5933302Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
5943302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
5953302Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
5963302Sagiri 		break;
5973302Sagiri 
5983302Sagiri 	default:
5993302Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
6003302Sagiri 		    "SP(%p) - Unexpected state: %d", sp, sp->session_state);
6013302Sagiri 		rds_passive_session_fini(sp);
6023302Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
6033302Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
6043302Sagiri 		    "RDS_SESSION_STATE_FAILED", sp);
6053302Sagiri 	}
6063302Sagiri 	rw_exit(&sp->session_lock);
6073302Sagiri 
6083302Sagiri 	mutex_enter(&ep->ep_lock);
6093302Sagiri 	ep->ep_state = RDS_EP_STATE_CLOSED;
6103302Sagiri 	mutex_exit(&ep->ep_lock);
6113302Sagiri 
6123302Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
6133302Sagiri 	return (IBT_CM_ACCEPT);
6143302Sagiri }
6153302Sagiri 
6163302Sagiri /*
6173302Sagiri  * Handle EVENT FAILURE
6183302Sagiri  */
6193302Sagiri static ibt_cm_status_t
rds_handle_cm_event_failure(ibt_cm_event_t * evp)6203302Sagiri rds_handle_cm_event_failure(ibt_cm_event_t *evp)
6213302Sagiri {
6223302Sagiri 	rds_ep_t	*ep;
6233302Sagiri 	rds_session_t	*sp;
6243302Sagiri 	int		ret;
6253302Sagiri 
6263302Sagiri 	RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
6273302Sagiri 	    "Code: %d msg: %d reason: %d", evp->cm_channel,
6283302Sagiri 	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
6293302Sagiri 	    evp->cm_event.failed.cf_reason);
6303302Sagiri 
6314703Shiremath 	if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) {
6326702Sagiri 		RDS_DPRINTF2(LABEL,
6334703Shiremath 		    "Received REJ with reason IBT_CM_INVALID_SID: "
6346702Sagiri 		    "RDS may not be loaded on the remote system");
6354703Shiremath 	}
6364703Shiremath 
6373302Sagiri 	if (evp->cm_channel == NULL) {
6383302Sagiri 		return (IBT_CM_ACCEPT);
6393302Sagiri 	}
6403302Sagiri 
6415724Sagiri 	if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) &&
6425724Sagiri 	    (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) {
6435724Sagiri 		/*
6445724Sagiri 		 * This end is active, just ignore, ibt_open_rc_channel()
6455724Sagiri 		 * caller will take care of cleanup.
6465724Sagiri 		 */
6475724Sagiri 		RDS_DPRINTF2("rds_handle_cm_event_failure",
6485724Sagiri 		    "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel);
6495724Sagiri 		return (IBT_CM_ACCEPT);
6505724Sagiri 	}
6515724Sagiri 
6523302Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
6533302Sagiri 	sp = ep->ep_sp;
6543302Sagiri 
6553302Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
6563302Sagiri 	if (sp->session_type == RDS_SESSION_PASSIVE) {
6573302Sagiri 		RDS_DPRINTF2("rds_handle_cm_event_failure",
6583302Sagiri 		    "SP(%p) - state: %d", sp, sp->session_state);
6593302Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
6603302Sagiri 		    (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
6613302Sagiri 			sp->session_state = RDS_SESSION_STATE_ERROR;
6623302Sagiri 			RDS_DPRINTF3("rds_handle_cm_event_failure",
6633302Sagiri 			    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
6644154Sagiri 
6654154Sagiri 			/*
6664154Sagiri 			 * Store the cm_channel for freeing later
6674154Sagiri 			 * Active side frees it on ibt_open_rc_channel
6684154Sagiri 			 * failure
6694154Sagiri 			 */
6704154Sagiri 			if (ep->ep_chanhdl == NULL) {
6714154Sagiri 				ep->ep_chanhdl = evp->cm_channel;
6724154Sagiri 			}
6733302Sagiri 			rw_exit(&sp->session_lock);
6743302Sagiri 
6753302Sagiri 			/*
6763302Sagiri 			 * rds_passive_session_fini should not be called
6773302Sagiri 			 * directly in the CM handler. It will cause a deadlock.
6783302Sagiri 			 */
6793302Sagiri 			ret = ddi_taskq_dispatch(rds_taskq,
6803302Sagiri 			    rds_cleanup_passive_session, (void *)sp,
6813302Sagiri 			    DDI_NOSLEEP);
6823302Sagiri 			if (ret != DDI_SUCCESS) {
6836702Sagiri 				RDS_DPRINTF2("rds_handle_cm_event_failure",
6843302Sagiri 				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
6853302Sagiri 			}
6863302Sagiri 			return (IBT_CM_ACCEPT);
6873302Sagiri 		}
6883302Sagiri 	}
6893302Sagiri 	rw_exit(&sp->session_lock);
6903302Sagiri 
6913302Sagiri 	RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
6923302Sagiri 	return (IBT_CM_ACCEPT);
6933302Sagiri }
6943302Sagiri 
6953302Sagiri /*
6963302Sagiri  * CM Handler
6973302Sagiri  *
6983302Sagiri  * Called by IBCM
6993302Sagiri  * The cm_private type differs for active and passive events.
7003302Sagiri  */
7013302Sagiri ibt_cm_status_t
rds_cm_handler(void * cm_private,ibt_cm_event_t * eventp,ibt_cm_return_args_t * ret_args,void * ret_priv_data,ibt_priv_data_len_t ret_len_max)7023302Sagiri rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
7033302Sagiri     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
7043302Sagiri     ibt_priv_data_len_t ret_len_max)
7053302Sagiri {
7063302Sagiri 	ibt_cm_status_t		ret = IBT_CM_ACCEPT;
7073302Sagiri 
7083302Sagiri 	RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
7093302Sagiri 
7103302Sagiri 	switch (eventp->cm_type) {
7113302Sagiri 	case IBT_CM_EVENT_REQ_RCV:
7123302Sagiri 		ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
7133302Sagiri 		    ret_args, ret_priv_data, ret_len_max);
7143302Sagiri 		break;
7153302Sagiri 	case IBT_CM_EVENT_REP_RCV:
7163302Sagiri 		ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
7173302Sagiri 		    ret_len_max);
7183302Sagiri 		break;
7193302Sagiri 	case IBT_CM_EVENT_MRA_RCV:
7203302Sagiri 		/* Not supported */
7213302Sagiri 		break;
7223302Sagiri 	case IBT_CM_EVENT_CONN_EST:
7233302Sagiri 		ret = rds_handle_cm_conn_est(eventp);
7243302Sagiri 		break;
7253302Sagiri 	case IBT_CM_EVENT_CONN_CLOSED:
7263302Sagiri 		ret = rds_handle_cm_conn_closed(eventp);
7273302Sagiri 		break;
7283302Sagiri 	case IBT_CM_EVENT_FAILURE:
7293302Sagiri 		ret = rds_handle_cm_event_failure(eventp);
7303302Sagiri 		break;
7313302Sagiri 	case IBT_CM_EVENT_LAP_RCV:
7323302Sagiri 		/* Not supported */
7333302Sagiri 		RDS_DPRINTF2(LABEL, "LAP message received");
7343302Sagiri 		break;
7353302Sagiri 	case IBT_CM_EVENT_APR_RCV:
7363302Sagiri 		/* Not supported */
7373302Sagiri 		RDS_DPRINTF2(LABEL, "APR message received");
7383302Sagiri 		break;
7393302Sagiri 	default:
7403302Sagiri 		break;
7413302Sagiri 	}
7423302Sagiri 
7433302Sagiri 	RDS_DPRINTF2("rds_cm_handler", "Return");
7443302Sagiri 
7453302Sagiri 	return (ret);
7463302Sagiri }
7473302Sagiri 
/*
 * Port number used by RDS; the value is based on OFED Linux RDS.
 * It is handed to ibt_get_ip_sid() to derive the CM service id and is
 * sent as the source port in the IP CM private data of a connect request.
 */
#define	RDS_PORT_NUM	6556
7504703Shiremath 
7513302Sagiri /*
7523302Sagiri  * Register the wellknown service with service id: RDS_SERVICE_ID
7533302Sagiri  * Incoming connection requests should arrive on this service id.
7543302Sagiri  */
7553302Sagiri ibt_srv_hdl_t
rds_register_service(ibt_clnt_hdl_t rds_ibhdl)7563302Sagiri rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
7573302Sagiri {
7583302Sagiri 	ibt_srv_hdl_t	srvhdl;
7593302Sagiri 	ibt_srv_desc_t	srvdesc;
7603302Sagiri 	int		ret;
7613302Sagiri 
7623302Sagiri 	RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
7633302Sagiri 
7643302Sagiri 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
7653302Sagiri 	srvdesc.sd_handler = rds_cm_handler;
7663302Sagiri 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
7673302Sagiri 
7684703Shiremath 	/*
7694703Shiremath 	 * This is the new service id as per:
7704703Shiremath 	 * Annex A11: RDMA IP CM Service
7714703Shiremath 	 */
7724703Shiremath 	rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP,
7734703Shiremath 	    RDS_PORT_NUM);
7744703Shiremath 	ret = ibt_register_service(rds_ibhdl, &srvdesc,
7754703Shiremath 	    rdsib_statep->rds_service_id, 1, &srvhdl, NULL);
7764703Shiremath 	if (ret != IBT_SUCCESS) {
7774703Shiremath 		RDS_DPRINTF2(LABEL,
7784703Shiremath 		    "RDS Service (0x%llx) Registration Failed: %d",
7794703Shiremath 		    rdsib_statep->rds_service_id, ret);
7803302Sagiri 		return (NULL);
7813302Sagiri 	}
7823302Sagiri 
7833302Sagiri 	RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
7843302Sagiri 	return (srvhdl);
7853302Sagiri }
7863302Sagiri 
7873302Sagiri /* Bind the RDS service on all ports */
7883302Sagiri int
rds_bind_service(rds_state_t * statep)7893302Sagiri rds_bind_service(rds_state_t *statep)
7903302Sagiri {
7913302Sagiri 	rds_hca_t	*hcap;
7923302Sagiri 	ib_gid_t	gid;
7933302Sagiri 	uint_t		jx, nbinds = 0, nports = 0;
7943302Sagiri 	int		ret;
7953302Sagiri 
7963302Sagiri 	RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
7973302Sagiri 
7988082SRamaswamy.Tummala@Sun.COM 	rw_enter(&statep->rds_hca_lock, RW_READER);
7998082SRamaswamy.Tummala@Sun.COM 
8003302Sagiri 	hcap = statep->rds_hcalistp;
8013302Sagiri 	while (hcap != NULL) {
8028082SRamaswamy.Tummala@Sun.COM 
8038082SRamaswamy.Tummala@Sun.COM 		/* skip the HCAs that are not fully online */
8048082SRamaswamy.Tummala@Sun.COM 		if ((hcap->hca_state != RDS_HCA_STATE_OPEN) &&
8058082SRamaswamy.Tummala@Sun.COM 		    (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) {
8068082SRamaswamy.Tummala@Sun.COM 			RDS_DPRINTF2("rds_bind_service",
8078082SRamaswamy.Tummala@Sun.COM 			    "Skipping HCA: 0x%llx, state: %d",
8088082SRamaswamy.Tummala@Sun.COM 			    hcap->hca_guid, hcap->hca_state);
8098082SRamaswamy.Tummala@Sun.COM 			hcap = hcap->hca_nextp;
8108082SRamaswamy.Tummala@Sun.COM 			continue;
8118082SRamaswamy.Tummala@Sun.COM 		}
8128082SRamaswamy.Tummala@Sun.COM 
8138082SRamaswamy.Tummala@Sun.COM 		/* currently, we have space for only 4 bindhdls */
8148082SRamaswamy.Tummala@Sun.COM 		ASSERT(hcap->hca_nports < 4);
8153302Sagiri 		for (jx = 0; jx < hcap->hca_nports; jx++) {
8163302Sagiri 			nports++;
8173302Sagiri 			if (hcap->hca_pinfop[jx].p_linkstate !=
8183302Sagiri 			    IBT_PORT_ACTIVE) {
8193302Sagiri 				/*
8203302Sagiri 				 * service bind will be called in the async
8218082SRamaswamy.Tummala@Sun.COM 				 * handler when the port comes up. Clear any
8228082SRamaswamy.Tummala@Sun.COM 				 * stale bind handle.
8233302Sagiri 				 */
8248082SRamaswamy.Tummala@Sun.COM 				hcap->hca_bindhdl[jx] = NULL;
8253302Sagiri 				continue;
8263302Sagiri 			}
8273302Sagiri 
8283302Sagiri 			gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
8293302Sagiri 			RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
8303302Sagiri 			    "gid: %llx:%llx", hcap->hca_guid,
8313302Sagiri 			    hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
8323302Sagiri 			    gid.gid_guid);
8333302Sagiri 
8343302Sagiri 			/* pass statep as cm_private */
8353302Sagiri 			ret = ibt_bind_service(statep->rds_srvhdl, gid,
8368082SRamaswamy.Tummala@Sun.COM 			    NULL, statep, &hcap->hca_bindhdl[jx]);
8373302Sagiri 			if (ret != IBT_SUCCESS) {
8383302Sagiri 				RDS_DPRINTF2(LABEL, "Bind service for "
8393302Sagiri 				    "HCA: 0x%llx Port: %d gid %llx:%llx "
8403302Sagiri 				    "failed: %d", hcap->hca_guid,
8413302Sagiri 				    hcap->hca_pinfop[jx].p_port_num,
8423302Sagiri 				    gid.gid_prefix, gid.gid_guid, ret);
8433302Sagiri 				continue;
8443302Sagiri 			}
8453302Sagiri 
8463302Sagiri 			nbinds++;
8473302Sagiri 		}
8483302Sagiri 		hcap = hcap->hca_nextp;
8493302Sagiri 	}
8503302Sagiri 
8518082SRamaswamy.Tummala@Sun.COM 	rw_exit(&statep->rds_hca_lock);
8528082SRamaswamy.Tummala@Sun.COM 
8533302Sagiri 	RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
8543302Sagiri 	    nbinds, nports);
8553302Sagiri 
8563302Sagiri #if 0
8573302Sagiri 	if (nbinds == 0) {
8583302Sagiri 		return (-1);
8593302Sagiri 	}
8603302Sagiri #endif
8613302Sagiri 
8623302Sagiri 	RDS_DPRINTF2("rds_bind_service", "Return");
8633302Sagiri 
8643302Sagiri 	return (0);
8653302Sagiri }
8663302Sagiri 
8673302Sagiri /* Open an RC connection */
8683302Sagiri int
rds_open_rc_channel(rds_ep_t * ep,ibt_path_info_t * pinfo,ibt_execution_mode_t mode,ibt_channel_hdl_t * chanhdl)8693302Sagiri rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
8703302Sagiri     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
8713302Sagiri {
8723302Sagiri 	rds_session_t		*sp;
8733302Sagiri 	ibt_chan_open_args_t	ocargs;
8743302Sagiri 	ibt_rc_returns_t	ocrets;
8753302Sagiri 	rds_cm_private_data_t	cmp;
8763302Sagiri 	uint8_t			hca_port;
8773302Sagiri 	ibt_channel_hdl_t	hdl;
8784703Shiremath 	ibt_status_t		ret = 0;
8794703Shiremath 	ibt_ip_cm_info_t	ipcm_info;
8803302Sagiri 
8813302Sagiri 	RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
8823302Sagiri 
8833302Sagiri 	sp = ep->ep_sp;
8843302Sagiri 
8854703Shiremath 	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
8864703Shiremath 	ipcm_info.src_addr.family = AF_INET;
8877919SBill.Taylor@Sun.COM 	ipcm_info.SRCIP = sp->session_myip;
8884703Shiremath 	ipcm_info.dst_addr.family = AF_INET;
8897919SBill.Taylor@Sun.COM 	ipcm_info.DSTIP = sp->session_remip;
8907919SBill.Taylor@Sun.COM 	ipcm_info.src_port = RDS_PORT_NUM;
8914703Shiremath 	ret = ibt_format_ip_private_data(&ipcm_info,
8924703Shiremath 	    sizeof (rds_cm_private_data_t), &cmp);
8934703Shiremath 	if (ret != IBT_SUCCESS) {
8944703Shiremath 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data "
8954703Shiremath 		    "failed: %d", sp, ep, ret);
8964703Shiremath 		return (-1);
8974703Shiremath 	}
8984703Shiremath 
8993302Sagiri 	hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
9003302Sagiri 
9013302Sagiri 	hdl = rds_ep_alloc_rc_channel(ep, hca_port);
9023302Sagiri 	if (hdl == NULL) {
9033302Sagiri 		return (-1);
9043302Sagiri 	}
9053302Sagiri 
9063302Sagiri 	cmp.cmp_version = RDS_VERSION;
9073302Sagiri 	cmp.cmp_arch = RDS_THIS_ARCH;
9083302Sagiri 	cmp.cmp_eptype = ep->ep_type;
9093302Sagiri 	cmp.cmp_failover = sp->session_failover;
9103302Sagiri 	cmp.cmp_last_bufid = ep->ep_rbufid;
9113302Sagiri 	cmp.cmp_user_buffer_size = UserBufferSize;
9123302Sagiri 	cmp.cmp_ack_addr = ep->ep_ack_addr;
9133302Sagiri 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
9143302Sagiri 
9153302Sagiri 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
9163302Sagiri 	bzero(&ocrets, sizeof (ibt_rc_returns_t));
9173302Sagiri 	ocargs.oc_path = pinfo;
9183302Sagiri 	ocargs.oc_cm_handler = rds_cm_handler;
9193302Sagiri 	ocargs.oc_cm_clnt_private = NULL;
9203302Sagiri 	ocargs.oc_rdma_ra_out = 4;
9213302Sagiri 	ocargs.oc_rdma_ra_in = 4;
9223302Sagiri 	ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
9233302Sagiri 	ocargs.oc_priv_data = &cmp;
9243302Sagiri 	ocargs.oc_path_retry_cnt = IBPathRetryCount;
9253302Sagiri 	ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
9263302Sagiri 	ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
9273302Sagiri 	    mode, &ocargs, &ocrets);
9283302Sagiri 	if (ret != IBT_SUCCESS) {
9293302Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
9303302Sagiri 		    "failed: %d", sp, ep, ret);
9313302Sagiri 		(void) ibt_flush_channel(hdl);
9323302Sagiri 		(void) ibt_free_channel(hdl);
9335342Sagiri 
9345342Sagiri 		mutex_enter(&ep->ep_lock);
9355724Sagiri 		/* don't cleanup if this failure is due to peer-peer race */
9365724Sagiri 		if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
9375724Sagiri 			/* cleanup stuff allocated in rds_ep_alloc_rc_channel */
9385724Sagiri 			ep->ep_state = RDS_EP_STATE_ERROR;
9395724Sagiri 			rds_ep_free_rc_channel(ep);
9405724Sagiri 		}
9415342Sagiri 		mutex_exit(&ep->ep_lock);
9425342Sagiri 
9434154Sagiri 		return (-1);
9443302Sagiri 	}
9453302Sagiri 
9463302Sagiri 	*chanhdl = hdl;
9473302Sagiri 
9483302Sagiri 	RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
9493302Sagiri 	    *chanhdl);
9503302Sagiri 
9514154Sagiri 	return (0);
9523302Sagiri }
9533302Sagiri 
9543302Sagiri int
rds_close_rc_channel(ibt_channel_hdl_t chanhdl,ibt_execution_mode_t mode)9553302Sagiri rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
9563302Sagiri {
9573302Sagiri 	int	ret;
9583302Sagiri 
9593302Sagiri 	RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
9603302Sagiri 	    chanhdl, mode);
9613302Sagiri 
9623302Sagiri 	ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
9633302Sagiri 
9643302Sagiri 	RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
9653302Sagiri 
9663302Sagiri 	return (ret);
9673302Sagiri }
968