xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c (revision 3302:e75a684d1697)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #pragma ident	"%Z%%M%	%I%	%E% SMI"
76 
77 #include <sys/ib/clients/rds/rdsib_cm.h>
78 #include <sys/ib/clients/rds/rdsib_ib.h>
79 #include <sys/ib/clients/rds/rdsib_buf.h>
80 #include <sys/ib/clients/rds/rdsib_ep.h>
81 
82 /*
83  * This file contains CM related work:
84  *
85  * Service registration/deregistration
86  * Path lookup
87  * CM connection callbacks
88  * CM active and passive connection establishment
89  * Connection failover
90  */
91 
92 /*
93  * Handle an incoming CM REQ
94  */
95 /* ARGSUSED */
96 static ibt_cm_status_t
97 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
98     ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
99 {
100 	ibt_cm_req_rcv_t	*reqp;
101 	ib_gid_t		lgid, rgid;
102 	rds_cm_private_data_t	cmp;
103 	rds_session_t		*sp;
104 	rds_ep_t		*ep;
105 	ibt_channel_hdl_t	chanhdl;
106 	rds_hca_t		*hcap;
107 	int			ret;
108 
109 	RDS_DPRINTF2("rds_handle_cm_req", "Enter");
110 
111 	reqp = &evp->cm_event.req;
112 	rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
113 	lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
114 
115 	RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
116 	    rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
117 
118 	/*
119 	 * CM private data brings IP information
120 	 * Private data received is a stream of bytes and may not be properly
121 	 * aligned. So, bcopy the data onto the stack before accessing it.
122 	 */
123 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
124 	    sizeof (rds_cm_private_data_t));
125 
126 	RDS_DPRINTF2(LABEL, "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
127 	    cmp.cmp_localip, cmp.cmp_remip, cmp.cmp_eptype);
128 
129 	if (cmp.cmp_version != RDS_VERSION) {
130 		RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
131 		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
132 		return (IBT_CM_REJECT);
133 	}
134 
135 	if (cmp.cmp_arch != RDS_THIS_ARCH) {
136 		RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
137 		    cmp.cmp_arch, RDS_THIS_ARCH);
138 		return (IBT_CM_REJECT);
139 	}
140 
141 	if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
142 	    (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
143 		RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
144 		return (IBT_CM_REJECT);
145 	}
146 
147 	/* user_buffer_size should be same on all nodes */
148 	if (cmp.cmp_user_buffer_size != UserBufferSize) {
149 		RDS_DPRINTF2(LABEL,
150 		    "UserBufferSize Mismatch, this node: %d remote node: %d",
151 		    UserBufferSize, cmp.cmp_user_buffer_size);
152 		return (IBT_CM_REJECT);
153 	}
154 
155 	/* Is there a session to the destination node? */
156 	rw_enter(&statep->rds_sessionlock, RW_READER);
157 	sp = rds_session_lkup(statep, cmp.cmp_localip, rgid.gid_guid);
158 	rw_exit(&statep->rds_sessionlock);
159 
160 	if (sp == NULL) {
161 		/*
162 		 * currently there is no session to the destination
163 		 * remote ip in the private data is the local ip and vice
164 		 * versa
165 		 */
166 		sp = rds_session_create(statep, cmp.cmp_remip, cmp.cmp_localip,
167 		    reqp, RDS_SESSION_PASSIVE);
168 		if (sp == NULL) {
169 			/* Check the list anyway. */
170 			rw_enter(&statep->rds_sessionlock, RW_READER);
171 			sp = rds_session_lkup(statep, cmp.cmp_localip,
172 			    rgid.gid_guid);
173 			rw_exit(&statep->rds_sessionlock);
174 			if (sp == NULL) {
175 				/*
176 				 * The only way this can fail is due to lack
177 				 * of kernel resources
178 				 */
179 				return (IBT_CM_REJECT);
180 			}
181 		}
182 	}
183 
184 	rw_enter(&sp->session_lock, RW_WRITER);
185 
186 	/* catch peer-to-peer case as soon as possible */
187 	if (sp->session_state == RDS_SESSION_STATE_CREATED) {
188 		/* Check possible peer-to-peer case here */
189 		if (sp->session_type != RDS_SESSION_PASSIVE) {
190 			RDS_DPRINTF2(LABEL, "SP(%p) Peer-peer connection "
191 			    "handling", sp);
192 			if (lgid.gid_guid > rgid.gid_guid) {
193 				/* this node is active so reject this request */
194 				rw_exit(&sp->session_lock);
195 				return (IBT_CM_REJECT);
196 			} else {
197 				/* this node is passive, change the session */
198 				sp->session_type = RDS_SESSION_PASSIVE;
199 				sp->session_myip = cmp.cmp_remip;
200 				sp->session_lgid = lgid;
201 				sp->session_rgid = rgid;
202 				hcap = rds_gid_to_hcap(statep, lgid);
203 
204 				/* change the data channel */
205 				mutex_enter(&sp->session_dataep.ep_lock);
206 				sp->session_dataep.ep_myip = cmp.cmp_remip;
207 				sp->session_dataep.ep_hca_guid =
208 				    hcap->hca_guid;
209 				mutex_exit(&sp->session_dataep.ep_lock);
210 
211 				/* change the control channel */
212 				mutex_enter(&sp->session_ctrlep.ep_lock);
213 				sp->session_ctrlep.ep_myip = cmp.cmp_remip;
214 				sp->session_ctrlep.ep_hca_guid =
215 				    hcap->hca_guid;
216 				mutex_exit(&sp->session_ctrlep.ep_lock);
217 			}
218 		}
219 	}
220 
221 	RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
222 
223 	switch (sp->session_state) {
224 	case RDS_SESSION_STATE_CONNECTED:
225 		RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
226 		sp->session_state = RDS_SESSION_STATE_ERROR;
227 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
228 		    "RDS_SESSION_STATE_ERROR", sp);
229 
230 		/* FALLTHRU */
231 	case RDS_SESSION_STATE_ERROR:
232 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
233 		sp->session_type = RDS_SESSION_PASSIVE;
234 		rw_exit(&sp->session_lock);
235 
236 		rds_session_close(sp, IBT_NOCALLBACKS, 1);
237 
238 		/* move the session to init state */
239 		rw_enter(&sp->session_lock, RW_WRITER);
240 		sp->session_state = RDS_SESSION_STATE_INIT;
241 		sp->session_myip = cmp.cmp_remip;
242 		sp->session_lgid = lgid;
243 		sp->session_rgid = rgid;
244 		hcap = rds_gid_to_hcap(statep, lgid);
245 
246 		/* change the data channel */
247 		mutex_enter(&sp->session_dataep.ep_lock);
248 		sp->session_dataep.ep_myip = cmp.cmp_remip;
249 		sp->session_dataep.ep_hca_guid = hcap->hca_guid;
250 		mutex_exit(&sp->session_dataep.ep_lock);
251 
252 		/* change the control channel */
253 		mutex_enter(&sp->session_ctrlep.ep_lock);
254 		sp->session_ctrlep.ep_myip = cmp.cmp_remip;
255 		sp->session_ctrlep.ep_hca_guid = hcap->hca_guid;
256 		mutex_exit(&sp->session_ctrlep.ep_lock);
257 
258 		if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
259 			ep = &sp->session_ctrlep;
260 		} else {
261 			ep = &sp->session_dataep;
262 		}
263 		break;
264 	case RDS_SESSION_STATE_CREATED:
265 	case RDS_SESSION_STATE_FAILED:
266 	case RDS_SESSION_STATE_FINI:
267 		/*
268 		 * Initialize both channels, we accept this connection
269 		 * only if both channels are initialized
270 		 */
271 		sp->session_state = RDS_SESSION_STATE_CREATED;
272 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
273 		    "RDS_SESSION_STATE_CREATED", sp);
274 		ret = rds_session_init(sp);
275 		if (ret != 0) {
276 			/* Seems like there are not enough resources */
277 			sp->session_state = RDS_SESSION_STATE_FAILED;
278 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
279 			    "RDS_SESSION_STATE_FAILED", sp);
280 			rw_exit(&sp->session_lock);
281 			return (IBT_CM_REJECT);
282 		}
283 		sp->session_state = RDS_SESSION_STATE_INIT;
284 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
285 		    "RDS_SESSION_STATE_INIT", sp);
286 
287 		/* FALLTHRU */
288 	case RDS_SESSION_STATE_INIT:
289 		if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
290 			ep = &sp->session_ctrlep;
291 		} else {
292 			ep = &sp->session_dataep;
293 		}
294 
295 		break;
296 	default:
297 		RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
298 		    "state: %d", sp, sp->session_state);
299 		rw_exit(&sp->session_lock);
300 		return (IBT_CM_REJECT);
301 	}
302 
303 	if (cmp.cmp_failover) {
304 		RDS_DPRINTF2("rds_handle_cm_req",
305 		    "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
306 		sp->session_failover = cmp.cmp_failover;
307 	}
308 
309 	mutex_enter(&ep->ep_lock);
310 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
311 		ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
312 		sp->session_type = RDS_SESSION_PASSIVE;
313 		rw_exit(&sp->session_lock);
314 	} else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
315 		rw_exit(&sp->session_lock);
316 		/*
317 		 * Peer to peer connection. There is an active
318 		 * connection pending on this ep. The one with
319 		 * greater port guid becomes active and the
320 		 * other becomes passive.
321 		 */
322 		RDS_DPRINTF2(LABEL, "EP(%p) Peer-peer connection handling", ep);
323 		if (lgid.gid_guid > rgid.gid_guid) {
324 			/* this node is active so reject this request */
325 			mutex_exit(&ep->ep_lock);
326 			RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
327 			    "Rejecting passive in favor of active", sp, ep);
328 			return (IBT_CM_REJECT);
329 		} else {
330 			/*
331 			 * This session is not the active end, change it
332 			 * to passive end.
333 			 */
334 			ASSERT(sp->session_type == RDS_SESSION_ACTIVE);
335 			ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
336 			ep->ep_myip = cmp.cmp_remip;
337 			hcap = rds_gid_to_hcap(statep, lgid);
338 			ep->ep_hca_guid = hcap->hca_guid;
339 
340 			/* change the control channel too */
341 			mutex_enter(&sp->session_ctrlep.ep_lock);
342 			sp->session_ctrlep.ep_myip = cmp.cmp_remip;
343 			sp->session_ctrlep.ep_hca_guid = hcap->hca_guid;
344 			mutex_exit(&sp->session_dataep.ep_lock);
345 
346 			rw_enter(&sp->session_lock, RW_WRITER);
347 			sp->session_type = RDS_SESSION_PASSIVE;
348 			sp->session_lgid = lgid;
349 			sp->session_rgid = rgid;
350 			rw_exit(&sp->session_lock);
351 		}
352 	} else {
353 		rw_exit(&sp->session_lock);
354 	}
355 
356 	ep->ep_lbufid = cmp.cmp_last_bufid;
357 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
358 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
359 	cmp.cmp_last_bufid = ep->ep_rbufid;
360 	cmp.cmp_ack_addr = ep->ep_ack_addr;
361 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
362 	mutex_exit(&ep->ep_lock);
363 
364 	/* continue with accepting the connection request for this channel */
365 	chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
366 	if (chanhdl == NULL) {
367 		mutex_enter(&ep->ep_lock);
368 		ep->ep_state = RDS_EP_STATE_UNCONNECTED;
369 		mutex_exit(&ep->ep_lock);
370 		return (IBT_CM_REJECT);
371 	}
372 
373 	/* pre-post recv buffers in the RQ */
374 	rds_post_recv_buf((void *)chanhdl);
375 
376 	rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
377 	bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
378 	rargsp->cm_ret.rep.cm_channel = chanhdl;
379 	rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
380 	rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
381 	rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
382 
383 	RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
384 	    sp, ep, chanhdl);
385 
386 	return (IBT_CM_ACCEPT);
387 }
388 
389 /*
390  * Handle an incoming CM REP
391  * Pre-post recv buffers for the QP
392  */
393 /* ARGSUSED */
394 static ibt_cm_status_t
395 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
396     void *rcmp, ibt_priv_data_len_t rcmp_len)
397 {
398 	rds_ep_t	*ep;
399 	rds_cm_private_data_t	cmp;
400 
401 	RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
402 
403 	/* pre-post recv buffers in the RQ */
404 	rds_post_recv_buf((void *)evp->cm_channel);
405 
406 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
407 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
408 	    sizeof (rds_cm_private_data_t));
409 	ep->ep_lbufid = cmp.cmp_last_bufid;
410 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
411 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
412 
413 	rargsp->cm_ret_len = 0;
414 
415 	RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
416 
417 	return (IBT_CM_ACCEPT);
418 }
419 
420 /*
421  * Handle CONN EST
422  */
423 static ibt_cm_status_t
424 rds_handle_cm_conn_est(ibt_cm_event_t *evp)
425 {
426 	rds_session_t	*sp;
427 	rds_ep_t	*ep;
428 
429 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
430 
431 	RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
432 	    ep->ep_state);
433 
434 	mutex_enter(&ep->ep_lock);
435 	ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
436 	    (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
437 	ep->ep_state = RDS_EP_STATE_CONNECTED;
438 	ep->ep_chanhdl = evp->cm_channel;
439 	sp = ep->ep_sp;
440 	mutex_exit(&ep->ep_lock);
441 
442 	(void) rds_session_active(sp);
443 
444 	RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
445 	return (IBT_CM_ACCEPT);
446 }
447 
448 /*
449  * Handle CONN CLOSED
450  */
451 static ibt_cm_status_t
452 rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
453 {
454 	rds_ep_t	*ep;
455 	rds_session_t	*sp;
456 
457 	/* Catch DREQs but ignore DREPs */
458 	if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
459 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
460 		    "Ignoring Event: %d received", evp->cm_event.closed);
461 		return (IBT_CM_ACCEPT);
462 	}
463 
464 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
465 	sp = ep->ep_sp;
466 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep);
467 
468 	mutex_enter(&ep->ep_lock);
469 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
470 		/* Ignore this DREQ */
471 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
472 		    "EP(%p) not connected, state: %d", ep, ep->ep_state);
473 		mutex_exit(&ep->ep_lock);
474 		return (IBT_CM_ACCEPT);
475 	}
476 	ep->ep_state = RDS_EP_STATE_CLOSING;
477 	mutex_exit(&ep->ep_lock);
478 
479 	rw_enter(&sp->session_lock, RW_WRITER);
480 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
481 	    sp->session_state);
482 
483 	switch (sp->session_state) {
484 	case RDS_SESSION_STATE_CONNECTED:
485 		sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
486 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
487 		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
488 		break;
489 
490 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
491 		sp->session_state = RDS_SESSION_STATE_CLOSED;
492 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
493 		    "RDS_SESSION_STATE_CLOSED", sp);
494 		rds_passive_session_fini(sp);
495 		sp->session_state = RDS_SESSION_STATE_FINI;
496 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
497 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
498 		break;
499 
500 	case RDS_SESSION_STATE_ACTIVE_CLOSING:
501 	case RDS_SESSION_STATE_ERROR:
502 	case RDS_SESSION_STATE_CLOSED:
503 		break;
504 
505 	case RDS_SESSION_STATE_INIT:
506 		sp->session_state = RDS_SESSION_STATE_ERROR;
507 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
508 		    "RDS_SESSION_STATE_ERROR", sp);
509 		rds_passive_session_fini(sp);
510 		sp->session_state = RDS_SESSION_STATE_FAILED;
511 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
512 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
513 		break;
514 
515 	default:
516 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
517 		    "SP(%p) - Unexpected state: %d", sp, sp->session_state);
518 		rds_passive_session_fini(sp);
519 		sp->session_state = RDS_SESSION_STATE_FAILED;
520 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
521 		    "RDS_SESSION_STATE_FAILED", sp);
522 	}
523 	rw_exit(&sp->session_lock);
524 
525 	mutex_enter(&ep->ep_lock);
526 	ep->ep_state = RDS_EP_STATE_CLOSED;
527 	mutex_exit(&ep->ep_lock);
528 
529 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
530 	return (IBT_CM_ACCEPT);
531 }
532 
533 /*
534  * Handle EVENT FAILURE
535  */
536 static ibt_cm_status_t
537 rds_handle_cm_event_failure(ibt_cm_event_t *evp)
538 {
539 	rds_ep_t	*ep;
540 	rds_session_t	*sp;
541 	int		ret;
542 
543 	RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
544 	    "Code: %d msg: %d reason: %d", evp->cm_channel,
545 	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
546 	    evp->cm_event.failed.cf_reason);
547 
548 	if (evp->cm_channel == NULL) {
549 		return (IBT_CM_ACCEPT);
550 	}
551 
552 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
553 	sp = ep->ep_sp;
554 
555 	mutex_enter(&ep->ep_lock);
556 	ep->ep_state = RDS_EP_STATE_ERROR;
557 	mutex_exit(&ep->ep_lock);
558 
559 	rw_enter(&sp->session_lock, RW_WRITER);
560 	if (sp->session_type == RDS_SESSION_PASSIVE) {
561 		RDS_DPRINTF2("rds_handle_cm_event_failure",
562 		    "SP(%p) - state: %d", sp, sp->session_state);
563 		if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
564 		    (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
565 			sp->session_state = RDS_SESSION_STATE_ERROR;
566 			RDS_DPRINTF3("rds_handle_cm_event_failure",
567 			    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
568 			rw_exit(&sp->session_lock);
569 
570 			/*
571 			 * rds_passive_session_fini should not be called
572 			 * directly in the CM handler. It will cause a deadlock.
573 			 */
574 			ret = ddi_taskq_dispatch(rds_taskq,
575 			    rds_cleanup_passive_session, (void *)sp,
576 			    DDI_NOSLEEP);
577 			if (ret != DDI_SUCCESS) {
578 				RDS_DPRINTF1("rds_handle_cm_event_failure",
579 				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
580 			}
581 			return (IBT_CM_ACCEPT);
582 		}
583 	}
584 	rw_exit(&sp->session_lock);
585 
586 	RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
587 	return (IBT_CM_ACCEPT);
588 }
589 
590 /*
591  * CM Handler
592  *
593  * Called by IBCM
594  * The cm_private type differs for active and passive events.
595  */
596 ibt_cm_status_t
597 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
598     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
599     ibt_priv_data_len_t ret_len_max)
600 {
601 	ibt_cm_status_t		ret = IBT_CM_ACCEPT;
602 
603 	RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
604 
605 	switch (eventp->cm_type) {
606 	case IBT_CM_EVENT_REQ_RCV:
607 		ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
608 		    ret_args, ret_priv_data, ret_len_max);
609 		break;
610 	case IBT_CM_EVENT_REP_RCV:
611 		ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
612 		    ret_len_max);
613 		break;
614 	case IBT_CM_EVENT_MRA_RCV:
615 		/* Not supported */
616 		break;
617 	case IBT_CM_EVENT_CONN_EST:
618 		ret = rds_handle_cm_conn_est(eventp);
619 		break;
620 	case IBT_CM_EVENT_CONN_CLOSED:
621 		ret = rds_handle_cm_conn_closed(eventp);
622 		break;
623 	case IBT_CM_EVENT_FAILURE:
624 		ret = rds_handle_cm_event_failure(eventp);
625 		break;
626 	case IBT_CM_EVENT_LAP_RCV:
627 		/* Not supported */
628 		RDS_DPRINTF2(LABEL, "LAP message received");
629 		break;
630 	case IBT_CM_EVENT_APR_RCV:
631 		/* Not supported */
632 		RDS_DPRINTF2(LABEL, "APR message received");
633 		break;
634 	default:
635 		break;
636 	}
637 
638 	RDS_DPRINTF2("rds_cm_handler", "Return");
639 
640 	return (ret);
641 }
642 
643 /*
644  * Register the wellknown service with service id: RDS_SERVICE_ID
645  * Incoming connection requests should arrive on this service id.
646  */
647 ibt_srv_hdl_t
648 rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
649 {
650 	ibt_srv_hdl_t	srvhdl;
651 	ibt_srv_desc_t	srvdesc;
652 	int		ret;
653 
654 	RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
655 
656 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
657 	srvdesc.sd_handler = rds_cm_handler;
658 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
659 
660 	ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID,
661 	    1, &srvhdl, NULL);
662 	if (ret != IBT_SUCCESS) {
663 		RDS_DPRINTF2(LABEL, "RDS Service Registration Failed: %d",
664 		    ret);
665 		return (NULL);
666 	}
667 
668 	RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
669 	return (srvhdl);
670 }
671 
672 /* Bind the RDS service on all ports */
673 int
674 rds_bind_service(rds_state_t *statep)
675 {
676 	rds_hca_t	*hcap;
677 	ib_gid_t	gid;
678 	uint_t		jx, nbinds = 0, nports = 0;
679 	int		ret;
680 
681 	RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
682 
683 	hcap = statep->rds_hcalistp;
684 	while (hcap != NULL) {
685 		for (jx = 0; jx < hcap->hca_nports; jx++) {
686 			nports++;
687 			if (hcap->hca_pinfop[jx].p_linkstate !=
688 			    IBT_PORT_ACTIVE) {
689 				/*
690 				 * service bind will be called in the async
691 				 * handler when the port comes up
692 				 */
693 				continue;
694 			}
695 
696 			gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
697 			RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
698 			    "gid: %llx:%llx", hcap->hca_guid,
699 			    hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
700 			    gid.gid_guid);
701 
702 			/* pass statep as cm_private */
703 			ret = ibt_bind_service(statep->rds_srvhdl, gid,
704 			    NULL, statep, NULL);
705 			if (ret != IBT_SUCCESS) {
706 				RDS_DPRINTF2(LABEL, "Bind service for "
707 				    "HCA: 0x%llx Port: %d gid %llx:%llx "
708 				    "failed: %d", hcap->hca_guid,
709 				    hcap->hca_pinfop[jx].p_port_num,
710 				    gid.gid_prefix, gid.gid_guid, ret);
711 				continue;
712 			}
713 
714 			nbinds++;
715 		}
716 		hcap = hcap->hca_nextp;
717 	}
718 
719 	RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
720 	    nbinds, nports);
721 
722 #if 0
723 	if (nbinds == 0) {
724 		return (-1);
725 	}
726 #endif
727 
728 	RDS_DPRINTF2("rds_bind_service", "Return");
729 
730 	return (0);
731 }
732 
733 /* Open an RC connection */
734 int
735 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
736     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
737 {
738 	rds_session_t		*sp;
739 	ibt_chan_open_args_t	ocargs;
740 	ibt_rc_returns_t	ocrets;
741 	rds_cm_private_data_t	cmp;
742 	uint8_t			hca_port;
743 	ibt_channel_hdl_t	hdl;
744 	int			ret = 0;
745 
746 	RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
747 
748 	sp = ep->ep_sp;
749 
750 	hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
751 
752 	hdl = rds_ep_alloc_rc_channel(ep, hca_port);
753 	if (hdl == NULL) {
754 		return (-1);
755 	}
756 
757 	cmp.cmp_version = RDS_VERSION;
758 	cmp.cmp_arch = RDS_THIS_ARCH;
759 	cmp.cmp_remip = sp->session_remip;
760 	cmp.cmp_localip = sp->session_myip;
761 	cmp.cmp_eptype = ep->ep_type;
762 	cmp.cmp_failover = sp->session_failover;
763 	cmp.cmp_last_bufid = ep->ep_rbufid;
764 	cmp.cmp_user_buffer_size = UserBufferSize;
765 	cmp.cmp_ack_addr = ep->ep_ack_addr;
766 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
767 
768 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
769 	bzero(&ocrets, sizeof (ibt_rc_returns_t));
770 	ocargs.oc_path = pinfo;
771 	ocargs.oc_cm_handler = rds_cm_handler;
772 	ocargs.oc_cm_clnt_private = NULL;
773 	ocargs.oc_rdma_ra_out = 4;
774 	ocargs.oc_rdma_ra_in = 4;
775 	ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
776 	ocargs.oc_priv_data = &cmp;
777 	ocargs.oc_path_retry_cnt = IBPathRetryCount;
778 	ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
779 	ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
780 	    mode, &ocargs, &ocrets);
781 	if (ret != IBT_SUCCESS) {
782 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
783 		    "failed: %d", sp, ep, ret);
784 		(void) ibt_flush_channel(hdl);
785 		(void) ibt_free_channel(hdl);
786 		/* cleanup stuff allocated in rds_ep_alloc_rc_channel */
787 		(void) ibt_free_cq(ep->ep_recvcq);
788 		ep->ep_recvcq = NULL;
789 		(void) ibt_free_cq(ep->ep_sendcq);
790 		ep->ep_sendcq = NULL;
791 	}
792 
793 	*chanhdl = hdl;
794 
795 	RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
796 	    *chanhdl);
797 
798 	return (ret);
799 }
800 
801 int
802 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
803 {
804 	int	ret;
805 
806 	RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
807 	    chanhdl, mode);
808 
809 	ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
810 
811 	RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
812 
813 	return (ret);
814 }
815