xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rdsv3/ib_cm.c (revision 13118:e192495818d4)
112198SEiji.Ota@Sun.COM /*
212198SEiji.Ota@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
312198SEiji.Ota@Sun.COM  */
412198SEiji.Ota@Sun.COM 
512198SEiji.Ota@Sun.COM /*
612763SGiri.Adari@Sun.COM  * This file contains code imported from the OFED rds source file ib_cm.c
712763SGiri.Adari@Sun.COM  * Oracle elects to have and use the contents of ib_cm.c under and governed
812763SGiri.Adari@Sun.COM  * by the OpenIB.org BSD license (see below for full license text). However,
912763SGiri.Adari@Sun.COM  * the following notice accompanied the original version of this file:
1012763SGiri.Adari@Sun.COM  */
1112763SGiri.Adari@Sun.COM 
1212763SGiri.Adari@Sun.COM /*
1312198SEiji.Ota@Sun.COM  * Copyright (c) 2006 Oracle.  All rights reserved.
1412198SEiji.Ota@Sun.COM  *
1512198SEiji.Ota@Sun.COM  * This software is available to you under a choice of one of two
1612198SEiji.Ota@Sun.COM  * licenses.  You may choose to be licensed under the terms of the GNU
1712198SEiji.Ota@Sun.COM  * General Public License (GPL) Version 2, available from the file
1812198SEiji.Ota@Sun.COM  * COPYING in the main directory of this source tree, or the
1912198SEiji.Ota@Sun.COM  * OpenIB.org BSD license below:
2012198SEiji.Ota@Sun.COM  *
2112198SEiji.Ota@Sun.COM  *     Redistribution and use in source and binary forms, with or
2212198SEiji.Ota@Sun.COM  *     without modification, are permitted provided that the following
2312198SEiji.Ota@Sun.COM  *     conditions are met:
2412198SEiji.Ota@Sun.COM  *
2512198SEiji.Ota@Sun.COM  *      - Redistributions of source code must retain the above
2612198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
2712198SEiji.Ota@Sun.COM  *        disclaimer.
2812198SEiji.Ota@Sun.COM  *
2912198SEiji.Ota@Sun.COM  *      - Redistributions in binary form must reproduce the above
3012198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
3112198SEiji.Ota@Sun.COM  *        disclaimer in the documentation and/or other materials
3212198SEiji.Ota@Sun.COM  *        provided with the distribution.
3312198SEiji.Ota@Sun.COM  *
3412198SEiji.Ota@Sun.COM  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3512198SEiji.Ota@Sun.COM  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3612198SEiji.Ota@Sun.COM  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
3712198SEiji.Ota@Sun.COM  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
3812198SEiji.Ota@Sun.COM  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3912198SEiji.Ota@Sun.COM  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
4012198SEiji.Ota@Sun.COM  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4112198SEiji.Ota@Sun.COM  * SOFTWARE.
4212198SEiji.Ota@Sun.COM  *
4312198SEiji.Ota@Sun.COM  */
4412198SEiji.Ota@Sun.COM #include <sys/rds.h>
4512198SEiji.Ota@Sun.COM 
4612198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/ofed_kernel.h>
4712198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/rdma/ib_addr.h>
4812198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/rdma/rdma_cm.h>
4912198SEiji.Ota@Sun.COM 
5012198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3.h>
5112198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/ib.h>
5212198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
5312198SEiji.Ota@Sun.COM 
5412676SEiji.Ota@Sun.COM extern int rdsv3_enable_snd_cq;
5512198SEiji.Ota@Sun.COM 
5612198SEiji.Ota@Sun.COM /*
5712198SEiji.Ota@Sun.COM  * Set the selected protocol version
5812198SEiji.Ota@Sun.COM  */
5912198SEiji.Ota@Sun.COM static void
rdsv3_ib_set_protocol(struct rdsv3_connection * conn,unsigned int version)6012198SEiji.Ota@Sun.COM rdsv3_ib_set_protocol(struct rdsv3_connection *conn, unsigned int version)
6112198SEiji.Ota@Sun.COM {
6212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_ib_set_protocol", "conn: %p version: %d",
6312198SEiji.Ota@Sun.COM 	    conn, version);
6412198SEiji.Ota@Sun.COM 	conn->c_version = version;
6512198SEiji.Ota@Sun.COM }
6612198SEiji.Ota@Sun.COM 
6712198SEiji.Ota@Sun.COM /*
6812198SEiji.Ota@Sun.COM  * Set up flow control
6912198SEiji.Ota@Sun.COM  */
7012198SEiji.Ota@Sun.COM static void
rdsv3_ib_set_flow_control(struct rdsv3_connection * conn,uint32_t credits)7112198SEiji.Ota@Sun.COM rdsv3_ib_set_flow_control(struct rdsv3_connection *conn, uint32_t credits)
7212198SEiji.Ota@Sun.COM {
7312198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
7412198SEiji.Ota@Sun.COM 
7512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_set_flow_control",
7612198SEiji.Ota@Sun.COM 	    "Enter: conn: %p credits: %d", conn, credits);
7712198SEiji.Ota@Sun.COM 
7812198SEiji.Ota@Sun.COM 	if (rdsv3_ib_sysctl_flow_control && credits != 0) {
7912198SEiji.Ota@Sun.COM 		/* We're doing flow control */
8012198SEiji.Ota@Sun.COM 		ic->i_flowctl = 1;
8112198SEiji.Ota@Sun.COM 		rdsv3_ib_send_add_credits(conn, credits);
8212198SEiji.Ota@Sun.COM 	} else {
8312198SEiji.Ota@Sun.COM 		ic->i_flowctl = 0;
8412198SEiji.Ota@Sun.COM 	}
8512198SEiji.Ota@Sun.COM 
8612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_set_flow_control",
8712198SEiji.Ota@Sun.COM 	    "Return: conn: %p credits: %d",
8812198SEiji.Ota@Sun.COM 	    conn, credits);
8912198SEiji.Ota@Sun.COM }
9012198SEiji.Ota@Sun.COM 
9112198SEiji.Ota@Sun.COM /*
9212198SEiji.Ota@Sun.COM  * Tune RNR behavior. Without flow control, we use a rather
9312198SEiji.Ota@Sun.COM  * low timeout, but not the absolute minimum - this should
9412198SEiji.Ota@Sun.COM  * be tunable.
9512198SEiji.Ota@Sun.COM  *
9612198SEiji.Ota@Sun.COM  * We already set the RNR retry count to 7 (which is the
9712198SEiji.Ota@Sun.COM  * smallest infinite number :-) above.
9812198SEiji.Ota@Sun.COM  * If flow control is off, we want to change this back to 0
9912198SEiji.Ota@Sun.COM  * so that we learn quickly when our credit accounting is
10012198SEiji.Ota@Sun.COM  * buggy.
10112198SEiji.Ota@Sun.COM  *
10212198SEiji.Ota@Sun.COM  * Caller passes in a qp_attr pointer - don't waste stack spacv
10312198SEiji.Ota@Sun.COM  * by allocation this twice.
10412198SEiji.Ota@Sun.COM  */
10512198SEiji.Ota@Sun.COM static void
rdsv3_ib_tune_rnr(struct rdsv3_ib_connection * ic,struct ib_qp_attr * attr)10612198SEiji.Ota@Sun.COM rdsv3_ib_tune_rnr(struct rdsv3_ib_connection *ic, struct ib_qp_attr *attr)
10712198SEiji.Ota@Sun.COM {
10812198SEiji.Ota@Sun.COM 	int ret;
10912198SEiji.Ota@Sun.COM 
11012198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_tune_rnr", "Enter ic: %p attr: %p",
11112198SEiji.Ota@Sun.COM 	    ic, attr);
11212198SEiji.Ota@Sun.COM 
11312198SEiji.Ota@Sun.COM 	attr->min_rnr_timer = IB_RNR_TIMER_000_32;
11412198SEiji.Ota@Sun.COM 	ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER);
11512198SEiji.Ota@Sun.COM 	if (ret)
11612320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_tune_rnr",
11712198SEiji.Ota@Sun.COM 		    "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d", -ret);
11812198SEiji.Ota@Sun.COM }
11912198SEiji.Ota@Sun.COM 
12012198SEiji.Ota@Sun.COM /*
12112198SEiji.Ota@Sun.COM  * Connection established.
12212198SEiji.Ota@Sun.COM  * We get here for both outgoing and incoming connection.
12312198SEiji.Ota@Sun.COM  */
12412198SEiji.Ota@Sun.COM void
rdsv3_ib_cm_connect_complete(struct rdsv3_connection * conn,struct rdma_cm_event * event)12512198SEiji.Ota@Sun.COM rdsv3_ib_cm_connect_complete(struct rdsv3_connection *conn,
12612198SEiji.Ota@Sun.COM     struct rdma_cm_event *event)
12712198SEiji.Ota@Sun.COM {
12812198SEiji.Ota@Sun.COM 	const struct rdsv3_ib_connect_private *dp = NULL;
12912198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
13012676SEiji.Ota@Sun.COM 	struct rdsv3_ib_device *rds_ibdev =
13112676SEiji.Ota@Sun.COM 	    ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client);
13212198SEiji.Ota@Sun.COM 	struct ib_qp_attr qp_attr;
13312198SEiji.Ota@Sun.COM 	int err;
13412198SEiji.Ota@Sun.COM 
13512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
13612198SEiji.Ota@Sun.COM 	    "Enter conn: %p event: %p", conn, event);
13712198SEiji.Ota@Sun.COM 
13812198SEiji.Ota@Sun.COM 	if (event->param.conn.private_data_len >= sizeof (*dp)) {
13912198SEiji.Ota@Sun.COM 		dp = event->param.conn.private_data;
14012198SEiji.Ota@Sun.COM 
14112198SEiji.Ota@Sun.COM 		/* make sure it isn't empty data */
14212198SEiji.Ota@Sun.COM 		if (dp->dp_protocol_major) {
14312198SEiji.Ota@Sun.COM 			rdsv3_ib_set_protocol(conn,
14412198SEiji.Ota@Sun.COM 			    RDS_PROTOCOL(dp->dp_protocol_major,
14512198SEiji.Ota@Sun.COM 			    dp->dp_protocol_minor));
14612198SEiji.Ota@Sun.COM 			rdsv3_ib_set_flow_control(conn,
14712198SEiji.Ota@Sun.COM 			    ntohl(dp->dp_credit));
14812198SEiji.Ota@Sun.COM 		}
14912198SEiji.Ota@Sun.COM 	}
15012198SEiji.Ota@Sun.COM 
15112676SEiji.Ota@Sun.COM 	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
15212676SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
15312676SEiji.Ota@Sun.COM 		    "RDS/IB: Connection to %u.%u.%u.%u version %u.%u failed",
15412676SEiji.Ota@Sun.COM 		    NIPQUAD(conn->c_faddr),
15512676SEiji.Ota@Sun.COM 		    RDS_PROTOCOL_MAJOR(conn->c_version),
15612676SEiji.Ota@Sun.COM 		    RDS_PROTOCOL_MINOR(conn->c_version));
15712676SEiji.Ota@Sun.COM 		rdsv3_conn_destroy(conn);
15812676SEiji.Ota@Sun.COM 		return;
15912676SEiji.Ota@Sun.COM 	} else {
16012676SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
16112676SEiji.Ota@Sun.COM 		    "RDS/IB: connected to %u.%u.%u.%u version %u.%u%s",
16212676SEiji.Ota@Sun.COM 		    NIPQUAD(conn->c_faddr),
16312676SEiji.Ota@Sun.COM 		    RDS_PROTOCOL_MAJOR(conn->c_version),
16412676SEiji.Ota@Sun.COM 		    RDS_PROTOCOL_MINOR(conn->c_version),
16512676SEiji.Ota@Sun.COM 		    ic->i_flowctl ? ", flow control" : "");
16612676SEiji.Ota@Sun.COM 	}
16712676SEiji.Ota@Sun.COM 
16812676SEiji.Ota@Sun.COM 	ASSERT(ic->i_soft_cq == NULL);
16912676SEiji.Ota@Sun.COM 	ic->i_soft_cq = rdsv3_af_intr_thr_create(rdsv3_ib_tasklet_fn,
17012676SEiji.Ota@Sun.COM 	    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
17112676SEiji.Ota@Sun.COM 	    ic->i_cq->ibt_cq);
17212676SEiji.Ota@Sun.COM 	if (rdsv3_enable_snd_cq) {
17312676SEiji.Ota@Sun.COM 		ic->i_snd_soft_cq = rdsv3_af_intr_thr_create(
17412676SEiji.Ota@Sun.COM 		    rdsv3_ib_snd_tasklet_fn,
17512676SEiji.Ota@Sun.COM 		    (void *)ic, SCQ_INTR_BIND_CPU, rds_ibdev->aft_hcagp,
17612676SEiji.Ota@Sun.COM 		    ic->i_snd_cq->ibt_cq);
17712676SEiji.Ota@Sun.COM 	}
178*13118SEiji.Ota@Sun.COM 	/* rdsv3_ib_refill_fn is expecting i_max_recv_alloc set */
179*13118SEiji.Ota@Sun.COM 	ic->i_max_recv_alloc = rdsv3_ib_sysctl_max_recv_allocation;
18012676SEiji.Ota@Sun.COM 	ic->i_refill_rq = rdsv3_af_thr_create(rdsv3_ib_refill_fn, (void *)conn,
18112676SEiji.Ota@Sun.COM 	    SCQ_WRK_BIND_CPU, rds_ibdev->aft_hcagp);
18212676SEiji.Ota@Sun.COM 	rdsv3_af_grp_draw(rds_ibdev->aft_hcagp);
18312676SEiji.Ota@Sun.COM 
18412676SEiji.Ota@Sun.COM 	(void) ib_req_notify_cq(ic->i_cq, IB_CQ_SOLICITED);
18512676SEiji.Ota@Sun.COM 	if (rdsv3_enable_snd_cq) {
18612676SEiji.Ota@Sun.COM 		(void) ib_req_notify_cq(ic->i_snd_cq, IB_CQ_NEXT_COMP);
18712676SEiji.Ota@Sun.COM 	}
18812198SEiji.Ota@Sun.COM 
18912198SEiji.Ota@Sun.COM 	/*
19012198SEiji.Ota@Sun.COM 	 * Init rings and fill recv. this needs to wait until protocol
19112198SEiji.Ota@Sun.COM 	 * negotiation
19212198SEiji.Ota@Sun.COM 	 * is complete, since ring layout is different from 3.0 to 3.1.
19312198SEiji.Ota@Sun.COM 	 */
19412198SEiji.Ota@Sun.COM 	rdsv3_ib_send_init_ring(ic);
19512198SEiji.Ota@Sun.COM 	rdsv3_ib_recv_init_ring(ic);
19612198SEiji.Ota@Sun.COM 	/*
19712198SEiji.Ota@Sun.COM 	 * Post receive buffers - as a side effect, this will update
19812198SEiji.Ota@Sun.COM 	 * the posted credit count.
19912198SEiji.Ota@Sun.COM 	 */
20012676SEiji.Ota@Sun.COM 	(void) rdsv3_ib_recv_refill(conn, 1);
20112198SEiji.Ota@Sun.COM 
20212198SEiji.Ota@Sun.COM 	/* Tune RNR behavior */
20312198SEiji.Ota@Sun.COM 	rdsv3_ib_tune_rnr(ic, &qp_attr);
20412198SEiji.Ota@Sun.COM 
20512198SEiji.Ota@Sun.COM 	qp_attr.qp_state = IB_QPS_RTS;
20612198SEiji.Ota@Sun.COM 	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
20712198SEiji.Ota@Sun.COM 	if (err)
20812320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
20912198SEiji.Ota@Sun.COM 		    "ib_modify_qp(IB_QP_STATE, RTS): err=%d", err);
21012198SEiji.Ota@Sun.COM 
21112198SEiji.Ota@Sun.COM 	/* update ib_device with this local ipaddr & conn */
21212198SEiji.Ota@Sun.COM 	err = rdsv3_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
21312198SEiji.Ota@Sun.COM 	if (err)
21412320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
21512198SEiji.Ota@Sun.COM 		    "rdsv3_ib_update_ipaddr failed (%d)", err);
21612198SEiji.Ota@Sun.COM 	rdsv3_ib_add_conn(rds_ibdev, conn);
21712198SEiji.Ota@Sun.COM 
21812198SEiji.Ota@Sun.COM 	/*
21912198SEiji.Ota@Sun.COM 	 * If the peer gave us the last packet it saw, process this as if
22012198SEiji.Ota@Sun.COM 	 * we had received a regular ACK.
22112198SEiji.Ota@Sun.COM 	 */
22212198SEiji.Ota@Sun.COM 	if (dp && dp->dp_ack_seq)
22312198SEiji.Ota@Sun.COM 		rdsv3_send_drop_acked(conn, ntohll(dp->dp_ack_seq), NULL);
22412198SEiji.Ota@Sun.COM 
22512198SEiji.Ota@Sun.COM 	rdsv3_connect_complete(conn);
22612198SEiji.Ota@Sun.COM 
22712198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_connect_complete",
22812198SEiji.Ota@Sun.COM 	    "Return conn: %p event: %p",
22912198SEiji.Ota@Sun.COM 	    conn, event);
23012198SEiji.Ota@Sun.COM }
23112198SEiji.Ota@Sun.COM 
23212198SEiji.Ota@Sun.COM static void
rdsv3_ib_cm_fill_conn_param(struct rdsv3_connection * conn,struct rdma_conn_param * conn_param,struct rdsv3_ib_connect_private * dp,uint32_t protocol_version,uint32_t max_responder_resources,uint32_t max_initiator_depth)23312198SEiji.Ota@Sun.COM rdsv3_ib_cm_fill_conn_param(struct rdsv3_connection *conn,
23412198SEiji.Ota@Sun.COM     struct rdma_conn_param *conn_param,
23512198SEiji.Ota@Sun.COM     struct rdsv3_ib_connect_private *dp,
23612676SEiji.Ota@Sun.COM     uint32_t protocol_version,
23712676SEiji.Ota@Sun.COM     uint32_t max_responder_resources,
23812676SEiji.Ota@Sun.COM     uint32_t max_initiator_depth)
23912198SEiji.Ota@Sun.COM {
24012676SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
24112676SEiji.Ota@Sun.COM 	struct rdsv3_ib_device *rds_ibdev;
24212676SEiji.Ota@Sun.COM 
24312198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param",
24412198SEiji.Ota@Sun.COM 	    "Enter conn: %p conn_param: %p private: %p version: %d",
24512198SEiji.Ota@Sun.COM 	    conn, conn_param, dp, protocol_version);
24612198SEiji.Ota@Sun.COM 
24712198SEiji.Ota@Sun.COM 	(void) memset(conn_param, 0, sizeof (struct rdma_conn_param));
24812676SEiji.Ota@Sun.COM 
24912676SEiji.Ota@Sun.COM 	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rdsv3_ib_client);
25012676SEiji.Ota@Sun.COM 
25112676SEiji.Ota@Sun.COM 	conn_param->responder_resources =
25212676SEiji.Ota@Sun.COM 	    MIN(rds_ibdev->max_responder_resources, max_responder_resources);
25312676SEiji.Ota@Sun.COM 	conn_param->initiator_depth =
25412676SEiji.Ota@Sun.COM 	    MIN(rds_ibdev->max_initiator_depth, max_initiator_depth);
25512198SEiji.Ota@Sun.COM 	conn_param->retry_count = min(rdsv3_ib_retry_count, 7);
25612198SEiji.Ota@Sun.COM 	conn_param->rnr_retry_count = 7;
25712198SEiji.Ota@Sun.COM 
25812198SEiji.Ota@Sun.COM 	if (dp) {
25912198SEiji.Ota@Sun.COM 		(void) memset(dp, 0, sizeof (*dp));
26012198SEiji.Ota@Sun.COM 		dp->dp_saddr = conn->c_laddr;
26112198SEiji.Ota@Sun.COM 		dp->dp_daddr = conn->c_faddr;
26212198SEiji.Ota@Sun.COM 		dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
26312198SEiji.Ota@Sun.COM 		dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
26412198SEiji.Ota@Sun.COM 		dp->dp_protocol_minor_mask =
26512198SEiji.Ota@Sun.COM 		    htons(RDSV3_IB_SUPPORTED_PROTOCOLS);
26612198SEiji.Ota@Sun.COM 		dp->dp_ack_seq = rdsv3_ib_piggyb_ack(ic);
26712198SEiji.Ota@Sun.COM 
26812198SEiji.Ota@Sun.COM 		/* Advertise flow control */
26912198SEiji.Ota@Sun.COM 		if (ic->i_flowctl) {
27012198SEiji.Ota@Sun.COM 			unsigned int credits;
27112198SEiji.Ota@Sun.COM 
27212198SEiji.Ota@Sun.COM 			credits = IB_GET_POST_CREDITS(
27312198SEiji.Ota@Sun.COM 			    atomic_get(&ic->i_credits));
27412198SEiji.Ota@Sun.COM 			dp->dp_credit = htonl(credits);
27512198SEiji.Ota@Sun.COM 			atomic_add_32(&ic->i_credits,
27612198SEiji.Ota@Sun.COM 			    -IB_SET_POST_CREDITS(credits));
27712198SEiji.Ota@Sun.COM 		}
27812198SEiji.Ota@Sun.COM 
27912198SEiji.Ota@Sun.COM 		conn_param->private_data = dp;
28012198SEiji.Ota@Sun.COM 		conn_param->private_data_len = sizeof (*dp);
28112198SEiji.Ota@Sun.COM 	}
28212198SEiji.Ota@Sun.COM 
28312198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_fill_conn_param",
28412198SEiji.Ota@Sun.COM 	    "Return conn: %p conn_param: %p private: %p version: %d",
28512198SEiji.Ota@Sun.COM 	    conn, conn_param, dp, protocol_version);
28612198SEiji.Ota@Sun.COM }
28712198SEiji.Ota@Sun.COM 
28812198SEiji.Ota@Sun.COM static void
rdsv3_ib_cq_event_handler(struct ib_event * event,void * data)28912198SEiji.Ota@Sun.COM rdsv3_ib_cq_event_handler(struct ib_event *event, void *data)
29012198SEiji.Ota@Sun.COM {
29112198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF3("rdsv3_ib_cq_event_handler", "event %u data %p",
29212198SEiji.Ota@Sun.COM 	    event->event, data);
29312198SEiji.Ota@Sun.COM }
29412198SEiji.Ota@Sun.COM 
29512198SEiji.Ota@Sun.COM static void
rdsv3_ib_snd_cq_comp_handler(struct ib_cq * cq,void * context)29612676SEiji.Ota@Sun.COM rdsv3_ib_snd_cq_comp_handler(struct ib_cq *cq, void *context)
29712676SEiji.Ota@Sun.COM {
29812676SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = context;
29912676SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
30012676SEiji.Ota@Sun.COM 
30112676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_ib_snd_cq_comp_handler",
30212676SEiji.Ota@Sun.COM 	    "Enter(conn: %p ic: %p cq: %p)", conn, ic, cq);
30312676SEiji.Ota@Sun.COM 
30412676SEiji.Ota@Sun.COM 	rdsv3_af_thr_fire(ic->i_snd_soft_cq);
30512676SEiji.Ota@Sun.COM }
30612676SEiji.Ota@Sun.COM 
30712676SEiji.Ota@Sun.COM void
rdsv3_ib_snd_tasklet_fn(void * data)30812676SEiji.Ota@Sun.COM rdsv3_ib_snd_tasklet_fn(void *data)
30912676SEiji.Ota@Sun.COM {
31012676SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = (struct rdsv3_ib_connection *)data;
31112676SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = ic->conn;
31212676SEiji.Ota@Sun.COM 	struct rdsv3_ib_ack_state ack_state = { 0, };
31312676SEiji.Ota@Sun.COM 	ibt_wc_t wc;
31412676SEiji.Ota@Sun.COM 	uint_t polled;
31512676SEiji.Ota@Sun.COM 
31612676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_ib_snd_tasklet_fn",
31712676SEiji.Ota@Sun.COM 	    "Enter(conn: %p ic: %p)", conn, ic);
31812676SEiji.Ota@Sun.COM 
31912676SEiji.Ota@Sun.COM 	/*
32012676SEiji.Ota@Sun.COM 	 * Poll in a loop before and after enabling the next event
32112676SEiji.Ota@Sun.COM 	 */
32212676SEiji.Ota@Sun.COM 	while (ibt_poll_cq(RDSV3_CQ2CQHDL(ic->i_snd_cq), &wc, 1, &polled) ==
32312676SEiji.Ota@Sun.COM 	    IBT_SUCCESS) {
32412794SGiri.Adari@Sun.COM 		RDSV3_DPRINTF4("rdsv3_ib_snd_tasklet_fn",
32512676SEiji.Ota@Sun.COM 		    "wc_id 0x%llx type %d status %u byte_len %u imm_data %u\n",
32612676SEiji.Ota@Sun.COM 		    (unsigned long long)wc.wc_id, wc.wc_type, wc.wc_status,
32712676SEiji.Ota@Sun.COM 		    wc.wc_bytes_xfer, ntohl(wc.wc_immed_data));
32812676SEiji.Ota@Sun.COM 
32912676SEiji.Ota@Sun.COM 		ASSERT(wc.wc_id & RDSV3_IB_SEND_OP);
33012676SEiji.Ota@Sun.COM 		rdsv3_ib_send_cqe_handler(ic, &wc);
33112676SEiji.Ota@Sun.COM 	}
33212676SEiji.Ota@Sun.COM 	(void) ibt_enable_cq_notify(RDSV3_CQ2CQHDL(ic->i_snd_cq),
33312676SEiji.Ota@Sun.COM 	    IBT_NEXT_COMPLETION);
33412794SGiri.Adari@Sun.COM 	while (ibt_poll_cq(RDSV3_CQ2CQHDL(ic->i_snd_cq), &wc, 1, &polled) ==
33512676SEiji.Ota@Sun.COM 	    IBT_SUCCESS) {
33612794SGiri.Adari@Sun.COM 		RDSV3_DPRINTF4("rdsv3_ib_snd_tasklet_fn",
33712794SGiri.Adari@Sun.COM 		    "wc_id 0x%llx type %d status %u byte_len %u imm_data %u\n",
33812794SGiri.Adari@Sun.COM 		    (unsigned long long)wc.wc_id, wc.wc_type, wc.wc_status,
33912794SGiri.Adari@Sun.COM 		    wc.wc_bytes_xfer, ntohl(wc.wc_immed_data));
34012794SGiri.Adari@Sun.COM 
34112676SEiji.Ota@Sun.COM 		ASSERT(wc.wc_id & RDSV3_IB_SEND_OP);
34212676SEiji.Ota@Sun.COM 		rdsv3_ib_send_cqe_handler(ic, &wc);
34312676SEiji.Ota@Sun.COM 	}
34412676SEiji.Ota@Sun.COM }
34512676SEiji.Ota@Sun.COM 
34612676SEiji.Ota@Sun.COM static void
rdsv3_ib_cq_comp_handler(struct ib_cq * cq,void * context)34712676SEiji.Ota@Sun.COM rdsv3_ib_cq_comp_handler(struct ib_cq *cq, void *context)
34812676SEiji.Ota@Sun.COM {
34912676SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = context;
35012676SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
35112676SEiji.Ota@Sun.COM 
35212676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_ib_cq_comp_handler",
35312676SEiji.Ota@Sun.COM 	    "Enter(conn: %p cq: %p)", conn, cq);
35412676SEiji.Ota@Sun.COM 
35512676SEiji.Ota@Sun.COM 	rdsv3_ib_stats_inc(s_ib_evt_handler_call);
35612676SEiji.Ota@Sun.COM 
35712676SEiji.Ota@Sun.COM 	rdsv3_af_thr_fire(ic->i_soft_cq);
35812676SEiji.Ota@Sun.COM }
35912676SEiji.Ota@Sun.COM 
/*
 * Worker body for the receive-refill thread: calls rdsv3_ib_recv_refill()
 * with a second argument of 0 and ignores its return value.
 *
 *   data - the struct rdsv3_connection passed at thread creation
 */
void
rdsv3_ib_refill_fn(void *data)
{
	struct rdsv3_connection *rconn = data;

	(void) rdsv3_ib_recv_refill(rconn, 0);
}
36712676SEiji.Ota@Sun.COM 
36812676SEiji.Ota@Sun.COM void
rdsv3_ib_tasklet_fn(void * data)36912676SEiji.Ota@Sun.COM rdsv3_ib_tasklet_fn(void *data)
37012676SEiji.Ota@Sun.COM {
37112676SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = (struct rdsv3_ib_connection *)data;
37212676SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = ic->conn;
37312676SEiji.Ota@Sun.COM 	struct rdsv3_ib_ack_state ack_state = { 0, };
374*13118SEiji.Ota@Sun.COM 	ibt_wc_t wc[RDSV3_IB_WC_POLL_SIZE];
37512676SEiji.Ota@Sun.COM 	uint_t polled;
376*13118SEiji.Ota@Sun.COM 	int i;
37712676SEiji.Ota@Sun.COM 
37812676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_ib_tasklet_fn",
37912676SEiji.Ota@Sun.COM 	    "Enter(conn: %p ic: %p)", conn, ic);
38012676SEiji.Ota@Sun.COM 
38112676SEiji.Ota@Sun.COM 	rdsv3_ib_stats_inc(s_ib_tasklet_call);
38212676SEiji.Ota@Sun.COM 
38312676SEiji.Ota@Sun.COM 	/*
38412676SEiji.Ota@Sun.COM 	 * Poll in a loop before and after enabling the next event
38512676SEiji.Ota@Sun.COM 	 */
386*13118SEiji.Ota@Sun.COM 	while (ibt_poll_cq(RDSV3_CQ2CQHDL(ic->i_cq), &wc[0],
387*13118SEiji.Ota@Sun.COM 	    RDSV3_IB_WC_POLL_SIZE, &polled) == IBT_SUCCESS) {
388*13118SEiji.Ota@Sun.COM 		for (i = 0; i < polled; i++) {
389*13118SEiji.Ota@Sun.COM 			RDSV3_DPRINTF4("rdsv3_ib_tasklet_fn",
390*13118SEiji.Ota@Sun.COM 			"wc_id 0x%llx type %d status %u byte_len %u \
391*13118SEiji.Ota@Sun.COM 			    imm_data %u\n",
392*13118SEiji.Ota@Sun.COM 			    (unsigned long long)wc[i].wc_id, wc[i].wc_type,
393*13118SEiji.Ota@Sun.COM 			    wc[i].wc_status, wc[i].wc_bytes_xfer,
394*13118SEiji.Ota@Sun.COM 			    ntohl(wc[i].wc_immed_data));
39512676SEiji.Ota@Sun.COM 
396*13118SEiji.Ota@Sun.COM 			if (wc[i].wc_id & RDSV3_IB_SEND_OP) {
397*13118SEiji.Ota@Sun.COM 				rdsv3_ib_send_cqe_handler(ic, &wc[i]);
398*13118SEiji.Ota@Sun.COM 			} else {
399*13118SEiji.Ota@Sun.COM 				rdsv3_ib_recv_cqe_handler(ic, &wc[i],
400*13118SEiji.Ota@Sun.COM 				    &ack_state);
401*13118SEiji.Ota@Sun.COM 			}
40212676SEiji.Ota@Sun.COM 		}
40312676SEiji.Ota@Sun.COM 	}
40412676SEiji.Ota@Sun.COM 	(void) ibt_enable_cq_notify(RDSV3_CQ2CQHDL(ic->i_cq),
40512676SEiji.Ota@Sun.COM 	    IBT_NEXT_SOLICITED);
406*13118SEiji.Ota@Sun.COM 	while (ibt_poll_cq(RDSV3_CQ2CQHDL(ic->i_cq), &wc[0],
407*13118SEiji.Ota@Sun.COM 	    RDSV3_IB_WC_POLL_SIZE, &polled) == IBT_SUCCESS) {
408*13118SEiji.Ota@Sun.COM 		for (i = 0; i < polled; i++) {
409*13118SEiji.Ota@Sun.COM 			RDSV3_DPRINTF4("rdsv3_ib_tasklet_fn",
410*13118SEiji.Ota@Sun.COM 			"wc_id 0x%llx type %d status %u byte_len %u \
411*13118SEiji.Ota@Sun.COM 			    imm_data %u\n",
412*13118SEiji.Ota@Sun.COM 			    (unsigned long long)wc[i].wc_id, wc[i].wc_type,
413*13118SEiji.Ota@Sun.COM 			    wc[i].wc_status, wc[i].wc_bytes_xfer,
414*13118SEiji.Ota@Sun.COM 			    ntohl(wc[i].wc_immed_data));
415*13118SEiji.Ota@Sun.COM 
416*13118SEiji.Ota@Sun.COM 			if (wc[i].wc_id & RDSV3_IB_SEND_OP) {
417*13118SEiji.Ota@Sun.COM 				rdsv3_ib_send_cqe_handler(ic, &wc[i]);
418*13118SEiji.Ota@Sun.COM 			} else {
419*13118SEiji.Ota@Sun.COM 				rdsv3_ib_recv_cqe_handler(ic, &wc[i],
420*13118SEiji.Ota@Sun.COM 				    &ack_state);
421*13118SEiji.Ota@Sun.COM 			}
422*13118SEiji.Ota@Sun.COM 		}
423*13118SEiji.Ota@Sun.COM 	}
42412676SEiji.Ota@Sun.COM 
42512676SEiji.Ota@Sun.COM 	if (ack_state.ack_next_valid) {
42612676SEiji.Ota@Sun.COM 		rdsv3_ib_set_ack(ic, ack_state.ack_next,
42712676SEiji.Ota@Sun.COM 		    ack_state.ack_required);
42812676SEiji.Ota@Sun.COM 	}
42912676SEiji.Ota@Sun.COM 	if (ack_state.ack_recv_valid && ack_state.ack_recv > ic->i_ack_recv) {
43012676SEiji.Ota@Sun.COM 		rdsv3_send_drop_acked(conn, ack_state.ack_recv, NULL);
43112676SEiji.Ota@Sun.COM 		ic->i_ack_recv = ack_state.ack_recv;
43212676SEiji.Ota@Sun.COM 	}
43312676SEiji.Ota@Sun.COM 	if (rdsv3_conn_up(conn)) {
43412676SEiji.Ota@Sun.COM 		if (!test_bit(RDSV3_LL_SEND_FULL, &conn->c_flags))
43512676SEiji.Ota@Sun.COM 			(void) rdsv3_send_xmit(ic->conn);
43612676SEiji.Ota@Sun.COM 		rdsv3_ib_attempt_ack(ic);
43712676SEiji.Ota@Sun.COM 	}
43812676SEiji.Ota@Sun.COM }
43912676SEiji.Ota@Sun.COM 
44012676SEiji.Ota@Sun.COM static void
rdsv3_ib_qp_event_handler(struct ib_event * event,void * data)44112198SEiji.Ota@Sun.COM rdsv3_ib_qp_event_handler(struct ib_event *event, void *data)
44212198SEiji.Ota@Sun.COM {
44312198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = data;
44412198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
44512198SEiji.Ota@Sun.COM 
44612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_qp_event_handler", "conn %p ic %p event %u",
44712198SEiji.Ota@Sun.COM 	    conn, ic, event->event);
44812198SEiji.Ota@Sun.COM 
44912198SEiji.Ota@Sun.COM 	switch (event->event) {
45012198SEiji.Ota@Sun.COM 	case IB_EVENT_COMM_EST:
45112198SEiji.Ota@Sun.COM 		(void) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
45212198SEiji.Ota@Sun.COM 		break;
45312198SEiji.Ota@Sun.COM 	default:
45412198SEiji.Ota@Sun.COM 		if (conn) {
45512198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_qp_event_handler",
45612198SEiji.Ota@Sun.COM 			    "RDS/IB: Fatal QP Event %u - "
45712198SEiji.Ota@Sun.COM 			    "connection %u.%u.%u.%u ->%u.%u.%u.%u "
45812198SEiji.Ota@Sun.COM 			    "...reconnecting",
45912198SEiji.Ota@Sun.COM 			    event->event, NIPQUAD(conn->c_laddr),
46012198SEiji.Ota@Sun.COM 			    NIPQUAD(conn->c_faddr));
46112198SEiji.Ota@Sun.COM 			rdsv3_conn_drop(conn);
46212198SEiji.Ota@Sun.COM 		} else {
46312198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_qp_event_handler",
46412198SEiji.Ota@Sun.COM 			    "RDS/IB: Fatal QP Event %u - connection"
46512198SEiji.Ota@Sun.COM 			    "...reconnecting", event->event);
46612198SEiji.Ota@Sun.COM 		}
46712198SEiji.Ota@Sun.COM 		break;
46812198SEiji.Ota@Sun.COM 	}
46912198SEiji.Ota@Sun.COM 
47012198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_qp_event_handler", "Return conn: %p event: %p",
47112198SEiji.Ota@Sun.COM 	    conn, event);
47212198SEiji.Ota@Sun.COM }
47312198SEiji.Ota@Sun.COM 
47412198SEiji.Ota@Sun.COM extern int rdsv3_ib_alloc_hdrs(ib_device_t *dev,
47512198SEiji.Ota@Sun.COM     struct rdsv3_ib_connection *ic);
47612198SEiji.Ota@Sun.COM extern void rdsv3_ib_free_hdrs(ib_device_t *dev,
47712198SEiji.Ota@Sun.COM     struct rdsv3_ib_connection *ic);
47812198SEiji.Ota@Sun.COM 
47912198SEiji.Ota@Sun.COM /*
48012198SEiji.Ota@Sun.COM  * This needs to be very careful to not leave IS_ERR pointers around for
48112198SEiji.Ota@Sun.COM  * cleanup to trip over.
48212198SEiji.Ota@Sun.COM  */
48312198SEiji.Ota@Sun.COM static int
rdsv3_ib_setup_qp(struct rdsv3_connection * conn)48412198SEiji.Ota@Sun.COM rdsv3_ib_setup_qp(struct rdsv3_connection *conn)
48512198SEiji.Ota@Sun.COM {
48612198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
48712198SEiji.Ota@Sun.COM 	struct ib_device *dev = ic->i_cm_id->device;
48812198SEiji.Ota@Sun.COM 	struct ib_qp_init_attr attr;
48912198SEiji.Ota@Sun.COM 	struct rdsv3_ib_device *rds_ibdev;
49012198SEiji.Ota@Sun.COM 	ibt_send_wr_t *wrp;
49112198SEiji.Ota@Sun.COM 	ibt_wr_ds_t *sgl;
49212198SEiji.Ota@Sun.COM 	int ret, i;
49312198SEiji.Ota@Sun.COM 
49412198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "Enter conn: %p", conn);
49512198SEiji.Ota@Sun.COM 
49612198SEiji.Ota@Sun.COM 	/*
49712198SEiji.Ota@Sun.COM 	 * rdsv3_ib_add_one creates a rdsv3_ib_device object per IB device,
49812198SEiji.Ota@Sun.COM 	 * and allocates a protection domain, memory range and FMR pool
49912198SEiji.Ota@Sun.COM 	 * for each.  If that fails for any reason, it will not register
50012198SEiji.Ota@Sun.COM 	 * the rds_ibdev at all.
50112198SEiji.Ota@Sun.COM 	 */
50212198SEiji.Ota@Sun.COM 	rds_ibdev = ib_get_client_data(dev, &rdsv3_ib_client);
50312676SEiji.Ota@Sun.COM 	if (!rds_ibdev) {
50412320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
50512198SEiji.Ota@Sun.COM 		    "RDS/IB: No client_data for device %s", dev->name);
50612198SEiji.Ota@Sun.COM 		return (-EOPNOTSUPP);
50712198SEiji.Ota@Sun.COM 	}
50812444SGiri.Adari@Sun.COM 	ic->rds_ibdev = rds_ibdev;
50912198SEiji.Ota@Sun.COM 
51012198SEiji.Ota@Sun.COM 	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
51112198SEiji.Ota@Sun.COM 		rdsv3_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
51212198SEiji.Ota@Sun.COM 	if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
51312198SEiji.Ota@Sun.COM 		rdsv3_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
51412198SEiji.Ota@Sun.COM 
51512198SEiji.Ota@Sun.COM 	/* Protection domain and memory range */
51612198SEiji.Ota@Sun.COM 	ic->i_pd = rds_ibdev->pd;
51712198SEiji.Ota@Sun.COM 
51812414SEiji.Ota@Sun.COM 	/*
51912414SEiji.Ota@Sun.COM 	 * IB_CQ_VECTOR_LEAST_ATTACHED and/or the corresponding feature is
52012414SEiji.Ota@Sun.COM 	 * not implemented in Hermon yet, but we can pass it to ib_create_cq()
52112414SEiji.Ota@Sun.COM 	 * anyway.
52212414SEiji.Ota@Sun.COM 	 */
52312676SEiji.Ota@Sun.COM 	ic->i_cq = ib_create_cq(dev, rdsv3_ib_cq_comp_handler,
52412198SEiji.Ota@Sun.COM 	    rdsv3_ib_cq_event_handler, conn,
52512676SEiji.Ota@Sun.COM 	    ic->i_recv_ring.w_nr + ic->i_send_ring.w_nr + 1,
52612965SWilliam.Taylor@Oracle.COM 	    rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp));
52712676SEiji.Ota@Sun.COM 	if (IS_ERR(ic->i_cq)) {
52812676SEiji.Ota@Sun.COM 		ret = PTR_ERR(ic->i_cq);
52912676SEiji.Ota@Sun.COM 		ic->i_cq = NULL;
53012198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
53112676SEiji.Ota@Sun.COM 		    "ib_create_cq failed: %d", ret);
53212198SEiji.Ota@Sun.COM 		goto out;
53312198SEiji.Ota@Sun.COM 	}
53412676SEiji.Ota@Sun.COM 	if (rdsv3_enable_snd_cq) {
53512676SEiji.Ota@Sun.COM 		ic->i_snd_cq = ib_create_cq(dev, rdsv3_ib_snd_cq_comp_handler,
53612676SEiji.Ota@Sun.COM 		    rdsv3_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1,
53712965SWilliam.Taylor@Oracle.COM 		    rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp));
53812676SEiji.Ota@Sun.COM 		if (IS_ERR(ic->i_snd_cq)) {
53912676SEiji.Ota@Sun.COM 			ret = PTR_ERR(ic->i_snd_cq);
54012676SEiji.Ota@Sun.COM 			(void) ib_destroy_cq(ic->i_cq);
54112676SEiji.Ota@Sun.COM 			ic->i_cq = NULL;
54212676SEiji.Ota@Sun.COM 			ic->i_snd_cq = NULL;
54312676SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
54412676SEiji.Ota@Sun.COM 			    "ib_create_cq send cq failed: %d", ret);
54512676SEiji.Ota@Sun.COM 			goto out;
54612676SEiji.Ota@Sun.COM 		}
54712198SEiji.Ota@Sun.COM 	}
54812198SEiji.Ota@Sun.COM 
54912198SEiji.Ota@Sun.COM 	/* XXX negotiate max send/recv with remote? */
55012198SEiji.Ota@Sun.COM 	(void) memset(&attr, 0, sizeof (attr));
55112198SEiji.Ota@Sun.COM 	attr.event_handler = rdsv3_ib_qp_event_handler;
55212198SEiji.Ota@Sun.COM 	attr.qp_context = conn;
55312198SEiji.Ota@Sun.COM 	/* + 1 to allow for the single ack message */
55412198SEiji.Ota@Sun.COM 	attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1;
55512198SEiji.Ota@Sun.COM 	attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
55612198SEiji.Ota@Sun.COM 	attr.cap.max_send_sge = rds_ibdev->max_sge;
55712198SEiji.Ota@Sun.COM 	attr.cap.max_recv_sge = RDSV3_IB_RECV_SGE;
55812198SEiji.Ota@Sun.COM 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
55912198SEiji.Ota@Sun.COM 	attr.qp_type = IB_QPT_RC;
56012676SEiji.Ota@Sun.COM 	if (rdsv3_enable_snd_cq) {
56112676SEiji.Ota@Sun.COM 		attr.send_cq = ic->i_snd_cq;
56212676SEiji.Ota@Sun.COM 	} else {
56312676SEiji.Ota@Sun.COM 		attr.send_cq = ic->i_cq;
56412676SEiji.Ota@Sun.COM 	}
56512676SEiji.Ota@Sun.COM 	attr.recv_cq = ic->i_cq;
56612198SEiji.Ota@Sun.COM 
56712198SEiji.Ota@Sun.COM 	/*
56812198SEiji.Ota@Sun.COM 	 * XXX this can fail if max_*_wr is too large?  Are we supposed
56912198SEiji.Ota@Sun.COM 	 * to back off until we get a value that the hardware can support?
57012198SEiji.Ota@Sun.COM 	 */
57112198SEiji.Ota@Sun.COM 	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
57212198SEiji.Ota@Sun.COM 	if (ret) {
57312198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
57412198SEiji.Ota@Sun.COM 		    "rdma_create_qp failed: %d", ret);
57512198SEiji.Ota@Sun.COM 		goto out;
57612198SEiji.Ota@Sun.COM 	}
57712198SEiji.Ota@Sun.COM 
57812198SEiji.Ota@Sun.COM 	ret = rdsv3_ib_alloc_hdrs(dev, ic);
57912198SEiji.Ota@Sun.COM 	if (ret != 0) {
58012198SEiji.Ota@Sun.COM 		ret = -ENOMEM;
58112198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
58212198SEiji.Ota@Sun.COM 		    "rdsv3_ib_alloc_hdrs failed: %d", ret);
58312198SEiji.Ota@Sun.COM 		goto out;
58412198SEiji.Ota@Sun.COM 	}
58512198SEiji.Ota@Sun.COM 
58612198SEiji.Ota@Sun.COM 	ic->i_sends = kmem_alloc(ic->i_send_ring.w_nr *
58712198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_ib_send_work), KM_NOSLEEP);
58812198SEiji.Ota@Sun.COM 	if (ic->i_sends == NULL) {
58912198SEiji.Ota@Sun.COM 		ret = -ENOMEM;
59012198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
59112198SEiji.Ota@Sun.COM 		    "send allocation failed: %d", ret);
59212198SEiji.Ota@Sun.COM 		goto out;
59312198SEiji.Ota@Sun.COM 	}
59412198SEiji.Ota@Sun.COM 	(void) memset(ic->i_sends, 0, ic->i_send_ring.w_nr *
59512198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_ib_send_work));
59612198SEiji.Ota@Sun.COM 
59712198SEiji.Ota@Sun.COM 	ic->i_send_wrs =
59812794SGiri.Adari@Sun.COM 	    kmem_alloc(ic->i_send_ring.w_nr * (sizeof (ibt_send_wr_t) +
59912198SEiji.Ota@Sun.COM 	    RDSV3_IB_MAX_SGE * sizeof (ibt_wr_ds_t)), KM_NOSLEEP);
60012198SEiji.Ota@Sun.COM 	if (ic->i_send_wrs == NULL) {
60112198SEiji.Ota@Sun.COM 		ret = -ENOMEM;
60212198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
60312444SGiri.Adari@Sun.COM 		    "Send WR allocation failed: %d", ret);
60412198SEiji.Ota@Sun.COM 		goto out;
60512198SEiji.Ota@Sun.COM 	}
60612198SEiji.Ota@Sun.COM 	sgl = (ibt_wr_ds_t *)((uint8_t *)ic->i_send_wrs +
60712794SGiri.Adari@Sun.COM 	    (ic->i_send_ring.w_nr * sizeof (ibt_send_wr_t)));
60812794SGiri.Adari@Sun.COM 	for (i = 0; i < ic->i_send_ring.w_nr; i++) {
60912198SEiji.Ota@Sun.COM 		wrp = &ic->i_send_wrs[i];
61012198SEiji.Ota@Sun.COM 		wrp->wr_sgl = &sgl[i * RDSV3_IB_MAX_SGE];
61112198SEiji.Ota@Sun.COM 	}
61212198SEiji.Ota@Sun.COM 
61312198SEiji.Ota@Sun.COM 	ic->i_recvs = kmem_alloc(ic->i_recv_ring.w_nr *
61412198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_ib_recv_work), KM_NOSLEEP);
61512198SEiji.Ota@Sun.COM 	if (ic->i_recvs == NULL) {
61612198SEiji.Ota@Sun.COM 		ret = -ENOMEM;
61712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
61812198SEiji.Ota@Sun.COM 		    "recv allocation failed: %d", ret);
61912198SEiji.Ota@Sun.COM 		goto out;
62012198SEiji.Ota@Sun.COM 	}
62112198SEiji.Ota@Sun.COM 	(void) memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr *
62212198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_ib_recv_work));
62312198SEiji.Ota@Sun.COM 
62412444SGiri.Adari@Sun.COM 	ic->i_recv_wrs =
62512444SGiri.Adari@Sun.COM 	    kmem_alloc(ic->i_recv_ring.w_nr * sizeof (ibt_recv_wr_t),
62612444SGiri.Adari@Sun.COM 	    KM_NOSLEEP);
62712444SGiri.Adari@Sun.COM 	if (ic->i_recv_wrs == NULL) {
62812444SGiri.Adari@Sun.COM 		ret = -ENOMEM;
62912444SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_setup_qp",
63012444SGiri.Adari@Sun.COM 		    "Recv WR allocation failed: %d", ret);
63112444SGiri.Adari@Sun.COM 		goto out;
63212444SGiri.Adari@Sun.COM 	}
63312444SGiri.Adari@Sun.COM 
63412198SEiji.Ota@Sun.COM 	rdsv3_ib_recv_init_ack(ic);
63512198SEiji.Ota@Sun.COM 
63612676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_setup_qp", "conn %p pd %p mr %p cq %p",
63712676SEiji.Ota@Sun.COM 	    conn, ic->i_pd, ic->i_mr, ic->i_cq);
63812198SEiji.Ota@Sun.COM 
63912198SEiji.Ota@Sun.COM out:
64012198SEiji.Ota@Sun.COM 	return (ret);
64112198SEiji.Ota@Sun.COM }
64212198SEiji.Ota@Sun.COM 
64312198SEiji.Ota@Sun.COM static uint32_t
rdsv3_ib_protocol_compatible(struct rdma_cm_event * event)64412198SEiji.Ota@Sun.COM rdsv3_ib_protocol_compatible(struct rdma_cm_event *event)
64512198SEiji.Ota@Sun.COM {
64612198SEiji.Ota@Sun.COM 	const struct rdsv3_ib_connect_private *dp =
64712198SEiji.Ota@Sun.COM 	    event->param.conn.private_data;
64812198SEiji.Ota@Sun.COM 	uint16_t common;
64912198SEiji.Ota@Sun.COM 	uint32_t version = 0;
65012198SEiji.Ota@Sun.COM 
65112198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_protocol_compatible", "Enter event: %p",
65212198SEiji.Ota@Sun.COM 	    event);
65312198SEiji.Ota@Sun.COM 
65412198SEiji.Ota@Sun.COM 	/*
65512198SEiji.Ota@Sun.COM 	 * rdma_cm private data is odd - when there is any private data in the
65612198SEiji.Ota@Sun.COM 	 * request, we will be given a pretty large buffer without telling us
65712198SEiji.Ota@Sun.COM 	 * the
65812198SEiji.Ota@Sun.COM 	 * original size. The only way to tell the difference is by looking at
65912198SEiji.Ota@Sun.COM 	 * the contents, which are initialized to zero.
66012198SEiji.Ota@Sun.COM 	 * If the protocol version fields aren't set,
66112198SEiji.Ota@Sun.COM 	 * this is a connection attempt
66212198SEiji.Ota@Sun.COM 	 * from an older version. This could could be 3.0 or 2.0 -
66312198SEiji.Ota@Sun.COM 	 * we can't tell.
66412198SEiji.Ota@Sun.COM 	 * We really should have changed this for OFED 1.3 :-(
66512198SEiji.Ota@Sun.COM 	 */
66612198SEiji.Ota@Sun.COM 
66712198SEiji.Ota@Sun.COM 	/* Be paranoid. RDS always has privdata */
66812198SEiji.Ota@Sun.COM 	if (!event->param.conn.private_data_len) {
66912198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_protocol_compatible",
67012198SEiji.Ota@Sun.COM 		    "RDS incoming connection has no private data, rejecting");
67112198SEiji.Ota@Sun.COM 		return (0);
67212198SEiji.Ota@Sun.COM 	}
67312198SEiji.Ota@Sun.COM 
67412198SEiji.Ota@Sun.COM 	/* Even if len is crap *now* I still want to check it. -ASG */
67512198SEiji.Ota@Sun.COM 	if (event->param.conn.private_data_len < sizeof (*dp) ||
67612198SEiji.Ota@Sun.COM 	    dp->dp_protocol_major == 0)
67712198SEiji.Ota@Sun.COM 		return (RDS_PROTOCOL_3_0);
67812198SEiji.Ota@Sun.COM 
67912198SEiji.Ota@Sun.COM 	common = ntohs(dp->dp_protocol_minor_mask) &
68012198SEiji.Ota@Sun.COM 	    RDSV3_IB_SUPPORTED_PROTOCOLS;
68112198SEiji.Ota@Sun.COM 	if (dp->dp_protocol_major == 3 && common) {
68212198SEiji.Ota@Sun.COM 		version = RDS_PROTOCOL_3_0;
68312198SEiji.Ota@Sun.COM 		while ((common >>= 1) != 0)
68412198SEiji.Ota@Sun.COM 			version++;
68512198SEiji.Ota@Sun.COM 	} else {
68612320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_protocol_compatible",
68712198SEiji.Ota@Sun.COM 		    "RDS: Connection from %u.%u.%u.%u using "
68812198SEiji.Ota@Sun.COM 		    "incompatible protocol version %u.%u\n",
68912198SEiji.Ota@Sun.COM 		    NIPQUAD(dp->dp_saddr),
69012198SEiji.Ota@Sun.COM 		    dp->dp_protocol_major,
69112198SEiji.Ota@Sun.COM 		    dp->dp_protocol_minor);
69212198SEiji.Ota@Sun.COM 	}
69312198SEiji.Ota@Sun.COM 
69412198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_protocol_compatible", "Return event: %p",
69512198SEiji.Ota@Sun.COM 	    event);
69612198SEiji.Ota@Sun.COM 
69712198SEiji.Ota@Sun.COM 	return (version);
69812198SEiji.Ota@Sun.COM }
69912198SEiji.Ota@Sun.COM 
70012198SEiji.Ota@Sun.COM int
rdsv3_ib_cm_handle_connect(struct rdma_cm_id * cm_id,struct rdma_cm_event * event)70112198SEiji.Ota@Sun.COM rdsv3_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
70212198SEiji.Ota@Sun.COM     struct rdma_cm_event *event)
70312198SEiji.Ota@Sun.COM {
70412198SEiji.Ota@Sun.COM 	uint64_be_t lguid = cm_id->route.path_rec->sgid.global.interface_id;
70512198SEiji.Ota@Sun.COM 	uint64_be_t fguid = cm_id->route.path_rec->dgid.global.interface_id;
70612198SEiji.Ota@Sun.COM 	const struct rdsv3_ib_connect_private *dp =
70712198SEiji.Ota@Sun.COM 	    event->param.conn.private_data;
70812198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connect_private dp_rep;
70912198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = NULL;
71012198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = NULL;
71112198SEiji.Ota@Sun.COM 	struct rdma_conn_param conn_param;
71212198SEiji.Ota@Sun.COM 	uint32_t version;
71312198SEiji.Ota@Sun.COM 	int err, destroy = 1;
71412198SEiji.Ota@Sun.COM 	boolean_t conn_created = B_FALSE;
71512198SEiji.Ota@Sun.COM 
71612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
71712198SEiji.Ota@Sun.COM 	    "Enter cm_id: %p event: %p", cm_id, event);
71812198SEiji.Ota@Sun.COM 
71912198SEiji.Ota@Sun.COM 	/* Check whether the remote protocol version matches ours. */
72012198SEiji.Ota@Sun.COM 	version = rdsv3_ib_protocol_compatible(event);
72112198SEiji.Ota@Sun.COM 	if (!version) {
72212198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
72312198SEiji.Ota@Sun.COM 		    "version mismatch");
72412198SEiji.Ota@Sun.COM 		goto out;
72512198SEiji.Ota@Sun.COM 	}
72612198SEiji.Ota@Sun.COM 
72712198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
72812198SEiji.Ota@Sun.COM 	    "saddr %u.%u.%u.%u daddr %u.%u.%u.%u RDSv%d.%d lguid 0x%llx fguid "
72912198SEiji.Ota@Sun.COM 	    "0x%llx", NIPQUAD(dp->dp_saddr), NIPQUAD(dp->dp_daddr),
73012198SEiji.Ota@Sun.COM 	    RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
73112198SEiji.Ota@Sun.COM 	    (unsigned long long)ntohll(lguid),
73212198SEiji.Ota@Sun.COM 	    (unsigned long long)ntohll(fguid));
73312198SEiji.Ota@Sun.COM 
73412198SEiji.Ota@Sun.COM 	conn = rdsv3_conn_create(dp->dp_daddr, dp->dp_saddr,
73512198SEiji.Ota@Sun.COM 	    &rdsv3_ib_transport, KM_NOSLEEP);
73612198SEiji.Ota@Sun.COM 	if (IS_ERR(conn)) {
73712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
73812198SEiji.Ota@Sun.COM 		    "rdsv3_conn_create failed (%ld)", PTR_ERR(conn));
73912198SEiji.Ota@Sun.COM 		conn = NULL;
74012198SEiji.Ota@Sun.COM 		goto out;
74112198SEiji.Ota@Sun.COM 	}
74212198SEiji.Ota@Sun.COM 
74312198SEiji.Ota@Sun.COM 	/*
74412198SEiji.Ota@Sun.COM 	 * The connection request may occur while the
74512198SEiji.Ota@Sun.COM 	 * previous connection exist, e.g. in case of failover.
74612198SEiji.Ota@Sun.COM 	 * But as connections may be initiated simultaneously
74712198SEiji.Ota@Sun.COM 	 * by both hosts, we have a random backoff mechanism -
74812198SEiji.Ota@Sun.COM 	 * see the comment above rdsv3_queue_reconnect()
74912198SEiji.Ota@Sun.COM 	 */
75012198SEiji.Ota@Sun.COM 	mutex_enter(&conn->c_cm_lock);
75112198SEiji.Ota@Sun.COM 	if (!rdsv3_conn_transition(conn, RDSV3_CONN_DOWN,
75212198SEiji.Ota@Sun.COM 	    RDSV3_CONN_CONNECTING)) {
75312198SEiji.Ota@Sun.COM 		if (rdsv3_conn_state(conn) == RDSV3_CONN_UP) {
75412198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
75512198SEiji.Ota@Sun.COM 			    "incoming connect when connected: %p",
75612198SEiji.Ota@Sun.COM 			    conn);
75712198SEiji.Ota@Sun.COM 			rdsv3_conn_drop(conn);
75812198SEiji.Ota@Sun.COM 			rdsv3_ib_stats_inc(s_ib_listen_closed_stale);
75912198SEiji.Ota@Sun.COM 			mutex_exit(&conn->c_cm_lock);
76012198SEiji.Ota@Sun.COM 			goto out;
76112198SEiji.Ota@Sun.COM 		} else if (rdsv3_conn_state(conn) == RDSV3_CONN_CONNECTING) {
76212198SEiji.Ota@Sun.COM 			/* Wait and see - our connect may still be succeeding */
76312198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
76412198SEiji.Ota@Sun.COM 			    "peer-to-peer connection request: %p, "
76512198SEiji.Ota@Sun.COM 			    "lguid: 0x%llx fguid: 0x%llx",
76612198SEiji.Ota@Sun.COM 			    conn, lguid, fguid);
76712198SEiji.Ota@Sun.COM 			rdsv3_ib_stats_inc(s_ib_connect_raced);
76812198SEiji.Ota@Sun.COM 		}
76912198SEiji.Ota@Sun.COM 		mutex_exit(&conn->c_cm_lock);
77012198SEiji.Ota@Sun.COM 		goto out;
77112198SEiji.Ota@Sun.COM 	}
77212198SEiji.Ota@Sun.COM 
77312198SEiji.Ota@Sun.COM 	ic = conn->c_transport_data;
77412198SEiji.Ota@Sun.COM 
77512198SEiji.Ota@Sun.COM 	rdsv3_ib_set_protocol(conn, version);
77612198SEiji.Ota@Sun.COM 	rdsv3_ib_set_flow_control(conn, ntohl(dp->dp_credit));
77712198SEiji.Ota@Sun.COM 
77812198SEiji.Ota@Sun.COM 	/*
77912198SEiji.Ota@Sun.COM 	 * If the peer gave us the last packet it saw, process this as if
78012198SEiji.Ota@Sun.COM 	 * we had received a regular ACK.
78112198SEiji.Ota@Sun.COM 	 */
78212198SEiji.Ota@Sun.COM 	if (dp->dp_ack_seq)
78312198SEiji.Ota@Sun.COM 		rdsv3_send_drop_acked(conn, ntohll(dp->dp_ack_seq), NULL);
78412198SEiji.Ota@Sun.COM 
78512198SEiji.Ota@Sun.COM 	ASSERT(!cm_id->context);
78612198SEiji.Ota@Sun.COM 	ASSERT(!ic->i_cm_id);
78712198SEiji.Ota@Sun.COM 
78812198SEiji.Ota@Sun.COM 	if (ic->i_cm_id != NULL)
78912198SEiji.Ota@Sun.COM 		RDSV3_PANIC();
79012198SEiji.Ota@Sun.COM 
79112198SEiji.Ota@Sun.COM 	ic->i_cm_id = cm_id;
79212198SEiji.Ota@Sun.COM 	cm_id->context = conn;
79312198SEiji.Ota@Sun.COM 
79412198SEiji.Ota@Sun.COM 	/*
79512198SEiji.Ota@Sun.COM 	 * We got halfway through setting up the ib_connection, if we
79612198SEiji.Ota@Sun.COM 	 * fail now, we have to take the long route out of this mess.
79712198SEiji.Ota@Sun.COM 	 */
79812198SEiji.Ota@Sun.COM 	destroy = 0;
79912198SEiji.Ota@Sun.COM 
80012198SEiji.Ota@Sun.COM 	err = rdsv3_ib_setup_qp(conn);
80112198SEiji.Ota@Sun.COM 	if (err) {
80212198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
80312198SEiji.Ota@Sun.COM 		    "rdsv3_ib_setup_qp failed (%d)", err);
80412320SGiri.Adari@Sun.COM 		mutex_exit(&conn->c_cm_lock);
80512198SEiji.Ota@Sun.COM 		rdsv3_conn_drop(conn);
80612198SEiji.Ota@Sun.COM 		goto out;
80712198SEiji.Ota@Sun.COM 	}
80812198SEiji.Ota@Sun.COM 
80912676SEiji.Ota@Sun.COM 	rdsv3_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
81012676SEiji.Ota@Sun.COM 	    event->param.conn.responder_resources,
81112676SEiji.Ota@Sun.COM 	    event->param.conn.initiator_depth);
81212198SEiji.Ota@Sun.COM 
81312198SEiji.Ota@Sun.COM 	/* rdma_accept() calls rdma_reject() internally if it fails */
81412198SEiji.Ota@Sun.COM 	err = rdma_accept(cm_id, &conn_param);
81512198SEiji.Ota@Sun.COM 	mutex_exit(&conn->c_cm_lock);
81612198SEiji.Ota@Sun.COM 	if (err) {
81712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
81812198SEiji.Ota@Sun.COM 		    "rdma_accept failed (%d)", err);
81912198SEiji.Ota@Sun.COM 		rdsv3_conn_drop(conn);
82012198SEiji.Ota@Sun.COM 		goto out;
82112198SEiji.Ota@Sun.COM 	}
82212198SEiji.Ota@Sun.COM 
82312198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_handle_connect",
82412198SEiji.Ota@Sun.COM 	    "Return cm_id: %p event: %p", cm_id, event);
82512198SEiji.Ota@Sun.COM 
82612198SEiji.Ota@Sun.COM 	return (0);
82712198SEiji.Ota@Sun.COM 
82812198SEiji.Ota@Sun.COM out:
82912198SEiji.Ota@Sun.COM 	(void) rdma_reject(cm_id, NULL, 0);
83012198SEiji.Ota@Sun.COM 	return (destroy);
83112198SEiji.Ota@Sun.COM }
83212198SEiji.Ota@Sun.COM 
83312198SEiji.Ota@Sun.COM 
83412198SEiji.Ota@Sun.COM int
rdsv3_ib_cm_initiate_connect(struct rdma_cm_id * cm_id)83512198SEiji.Ota@Sun.COM rdsv3_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
83612198SEiji.Ota@Sun.COM {
83712198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn = cm_id->context;
83812198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
83912198SEiji.Ota@Sun.COM 	struct rdma_conn_param conn_param;
84012198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connect_private dp;
84112198SEiji.Ota@Sun.COM 	int ret;
84212198SEiji.Ota@Sun.COM 
84312198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_initiate_connect", "Enter: cm_id: %p",
84412198SEiji.Ota@Sun.COM 	    cm_id);
84512198SEiji.Ota@Sun.COM 
84612198SEiji.Ota@Sun.COM 	/*
84712198SEiji.Ota@Sun.COM 	 * If the peer doesn't do protocol negotiation, we must
84812198SEiji.Ota@Sun.COM 	 * default to RDSv3.0
84912198SEiji.Ota@Sun.COM 	 */
85012198SEiji.Ota@Sun.COM 	rdsv3_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
85112198SEiji.Ota@Sun.COM 	ic->i_flowctl =
85212198SEiji.Ota@Sun.COM 	    rdsv3_ib_sysctl_flow_control;	/* advertise flow control */
85312198SEiji.Ota@Sun.COM 
85412198SEiji.Ota@Sun.COM 	ret = rdsv3_ib_setup_qp(conn);
85512198SEiji.Ota@Sun.COM 	if (ret) {
85612198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_initiate_connect",
85712198SEiji.Ota@Sun.COM 		    "rdsv3_ib_setup_qp failed (%d)", ret);
85812198SEiji.Ota@Sun.COM 		rdsv3_conn_drop(conn);
85912198SEiji.Ota@Sun.COM 		goto out;
86012198SEiji.Ota@Sun.COM 	}
86112198SEiji.Ota@Sun.COM 
86212676SEiji.Ota@Sun.COM 	rdsv3_ib_cm_fill_conn_param(conn, &conn_param, &dp,
86312676SEiji.Ota@Sun.COM 	    RDS_PROTOCOL_VERSION, UINT_MAX, UINT_MAX);
86412198SEiji.Ota@Sun.COM 
86512198SEiji.Ota@Sun.COM 	ret = rdma_connect(cm_id, &conn_param);
86612198SEiji.Ota@Sun.COM 	if (ret) {
86712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_cm_initiate_connect",
86812198SEiji.Ota@Sun.COM 		    "rdma_connect failed (%d)", ret);
86912198SEiji.Ota@Sun.COM 		rdsv3_conn_drop(conn);
87012198SEiji.Ota@Sun.COM 	}
87112198SEiji.Ota@Sun.COM 
87212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_cm_initiate_connect",
87312198SEiji.Ota@Sun.COM 	    "Return: cm_id: %p", cm_id);
87412198SEiji.Ota@Sun.COM 
87512198SEiji.Ota@Sun.COM out:
87612198SEiji.Ota@Sun.COM 	/*
87712198SEiji.Ota@Sun.COM 	 * Beware - returning non-zero tells the rdma_cm to destroy
87812198SEiji.Ota@Sun.COM 	 * the cm_id. We should certainly not do it as long as we still
87912198SEiji.Ota@Sun.COM 	 * "own" the cm_id.
88012198SEiji.Ota@Sun.COM 	 */
88112198SEiji.Ota@Sun.COM 	if (ret) {
88212198SEiji.Ota@Sun.COM 		if (ic->i_cm_id == cm_id)
88312198SEiji.Ota@Sun.COM 			ret = 0;
88412198SEiji.Ota@Sun.COM 	}
88512198SEiji.Ota@Sun.COM 	return (ret);
88612198SEiji.Ota@Sun.COM }
88712198SEiji.Ota@Sun.COM 
88812198SEiji.Ota@Sun.COM int
rdsv3_ib_conn_connect(struct rdsv3_connection * conn)88912198SEiji.Ota@Sun.COM rdsv3_ib_conn_connect(struct rdsv3_connection *conn)
89012198SEiji.Ota@Sun.COM {
89112198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
89212198SEiji.Ota@Sun.COM 	struct sockaddr_in src, dest;
89312198SEiji.Ota@Sun.COM 	ipaddr_t	laddr, faddr;
89412198SEiji.Ota@Sun.COM 	int ret;
89512198SEiji.Ota@Sun.COM 
89612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_connect", "Enter: conn: %p", conn);
89712198SEiji.Ota@Sun.COM 
89812198SEiji.Ota@Sun.COM 	/*
89912198SEiji.Ota@Sun.COM 	 * XXX I wonder what affect the port space has
90012198SEiji.Ota@Sun.COM 	 */
90112198SEiji.Ota@Sun.COM 	/* delegate cm event handler to rdma_transport */
90212198SEiji.Ota@Sun.COM 	ic->i_cm_id = rdma_create_id(rdsv3_rdma_cm_event_handler, conn,
90312198SEiji.Ota@Sun.COM 	    RDMA_PS_TCP);
90412198SEiji.Ota@Sun.COM 	if (IS_ERR(ic->i_cm_id)) {
90512198SEiji.Ota@Sun.COM 		ret = PTR_ERR(ic->i_cm_id);
90612198SEiji.Ota@Sun.COM 		ic->i_cm_id = NULL;
90712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_conn_connect",
90812198SEiji.Ota@Sun.COM 		    "rdma_create_id() failed: %d", ret);
90912198SEiji.Ota@Sun.COM 		goto out;
91012198SEiji.Ota@Sun.COM 	}
91112198SEiji.Ota@Sun.COM 
91212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF3("rdsv3_ib_conn_connect",
91312198SEiji.Ota@Sun.COM 	    "created cm id %p for conn %p", ic->i_cm_id, conn);
91412198SEiji.Ota@Sun.COM 
91512198SEiji.Ota@Sun.COM 	/* The ipaddr should be in the network order */
91612198SEiji.Ota@Sun.COM 	laddr = conn->c_laddr;
91712198SEiji.Ota@Sun.COM 	faddr = conn->c_faddr;
91812198SEiji.Ota@Sun.COM 	ret = rdsv3_sc_path_lookup(&laddr, &faddr);
91912198SEiji.Ota@Sun.COM 	if (ret == 0) {
92012198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)",
92112198SEiji.Ota@Sun.COM 		    ntohl(laddr), ntohl(faddr));
92212198SEiji.Ota@Sun.COM 	}
92312198SEiji.Ota@Sun.COM 
92412198SEiji.Ota@Sun.COM 	src.sin_family = AF_INET;
92512198SEiji.Ota@Sun.COM 	src.sin_addr.s_addr = (uint32_t)laddr;
92612198SEiji.Ota@Sun.COM 	src.sin_port = (uint16_t)htons(0);
92712198SEiji.Ota@Sun.COM 
92812198SEiji.Ota@Sun.COM 	dest.sin_family = AF_INET;
92912198SEiji.Ota@Sun.COM 	dest.sin_addr.s_addr = (uint32_t)faddr;
93012198SEiji.Ota@Sun.COM 	dest.sin_port = (uint16_t)htons(RDSV3_PORT);
93112198SEiji.Ota@Sun.COM 
93212198SEiji.Ota@Sun.COM 	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
93312198SEiji.Ota@Sun.COM 	    (struct sockaddr *)&dest,
93412198SEiji.Ota@Sun.COM 	    RDSV3_RDMA_RESOLVE_TIMEOUT_MS);
93512198SEiji.Ota@Sun.COM 	if (ret) {
93612198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_conn_connect",
93712198SEiji.Ota@Sun.COM 		    "addr resolve failed for cm id %p: %d", ic->i_cm_id, ret);
93812198SEiji.Ota@Sun.COM 		rdma_destroy_id(ic->i_cm_id);
93912198SEiji.Ota@Sun.COM 		ic->i_cm_id = NULL;
94012198SEiji.Ota@Sun.COM 	}
94112198SEiji.Ota@Sun.COM 
94212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_connect", "Return: conn: %p", conn);
94312198SEiji.Ota@Sun.COM 
94412198SEiji.Ota@Sun.COM out:
94512198SEiji.Ota@Sun.COM 	return (ret);
94612198SEiji.Ota@Sun.COM }
94712198SEiji.Ota@Sun.COM 
94812198SEiji.Ota@Sun.COM /*
94912198SEiji.Ota@Sun.COM  * This is so careful about only cleaning up resources that were built up
95012198SEiji.Ota@Sun.COM  * so that it can be called at any point during startup.  In fact it
95112198SEiji.Ota@Sun.COM  * can be called multiple times for a given connection.
95212198SEiji.Ota@Sun.COM  */
95312198SEiji.Ota@Sun.COM void
rdsv3_ib_conn_shutdown(struct rdsv3_connection * conn)95412198SEiji.Ota@Sun.COM rdsv3_ib_conn_shutdown(struct rdsv3_connection *conn)
95512198SEiji.Ota@Sun.COM {
95612198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
95712198SEiji.Ota@Sun.COM 	int err = 0;
95812198SEiji.Ota@Sun.COM 
95912198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_shutdown",
96012676SEiji.Ota@Sun.COM 	    "cm %p pd %p cq %p qp %p", ic->i_cm_id,
96112676SEiji.Ota@Sun.COM 	    ic->i_pd, ic->i_cq, ic->i_cm_id ? ic->i_cm_id->qp : NULL);
96212198SEiji.Ota@Sun.COM 
96312198SEiji.Ota@Sun.COM 	if (ic->i_cm_id) {
96412198SEiji.Ota@Sun.COM 		struct ib_device *dev = ic->i_cm_id->device;
96512198SEiji.Ota@Sun.COM 
96612198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF2("rdsv3_ib_conn_shutdown",
96712198SEiji.Ota@Sun.COM 		    "disconnecting cm %p", ic->i_cm_id);
96812198SEiji.Ota@Sun.COM 		err = rdma_disconnect(ic->i_cm_id);
96912198SEiji.Ota@Sun.COM 		if (err) {
97012198SEiji.Ota@Sun.COM 			/*
97112198SEiji.Ota@Sun.COM 			 * Actually this may happen quite frequently, when
97212198SEiji.Ota@Sun.COM 			 * an outgoing connect raced with an incoming connect.
97312198SEiji.Ota@Sun.COM 			 */
97412198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_ib_conn_shutdown",
97512198SEiji.Ota@Sun.COM 			    "failed to disconnect, cm: %p err %d",
97612198SEiji.Ota@Sun.COM 			    ic->i_cm_id, err);
97712198SEiji.Ota@Sun.COM 		}
97812198SEiji.Ota@Sun.COM 
97912198SEiji.Ota@Sun.COM 		if (ic->i_cm_id->qp) {
98012198SEiji.Ota@Sun.COM 			(void) ibt_flush_qp(
98112198SEiji.Ota@Sun.COM 			    ib_get_ibt_channel_hdl(ic->i_cm_id));
98212676SEiji.Ota@Sun.COM 			/*
98312676SEiji.Ota@Sun.COM 			 * Don't wait for the send ring to be empty -- there
98412676SEiji.Ota@Sun.COM 			 * may be completed non-signaled entries sitting on
98512676SEiji.Ota@Sun.COM 			 * there. We unmap these below.
98612676SEiji.Ota@Sun.COM 			 */
98712676SEiji.Ota@Sun.COM 			rdsv3_wait_event(&ic->i_recv_ring.w_empty_wait,
98812198SEiji.Ota@Sun.COM 			    rdsv3_ib_ring_empty(&ic->i_recv_ring));
98912676SEiji.Ota@Sun.COM 			/*
99012676SEiji.Ota@Sun.COM 			 * Note that Linux original code calls
99112676SEiji.Ota@Sun.COM 			 * rdma_destroy_qp() after rdsv3_ib_recv_clear_ring(ic).
99212676SEiji.Ota@Sun.COM 			 */
99312198SEiji.Ota@Sun.COM 			rdma_destroy_qp(ic->i_cm_id);
99412198SEiji.Ota@Sun.COM 		}
99512198SEiji.Ota@Sun.COM 
99612676SEiji.Ota@Sun.COM 		if (rdsv3_enable_snd_cq) {
99712676SEiji.Ota@Sun.COM 			if (ic->i_snd_soft_cq) {
99812676SEiji.Ota@Sun.COM 				rdsv3_af_thr_destroy(ic->i_snd_soft_cq);
99912676SEiji.Ota@Sun.COM 				ic->i_snd_soft_cq = NULL;
100012676SEiji.Ota@Sun.COM 			}
100112676SEiji.Ota@Sun.COM 			if (ic->i_snd_cq)
100212676SEiji.Ota@Sun.COM 				(void) ib_destroy_cq(ic->i_snd_cq);
100312676SEiji.Ota@Sun.COM 		}
100412676SEiji.Ota@Sun.COM 		if (ic->i_soft_cq) {
100512676SEiji.Ota@Sun.COM 			rdsv3_af_thr_destroy(ic->i_soft_cq);
100612676SEiji.Ota@Sun.COM 			ic->i_soft_cq = NULL;
100712676SEiji.Ota@Sun.COM 		}
100812676SEiji.Ota@Sun.COM 		if (ic->i_refill_rq) {
100912676SEiji.Ota@Sun.COM 			rdsv3_af_thr_destroy(ic->i_refill_rq);
101012676SEiji.Ota@Sun.COM 			ic->i_refill_rq = NULL;
101112676SEiji.Ota@Sun.COM 		}
101212676SEiji.Ota@Sun.COM 		if (ic->i_cq)
101312676SEiji.Ota@Sun.COM 			(void) ib_destroy_cq(ic->i_cq);
101412198SEiji.Ota@Sun.COM 
101512198SEiji.Ota@Sun.COM 		if (ic->i_mr)
101612198SEiji.Ota@Sun.COM 			rdsv3_ib_free_hdrs(dev, ic);
101712198SEiji.Ota@Sun.COM 
101812198SEiji.Ota@Sun.COM 		if (ic->i_sends)
101912198SEiji.Ota@Sun.COM 			rdsv3_ib_send_clear_ring(ic);
102012198SEiji.Ota@Sun.COM 		if (ic->i_recvs)
102112198SEiji.Ota@Sun.COM 			rdsv3_ib_recv_clear_ring(ic);
102212198SEiji.Ota@Sun.COM 
102312198SEiji.Ota@Sun.COM 		rdma_destroy_id(ic->i_cm_id);
102412198SEiji.Ota@Sun.COM 
102512198SEiji.Ota@Sun.COM 		/*
102612198SEiji.Ota@Sun.COM 		 * Move connection back to the nodev list.
102712198SEiji.Ota@Sun.COM 		 */
102812444SGiri.Adari@Sun.COM 		if (ic->i_on_dev_list)
102912198SEiji.Ota@Sun.COM 			rdsv3_ib_remove_conn(ic->rds_ibdev, conn);
103012198SEiji.Ota@Sun.COM 
103112198SEiji.Ota@Sun.COM 		ic->i_cm_id = NULL;
103212198SEiji.Ota@Sun.COM 		ic->i_pd = NULL;
103312198SEiji.Ota@Sun.COM 		ic->i_mr = NULL;
103412676SEiji.Ota@Sun.COM 		ic->i_cq = NULL;
103512676SEiji.Ota@Sun.COM 		ic->i_snd_cq = NULL;
103612198SEiji.Ota@Sun.COM 		ic->i_send_hdrs = NULL;
103712198SEiji.Ota@Sun.COM 		ic->i_recv_hdrs = NULL;
103812198SEiji.Ota@Sun.COM 		ic->i_ack = NULL;
103912198SEiji.Ota@Sun.COM 	}
104012444SGiri.Adari@Sun.COM 	ASSERT(!ic->i_on_dev_list);
104112198SEiji.Ota@Sun.COM 
104212198SEiji.Ota@Sun.COM 	/* Clear pending transmit */
104312198SEiji.Ota@Sun.COM 	if (ic->i_rm) {
104412198SEiji.Ota@Sun.COM 		rdsv3_message_put(ic->i_rm);
104512198SEiji.Ota@Sun.COM 		ic->i_rm = NULL;
104612198SEiji.Ota@Sun.COM 	}
104712198SEiji.Ota@Sun.COM 
104812198SEiji.Ota@Sun.COM 	/* Clear the ACK state */
104912198SEiji.Ota@Sun.COM 	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
105012198SEiji.Ota@Sun.COM 	ic->i_ack_next = 0;
105112198SEiji.Ota@Sun.COM 	ic->i_ack_recv = 0;
105212198SEiji.Ota@Sun.COM 
105312198SEiji.Ota@Sun.COM 	/* Clear flow control state */
105412198SEiji.Ota@Sun.COM 	ic->i_flowctl = 0;
105512198SEiji.Ota@Sun.COM 	ic->i_credits = 0;
105612198SEiji.Ota@Sun.COM 
105712198SEiji.Ota@Sun.COM 	rdsv3_ib_ring_init(&ic->i_send_ring, rdsv3_ib_sysctl_max_send_wr);
105812198SEiji.Ota@Sun.COM 	rdsv3_ib_ring_init(&ic->i_recv_ring, rdsv3_ib_sysctl_max_recv_wr);
105912198SEiji.Ota@Sun.COM 
106012198SEiji.Ota@Sun.COM 	if (ic->i_ibinc) {
106112198SEiji.Ota@Sun.COM 		rdsv3_inc_put(&ic->i_ibinc->ii_inc);
106212198SEiji.Ota@Sun.COM 		ic->i_ibinc = NULL;
106312198SEiji.Ota@Sun.COM 	}
106412198SEiji.Ota@Sun.COM 
106512198SEiji.Ota@Sun.COM 	if (ic->i_sends) {
106612198SEiji.Ota@Sun.COM 		kmem_free(ic->i_sends,
106712198SEiji.Ota@Sun.COM 		    ic->i_send_ring.w_nr * sizeof (struct rdsv3_ib_send_work));
106812198SEiji.Ota@Sun.COM 		ic->i_sends = NULL;
106912198SEiji.Ota@Sun.COM 	}
107012198SEiji.Ota@Sun.COM 	if (ic->i_send_wrs) {
107112794SGiri.Adari@Sun.COM 		kmem_free(ic->i_send_wrs, ic->i_send_ring.w_nr *
107212198SEiji.Ota@Sun.COM 		    (sizeof (ibt_send_wr_t) +
107312198SEiji.Ota@Sun.COM 		    RDSV3_IB_MAX_SGE * sizeof (ibt_wr_ds_t)));
107412198SEiji.Ota@Sun.COM 		ic->i_send_wrs = NULL;
107512198SEiji.Ota@Sun.COM 	}
107612198SEiji.Ota@Sun.COM 	if (ic->i_recvs) {
107712198SEiji.Ota@Sun.COM 		kmem_free(ic->i_recvs,
107812198SEiji.Ota@Sun.COM 		    ic->i_recv_ring.w_nr * sizeof (struct rdsv3_ib_recv_work));
107912198SEiji.Ota@Sun.COM 		ic->i_recvs = NULL;
108012198SEiji.Ota@Sun.COM 	}
108112676SEiji.Ota@Sun.COM 	if (ic->i_recv_wrs) {
108212676SEiji.Ota@Sun.COM 		kmem_free(ic->i_recv_wrs, ic->i_recv_ring.w_nr *
108312676SEiji.Ota@Sun.COM 		    (sizeof (ibt_recv_wr_t)));
108412676SEiji.Ota@Sun.COM 		ic->i_recv_wrs = NULL;
108512676SEiji.Ota@Sun.COM 	}
108612198SEiji.Ota@Sun.COM 
108712198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_shutdown", "Return conn: %p", conn);
108812198SEiji.Ota@Sun.COM }
108912198SEiji.Ota@Sun.COM 
109012198SEiji.Ota@Sun.COM /* ARGSUSED */
109112198SEiji.Ota@Sun.COM int
rdsv3_ib_conn_alloc(struct rdsv3_connection * conn,int gfp)109212198SEiji.Ota@Sun.COM rdsv3_ib_conn_alloc(struct rdsv3_connection *conn, int gfp)
109312198SEiji.Ota@Sun.COM {
109412198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic;
109512198SEiji.Ota@Sun.COM 
109612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_alloc", "conn: %p", conn);
109712198SEiji.Ota@Sun.COM 
109812198SEiji.Ota@Sun.COM 	/* XXX too lazy? */
109912198SEiji.Ota@Sun.COM 	ic = kmem_zalloc(sizeof (struct rdsv3_ib_connection), gfp);
110012676SEiji.Ota@Sun.COM 	if (!ic)
110112198SEiji.Ota@Sun.COM 		return (-ENOMEM);
110212198SEiji.Ota@Sun.COM 
110312198SEiji.Ota@Sun.COM 	list_link_init(&ic->ib_node);
110412198SEiji.Ota@Sun.COM 
110512198SEiji.Ota@Sun.COM 	mutex_init(&ic->i_recv_mutex, NULL, MUTEX_DRIVER, NULL);
110612198SEiji.Ota@Sun.COM 	mutex_init(&ic->i_ack_lock, NULL, MUTEX_DRIVER, NULL);
110712198SEiji.Ota@Sun.COM 
110812198SEiji.Ota@Sun.COM 	/*
110912198SEiji.Ota@Sun.COM 	 * rdsv3_ib_conn_shutdown() waits for these to be emptied so they
111012198SEiji.Ota@Sun.COM 	 * must be initialized before it can be called.
111112198SEiji.Ota@Sun.COM 	 */
111212198SEiji.Ota@Sun.COM 	rdsv3_ib_ring_init(&ic->i_send_ring, rdsv3_ib_sysctl_max_send_wr);
111312198SEiji.Ota@Sun.COM 	rdsv3_ib_ring_init(&ic->i_recv_ring, rdsv3_ib_sysctl_max_recv_wr);
111412198SEiji.Ota@Sun.COM 
111512198SEiji.Ota@Sun.COM 	ic->conn = conn;
111612198SEiji.Ota@Sun.COM 	conn->c_transport_data = ic;
111712198SEiji.Ota@Sun.COM 
111812198SEiji.Ota@Sun.COM 	mutex_enter(&ib_nodev_conns_lock);
111912198SEiji.Ota@Sun.COM 	list_insert_tail(&ib_nodev_conns, ic);
112012198SEiji.Ota@Sun.COM 	mutex_exit(&ib_nodev_conns_lock);
112112198SEiji.Ota@Sun.COM 
112212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_alloc", "conn %p conn ic %p",
112312198SEiji.Ota@Sun.COM 	    conn, conn->c_transport_data);
112412198SEiji.Ota@Sun.COM 	return (0);
112512198SEiji.Ota@Sun.COM }
112612198SEiji.Ota@Sun.COM 
112712198SEiji.Ota@Sun.COM /*
112812198SEiji.Ota@Sun.COM  * Free a connection. Connection must be shut down and not set for reconnect.
112912198SEiji.Ota@Sun.COM  */
113012198SEiji.Ota@Sun.COM void
rdsv3_ib_conn_free(void * arg)113112198SEiji.Ota@Sun.COM rdsv3_ib_conn_free(void *arg)
113212198SEiji.Ota@Sun.COM {
113312198SEiji.Ota@Sun.COM 	struct rdsv3_ib_connection *ic = arg;
113412198SEiji.Ota@Sun.COM 	kmutex_t	*lock_ptr;
113512198SEiji.Ota@Sun.COM 
113612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_ib_conn_free", "ic %p\n", ic);
113712198SEiji.Ota@Sun.COM 
113812198SEiji.Ota@Sun.COM #ifndef __lock_lint
113912198SEiji.Ota@Sun.COM 	/*
114012198SEiji.Ota@Sun.COM 	 * Conn is either on a dev's list or on the nodev list.
114112198SEiji.Ota@Sun.COM 	 * A race with shutdown() or connect() would cause problems
114212198SEiji.Ota@Sun.COM 	 * (since rds_ibdev would change) but that should never happen.
114312198SEiji.Ota@Sun.COM 	 */
114412444SGiri.Adari@Sun.COM 	lock_ptr = ic->i_on_dev_list ?
114512198SEiji.Ota@Sun.COM 	    &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
114612198SEiji.Ota@Sun.COM 
114712198SEiji.Ota@Sun.COM 	mutex_enter(lock_ptr);
114812198SEiji.Ota@Sun.COM 	list_remove_node(&ic->ib_node);
114912198SEiji.Ota@Sun.COM 	mutex_exit(lock_ptr);
115012198SEiji.Ota@Sun.COM #endif
115112198SEiji.Ota@Sun.COM 	kmem_free(ic, sizeof (*ic));
115212198SEiji.Ota@Sun.COM }
115312198SEiji.Ota@Sun.COM 
/*
 * Error hook for the IB transport: a failure was detected on conn, so
 * hand the connection to rdsv3_conn_drop().
 */
void
__rdsv3_ib_conn_error(struct rdsv3_connection *conn)
{
	rdsv3_conn_drop(conn);
}
1162