xref: /onnv-gate/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c (revision 13060:9c2bff94c54d)
111534SKevin.Ge@Sun.COM /*
211534SKevin.Ge@Sun.COM  * CDDL HEADER START
311534SKevin.Ge@Sun.COM  *
411534SKevin.Ge@Sun.COM  * The contents of this file are subject to the terms of the
511534SKevin.Ge@Sun.COM  * Common Development and Distribution License (the "License").
611534SKevin.Ge@Sun.COM  * You may not use this file except in compliance with the License.
711534SKevin.Ge@Sun.COM  *
811534SKevin.Ge@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
911534SKevin.Ge@Sun.COM  * or http://www.opensolaris.org/os/licensing.
1011534SKevin.Ge@Sun.COM  * See the License for the specific language governing permissions
1111534SKevin.Ge@Sun.COM  * and limitations under the License.
1211534SKevin.Ge@Sun.COM  *
1311534SKevin.Ge@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
1411534SKevin.Ge@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1511534SKevin.Ge@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
1611534SKevin.Ge@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
1711534SKevin.Ge@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
1811534SKevin.Ge@Sun.COM  *
1911534SKevin.Ge@Sun.COM  * CDDL HEADER END
2011534SKevin.Ge@Sun.COM  */
2111534SKevin.Ge@Sun.COM 
2211534SKevin.Ge@Sun.COM /*
2312163SRamaswamy.Tummala@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
2411534SKevin.Ge@Sun.COM  */
2511534SKevin.Ge@Sun.COM /* Copyright (c) 1990 Mentat Inc. */
2611534SKevin.Ge@Sun.COM 
2711534SKevin.Ge@Sun.COM /*
2811534SKevin.Ge@Sun.COM  * An implementation of the IPoIB-CM standard based on PSARC 2009/593.
2911534SKevin.Ge@Sun.COM  */
3011534SKevin.Ge@Sun.COM #include <sys/types.h>
3111534SKevin.Ge@Sun.COM #include <sys/conf.h>
3211534SKevin.Ge@Sun.COM #include <sys/ddi.h>
3311534SKevin.Ge@Sun.COM #include <sys/sunddi.h>
3411534SKevin.Ge@Sun.COM #include <sys/modctl.h>
3511534SKevin.Ge@Sun.COM #include <sys/stropts.h>
3611534SKevin.Ge@Sun.COM #include <sys/stream.h>
3711534SKevin.Ge@Sun.COM #include <sys/strsun.h>
3811534SKevin.Ge@Sun.COM #include <sys/strsubr.h>
3911534SKevin.Ge@Sun.COM #include <sys/dlpi.h>
4011534SKevin.Ge@Sun.COM #include <sys/mac_provider.h>
4111534SKevin.Ge@Sun.COM 
4211534SKevin.Ge@Sun.COM #include <sys/pattr.h>		/* for HCK_FULLCKSUM */
4311534SKevin.Ge@Sun.COM #include <sys/atomic.h>		/* for atomic_add*() */
4411534SKevin.Ge@Sun.COM #include <sys/ethernet.h>	/* for ETHERTYPE_IP */
4511534SKevin.Ge@Sun.COM #include <netinet/in.h>		/* for netinet/ip.h below */
4611534SKevin.Ge@Sun.COM #include <netinet/ip.h>		/* for struct ip */
4711534SKevin.Ge@Sun.COM #include <inet/common.h>	/* for inet/ip.h below */
4811534SKevin.Ge@Sun.COM #include <inet/ip.h>		/* for ipha_t */
4911534SKevin.Ge@Sun.COM #include <inet/ip_if.h>		/* for ETHERTYPE_IPV6 */
5011534SKevin.Ge@Sun.COM #include <inet/ip6.h>		/* for ip6_t */
5111534SKevin.Ge@Sun.COM #include <netinet/icmp6.h>	/* for icmp6_t */
5211534SKevin.Ge@Sun.COM 
5311534SKevin.Ge@Sun.COM #include <sys/ib/clients/ibd/ibd.h>
5411534SKevin.Ge@Sun.COM 
5511642SKevin.Ge@Sun.COM extern ibd_global_state_t ibd_gstate;
5613030SKevin.Ge@Sun.COM extern int ibd_rc_conn_timeout;
5711534SKevin.Ge@Sun.COM uint_t ibd_rc_tx_softintr = 1;
5811534SKevin.Ge@Sun.COM /*
5911534SKevin.Ge@Sun.COM  * If the number of WRs in receive queue of each RC connection less than
6011534SKevin.Ge@Sun.COM  * IBD_RC_RX_WR_THRESHOLD, we will post more receive WRs into it.
6111534SKevin.Ge@Sun.COM  */
6211534SKevin.Ge@Sun.COM #define	IBD_RC_RX_WR_THRESHOLD		0x20
6311534SKevin.Ge@Sun.COM 
6411534SKevin.Ge@Sun.COM /*
6511534SKevin.Ge@Sun.COM  * If the number of free SWQEs (or large Tx buf) is larger than or equal to
6611534SKevin.Ge@Sun.COM  * IBD_RC_TX_FREE_THRESH, we will call mac_tx_update to notify GLD to continue
6711534SKevin.Ge@Sun.COM  * transmitting packets.
6811534SKevin.Ge@Sun.COM  */
6911534SKevin.Ge@Sun.COM #define	IBD_RC_TX_FREE_THRESH		8
7011534SKevin.Ge@Sun.COM 
7111534SKevin.Ge@Sun.COM #define	IBD_RC_QPN_TO_SID(qpn) \
7211534SKevin.Ge@Sun.COM 	((uint64_t)(IBD_RC_SERVICE_ID | ((qpn) & 0xffffff)))
7311534SKevin.Ge@Sun.COM 
7411534SKevin.Ge@Sun.COM /* For interop with legacy OFED */
7511534SKevin.Ge@Sun.COM #define	IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn) \
7611534SKevin.Ge@Sun.COM 	((uint64_t)(IBD_RC_SERVICE_ID_OFED_INTEROP | ((qpn) & 0xffffff)))
7711534SKevin.Ge@Sun.COM 
7811534SKevin.Ge@Sun.COM /* Internet Header + 64 bits of Data Datagram. Refer to RFC 792 */
7911534SKevin.Ge@Sun.COM #define	IBD_RC_IP_ICMP_RETURN_DATA_BYTES	64
8011534SKevin.Ge@Sun.COM 
8111534SKevin.Ge@Sun.COM 
8211534SKevin.Ge@Sun.COM /* Functions for Reliable Connected Mode */
8311534SKevin.Ge@Sun.COM /* Connection Setup/Close Functions */
8411534SKevin.Ge@Sun.COM static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *,
8511534SKevin.Ge@Sun.COM     ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
8611534SKevin.Ge@Sun.COM static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *,
8711534SKevin.Ge@Sun.COM     ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
8813030SKevin.Ge@Sun.COM static void ibd_rc_act_close(ibd_rc_chan_t *, boolean_t);
8911534SKevin.Ge@Sun.COM 
9011534SKevin.Ge@Sun.COM static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *,
9111534SKevin.Ge@Sun.COM     ibd_rc_chan_t *);
9211534SKevin.Ge@Sun.COM static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list(
9311534SKevin.Ge@Sun.COM     ibd_rc_chan_list_t *);
9413030SKevin.Ge@Sun.COM static inline ibd_rc_chan_t *ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *,
9511534SKevin.Ge@Sun.COM     ibd_rc_chan_t *);
9611534SKevin.Ge@Sun.COM 
9711534SKevin.Ge@Sun.COM /* CQ handlers */
9811534SKevin.Ge@Sun.COM static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *);
9911534SKevin.Ge@Sun.COM static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *);
10011534SKevin.Ge@Sun.COM static void ibd_rc_poll_rcq(ibd_rc_chan_t *, ibt_cq_hdl_t);
10111534SKevin.Ge@Sun.COM 
10211534SKevin.Ge@Sun.COM /* Receive Functions */
10311534SKevin.Ge@Sun.COM static int ibd_rc_post_srq(ibd_state_t *, ibd_rwqe_t *);
10411534SKevin.Ge@Sun.COM static void ibd_rc_srq_freemsg_cb(char *);
10511642SKevin.Ge@Sun.COM static void ibd_rc_srq_free_rwqe(ibd_state_t *, ibd_rwqe_t *);
10611534SKevin.Ge@Sun.COM 
10711534SKevin.Ge@Sun.COM static int ibd_rc_post_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
10811534SKevin.Ge@Sun.COM static void ibd_rc_freemsg_cb(char *);
10911534SKevin.Ge@Sun.COM static void ibd_rc_process_rx(ibd_rc_chan_t *, ibd_rwqe_t *, ibt_wc_t *);
11011534SKevin.Ge@Sun.COM static void ibd_rc_free_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
11111534SKevin.Ge@Sun.COM static void ibd_rc_fini_rxlist(ibd_rc_chan_t *);
11211534SKevin.Ge@Sun.COM 
11311534SKevin.Ge@Sun.COM 
11411534SKevin.Ge@Sun.COM /* Send Functions */
11511534SKevin.Ge@Sun.COM static void ibd_rc_release_swqe(ibd_rc_chan_t *, ibd_swqe_t *);
11611534SKevin.Ge@Sun.COM static int ibd_rc_init_txlist(ibd_rc_chan_t *);
11711534SKevin.Ge@Sun.COM static void ibd_rc_fini_txlist(ibd_rc_chan_t *);
11811534SKevin.Ge@Sun.COM static uint_t ibd_rc_tx_recycle(caddr_t);
11911534SKevin.Ge@Sun.COM 
12011534SKevin.Ge@Sun.COM 
12111534SKevin.Ge@Sun.COM void
ibd_async_rc_close_act_chan(ibd_state_t * state,ibd_req_t * req)12211534SKevin.Ge@Sun.COM ibd_async_rc_close_act_chan(ibd_state_t *state, ibd_req_t *req)
12311534SKevin.Ge@Sun.COM {
12411534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *rc_chan = req->rq_ptr;
12511534SKevin.Ge@Sun.COM 	ibd_ace_t *ace;
12611534SKevin.Ge@Sun.COM 
12711534SKevin.Ge@Sun.COM 	while (rc_chan != NULL) {
12811534SKevin.Ge@Sun.COM 		ace = rc_chan->ace;
12911534SKevin.Ge@Sun.COM 		ASSERT(ace != NULL);
13011534SKevin.Ge@Sun.COM 		/* Close old RC channel */
13113030SKevin.Ge@Sun.COM 		ibd_rc_act_close(rc_chan, B_TRUE);
13211534SKevin.Ge@Sun.COM 		mutex_enter(&state->id_ac_mutex);
13311534SKevin.Ge@Sun.COM 		ASSERT(ace->ac_ref != 0);
13411534SKevin.Ge@Sun.COM 		atomic_dec_32(&ace->ac_ref);
13511534SKevin.Ge@Sun.COM 		ace->ac_chan = NULL;
13611534SKevin.Ge@Sun.COM 		if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
13711534SKevin.Ge@Sun.COM 			IBD_ACACHE_INSERT_FREE(state, ace);
13811534SKevin.Ge@Sun.COM 			ace->ac_ref = 0;
13911534SKevin.Ge@Sun.COM 		} else {
14011534SKevin.Ge@Sun.COM 			ace->ac_ref |= CYCLEVAL;
14111534SKevin.Ge@Sun.COM 			state->rc_delay_ace_recycle++;
14211534SKevin.Ge@Sun.COM 		}
14311534SKevin.Ge@Sun.COM 		mutex_exit(&state->id_ac_mutex);
14411534SKevin.Ge@Sun.COM 		rc_chan = ibd_rc_rm_header_chan_list(
14511534SKevin.Ge@Sun.COM 		    &state->rc_obs_act_chan_list);
14611534SKevin.Ge@Sun.COM 	}
14711534SKevin.Ge@Sun.COM }
14811534SKevin.Ge@Sun.COM 
14911534SKevin.Ge@Sun.COM void
ibd_async_rc_recycle_ace(ibd_state_t * state,ibd_req_t * req)15011534SKevin.Ge@Sun.COM ibd_async_rc_recycle_ace(ibd_state_t *state, ibd_req_t *req)
15111534SKevin.Ge@Sun.COM {
15211534SKevin.Ge@Sun.COM 	ibd_ace_t *ace = req->rq_ptr;
15311534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *rc_chan;
15411534SKevin.Ge@Sun.COM 
15511534SKevin.Ge@Sun.COM 	ASSERT(ace != NULL);
15611534SKevin.Ge@Sun.COM 	rc_chan = ace->ac_chan;
15711534SKevin.Ge@Sun.COM 	ASSERT(rc_chan != NULL);
15811534SKevin.Ge@Sun.COM 	/* Close old RC channel */
15913030SKevin.Ge@Sun.COM 	ibd_rc_act_close(rc_chan, B_TRUE);
16011534SKevin.Ge@Sun.COM 	mutex_enter(&state->id_ac_mutex);
16111534SKevin.Ge@Sun.COM 	ASSERT(ace->ac_ref != 0);
16211534SKevin.Ge@Sun.COM 	atomic_dec_32(&ace->ac_ref);
16311534SKevin.Ge@Sun.COM 	ace->ac_chan = NULL;
16411534SKevin.Ge@Sun.COM 	if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
16511534SKevin.Ge@Sun.COM 		IBD_ACACHE_INSERT_FREE(state, ace);
16611534SKevin.Ge@Sun.COM 		ace->ac_ref = 0;
16711534SKevin.Ge@Sun.COM 	} else {
16811534SKevin.Ge@Sun.COM 		ace->ac_ref |= CYCLEVAL;
16911534SKevin.Ge@Sun.COM 		state->rc_delay_ace_recycle++;
17011534SKevin.Ge@Sun.COM 	}
17111534SKevin.Ge@Sun.COM 	mutex_exit(&state->id_ac_mutex);
17211534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_ace_recycle_lock);
17311534SKevin.Ge@Sun.COM 	state->rc_ace_recycle = NULL;
17411534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_ace_recycle_lock);
17511534SKevin.Ge@Sun.COM }
17611534SKevin.Ge@Sun.COM 
17711534SKevin.Ge@Sun.COM /* Simple ICMP IP Header Template */
17811534SKevin.Ge@Sun.COM static const ipha_t icmp_ipha = {
17911534SKevin.Ge@Sun.COM 	IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
18011534SKevin.Ge@Sun.COM };
18111534SKevin.Ge@Sun.COM 
18211534SKevin.Ge@Sun.COM /* Packet is too big. Send ICMP packet to GLD to request a smaller MTU */
18311534SKevin.Ge@Sun.COM void
ibd_async_rc_process_too_big(ibd_state_t * state,ibd_req_t * req)18411534SKevin.Ge@Sun.COM ibd_async_rc_process_too_big(ibd_state_t *state, ibd_req_t *req)
18511534SKevin.Ge@Sun.COM {
18611534SKevin.Ge@Sun.COM 	mblk_t *mp = req->rq_ptr;
18711534SKevin.Ge@Sun.COM 	ibd_ace_t *ace = req->rq_ptr2;
18811534SKevin.Ge@Sun.COM 	uint16_t mtu = state->id_mtu - IPOIB_HDRSIZE;
18911534SKevin.Ge@Sun.COM 	uint_t	len_needed;
19011534SKevin.Ge@Sun.COM 	size_t	msg_len;
19111534SKevin.Ge@Sun.COM 	mblk_t	*pmtu_mp;
19211534SKevin.Ge@Sun.COM 	ushort_t	sap;
19311534SKevin.Ge@Sun.COM 	ib_header_info_t *ibha;	/* ib header for pmtu_pkt */
19411534SKevin.Ge@Sun.COM 	/*
19511534SKevin.Ge@Sun.COM 	 * ipha: IP header for pmtu_pkt
19611534SKevin.Ge@Sun.COM 	 * old_ipha: IP header for old packet
19711534SKevin.Ge@Sun.COM 	 */
19811534SKevin.Ge@Sun.COM 	ipha_t *ipha, *old_ipha;
19911534SKevin.Ge@Sun.COM 	icmph_t	*icmph;
20011534SKevin.Ge@Sun.COM 
20111534SKevin.Ge@Sun.COM 	sap = ntohs(((ipoib_hdr_t *)mp->b_rptr)->ipoib_type);
20211534SKevin.Ge@Sun.COM 
20311534SKevin.Ge@Sun.COM 	if (!pullupmsg(mp, -1)) {
20411534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_async_rc_process_too_big: pullupmsg fail");
20511534SKevin.Ge@Sun.COM 		goto too_big_fail;
20611534SKevin.Ge@Sun.COM 	}
20711534SKevin.Ge@Sun.COM 	/* move to IP header. */
20811534SKevin.Ge@Sun.COM 	mp->b_rptr += IPOIB_HDRSIZE;
20911534SKevin.Ge@Sun.COM 	old_ipha = (ipha_t *)mp->b_rptr;
21011534SKevin.Ge@Sun.COM 
21111534SKevin.Ge@Sun.COM 	len_needed = IPH_HDR_LENGTH(old_ipha);
21211534SKevin.Ge@Sun.COM 	if (old_ipha->ipha_protocol == IPPROTO_ENCAP) {
21311534SKevin.Ge@Sun.COM 		len_needed += IPH_HDR_LENGTH(((uchar_t *)old_ipha +
21411534SKevin.Ge@Sun.COM 		    len_needed));
21511534SKevin.Ge@Sun.COM 	} else if (old_ipha->ipha_protocol == IPPROTO_IPV6) {
21611534SKevin.Ge@Sun.COM 		ip6_t *ip6h = (ip6_t *)((uchar_t *)old_ipha
21711534SKevin.Ge@Sun.COM 		    + len_needed);
21811534SKevin.Ge@Sun.COM 		len_needed += ip_hdr_length_v6(mp, ip6h);
21911534SKevin.Ge@Sun.COM 	}
22011534SKevin.Ge@Sun.COM 	len_needed += IBD_RC_IP_ICMP_RETURN_DATA_BYTES;
22111534SKevin.Ge@Sun.COM 	msg_len = msgdsize(mp);
22211534SKevin.Ge@Sun.COM 	if (msg_len > len_needed) {
22311534SKevin.Ge@Sun.COM 		(void) adjmsg(mp, len_needed - msg_len);
22411534SKevin.Ge@Sun.COM 		msg_len = len_needed;
22511534SKevin.Ge@Sun.COM 	}
22611534SKevin.Ge@Sun.COM 
22711534SKevin.Ge@Sun.COM 	if ((pmtu_mp = allocb(sizeof (ib_header_info_t) + sizeof (ipha_t)
22811534SKevin.Ge@Sun.COM 	    + sizeof (icmph_t), BPRI_MED)) == NULL) {
22911534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_async_rc_process_too_big: allocb fail");
23011534SKevin.Ge@Sun.COM 		goto too_big_fail;
23111534SKevin.Ge@Sun.COM 	}
23211534SKevin.Ge@Sun.COM 	pmtu_mp->b_cont = mp;
23311534SKevin.Ge@Sun.COM 	pmtu_mp->b_wptr = pmtu_mp->b_rptr + sizeof (ib_header_info_t)
23411534SKevin.Ge@Sun.COM 	    + sizeof (ipha_t) + sizeof (icmph_t);
23511534SKevin.Ge@Sun.COM 
23611534SKevin.Ge@Sun.COM 	ibha = (ib_header_info_t *)pmtu_mp->b_rptr;
23711534SKevin.Ge@Sun.COM 
23811534SKevin.Ge@Sun.COM 	/* Fill IB header */
23911534SKevin.Ge@Sun.COM 	bcopy(&state->id_macaddr, &ibha->ib_dst, IPOIB_ADDRL);
24011534SKevin.Ge@Sun.COM 	/*
24111534SKevin.Ge@Sun.COM 	 * If the GRH is not valid, indicate to GLDv3 by setting
24211534SKevin.Ge@Sun.COM 	 * the VerTcFlow field to 0.
24311534SKevin.Ge@Sun.COM 	 */
24411534SKevin.Ge@Sun.COM 	ibha->ib_grh.ipoib_vertcflow = 0;
24511534SKevin.Ge@Sun.COM 	ibha->ipib_rhdr.ipoib_type = htons(sap);
24611534SKevin.Ge@Sun.COM 	ibha->ipib_rhdr.ipoib_mbz = 0;
24711534SKevin.Ge@Sun.COM 
24811534SKevin.Ge@Sun.COM 	/* Fill IP header */
24911534SKevin.Ge@Sun.COM 	ipha = (ipha_t *)&ibha[1];
25011534SKevin.Ge@Sun.COM 	*ipha = icmp_ipha;
25111534SKevin.Ge@Sun.COM 	ipha->ipha_src = old_ipha->ipha_dst;
25211534SKevin.Ge@Sun.COM 	ipha->ipha_dst = old_ipha->ipha_src;
25311534SKevin.Ge@Sun.COM 	ipha->ipha_ttl = old_ipha->ipha_ttl;
25411534SKevin.Ge@Sun.COM 	msg_len += sizeof (icmp_ipha) + sizeof (icmph_t);
25511534SKevin.Ge@Sun.COM 	if (msg_len > IP_MAXPACKET) {
25611534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_process_too_big_pkt: msg_len(%d) "
25711534SKevin.Ge@Sun.COM 		    "> IP_MAXPACKET", (uint32_t)msg_len);
25811534SKevin.Ge@Sun.COM 		(void) adjmsg(mp, IP_MAXPACKET - msg_len);
25911534SKevin.Ge@Sun.COM 		msg_len = IP_MAXPACKET;
26011534SKevin.Ge@Sun.COM 	}
26111534SKevin.Ge@Sun.COM 	ipha->ipha_length = htons((uint16_t)msg_len);
26211534SKevin.Ge@Sun.COM 	ipha->ipha_hdr_checksum = 0;
26311534SKevin.Ge@Sun.COM 	ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
26411534SKevin.Ge@Sun.COM 
26511534SKevin.Ge@Sun.COM 	/* Fill ICMP body */
26611534SKevin.Ge@Sun.COM 	icmph = (icmph_t *)&ipha[1];
26711534SKevin.Ge@Sun.COM 	bzero(icmph, sizeof (icmph_t));
26811534SKevin.Ge@Sun.COM 	icmph->icmph_type = ICMP_DEST_UNREACHABLE;
26911534SKevin.Ge@Sun.COM 	icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED;
27011534SKevin.Ge@Sun.COM 	icmph->icmph_du_mtu = htons(mtu);
27111534SKevin.Ge@Sun.COM 	icmph->icmph_checksum = 0;
27211534SKevin.Ge@Sun.COM 	icmph->icmph_checksum = IP_CSUM(pmtu_mp,
27311534SKevin.Ge@Sun.COM 	    (int32_t)sizeof (ib_header_info_t) + (int32_t)sizeof (ipha_t), 0);
27411534SKevin.Ge@Sun.COM 
27511534SKevin.Ge@Sun.COM 	(void) hcksum_assoc(pmtu_mp, NULL, NULL, 0, 0, 0, 0,
27611534SKevin.Ge@Sun.COM 	    HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
27711534SKevin.Ge@Sun.COM 
27811534SKevin.Ge@Sun.COM 	DPRINT(30, "ibd_async_rc_process_too_big: sap=0x%x, ip_src=0x%x, "
27911534SKevin.Ge@Sun.COM 	    "ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d",
28011534SKevin.Ge@Sun.COM 	    sap, ipha->ipha_src, ipha->ipha_dst, ipha->ipha_ttl,
28111534SKevin.Ge@Sun.COM 	    len_needed, (uint32_t)msg_len);
28211534SKevin.Ge@Sun.COM 
28311534SKevin.Ge@Sun.COM 	mac_rx(state->id_mh, state->id_rh, pmtu_mp);
28411534SKevin.Ge@Sun.COM 
28511534SKevin.Ge@Sun.COM 	mutex_enter(&ace->tx_too_big_mutex);
28611534SKevin.Ge@Sun.COM 	ace->tx_too_big_ongoing = B_FALSE;
28711534SKevin.Ge@Sun.COM 	mutex_exit(&ace->tx_too_big_mutex);
28811534SKevin.Ge@Sun.COM 	return;
28911534SKevin.Ge@Sun.COM 
29011534SKevin.Ge@Sun.COM too_big_fail:
29111534SKevin.Ge@Sun.COM 	/* Drop packet */
29211534SKevin.Ge@Sun.COM 	freemsg(mp);
29311534SKevin.Ge@Sun.COM 	mutex_enter(&ace->tx_too_big_mutex);
29411534SKevin.Ge@Sun.COM 	ace->tx_too_big_ongoing = B_FALSE;
29511534SKevin.Ge@Sun.COM 	mutex_exit(&ace->tx_too_big_mutex);
29611534SKevin.Ge@Sun.COM }
29711534SKevin.Ge@Sun.COM 
29813030SKevin.Ge@Sun.COM /*
29913030SKevin.Ge@Sun.COM  * Check all active/passive channels. If any ative/passive
30013030SKevin.Ge@Sun.COM  * channel has not been used for a long time, close it.
30113030SKevin.Ge@Sun.COM  */
30213030SKevin.Ge@Sun.COM void
ibd_rc_conn_timeout_call(void * carg)30313030SKevin.Ge@Sun.COM ibd_rc_conn_timeout_call(void *carg)
30413030SKevin.Ge@Sun.COM {
30513030SKevin.Ge@Sun.COM 	ibd_state_t *state = carg;
30613030SKevin.Ge@Sun.COM 	ibd_ace_t *ace, *pre_ace;
30713030SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan, *pre_chan, *next_chan;
30813030SKevin.Ge@Sun.COM 	ibd_req_t *req;
30913030SKevin.Ge@Sun.COM 
31013030SKevin.Ge@Sun.COM 	/* Check all active channels. If chan->is_used == B_FALSE, close it */
31113030SKevin.Ge@Sun.COM 	mutex_enter(&state->id_ac_mutex);
31213030SKevin.Ge@Sun.COM 	ace = list_head(&state->id_ah_active);
31313030SKevin.Ge@Sun.COM 	while ((pre_ace = ace) != NULL) {
31413030SKevin.Ge@Sun.COM 		ace = list_next(&state->id_ah_active, ace);
31513030SKevin.Ge@Sun.COM 		if (pre_ace->ac_chan != NULL) {
31613030SKevin.Ge@Sun.COM 			chan = pre_ace->ac_chan;
31713030SKevin.Ge@Sun.COM 			ASSERT(state->id_enable_rc == B_TRUE);
31813030SKevin.Ge@Sun.COM 			if (chan->chan_state == IBD_RC_STATE_ACT_ESTAB) {
31913030SKevin.Ge@Sun.COM 				if (chan->is_used == B_FALSE) {
32013030SKevin.Ge@Sun.COM 					state->rc_timeout_act++;
32113030SKevin.Ge@Sun.COM 					INC_REF(pre_ace, 1);
32213030SKevin.Ge@Sun.COM 					IBD_ACACHE_PULLOUT_ACTIVE(state,
32313030SKevin.Ge@Sun.COM 					    pre_ace);
32413030SKevin.Ge@Sun.COM 					chan->chan_state =
32513030SKevin.Ge@Sun.COM 					    IBD_RC_STATE_ACT_CLOSING;
32613030SKevin.Ge@Sun.COM 					ibd_rc_signal_act_close(state, pre_ace);
32713030SKevin.Ge@Sun.COM 				} else {
32813030SKevin.Ge@Sun.COM 					chan->is_used = B_FALSE;
32913030SKevin.Ge@Sun.COM 				}
33013030SKevin.Ge@Sun.COM 			}
33113030SKevin.Ge@Sun.COM 		}
33213030SKevin.Ge@Sun.COM 	}
33313030SKevin.Ge@Sun.COM 	mutex_exit(&state->id_ac_mutex);
33413030SKevin.Ge@Sun.COM 
33513030SKevin.Ge@Sun.COM 	/* Check all passive channels. If chan->is_used == B_FALSE, close it */
33613030SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
33713030SKevin.Ge@Sun.COM 	next_chan = state->rc_pass_chan_list.chan_list;
33813030SKevin.Ge@Sun.COM 	pre_chan = NULL;
33913030SKevin.Ge@Sun.COM 	while ((chan = next_chan) != NULL) {
34013030SKevin.Ge@Sun.COM 		next_chan = chan->next;
34113030SKevin.Ge@Sun.COM 		if (chan->is_used == B_FALSE) {
34213030SKevin.Ge@Sun.COM 			req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
34313030SKevin.Ge@Sun.COM 			if (req != NULL) {
34413030SKevin.Ge@Sun.COM 				/* remove it */
34513030SKevin.Ge@Sun.COM 				state->rc_timeout_pas++;
34613030SKevin.Ge@Sun.COM 				req->rq_ptr = chan;
34713030SKevin.Ge@Sun.COM 				ibd_queue_work_slot(state, req,
34813030SKevin.Ge@Sun.COM 				    IBD_ASYNC_RC_CLOSE_PAS_CHAN);
34913030SKevin.Ge@Sun.COM 			} else {
35013030SKevin.Ge@Sun.COM 				ibd_print_warn(state, "ibd_rc_conn_timeout: "
35113030SKevin.Ge@Sun.COM 				    "alloc ibd_req_t fail");
35213030SKevin.Ge@Sun.COM 				if (pre_chan == NULL) {
35313030SKevin.Ge@Sun.COM 					state->rc_pass_chan_list.chan_list =
35413030SKevin.Ge@Sun.COM 					    chan;
35513030SKevin.Ge@Sun.COM 				} else {
35613030SKevin.Ge@Sun.COM 					pre_chan->next = chan;
35713030SKevin.Ge@Sun.COM 				}
35813030SKevin.Ge@Sun.COM 				pre_chan = chan;
35913030SKevin.Ge@Sun.COM 			}
36013030SKevin.Ge@Sun.COM 		} else {
36113030SKevin.Ge@Sun.COM 			if (pre_chan == NULL) {
36213030SKevin.Ge@Sun.COM 				state->rc_pass_chan_list.chan_list = chan;
36313030SKevin.Ge@Sun.COM 			} else {
36413030SKevin.Ge@Sun.COM 				pre_chan->next = chan;
36513030SKevin.Ge@Sun.COM 			}
36613030SKevin.Ge@Sun.COM 			pre_chan = chan;
36713030SKevin.Ge@Sun.COM 			chan->is_used = B_FALSE;
36813030SKevin.Ge@Sun.COM 		}
36913030SKevin.Ge@Sun.COM 	}
37013030SKevin.Ge@Sun.COM 	if (pre_chan != NULL) {
37113030SKevin.Ge@Sun.COM 		pre_chan->next = NULL;
37213030SKevin.Ge@Sun.COM 	} else {
37313030SKevin.Ge@Sun.COM 		state->rc_pass_chan_list.chan_list = NULL;
37413030SKevin.Ge@Sun.COM 	}
37513030SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
37613030SKevin.Ge@Sun.COM 
37713030SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_timeout_lock);
37813030SKevin.Ge@Sun.COM 	if (state->rc_timeout_start == B_TRUE) {
37913030SKevin.Ge@Sun.COM 		state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state,
38013030SKevin.Ge@Sun.COM 		    SEC_TO_TICK(ibd_rc_conn_timeout));
38113030SKevin.Ge@Sun.COM 	}
38213030SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_timeout_lock);
38313030SKevin.Ge@Sun.COM }
38413030SKevin.Ge@Sun.COM 
38511534SKevin.Ge@Sun.COM #ifdef DEBUG
38611534SKevin.Ge@Sun.COM /*
38711534SKevin.Ge@Sun.COM  * ibd_rc_update_stats - update driver private kstat counters
38811534SKevin.Ge@Sun.COM  *
38911534SKevin.Ge@Sun.COM  * This routine will dump the internal statistics counters for ibd's
39011534SKevin.Ge@Sun.COM  * Reliable Connected Mode. The current stats dump values will
39111534SKevin.Ge@Sun.COM  * be sent to the kernel status area.
39211534SKevin.Ge@Sun.COM  */
39311534SKevin.Ge@Sun.COM static int
ibd_rc_update_stats(kstat_t * ksp,int rw)39411534SKevin.Ge@Sun.COM ibd_rc_update_stats(kstat_t *ksp, int rw)
39511534SKevin.Ge@Sun.COM {
39611534SKevin.Ge@Sun.COM 	ibd_state_t *state;
39711534SKevin.Ge@Sun.COM 	ibd_rc_stat_t *ibd_rc_ksp;
39811534SKevin.Ge@Sun.COM 
39911534SKevin.Ge@Sun.COM 	if (rw == KSTAT_WRITE)
40011534SKevin.Ge@Sun.COM 		return (EACCES);
40111534SKevin.Ge@Sun.COM 
40211534SKevin.Ge@Sun.COM 	state = (ibd_state_t *)ksp->ks_private;
40311534SKevin.Ge@Sun.COM 	ASSERT(state != NULL);
40411534SKevin.Ge@Sun.COM 	ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;
40511534SKevin.Ge@Sun.COM 
40611534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcv_trans_byte.value.ul = state->rc_rcv_trans_byte;
40711534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcv_trans_pkt.value.ul = state->rc_rcv_trans_pkt;
40811534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcv_copy_byte.value.ul = state->rc_rcv_copy_byte;
40911534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcv_copy_pkt.value.ul = state->rc_rcv_copy_pkt;
41011534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcv_alloc_fail.value.ul = state->rc_rcv_alloc_fail;
41111534SKevin.Ge@Sun.COM 
41211534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rcq_err.value.ul = state->rc_rcq_err;
41311534SKevin.Ge@Sun.COM 
41411534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_rwqe_short.value.ul = state->rc_rwqe_short;
41511534SKevin.Ge@Sun.COM 
41611534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_bytes.value.ul = state->rc_xmt_bytes;
41711534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_small_pkt.value.ul = state->rc_xmt_small_pkt;
41811534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_fragmented_pkt.value.ul =
41911534SKevin.Ge@Sun.COM 	    state->rc_xmt_fragmented_pkt;
42011534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_map_fail_pkt.value.ul = state->rc_xmt_map_fail_pkt;
42111534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_map_succ_pkt.value.ul = state->rc_xmt_map_succ_pkt;
42211534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_ace_not_found.value.ul = state->rc_ace_not_found;
42311534SKevin.Ge@Sun.COM 
42411534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_scq_no_swqe.value.ul = state->rc_scq_no_swqe;
42511534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_scq_no_largebuf.value.ul = state->rc_scq_no_largebuf;
42611534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_swqe_short.value.ul = state->rc_swqe_short;
42711534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_swqe_mac_update.value.ul = state->rc_swqe_mac_update;
42811534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_buf_short.value.ul = state->rc_xmt_buf_short;
42911534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_xmt_buf_mac_update.value.ul =
43011534SKevin.Ge@Sun.COM 	    state->rc_xmt_buf_mac_update;
43111534SKevin.Ge@Sun.COM 
43211534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_conn_succ.value.ul = state->rc_conn_succ;
43311534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_conn_fail.value.ul = state->rc_conn_fail;
43411534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_null_conn.value.ul = state->rc_null_conn;
43511534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_no_estab_conn.value.ul = state->rc_no_estab_conn;
43611534SKevin.Ge@Sun.COM 
43711534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_act_close.value.ul = state->rc_act_close;
43811534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_pas_close.value.ul = state->rc_pas_close;
43911534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_delay_ace_recycle.value.ul = state->rc_delay_ace_recycle;
44011534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_act_close_simultaneous.value.ul =
44111534SKevin.Ge@Sun.COM 	    state->rc_act_close_simultaneous;
44211534SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_reset_cnt.value.ul = state->rc_reset_cnt;
44313030SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_timeout_act.value.ul = state->rc_timeout_act;
44413030SKevin.Ge@Sun.COM 	ibd_rc_ksp->rc_timeout_pas.value.ul = state->rc_timeout_pas;
44511534SKevin.Ge@Sun.COM 
44611534SKevin.Ge@Sun.COM 	return (0);
44711534SKevin.Ge@Sun.COM }
44811534SKevin.Ge@Sun.COM 
44911534SKevin.Ge@Sun.COM 
45011534SKevin.Ge@Sun.COM /*
45111534SKevin.Ge@Sun.COM  * ibd_rc_init_stats - initialize kstat data structures
45211534SKevin.Ge@Sun.COM  *
45311534SKevin.Ge@Sun.COM  * This routine will create and initialize the driver private
45411534SKevin.Ge@Sun.COM  * statistics counters.
45511534SKevin.Ge@Sun.COM  */
45611534SKevin.Ge@Sun.COM int
ibd_rc_init_stats(ibd_state_t * state)45711534SKevin.Ge@Sun.COM ibd_rc_init_stats(ibd_state_t *state)
45811534SKevin.Ge@Sun.COM {
45911534SKevin.Ge@Sun.COM 	kstat_t *ksp;
46011534SKevin.Ge@Sun.COM 	ibd_rc_stat_t *ibd_rc_ksp;
46113030SKevin.Ge@Sun.COM 	char stat_name[KSTAT_STRLEN];
46212163SRamaswamy.Tummala@Sun.COM 	int inst;
46311534SKevin.Ge@Sun.COM 
46411534SKevin.Ge@Sun.COM 	/*
46511534SKevin.Ge@Sun.COM 	 * Create and init kstat
46611534SKevin.Ge@Sun.COM 	 */
46712163SRamaswamy.Tummala@Sun.COM 	inst = ddi_get_instance(state->id_dip);
468*13060SPavan.Chandrashekar@Sun.COM 	(void) snprintf(stat_name, KSTAT_STRLEN, "statistics%d_%x_%u", inst,
469*13060SPavan.Chandrashekar@Sun.COM 	    state->id_pkey, state->id_plinkid);
47012163SRamaswamy.Tummala@Sun.COM 	ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED,
47111534SKevin.Ge@Sun.COM 	    sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
47211534SKevin.Ge@Sun.COM 
47311534SKevin.Ge@Sun.COM 	if (ksp == NULL) {
47411534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_init_stats: Could not create "
47511534SKevin.Ge@Sun.COM 		    "kernel statistics");
47611534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
47711534SKevin.Ge@Sun.COM 	}
47811534SKevin.Ge@Sun.COM 
47911534SKevin.Ge@Sun.COM 	state->rc_ksp = ksp;	/* Fill in the ksp of ibd over RC mode */
48011534SKevin.Ge@Sun.COM 
48111534SKevin.Ge@Sun.COM 	ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;
48211534SKevin.Ge@Sun.COM 
48311534SKevin.Ge@Sun.COM 	/*
48411534SKevin.Ge@Sun.COM 	 * Initialize all the statistics
48511534SKevin.Ge@Sun.COM 	 */
48611534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_byte, "RC: Rx Bytes, "
48711534SKevin.Ge@Sun.COM 	    "transfer mode", KSTAT_DATA_ULONG);
48811534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_pkt, "RC: Rx Pkts, "
48911534SKevin.Ge@Sun.COM 	    "transfer mode", KSTAT_DATA_ULONG);
49011534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_byte, "RC: Rx Bytes, "
49111534SKevin.Ge@Sun.COM 	    "copy mode", KSTAT_DATA_ULONG);
49211534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_pkt, "RC: Rx Pkts, "
49311534SKevin.Ge@Sun.COM 	    "copy mode", KSTAT_DATA_ULONG);
49411534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcv_alloc_fail, "RC: Rx alloc fail",
49511534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
49611534SKevin.Ge@Sun.COM 
49711534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rcq_err, "RC: fail in Recv CQ handler",
49811534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
49911534SKevin.Ge@Sun.COM 
50011534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_rwqe_short, "RC: Short rwqe",
50111534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
50211534SKevin.Ge@Sun.COM 
50311534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_bytes, "RC: Sent Bytes",
50411534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
50511534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_small_pkt,
50611534SKevin.Ge@Sun.COM 	    "RC: Tx pkt small size", KSTAT_DATA_ULONG);
50711534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_fragmented_pkt,
50811534SKevin.Ge@Sun.COM 	    "RC: Tx pkt fragmentary", KSTAT_DATA_ULONG);
50911534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_map_fail_pkt,
51011534SKevin.Ge@Sun.COM 	    "RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG);
51111534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_map_succ_pkt,
51211534SKevin.Ge@Sun.COM 	    "RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG);
51311534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_ace_not_found, "RC: ace not found",
51411534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
51511534SKevin.Ge@Sun.COM 
51611534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_scq_no_swqe, "RC: No swqe after "
51711534SKevin.Ge@Sun.COM 	    "recycle", KSTAT_DATA_ULONG);
51811534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_scq_no_largebuf, "RC: No large tx buf "
51911534SKevin.Ge@Sun.COM 	    "after recycle", KSTAT_DATA_ULONG);
52011534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_swqe_short, "RC: No swqe in ibd_send",
52111534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
52211534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_swqe_mac_update, "RC: mac_tx_update "
52311534SKevin.Ge@Sun.COM 	    "#, swqe available", KSTAT_DATA_ULONG);
52411534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_short, "RC: No buf in "
52511534SKevin.Ge@Sun.COM 	    "ibd_send", KSTAT_DATA_ULONG);
52611534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_mac_update, "RC: "
52711534SKevin.Ge@Sun.COM 	    "mac_tx_update #, buf available", KSTAT_DATA_ULONG);
52811534SKevin.Ge@Sun.COM 
52911534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_conn_succ, "RC: succ connected",
53011534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
53111534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_conn_fail, "RC: fail connect",
53211534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
53311534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_null_conn, "RC: null conn for unicast "
53411534SKevin.Ge@Sun.COM 	    "pkt", KSTAT_DATA_ULONG);
53511534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_no_estab_conn, "RC: not in act estab "
53611534SKevin.Ge@Sun.COM 	    "state", KSTAT_DATA_ULONG);
53711534SKevin.Ge@Sun.COM 
53811534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: call ibd_rc_act_close",
53911534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
54011534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: call ibd_rc_pas_close",
54111534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
54211534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_delay_ace_recycle, "RC: delay ace "
54311534SKevin.Ge@Sun.COM 	    "recycle", KSTAT_DATA_ULONG);
54411534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_act_close_simultaneous, "RC: "
54511534SKevin.Ge@Sun.COM 	    "simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG);
54611534SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_reset_cnt, "RC: Reset RC channel",
54711534SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
54813030SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: timeout act side",
54913030SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
55013030SKevin.Ge@Sun.COM 	kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: timeout pas side",
55113030SKevin.Ge@Sun.COM 	    KSTAT_DATA_ULONG);
55211534SKevin.Ge@Sun.COM 
55311534SKevin.Ge@Sun.COM 	/*
55411534SKevin.Ge@Sun.COM 	 * Function to provide kernel stat update on demand
55511534SKevin.Ge@Sun.COM 	 */
55611534SKevin.Ge@Sun.COM 	ksp->ks_update = ibd_rc_update_stats;
55711534SKevin.Ge@Sun.COM 
55811534SKevin.Ge@Sun.COM 	/*
55911534SKevin.Ge@Sun.COM 	 * Pointer into provider's raw statistics
56011534SKevin.Ge@Sun.COM 	 */
56111534SKevin.Ge@Sun.COM 	ksp->ks_private = (void *)state;
56211534SKevin.Ge@Sun.COM 
56311534SKevin.Ge@Sun.COM 	/*
56411534SKevin.Ge@Sun.COM 	 * Add kstat to systems kstat chain
56511534SKevin.Ge@Sun.COM 	 */
56611534SKevin.Ge@Sun.COM 	kstat_install(ksp);
56711534SKevin.Ge@Sun.COM 
56811534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
56911534SKevin.Ge@Sun.COM }
57011534SKevin.Ge@Sun.COM #endif
57111534SKevin.Ge@Sun.COM 
57211534SKevin.Ge@Sun.COM static ibt_status_t
ibd_rc_alloc_chan(ibd_rc_chan_t ** ret_chan,ibd_state_t * state,boolean_t is_tx_chan)57311534SKevin.Ge@Sun.COM ibd_rc_alloc_chan(ibd_rc_chan_t **ret_chan, ibd_state_t *state,
57411534SKevin.Ge@Sun.COM     boolean_t is_tx_chan)
57511534SKevin.Ge@Sun.COM {
57611534SKevin.Ge@Sun.COM 	ibt_status_t result;
57711534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan;
57811534SKevin.Ge@Sun.COM 	ibt_rc_chan_alloc_args_t alloc_args;
57911534SKevin.Ge@Sun.COM 	ibt_chan_alloc_flags_t alloc_flags;
58011534SKevin.Ge@Sun.COM 	ibt_chan_sizes_t sizes;
58111534SKevin.Ge@Sun.COM 	ibt_cq_attr_t cq_atts;
58211534SKevin.Ge@Sun.COM 	int rv;
58311534SKevin.Ge@Sun.COM 
58411534SKevin.Ge@Sun.COM 	chan = kmem_zalloc(sizeof (ibd_rc_chan_t), KM_SLEEP);
58511534SKevin.Ge@Sun.COM 
58611534SKevin.Ge@Sun.COM 	chan->state = state;
58711534SKevin.Ge@Sun.COM 	mutex_init(&chan->rx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
58811534SKevin.Ge@Sun.COM 	mutex_init(&chan->rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
58911534SKevin.Ge@Sun.COM 	mutex_init(&chan->tx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
59011534SKevin.Ge@Sun.COM 	mutex_init(&chan->tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
59111534SKevin.Ge@Sun.COM 	mutex_init(&chan->tx_post_lock, NULL, MUTEX_DRIVER, NULL);
59211534SKevin.Ge@Sun.COM 	mutex_init(&chan->tx_poll_lock, NULL, MUTEX_DRIVER, NULL);
59311534SKevin.Ge@Sun.COM 
59411534SKevin.Ge@Sun.COM 	/* Allocate IB structures for a new RC channel. */
59511534SKevin.Ge@Sun.COM 	if (is_tx_chan) {
59612163SRamaswamy.Tummala@Sun.COM 		chan->scq_size = state->id_rc_num_swqe;
59711534SKevin.Ge@Sun.COM 		chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
59811534SKevin.Ge@Sun.COM 	} else {
59911534SKevin.Ge@Sun.COM 		chan->scq_size = IBD_RC_MIN_CQ_SIZE;
60012163SRamaswamy.Tummala@Sun.COM 		chan->rcq_size = state->id_rc_num_rwqe;
60111534SKevin.Ge@Sun.COM 	}
60211534SKevin.Ge@Sun.COM 	cq_atts.cq_size = chan->scq_size;
60311534SKevin.Ge@Sun.COM 	cq_atts.cq_sched = NULL;
60411534SKevin.Ge@Sun.COM 	cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
60511534SKevin.Ge@Sun.COM 	result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->scq_hdl,
60611534SKevin.Ge@Sun.COM 	    &chan->scq_size);
60711534SKevin.Ge@Sun.COM 	if (result != IBT_SUCCESS) {
60811534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_alloc_chan: error <%d>"
60911534SKevin.Ge@Sun.COM 		    "create scq completion queue (size <%d>)",
61011534SKevin.Ge@Sun.COM 		    result, chan->scq_size);
61111534SKevin.Ge@Sun.COM 		goto alloc_scq_err;
61211534SKevin.Ge@Sun.COM 	}	/* if failure to alloc cq */
61311534SKevin.Ge@Sun.COM 
61412163SRamaswamy.Tummala@Sun.COM 	if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
61512163SRamaswamy.Tummala@Sun.COM 	    state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
61613030SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_alloc_chan: Send CQ "
61712163SRamaswamy.Tummala@Sun.COM 		    "interrupt moderation failed");
61811534SKevin.Ge@Sun.COM 	}
61911534SKevin.Ge@Sun.COM 
62011534SKevin.Ge@Sun.COM 	ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
62111534SKevin.Ge@Sun.COM 	ibt_set_cq_handler(chan->scq_hdl, ibd_rc_scq_handler,
62211534SKevin.Ge@Sun.COM 	    (void *) (uintptr_t)chan);
62311534SKevin.Ge@Sun.COM 
62411534SKevin.Ge@Sun.COM 	cq_atts.cq_size = chan->rcq_size;
62511534SKevin.Ge@Sun.COM 	cq_atts.cq_sched = NULL;
62611534SKevin.Ge@Sun.COM 	cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
62711534SKevin.Ge@Sun.COM 	result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->rcq_hdl,
62811534SKevin.Ge@Sun.COM 	    &chan->rcq_size);
62911534SKevin.Ge@Sun.COM 	if (result != IBT_SUCCESS) {
63011534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_alloc_chan: error <%d> creating "
63111534SKevin.Ge@Sun.COM 		    "rx completion queue (size <%d>)", result, chan->rcq_size);
63211534SKevin.Ge@Sun.COM 		goto alloc_rcq_err;
63311534SKevin.Ge@Sun.COM 	}	/* if failure to alloc cq */
63411534SKevin.Ge@Sun.COM 
63512163SRamaswamy.Tummala@Sun.COM 	if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
63612163SRamaswamy.Tummala@Sun.COM 	    state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
63713030SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_alloc_chan: Receive CQ "
63812163SRamaswamy.Tummala@Sun.COM 		    "interrupt moderation failed");
63911534SKevin.Ge@Sun.COM 	}
64012163SRamaswamy.Tummala@Sun.COM 
64111534SKevin.Ge@Sun.COM 	ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
64211534SKevin.Ge@Sun.COM 	ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
64311534SKevin.Ge@Sun.COM 	    (void *)(uintptr_t)chan);
64411534SKevin.Ge@Sun.COM 
64511534SKevin.Ge@Sun.COM 	if (is_tx_chan) {
64611534SKevin.Ge@Sun.COM 		chan->is_tx_chan = B_TRUE;
64711534SKevin.Ge@Sun.COM 		if (ibd_rc_init_txlist(chan) != DDI_SUCCESS) {
64811534SKevin.Ge@Sun.COM 			ibd_print_warn(state, "ibd_rc_alloc_chan: "
64911534SKevin.Ge@Sun.COM 			    "ibd_rc_init_txlist failed");
65011534SKevin.Ge@Sun.COM 			goto init_txlist_err;
65111534SKevin.Ge@Sun.COM 		}
65211534SKevin.Ge@Sun.COM 		if (ibd_rc_tx_softintr == 1) {
65311534SKevin.Ge@Sun.COM 			if ((rv = ddi_add_softintr(state->id_dip,
65411534SKevin.Ge@Sun.COM 			    DDI_SOFTINT_LOW, &chan->scq_softintr, NULL, NULL,
65511534SKevin.Ge@Sun.COM 			    ibd_rc_tx_recycle, (caddr_t)chan)) !=
65611534SKevin.Ge@Sun.COM 			    DDI_SUCCESS) {
65711534SKevin.Ge@Sun.COM 				DPRINT(10, "ibd_rc_alloc_chan: failed in "
65811534SKevin.Ge@Sun.COM 				    "ddi_add_softintr(scq_softintr), ret=%d",
65911534SKevin.Ge@Sun.COM 				    rv);
66011534SKevin.Ge@Sun.COM 				goto alloc_softintr_err;
66111534SKevin.Ge@Sun.COM 			}
66211534SKevin.Ge@Sun.COM 		}
66311534SKevin.Ge@Sun.COM 	} else {
66411534SKevin.Ge@Sun.COM 		chan->is_tx_chan = B_FALSE;
66511534SKevin.Ge@Sun.COM 	}
66611534SKevin.Ge@Sun.COM 
66711534SKevin.Ge@Sun.COM 	/*
66811534SKevin.Ge@Sun.COM 	 * enable completions
66911534SKevin.Ge@Sun.COM 	 */
67011534SKevin.Ge@Sun.COM 	result = ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION);
67111534SKevin.Ge@Sun.COM 	if (result != IBT_SUCCESS) {
67211534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_alloc_chan: ibt_enable_cq_notify"
67311534SKevin.Ge@Sun.COM 		    "(scq) failed: status %d\n", result);
67411534SKevin.Ge@Sun.COM 		goto alloc_scq_enable_err;
67511534SKevin.Ge@Sun.COM 	}
67611534SKevin.Ge@Sun.COM 
67711534SKevin.Ge@Sun.COM 	/* We will enable chan->rcq_hdl later. */
67811534SKevin.Ge@Sun.COM 
67911534SKevin.Ge@Sun.COM 	/* alloc a RC channel */
68011534SKevin.Ge@Sun.COM 	bzero(&alloc_args, sizeof (ibt_rc_chan_alloc_args_t));
68111534SKevin.Ge@Sun.COM 	bzero(&sizes, sizeof (ibt_chan_sizes_t));
68211534SKevin.Ge@Sun.COM 
68311534SKevin.Ge@Sun.COM 	alloc_args.rc_flags = IBT_WR_SIGNALED;
68411534SKevin.Ge@Sun.COM 	alloc_args.rc_control = IBT_CEP_NO_FLAGS;
68511534SKevin.Ge@Sun.COM 
68611534SKevin.Ge@Sun.COM 	alloc_args.rc_scq = chan->scq_hdl;
68711534SKevin.Ge@Sun.COM 	alloc_args.rc_rcq = chan->rcq_hdl;
68811534SKevin.Ge@Sun.COM 	alloc_args.rc_pd = state->id_pd_hdl;
68911534SKevin.Ge@Sun.COM 
69011534SKevin.Ge@Sun.COM 	alloc_args.rc_hca_port_num = state->id_port;
69111534SKevin.Ge@Sun.COM 	alloc_args.rc_clone_chan = NULL;
69211534SKevin.Ge@Sun.COM 
69311534SKevin.Ge@Sun.COM 	/* scatter/gather */
69411534SKevin.Ge@Sun.COM 	alloc_args.rc_sizes.cs_sq_sgl = state->rc_tx_max_sqseg;
69511534SKevin.Ge@Sun.COM 
69611534SKevin.Ge@Sun.COM 	/*
69711534SKevin.Ge@Sun.COM 	 * For the number of SGL elements in receive side, I think it
69811534SKevin.Ge@Sun.COM 	 * should be 1. Because ibd driver allocates a whole block memory
69911534SKevin.Ge@Sun.COM 	 * for each ibt_post_recv().
70011534SKevin.Ge@Sun.COM 	 */
70111534SKevin.Ge@Sun.COM 	alloc_args.rc_sizes.cs_rq_sgl = 1;
70211534SKevin.Ge@Sun.COM 
70311534SKevin.Ge@Sun.COM 	/* The send queue size and the receive queue size */
70411534SKevin.Ge@Sun.COM 	alloc_args.rc_sizes.cs_sq = chan->scq_size;
70511534SKevin.Ge@Sun.COM 	alloc_args.rc_sizes.cs_rq = chan->rcq_size;
70611534SKevin.Ge@Sun.COM 
70711534SKevin.Ge@Sun.COM 	if (state->id_hca_res_lkey_capab) {
70811534SKevin.Ge@Sun.COM 		alloc_args.rc_flags = IBT_FAST_REG_RES_LKEY;
70911534SKevin.Ge@Sun.COM 	} else {
71011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_alloc_chan: not support reserved lkey");
71111534SKevin.Ge@Sun.COM 	}
71211534SKevin.Ge@Sun.COM 
71311534SKevin.Ge@Sun.COM 	if (state->rc_enable_srq) {
71411534SKevin.Ge@Sun.COM 		alloc_flags = IBT_ACHAN_USES_SRQ;
71511534SKevin.Ge@Sun.COM 		alloc_args.rc_srq = state->rc_srq_hdl;
71611534SKevin.Ge@Sun.COM 	} else {
71711534SKevin.Ge@Sun.COM 		alloc_flags = IBT_ACHAN_NO_FLAGS;
71811534SKevin.Ge@Sun.COM 	}
71911534SKevin.Ge@Sun.COM 
72011534SKevin.Ge@Sun.COM 	result = ibt_alloc_rc_channel(state->id_hca_hdl,
72111534SKevin.Ge@Sun.COM 	    alloc_flags, &alloc_args, &chan->chan_hdl, &sizes);
72211534SKevin.Ge@Sun.COM 	if (result != IBT_SUCCESS) {
72311534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_alloc_chan: ibd_rc_open_channel"
72411534SKevin.Ge@Sun.COM 		    " fail:<%d>", result);
72511534SKevin.Ge@Sun.COM 		goto alloc_scq_enable_err;
72611534SKevin.Ge@Sun.COM 	}
72711534SKevin.Ge@Sun.COM 
72813030SKevin.Ge@Sun.COM 	if (is_tx_chan)
72913030SKevin.Ge@Sun.COM 		atomic_inc_32(&state->rc_num_tx_chan);
73013030SKevin.Ge@Sun.COM 	else
73113030SKevin.Ge@Sun.COM 		atomic_inc_32(&state->rc_num_rx_chan);
73213030SKevin.Ge@Sun.COM 
73313030SKevin.Ge@Sun.COM 	/* For the connection reaper routine ibd_rc_conn_timeout_call() */
73413030SKevin.Ge@Sun.COM 	chan->is_used = B_TRUE;
73513030SKevin.Ge@Sun.COM 
73611534SKevin.Ge@Sun.COM 	*ret_chan = chan;
73711534SKevin.Ge@Sun.COM 	return (IBT_SUCCESS);
73811534SKevin.Ge@Sun.COM 
73911534SKevin.Ge@Sun.COM alloc_scq_enable_err:
74011534SKevin.Ge@Sun.COM 	if (is_tx_chan) {
74111534SKevin.Ge@Sun.COM 		if (ibd_rc_tx_softintr == 1) {
74211534SKevin.Ge@Sun.COM 			ddi_remove_softintr(chan->scq_softintr);
74311534SKevin.Ge@Sun.COM 		}
74411534SKevin.Ge@Sun.COM 	}
74511534SKevin.Ge@Sun.COM alloc_softintr_err:
74611534SKevin.Ge@Sun.COM 	if (is_tx_chan) {
74711534SKevin.Ge@Sun.COM 		ibd_rc_fini_txlist(chan);
74811534SKevin.Ge@Sun.COM 	}
74911534SKevin.Ge@Sun.COM init_txlist_err:
75011534SKevin.Ge@Sun.COM 	(void) ibt_free_cq(chan->rcq_hdl);
75111534SKevin.Ge@Sun.COM alloc_rcq_err:
75211534SKevin.Ge@Sun.COM 	(void) ibt_free_cq(chan->scq_hdl);
75311534SKevin.Ge@Sun.COM alloc_scq_err:
75411534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_poll_lock);
75511534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_post_lock);
75611534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_rel_list.dl_mutex);
75711534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_wqe_list.dl_mutex);
75811534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->rx_free_list.dl_mutex);
75911534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->rx_wqe_list.dl_mutex);
76011534SKevin.Ge@Sun.COM 	kmem_free(chan, sizeof (ibd_rc_chan_t));
76111534SKevin.Ge@Sun.COM 	return (result);
76211534SKevin.Ge@Sun.COM }
76311534SKevin.Ge@Sun.COM 
76411534SKevin.Ge@Sun.COM static void
ibd_rc_free_chan(ibd_rc_chan_t * chan)76511534SKevin.Ge@Sun.COM ibd_rc_free_chan(ibd_rc_chan_t *chan)
76611534SKevin.Ge@Sun.COM {
76711534SKevin.Ge@Sun.COM 	ibt_status_t ret;
76811534SKevin.Ge@Sun.COM 
76911534SKevin.Ge@Sun.COM 	/* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */
77011534SKevin.Ge@Sun.COM 
77111534SKevin.Ge@Sun.COM 	if (chan->chan_hdl != NULL) {
77211534SKevin.Ge@Sun.COM 		ret = ibt_free_channel(chan->chan_hdl);
77311534SKevin.Ge@Sun.COM 		if (ret != IBT_SUCCESS) {
77411534SKevin.Ge@Sun.COM 			DPRINT(40, "ib_rc_free_chan: ibt_free_channel failed, "
77511534SKevin.Ge@Sun.COM 			    "chan=%p, returned: %d", chan, ret);
77611534SKevin.Ge@Sun.COM 			return;
77711534SKevin.Ge@Sun.COM 		}
77811534SKevin.Ge@Sun.COM 		chan->chan_hdl = NULL;
77911534SKevin.Ge@Sun.COM 	}
78011534SKevin.Ge@Sun.COM 
78111534SKevin.Ge@Sun.COM 	if (chan->rcq_hdl != NULL) {
78211534SKevin.Ge@Sun.COM 		ret = ibt_free_cq(chan->rcq_hdl);
78311534SKevin.Ge@Sun.COM 		if (ret != IBT_SUCCESS) {
78411534SKevin.Ge@Sun.COM 			DPRINT(40, "ib_rc_free_chan: ibt_free_cq(rcq) failed, "
78511534SKevin.Ge@Sun.COM 			    "chan=%p, returned: %d", chan, ret);
78611534SKevin.Ge@Sun.COM 			return;
78711534SKevin.Ge@Sun.COM 		}
78811534SKevin.Ge@Sun.COM 		chan->rcq_hdl = NULL;
78911534SKevin.Ge@Sun.COM 	}
79011534SKevin.Ge@Sun.COM 
79111534SKevin.Ge@Sun.COM 	if (chan->scq_hdl != NULL) {
79211534SKevin.Ge@Sun.COM 		ret = ibt_free_cq(chan->scq_hdl);
79311534SKevin.Ge@Sun.COM 		if (ret != IBT_SUCCESS) {
79411534SKevin.Ge@Sun.COM 			DPRINT(40, "ib_rc_free_chan: ibt_free_cq(scq) failed, "
79511534SKevin.Ge@Sun.COM 			    "chan=%p, returned: %d", chan, ret);
79611534SKevin.Ge@Sun.COM 			return;
79711534SKevin.Ge@Sun.COM 		}
79811534SKevin.Ge@Sun.COM 		chan->scq_hdl = NULL;
79911534SKevin.Ge@Sun.COM 	}
80011534SKevin.Ge@Sun.COM 
80111534SKevin.Ge@Sun.COM 	/* Free buffers */
80211534SKevin.Ge@Sun.COM 	if (chan->is_tx_chan) {
80311534SKevin.Ge@Sun.COM 		ibd_rc_fini_txlist(chan);
80411534SKevin.Ge@Sun.COM 		if (ibd_rc_tx_softintr == 1) {
80511534SKevin.Ge@Sun.COM 			ddi_remove_softintr(chan->scq_softintr);
80611534SKevin.Ge@Sun.COM 		}
80713030SKevin.Ge@Sun.COM 		atomic_dec_32(&chan->state->rc_num_tx_chan);
80811534SKevin.Ge@Sun.COM 	} else {
80911534SKevin.Ge@Sun.COM 		if (!chan->state->rc_enable_srq) {
81011534SKevin.Ge@Sun.COM 			ibd_rc_fini_rxlist(chan);
81111534SKevin.Ge@Sun.COM 		}
81213030SKevin.Ge@Sun.COM 		atomic_dec_32(&chan->state->rc_num_rx_chan);
81311534SKevin.Ge@Sun.COM 	}
81411534SKevin.Ge@Sun.COM 
81511534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_poll_lock);
81611534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_post_lock);
81711534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_rel_list.dl_mutex);
81811534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->tx_wqe_list.dl_mutex);
81911534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->rx_free_list.dl_mutex);
82011534SKevin.Ge@Sun.COM 	mutex_destroy(&chan->rx_wqe_list.dl_mutex);
82111534SKevin.Ge@Sun.COM 
82211534SKevin.Ge@Sun.COM 	/*
82311534SKevin.Ge@Sun.COM 	 * If it is a passive channel, must make sure it has been removed
82411534SKevin.Ge@Sun.COM 	 * from chan->state->rc_pass_chan_list
82511534SKevin.Ge@Sun.COM 	 */
82611534SKevin.Ge@Sun.COM 	kmem_free(chan, sizeof (ibd_rc_chan_t));
82711534SKevin.Ge@Sun.COM }
82811534SKevin.Ge@Sun.COM 
82911534SKevin.Ge@Sun.COM /* Add a RC channel */
83011534SKevin.Ge@Sun.COM static inline void
ibd_rc_add_to_chan_list(ibd_rc_chan_list_t * list,ibd_rc_chan_t * chan)83111534SKevin.Ge@Sun.COM ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
83211534SKevin.Ge@Sun.COM {
83311534SKevin.Ge@Sun.COM 	mutex_enter(&list->chan_list_mutex);
83411534SKevin.Ge@Sun.COM 	if (list->chan_list == NULL) {
83511534SKevin.Ge@Sun.COM 		list->chan_list = chan;
83613030SKevin.Ge@Sun.COM 		chan->next = NULL;
83711534SKevin.Ge@Sun.COM 	} else {
83811534SKevin.Ge@Sun.COM 		chan->next = list->chan_list;
83911534SKevin.Ge@Sun.COM 		list->chan_list = chan;
84011534SKevin.Ge@Sun.COM 	}
84111534SKevin.Ge@Sun.COM 	mutex_exit(&list->chan_list_mutex);
84211534SKevin.Ge@Sun.COM }
84311534SKevin.Ge@Sun.COM 
84413030SKevin.Ge@Sun.COM static boolean_t
ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t * chan)84513030SKevin.Ge@Sun.COM ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t *chan)
84613030SKevin.Ge@Sun.COM {
84713030SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
84813030SKevin.Ge@Sun.COM 
84913030SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
85013030SKevin.Ge@Sun.COM 	if ((state->id_mac_state & IBD_DRV_STARTED) == 0) {
85113030SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
85213030SKevin.Ge@Sun.COM 		return (B_FALSE);
85313030SKevin.Ge@Sun.COM 	} else {
85413030SKevin.Ge@Sun.COM 		if (state->rc_pass_chan_list.chan_list == NULL) {
85513030SKevin.Ge@Sun.COM 			state->rc_pass_chan_list.chan_list = chan;
85613030SKevin.Ge@Sun.COM 			chan->next = NULL;
85713030SKevin.Ge@Sun.COM 		} else {
85813030SKevin.Ge@Sun.COM 			chan->next = state->rc_pass_chan_list.chan_list;
85913030SKevin.Ge@Sun.COM 			state->rc_pass_chan_list.chan_list = chan;
86013030SKevin.Ge@Sun.COM 		}
86113030SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
86213030SKevin.Ge@Sun.COM 		return (B_TRUE);
86313030SKevin.Ge@Sun.COM 	}
86413030SKevin.Ge@Sun.COM }
86513030SKevin.Ge@Sun.COM 
86611534SKevin.Ge@Sun.COM /* Remove a RC channel */
86713030SKevin.Ge@Sun.COM static inline ibd_rc_chan_t *
ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t * list,ibd_rc_chan_t * chan)86811534SKevin.Ge@Sun.COM ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
86911534SKevin.Ge@Sun.COM {
87011534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *pre_chan;
87111534SKevin.Ge@Sun.COM 
87211534SKevin.Ge@Sun.COM 	mutex_enter(&list->chan_list_mutex);
87311534SKevin.Ge@Sun.COM 	if (list->chan_list == chan) {
87411534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_rm_from_chan_list(first): found chan(%p)"
87511534SKevin.Ge@Sun.COM 		    " in chan_list", chan);
87611534SKevin.Ge@Sun.COM 		list->chan_list = chan->next;
87711534SKevin.Ge@Sun.COM 	} else {
87811534SKevin.Ge@Sun.COM 		pre_chan = list->chan_list;
87911534SKevin.Ge@Sun.COM 		while (pre_chan != NULL) {
88011534SKevin.Ge@Sun.COM 			if (pre_chan->next == chan) {
88111534SKevin.Ge@Sun.COM 				DPRINT(30, "ibd_rc_rm_from_chan_list"
88213030SKevin.Ge@Sun.COM 				    "(middle): found chan(%p)", chan);
88311534SKevin.Ge@Sun.COM 				pre_chan->next = chan->next;
88411534SKevin.Ge@Sun.COM 				break;
88511534SKevin.Ge@Sun.COM 			}
88611534SKevin.Ge@Sun.COM 			pre_chan = pre_chan->next;
88711534SKevin.Ge@Sun.COM 		}
88813030SKevin.Ge@Sun.COM 		if (pre_chan == NULL)
88913030SKevin.Ge@Sun.COM 			chan = NULL;
89011534SKevin.Ge@Sun.COM 	}
89111534SKevin.Ge@Sun.COM 	mutex_exit(&list->chan_list_mutex);
89213030SKevin.Ge@Sun.COM 	return (chan);
89311534SKevin.Ge@Sun.COM }
89411534SKevin.Ge@Sun.COM 
89511534SKevin.Ge@Sun.COM static inline ibd_rc_chan_t *
ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t * list)89611534SKevin.Ge@Sun.COM ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t *list)
89711534SKevin.Ge@Sun.COM {
89811534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *rc_chan;
89911534SKevin.Ge@Sun.COM 
90011534SKevin.Ge@Sun.COM 	mutex_enter(&list->chan_list_mutex);
90111534SKevin.Ge@Sun.COM 	rc_chan = list->chan_list;
90211534SKevin.Ge@Sun.COM 	if (rc_chan != NULL) {
90311534SKevin.Ge@Sun.COM 		list->chan_list = rc_chan->next;
90411534SKevin.Ge@Sun.COM 	}
90511534SKevin.Ge@Sun.COM 	mutex_exit(&list->chan_list_mutex);
90611534SKevin.Ge@Sun.COM 	return (rc_chan);
90711534SKevin.Ge@Sun.COM }
90811534SKevin.Ge@Sun.COM 
90911534SKevin.Ge@Sun.COM static int
ibd_rc_alloc_srq_copybufs(ibd_state_t * state)91011534SKevin.Ge@Sun.COM ibd_rc_alloc_srq_copybufs(ibd_state_t *state)
91111534SKevin.Ge@Sun.COM {
91211534SKevin.Ge@Sun.COM 	ibt_mr_attr_t mem_attr;
91311534SKevin.Ge@Sun.COM 	uint_t rc_rx_bufs_sz;
91411534SKevin.Ge@Sun.COM 
91511534SKevin.Ge@Sun.COM 	/*
91611534SKevin.Ge@Sun.COM 	 * Allocate one big chunk for all regular rx copy bufs
91711534SKevin.Ge@Sun.COM 	 */
91811534SKevin.Ge@Sun.COM 	rc_rx_bufs_sz =  (state->rc_mtu + IPOIB_GRH_SIZE) * state->rc_srq_size;
91911534SKevin.Ge@Sun.COM 
92011534SKevin.Ge@Sun.COM 	state->rc_srq_rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);
92111534SKevin.Ge@Sun.COM 
92211534SKevin.Ge@Sun.COM 	state->rc_srq_rwqes = kmem_zalloc(state->rc_srq_size *
92311534SKevin.Ge@Sun.COM 	    sizeof (ibd_rwqe_t), KM_SLEEP);
92411534SKevin.Ge@Sun.COM 
92511534SKevin.Ge@Sun.COM 	/*
92611534SKevin.Ge@Sun.COM 	 * Do one memory registration on the entire rxbuf area
92711534SKevin.Ge@Sun.COM 	 */
92811534SKevin.Ge@Sun.COM 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_srq_rx_bufs;
92911534SKevin.Ge@Sun.COM 	mem_attr.mr_len = rc_rx_bufs_sz;
93011534SKevin.Ge@Sun.COM 	mem_attr.mr_as = NULL;
93111534SKevin.Ge@Sun.COM 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
93211534SKevin.Ge@Sun.COM 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
93311534SKevin.Ge@Sun.COM 	    &state->rc_srq_rx_mr_hdl, &state->rc_srq_rx_mr_desc)
93411534SKevin.Ge@Sun.COM 	    != IBT_SUCCESS) {
93511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr() "
93611534SKevin.Ge@Sun.COM 		    "failed");
93711534SKevin.Ge@Sun.COM 		kmem_free(state->rc_srq_rwqes,
93811534SKevin.Ge@Sun.COM 		    state->rc_srq_size * sizeof (ibd_rwqe_t));
93911534SKevin.Ge@Sun.COM 		kmem_free(state->rc_srq_rx_bufs, rc_rx_bufs_sz);
94011534SKevin.Ge@Sun.COM 		state->rc_srq_rx_bufs = NULL;
94111534SKevin.Ge@Sun.COM 		state->rc_srq_rwqes = NULL;
94211534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
94311534SKevin.Ge@Sun.COM 	}
94411534SKevin.Ge@Sun.COM 
94511534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
94611534SKevin.Ge@Sun.COM }
94711534SKevin.Ge@Sun.COM 
94811534SKevin.Ge@Sun.COM static void
ibd_rc_free_srq_copybufs(ibd_state_t * state)94911534SKevin.Ge@Sun.COM ibd_rc_free_srq_copybufs(ibd_state_t *state)
95011534SKevin.Ge@Sun.COM {
95111534SKevin.Ge@Sun.COM 	uint_t rc_rx_buf_sz;
95211534SKevin.Ge@Sun.COM 
95311534SKevin.Ge@Sun.COM 	/*
95411534SKevin.Ge@Sun.COM 	 * Don't change the value of state->rc_mtu at the period from call
95511534SKevin.Ge@Sun.COM 	 * ibd_rc_alloc_srq_copybufs() to call ibd_rc_free_srq_copybufs().
95611534SKevin.Ge@Sun.COM 	 */
95711534SKevin.Ge@Sun.COM 	rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
95811534SKevin.Ge@Sun.COM 
95911534SKevin.Ge@Sun.COM 	/*
96011534SKevin.Ge@Sun.COM 	 * Unregister rxbuf mr
96111534SKevin.Ge@Sun.COM 	 */
96211534SKevin.Ge@Sun.COM 	if (ibt_deregister_mr(state->id_hca_hdl,
96311534SKevin.Ge@Sun.COM 	    state->rc_srq_rx_mr_hdl) != IBT_SUCCESS) {
96411534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_free_srq_copybufs: ibt_deregister_mr()"
96511534SKevin.Ge@Sun.COM 		    " failed");
96611534SKevin.Ge@Sun.COM 	}
96711534SKevin.Ge@Sun.COM 	state->rc_srq_rx_mr_hdl = NULL;
96811534SKevin.Ge@Sun.COM 
96911534SKevin.Ge@Sun.COM 	/*
97011534SKevin.Ge@Sun.COM 	 * Free rxbuf memory
97111534SKevin.Ge@Sun.COM 	 */
97211534SKevin.Ge@Sun.COM 	kmem_free(state->rc_srq_rwqes,
97311534SKevin.Ge@Sun.COM 	    state->rc_srq_size * sizeof (ibd_rwqe_t));
97411534SKevin.Ge@Sun.COM 	kmem_free(state->rc_srq_rx_bufs, state->rc_srq_size * rc_rx_buf_sz);
97511534SKevin.Ge@Sun.COM 	state->rc_srq_rwqes = NULL;
97611534SKevin.Ge@Sun.COM 	state->rc_srq_rx_bufs = NULL;
97711534SKevin.Ge@Sun.COM }
97811534SKevin.Ge@Sun.COM 
97911534SKevin.Ge@Sun.COM /*
98011534SKevin.Ge@Sun.COM  * Allocate and post a certain number of SRQ receive buffers and WRs.
98111534SKevin.Ge@Sun.COM  */
98211534SKevin.Ge@Sun.COM int
ibd_rc_init_srq_list(ibd_state_t * state)98311534SKevin.Ge@Sun.COM ibd_rc_init_srq_list(ibd_state_t *state)
98411534SKevin.Ge@Sun.COM {
98511534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe;
98611534SKevin.Ge@Sun.COM 	ibt_lkey_t lkey;
98711534SKevin.Ge@Sun.COM 	int i;
98811534SKevin.Ge@Sun.COM 	uint_t len;
98911534SKevin.Ge@Sun.COM 	uint8_t *bufaddr;
99011534SKevin.Ge@Sun.COM 	ibt_srq_sizes_t srq_sizes;
99111534SKevin.Ge@Sun.COM 	ibt_srq_sizes_t	 srq_real_sizes;
99211534SKevin.Ge@Sun.COM 	ibt_status_t ret;
99311534SKevin.Ge@Sun.COM 
99411534SKevin.Ge@Sun.COM 	srq_sizes.srq_sgl_sz = 1;
99512163SRamaswamy.Tummala@Sun.COM 	srq_sizes.srq_wr_sz = state->id_rc_num_srq;
99611534SKevin.Ge@Sun.COM 	ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
99711534SKevin.Ge@Sun.COM 	    state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
99811534SKevin.Ge@Sun.COM 	if (ret != IBT_SUCCESS) {
99913030SKevin.Ge@Sun.COM 		/*
100013030SKevin.Ge@Sun.COM 		 * The following code is for CR 6932460 (can't configure ibd
100113030SKevin.Ge@Sun.COM 		 * interface on 32 bits x86 systems). 32 bits x86 system has
100213030SKevin.Ge@Sun.COM 		 * less memory resource than 64 bits x86 system. If current
100313030SKevin.Ge@Sun.COM 		 * resource request can't be satisfied, we request less
100413030SKevin.Ge@Sun.COM 		 * resource here.
100513030SKevin.Ge@Sun.COM 		 */
100613030SKevin.Ge@Sun.COM 		len = state->id_rc_num_srq;
100713030SKevin.Ge@Sun.COM 		while ((ret == IBT_HCA_WR_EXCEEDED) &&
100813030SKevin.Ge@Sun.COM 		    (len >= 2 * IBD_RC_MIN_CQ_SIZE)) {
100913030SKevin.Ge@Sun.COM 			len = len/2;
101013030SKevin.Ge@Sun.COM 			srq_sizes.srq_sgl_sz = 1;
101113030SKevin.Ge@Sun.COM 			srq_sizes.srq_wr_sz = len;
101213030SKevin.Ge@Sun.COM 			ret = ibt_alloc_srq(state->id_hca_hdl,
101313030SKevin.Ge@Sun.COM 			    IBT_SRQ_NO_FLAGS, state->id_pd_hdl, &srq_sizes,
101413030SKevin.Ge@Sun.COM 			    &state->rc_srq_hdl, &srq_real_sizes);
101513030SKevin.Ge@Sun.COM 		}
101613030SKevin.Ge@Sun.COM 		if (ret != IBT_SUCCESS) {
101713030SKevin.Ge@Sun.COM 			DPRINT(10, "ibd_rc_init_srq_list: ibt_alloc_srq failed."
101813030SKevin.Ge@Sun.COM 			    "req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz="
101913030SKevin.Ge@Sun.COM 			    "0x%x, ret=%d", srq_sizes.srq_sgl_sz,
102013030SKevin.Ge@Sun.COM 			    srq_sizes.srq_wr_sz, len, ret);
102113030SKevin.Ge@Sun.COM 			return (DDI_FAILURE);
102213030SKevin.Ge@Sun.COM 		}
102313030SKevin.Ge@Sun.COM 		state->id_rc_num_srq = len;
102413030SKevin.Ge@Sun.COM 		state->id_rc_num_rwqe = state->id_rc_num_srq + 1;
102511534SKevin.Ge@Sun.COM 	}
102611534SKevin.Ge@Sun.COM 
102711534SKevin.Ge@Sun.COM 	state->rc_srq_size = srq_real_sizes.srq_wr_sz;
102811534SKevin.Ge@Sun.COM 	if (ibd_rc_alloc_srq_copybufs(state) != DDI_SUCCESS) {
102911534SKevin.Ge@Sun.COM 		ret = ibt_free_srq(state->rc_srq_hdl);
103011534SKevin.Ge@Sun.COM 		if (ret != IBT_SUCCESS) {
103111534SKevin.Ge@Sun.COM 			ibd_print_warn(state, "ibd_rc_init_srq_list: "
103211534SKevin.Ge@Sun.COM 			    "ibt_free_srq fail, ret=%d", ret);
103311534SKevin.Ge@Sun.COM 		}
103411534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
103511534SKevin.Ge@Sun.COM 	}
103611534SKevin.Ge@Sun.COM 
103711534SKevin.Ge@Sun.COM 	/*
103811534SKevin.Ge@Sun.COM 	 * Allocate and setup the rwqe list
103911534SKevin.Ge@Sun.COM 	 */
104011534SKevin.Ge@Sun.COM 	lkey = state->rc_srq_rx_mr_desc.md_lkey;
104111534SKevin.Ge@Sun.COM 	rwqe = state->rc_srq_rwqes;
104211534SKevin.Ge@Sun.COM 	bufaddr = state->rc_srq_rx_bufs;
104311534SKevin.Ge@Sun.COM 	len = state->rc_mtu + IPOIB_GRH_SIZE;
104411534SKevin.Ge@Sun.COM 	state->rc_srq_rwqe_list.dl_cnt = 0;
104511534SKevin.Ge@Sun.COM 	state->rc_srq_rwqe_list.dl_bufs_outstanding = 0;
104611534SKevin.Ge@Sun.COM 	for (i = 0; i < state->rc_srq_size; i++, rwqe++, bufaddr += len) {
104711534SKevin.Ge@Sun.COM 		rwqe->w_state = state;
104811534SKevin.Ge@Sun.COM 		rwqe->w_freeing_wqe = B_FALSE;
104911534SKevin.Ge@Sun.COM 		rwqe->w_freemsg_cb.free_func = ibd_rc_srq_freemsg_cb;
105011534SKevin.Ge@Sun.COM 		rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
105111534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
105211534SKevin.Ge@Sun.COM 
105311534SKevin.Ge@Sun.COM 		if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
105411534SKevin.Ge@Sun.COM 		    &rwqe->w_freemsg_cb)) == NULL) {
105511534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_init_srq_list : desballoc() failed");
105611534SKevin.Ge@Sun.COM 			rwqe->rwqe_copybuf.ic_bufaddr = NULL;
105711642SKevin.Ge@Sun.COM 			if (atomic_dec_32_nv(&state->id_running) != 0) {
105811642SKevin.Ge@Sun.COM 				cmn_err(CE_WARN, "ibd_rc_init_srq_list: "
105911642SKevin.Ge@Sun.COM 				    "id_running was not 1\n");
106011642SKevin.Ge@Sun.COM 			}
106111534SKevin.Ge@Sun.COM 			ibd_rc_fini_srq_list(state);
106211642SKevin.Ge@Sun.COM 			atomic_inc_32(&state->id_running);
106311534SKevin.Ge@Sun.COM 			return (DDI_FAILURE);
106411534SKevin.Ge@Sun.COM 		}
106511534SKevin.Ge@Sun.COM 
106611534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
106711534SKevin.Ge@Sun.COM 		/* Leave IPOIB_GRH_SIZE space */
106811534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_va =
106911534SKevin.Ge@Sun.COM 		    (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
107011534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
107111534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
107211534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_nds = 1;
107311534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
107411534SKevin.Ge@Sun.COM 		(void) ibd_rc_post_srq(state, rwqe);
107511534SKevin.Ge@Sun.COM 	}
107611534SKevin.Ge@Sun.COM 
107711642SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_srq_free_list.dl_mutex);
107811642SKevin.Ge@Sun.COM 	state->rc_srq_free_list.dl_head = NULL;
107911642SKevin.Ge@Sun.COM 	state->rc_srq_free_list.dl_cnt = 0;
108011642SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_srq_free_list.dl_mutex);
108111642SKevin.Ge@Sun.COM 
108211534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
108311534SKevin.Ge@Sun.COM }
108411534SKevin.Ge@Sun.COM 
108511534SKevin.Ge@Sun.COM /*
108611534SKevin.Ge@Sun.COM  * Free the statically allocated Rx buffer list for SRQ.
108711534SKevin.Ge@Sun.COM  */
108811534SKevin.Ge@Sun.COM void
ibd_rc_fini_srq_list(ibd_state_t * state)108911534SKevin.Ge@Sun.COM ibd_rc_fini_srq_list(ibd_state_t *state)
109011534SKevin.Ge@Sun.COM {
109111534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe;
109211534SKevin.Ge@Sun.COM 	int i;
109311534SKevin.Ge@Sun.COM 	ibt_status_t ret;
109411534SKevin.Ge@Sun.COM 
109511642SKevin.Ge@Sun.COM 	ASSERT(state->id_running == 0);
109611534SKevin.Ge@Sun.COM 	ret = ibt_free_srq(state->rc_srq_hdl);
109711534SKevin.Ge@Sun.COM 	if (ret != IBT_SUCCESS) {
109811534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_fini_srq_list: "
109911534SKevin.Ge@Sun.COM 		    "ibt_free_srq fail, ret=%d", ret);
110011534SKevin.Ge@Sun.COM 	}
110111534SKevin.Ge@Sun.COM 
110211534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_srq_rwqe_list.dl_mutex);
110311534SKevin.Ge@Sun.COM 	rwqe = state->rc_srq_rwqes;
110411534SKevin.Ge@Sun.COM 	for (i = 0; i < state->rc_srq_size; i++, rwqe++) {
110511534SKevin.Ge@Sun.COM 		if (rwqe->rwqe_im_mblk != NULL) {
110611534SKevin.Ge@Sun.COM 			rwqe->w_freeing_wqe = B_TRUE;
110711534SKevin.Ge@Sun.COM 			freemsg(rwqe->rwqe_im_mblk);
110811534SKevin.Ge@Sun.COM 		}
110911534SKevin.Ge@Sun.COM 	}
111011534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_srq_rwqe_list.dl_mutex);
111111534SKevin.Ge@Sun.COM 
111211534SKevin.Ge@Sun.COM 	ibd_rc_free_srq_copybufs(state);
111311534SKevin.Ge@Sun.COM }
111411534SKevin.Ge@Sun.COM 
111511642SKevin.Ge@Sun.COM /* Repost the elements in state->ib_rc_free_list */
111611642SKevin.Ge@Sun.COM int
ibd_rc_repost_srq_free_list(ibd_state_t * state)111711642SKevin.Ge@Sun.COM ibd_rc_repost_srq_free_list(ibd_state_t *state)
111811642SKevin.Ge@Sun.COM {
111911642SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe;
112011642SKevin.Ge@Sun.COM 	ibd_wqe_t *list;
112111642SKevin.Ge@Sun.COM 	uint_t len;
112211642SKevin.Ge@Sun.COM 
112311642SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_srq_free_list.dl_mutex);
112411642SKevin.Ge@Sun.COM 	if (state->rc_srq_free_list.dl_head != NULL) {
112511642SKevin.Ge@Sun.COM 		/* repost them */
112611642SKevin.Ge@Sun.COM 		len = state->rc_mtu + IPOIB_GRH_SIZE;
112711642SKevin.Ge@Sun.COM 		list = state->rc_srq_free_list.dl_head;
112811642SKevin.Ge@Sun.COM 		state->rc_srq_free_list.dl_head = NULL;
112911642SKevin.Ge@Sun.COM 		state->rc_srq_free_list.dl_cnt = 0;
113011642SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_srq_free_list.dl_mutex);
113111642SKevin.Ge@Sun.COM 		while (list != NULL) {
113211642SKevin.Ge@Sun.COM 			rwqe = WQE_TO_RWQE(list);
113311642SKevin.Ge@Sun.COM 			if ((rwqe->rwqe_im_mblk == NULL) &&
113411642SKevin.Ge@Sun.COM 			    ((rwqe->rwqe_im_mblk = desballoc(
113511642SKevin.Ge@Sun.COM 			    rwqe->rwqe_copybuf.ic_bufaddr, len, 0,
113611642SKevin.Ge@Sun.COM 			    &rwqe->w_freemsg_cb)) == NULL)) {
113711642SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_repost_srq_free_list: "
113811642SKevin.Ge@Sun.COM 				    "failed in desballoc()");
113911642SKevin.Ge@Sun.COM 				do {
114011642SKevin.Ge@Sun.COM 					ibd_rc_srq_free_rwqe(state, rwqe);
114111642SKevin.Ge@Sun.COM 					list = list->w_next;
114211642SKevin.Ge@Sun.COM 					rwqe = WQE_TO_RWQE(list);
114311642SKevin.Ge@Sun.COM 				} while (list != NULL);
114411642SKevin.Ge@Sun.COM 				return (DDI_FAILURE);
114511642SKevin.Ge@Sun.COM 			}
114611642SKevin.Ge@Sun.COM 			if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
114711642SKevin.Ge@Sun.COM 				ibd_rc_srq_free_rwqe(state, rwqe);
114811642SKevin.Ge@Sun.COM 			}
114911642SKevin.Ge@Sun.COM 			list = list->w_next;
115011642SKevin.Ge@Sun.COM 		}
115111642SKevin.Ge@Sun.COM 		return (DDI_SUCCESS);
115211642SKevin.Ge@Sun.COM 	}
115311642SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_srq_free_list.dl_mutex);
115411642SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
115511642SKevin.Ge@Sun.COM }
115611642SKevin.Ge@Sun.COM 
115711534SKevin.Ge@Sun.COM /*
115811534SKevin.Ge@Sun.COM  * Free an allocated recv wqe.
115911534SKevin.Ge@Sun.COM  */
116011642SKevin.Ge@Sun.COM static void
ibd_rc_srq_free_rwqe(ibd_state_t * state,ibd_rwqe_t * rwqe)116111534SKevin.Ge@Sun.COM ibd_rc_srq_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe)
116211534SKevin.Ge@Sun.COM {
116311534SKevin.Ge@Sun.COM 	/*
116411534SKevin.Ge@Sun.COM 	 * desballoc() failed (no memory) or the posting of rwqe failed.
116511534SKevin.Ge@Sun.COM 	 *
116611534SKevin.Ge@Sun.COM 	 * This rwqe is placed on a free list so that it
116711534SKevin.Ge@Sun.COM 	 * can be reinstated in future.
116811534SKevin.Ge@Sun.COM 	 *
116911534SKevin.Ge@Sun.COM 	 * NOTE: no code currently exists to reinstate
117011534SKevin.Ge@Sun.COM 	 * these "lost" rwqes.
117111534SKevin.Ge@Sun.COM 	 */
117211534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_srq_free_list.dl_mutex);
117311534SKevin.Ge@Sun.COM 	state->rc_srq_free_list.dl_cnt++;
117411534SKevin.Ge@Sun.COM 	rwqe->rwqe_next = state->rc_srq_free_list.dl_head;
117511534SKevin.Ge@Sun.COM 	state->rc_srq_free_list.dl_head = RWQE_TO_WQE(rwqe);
117611534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_srq_free_list.dl_mutex);
117711534SKevin.Ge@Sun.COM }
117811534SKevin.Ge@Sun.COM 
117911534SKevin.Ge@Sun.COM static void
ibd_rc_srq_freemsg_cb(char * arg)118011534SKevin.Ge@Sun.COM ibd_rc_srq_freemsg_cb(char *arg)
118111534SKevin.Ge@Sun.COM {
118211534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg;
118311534SKevin.Ge@Sun.COM 	ibd_state_t *state = rwqe->w_state;
118411534SKevin.Ge@Sun.COM 
118511534SKevin.Ge@Sun.COM 	ASSERT(state->rc_enable_srq);
118611534SKevin.Ge@Sun.COM 
118711534SKevin.Ge@Sun.COM 	/*
118811642SKevin.Ge@Sun.COM 	 * If the driver is stopped, just free the rwqe.
118911534SKevin.Ge@Sun.COM 	 */
119011642SKevin.Ge@Sun.COM 	if (atomic_add_32_nv(&state->id_running, 0) == 0) {
119111642SKevin.Ge@Sun.COM 		if (!rwqe->w_freeing_wqe) {
119211642SKevin.Ge@Sun.COM 			atomic_dec_32(
119311642SKevin.Ge@Sun.COM 			    &state->rc_srq_rwqe_list.dl_bufs_outstanding);
119411642SKevin.Ge@Sun.COM 			DPRINT(6, "ibd_rc_srq_freemsg_cb: wqe being freed");
119511642SKevin.Ge@Sun.COM 			rwqe->rwqe_im_mblk = NULL;
119611642SKevin.Ge@Sun.COM 			ibd_rc_srq_free_rwqe(state, rwqe);
119711642SKevin.Ge@Sun.COM 		}
119811534SKevin.Ge@Sun.COM 		return;
119911534SKevin.Ge@Sun.COM 	}
120011534SKevin.Ge@Sun.COM 
120111642SKevin.Ge@Sun.COM 	atomic_dec_32(&state->rc_srq_rwqe_list.dl_bufs_outstanding);
120211642SKevin.Ge@Sun.COM 
120311534SKevin.Ge@Sun.COM 	ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size);
120411642SKevin.Ge@Sun.COM 	ASSERT(!rwqe->w_freeing_wqe);
120511534SKevin.Ge@Sun.COM 
120611534SKevin.Ge@Sun.COM 	/*
120711534SKevin.Ge@Sun.COM 	 * Upper layer has released held mblk, so we have
120811534SKevin.Ge@Sun.COM 	 * no more use for keeping the old pointer in
120911534SKevin.Ge@Sun.COM 	 * our rwqe.
121011534SKevin.Ge@Sun.COM 	 */
121111534SKevin.Ge@Sun.COM 	rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
121211534SKevin.Ge@Sun.COM 	    state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
121311534SKevin.Ge@Sun.COM 	if (rwqe->rwqe_im_mblk == NULL) {
121411534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_srq_freemsg_cb: desballoc failed");
121511534SKevin.Ge@Sun.COM 		ibd_rc_srq_free_rwqe(state, rwqe);
121611534SKevin.Ge@Sun.COM 		return;
121711534SKevin.Ge@Sun.COM 	}
121811534SKevin.Ge@Sun.COM 
121911534SKevin.Ge@Sun.COM 	if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
122011642SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_srq_freemsg_cb: ibd_rc_post_srq"
122111642SKevin.Ge@Sun.COM 		    " failed");
122211534SKevin.Ge@Sun.COM 		ibd_rc_srq_free_rwqe(state, rwqe);
122311534SKevin.Ge@Sun.COM 		return;
122411534SKevin.Ge@Sun.COM 	}
122511534SKevin.Ge@Sun.COM }
122611534SKevin.Ge@Sun.COM 
122711534SKevin.Ge@Sun.COM /*
122811534SKevin.Ge@Sun.COM  * Post a rwqe to the hardware and add it to the Rx list.
122911534SKevin.Ge@Sun.COM  */
123011534SKevin.Ge@Sun.COM static int
ibd_rc_post_srq(ibd_state_t * state,ibd_rwqe_t * rwqe)123111534SKevin.Ge@Sun.COM ibd_rc_post_srq(ibd_state_t *state, ibd_rwqe_t *rwqe)
123211534SKevin.Ge@Sun.COM {
123311534SKevin.Ge@Sun.COM 	/*
123411534SKevin.Ge@Sun.COM 	 * Here we should add dl_cnt before post recv, because
123511534SKevin.Ge@Sun.COM 	 * we would have to make sure dl_cnt is updated before
123611534SKevin.Ge@Sun.COM 	 * the corresponding ibd_rc_process_rx() is called.
123711534SKevin.Ge@Sun.COM 	 */
123811534SKevin.Ge@Sun.COM 	ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size);
123911534SKevin.Ge@Sun.COM 	atomic_add_32(&state->rc_srq_rwqe_list.dl_cnt, 1);
124011534SKevin.Ge@Sun.COM 	if (ibt_post_srq(state->rc_srq_hdl, &rwqe->w_rwr, 1, NULL) !=
124111534SKevin.Ge@Sun.COM 	    IBT_SUCCESS) {
124211534SKevin.Ge@Sun.COM 		atomic_dec_32(&state->rc_srq_rwqe_list.dl_cnt);
124311534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_post_srq : ibt_post_srq() failed");
124411534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
124511534SKevin.Ge@Sun.COM 	}
124611534SKevin.Ge@Sun.COM 
124711534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
124811534SKevin.Ge@Sun.COM }
124911534SKevin.Ge@Sun.COM 
125011534SKevin.Ge@Sun.COM /*
125111534SKevin.Ge@Sun.COM  * Post a rwqe to the hardware and add it to the Rx list.
125211534SKevin.Ge@Sun.COM  */
125311534SKevin.Ge@Sun.COM static int
ibd_rc_post_rwqe(ibd_rc_chan_t * chan,ibd_rwqe_t * rwqe)125411534SKevin.Ge@Sun.COM ibd_rc_post_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
125511534SKevin.Ge@Sun.COM {
125611534SKevin.Ge@Sun.COM 	/*
125711534SKevin.Ge@Sun.COM 	 * Here we should add dl_cnt before post recv, because we would
125811534SKevin.Ge@Sun.COM 	 * have to make sure dl_cnt has already updated before
125911534SKevin.Ge@Sun.COM 	 * corresponding ibd_rc_process_rx() is called.
126011534SKevin.Ge@Sun.COM 	 */
126111534SKevin.Ge@Sun.COM 	atomic_add_32(&chan->rx_wqe_list.dl_cnt, 1);
126211534SKevin.Ge@Sun.COM 	if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) !=
126311534SKevin.Ge@Sun.COM 	    IBT_SUCCESS) {
126411534SKevin.Ge@Sun.COM 		atomic_dec_32(&chan->rx_wqe_list.dl_cnt);
126511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_post_rwqe : failed in ibt_post_recv()");
126611534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
126711534SKevin.Ge@Sun.COM 	}
126811534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
126911534SKevin.Ge@Sun.COM }
127011534SKevin.Ge@Sun.COM 
127111534SKevin.Ge@Sun.COM static int
ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t * chan)127211534SKevin.Ge@Sun.COM ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t *chan)
127311534SKevin.Ge@Sun.COM {
127411534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
127511534SKevin.Ge@Sun.COM 	ibt_mr_attr_t mem_attr;
127611534SKevin.Ge@Sun.COM 	uint_t rc_rx_bufs_sz;
127711534SKevin.Ge@Sun.COM 
127811534SKevin.Ge@Sun.COM 	/*
127911534SKevin.Ge@Sun.COM 	 * Allocate one big chunk for all regular rx copy bufs
128011534SKevin.Ge@Sun.COM 	 */
128111534SKevin.Ge@Sun.COM 	rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * chan->rcq_size;
128211534SKevin.Ge@Sun.COM 
128311534SKevin.Ge@Sun.COM 	chan->rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);
128411534SKevin.Ge@Sun.COM 
128511534SKevin.Ge@Sun.COM 	chan->rx_rwqes = kmem_zalloc(chan->rcq_size *
128611534SKevin.Ge@Sun.COM 	    sizeof (ibd_rwqe_t), KM_SLEEP);
128711534SKevin.Ge@Sun.COM 
128811534SKevin.Ge@Sun.COM 	/*
128911534SKevin.Ge@Sun.COM 	 * Do one memory registration on the entire rxbuf area
129011534SKevin.Ge@Sun.COM 	 */
129111534SKevin.Ge@Sun.COM 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->rx_bufs;
129211534SKevin.Ge@Sun.COM 	mem_attr.mr_len = rc_rx_bufs_sz;
129311534SKevin.Ge@Sun.COM 	mem_attr.mr_as = NULL;
129411534SKevin.Ge@Sun.COM 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
129511534SKevin.Ge@Sun.COM 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
129611534SKevin.Ge@Sun.COM 	    &chan->rx_mr_hdl, &chan->rx_mr_desc) != IBT_SUCCESS) {
129711534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr failed");
129811534SKevin.Ge@Sun.COM 		kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
129911534SKevin.Ge@Sun.COM 		kmem_free(chan->rx_bufs, rc_rx_bufs_sz);
130011534SKevin.Ge@Sun.COM 		chan->rx_bufs = NULL;
130111534SKevin.Ge@Sun.COM 		chan->rx_rwqes = NULL;
130211534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
130311534SKevin.Ge@Sun.COM 	}
130411534SKevin.Ge@Sun.COM 
130511534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
130611534SKevin.Ge@Sun.COM }
130711534SKevin.Ge@Sun.COM 
130811534SKevin.Ge@Sun.COM static void
ibd_rc_free_rx_copybufs(ibd_rc_chan_t * chan)130911534SKevin.Ge@Sun.COM ibd_rc_free_rx_copybufs(ibd_rc_chan_t *chan)
131011534SKevin.Ge@Sun.COM {
131111534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
131211534SKevin.Ge@Sun.COM 	uint_t rc_rx_buf_sz;
131311534SKevin.Ge@Sun.COM 
131411534SKevin.Ge@Sun.COM 	ASSERT(!state->rc_enable_srq);
131511534SKevin.Ge@Sun.COM 	ASSERT(chan->rx_rwqes != NULL);
131611534SKevin.Ge@Sun.COM 	ASSERT(chan->rx_bufs != NULL);
131711534SKevin.Ge@Sun.COM 
131811534SKevin.Ge@Sun.COM 	/*
131911534SKevin.Ge@Sun.COM 	 * Don't change the value of state->rc_mtu at the period from call
132011534SKevin.Ge@Sun.COM 	 * ibd_rc_alloc_rx_copybufs() to call ibd_rc_free_rx_copybufs().
132111534SKevin.Ge@Sun.COM 	 */
132211534SKevin.Ge@Sun.COM 	rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
132311534SKevin.Ge@Sun.COM 
132411534SKevin.Ge@Sun.COM 	/*
132511534SKevin.Ge@Sun.COM 	 * Unregister rxbuf mr
132611534SKevin.Ge@Sun.COM 	 */
132711534SKevin.Ge@Sun.COM 	if (ibt_deregister_mr(state->id_hca_hdl,
132811534SKevin.Ge@Sun.COM 	    chan->rx_mr_hdl) != IBT_SUCCESS) {
132911534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_free_rx_copybufs: ibt_deregister_mr failed");
133011534SKevin.Ge@Sun.COM 	}
133111534SKevin.Ge@Sun.COM 	chan->rx_mr_hdl = NULL;
133211534SKevin.Ge@Sun.COM 
133311534SKevin.Ge@Sun.COM 	/*
133411534SKevin.Ge@Sun.COM 	 * Free rxbuf memory
133511534SKevin.Ge@Sun.COM 	 */
133611534SKevin.Ge@Sun.COM 	kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
133711534SKevin.Ge@Sun.COM 	chan->rx_rwqes = NULL;
133811534SKevin.Ge@Sun.COM 
133911534SKevin.Ge@Sun.COM 	kmem_free(chan->rx_bufs, chan->rcq_size * rc_rx_buf_sz);
134011534SKevin.Ge@Sun.COM 	chan->rx_bufs = NULL;
134111534SKevin.Ge@Sun.COM }
134211534SKevin.Ge@Sun.COM 
134311534SKevin.Ge@Sun.COM /*
134411534SKevin.Ge@Sun.COM  * Post a certain number of receive buffers and WRs on a RC channel.
134511534SKevin.Ge@Sun.COM  */
134611534SKevin.Ge@Sun.COM static int
ibd_rc_init_rxlist(ibd_rc_chan_t * chan)134711534SKevin.Ge@Sun.COM ibd_rc_init_rxlist(ibd_rc_chan_t *chan)
134811534SKevin.Ge@Sun.COM {
134911534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
135011534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe;
135111534SKevin.Ge@Sun.COM 	ibt_lkey_t lkey;
135211534SKevin.Ge@Sun.COM 	int i;
135311534SKevin.Ge@Sun.COM 	uint_t len;
135411534SKevin.Ge@Sun.COM 	uint8_t *bufaddr;
135511534SKevin.Ge@Sun.COM 
135611534SKevin.Ge@Sun.COM 	ASSERT(!state->rc_enable_srq);
135711534SKevin.Ge@Sun.COM 	if (ibd_rc_alloc_rx_copybufs(chan) != DDI_SUCCESS)
135811534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
135911534SKevin.Ge@Sun.COM 
136011534SKevin.Ge@Sun.COM 	/*
136111534SKevin.Ge@Sun.COM 	 * Allocate and setup the rwqe list
136211534SKevin.Ge@Sun.COM 	 */
136311534SKevin.Ge@Sun.COM 	lkey = chan->rx_mr_desc.md_lkey;
136411534SKevin.Ge@Sun.COM 	rwqe = chan->rx_rwqes;
136511534SKevin.Ge@Sun.COM 	bufaddr = chan->rx_bufs;
136611534SKevin.Ge@Sun.COM 	len = state->rc_mtu + IPOIB_GRH_SIZE;
136711534SKevin.Ge@Sun.COM 	for (i = 0; i < chan->rcq_size; i++, rwqe++, bufaddr += len) {
136811534SKevin.Ge@Sun.COM 		rwqe->w_state = state;
136911534SKevin.Ge@Sun.COM 		rwqe->w_chan = chan;
137011534SKevin.Ge@Sun.COM 		rwqe->w_freeing_wqe = B_FALSE;
137111534SKevin.Ge@Sun.COM 		rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb;
137211534SKevin.Ge@Sun.COM 		rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
137311534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
137411534SKevin.Ge@Sun.COM 
137511534SKevin.Ge@Sun.COM 		if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
137611534SKevin.Ge@Sun.COM 		    &rwqe->w_freemsg_cb)) == NULL) {
137711534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_init_srq_list: desballoc() failed");
137811534SKevin.Ge@Sun.COM 			rwqe->rwqe_copybuf.ic_bufaddr = NULL;
137911534SKevin.Ge@Sun.COM 			ibd_rc_fini_rxlist(chan);
138011534SKevin.Ge@Sun.COM 			return (DDI_FAILURE);
138111534SKevin.Ge@Sun.COM 		}
138211534SKevin.Ge@Sun.COM 
138311534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
138411534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_va =
138511534SKevin.Ge@Sun.COM 		    (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
138611534SKevin.Ge@Sun.COM 		rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
138711534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
138811534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_nds = 1;
138911534SKevin.Ge@Sun.COM 		rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
139011534SKevin.Ge@Sun.COM 		(void) ibd_rc_post_rwqe(chan, rwqe);
139111534SKevin.Ge@Sun.COM 	}
139211534SKevin.Ge@Sun.COM 
139311534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
139411534SKevin.Ge@Sun.COM }
139511534SKevin.Ge@Sun.COM 
139611534SKevin.Ge@Sun.COM /*
139711534SKevin.Ge@Sun.COM  * Free the statically allocated Rx buffer list for SRQ.
139811534SKevin.Ge@Sun.COM  */
139911534SKevin.Ge@Sun.COM static void
ibd_rc_fini_rxlist(ibd_rc_chan_t * chan)140011534SKevin.Ge@Sun.COM ibd_rc_fini_rxlist(ibd_rc_chan_t *chan)
140111534SKevin.Ge@Sun.COM {
140211534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe;
140311534SKevin.Ge@Sun.COM 	int i;
140411534SKevin.Ge@Sun.COM 
140511534SKevin.Ge@Sun.COM 	if (chan->rx_bufs == NULL) {
140611534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_fini_rxlist: empty chan->rx_bufs, quit");
140711534SKevin.Ge@Sun.COM 		return;
140811534SKevin.Ge@Sun.COM 	}
140911534SKevin.Ge@Sun.COM 
141011534SKevin.Ge@Sun.COM 	/* bufs_outstanding must be 0 */
141111534SKevin.Ge@Sun.COM 	ASSERT((chan->rx_wqe_list.dl_head == NULL) ||
141211534SKevin.Ge@Sun.COM 	    (chan->rx_wqe_list.dl_bufs_outstanding == 0));
141311534SKevin.Ge@Sun.COM 
141411534SKevin.Ge@Sun.COM 	mutex_enter(&chan->rx_wqe_list.dl_mutex);
141511534SKevin.Ge@Sun.COM 	rwqe = chan->rx_rwqes;
141611534SKevin.Ge@Sun.COM 	for (i = 0; i < chan->rcq_size; i++, rwqe++) {
141711534SKevin.Ge@Sun.COM 		if (rwqe->rwqe_im_mblk != NULL) {
141811534SKevin.Ge@Sun.COM 			rwqe->w_freeing_wqe = B_TRUE;
141911534SKevin.Ge@Sun.COM 			freemsg(rwqe->rwqe_im_mblk);
142011534SKevin.Ge@Sun.COM 		}
142111534SKevin.Ge@Sun.COM 	}
142211534SKevin.Ge@Sun.COM 	mutex_exit(&chan->rx_wqe_list.dl_mutex);
142311534SKevin.Ge@Sun.COM 
142411534SKevin.Ge@Sun.COM 	ibd_rc_free_rx_copybufs(chan);
142511534SKevin.Ge@Sun.COM }
142611534SKevin.Ge@Sun.COM 
142711534SKevin.Ge@Sun.COM /*
142811534SKevin.Ge@Sun.COM  * Free an allocated recv wqe.
142911534SKevin.Ge@Sun.COM  */
143011534SKevin.Ge@Sun.COM static void
ibd_rc_free_rwqe(ibd_rc_chan_t * chan,ibd_rwqe_t * rwqe)143111534SKevin.Ge@Sun.COM ibd_rc_free_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
143211534SKevin.Ge@Sun.COM {
143311534SKevin.Ge@Sun.COM 	/*
143411534SKevin.Ge@Sun.COM 	 * desballoc() failed (no memory) or the posting of rwqe failed.
143511534SKevin.Ge@Sun.COM 	 *
143611534SKevin.Ge@Sun.COM 	 * This rwqe is placed on a free list so that it
143711534SKevin.Ge@Sun.COM 	 * can be reinstated in future.
143811534SKevin.Ge@Sun.COM 	 *
143911534SKevin.Ge@Sun.COM 	 * NOTE: no code currently exists to reinstate
144011534SKevin.Ge@Sun.COM 	 * these "lost" rwqes.
144111534SKevin.Ge@Sun.COM 	 */
144211534SKevin.Ge@Sun.COM 	mutex_enter(&chan->rx_free_list.dl_mutex);
144311534SKevin.Ge@Sun.COM 	chan->rx_free_list.dl_cnt++;
144411534SKevin.Ge@Sun.COM 	rwqe->rwqe_next = chan->rx_free_list.dl_head;
144511534SKevin.Ge@Sun.COM 	chan->rx_free_list.dl_head = RWQE_TO_WQE(rwqe);
144611534SKevin.Ge@Sun.COM 	mutex_exit(&chan->rx_free_list.dl_mutex);
144711534SKevin.Ge@Sun.COM }
144811534SKevin.Ge@Sun.COM 
144911534SKevin.Ge@Sun.COM /*
145011534SKevin.Ge@Sun.COM  * Processing to be done after receipt of a packet; hand off to GLD
145111534SKevin.Ge@Sun.COM  * in the format expected by GLD.
145211534SKevin.Ge@Sun.COM  */
145311534SKevin.Ge@Sun.COM static void
ibd_rc_process_rx(ibd_rc_chan_t * chan,ibd_rwqe_t * rwqe,ibt_wc_t * wc)145411534SKevin.Ge@Sun.COM ibd_rc_process_rx(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe, ibt_wc_t *wc)
145511534SKevin.Ge@Sun.COM {
145611534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
145711534SKevin.Ge@Sun.COM 	ib_header_info_t *phdr;
145811534SKevin.Ge@Sun.COM 	ipoib_hdr_t *ipibp;
145911534SKevin.Ge@Sun.COM 	mblk_t *mp;
146011534SKevin.Ge@Sun.COM 	mblk_t *mpc;
146111534SKevin.Ge@Sun.COM 	int rxcnt;
146211534SKevin.Ge@Sun.COM 	ip6_t *ip6h;
146311534SKevin.Ge@Sun.COM 	int len;
146411534SKevin.Ge@Sun.COM 
146511534SKevin.Ge@Sun.COM 	/*
146611534SKevin.Ge@Sun.COM 	 * Track number handed to upper layer, and number still
146711534SKevin.Ge@Sun.COM 	 * available to receive packets.
146811534SKevin.Ge@Sun.COM 	 */
146911534SKevin.Ge@Sun.COM 	if (state->rc_enable_srq) {
147011534SKevin.Ge@Sun.COM 		rxcnt = atomic_dec_32_nv(&state->rc_srq_rwqe_list.dl_cnt);
147111534SKevin.Ge@Sun.COM 	} else {
147211534SKevin.Ge@Sun.COM 		rxcnt = atomic_dec_32_nv(&chan->rx_wqe_list.dl_cnt);
147311534SKevin.Ge@Sun.COM 	}
147411534SKevin.Ge@Sun.COM 
147511534SKevin.Ge@Sun.COM 	/*
147611534SKevin.Ge@Sun.COM 	 * It can not be a IBA multicast packet.
147711534SKevin.Ge@Sun.COM 	 */
147811534SKevin.Ge@Sun.COM 	ASSERT(!wc->wc_flags & IBT_WC_GRH_PRESENT);
147911534SKevin.Ge@Sun.COM 
148013030SKevin.Ge@Sun.COM 	/* For the connection reaper routine ibd_rc_conn_timeout_call() */
148113030SKevin.Ge@Sun.COM 	chan->is_used = B_TRUE;
148211534SKevin.Ge@Sun.COM 
148311534SKevin.Ge@Sun.COM #ifdef DEBUG
148412163SRamaswamy.Tummala@Sun.COM 	if (rxcnt < state->id_rc_rx_rwqe_thresh) {
148511534SKevin.Ge@Sun.COM 		state->rc_rwqe_short++;
148611534SKevin.Ge@Sun.COM 	}
148711534SKevin.Ge@Sun.COM #endif
148811534SKevin.Ge@Sun.COM 
148911534SKevin.Ge@Sun.COM 	/*
149011534SKevin.Ge@Sun.COM 	 * Possibly replenish the Rx pool if needed.
149111534SKevin.Ge@Sun.COM 	 */
149212163SRamaswamy.Tummala@Sun.COM 	if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
149312163SRamaswamy.Tummala@Sun.COM 	    (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
149411534SKevin.Ge@Sun.COM 		atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
149511534SKevin.Ge@Sun.COM 		atomic_inc_64(&state->rc_rcv_trans_pkt);
149611534SKevin.Ge@Sun.COM 
149711534SKevin.Ge@Sun.COM 		/*
149811534SKevin.Ge@Sun.COM 		 * Record how many rwqe has been occupied by upper
149911534SKevin.Ge@Sun.COM 		 * network layer
150011534SKevin.Ge@Sun.COM 		 */
150111534SKevin.Ge@Sun.COM 		if (state->rc_enable_srq) {
150211534SKevin.Ge@Sun.COM 			atomic_add_32(&state->rc_srq_rwqe_list.
150311534SKevin.Ge@Sun.COM 			    dl_bufs_outstanding, 1);
150411534SKevin.Ge@Sun.COM 		} else {
150511534SKevin.Ge@Sun.COM 			atomic_add_32(&chan->rx_wqe_list.
150611534SKevin.Ge@Sun.COM 			    dl_bufs_outstanding, 1);
150711534SKevin.Ge@Sun.COM 		}
150811534SKevin.Ge@Sun.COM 		mp = rwqe->rwqe_im_mblk;
150911534SKevin.Ge@Sun.COM 	} else {
151011534SKevin.Ge@Sun.COM 		atomic_add_64(&state->rc_rcv_copy_byte, wc->wc_bytes_xfer);
151111534SKevin.Ge@Sun.COM 		atomic_inc_64(&state->rc_rcv_copy_pkt);
151211534SKevin.Ge@Sun.COM 
151311534SKevin.Ge@Sun.COM 		if ((mp = allocb(wc->wc_bytes_xfer + IPOIB_GRH_SIZE,
151411534SKevin.Ge@Sun.COM 		    BPRI_HI)) == NULL) {	/* no memory */
151511534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_process_rx: allocb() failed");
151611534SKevin.Ge@Sun.COM 			state->rc_rcv_alloc_fail++;
151711534SKevin.Ge@Sun.COM 			if (state->rc_enable_srq) {
151811534SKevin.Ge@Sun.COM 				if (ibd_rc_post_srq(state, rwqe) ==
151911534SKevin.Ge@Sun.COM 				    DDI_FAILURE) {
152011534SKevin.Ge@Sun.COM 					ibd_rc_srq_free_rwqe(state, rwqe);
152111534SKevin.Ge@Sun.COM 				}
152211534SKevin.Ge@Sun.COM 			} else {
152311534SKevin.Ge@Sun.COM 				if (ibd_rc_post_rwqe(chan, rwqe) ==
152411534SKevin.Ge@Sun.COM 				    DDI_FAILURE) {
152511534SKevin.Ge@Sun.COM 					ibd_rc_free_rwqe(chan, rwqe);
152611534SKevin.Ge@Sun.COM 				}
152711534SKevin.Ge@Sun.COM 			}
152811534SKevin.Ge@Sun.COM 			return;
152911534SKevin.Ge@Sun.COM 		}
153011534SKevin.Ge@Sun.COM 
153111534SKevin.Ge@Sun.COM 		bcopy(rwqe->rwqe_im_mblk->b_rptr + IPOIB_GRH_SIZE,
153211534SKevin.Ge@Sun.COM 		    mp->b_wptr + IPOIB_GRH_SIZE, wc->wc_bytes_xfer);
153311534SKevin.Ge@Sun.COM 
153411534SKevin.Ge@Sun.COM 		if (state->rc_enable_srq) {
153511534SKevin.Ge@Sun.COM 			if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
153611534SKevin.Ge@Sun.COM 				ibd_rc_srq_free_rwqe(state, rwqe);
153711534SKevin.Ge@Sun.COM 			}
153811534SKevin.Ge@Sun.COM 		} else {
153911534SKevin.Ge@Sun.COM 			if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
154011534SKevin.Ge@Sun.COM 				ibd_rc_free_rwqe(chan, rwqe);
154111534SKevin.Ge@Sun.COM 			}
154211534SKevin.Ge@Sun.COM 		}
154311534SKevin.Ge@Sun.COM 	}
154411534SKevin.Ge@Sun.COM 
154511534SKevin.Ge@Sun.COM 	ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + IPOIB_GRH_SIZE);
154611534SKevin.Ge@Sun.COM 	if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) {
154711534SKevin.Ge@Sun.COM 		ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
154811534SKevin.Ge@Sun.COM 		len = ntohs(ip6h->ip6_plen);
154911534SKevin.Ge@Sun.COM 		if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
155011534SKevin.Ge@Sun.COM 			/* LINTED: E_CONSTANT_CONDITION */
155111534SKevin.Ge@Sun.COM 			IBD_PAD_NSNA(ip6h, len, IBD_RECV);
155211534SKevin.Ge@Sun.COM 		}
155311534SKevin.Ge@Sun.COM 	}
155411534SKevin.Ge@Sun.COM 
155511534SKevin.Ge@Sun.COM 	phdr = (ib_header_info_t *)mp->b_rptr;
155611534SKevin.Ge@Sun.COM 	phdr->ib_grh.ipoib_vertcflow = 0;
155711534SKevin.Ge@Sun.COM 	ovbcopy(&state->id_macaddr, &phdr->ib_dst,
155811534SKevin.Ge@Sun.COM 	    sizeof (ipoib_mac_t));
155911534SKevin.Ge@Sun.COM 	mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer+ IPOIB_GRH_SIZE;
156011534SKevin.Ge@Sun.COM 
156111534SKevin.Ge@Sun.COM 	/*
156211534SKevin.Ge@Sun.COM 	 * Can RC mode in IB guarantee its checksum correctness?
156311534SKevin.Ge@Sun.COM 	 *
156411534SKevin.Ge@Sun.COM 	 *	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
156511534SKevin.Ge@Sun.COM 	 *	    HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
156611534SKevin.Ge@Sun.COM 	 */
156711534SKevin.Ge@Sun.COM 
156811534SKevin.Ge@Sun.COM 	/*
156911534SKevin.Ge@Sun.COM 	 * Make sure this is NULL or we're in trouble.
157011534SKevin.Ge@Sun.COM 	 */
157111534SKevin.Ge@Sun.COM 	if (mp->b_next != NULL) {
157211534SKevin.Ge@Sun.COM 		ibd_print_warn(state,
157311534SKevin.Ge@Sun.COM 		    "ibd_rc_process_rx: got duplicate mp from rcq?");
157411534SKevin.Ge@Sun.COM 		mp->b_next = NULL;
157511534SKevin.Ge@Sun.COM 	}
157611534SKevin.Ge@Sun.COM 
157711534SKevin.Ge@Sun.COM 	/*
157811534SKevin.Ge@Sun.COM 	 * Add this mp to the list of processed mp's to send to
157911534SKevin.Ge@Sun.COM 	 * the nw layer
158011534SKevin.Ge@Sun.COM 	 */
158111534SKevin.Ge@Sun.COM 	if (state->rc_enable_srq) {
158211534SKevin.Ge@Sun.COM 		mutex_enter(&state->rc_rx_lock);
158311534SKevin.Ge@Sun.COM 		if (state->rc_rx_mp) {
158411534SKevin.Ge@Sun.COM 			ASSERT(state->rc_rx_mp_tail != NULL);
158511534SKevin.Ge@Sun.COM 			state->rc_rx_mp_tail->b_next = mp;
158611534SKevin.Ge@Sun.COM 		} else {
158711534SKevin.Ge@Sun.COM 			ASSERT(state->rc_rx_mp_tail == NULL);
158811534SKevin.Ge@Sun.COM 			state->rc_rx_mp = mp;
158911534SKevin.Ge@Sun.COM 		}
159011534SKevin.Ge@Sun.COM 
159111534SKevin.Ge@Sun.COM 		state->rc_rx_mp_tail = mp;
159211534SKevin.Ge@Sun.COM 		state->rc_rx_mp_len++;
159311534SKevin.Ge@Sun.COM 
159411534SKevin.Ge@Sun.COM 		if (state->rc_rx_mp_len  >= IBD_MAX_RX_MP_LEN) {
159511534SKevin.Ge@Sun.COM 			mpc = state->rc_rx_mp;
159611534SKevin.Ge@Sun.COM 
159711534SKevin.Ge@Sun.COM 			state->rc_rx_mp = NULL;
159811534SKevin.Ge@Sun.COM 			state->rc_rx_mp_tail = NULL;
159911534SKevin.Ge@Sun.COM 			state->rc_rx_mp_len = 0;
160011534SKevin.Ge@Sun.COM 			mutex_exit(&state->rc_rx_lock);
160111534SKevin.Ge@Sun.COM 			mac_rx(state->id_mh, NULL, mpc);
160211534SKevin.Ge@Sun.COM 		} else {
160311534SKevin.Ge@Sun.COM 			mutex_exit(&state->rc_rx_lock);
160411534SKevin.Ge@Sun.COM 		}
160511534SKevin.Ge@Sun.COM 	} else {
160611534SKevin.Ge@Sun.COM 		mutex_enter(&chan->rx_lock);
160711534SKevin.Ge@Sun.COM 		if (chan->rx_mp) {
160811534SKevin.Ge@Sun.COM 			ASSERT(chan->rx_mp_tail != NULL);
160911534SKevin.Ge@Sun.COM 			chan->rx_mp_tail->b_next = mp;
161011534SKevin.Ge@Sun.COM 		} else {
161111534SKevin.Ge@Sun.COM 			ASSERT(chan->rx_mp_tail == NULL);
161211534SKevin.Ge@Sun.COM 			chan->rx_mp = mp;
161311534SKevin.Ge@Sun.COM 		}
161411534SKevin.Ge@Sun.COM 
161511534SKevin.Ge@Sun.COM 		chan->rx_mp_tail = mp;
161611534SKevin.Ge@Sun.COM 		chan->rx_mp_len++;
161711534SKevin.Ge@Sun.COM 
161811534SKevin.Ge@Sun.COM 		if (chan->rx_mp_len  >= IBD_MAX_RX_MP_LEN) {
161911534SKevin.Ge@Sun.COM 			mpc = chan->rx_mp;
162011534SKevin.Ge@Sun.COM 
162111534SKevin.Ge@Sun.COM 			chan->rx_mp = NULL;
162211534SKevin.Ge@Sun.COM 			chan->rx_mp_tail = NULL;
162311534SKevin.Ge@Sun.COM 			chan->rx_mp_len = 0;
162411534SKevin.Ge@Sun.COM 			mutex_exit(&chan->rx_lock);
162511534SKevin.Ge@Sun.COM 			mac_rx(state->id_mh, NULL, mpc);
162611534SKevin.Ge@Sun.COM 		} else {
162711534SKevin.Ge@Sun.COM 			mutex_exit(&chan->rx_lock);
162811534SKevin.Ge@Sun.COM 		}
162911534SKevin.Ge@Sun.COM 	}
163011534SKevin.Ge@Sun.COM }
163111534SKevin.Ge@Sun.COM 
163211534SKevin.Ge@Sun.COM /*
163311534SKevin.Ge@Sun.COM  * Callback code invoked from STREAMs when the recv data buffer is free
163411534SKevin.Ge@Sun.COM  * for recycling.
163511534SKevin.Ge@Sun.COM  */
163611534SKevin.Ge@Sun.COM static void
ibd_rc_freemsg_cb(char * arg)163711534SKevin.Ge@Sun.COM ibd_rc_freemsg_cb(char *arg)
163811534SKevin.Ge@Sun.COM {
163911534SKevin.Ge@Sun.COM 	ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg;
164011534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan = rwqe->w_chan;
164111534SKevin.Ge@Sun.COM 	ibd_state_t *state = rwqe->w_state;
164211534SKevin.Ge@Sun.COM 
164311534SKevin.Ge@Sun.COM 	/*
164411534SKevin.Ge@Sun.COM 	 * If the wqe is being destructed, do not attempt recycling.
164511534SKevin.Ge@Sun.COM 	 */
164611534SKevin.Ge@Sun.COM 	if (rwqe->w_freeing_wqe == B_TRUE) {
164711534SKevin.Ge@Sun.COM 		return;
164811534SKevin.Ge@Sun.COM 	}
164911534SKevin.Ge@Sun.COM 
165011534SKevin.Ge@Sun.COM 	ASSERT(!state->rc_enable_srq);
165111534SKevin.Ge@Sun.COM 	ASSERT(chan->rx_wqe_list.dl_cnt < chan->rcq_size);
165211534SKevin.Ge@Sun.COM 
165311534SKevin.Ge@Sun.COM 	rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
165411534SKevin.Ge@Sun.COM 	    state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
165511534SKevin.Ge@Sun.COM 	if (rwqe->rwqe_im_mblk == NULL) {
165611534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_freemsg_cb: desballoc() failed");
165711534SKevin.Ge@Sun.COM 		ibd_rc_free_rwqe(chan, rwqe);
165811534SKevin.Ge@Sun.COM 		return;
165911534SKevin.Ge@Sun.COM 	}
166011534SKevin.Ge@Sun.COM 
166111534SKevin.Ge@Sun.COM 	/*
166211534SKevin.Ge@Sun.COM 	 * Post back to h/w. We could actually have more than
166311534SKevin.Ge@Sun.COM 	 * id_num_rwqe WQEs on the list if there were multiple
166411534SKevin.Ge@Sun.COM 	 * ibd_freemsg_cb() calls outstanding (since the lock is
166511534SKevin.Ge@Sun.COM 	 * not held the entire time). This will start getting
166611534SKevin.Ge@Sun.COM 	 * corrected over subsequent ibd_freemsg_cb() calls.
166711534SKevin.Ge@Sun.COM 	 */
166811534SKevin.Ge@Sun.COM 	if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
166911534SKevin.Ge@Sun.COM 		ibd_rc_free_rwqe(chan, rwqe);
167011534SKevin.Ge@Sun.COM 		return;
167111534SKevin.Ge@Sun.COM 	}
167211534SKevin.Ge@Sun.COM 	atomic_add_32(&chan->rx_wqe_list.dl_bufs_outstanding, -1);
167311534SKevin.Ge@Sun.COM }
167411534SKevin.Ge@Sun.COM 
167511534SKevin.Ge@Sun.COM /*
167611534SKevin.Ge@Sun.COM  * Common code for interrupt handling as well as for polling
167711534SKevin.Ge@Sun.COM  * for all completed wqe's while detaching.
167811534SKevin.Ge@Sun.COM  */
167911534SKevin.Ge@Sun.COM static void
ibd_rc_poll_rcq(ibd_rc_chan_t * chan,ibt_cq_hdl_t cq_hdl)168011534SKevin.Ge@Sun.COM ibd_rc_poll_rcq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
168111534SKevin.Ge@Sun.COM {
168211534SKevin.Ge@Sun.COM 	ibd_wqe_t *wqe;
168311534SKevin.Ge@Sun.COM 	ibt_wc_t *wc, *wcs;
168411534SKevin.Ge@Sun.COM 	uint_t numwcs, real_numwcs;
168511534SKevin.Ge@Sun.COM 	int i;
168611534SKevin.Ge@Sun.COM 
168711534SKevin.Ge@Sun.COM 	wcs = chan->rx_wc;
168811534SKevin.Ge@Sun.COM 	numwcs = IBD_RC_MAX_CQ_WC;
168911534SKevin.Ge@Sun.COM 
169011534SKevin.Ge@Sun.COM 	while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
169111534SKevin.Ge@Sun.COM 		for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
169211534SKevin.Ge@Sun.COM 			wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
169311534SKevin.Ge@Sun.COM 			if (wc->wc_status != IBT_WC_SUCCESS) {
169411534SKevin.Ge@Sun.COM 				chan->state->rc_rcq_err++;
169511534SKevin.Ge@Sun.COM 				/*
169611534SKevin.Ge@Sun.COM 				 * Channel being torn down.
169711534SKevin.Ge@Sun.COM 				 */
169811534SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_poll_rcq: wc_status(%d) != "
169911534SKevin.Ge@Sun.COM 				    "SUCC, chan=%p", wc->wc_status, chan);
170011534SKevin.Ge@Sun.COM 				if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) {
170111534SKevin.Ge@Sun.COM 					/*
170211534SKevin.Ge@Sun.COM 					 * Do not invoke Rx handler because
170311534SKevin.Ge@Sun.COM 					 * it might add buffers to the Rx pool
170411534SKevin.Ge@Sun.COM 					 * when we are trying to deinitialize.
170511534SKevin.Ge@Sun.COM 					 */
170611534SKevin.Ge@Sun.COM 					continue;
170711534SKevin.Ge@Sun.COM 				}
170811534SKevin.Ge@Sun.COM 			}
170911534SKevin.Ge@Sun.COM 			ibd_rc_process_rx(chan, WQE_TO_RWQE(wqe), wc);
171011534SKevin.Ge@Sun.COM 		}
171111534SKevin.Ge@Sun.COM 	}
171211534SKevin.Ge@Sun.COM }
171311534SKevin.Ge@Sun.COM 
171411534SKevin.Ge@Sun.COM /* Receive CQ handler */
171511534SKevin.Ge@Sun.COM /* ARGSUSED */
171611534SKevin.Ge@Sun.COM static void
ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl,void * arg)171711534SKevin.Ge@Sun.COM ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
171811534SKevin.Ge@Sun.COM {
171911534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
172011534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
172111534SKevin.Ge@Sun.COM 
172213030SKevin.Ge@Sun.COM 	atomic_inc_32(&chan->rcq_invoking);
172311534SKevin.Ge@Sun.COM 	ASSERT(chan->chan_state == IBD_RC_STATE_PAS_ESTAB);
172411534SKevin.Ge@Sun.COM 
172511534SKevin.Ge@Sun.COM 	/*
172611534SKevin.Ge@Sun.COM 	 * Poll for completed entries; the CQ will not interrupt any
172711534SKevin.Ge@Sun.COM 	 * more for incoming (or transmitted) packets.
172811534SKevin.Ge@Sun.COM 	 */
172911534SKevin.Ge@Sun.COM 	ibd_rc_poll_rcq(chan, chan->rcq_hdl);
173011534SKevin.Ge@Sun.COM 
173111534SKevin.Ge@Sun.COM 	/*
173211534SKevin.Ge@Sun.COM 	 * Now enable CQ notifications; all packets that arrive now
173311534SKevin.Ge@Sun.COM 	 * (or complete transmission) will cause new interrupts.
173411534SKevin.Ge@Sun.COM 	 */
173511534SKevin.Ge@Sun.COM 	if (ibt_enable_cq_notify(chan->rcq_hdl, IBT_NEXT_COMPLETION) !=
173611534SKevin.Ge@Sun.COM 	    IBT_SUCCESS) {
173711534SKevin.Ge@Sun.COM 		/*
173811534SKevin.Ge@Sun.COM 		 * We do not expect a failure here.
173911534SKevin.Ge@Sun.COM 		 */
174011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_rcq_handler: ibt_enable_cq_notify() failed");
174111534SKevin.Ge@Sun.COM 	}
174211534SKevin.Ge@Sun.COM 
174311534SKevin.Ge@Sun.COM 	/*
174411534SKevin.Ge@Sun.COM 	 * Repoll to catch all packets that might have arrived after
174511534SKevin.Ge@Sun.COM 	 * we finished the first poll loop and before interrupts got
174611534SKevin.Ge@Sun.COM 	 * armed.
174711534SKevin.Ge@Sun.COM 	 */
174811534SKevin.Ge@Sun.COM 	ibd_rc_poll_rcq(chan, chan->rcq_hdl);
174911534SKevin.Ge@Sun.COM 
175011534SKevin.Ge@Sun.COM 	if (state->rc_enable_srq) {
175111534SKevin.Ge@Sun.COM 		mutex_enter(&state->rc_rx_lock);
175211534SKevin.Ge@Sun.COM 
175311534SKevin.Ge@Sun.COM 		if (state->rc_rx_mp != NULL) {
175411534SKevin.Ge@Sun.COM 			mblk_t *mpc;
175511534SKevin.Ge@Sun.COM 			mpc = state->rc_rx_mp;
175611534SKevin.Ge@Sun.COM 
175711534SKevin.Ge@Sun.COM 			state->rc_rx_mp = NULL;
175811534SKevin.Ge@Sun.COM 			state->rc_rx_mp_tail = NULL;
175911534SKevin.Ge@Sun.COM 			state->rc_rx_mp_len = 0;
176011534SKevin.Ge@Sun.COM 
176111534SKevin.Ge@Sun.COM 			mutex_exit(&state->rc_rx_lock);
176211534SKevin.Ge@Sun.COM 			mac_rx(state->id_mh, NULL, mpc);
176311534SKevin.Ge@Sun.COM 		} else {
176411534SKevin.Ge@Sun.COM 			mutex_exit(&state->rc_rx_lock);
176511534SKevin.Ge@Sun.COM 		}
176611534SKevin.Ge@Sun.COM 	} else {
176711534SKevin.Ge@Sun.COM 		mutex_enter(&chan->rx_lock);
176811534SKevin.Ge@Sun.COM 
176911534SKevin.Ge@Sun.COM 		if (chan->rx_mp != NULL) {
177011534SKevin.Ge@Sun.COM 			mblk_t *mpc;
177111534SKevin.Ge@Sun.COM 			mpc = chan->rx_mp;
177211534SKevin.Ge@Sun.COM 
177311534SKevin.Ge@Sun.COM 			chan->rx_mp = NULL;
177411534SKevin.Ge@Sun.COM 			chan->rx_mp_tail = NULL;
177511534SKevin.Ge@Sun.COM 			chan->rx_mp_len = 0;
177611534SKevin.Ge@Sun.COM 
177711534SKevin.Ge@Sun.COM 			mutex_exit(&chan->rx_lock);
177811534SKevin.Ge@Sun.COM 			mac_rx(state->id_mh, NULL, mpc);
177911534SKevin.Ge@Sun.COM 		} else {
178011534SKevin.Ge@Sun.COM 			mutex_exit(&chan->rx_lock);
178111534SKevin.Ge@Sun.COM 		}
178211534SKevin.Ge@Sun.COM 	}
178313030SKevin.Ge@Sun.COM 	atomic_dec_32(&chan->rcq_invoking);
178411534SKevin.Ge@Sun.COM }
178511534SKevin.Ge@Sun.COM 
178611534SKevin.Ge@Sun.COM /*
178711534SKevin.Ge@Sun.COM  * Allocate the statically allocated Tx buffer list.
178811534SKevin.Ge@Sun.COM  */
178911534SKevin.Ge@Sun.COM int
ibd_rc_init_tx_largebuf_list(ibd_state_t * state)179011534SKevin.Ge@Sun.COM ibd_rc_init_tx_largebuf_list(ibd_state_t *state)
179111534SKevin.Ge@Sun.COM {
179211534SKevin.Ge@Sun.COM 	ibd_rc_tx_largebuf_t *lbufp;
179311534SKevin.Ge@Sun.COM 	ibd_rc_tx_largebuf_t *tail;
179411534SKevin.Ge@Sun.COM 	uint8_t *memp;
179511534SKevin.Ge@Sun.COM 	ibt_mr_attr_t mem_attr;
179611534SKevin.Ge@Sun.COM 	uint32_t num_swqe;
179711534SKevin.Ge@Sun.COM 	size_t  mem_size;
179811534SKevin.Ge@Sun.COM 	int i;
179911534SKevin.Ge@Sun.COM 
180012163SRamaswamy.Tummala@Sun.COM 	num_swqe = state->id_rc_num_swqe - 1;
180111534SKevin.Ge@Sun.COM 
180211534SKevin.Ge@Sun.COM 	/*
180311534SKevin.Ge@Sun.COM 	 * Allocate one big chunk for all Tx large copy bufs
180411534SKevin.Ge@Sun.COM 	 */
180511534SKevin.Ge@Sun.COM 	/* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */
180611534SKevin.Ge@Sun.COM 	mem_size = num_swqe * state->rc_mtu;
180711534SKevin.Ge@Sun.COM 	state->rc_tx_mr_bufs = kmem_zalloc(mem_size, KM_SLEEP);
180811534SKevin.Ge@Sun.COM 
180911534SKevin.Ge@Sun.COM 	mem_attr.mr_len = mem_size;
181011534SKevin.Ge@Sun.COM 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_tx_mr_bufs;
181111534SKevin.Ge@Sun.COM 	mem_attr.mr_as = NULL;
181211534SKevin.Ge@Sun.COM 	mem_attr.mr_flags = IBT_MR_SLEEP;
181311534SKevin.Ge@Sun.COM 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
181411534SKevin.Ge@Sun.COM 	    &state->rc_tx_mr_hdl, &state->rc_tx_mr_desc) != IBT_SUCCESS) {
181511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_init_tx_largebuf_list: ibt_register_mr "
181611534SKevin.Ge@Sun.COM 		    "failed");
181711534SKevin.Ge@Sun.COM 		kmem_free(state->rc_tx_mr_bufs, mem_size);
181811534SKevin.Ge@Sun.COM 		state->rc_tx_mr_bufs = NULL;
181911534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
182011534SKevin.Ge@Sun.COM 	}
182111534SKevin.Ge@Sun.COM 
182211534SKevin.Ge@Sun.COM 	state->rc_tx_largebuf_desc_base = kmem_zalloc(num_swqe *
182311534SKevin.Ge@Sun.COM 	    sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP);
182411534SKevin.Ge@Sun.COM 
182511534SKevin.Ge@Sun.COM 	/*
182611534SKevin.Ge@Sun.COM 	 * Set up the buf chain
182711534SKevin.Ge@Sun.COM 	 */
182811534SKevin.Ge@Sun.COM 	memp = state->rc_tx_mr_bufs;
182911534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_tx_large_bufs_lock);
183011534SKevin.Ge@Sun.COM 	lbufp = state->rc_tx_largebuf_desc_base;
183111534SKevin.Ge@Sun.COM 	for (i = 0; i < num_swqe; i++) {
183211534SKevin.Ge@Sun.COM 		lbufp->lb_buf = memp;
183311534SKevin.Ge@Sun.COM 		lbufp->lb_next = lbufp + 1;
183411534SKevin.Ge@Sun.COM 
183511534SKevin.Ge@Sun.COM 		tail = lbufp;
183611534SKevin.Ge@Sun.COM 
183711534SKevin.Ge@Sun.COM 		memp += state->rc_mtu;
183811534SKevin.Ge@Sun.COM 		lbufp++;
183911534SKevin.Ge@Sun.COM 	}
184011534SKevin.Ge@Sun.COM 	tail->lb_next = NULL;
184111534SKevin.Ge@Sun.COM 
184211534SKevin.Ge@Sun.COM 	/*
184311534SKevin.Ge@Sun.COM 	 * Set up the buffer information in ibd state
184411534SKevin.Ge@Sun.COM 	 */
184511534SKevin.Ge@Sun.COM 	state->rc_tx_largebuf_free_head = state->rc_tx_largebuf_desc_base;
184611534SKevin.Ge@Sun.COM 	state->rc_tx_largebuf_nfree = num_swqe;
184711534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_tx_large_bufs_lock);
184811534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
184911534SKevin.Ge@Sun.COM }
185011534SKevin.Ge@Sun.COM 
185111534SKevin.Ge@Sun.COM void
ibd_rc_fini_tx_largebuf_list(ibd_state_t * state)185211534SKevin.Ge@Sun.COM ibd_rc_fini_tx_largebuf_list(ibd_state_t *state)
185311534SKevin.Ge@Sun.COM {
185411534SKevin.Ge@Sun.COM 	uint32_t num_swqe;
185511534SKevin.Ge@Sun.COM 
185612163SRamaswamy.Tummala@Sun.COM 	num_swqe = state->id_rc_num_swqe - 1;
185711534SKevin.Ge@Sun.COM 
185811534SKevin.Ge@Sun.COM 	if (ibt_deregister_mr(state->id_hca_hdl,
185911534SKevin.Ge@Sun.COM 	    state->rc_tx_mr_hdl) != IBT_SUCCESS) {
186011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_fini_tx_largebuf_list: ibt_deregister_mr() "
186111534SKevin.Ge@Sun.COM 		    "failed");
186211534SKevin.Ge@Sun.COM 	}
186311534SKevin.Ge@Sun.COM 	state->rc_tx_mr_hdl = NULL;
186411534SKevin.Ge@Sun.COM 
186511534SKevin.Ge@Sun.COM 	kmem_free(state->rc_tx_mr_bufs, num_swqe * state->rc_mtu);
186611534SKevin.Ge@Sun.COM 	state->rc_tx_mr_bufs = NULL;
186711534SKevin.Ge@Sun.COM 
186811534SKevin.Ge@Sun.COM 	kmem_free(state->rc_tx_largebuf_desc_base,
186911534SKevin.Ge@Sun.COM 	    num_swqe * sizeof (ibd_rc_tx_largebuf_t));
187011534SKevin.Ge@Sun.COM 	state->rc_tx_largebuf_desc_base = NULL;
187111534SKevin.Ge@Sun.COM }
187211534SKevin.Ge@Sun.COM 
187311534SKevin.Ge@Sun.COM static int
ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t * chan)187411534SKevin.Ge@Sun.COM ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t *chan)
187511534SKevin.Ge@Sun.COM {
187611534SKevin.Ge@Sun.COM 	ibt_mr_attr_t mem_attr;
187711534SKevin.Ge@Sun.COM 	ibd_state_t *state;
187811534SKevin.Ge@Sun.COM 
187911534SKevin.Ge@Sun.COM 	state = chan->state;
188011534SKevin.Ge@Sun.COM 	ASSERT(state != NULL);
188111534SKevin.Ge@Sun.COM 
188211534SKevin.Ge@Sun.COM 	/*
188311534SKevin.Ge@Sun.COM 	 * Allocate one big chunk for all regular tx copy bufs
188411534SKevin.Ge@Sun.COM 	 */
188512163SRamaswamy.Tummala@Sun.COM 	mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh;
188611534SKevin.Ge@Sun.COM 
188711534SKevin.Ge@Sun.COM 	chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP);
188811534SKevin.Ge@Sun.COM 
188911534SKevin.Ge@Sun.COM 	/*
189011534SKevin.Ge@Sun.COM 	 * Do one memory registration on the entire txbuf area
189111534SKevin.Ge@Sun.COM 	 */
189211534SKevin.Ge@Sun.COM 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->tx_mr_bufs;
189311534SKevin.Ge@Sun.COM 	mem_attr.mr_as = NULL;
189411534SKevin.Ge@Sun.COM 	mem_attr.mr_flags = IBT_MR_SLEEP;
189511534SKevin.Ge@Sun.COM 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
189611534SKevin.Ge@Sun.COM 	    &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) {
189711534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed");
189811534SKevin.Ge@Sun.COM 		ASSERT(mem_attr.mr_len ==
189912163SRamaswamy.Tummala@Sun.COM 		    chan->scq_size * state->id_rc_tx_copy_thresh);
190011534SKevin.Ge@Sun.COM 		kmem_free(chan->tx_mr_bufs, mem_attr.mr_len);
190111534SKevin.Ge@Sun.COM 		chan->tx_mr_bufs = NULL;
190211534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
190311534SKevin.Ge@Sun.COM 	}
190411534SKevin.Ge@Sun.COM 
190511534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
190611534SKevin.Ge@Sun.COM }
190711534SKevin.Ge@Sun.COM 
190811534SKevin.Ge@Sun.COM /*
190911534SKevin.Ge@Sun.COM  * Allocate the statically allocated Tx buffer list.
191011534SKevin.Ge@Sun.COM  */
191111534SKevin.Ge@Sun.COM static int
ibd_rc_init_txlist(ibd_rc_chan_t * chan)191211534SKevin.Ge@Sun.COM ibd_rc_init_txlist(ibd_rc_chan_t *chan)
191311534SKevin.Ge@Sun.COM {
191411534SKevin.Ge@Sun.COM 	ibd_swqe_t *swqe;
191511534SKevin.Ge@Sun.COM 	int i;
191611534SKevin.Ge@Sun.COM 	ibt_lkey_t lkey;
191712163SRamaswamy.Tummala@Sun.COM 	ibd_state_t *state = chan->state;
191811534SKevin.Ge@Sun.COM 
191911534SKevin.Ge@Sun.COM 	if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS)
192011534SKevin.Ge@Sun.COM 		return (DDI_FAILURE);
192111534SKevin.Ge@Sun.COM 
192211534SKevin.Ge@Sun.COM 	/*
192311534SKevin.Ge@Sun.COM 	 * Allocate and setup the swqe list
192411534SKevin.Ge@Sun.COM 	 */
192511534SKevin.Ge@Sun.COM 	lkey = chan->tx_mr_desc.md_lkey;
192611534SKevin.Ge@Sun.COM 	chan->tx_wqes = kmem_zalloc(chan->scq_size *
192711534SKevin.Ge@Sun.COM 	    sizeof (ibd_swqe_t), KM_SLEEP);
192811534SKevin.Ge@Sun.COM 	swqe = chan->tx_wqes;
192911534SKevin.Ge@Sun.COM 	for (i = 0; i < chan->scq_size; i++, swqe++) {
193011534SKevin.Ge@Sun.COM 		swqe->swqe_next = NULL;
193111534SKevin.Ge@Sun.COM 		swqe->swqe_im_mblk = NULL;
193211534SKevin.Ge@Sun.COM 
193311534SKevin.Ge@Sun.COM 		swqe->swqe_copybuf.ic_sgl.ds_key = lkey;
193411534SKevin.Ge@Sun.COM 		swqe->swqe_copybuf.ic_sgl.ds_len = 0; /* set in send */
193511534SKevin.Ge@Sun.COM 
193611534SKevin.Ge@Sun.COM 		swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
193711534SKevin.Ge@Sun.COM 		swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL;
193811534SKevin.Ge@Sun.COM 		swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)
193912163SRamaswamy.Tummala@Sun.COM 		    (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh);
194011534SKevin.Ge@Sun.COM 		swqe->w_swr.wr_trans = IBT_RC_SRV;
194111534SKevin.Ge@Sun.COM 
194211534SKevin.Ge@Sun.COM 		/* Add to list */
194311534SKevin.Ge@Sun.COM 		mutex_enter(&chan->tx_wqe_list.dl_mutex);
194411534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_cnt++;
194511534SKevin.Ge@Sun.COM 		swqe->swqe_next = chan->tx_wqe_list.dl_head;
194611534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_head = SWQE_TO_WQE(swqe);
194711534SKevin.Ge@Sun.COM 		mutex_exit(&chan->tx_wqe_list.dl_mutex);
194811534SKevin.Ge@Sun.COM 	}
194911534SKevin.Ge@Sun.COM 
195011534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
195111534SKevin.Ge@Sun.COM }
195211534SKevin.Ge@Sun.COM 
195311534SKevin.Ge@Sun.COM /*
195411534SKevin.Ge@Sun.COM  * Free the statically allocated Tx buffer list.
195511534SKevin.Ge@Sun.COM  */
195611534SKevin.Ge@Sun.COM static void
ibd_rc_fini_txlist(ibd_rc_chan_t * chan)195711534SKevin.Ge@Sun.COM ibd_rc_fini_txlist(ibd_rc_chan_t *chan)
195811534SKevin.Ge@Sun.COM {
195912163SRamaswamy.Tummala@Sun.COM 	ibd_state_t *state = chan->state;
196011534SKevin.Ge@Sun.COM 	if (chan->tx_mr_hdl != NULL) {
196111534SKevin.Ge@Sun.COM 		if (ibt_deregister_mr(chan->state->id_hca_hdl,
196211534SKevin.Ge@Sun.COM 		    chan->tx_mr_hdl) != IBT_SUCCESS) {
196311534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_fini_txlist: ibt_deregister_mr "
196411534SKevin.Ge@Sun.COM 			    "failed");
196511534SKevin.Ge@Sun.COM 		}
196611534SKevin.Ge@Sun.COM 		chan->tx_mr_hdl = NULL;
196711534SKevin.Ge@Sun.COM 	}
196811534SKevin.Ge@Sun.COM 
196911534SKevin.Ge@Sun.COM 	if (chan->tx_mr_bufs != NULL) {
197011534SKevin.Ge@Sun.COM 		kmem_free(chan->tx_mr_bufs, chan->scq_size *
197112163SRamaswamy.Tummala@Sun.COM 		    state->id_rc_tx_copy_thresh);
197211534SKevin.Ge@Sun.COM 		chan->tx_mr_bufs = NULL;
197311534SKevin.Ge@Sun.COM 	}
197411534SKevin.Ge@Sun.COM 
197511534SKevin.Ge@Sun.COM 	if (chan->tx_wqes != NULL) {
197611534SKevin.Ge@Sun.COM 		kmem_free(chan->tx_wqes, chan->scq_size *
197711534SKevin.Ge@Sun.COM 		    sizeof (ibd_swqe_t));
197811534SKevin.Ge@Sun.COM 		chan->tx_wqes = NULL;
197911534SKevin.Ge@Sun.COM 	}
198011534SKevin.Ge@Sun.COM }
198111534SKevin.Ge@Sun.COM 
198211534SKevin.Ge@Sun.COM /*
198311534SKevin.Ge@Sun.COM  * Acquire send wqe from free list.
198411534SKevin.Ge@Sun.COM  * Returns error number and send wqe pointer.
198511534SKevin.Ge@Sun.COM  */
198611534SKevin.Ge@Sun.COM ibd_swqe_t *
ibd_rc_acquire_swqes(ibd_rc_chan_t * chan)198711534SKevin.Ge@Sun.COM ibd_rc_acquire_swqes(ibd_rc_chan_t *chan)
198811534SKevin.Ge@Sun.COM {
198911534SKevin.Ge@Sun.COM 	ibd_swqe_t *wqe;
199011534SKevin.Ge@Sun.COM 
199111534SKevin.Ge@Sun.COM 	mutex_enter(&chan->tx_rel_list.dl_mutex);
199211534SKevin.Ge@Sun.COM 	if (chan->tx_rel_list.dl_head != NULL) {
199311534SKevin.Ge@Sun.COM 		/* transfer id_tx_rel_list to id_tx_list */
199411534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_head =
199511534SKevin.Ge@Sun.COM 		    chan->tx_rel_list.dl_head;
199611534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_cnt =
199711534SKevin.Ge@Sun.COM 		    chan->tx_rel_list.dl_cnt;
199811534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_pending_sends = B_FALSE;
199911534SKevin.Ge@Sun.COM 
200011534SKevin.Ge@Sun.COM 		/* clear id_tx_rel_list */
200111534SKevin.Ge@Sun.COM 		chan->tx_rel_list.dl_head = NULL;
200211534SKevin.Ge@Sun.COM 		chan->tx_rel_list.dl_cnt = 0;
200311534SKevin.Ge@Sun.COM 		mutex_exit(&chan->tx_rel_list.dl_mutex);
200411534SKevin.Ge@Sun.COM 
200511534SKevin.Ge@Sun.COM 		wqe = WQE_TO_SWQE(chan->tx_wqe_list.dl_head);
200611534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_cnt -= 1;
200711534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_head = wqe->swqe_next;
200811534SKevin.Ge@Sun.COM 	} else {	/* no free swqe */
200911534SKevin.Ge@Sun.COM 		mutex_exit(&chan->tx_rel_list.dl_mutex);
201011534SKevin.Ge@Sun.COM 		chan->tx_wqe_list.dl_pending_sends = B_TRUE;
201111534SKevin.Ge@Sun.COM 		wqe = NULL;
201211534SKevin.Ge@Sun.COM 	}
201311534SKevin.Ge@Sun.COM 	return (wqe);
201411534SKevin.Ge@Sun.COM }
201511534SKevin.Ge@Sun.COM 
201611534SKevin.Ge@Sun.COM /*
201711534SKevin.Ge@Sun.COM  * Release send wqe back into free list.
201811534SKevin.Ge@Sun.COM  */
201911534SKevin.Ge@Sun.COM static void
ibd_rc_release_swqe(ibd_rc_chan_t * chan,ibd_swqe_t * swqe)202011534SKevin.Ge@Sun.COM ibd_rc_release_swqe(ibd_rc_chan_t *chan, ibd_swqe_t *swqe)
202111534SKevin.Ge@Sun.COM {
202211534SKevin.Ge@Sun.COM 	/*
202311534SKevin.Ge@Sun.COM 	 * Add back on Tx list for reuse.
202411534SKevin.Ge@Sun.COM 	 */
202511534SKevin.Ge@Sun.COM 	swqe->swqe_next = NULL;
202611534SKevin.Ge@Sun.COM 	mutex_enter(&chan->tx_rel_list.dl_mutex);
202711534SKevin.Ge@Sun.COM 	chan->tx_rel_list.dl_pending_sends = B_FALSE;
202811534SKevin.Ge@Sun.COM 	swqe->swqe_next = chan->tx_rel_list.dl_head;
202911534SKevin.Ge@Sun.COM 	chan->tx_rel_list.dl_head = SWQE_TO_WQE(swqe);
203011534SKevin.Ge@Sun.COM 	chan->tx_rel_list.dl_cnt++;
203111534SKevin.Ge@Sun.COM 	mutex_exit(&chan->tx_rel_list.dl_mutex);
203211534SKevin.Ge@Sun.COM }
203311534SKevin.Ge@Sun.COM 
203411534SKevin.Ge@Sun.COM void
ibd_rc_post_send(ibd_rc_chan_t * chan,ibd_swqe_t * node)203511534SKevin.Ge@Sun.COM ibd_rc_post_send(ibd_rc_chan_t *chan, ibd_swqe_t *node)
203611534SKevin.Ge@Sun.COM {
203711534SKevin.Ge@Sun.COM 	uint_t		i;
203811534SKevin.Ge@Sun.COM 	uint_t		num_posted;
203911534SKevin.Ge@Sun.COM 	uint_t		n_wrs;
204011534SKevin.Ge@Sun.COM 	ibt_status_t	ibt_status;
204111534SKevin.Ge@Sun.COM 	ibt_send_wr_t	wrs[IBD_MAX_TX_POST_MULTIPLE];
204211534SKevin.Ge@Sun.COM 	ibd_swqe_t	*tx_head, *elem;
204311534SKevin.Ge@Sun.COM 	ibd_swqe_t	*nodes[IBD_MAX_TX_POST_MULTIPLE];
204411534SKevin.Ge@Sun.COM 
204511534SKevin.Ge@Sun.COM 	/* post the one request, then check for more */
204611534SKevin.Ge@Sun.COM 	ibt_status = ibt_post_send(chan->chan_hdl,
204711534SKevin.Ge@Sun.COM 	    &node->w_swr, 1, NULL);
204811534SKevin.Ge@Sun.COM 	if (ibt_status != IBT_SUCCESS) {
204911534SKevin.Ge@Sun.COM 		ibd_print_warn(chan->state, "ibd_post_send: "
205011534SKevin.Ge@Sun.COM 		    "posting one wr failed: ret=%d", ibt_status);
205111534SKevin.Ge@Sun.COM 		ibd_rc_tx_cleanup(node);
205211534SKevin.Ge@Sun.COM 	}
205311534SKevin.Ge@Sun.COM 
205411534SKevin.Ge@Sun.COM 	tx_head = NULL;
205511534SKevin.Ge@Sun.COM 	for (;;) {
205611534SKevin.Ge@Sun.COM 		if (tx_head == NULL) {
205711534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_post_lock);
205811534SKevin.Ge@Sun.COM 			tx_head = chan->tx_head;
205911534SKevin.Ge@Sun.COM 			if (tx_head == NULL) {
206011534SKevin.Ge@Sun.COM 				chan->tx_busy = 0;
206111534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_post_lock);
206211534SKevin.Ge@Sun.COM 				return;
206311534SKevin.Ge@Sun.COM 			}
206411534SKevin.Ge@Sun.COM 			chan->tx_head = NULL;
206511534SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_post_lock);
206611534SKevin.Ge@Sun.COM 		}
206711534SKevin.Ge@Sun.COM 
206811534SKevin.Ge@Sun.COM 		/*
206911534SKevin.Ge@Sun.COM 		 * Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs
207011534SKevin.Ge@Sun.COM 		 * at a time if possible, and keep posting them.
207111534SKevin.Ge@Sun.COM 		 */
207211534SKevin.Ge@Sun.COM 		for (n_wrs = 0, elem = tx_head;
207311534SKevin.Ge@Sun.COM 		    (elem) && (n_wrs < IBD_MAX_TX_POST_MULTIPLE);
207411534SKevin.Ge@Sun.COM 		    elem = WQE_TO_SWQE(elem->swqe_next), n_wrs++) {
207511534SKevin.Ge@Sun.COM 			nodes[n_wrs] = elem;
207611534SKevin.Ge@Sun.COM 			wrs[n_wrs] = elem->w_swr;
207711534SKevin.Ge@Sun.COM 		}
207811534SKevin.Ge@Sun.COM 		tx_head = elem;
207911534SKevin.Ge@Sun.COM 
208011534SKevin.Ge@Sun.COM 		ASSERT(n_wrs != 0);
208111534SKevin.Ge@Sun.COM 
208211534SKevin.Ge@Sun.COM 		/*
208311534SKevin.Ge@Sun.COM 		 * If posting fails for some reason, we'll never receive
208411534SKevin.Ge@Sun.COM 		 * completion intimation, so we'll need to cleanup. But
208511534SKevin.Ge@Sun.COM 		 * we need to make sure we don't clean up nodes whose
208611534SKevin.Ge@Sun.COM 		 * wrs have been successfully posted. We assume that the
208711534SKevin.Ge@Sun.COM 		 * hca driver returns on the first failure to post and
208811534SKevin.Ge@Sun.COM 		 * therefore the first 'num_posted' entries don't need
208911534SKevin.Ge@Sun.COM 		 * cleanup here.
209011534SKevin.Ge@Sun.COM 		 */
209111534SKevin.Ge@Sun.COM 		num_posted = 0;
209211534SKevin.Ge@Sun.COM 		ibt_status = ibt_post_send(chan->chan_hdl,
209311534SKevin.Ge@Sun.COM 		    wrs, n_wrs, &num_posted);
209411534SKevin.Ge@Sun.COM 		if (ibt_status != IBT_SUCCESS) {
209511534SKevin.Ge@Sun.COM 			ibd_print_warn(chan->state, "ibd_post_send: "
209611534SKevin.Ge@Sun.COM 			    "posting multiple wrs failed: "
209711534SKevin.Ge@Sun.COM 			    "requested=%d, done=%d, ret=%d",
209811534SKevin.Ge@Sun.COM 			    n_wrs, num_posted, ibt_status);
209911534SKevin.Ge@Sun.COM 
210011534SKevin.Ge@Sun.COM 			for (i = num_posted; i < n_wrs; i++)
210111534SKevin.Ge@Sun.COM 				ibd_rc_tx_cleanup(nodes[i]);
210211534SKevin.Ge@Sun.COM 		}
210311534SKevin.Ge@Sun.COM 	}
210411534SKevin.Ge@Sun.COM }
210511534SKevin.Ge@Sun.COM 
210611534SKevin.Ge@Sun.COM /*
210711534SKevin.Ge@Sun.COM  * Common code that deals with clean ups after a successful or
210811534SKevin.Ge@Sun.COM  * erroneous transmission attempt.
210911534SKevin.Ge@Sun.COM  */
211011534SKevin.Ge@Sun.COM void
ibd_rc_tx_cleanup(ibd_swqe_t * swqe)211111534SKevin.Ge@Sun.COM ibd_rc_tx_cleanup(ibd_swqe_t *swqe)
211211534SKevin.Ge@Sun.COM {
211311534SKevin.Ge@Sun.COM 	ibd_ace_t *ace = swqe->w_ahandle;
211411534SKevin.Ge@Sun.COM 	ibd_state_t *state;
211511534SKevin.Ge@Sun.COM 
211611534SKevin.Ge@Sun.COM 	ASSERT(ace != NULL);
211711534SKevin.Ge@Sun.COM 	ASSERT(ace->ac_chan != NULL);
211811534SKevin.Ge@Sun.COM 
211911534SKevin.Ge@Sun.COM 	state = ace->ac_chan->state;
212011534SKevin.Ge@Sun.COM 
212111534SKevin.Ge@Sun.COM 	/*
212211534SKevin.Ge@Sun.COM 	 * If this was a dynamic registration in ibd_send(),
212311534SKevin.Ge@Sun.COM 	 * deregister now.
212411534SKevin.Ge@Sun.COM 	 */
212511534SKevin.Ge@Sun.COM 	if (swqe->swqe_im_mblk != NULL) {
212611534SKevin.Ge@Sun.COM 		ASSERT(swqe->w_buftype == IBD_WQE_MAPPED);
212711534SKevin.Ge@Sun.COM 		if (swqe->w_buftype == IBD_WQE_MAPPED) {
212811534SKevin.Ge@Sun.COM 			ibd_unmap_mem(state, swqe);
212911534SKevin.Ge@Sun.COM 		}
213011534SKevin.Ge@Sun.COM 		freemsg(swqe->swqe_im_mblk);
213111534SKevin.Ge@Sun.COM 		swqe->swqe_im_mblk = NULL;
213211534SKevin.Ge@Sun.COM 	} else {
213311534SKevin.Ge@Sun.COM 		ASSERT(swqe->w_buftype != IBD_WQE_MAPPED);
213411534SKevin.Ge@Sun.COM 	}
213511534SKevin.Ge@Sun.COM 
213611534SKevin.Ge@Sun.COM 	if (swqe->w_buftype == IBD_WQE_RC_COPYBUF) {
213711534SKevin.Ge@Sun.COM 		ibd_rc_tx_largebuf_t *lbufp;
213811534SKevin.Ge@Sun.COM 
213911534SKevin.Ge@Sun.COM 		lbufp = swqe->w_rc_tx_largebuf;
214011534SKevin.Ge@Sun.COM 		ASSERT(lbufp != NULL);
214111534SKevin.Ge@Sun.COM 
214211534SKevin.Ge@Sun.COM 		mutex_enter(&state->rc_tx_large_bufs_lock);
214311534SKevin.Ge@Sun.COM 		lbufp->lb_next = state->rc_tx_largebuf_free_head;
214411534SKevin.Ge@Sun.COM 		state->rc_tx_largebuf_free_head = lbufp;
214511534SKevin.Ge@Sun.COM 		state->rc_tx_largebuf_nfree ++;
214611534SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_tx_large_bufs_lock);
214711534SKevin.Ge@Sun.COM 		swqe->w_rc_tx_largebuf = NULL;
214811534SKevin.Ge@Sun.COM 	}
214911534SKevin.Ge@Sun.COM 
215011534SKevin.Ge@Sun.COM 
215111534SKevin.Ge@Sun.COM 	/*
215211534SKevin.Ge@Sun.COM 	 * Release the send wqe for reuse.
215311534SKevin.Ge@Sun.COM 	 */
215411534SKevin.Ge@Sun.COM 	ibd_rc_release_swqe(ace->ac_chan, swqe);
215511534SKevin.Ge@Sun.COM 
215611534SKevin.Ge@Sun.COM 	/*
215711534SKevin.Ge@Sun.COM 	 * Drop the reference count on the AH; it can be reused
215811534SKevin.Ge@Sun.COM 	 * now for a different destination if there are no more
215911534SKevin.Ge@Sun.COM 	 * posted sends that will use it. This can be eliminated
216011534SKevin.Ge@Sun.COM 	 * if we can always associate each Tx buffer with an AH.
216111534SKevin.Ge@Sun.COM 	 * The ace can be null if we are cleaning up from the
216211534SKevin.Ge@Sun.COM 	 * ibd_send() error path.
216311534SKevin.Ge@Sun.COM 	 */
216411534SKevin.Ge@Sun.COM 	ibd_dec_ref_ace(state, ace);
216511534SKevin.Ge@Sun.COM }
216611534SKevin.Ge@Sun.COM 
216711534SKevin.Ge@Sun.COM void
ibd_rc_drain_scq(ibd_rc_chan_t * chan,ibt_cq_hdl_t cq_hdl)216811534SKevin.Ge@Sun.COM ibd_rc_drain_scq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
216911534SKevin.Ge@Sun.COM {
217011534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
217111534SKevin.Ge@Sun.COM 	ibd_wqe_t *wqe;
217211534SKevin.Ge@Sun.COM 	ibt_wc_t *wc, *wcs;
217313030SKevin.Ge@Sun.COM 	ibd_ace_t *ace;
217411534SKevin.Ge@Sun.COM 	uint_t numwcs, real_numwcs;
217511534SKevin.Ge@Sun.COM 	int i;
217613030SKevin.Ge@Sun.COM 	boolean_t encount_error;
217711534SKevin.Ge@Sun.COM 
217811534SKevin.Ge@Sun.COM 	wcs = chan->tx_wc;
217911534SKevin.Ge@Sun.COM 	numwcs = IBD_RC_MAX_CQ_WC;
218013030SKevin.Ge@Sun.COM 	encount_error = B_FALSE;
218111534SKevin.Ge@Sun.COM 
218211534SKevin.Ge@Sun.COM 	while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
218311534SKevin.Ge@Sun.COM 		for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
218411534SKevin.Ge@Sun.COM 			wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
218511534SKevin.Ge@Sun.COM 			if (wc->wc_status != IBT_WC_SUCCESS) {
218613030SKevin.Ge@Sun.COM 				if (encount_error == B_FALSE) {
218713030SKevin.Ge@Sun.COM 					/*
218813030SKevin.Ge@Sun.COM 					 * This RC channle is in error status,
218913030SKevin.Ge@Sun.COM 					 * remove it.
219013030SKevin.Ge@Sun.COM 					 */
219113030SKevin.Ge@Sun.COM 					encount_error = B_TRUE;
219213030SKevin.Ge@Sun.COM 					mutex_enter(&state->id_ac_mutex);
219313030SKevin.Ge@Sun.COM 					if ((chan->chan_state ==
219413030SKevin.Ge@Sun.COM 					    IBD_RC_STATE_ACT_ESTAB) &&
219513030SKevin.Ge@Sun.COM 					    (chan->state->id_link_state ==
219613030SKevin.Ge@Sun.COM 					    LINK_STATE_UP) &&
219713030SKevin.Ge@Sun.COM 					    ((ace = ibd_acache_find(state,
219813030SKevin.Ge@Sun.COM 					    &chan->ace->ac_mac, B_FALSE, 0))
219913030SKevin.Ge@Sun.COM 					    != NULL) && (ace == chan->ace)) {
220013030SKevin.Ge@Sun.COM 						ASSERT(ace->ac_mce == NULL);
220113030SKevin.Ge@Sun.COM 						INC_REF(ace, 1);
220213030SKevin.Ge@Sun.COM 						IBD_ACACHE_PULLOUT_ACTIVE(
220313030SKevin.Ge@Sun.COM 						    state, ace);
220413030SKevin.Ge@Sun.COM 						chan->chan_state =
220513030SKevin.Ge@Sun.COM 						    IBD_RC_STATE_ACT_CLOSING;
220613030SKevin.Ge@Sun.COM 						mutex_exit(&state->id_ac_mutex);
220713030SKevin.Ge@Sun.COM 						state->rc_reset_cnt++;
220813030SKevin.Ge@Sun.COM 						DPRINT(30, "ibd_rc_drain_scq: "
220913030SKevin.Ge@Sun.COM 						    "wc_status(%d) != SUCC, "
221013030SKevin.Ge@Sun.COM 						    "chan=%p, ace=%p, "
221113030SKevin.Ge@Sun.COM 						    "link_state=%d"
221213030SKevin.Ge@Sun.COM 						    "reset RC channel",
221313030SKevin.Ge@Sun.COM 						    wc->wc_status, chan,
221413030SKevin.Ge@Sun.COM 						    chan->ace, chan->state->
221513030SKevin.Ge@Sun.COM 						    id_link_state);
221613030SKevin.Ge@Sun.COM 						ibd_rc_signal_act_close(
221713030SKevin.Ge@Sun.COM 						    state, ace);
221813030SKevin.Ge@Sun.COM 					} else {
221913030SKevin.Ge@Sun.COM 						mutex_exit(&state->id_ac_mutex);
222013030SKevin.Ge@Sun.COM 						state->
222113030SKevin.Ge@Sun.COM 						    rc_act_close_simultaneous++;
222213030SKevin.Ge@Sun.COM 						DPRINT(40, "ibd_rc_drain_scq: "
222313030SKevin.Ge@Sun.COM 						    "wc_status(%d) != SUCC, "
222413030SKevin.Ge@Sun.COM 						    "chan=%p, chan_state=%d,"
222513030SKevin.Ge@Sun.COM 						    "ace=%p, link_state=%d."
222613030SKevin.Ge@Sun.COM 						    "other thread is closing "
222713030SKevin.Ge@Sun.COM 						    "it", wc->wc_status, chan,
222813030SKevin.Ge@Sun.COM 						    chan->chan_state, chan->ace,
222913030SKevin.Ge@Sun.COM 						    chan->state->id_link_state);
223013030SKevin.Ge@Sun.COM 					}
223113030SKevin.Ge@Sun.COM 				}
223211534SKevin.Ge@Sun.COM 			}
223311534SKevin.Ge@Sun.COM 			ibd_rc_tx_cleanup(WQE_TO_SWQE(wqe));
223411534SKevin.Ge@Sun.COM 		}
223511534SKevin.Ge@Sun.COM 
223611534SKevin.Ge@Sun.COM 		mutex_enter(&state->id_sched_lock);
223711534SKevin.Ge@Sun.COM 		if (state->id_sched_needed == 0) {
223811534SKevin.Ge@Sun.COM 			mutex_exit(&state->id_sched_lock);
223911534SKevin.Ge@Sun.COM 		} else if (state->id_sched_needed & IBD_RSRC_RC_SWQE) {
224011534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_wqe_list.dl_mutex);
224111534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_rel_list.dl_mutex);
224211534SKevin.Ge@Sun.COM 			if ((chan->tx_rel_list.dl_cnt +
224311534SKevin.Ge@Sun.COM 			    chan->tx_wqe_list.dl_cnt) > IBD_RC_TX_FREE_THRESH) {
224411534SKevin.Ge@Sun.COM 				state->id_sched_needed &= ~IBD_RSRC_RC_SWQE;
224511534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_rel_list.dl_mutex);
224611534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_wqe_list.dl_mutex);
224711534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
224811534SKevin.Ge@Sun.COM 				state->rc_swqe_mac_update++;
224911534SKevin.Ge@Sun.COM 				mac_tx_update(state->id_mh);
225011534SKevin.Ge@Sun.COM 			} else {
225111534SKevin.Ge@Sun.COM 				state->rc_scq_no_swqe++;
225211534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_rel_list.dl_mutex);
225311534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_wqe_list.dl_mutex);
225411534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
225511534SKevin.Ge@Sun.COM 			}
225611534SKevin.Ge@Sun.COM 		} else if (state->id_sched_needed & IBD_RSRC_RC_TX_LARGEBUF) {
225711534SKevin.Ge@Sun.COM 			mutex_enter(&state->rc_tx_large_bufs_lock);
225811534SKevin.Ge@Sun.COM 			if (state->rc_tx_largebuf_nfree >
225911534SKevin.Ge@Sun.COM 			    IBD_RC_TX_FREE_THRESH) {
226011534SKevin.Ge@Sun.COM 				ASSERT(state->rc_tx_largebuf_free_head != NULL);
226111534SKevin.Ge@Sun.COM 				state->id_sched_needed &=
226211534SKevin.Ge@Sun.COM 				    ~IBD_RSRC_RC_TX_LARGEBUF;
226311534SKevin.Ge@Sun.COM 				mutex_exit(&state->rc_tx_large_bufs_lock);
226411534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
226511534SKevin.Ge@Sun.COM 				state->rc_xmt_buf_mac_update++;
226611534SKevin.Ge@Sun.COM 				mac_tx_update(state->id_mh);
226711534SKevin.Ge@Sun.COM 			} else {
226811534SKevin.Ge@Sun.COM 				state->rc_scq_no_largebuf++;
226911534SKevin.Ge@Sun.COM 				mutex_exit(&state->rc_tx_large_bufs_lock);
227011534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
227111534SKevin.Ge@Sun.COM 			}
227211534SKevin.Ge@Sun.COM 		} else if (state->id_sched_needed & IBD_RSRC_SWQE) {
227311534SKevin.Ge@Sun.COM 			mutex_enter(&state->id_tx_list.dl_mutex);
227411534SKevin.Ge@Sun.COM 			mutex_enter(&state->id_tx_rel_list.dl_mutex);
227511534SKevin.Ge@Sun.COM 			if ((state->id_tx_list.dl_cnt +
227611534SKevin.Ge@Sun.COM 			    state->id_tx_rel_list.dl_cnt)
227711534SKevin.Ge@Sun.COM 			    > IBD_FREE_SWQES_THRESH) {
227811534SKevin.Ge@Sun.COM 				state->id_sched_needed &= ~IBD_RSRC_SWQE;
227911534SKevin.Ge@Sun.COM 				state->id_sched_cnt++;
228011534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_tx_rel_list.dl_mutex);
228111534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_tx_list.dl_mutex);
228211534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
228311534SKevin.Ge@Sun.COM 				mac_tx_update(state->id_mh);
228411534SKevin.Ge@Sun.COM 			} else {
228511534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_tx_rel_list.dl_mutex);
228611534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_tx_list.dl_mutex);
228711534SKevin.Ge@Sun.COM 				mutex_exit(&state->id_sched_lock);
228811534SKevin.Ge@Sun.COM 			}
228911534SKevin.Ge@Sun.COM 		} else {
229011534SKevin.Ge@Sun.COM 			mutex_exit(&state->id_sched_lock);
229111534SKevin.Ge@Sun.COM 		}
229211534SKevin.Ge@Sun.COM 	}
229311534SKevin.Ge@Sun.COM }
229411534SKevin.Ge@Sun.COM 
229511534SKevin.Ge@Sun.COM /* Send CQ handler, call ibd_rx_tx_cleanup to recycle Tx buffers */
229611534SKevin.Ge@Sun.COM /* ARGSUSED */
229711534SKevin.Ge@Sun.COM static void
ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl,void * arg)229811534SKevin.Ge@Sun.COM ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
229911534SKevin.Ge@Sun.COM {
230011534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
230111534SKevin.Ge@Sun.COM 
230211534SKevin.Ge@Sun.COM 	if (ibd_rc_tx_softintr == 1) {
230311534SKevin.Ge@Sun.COM 		mutex_enter(&chan->tx_poll_lock);
230411534SKevin.Ge@Sun.COM 		if (chan->tx_poll_busy & IBD_CQ_POLLING) {
230511534SKevin.Ge@Sun.COM 			chan->tx_poll_busy |= IBD_REDO_CQ_POLLING;
230611534SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_poll_lock);
230711534SKevin.Ge@Sun.COM 			return;
230811534SKevin.Ge@Sun.COM 		} else {
230911534SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_poll_lock);
231011534SKevin.Ge@Sun.COM 			ddi_trigger_softintr(chan->scq_softintr);
231111534SKevin.Ge@Sun.COM 		}
231211534SKevin.Ge@Sun.COM 	} else
231311534SKevin.Ge@Sun.COM 		(void) ibd_rc_tx_recycle(arg);
231411534SKevin.Ge@Sun.COM }
231511534SKevin.Ge@Sun.COM 
231611534SKevin.Ge@Sun.COM static uint_t
ibd_rc_tx_recycle(caddr_t arg)231711534SKevin.Ge@Sun.COM ibd_rc_tx_recycle(caddr_t arg)
231811534SKevin.Ge@Sun.COM {
231911534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
232011534SKevin.Ge@Sun.COM 	ibd_state_t *state = chan->state;
232111534SKevin.Ge@Sun.COM 	int flag, redo_flag;
232211534SKevin.Ge@Sun.COM 	int redo = 1;
232311534SKevin.Ge@Sun.COM 
232411534SKevin.Ge@Sun.COM 	flag = IBD_CQ_POLLING;
232511534SKevin.Ge@Sun.COM 	redo_flag = IBD_REDO_CQ_POLLING;
232611534SKevin.Ge@Sun.COM 
232711534SKevin.Ge@Sun.COM 	mutex_enter(&chan->tx_poll_lock);
232811534SKevin.Ge@Sun.COM 	if (chan->tx_poll_busy & flag) {
232911534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_tx_recycle: multiple polling "
233011534SKevin.Ge@Sun.COM 		    "threads");
233111534SKevin.Ge@Sun.COM 		chan->tx_poll_busy |= redo_flag;
233211534SKevin.Ge@Sun.COM 		mutex_exit(&chan->tx_poll_lock);
233311534SKevin.Ge@Sun.COM 		return (DDI_INTR_CLAIMED);
233411534SKevin.Ge@Sun.COM 	}
233511534SKevin.Ge@Sun.COM 	chan->tx_poll_busy |= flag;
233611534SKevin.Ge@Sun.COM 	mutex_exit(&chan->tx_poll_lock);
233711534SKevin.Ge@Sun.COM 
233811534SKevin.Ge@Sun.COM 	/*
233911534SKevin.Ge@Sun.COM 	 * Poll for completed entries; the CQ will not interrupt any
234011534SKevin.Ge@Sun.COM 	 * more for completed packets.
234111534SKevin.Ge@Sun.COM 	 */
234211534SKevin.Ge@Sun.COM 	ibd_rc_drain_scq(chan, chan->scq_hdl);
234311534SKevin.Ge@Sun.COM 
234411534SKevin.Ge@Sun.COM 	/*
234511534SKevin.Ge@Sun.COM 	 * Now enable CQ notifications; all completions originating now
234611534SKevin.Ge@Sun.COM 	 * will cause new interrupts.
234711534SKevin.Ge@Sun.COM 	 */
234811534SKevin.Ge@Sun.COM 	do {
234911534SKevin.Ge@Sun.COM 		if (ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION) !=
235011534SKevin.Ge@Sun.COM 		    IBT_SUCCESS) {
235111534SKevin.Ge@Sun.COM 			/*
235211534SKevin.Ge@Sun.COM 			 * We do not expect a failure here.
235311534SKevin.Ge@Sun.COM 			 */
235411534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_scq_handler: ibt_enable_cq_notify()"
235511534SKevin.Ge@Sun.COM 			    " failed");
235611534SKevin.Ge@Sun.COM 		}
235711534SKevin.Ge@Sun.COM 
235811534SKevin.Ge@Sun.COM 		ibd_rc_drain_scq(chan, chan->scq_hdl);
235911534SKevin.Ge@Sun.COM 
236011534SKevin.Ge@Sun.COM 		mutex_enter(&chan->tx_poll_lock);
236111534SKevin.Ge@Sun.COM 		if (chan->tx_poll_busy & redo_flag)
236211534SKevin.Ge@Sun.COM 			chan->tx_poll_busy &= ~redo_flag;
236311534SKevin.Ge@Sun.COM 		else {
236411534SKevin.Ge@Sun.COM 			chan->tx_poll_busy &= ~flag;
236511534SKevin.Ge@Sun.COM 			redo = 0;
236611534SKevin.Ge@Sun.COM 		}
236711534SKevin.Ge@Sun.COM 		mutex_exit(&chan->tx_poll_lock);
236811534SKevin.Ge@Sun.COM 
236911534SKevin.Ge@Sun.COM 	} while (redo);
237011534SKevin.Ge@Sun.COM 
237111534SKevin.Ge@Sun.COM 	return (DDI_INTR_CLAIMED);
237211534SKevin.Ge@Sun.COM }
237311534SKevin.Ge@Sun.COM 
237411642SKevin.Ge@Sun.COM static ibt_status_t
ibd_register_service(ibt_srv_desc_t * srv,ib_svc_id_t sid,int num_sids,ibt_srv_hdl_t * srv_hdl,ib_svc_id_t * ret_sid)237511642SKevin.Ge@Sun.COM ibd_register_service(ibt_srv_desc_t *srv, ib_svc_id_t sid,
237611642SKevin.Ge@Sun.COM     int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
237711642SKevin.Ge@Sun.COM {
237811642SKevin.Ge@Sun.COM 	ibd_service_t *p;
237911642SKevin.Ge@Sun.COM 	ibt_status_t status;
238011642SKevin.Ge@Sun.COM 
238111642SKevin.Ge@Sun.COM 	mutex_enter(&ibd_gstate.ig_mutex);
238211642SKevin.Ge@Sun.COM 	for (p = ibd_gstate.ig_service_list; p != NULL; p = p->is_link) {
238311642SKevin.Ge@Sun.COM 		if (p->is_sid == sid) {
238411642SKevin.Ge@Sun.COM 			p->is_ref_cnt++;
238511642SKevin.Ge@Sun.COM 			*srv_hdl = p->is_srv_hdl;
238611642SKevin.Ge@Sun.COM 			*ret_sid = sid;
238711642SKevin.Ge@Sun.COM 			mutex_exit(&ibd_gstate.ig_mutex);
238811642SKevin.Ge@Sun.COM 			return (IBT_SUCCESS);
238911642SKevin.Ge@Sun.COM 		}
239011642SKevin.Ge@Sun.COM 	}
239111642SKevin.Ge@Sun.COM 	status = ibt_register_service(ibd_gstate.ig_ibt_hdl, srv, sid,
239211642SKevin.Ge@Sun.COM 	    num_sids, srv_hdl, ret_sid);
239311642SKevin.Ge@Sun.COM 	if (status == IBT_SUCCESS) {
239411642SKevin.Ge@Sun.COM 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
239511642SKevin.Ge@Sun.COM 		p->is_srv_hdl = *srv_hdl;
239611642SKevin.Ge@Sun.COM 		p->is_sid = sid;
239711642SKevin.Ge@Sun.COM 		p->is_ref_cnt = 1;
239811642SKevin.Ge@Sun.COM 		p->is_link = ibd_gstate.ig_service_list;
239911642SKevin.Ge@Sun.COM 		ibd_gstate.ig_service_list = p;
240011642SKevin.Ge@Sun.COM 	}
240111642SKevin.Ge@Sun.COM 	mutex_exit(&ibd_gstate.ig_mutex);
240211642SKevin.Ge@Sun.COM 	return (status);
240311642SKevin.Ge@Sun.COM }
240411642SKevin.Ge@Sun.COM 
240511642SKevin.Ge@Sun.COM static ibt_status_t
ibd_deregister_service(ibt_srv_hdl_t srv_hdl)240611642SKevin.Ge@Sun.COM ibd_deregister_service(ibt_srv_hdl_t srv_hdl)
240711642SKevin.Ge@Sun.COM {
240811642SKevin.Ge@Sun.COM 	ibd_service_t *p, **pp;
240911642SKevin.Ge@Sun.COM 	ibt_status_t status;
241011642SKevin.Ge@Sun.COM 
241111642SKevin.Ge@Sun.COM 	mutex_enter(&ibd_gstate.ig_mutex);
241211642SKevin.Ge@Sun.COM 	for (pp = &ibd_gstate.ig_service_list; *pp != NULL;
241311642SKevin.Ge@Sun.COM 	    pp = &((*pp)->is_link)) {
241411642SKevin.Ge@Sun.COM 		p = *pp;
241511642SKevin.Ge@Sun.COM 		if (p->is_srv_hdl == srv_hdl) {	/* Found it */
241611642SKevin.Ge@Sun.COM 			if (--p->is_ref_cnt == 0) {
241711642SKevin.Ge@Sun.COM 				status = ibt_deregister_service(
241811642SKevin.Ge@Sun.COM 				    ibd_gstate.ig_ibt_hdl, srv_hdl);
241911642SKevin.Ge@Sun.COM 				*pp = p->is_link; /* link prev to next */
242011642SKevin.Ge@Sun.COM 				kmem_free(p, sizeof (*p));
242111642SKevin.Ge@Sun.COM 			} else {
242211642SKevin.Ge@Sun.COM 				status = IBT_SUCCESS;
242311642SKevin.Ge@Sun.COM 			}
242411642SKevin.Ge@Sun.COM 			mutex_exit(&ibd_gstate.ig_mutex);
242511642SKevin.Ge@Sun.COM 			return (status);
242611642SKevin.Ge@Sun.COM 		}
242711642SKevin.Ge@Sun.COM 	}
242811642SKevin.Ge@Sun.COM 	/* Should not ever get here */
242911642SKevin.Ge@Sun.COM 	mutex_exit(&ibd_gstate.ig_mutex);
243011642SKevin.Ge@Sun.COM 	return (IBT_FAILURE);
243111642SKevin.Ge@Sun.COM }
243211642SKevin.Ge@Sun.COM 
243311534SKevin.Ge@Sun.COM /* Listen with corresponding service ID */
243411534SKevin.Ge@Sun.COM ibt_status_t
ibd_rc_listen(ibd_state_t * state)243511534SKevin.Ge@Sun.COM ibd_rc_listen(ibd_state_t *state)
243611534SKevin.Ge@Sun.COM {
243711534SKevin.Ge@Sun.COM 	ibt_srv_desc_t srvdesc;
243811534SKevin.Ge@Sun.COM 	ib_svc_id_t ret_sid;
243911534SKevin.Ge@Sun.COM 	ibt_status_t status;
244011534SKevin.Ge@Sun.COM 	ib_gid_t gid;
244111534SKevin.Ge@Sun.COM 
244211534SKevin.Ge@Sun.COM 	if (state->rc_listen_hdl != NULL) {
244311534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_listen: rc_listen_hdl should be NULL");
244411534SKevin.Ge@Sun.COM 		return (IBT_FAILURE);
244511534SKevin.Ge@Sun.COM 	}
244611534SKevin.Ge@Sun.COM 
244711534SKevin.Ge@Sun.COM 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
244811534SKevin.Ge@Sun.COM 	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
244911534SKevin.Ge@Sun.COM 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
245011534SKevin.Ge@Sun.COM 
245111534SKevin.Ge@Sun.COM 	/*
245211534SKevin.Ge@Sun.COM 	 * Register the service with service id
245311534SKevin.Ge@Sun.COM 	 * Incoming connection requests should arrive on this service id.
245411534SKevin.Ge@Sun.COM 	 */
245511642SKevin.Ge@Sun.COM 	status = ibd_register_service(&srvdesc,
245611534SKevin.Ge@Sun.COM 	    IBD_RC_QPN_TO_SID(state->id_qpnum),
245711534SKevin.Ge@Sun.COM 	    1, &state->rc_listen_hdl, &ret_sid);
245811534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
245911534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_listen: Service Registration Failed, "
246011534SKevin.Ge@Sun.COM 		    "ret=%d", status);
246111534SKevin.Ge@Sun.COM 		return (status);
246211534SKevin.Ge@Sun.COM 	}
246311534SKevin.Ge@Sun.COM 
246411534SKevin.Ge@Sun.COM 	gid = state->id_sgid;
246511534SKevin.Ge@Sun.COM 
246611534SKevin.Ge@Sun.COM 	/* pass state as cm_private */
246711534SKevin.Ge@Sun.COM 	status = ibt_bind_service(state->rc_listen_hdl,
246811534SKevin.Ge@Sun.COM 	    gid, NULL, state, &state->rc_listen_bind);
246911534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
247011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_listen:"
247111534SKevin.Ge@Sun.COM 		    " fail to bind port: <%d>", status);
247211642SKevin.Ge@Sun.COM 		(void) ibd_deregister_service(state->rc_listen_hdl);
247311534SKevin.Ge@Sun.COM 		return (status);
247411534SKevin.Ge@Sun.COM 	}
247511534SKevin.Ge@Sun.COM 
247611534SKevin.Ge@Sun.COM 	/*
247711534SKevin.Ge@Sun.COM 	 * Legacy OFED had used a wrong service ID (one additional zero digit)
247811534SKevin.Ge@Sun.COM 	 * for many years. To interop with legacy OFED, we support this wrong
247911534SKevin.Ge@Sun.COM 	 * service ID here.
248011534SKevin.Ge@Sun.COM 	 */
248111534SKevin.Ge@Sun.COM 	ASSERT(state->rc_listen_hdl_OFED_interop == NULL);
248211534SKevin.Ge@Sun.COM 
248311534SKevin.Ge@Sun.COM 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
248411534SKevin.Ge@Sun.COM 	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
248511534SKevin.Ge@Sun.COM 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
248611534SKevin.Ge@Sun.COM 
248711534SKevin.Ge@Sun.COM 	/*
248811534SKevin.Ge@Sun.COM 	 * Register the service with service id
248911534SKevin.Ge@Sun.COM 	 * Incoming connection requests should arrive on this service id.
249011534SKevin.Ge@Sun.COM 	 */
249111642SKevin.Ge@Sun.COM 	status = ibd_register_service(&srvdesc,
249211534SKevin.Ge@Sun.COM 	    IBD_RC_QPN_TO_SID_OFED_INTEROP(state->id_qpnum),
249311534SKevin.Ge@Sun.COM 	    1, &state->rc_listen_hdl_OFED_interop, &ret_sid);
249411534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
249511534SKevin.Ge@Sun.COM 		DPRINT(40,
249611534SKevin.Ge@Sun.COM 		    "ibd_rc_listen: Service Registration for Legacy OFED "
249711534SKevin.Ge@Sun.COM 		    "Failed %d", status);
249811534SKevin.Ge@Sun.COM 		(void) ibt_unbind_service(state->rc_listen_hdl,
249911534SKevin.Ge@Sun.COM 		    state->rc_listen_bind);
250011642SKevin.Ge@Sun.COM 		(void) ibd_deregister_service(state->rc_listen_hdl);
250111534SKevin.Ge@Sun.COM 		return (status);
250211534SKevin.Ge@Sun.COM 	}
250311534SKevin.Ge@Sun.COM 
250411534SKevin.Ge@Sun.COM 	gid = state->id_sgid;
250511534SKevin.Ge@Sun.COM 
250611534SKevin.Ge@Sun.COM 	/* pass state as cm_private */
250711534SKevin.Ge@Sun.COM 	status = ibt_bind_service(state->rc_listen_hdl_OFED_interop,
250811534SKevin.Ge@Sun.COM 	    gid, NULL, state, &state->rc_listen_bind_OFED_interop);
250911534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
251011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_listen: fail to bind port: <%d> for "
251111534SKevin.Ge@Sun.COM 		    "Legacy OFED listener", status);
251211642SKevin.Ge@Sun.COM 		(void) ibd_deregister_service(
251311534SKevin.Ge@Sun.COM 		    state->rc_listen_hdl_OFED_interop);
251411534SKevin.Ge@Sun.COM 		(void) ibt_unbind_service(state->rc_listen_hdl,
251511534SKevin.Ge@Sun.COM 		    state->rc_listen_bind);
251611642SKevin.Ge@Sun.COM 		(void) ibd_deregister_service(state->rc_listen_hdl);
251711534SKevin.Ge@Sun.COM 		return (status);
251811534SKevin.Ge@Sun.COM 	}
251911534SKevin.Ge@Sun.COM 
252011534SKevin.Ge@Sun.COM 	return (IBT_SUCCESS);
252111534SKevin.Ge@Sun.COM }
252211534SKevin.Ge@Sun.COM 
252311534SKevin.Ge@Sun.COM void
ibd_rc_stop_listen(ibd_state_t * state)252411534SKevin.Ge@Sun.COM ibd_rc_stop_listen(ibd_state_t *state)
252511534SKevin.Ge@Sun.COM {
252611534SKevin.Ge@Sun.COM 	int ret;
252711534SKevin.Ge@Sun.COM 
252811534SKevin.Ge@Sun.COM 	/* Disable incoming connection requests */
252911534SKevin.Ge@Sun.COM 	if (state->rc_listen_hdl != NULL) {
253011534SKevin.Ge@Sun.COM 		ret = ibt_unbind_all_services(state->rc_listen_hdl);
253111534SKevin.Ge@Sun.COM 		if (ret != 0) {
253211534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_stop_listen:"
253311534SKevin.Ge@Sun.COM 			    "ibt_unbind_all_services() failed, ret=%d", ret);
253411534SKevin.Ge@Sun.COM 		}
253511642SKevin.Ge@Sun.COM 		ret = ibd_deregister_service(state->rc_listen_hdl);
253611534SKevin.Ge@Sun.COM 		if (ret != 0) {
253711534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_stop_listen:"
253811642SKevin.Ge@Sun.COM 			    "ibd_deregister_service() failed, ret=%d", ret);
253911534SKevin.Ge@Sun.COM 		} else {
254011534SKevin.Ge@Sun.COM 			state->rc_listen_hdl = NULL;
254111534SKevin.Ge@Sun.COM 		}
254211534SKevin.Ge@Sun.COM 	}
254311534SKevin.Ge@Sun.COM 
254411534SKevin.Ge@Sun.COM 	/* Disable incoming connection requests */
254511534SKevin.Ge@Sun.COM 	if (state->rc_listen_hdl_OFED_interop != NULL) {
254611534SKevin.Ge@Sun.COM 		ret = ibt_unbind_all_services(
254711534SKevin.Ge@Sun.COM 		    state->rc_listen_hdl_OFED_interop);
254811534SKevin.Ge@Sun.COM 		if (ret != 0) {
254911534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_stop_listen:"
255011534SKevin.Ge@Sun.COM 			    "ibt_unbind_all_services() failed: %d", ret);
255111534SKevin.Ge@Sun.COM 		}
255211642SKevin.Ge@Sun.COM 		ret = ibd_deregister_service(state->rc_listen_hdl_OFED_interop);
255311534SKevin.Ge@Sun.COM 		if (ret != 0) {
255411534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_stop_listen:"
255511642SKevin.Ge@Sun.COM 			    "ibd_deregister_service() failed: %d", ret);
255611534SKevin.Ge@Sun.COM 		} else {
255711534SKevin.Ge@Sun.COM 			state->rc_listen_hdl_OFED_interop = NULL;
255811534SKevin.Ge@Sun.COM 		}
255911534SKevin.Ge@Sun.COM 	}
256011534SKevin.Ge@Sun.COM }
256111534SKevin.Ge@Sun.COM 
256211642SKevin.Ge@Sun.COM void
ibd_rc_close_all_chan(ibd_state_t * state)256311534SKevin.Ge@Sun.COM ibd_rc_close_all_chan(ibd_state_t *state)
256411534SKevin.Ge@Sun.COM {
256511642SKevin.Ge@Sun.COM 	ibd_rc_chan_t *rc_chan;
256613030SKevin.Ge@Sun.COM 	ibd_ace_t *ace, *pre_ace;
256711534SKevin.Ge@Sun.COM 	uint_t attempts;
256811534SKevin.Ge@Sun.COM 
256911534SKevin.Ge@Sun.COM 	/* Disable all Rx routines */
257011534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
257111534SKevin.Ge@Sun.COM 	rc_chan = state->rc_pass_chan_list.chan_list;
257211534SKevin.Ge@Sun.COM 	while (rc_chan != NULL) {
257311534SKevin.Ge@Sun.COM 		ibt_set_cq_handler(rc_chan->rcq_hdl, 0, 0);
257411534SKevin.Ge@Sun.COM 		rc_chan = rc_chan->next;
257511534SKevin.Ge@Sun.COM 	}
257611534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
257711534SKevin.Ge@Sun.COM 
257811534SKevin.Ge@Sun.COM 	if (state->rc_enable_srq) {
257911642SKevin.Ge@Sun.COM 		attempts = 10;
258011534SKevin.Ge@Sun.COM 		while (state->rc_srq_rwqe_list.dl_bufs_outstanding > 0) {
258111534SKevin.Ge@Sun.COM 			DPRINT(30, "ibd_rc_close_all_chan: outstanding > 0");
258211534SKevin.Ge@Sun.COM 			delay(drv_usectohz(100000));
258311534SKevin.Ge@Sun.COM 			if (--attempts == 0) {
258411534SKevin.Ge@Sun.COM 				/*
258511534SKevin.Ge@Sun.COM 				 * There are pending bufs with the network
258611534SKevin.Ge@Sun.COM 				 * layer and we have no choice but to wait
258711534SKevin.Ge@Sun.COM 				 * for them to be done with. Reap all the
258811534SKevin.Ge@Sun.COM 				 * Tx/Rx completions that were posted since
258911534SKevin.Ge@Sun.COM 				 * we turned off the notification and
259011534SKevin.Ge@Sun.COM 				 * return failure.
259111534SKevin.Ge@Sun.COM 				 */
259211642SKevin.Ge@Sun.COM 				break;
259311534SKevin.Ge@Sun.COM 			}
259411534SKevin.Ge@Sun.COM 		}
259511534SKevin.Ge@Sun.COM 	}
259611534SKevin.Ge@Sun.COM 
259711534SKevin.Ge@Sun.COM 	/* Close all passive RC channels */
259811534SKevin.Ge@Sun.COM 	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
259911534SKevin.Ge@Sun.COM 	while (rc_chan != NULL) {
260013030SKevin.Ge@Sun.COM 		(void) ibd_rc_pas_close(rc_chan, B_TRUE, B_FALSE);
260111534SKevin.Ge@Sun.COM 		rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
260211534SKevin.Ge@Sun.COM 	}
260311534SKevin.Ge@Sun.COM 
260411534SKevin.Ge@Sun.COM 	/* Close all active RC channels */
260511534SKevin.Ge@Sun.COM 	mutex_enter(&state->id_ac_mutex);
260613030SKevin.Ge@Sun.COM 	state->id_ac_hot_ace = NULL;
260711534SKevin.Ge@Sun.COM 	ace = list_head(&state->id_ah_active);
260813030SKevin.Ge@Sun.COM 	while ((pre_ace = ace) != NULL) {
260913030SKevin.Ge@Sun.COM 		ace = list_next(&state->id_ah_active, ace);
261013030SKevin.Ge@Sun.COM 		if (pre_ace->ac_chan != NULL) {
261113030SKevin.Ge@Sun.COM 			INC_REF(pre_ace, 1);
261213030SKevin.Ge@Sun.COM 			IBD_ACACHE_PULLOUT_ACTIVE(state, pre_ace);
261313030SKevin.Ge@Sun.COM 			pre_ace->ac_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
261411534SKevin.Ge@Sun.COM 			ibd_rc_add_to_chan_list(&state->rc_obs_act_chan_list,
261513030SKevin.Ge@Sun.COM 			    pre_ace->ac_chan);
261611534SKevin.Ge@Sun.COM 		}
261711534SKevin.Ge@Sun.COM 	}
261811534SKevin.Ge@Sun.COM 	mutex_exit(&state->id_ac_mutex);
261911534SKevin.Ge@Sun.COM 
262011534SKevin.Ge@Sun.COM 	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_obs_act_chan_list);
262111534SKevin.Ge@Sun.COM 	while (rc_chan != NULL) {
262211534SKevin.Ge@Sun.COM 		ace = rc_chan->ace;
262313030SKevin.Ge@Sun.COM 		ibd_rc_act_close(rc_chan, B_TRUE);
262413030SKevin.Ge@Sun.COM 		if (ace != NULL) {
262513030SKevin.Ge@Sun.COM 			mutex_enter(&state->id_ac_mutex);
262613030SKevin.Ge@Sun.COM 			ASSERT(ace->ac_ref != 0);
262713030SKevin.Ge@Sun.COM 			atomic_dec_32(&ace->ac_ref);
262811534SKevin.Ge@Sun.COM 			ace->ac_chan = NULL;
262913030SKevin.Ge@Sun.COM 			if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
263013030SKevin.Ge@Sun.COM 				IBD_ACACHE_INSERT_FREE(state, ace);
263113030SKevin.Ge@Sun.COM 				ace->ac_ref = 0;
263213030SKevin.Ge@Sun.COM 			} else {
263313030SKevin.Ge@Sun.COM 				ace->ac_ref |= CYCLEVAL;
263413030SKevin.Ge@Sun.COM 				state->rc_delay_ace_recycle++;
263513030SKevin.Ge@Sun.COM 			}
263613030SKevin.Ge@Sun.COM 			mutex_exit(&state->id_ac_mutex);
263713030SKevin.Ge@Sun.COM 		}
263811534SKevin.Ge@Sun.COM 		rc_chan = ibd_rc_rm_header_chan_list(
263911534SKevin.Ge@Sun.COM 		    &state->rc_obs_act_chan_list);
264011534SKevin.Ge@Sun.COM 	}
264113030SKevin.Ge@Sun.COM 
264213030SKevin.Ge@Sun.COM 	attempts = 400;
264313030SKevin.Ge@Sun.COM 	while (((state->rc_num_tx_chan != 0) ||
264413030SKevin.Ge@Sun.COM 	    (state->rc_num_rx_chan != 0)) && (attempts > 0)) {
264513030SKevin.Ge@Sun.COM 		/* Other thread is closing CM channel, wait it */
264613030SKevin.Ge@Sun.COM 		delay(drv_usectohz(100000));
264713030SKevin.Ge@Sun.COM 		attempts--;
264813030SKevin.Ge@Sun.COM 	}
264911534SKevin.Ge@Sun.COM }
265011534SKevin.Ge@Sun.COM 
265111534SKevin.Ge@Sun.COM void
ibd_rc_try_connect(ibd_state_t * state,ibd_ace_t * ace,ibt_path_info_t * path)265211534SKevin.Ge@Sun.COM ibd_rc_try_connect(ibd_state_t *state, ibd_ace_t *ace,  ibt_path_info_t *path)
265311534SKevin.Ge@Sun.COM {
265411534SKevin.Ge@Sun.COM 	ibt_status_t status;
265511534SKevin.Ge@Sun.COM 
265613030SKevin.Ge@Sun.COM 	if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
265713030SKevin.Ge@Sun.COM 		return;
265813030SKevin.Ge@Sun.COM 
265911534SKevin.Ge@Sun.COM 	status = ibd_rc_connect(state, ace, path,
266011534SKevin.Ge@Sun.COM 	    IBD_RC_SERVICE_ID_OFED_INTEROP);
266111534SKevin.Ge@Sun.COM 
266211534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
266311534SKevin.Ge@Sun.COM 		/* wait peer side remove stale channel */
266411534SKevin.Ge@Sun.COM 		delay(drv_usectohz(10000));
266513030SKevin.Ge@Sun.COM 		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
266613030SKevin.Ge@Sun.COM 			return;
266711534SKevin.Ge@Sun.COM 		status = ibd_rc_connect(state, ace, path,
266811534SKevin.Ge@Sun.COM 		    IBD_RC_SERVICE_ID_OFED_INTEROP);
266911534SKevin.Ge@Sun.COM 	}
267011534SKevin.Ge@Sun.COM 
267111534SKevin.Ge@Sun.COM 	if (status != IBT_SUCCESS) {
267211534SKevin.Ge@Sun.COM 		/* wait peer side remove stale channel */
267311534SKevin.Ge@Sun.COM 		delay(drv_usectohz(10000));
267413030SKevin.Ge@Sun.COM 		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
267513030SKevin.Ge@Sun.COM 			return;
267611534SKevin.Ge@Sun.COM 		(void) ibd_rc_connect(state, ace, path,
267711534SKevin.Ge@Sun.COM 		    IBD_RC_SERVICE_ID);
267811534SKevin.Ge@Sun.COM 	}
267911534SKevin.Ge@Sun.COM }
268011534SKevin.Ge@Sun.COM 
268111534SKevin.Ge@Sun.COM /*
268211534SKevin.Ge@Sun.COM  * Allocates channel and sets the ace->ac_chan to it.
268311534SKevin.Ge@Sun.COM  * Opens the channel.
268411534SKevin.Ge@Sun.COM  */
268511534SKevin.Ge@Sun.COM ibt_status_t
ibd_rc_connect(ibd_state_t * state,ibd_ace_t * ace,ibt_path_info_t * path,uint64_t ietf_cm_service_id)268611534SKevin.Ge@Sun.COM ibd_rc_connect(ibd_state_t *state, ibd_ace_t *ace,  ibt_path_info_t *path,
268711534SKevin.Ge@Sun.COM     uint64_t ietf_cm_service_id)
268811534SKevin.Ge@Sun.COM {
268911534SKevin.Ge@Sun.COM 	ibt_status_t status = 0;
269011534SKevin.Ge@Sun.COM 	ibt_rc_returns_t open_returns;
269111534SKevin.Ge@Sun.COM 	ibt_chan_open_args_t open_args;
269211534SKevin.Ge@Sun.COM 	ibd_rc_msg_hello_t hello_req_msg;
269311534SKevin.Ge@Sun.COM 	ibd_rc_msg_hello_t *hello_ack_msg;
269411534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan;
269513030SKevin.Ge@Sun.COM 	ibt_ud_dest_query_attr_t dest_attrs;
269611534SKevin.Ge@Sun.COM 
269711534SKevin.Ge@Sun.COM 	ASSERT(ace != NULL);
269811534SKevin.Ge@Sun.COM 	ASSERT(ace->ac_mce == NULL);
269911534SKevin.Ge@Sun.COM 	ASSERT(ace->ac_chan == NULL);
270011534SKevin.Ge@Sun.COM 
270111534SKevin.Ge@Sun.COM 	if ((status = ibd_rc_alloc_chan(&chan, state, B_TRUE)) != IBT_SUCCESS) {
270211534SKevin.Ge@Sun.COM 		DPRINT(10, "ibd_rc_connect: ibd_rc_alloc_chan() failed");
270311534SKevin.Ge@Sun.COM 		return (status);
270411534SKevin.Ge@Sun.COM 	}
270511534SKevin.Ge@Sun.COM 
270611534SKevin.Ge@Sun.COM 	ace->ac_chan = chan;
270711534SKevin.Ge@Sun.COM 	chan->state = state;
270811534SKevin.Ge@Sun.COM 	chan->ace = ace;
270911534SKevin.Ge@Sun.COM 
271011534SKevin.Ge@Sun.COM 	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)ace);
271111534SKevin.Ge@Sun.COM 
271211534SKevin.Ge@Sun.COM 	hello_ack_msg = kmem_zalloc(sizeof (ibd_rc_msg_hello_t), KM_SLEEP);
271311534SKevin.Ge@Sun.COM 
271411534SKevin.Ge@Sun.COM 	/*
271511534SKevin.Ge@Sun.COM 	 * open the channels
271611534SKevin.Ge@Sun.COM 	 */
271711534SKevin.Ge@Sun.COM 	bzero(&open_args, sizeof (ibt_chan_open_args_t));
271811534SKevin.Ge@Sun.COM 	bzero(&open_returns, sizeof (ibt_rc_returns_t));
271911534SKevin.Ge@Sun.COM 
272011534SKevin.Ge@Sun.COM 	open_args.oc_cm_handler = ibd_rc_dispatch_actv_mad;
272111534SKevin.Ge@Sun.COM 	open_args.oc_cm_clnt_private = (void *)(uintptr_t)ace;
272211534SKevin.Ge@Sun.COM 
272311534SKevin.Ge@Sun.COM 	/*
272411534SKevin.Ge@Sun.COM 	 * update path record with the SID
272511534SKevin.Ge@Sun.COM 	 */
272613030SKevin.Ge@Sun.COM 	if ((status = ibt_query_ud_dest(ace->ac_dest, &dest_attrs))
272713030SKevin.Ge@Sun.COM 	    != IBT_SUCCESS) {
272813030SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_connect: ibt_query_ud_dest() failed, "
272913030SKevin.Ge@Sun.COM 		    "ret=%d", status);
273013030SKevin.Ge@Sun.COM 		return (status);
273113030SKevin.Ge@Sun.COM 	}
273213030SKevin.Ge@Sun.COM 
273311534SKevin.Ge@Sun.COM 	path->pi_sid =
273413030SKevin.Ge@Sun.COM 	    ietf_cm_service_id | ((dest_attrs.ud_dst_qpn) & 0xffffff);
273511534SKevin.Ge@Sun.COM 
273611534SKevin.Ge@Sun.COM 
273711534SKevin.Ge@Sun.COM 	/* pre-allocate memory for hello ack message */
273811534SKevin.Ge@Sun.COM 	open_returns.rc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
273911534SKevin.Ge@Sun.COM 	open_returns.rc_priv_data = hello_ack_msg;
274011534SKevin.Ge@Sun.COM 
274111534SKevin.Ge@Sun.COM 	open_args.oc_path = path;
274211534SKevin.Ge@Sun.COM 
274313030SKevin.Ge@Sun.COM 	open_args.oc_path_rnr_retry_cnt	= 1;
274413030SKevin.Ge@Sun.COM 	open_args.oc_path_retry_cnt = 1;
274511534SKevin.Ge@Sun.COM 
274611534SKevin.Ge@Sun.COM 	/* We don't do RDMA */
274711534SKevin.Ge@Sun.COM 	open_args.oc_rdma_ra_out = 0;
274811534SKevin.Ge@Sun.COM 	open_args.oc_rdma_ra_in	= 0;
274911534SKevin.Ge@Sun.COM 
275011534SKevin.Ge@Sun.COM 	hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
275111534SKevin.Ge@Sun.COM 	hello_req_msg.rx_mtu = htonl(state->rc_mtu);
275211534SKevin.Ge@Sun.COM 	open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
275311534SKevin.Ge@Sun.COM 	open_args.oc_priv_data = (void *)(&hello_req_msg);
275411534SKevin.Ge@Sun.COM 
275511534SKevin.Ge@Sun.COM 	ASSERT(open_args.oc_priv_data_len <= IBT_REQ_PRIV_DATA_SZ);
275611534SKevin.Ge@Sun.COM 	ASSERT(open_returns.rc_priv_data_len <= IBT_REP_PRIV_DATA_SZ);
275711534SKevin.Ge@Sun.COM 	ASSERT(open_args.oc_cm_handler != NULL);
275811534SKevin.Ge@Sun.COM 
275911534SKevin.Ge@Sun.COM 	status = ibt_open_rc_channel(chan->chan_hdl, IBT_OCHAN_NO_FLAGS,
276011534SKevin.Ge@Sun.COM 	    IBT_BLOCKING, &open_args, &open_returns);
276111534SKevin.Ge@Sun.COM 
276211534SKevin.Ge@Sun.COM 	if (status == IBT_SUCCESS) {
276311534SKevin.Ge@Sun.COM 		/* Success! */
276411534SKevin.Ge@Sun.COM 		DPRINT(2, "ibd_rc_connect: call ibt_open_rc_channel succ!");
276511534SKevin.Ge@Sun.COM 		state->rc_conn_succ++;
276611534SKevin.Ge@Sun.COM 		kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
276711534SKevin.Ge@Sun.COM 		return (IBT_SUCCESS);
276811534SKevin.Ge@Sun.COM 	}
276911534SKevin.Ge@Sun.COM 
277011534SKevin.Ge@Sun.COM 	/* failure */
277111534SKevin.Ge@Sun.COM 	(void) ibt_flush_channel(chan->chan_hdl);
277211534SKevin.Ge@Sun.COM 	ibd_rc_free_chan(chan);
277311534SKevin.Ge@Sun.COM 	ace->ac_chan = NULL;
277411534SKevin.Ge@Sun.COM 
277511534SKevin.Ge@Sun.COM 	/* check open_returns report error and exit */
277611534SKevin.Ge@Sun.COM 	DPRINT(30, "ibd_rc_connect: call ibt_open_rc_chan fail."
277711534SKevin.Ge@Sun.COM 	    "ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x,"
277811534SKevin.Ge@Sun.COM 	    " peer qpn=0x%x", status, (int)open_returns.rc_status, ace,
277911534SKevin.Ge@Sun.COM 	    hello_req_msg.rx_mtu, hello_req_msg.reserved_qpn,
278013030SKevin.Ge@Sun.COM 	    dest_attrs.ud_dst_qpn);
278111534SKevin.Ge@Sun.COM 	kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
278211534SKevin.Ge@Sun.COM 	return (status);
278311534SKevin.Ge@Sun.COM }
278411534SKevin.Ge@Sun.COM 
278511534SKevin.Ge@Sun.COM void
ibd_rc_signal_act_close(ibd_state_t * state,ibd_ace_t * ace)278611534SKevin.Ge@Sun.COM ibd_rc_signal_act_close(ibd_state_t *state, ibd_ace_t *ace)
278711534SKevin.Ge@Sun.COM {
278811534SKevin.Ge@Sun.COM 	ibd_req_t *req;
278911534SKevin.Ge@Sun.COM 
279011534SKevin.Ge@Sun.COM 	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
279111534SKevin.Ge@Sun.COM 	if (req == NULL) {
279211534SKevin.Ge@Sun.COM 		ibd_print_warn(state, "ibd_rc_signal_act_close: alloc "
279311534SKevin.Ge@Sun.COM 		    "ibd_req_t fail");
279411534SKevin.Ge@Sun.COM 		mutex_enter(&state->rc_obs_act_chan_list.chan_list_mutex);
279511534SKevin.Ge@Sun.COM 		ace->ac_chan->next = state->rc_obs_act_chan_list.chan_list;
279611534SKevin.Ge@Sun.COM 		state->rc_obs_act_chan_list.chan_list = ace->ac_chan;
279711534SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_obs_act_chan_list.chan_list_mutex);
279811534SKevin.Ge@Sun.COM 	} else {
279911534SKevin.Ge@Sun.COM 		req->rq_ptr = ace->ac_chan;
280011534SKevin.Ge@Sun.COM 		ibd_queue_work_slot(state, req, IBD_ASYNC_RC_CLOSE_ACT_CHAN);
280111534SKevin.Ge@Sun.COM 	}
280211534SKevin.Ge@Sun.COM }
280311534SKevin.Ge@Sun.COM 
280411534SKevin.Ge@Sun.COM void
ibd_rc_signal_ace_recycle(ibd_state_t * state,ibd_ace_t * ace)280511534SKevin.Ge@Sun.COM ibd_rc_signal_ace_recycle(ibd_state_t *state, ibd_ace_t *ace)
280611534SKevin.Ge@Sun.COM {
280711534SKevin.Ge@Sun.COM 	ibd_req_t *req;
280811534SKevin.Ge@Sun.COM 
280911534SKevin.Ge@Sun.COM 	mutex_enter(&state->rc_ace_recycle_lock);
281011534SKevin.Ge@Sun.COM 	if (state->rc_ace_recycle != NULL) {
281111534SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_ace_recycle_lock);
281211534SKevin.Ge@Sun.COM 		return;
281311534SKevin.Ge@Sun.COM 	}
281411534SKevin.Ge@Sun.COM 
281511534SKevin.Ge@Sun.COM 	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
281611534SKevin.Ge@Sun.COM 	if (req == NULL) {
281711534SKevin.Ge@Sun.COM 		mutex_exit(&state->rc_ace_recycle_lock);
281811534SKevin.Ge@Sun.COM 		return;
281911534SKevin.Ge@Sun.COM 	}
282011534SKevin.Ge@Sun.COM 
282111534SKevin.Ge@Sun.COM 	state->rc_ace_recycle = ace;
282211534SKevin.Ge@Sun.COM 	mutex_exit(&state->rc_ace_recycle_lock);
282311534SKevin.Ge@Sun.COM 	ASSERT(ace->ac_mce == NULL);
282411534SKevin.Ge@Sun.COM 	INC_REF(ace, 1);
282511534SKevin.Ge@Sun.COM 	IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
282611534SKevin.Ge@Sun.COM 	req->rq_ptr = ace;
282711534SKevin.Ge@Sun.COM 	ibd_queue_work_slot(state, req, IBD_ASYNC_RC_RECYCLE_ACE);
282811534SKevin.Ge@Sun.COM }
282911534SKevin.Ge@Sun.COM 
283013030SKevin.Ge@Sun.COM /*
283113030SKevin.Ge@Sun.COM  * Close an active channel
283213030SKevin.Ge@Sun.COM  *
283313030SKevin.Ge@Sun.COM  * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
283413030SKevin.Ge@Sun.COM  */
283511534SKevin.Ge@Sun.COM static void
ibd_rc_act_close(ibd_rc_chan_t * chan,boolean_t is_close_rc_chan)283613030SKevin.Ge@Sun.COM ibd_rc_act_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan)
283711534SKevin.Ge@Sun.COM {
283813030SKevin.Ge@Sun.COM 	ibd_state_t *state;
283913030SKevin.Ge@Sun.COM 	ibd_ace_t *ace;
284011534SKevin.Ge@Sun.COM 	uint_t times;
284111534SKevin.Ge@Sun.COM 	ibt_status_t ret;
284211534SKevin.Ge@Sun.COM 
284311534SKevin.Ge@Sun.COM 	ASSERT(chan != NULL);
284411534SKevin.Ge@Sun.COM 
284511534SKevin.Ge@Sun.COM 	chan->state->rc_act_close++;
284611534SKevin.Ge@Sun.COM 	switch (chan->chan_state) {
284711534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_ACT_CLOSING:	/* stale, close it */
284811534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_ACT_ESTAB:
284911534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_act_close-1: close and free chan, "
285011534SKevin.Ge@Sun.COM 		    "act_state=%d, chan=%p", chan->chan_state, chan);
285111534SKevin.Ge@Sun.COM 		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
285211534SKevin.Ge@Sun.COM 		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
285313030SKevin.Ge@Sun.COM 		/*
285413030SKevin.Ge@Sun.COM 		 * Wait send queue empty. Its old value is 50 (5 seconds). But
285513030SKevin.Ge@Sun.COM 		 * in my experiment, 5 seconds is not enough time to let IBTL
285613030SKevin.Ge@Sun.COM 		 * return all buffers and ace->ac_ref. I tried 25 seconds, it
285713030SKevin.Ge@Sun.COM 		 * works well. As another evidence, I saw IBTL takes about 17
285813030SKevin.Ge@Sun.COM 		 * seconds every time it cleans a stale RC channel.
285913030SKevin.Ge@Sun.COM 		 */
286013030SKevin.Ge@Sun.COM 		times = 250;
286113030SKevin.Ge@Sun.COM 		ace = chan->ace;
286213030SKevin.Ge@Sun.COM 		ASSERT(ace != NULL);
286313030SKevin.Ge@Sun.COM 		state = chan->state;
286413030SKevin.Ge@Sun.COM 		ASSERT(state != NULL);
286513030SKevin.Ge@Sun.COM 		mutex_enter(&state->id_ac_mutex);
286611534SKevin.Ge@Sun.COM 		mutex_enter(&chan->tx_wqe_list.dl_mutex);
286711534SKevin.Ge@Sun.COM 		mutex_enter(&chan->tx_rel_list.dl_mutex);
286811534SKevin.Ge@Sun.COM 		while (((chan->tx_wqe_list.dl_cnt + chan->tx_rel_list.dl_cnt)
286913030SKevin.Ge@Sun.COM 		    != chan->scq_size) || ((ace->ac_ref != 1) &&
287013030SKevin.Ge@Sun.COM 		    (ace->ac_ref != (CYCLEVAL+1)))) {
287111534SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_rel_list.dl_mutex);
287211534SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_wqe_list.dl_mutex);
287313030SKevin.Ge@Sun.COM 			mutex_exit(&state->id_ac_mutex);
287413030SKevin.Ge@Sun.COM 			times--;
287513030SKevin.Ge@Sun.COM 			if (times == 0) {
287613030SKevin.Ge@Sun.COM 				state->rc_act_close_not_clean++;
287713030SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_act_close: dl_cnt(tx_wqe_"
287813030SKevin.Ge@Sun.COM 				    "list=%d, tx_rel_list=%d) != chan->"
287913030SKevin.Ge@Sun.COM 				    "scq_size=%d, OR ac_ref(=%d) not clean",
288013030SKevin.Ge@Sun.COM 				    chan->tx_wqe_list.dl_cnt,
288113030SKevin.Ge@Sun.COM 				    chan->tx_rel_list.dl_cnt,
288213030SKevin.Ge@Sun.COM 				    chan->scq_size, ace->ac_ref);
288313030SKevin.Ge@Sun.COM 				break;
288413030SKevin.Ge@Sun.COM 			}
288511534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_poll_lock);
288611534SKevin.Ge@Sun.COM 			if (chan->tx_poll_busy & IBD_CQ_POLLING) {
288711534SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_act_close: multiple "
288811534SKevin.Ge@Sun.COM 				    "polling threads");
288911534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_poll_lock);
289011534SKevin.Ge@Sun.COM 			} else {
289111534SKevin.Ge@Sun.COM 				chan->tx_poll_busy = IBD_CQ_POLLING;
289211534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_poll_lock);
289311534SKevin.Ge@Sun.COM 				ibd_rc_drain_scq(chan, chan->scq_hdl);
289411534SKevin.Ge@Sun.COM 				mutex_enter(&chan->tx_poll_lock);
289511534SKevin.Ge@Sun.COM 				chan->tx_poll_busy = 0;
289611534SKevin.Ge@Sun.COM 				mutex_exit(&chan->tx_poll_lock);
289711534SKevin.Ge@Sun.COM 			}
289811534SKevin.Ge@Sun.COM 			delay(drv_usectohz(100000));
289913030SKevin.Ge@Sun.COM 			mutex_enter(&state->id_ac_mutex);
290011534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_wqe_list.dl_mutex);
290111534SKevin.Ge@Sun.COM 			mutex_enter(&chan->tx_rel_list.dl_mutex);
290211534SKevin.Ge@Sun.COM 		}
290313030SKevin.Ge@Sun.COM 		if (times != 0) {
290413030SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_rel_list.dl_mutex);
290513030SKevin.Ge@Sun.COM 			mutex_exit(&chan->tx_wqe_list.dl_mutex);
290613030SKevin.Ge@Sun.COM 			mutex_exit(&state->id_ac_mutex);
290713030SKevin.Ge@Sun.COM 		}
290813030SKevin.Ge@Sun.COM 
290911534SKevin.Ge@Sun.COM 		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
291013030SKevin.Ge@Sun.COM 		if (is_close_rc_chan) {
291113030SKevin.Ge@Sun.COM 			ret = ibt_close_rc_channel(chan->chan_hdl,
291213030SKevin.Ge@Sun.COM 			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
291313030SKevin.Ge@Sun.COM 			    0);
291413030SKevin.Ge@Sun.COM 			if (ret != IBT_SUCCESS) {
291513030SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_act_close: ibt_close_rc_"
291613030SKevin.Ge@Sun.COM 				    "channel fail, chan=%p, ret=%d",
291713030SKevin.Ge@Sun.COM 				    chan, ret);
291813030SKevin.Ge@Sun.COM 			} else {
291913030SKevin.Ge@Sun.COM 				DPRINT(30, "ibd_rc_act_close: ibt_close_rc_"
292013030SKevin.Ge@Sun.COM 				    "channel succ, chan=%p", chan);
292113030SKevin.Ge@Sun.COM 			}
292211534SKevin.Ge@Sun.COM 		}
292311534SKevin.Ge@Sun.COM 
292411534SKevin.Ge@Sun.COM 		ibd_rc_free_chan(chan);
292511534SKevin.Ge@Sun.COM 		break;
292611534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_ACT_REP_RECV:
292711534SKevin.Ge@Sun.COM 		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
292811534SKevin.Ge@Sun.COM 		(void) ibt_flush_channel(chan->chan_hdl);
292911534SKevin.Ge@Sun.COM 		ibd_rc_free_chan(chan);
293011534SKevin.Ge@Sun.COM 		break;
293111534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_ACT_ERROR:
293211534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_act_close: IBD_RC_STATE_ERROR branch");
293311534SKevin.Ge@Sun.COM 		break;
293411534SKevin.Ge@Sun.COM 	default:
293511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_act_close: default branch, act_state=%d, "
293611534SKevin.Ge@Sun.COM 		    "chan=%p", chan->chan_state, chan);
293711534SKevin.Ge@Sun.COM 	}
293811534SKevin.Ge@Sun.COM }
293911534SKevin.Ge@Sun.COM 
294013030SKevin.Ge@Sun.COM /*
294113030SKevin.Ge@Sun.COM  * Close a passive channel
294213030SKevin.Ge@Sun.COM  *
294313030SKevin.Ge@Sun.COM  * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
294413030SKevin.Ge@Sun.COM  *
294513030SKevin.Ge@Sun.COM  * is_timeout_close: if B_TRUE, this function is called by the connection
294613030SKevin.Ge@Sun.COM  * reaper (refer to function ibd_rc_conn_timeout_call). When the connection
294713030SKevin.Ge@Sun.COM  * reaper calls ibd_rc_pas_close(), and if it finds that dl_bufs_outstanding
294813030SKevin.Ge@Sun.COM  * or chan->rcq_invoking is non-zero, then it can simply put that channel back
294913030SKevin.Ge@Sun.COM  * on the passive channels list and move on, since it might be an indication
295013030SKevin.Ge@Sun.COM  * that the channel became active again by the time we started it's cleanup.
295113030SKevin.Ge@Sun.COM  * It is costlier to do the cleanup and then reinitiate the channel
295213030SKevin.Ge@Sun.COM  * establishment and hence it will help to be conservative when we do the
295313030SKevin.Ge@Sun.COM  * cleanup.
295413030SKevin.Ge@Sun.COM  */
295513030SKevin.Ge@Sun.COM int
ibd_rc_pas_close(ibd_rc_chan_t * chan,boolean_t is_close_rc_chan,boolean_t is_timeout_close)295613030SKevin.Ge@Sun.COM ibd_rc_pas_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan,
295713030SKevin.Ge@Sun.COM     boolean_t is_timeout_close)
295811534SKevin.Ge@Sun.COM {
295911534SKevin.Ge@Sun.COM 	uint_t times;
296011534SKevin.Ge@Sun.COM 	ibt_status_t ret;
296111534SKevin.Ge@Sun.COM 
296211534SKevin.Ge@Sun.COM 	ASSERT(chan != NULL);
296311534SKevin.Ge@Sun.COM 	chan->state->rc_pas_close++;
296411534SKevin.Ge@Sun.COM 
296511534SKevin.Ge@Sun.COM 	switch (chan->chan_state) {
296611534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_PAS_ESTAB:
296713030SKevin.Ge@Sun.COM 		if (is_timeout_close) {
296813030SKevin.Ge@Sun.COM 			if ((chan->rcq_invoking != 0) ||
296913030SKevin.Ge@Sun.COM 			    ((!chan->state->rc_enable_srq) &&
297013030SKevin.Ge@Sun.COM 			    (chan->rx_wqe_list.dl_bufs_outstanding > 0))) {
297113030SKevin.Ge@Sun.COM 				if (ibd_rc_re_add_to_pas_chan_list(chan)) {
297213030SKevin.Ge@Sun.COM 					return (DDI_FAILURE);
297313030SKevin.Ge@Sun.COM 				}
297413030SKevin.Ge@Sun.COM 			}
297513030SKevin.Ge@Sun.COM 		}
297611534SKevin.Ge@Sun.COM 		/*
297711534SKevin.Ge@Sun.COM 		 * First, stop receive interrupts; this stops the
297811534SKevin.Ge@Sun.COM 		 * connection from handing up buffers to higher layers.
297911534SKevin.Ge@Sun.COM 		 * Wait for receive buffers to be returned; give up
298011534SKevin.Ge@Sun.COM 		 * after 5 seconds.
298111534SKevin.Ge@Sun.COM 		 */
298211534SKevin.Ge@Sun.COM 		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
298313030SKevin.Ge@Sun.COM 		/* Wait 0.01 second to let ibt_set_cq_handler() take effect */
298413030SKevin.Ge@Sun.COM 		delay(drv_usectohz(10000));
298511534SKevin.Ge@Sun.COM 		if (!chan->state->rc_enable_srq) {
298611534SKevin.Ge@Sun.COM 			times = 50;
298711534SKevin.Ge@Sun.COM 			while (chan->rx_wqe_list.dl_bufs_outstanding > 0) {
298811534SKevin.Ge@Sun.COM 				delay(drv_usectohz(100000));
298911534SKevin.Ge@Sun.COM 				if (--times == 0) {
299011534SKevin.Ge@Sun.COM 					DPRINT(40, "ibd_rc_pas_close : "
299111534SKevin.Ge@Sun.COM 					    "reclaiming failed");
299211534SKevin.Ge@Sun.COM 					ibd_rc_poll_rcq(chan, chan->rcq_hdl);
299311534SKevin.Ge@Sun.COM 					ibt_set_cq_handler(chan->rcq_hdl,
299411534SKevin.Ge@Sun.COM 					    ibd_rc_rcq_handler,
299511534SKevin.Ge@Sun.COM 					    (void *)(uintptr_t)chan);
299611534SKevin.Ge@Sun.COM 					return (DDI_FAILURE);
299711534SKevin.Ge@Sun.COM 				}
299811534SKevin.Ge@Sun.COM 			}
299911534SKevin.Ge@Sun.COM 		}
300013030SKevin.Ge@Sun.COM 		times = 50;
300113030SKevin.Ge@Sun.COM 		while (chan->rcq_invoking != 0) {
300213030SKevin.Ge@Sun.COM 			delay(drv_usectohz(100000));
300313030SKevin.Ge@Sun.COM 			if (--times == 0) {
300413030SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_pas_close : "
300513030SKevin.Ge@Sun.COM 				    "rcq handler is being invoked");
300613030SKevin.Ge@Sun.COM 				chan->state->rc_pas_close_rcq_invoking++;
300713030SKevin.Ge@Sun.COM 				break;
300813030SKevin.Ge@Sun.COM 			}
300913030SKevin.Ge@Sun.COM 		}
301011534SKevin.Ge@Sun.COM 		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
301111534SKevin.Ge@Sun.COM 		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
301211534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_pas_close-1: close and free chan, "
301311534SKevin.Ge@Sun.COM 		    "chan_state=%d, chan=%p", chan->chan_state, chan);
301413030SKevin.Ge@Sun.COM 		if (is_close_rc_chan) {
301513030SKevin.Ge@Sun.COM 			ret = ibt_close_rc_channel(chan->chan_hdl,
301613030SKevin.Ge@Sun.COM 			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
301713030SKevin.Ge@Sun.COM 			    0);
301813030SKevin.Ge@Sun.COM 			if (ret != IBT_SUCCESS) {
301913030SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_pas_close: ibt_close_rc_"
302013030SKevin.Ge@Sun.COM 				    "channel() fail, chan=%p, ret=%d", chan,
302113030SKevin.Ge@Sun.COM 				    ret);
302213030SKevin.Ge@Sun.COM 			} else {
302313030SKevin.Ge@Sun.COM 				DPRINT(30, "ibd_rc_pas_close: ibt_close_rc_"
302413030SKevin.Ge@Sun.COM 				    "channel() succ, chan=%p", chan);
302513030SKevin.Ge@Sun.COM 			}
302611534SKevin.Ge@Sun.COM 		}
302711534SKevin.Ge@Sun.COM 		ibd_rc_free_chan(chan);
302811534SKevin.Ge@Sun.COM 		break;
302911534SKevin.Ge@Sun.COM 	case IBD_RC_STATE_PAS_REQ_RECV:
303011534SKevin.Ge@Sun.COM 		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
303111534SKevin.Ge@Sun.COM 		(void) ibt_flush_channel(chan->chan_hdl);
303211534SKevin.Ge@Sun.COM 		ibd_rc_free_chan(chan);
303311534SKevin.Ge@Sun.COM 		break;
303411534SKevin.Ge@Sun.COM 	default:
303511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_pas_close: default, chan_state=%d, chan=%p",
303611534SKevin.Ge@Sun.COM 		    chan->chan_state, chan);
303711534SKevin.Ge@Sun.COM 	}
303811534SKevin.Ge@Sun.COM 	return (DDI_SUCCESS);
303911534SKevin.Ge@Sun.COM }
304011534SKevin.Ge@Sun.COM 
304111534SKevin.Ge@Sun.COM /*
304211534SKevin.Ge@Sun.COM  * Passive Side:
304311534SKevin.Ge@Sun.COM  *	Handle an incoming CM REQ from active side.
304411534SKevin.Ge@Sun.COM  *
304511534SKevin.Ge@Sun.COM  *	If success, this function allocates an ibd_rc_chan_t, then
304611534SKevin.Ge@Sun.COM  * assigns it to "*ret_conn".
304711534SKevin.Ge@Sun.COM  */
304811534SKevin.Ge@Sun.COM static ibt_cm_status_t
ibd_rc_handle_req(void * arg,ibd_rc_chan_t ** ret_conn,ibt_cm_event_t * ibt_cm_event,ibt_cm_return_args_t * ret_args,void * ret_priv_data)304911534SKevin.Ge@Sun.COM ibd_rc_handle_req(void *arg, ibd_rc_chan_t **ret_conn,
305011534SKevin.Ge@Sun.COM     ibt_cm_event_t *ibt_cm_event, ibt_cm_return_args_t *ret_args,
305111534SKevin.Ge@Sun.COM     void *ret_priv_data)
305211534SKevin.Ge@Sun.COM {
305311534SKevin.Ge@Sun.COM 	ibd_rc_msg_hello_t *hello_msg;
305411534SKevin.Ge@Sun.COM 	ibd_state_t *state = (ibd_state_t *)arg;
305511534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan;
305611534SKevin.Ge@Sun.COM 
305711534SKevin.Ge@Sun.COM 	if (ibd_rc_alloc_chan(&chan, state, B_FALSE) != IBT_SUCCESS) {
305811534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_handle_req: ibd_rc_alloc_chan() failed");
305911534SKevin.Ge@Sun.COM 		return (IBT_CM_REJECT);
306011534SKevin.Ge@Sun.COM 	}
306111534SKevin.Ge@Sun.COM 
306211534SKevin.Ge@Sun.COM 	ibd_rc_add_to_chan_list(&state->rc_pass_chan_list, chan);
306311534SKevin.Ge@Sun.COM 
306411534SKevin.Ge@Sun.COM 	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)chan);
306511534SKevin.Ge@Sun.COM 
306611534SKevin.Ge@Sun.COM 	if (!state->rc_enable_srq) {
306711534SKevin.Ge@Sun.COM 		if (ibd_rc_init_rxlist(chan) != DDI_SUCCESS) {
306811534SKevin.Ge@Sun.COM 			ibd_rc_free_chan(chan);
306911534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_handle_req: ibd_rc_init_rxlist() "
307011534SKevin.Ge@Sun.COM 			    "failed");
307111534SKevin.Ge@Sun.COM 			return (IBT_CM_REJECT);
307211534SKevin.Ge@Sun.COM 		}
307311534SKevin.Ge@Sun.COM 	}
307411534SKevin.Ge@Sun.COM 
307511534SKevin.Ge@Sun.COM 	ret_args->cm_ret.rep.cm_channel = chan->chan_hdl;
307611534SKevin.Ge@Sun.COM 
307711534SKevin.Ge@Sun.COM 	/* We don't do RDMA */
307811534SKevin.Ge@Sun.COM 	ret_args->cm_ret.rep.cm_rdma_ra_out = 0;
307911534SKevin.Ge@Sun.COM 	ret_args->cm_ret.rep.cm_rdma_ra_in = 0;
308011534SKevin.Ge@Sun.COM 
308111534SKevin.Ge@Sun.COM 	ret_args->cm_ret.rep.cm_rnr_retry_cnt = 7;
308211534SKevin.Ge@Sun.COM 	ret_args->cm_ret_len = sizeof (ibd_rc_msg_hello_t);
308311534SKevin.Ge@Sun.COM 
308411534SKevin.Ge@Sun.COM 	hello_msg = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
308511534SKevin.Ge@Sun.COM 	DPRINT(30, "ibd_rc_handle_req(): peer qpn=0x%x, peer mtu=0x%x",
308611534SKevin.Ge@Sun.COM 	    ntohl(hello_msg->reserved_qpn), ntohl(hello_msg->rx_mtu));
308711534SKevin.Ge@Sun.COM 
308811534SKevin.Ge@Sun.COM 	hello_msg = (ibd_rc_msg_hello_t *)ret_priv_data;
308911534SKevin.Ge@Sun.COM 	hello_msg->reserved_qpn = htonl(state->id_qpnum);
309011534SKevin.Ge@Sun.COM 	hello_msg->rx_mtu = htonl(state->rc_mtu);
309111534SKevin.Ge@Sun.COM 
309211534SKevin.Ge@Sun.COM 	chan->chan_state = IBD_RC_STATE_PAS_REQ_RECV;	/* ready to receive */
309311534SKevin.Ge@Sun.COM 	*ret_conn = chan;
309411534SKevin.Ge@Sun.COM 
309511534SKevin.Ge@Sun.COM 	return (IBT_CM_ACCEPT);
309611534SKevin.Ge@Sun.COM }
309711534SKevin.Ge@Sun.COM 
309811534SKevin.Ge@Sun.COM /*
309911534SKevin.Ge@Sun.COM  * ibd_rc_handle_act_estab -- handler for connection established completion
310011534SKevin.Ge@Sun.COM  * for active side.
310111534SKevin.Ge@Sun.COM  */
310211534SKevin.Ge@Sun.COM static ibt_cm_status_t
ibd_rc_handle_act_estab(ibd_ace_t * ace)310311534SKevin.Ge@Sun.COM ibd_rc_handle_act_estab(ibd_ace_t *ace)
310411534SKevin.Ge@Sun.COM {
310511534SKevin.Ge@Sun.COM 	ibt_status_t result;
310611534SKevin.Ge@Sun.COM 
310711534SKevin.Ge@Sun.COM 	switch (ace->ac_chan->chan_state) {
310811534SKevin.Ge@Sun.COM 		case IBD_RC_STATE_ACT_REP_RECV:
310911534SKevin.Ge@Sun.COM 			ace->ac_chan->chan_state = IBD_RC_STATE_ACT_ESTAB;
311011534SKevin.Ge@Sun.COM 			result = ibt_enable_cq_notify(ace->ac_chan->rcq_hdl,
311111534SKevin.Ge@Sun.COM 			    IBT_NEXT_COMPLETION);
311211534SKevin.Ge@Sun.COM 			if (result != IBT_SUCCESS) {
311311534SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_handle_act_estab: "
311411534SKevin.Ge@Sun.COM 				    "ibt_enable_cq_notify(rcq) "
311511534SKevin.Ge@Sun.COM 				    "failed: status %d", result);
311611534SKevin.Ge@Sun.COM 				return (IBT_CM_REJECT);
311711534SKevin.Ge@Sun.COM 			}
311811534SKevin.Ge@Sun.COM 			break;
311911534SKevin.Ge@Sun.COM 		default:
312011534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_handle_act_estab: default "
312111534SKevin.Ge@Sun.COM 			    "branch, act_state=%d", ace->ac_chan->chan_state);
312211534SKevin.Ge@Sun.COM 			return (IBT_CM_REJECT);
312311534SKevin.Ge@Sun.COM 	}
312411534SKevin.Ge@Sun.COM 	return (IBT_CM_ACCEPT);
312511534SKevin.Ge@Sun.COM }
312611534SKevin.Ge@Sun.COM 
312711534SKevin.Ge@Sun.COM /*
312811534SKevin.Ge@Sun.COM  * ibd_rc_handle_pas_estab -- handler for connection established completion
312911534SKevin.Ge@Sun.COM  * for passive side.
313011534SKevin.Ge@Sun.COM  */
313111534SKevin.Ge@Sun.COM static ibt_cm_status_t
ibd_rc_handle_pas_estab(ibd_rc_chan_t * chan)313211534SKevin.Ge@Sun.COM ibd_rc_handle_pas_estab(ibd_rc_chan_t *chan)
313311534SKevin.Ge@Sun.COM {
313411534SKevin.Ge@Sun.COM 	ibt_status_t result;
313511534SKevin.Ge@Sun.COM 
313611534SKevin.Ge@Sun.COM 	switch (chan->chan_state) {
313711534SKevin.Ge@Sun.COM 		case IBD_RC_STATE_PAS_REQ_RECV:
313811534SKevin.Ge@Sun.COM 			chan->chan_state = IBD_RC_STATE_PAS_ESTAB;
313911534SKevin.Ge@Sun.COM 
314011534SKevin.Ge@Sun.COM 			result = ibt_enable_cq_notify(chan->rcq_hdl,
314111534SKevin.Ge@Sun.COM 			    IBT_NEXT_COMPLETION);
314211534SKevin.Ge@Sun.COM 			if (result != IBT_SUCCESS) {
314311534SKevin.Ge@Sun.COM 				DPRINT(40, "ibd_rc_handle_pas_estab: "
314411534SKevin.Ge@Sun.COM 				    "ibt_enable_cq_notify(rcq) "
314511534SKevin.Ge@Sun.COM 				    "failed: status %d", result);
314611534SKevin.Ge@Sun.COM 				return (IBT_CM_REJECT);
314711534SKevin.Ge@Sun.COM 			}
314811534SKevin.Ge@Sun.COM 			break;
314911534SKevin.Ge@Sun.COM 		default:
315011534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_handle_pas_estab: default "
315111534SKevin.Ge@Sun.COM 			    "branch, chan_state=%d", chan->chan_state);
315211534SKevin.Ge@Sun.COM 			return (IBT_CM_REJECT);
315311534SKevin.Ge@Sun.COM 	}
315411534SKevin.Ge@Sun.COM 	return (IBT_CM_ACCEPT);
315511534SKevin.Ge@Sun.COM }
315611534SKevin.Ge@Sun.COM 
315711534SKevin.Ge@Sun.COM /* ARGSUSED */
315811534SKevin.Ge@Sun.COM static ibt_cm_status_t
ibd_rc_dispatch_actv_mad(void * arg,ibt_cm_event_t * ibt_cm_event,ibt_cm_return_args_t * ret_args,void * ret_priv_data,ibt_priv_data_len_t ret_len_max)315911534SKevin.Ge@Sun.COM ibd_rc_dispatch_actv_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
316011534SKevin.Ge@Sun.COM     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
316111534SKevin.Ge@Sun.COM     ibt_priv_data_len_t ret_len_max)
316211534SKevin.Ge@Sun.COM {
316311534SKevin.Ge@Sun.COM 	ibt_cm_status_t result = IBT_CM_ACCEPT;
316411534SKevin.Ge@Sun.COM 	ibd_ace_t *ace = (ibd_ace_t *)(uintptr_t)arg;
316511534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *rc_chan;
316611534SKevin.Ge@Sun.COM 	ibd_state_t *state;
316711534SKevin.Ge@Sun.COM 	ibd_rc_msg_hello_t *hello_ack;
316811534SKevin.Ge@Sun.COM 
316911534SKevin.Ge@Sun.COM 	switch (ibt_cm_event->cm_type) {
317011534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_REP_RCV:
317111534SKevin.Ge@Sun.COM 		ASSERT(ace->ac_chan != NULL);
317211534SKevin.Ge@Sun.COM 		ASSERT(ace->ac_chan->chan_state == IBD_RC_STATE_INIT);
317311534SKevin.Ge@Sun.COM 		hello_ack = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
317411534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_handle_rep: hello_ack->mtu=0x%x, "
317511534SKevin.Ge@Sun.COM 		    "hello_ack->qpn=0x%x", ntohl(hello_ack->rx_mtu),
317611534SKevin.Ge@Sun.COM 		    ntohl(hello_ack->reserved_qpn));
317711534SKevin.Ge@Sun.COM 		ace->ac_chan->chan_state = IBD_RC_STATE_ACT_REP_RECV;
317811534SKevin.Ge@Sun.COM 		break;
317911534SKevin.Ge@Sun.COM 
318011534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_CONN_EST:
318111534SKevin.Ge@Sun.COM 		ASSERT(ace->ac_chan != NULL);
318211534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_CONN_EST, "
318311534SKevin.Ge@Sun.COM 		    "ace=%p, act_state=%d, chan=%p",
318411534SKevin.Ge@Sun.COM 		    ace, ace->ac_chan->chan_state, ace->ac_chan);
318511534SKevin.Ge@Sun.COM 		result = ibd_rc_handle_act_estab(ace);
318611534SKevin.Ge@Sun.COM 		break;
318711534SKevin.Ge@Sun.COM 
318811534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_CONN_CLOSED:
318911534SKevin.Ge@Sun.COM 		rc_chan = ace->ac_chan;
319011534SKevin.Ge@Sun.COM 		if (rc_chan == NULL) {
319111534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_dispatch_actv_mad: "
319211534SKevin.Ge@Sun.COM 			    "rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED");
319311534SKevin.Ge@Sun.COM 			return (IBT_CM_ACCEPT);
319411534SKevin.Ge@Sun.COM 		}
319511534SKevin.Ge@Sun.COM 		state = rc_chan->state;
319611534SKevin.Ge@Sun.COM 		mutex_enter(&state->id_ac_mutex);
319711534SKevin.Ge@Sun.COM 		if ((rc_chan->chan_state == IBD_RC_STATE_ACT_ESTAB) &&
319811534SKevin.Ge@Sun.COM 		    ((ace = ibd_acache_find(state, &ace->ac_mac, B_FALSE, 0))
319911534SKevin.Ge@Sun.COM 		    != NULL) && (ace == rc_chan->ace)) {
320011534SKevin.Ge@Sun.COM 			rc_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
320111534SKevin.Ge@Sun.COM 			ASSERT(ace->ac_mce == NULL);
320211534SKevin.Ge@Sun.COM 			INC_REF(ace, 1);
320311534SKevin.Ge@Sun.COM 			IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
320411534SKevin.Ge@Sun.COM 			mutex_exit(&state->id_ac_mutex);
320511534SKevin.Ge@Sun.COM 			DPRINT(30, "ibd_rc_dispatch_actv_mad: "
320611534SKevin.Ge@Sun.COM 			    "IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, "
320711534SKevin.Ge@Sun.COM 			    "reason=%d", ace, rc_chan,
320811534SKevin.Ge@Sun.COM 			    ibt_cm_event->cm_event.closed);
320911534SKevin.Ge@Sun.COM 		} else {
321011534SKevin.Ge@Sun.COM 			mutex_exit(&state->id_ac_mutex);
321111534SKevin.Ge@Sun.COM 			state->rc_act_close_simultaneous++;
321211534SKevin.Ge@Sun.COM 			DPRINT(40, "ibd_rc_dispatch_actv_mad: other thread "
321311534SKevin.Ge@Sun.COM 			    "is closing it, IBT_CM_EVENT_CONN_CLOSED, "
321411534SKevin.Ge@Sun.COM 			    "chan_state=%d", rc_chan->chan_state);
321511534SKevin.Ge@Sun.COM 			return (IBT_CM_ACCEPT);
321611534SKevin.Ge@Sun.COM 		}
321713030SKevin.Ge@Sun.COM 		ibd_rc_act_close(rc_chan, B_FALSE);
321811534SKevin.Ge@Sun.COM 		mutex_enter(&state->id_ac_mutex);
321911534SKevin.Ge@Sun.COM 		ace->ac_chan = NULL;
322011534SKevin.Ge@Sun.COM 		ASSERT(ace->ac_ref != 0);
322111534SKevin.Ge@Sun.COM 		atomic_dec_32(&ace->ac_ref);
322211534SKevin.Ge@Sun.COM 		if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
322311534SKevin.Ge@Sun.COM 			IBD_ACACHE_INSERT_FREE(state, ace);
322411534SKevin.Ge@Sun.COM 			ace->ac_ref = 0;
322511534SKevin.Ge@Sun.COM 		} else {
322611534SKevin.Ge@Sun.COM 			ace->ac_ref |= CYCLEVAL;
322711534SKevin.Ge@Sun.COM 			state->rc_delay_ace_recycle++;
322811534SKevin.Ge@Sun.COM 		}
322911534SKevin.Ge@Sun.COM 		mutex_exit(&state->id_ac_mutex);
323011534SKevin.Ge@Sun.COM 		break;
323111534SKevin.Ge@Sun.COM 
323211534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_FAILURE:
323311534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_FAILURE,"
323411534SKevin.Ge@Sun.COM 		    "ace=%p, chan=%p, code: %d, msg: %d, reason=%d",
323511534SKevin.Ge@Sun.COM 		    ace, ace->ac_chan,
323611534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_code,
323711534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_msg,
323811534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_reason);
323911534SKevin.Ge@Sun.COM 		/*
324011534SKevin.Ge@Sun.COM 		 * Don't need free resource here. The resource is freed
324111534SKevin.Ge@Sun.COM 		 * at function ibd_rc_connect()
324211534SKevin.Ge@Sun.COM 		 */
324311534SKevin.Ge@Sun.COM 		break;
324411534SKevin.Ge@Sun.COM 
324511534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_MRA_RCV:
324611534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_MRA_RCV");
324711534SKevin.Ge@Sun.COM 		break;
324811534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_LAP_RCV:
324911534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_actv_mad: LAP message received");
325011534SKevin.Ge@Sun.COM 		break;
325111534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_APR_RCV:
325211534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_actv_mad: APR message received");
325311534SKevin.Ge@Sun.COM 		break;
325411534SKevin.Ge@Sun.COM 	default:
325511534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_actv_mad: default branch, "
325611534SKevin.Ge@Sun.COM 		    "ibt_cm_event->cm_type=%d", ibt_cm_event->cm_type);
325711534SKevin.Ge@Sun.COM 		break;
325811534SKevin.Ge@Sun.COM 	}
325911534SKevin.Ge@Sun.COM 
326011534SKevin.Ge@Sun.COM 	return (result);
326111534SKevin.Ge@Sun.COM }
326211534SKevin.Ge@Sun.COM 
326311534SKevin.Ge@Sun.COM /* ARGSUSED */
326411534SKevin.Ge@Sun.COM static ibt_cm_status_t
ibd_rc_dispatch_pass_mad(void * arg,ibt_cm_event_t * ibt_cm_event,ibt_cm_return_args_t * ret_args,void * ret_priv_data,ibt_priv_data_len_t ret_len_max)326511534SKevin.Ge@Sun.COM ibd_rc_dispatch_pass_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
326611534SKevin.Ge@Sun.COM     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
326711534SKevin.Ge@Sun.COM     ibt_priv_data_len_t ret_len_max)
326811534SKevin.Ge@Sun.COM {
326911534SKevin.Ge@Sun.COM 	ibt_cm_status_t result = IBT_CM_ACCEPT;
327011534SKevin.Ge@Sun.COM 	ibd_rc_chan_t *chan;
327111534SKevin.Ge@Sun.COM 
327211534SKevin.Ge@Sun.COM 	if (ibt_cm_event->cm_type == IBT_CM_EVENT_REQ_RCV) {
327311534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_REQ_RCV,"
327411534SKevin.Ge@Sun.COM 		    "req_pkey=%x", ibt_cm_event->cm_event.req.req_pkey);
327511534SKevin.Ge@Sun.COM 		/* Receive an incoming CM REQ from active side */
327611534SKevin.Ge@Sun.COM 		result = ibd_rc_handle_req(arg, &chan, ibt_cm_event, ret_args,
327711534SKevin.Ge@Sun.COM 		    ret_priv_data);
327811534SKevin.Ge@Sun.COM 		return (result);
327911534SKevin.Ge@Sun.COM 	}
328011534SKevin.Ge@Sun.COM 
328111534SKevin.Ge@Sun.COM 	if (ibt_cm_event->cm_channel == 0) {
328211534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_pass_mad: "
328311534SKevin.Ge@Sun.COM 		    "ERROR ibt_cm_event->cm_channel == 0");
328411534SKevin.Ge@Sun.COM 		return (IBT_CM_REJECT);
328511534SKevin.Ge@Sun.COM 	}
328611534SKevin.Ge@Sun.COM 
328711534SKevin.Ge@Sun.COM 	chan =
328811534SKevin.Ge@Sun.COM 	    (ibd_rc_chan_t *)ibt_get_chan_private(ibt_cm_event->cm_channel);
328911534SKevin.Ge@Sun.COM 	if (chan == NULL) {
329011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_pass_mad: conn == 0");
329111534SKevin.Ge@Sun.COM 		return (IBT_CM_REJECT);
329211534SKevin.Ge@Sun.COM 	}
329311534SKevin.Ge@Sun.COM 
329411534SKevin.Ge@Sun.COM 	switch (ibt_cm_event->cm_type) {
329511534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_CONN_EST:
329611534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_EST, "
329711534SKevin.Ge@Sun.COM 		    "chan=%p", chan);
329811534SKevin.Ge@Sun.COM 		result = ibd_rc_handle_pas_estab(chan);
329911534SKevin.Ge@Sun.COM 		break;
330011534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_CONN_CLOSED:
330111534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_CLOSED,"
330211534SKevin.Ge@Sun.COM 		    " chan=%p, reason=%d", chan, ibt_cm_event->cm_event.closed);
330313030SKevin.Ge@Sun.COM 		chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
330413030SKevin.Ge@Sun.COM 		    chan);
330513030SKevin.Ge@Sun.COM 		if (chan != NULL)
330613030SKevin.Ge@Sun.COM 			(void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
330711534SKevin.Ge@Sun.COM 		break;
330811534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_FAILURE:
330911534SKevin.Ge@Sun.COM 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_FAILURE,"
331011534SKevin.Ge@Sun.COM 		    " chan=%p, code: %d, msg: %d, reason=%d", chan,
331111534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_code,
331211534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_msg,
331311534SKevin.Ge@Sun.COM 		    ibt_cm_event->cm_event.failed.cf_reason);
331413030SKevin.Ge@Sun.COM 		chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
331513030SKevin.Ge@Sun.COM 		    chan);
331613030SKevin.Ge@Sun.COM 		if (chan != NULL)
331713030SKevin.Ge@Sun.COM 			(void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
331811534SKevin.Ge@Sun.COM 		return (IBT_CM_ACCEPT);
331911534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_MRA_RCV:
332011534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_MRA_RCV");
332111534SKevin.Ge@Sun.COM 		break;
332211534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_LAP_RCV:
332311534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_pass_mad: LAP message received");
332411534SKevin.Ge@Sun.COM 		break;
332511534SKevin.Ge@Sun.COM 	case IBT_CM_EVENT_APR_RCV:
332611534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_pass_mad: APR message received");
332711534SKevin.Ge@Sun.COM 		break;
332811534SKevin.Ge@Sun.COM 	default:
332911534SKevin.Ge@Sun.COM 		DPRINT(40, "ibd_rc_dispatch_pass_mad: default, type=%d, "
333011534SKevin.Ge@Sun.COM 		    "chan=%p", ibt_cm_event->cm_type, chan);
333111534SKevin.Ge@Sun.COM 		break;
333211534SKevin.Ge@Sun.COM 	}
333311534SKevin.Ge@Sun.COM 
333411534SKevin.Ge@Sun.COM 	return (result);
333511534SKevin.Ge@Sun.COM }
3336