/* xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c (revision 11066:cebb50cbe4f9) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
743302Sagiri 
753302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
763302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
773302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
783302Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
793302Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
803302Sagiri 
/*
 * This file contains the buffer management code.
 */
843302Sagiri 
/*
 * Log the current values of the RDS tunables through RDS_DPRINTF3.
 *
 * The statements are wrapped in do { } while (0) so that the macro expands
 * to exactly one statement and therefore remains correct when it is used as
 * the unbraced body of an if/else or a loop.  Invoke it with a trailing
 * semicolon: DUMP_USER_PARAMS();
 */
#define	DUMP_USER_PARAMS()	\
	do { \
		RDS_DPRINTF3(LABEL, "MaxNodes = %d", MaxNodes); \
		RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \
		RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \
		RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", \
		    MaxDataSendBuffers); \
		RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", \
		    MaxDataRecvBuffers); \
		RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", \
		    MaxCtrlSendBuffers); \
		RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", \
		    MaxCtrlRecvBuffers); \
		RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", \
		    DataRecvBufferLWM); \
		RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", \
		    PendingRxPktsHWM); \
		RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry); \
	} while (0)
963302Sagiri 
9710489SGiri.Adari@Sun.COM uint_t	rds_nbuffers_to_putback;
9810489SGiri.Adari@Sun.COM 
993302Sagiri static void
rds_free_mblk(char * arg)1003302Sagiri rds_free_mblk(char *arg)
1013302Sagiri {
1023302Sagiri 	rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg;
1033302Sagiri 
1043302Sagiri 	/* Free the recv buffer */
1053302Sagiri 	RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp);
1063302Sagiri 	ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ);
1073302Sagiri 	rds_free_recv_buf(bp, 1);
1083302Sagiri 	RDS_DECR_RXPKTS_PEND(1);
1093302Sagiri 	RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp);
1103302Sagiri }
1113302Sagiri 
1123302Sagiri void
rds_free_recv_caches(rds_state_t * statep)1133302Sagiri rds_free_recv_caches(rds_state_t *statep)
1143302Sagiri {
1153302Sagiri 	rds_hca_t	*hcap;
1163302Sagiri 	int		ret;
1173302Sagiri 
1183302Sagiri 	RDS_DPRINTF4("rds_free_recv_caches", "Enter");
1193302Sagiri 
1203302Sagiri 	mutex_enter(&rds_dpool.pool_lock);
1213302Sagiri 	if (rds_dpool.pool_memp == NULL) {
1223302Sagiri 		RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty");
1233302Sagiri 		mutex_exit(&rds_dpool.pool_lock);
1243302Sagiri 		return;
1253302Sagiri 	}
1263302Sagiri 
1273302Sagiri 	/*
1283302Sagiri 	 * All buffers must have been freed as all sessions are closed
1293302Sagiri 	 * and destroyed
1303302Sagiri 	 */
1313302Sagiri 	ASSERT(rds_dpool.pool_nbusy == 0);
1323302Sagiri 	RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has "
1333302Sagiri 	    "pending buffers: %d", rds_dpool.pool_nbusy);
1343302Sagiri 	while (rds_dpool.pool_nbusy != 0) {
1353302Sagiri 		mutex_exit(&rds_dpool.pool_lock);
1363302Sagiri 		delay(drv_usectohz(1000000));
1373302Sagiri 		mutex_enter(&rds_dpool.pool_lock);
1383302Sagiri 	}
1393302Sagiri 
1403302Sagiri 	hcap = statep->rds_hcalistp;
1413302Sagiri 	while (hcap != NULL) {
1423302Sagiri 		if (hcap->hca_mrhdl != NULL) {
1433302Sagiri 			ret = ibt_deregister_mr(hcap->hca_hdl,
1443302Sagiri 			    hcap->hca_mrhdl);
1453302Sagiri 			if (ret == IBT_SUCCESS) {
1463302Sagiri 				hcap->hca_mrhdl = NULL;
1473302Sagiri 				hcap->hca_lkey = 0;
1483302Sagiri 				hcap->hca_rkey = 0;
1493302Sagiri 			} else {
1503302Sagiri 				RDS_DPRINTF2(LABEL, "ibt_deregister_mr "
1513302Sagiri 				    "failed: %d, mrhdl: 0x%p", ret,
1523302Sagiri 				    hcap->hca_mrhdl);
1533302Sagiri 			}
1543302Sagiri 		}
1553302Sagiri 		hcap = hcap->hca_nextp;
1563302Sagiri 	}
1573302Sagiri 
1583302Sagiri 	kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers +
1593302Sagiri 	    rds_cpool.pool_nbuffers) * sizeof (rds_buf_t));
1603302Sagiri 	rds_dpool.pool_bufmemp = NULL;
1613302Sagiri 
1623302Sagiri 	kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize);
1633302Sagiri 	rds_dpool.pool_memp = NULL;
1643302Sagiri 
1653302Sagiri 	mutex_exit(&rds_dpool.pool_lock);
1663302Sagiri 
1673302Sagiri 	RDS_DPRINTF4("rds_free_recv_caches", "Return");
1683302Sagiri }
1693302Sagiri 
1703302Sagiri int
rds_init_recv_caches(rds_state_t * statep)1713302Sagiri rds_init_recv_caches(rds_state_t *statep)
1723302Sagiri {
1733302Sagiri 	uint8_t		*mp;
1743302Sagiri 	rds_buf_t	*bp;
1753302Sagiri 	rds_hca_t	*hcap;
1763302Sagiri 	uint32_t	nsessions;
1773302Sagiri 	uint_t		ix;
1785342Sagiri 	uint_t		nctrlrx;
1793302Sagiri 	uint8_t		*memp;
1803302Sagiri 	uint_t		memsize, nbuf;
1813302Sagiri 	rds_buf_t	*bufmemp;
1823302Sagiri 	ibt_mr_attr_t	mem_attr;
1833302Sagiri 	ibt_mr_desc_t	mem_desc;
1843302Sagiri 	int		ret;
1853302Sagiri 
1863302Sagiri 	RDS_DPRINTF4("rds_init_recv_caches", "Enter");
1873302Sagiri 
1883302Sagiri 	DUMP_USER_PARAMS();
1893302Sagiri 
1903302Sagiri 	mutex_enter(&rds_dpool.pool_lock);
1913302Sagiri 	if (rds_dpool.pool_memp != NULL) {
1923302Sagiri 		RDS_DPRINTF2("rds_init_recv_caches", "Pools are already "
1933302Sagiri 		    "initialized");
1943302Sagiri 		mutex_exit(&rds_dpool.pool_lock);
1953302Sagiri 		return (0);
1963302Sagiri 	}
1973302Sagiri 
1983302Sagiri 	/*
1993302Sagiri 	 * High water mark for the receive buffers in the system. If the
2003302Sagiri 	 * number of buffers used crosses this mark then all sockets in
2013302Sagiri 	 * would be stalled. The port quota for the sockets is set based
2023302Sagiri 	 * on this limit.
2033302Sagiri 	 */
2045342Sagiri 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
2053302Sagiri 
20610489SGiri.Adari@Sun.COM 	rds_nbuffers_to_putback = min(MaxCtrlRecvBuffers, MaxDataRecvBuffers);
20710489SGiri.Adari@Sun.COM 
2083302Sagiri 	/* nsessions can never be less than 1 */
2095342Sagiri 	nsessions = MaxNodes - 1;
21010489SGiri.Adari@Sun.COM 	nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers * 2;
2113302Sagiri 
2123302Sagiri 	RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions);
2133302Sagiri 
2143302Sagiri 	/* Add the hdr */
2153302Sagiri 	RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ;
2163302Sagiri 
2175342Sagiri 	memsize = (NDataRX * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE);
2185342Sagiri 	nbuf = NDataRX + nctrlrx;
2193302Sagiri 	RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize);
2203302Sagiri 	RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf);
2213302Sagiri 
2223302Sagiri 	memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP);
2233302Sagiri 	if (memp == NULL) {
2243302Sagiri 		RDS_DPRINTF1(LABEL, "RDS Memory allocation failed");
2253302Sagiri 		mutex_exit(&rds_dpool.pool_lock);
2263302Sagiri 		return (-1);
2273302Sagiri 	}
2283302Sagiri 
2293302Sagiri 	RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld",
2303302Sagiri 	    nbuf * sizeof (rds_buf_t));
2313302Sagiri 
2323302Sagiri 	/* allocate memory for buffer entries */
2333302Sagiri 	bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t),
2343302Sagiri 	    KM_SLEEP);
2353302Sagiri 
2363302Sagiri 	/* register the memory with all HCAs */
2373302Sagiri 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp;
2383302Sagiri 	mem_attr.mr_len = memsize;
2393302Sagiri 	mem_attr.mr_as = NULL;
2403302Sagiri 	mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
2413302Sagiri 
2428082SRamaswamy.Tummala@Sun.COM 	rw_enter(&statep->rds_hca_lock, RW_WRITER);
2438082SRamaswamy.Tummala@Sun.COM 
2443302Sagiri 	hcap = statep->rds_hcalistp;
2453302Sagiri 	while (hcap != NULL) {
2468082SRamaswamy.Tummala@Sun.COM 		if (hcap->hca_state != RDS_HCA_STATE_OPEN) {
2478082SRamaswamy.Tummala@Sun.COM 			hcap = hcap->hca_nextp;
2488082SRamaswamy.Tummala@Sun.COM 			continue;
2498082SRamaswamy.Tummala@Sun.COM 		}
2508082SRamaswamy.Tummala@Sun.COM 
2513302Sagiri 		ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
2523302Sagiri 		    &mem_attr, &hcap->hca_mrhdl, &mem_desc);
2533302Sagiri 		if (ret != IBT_SUCCESS) {
2543302Sagiri 			RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret);
2555645Sagiri 			hcap = statep->rds_hcalistp;
2565645Sagiri 			while ((hcap) && (hcap->hca_mrhdl != NULL)) {
2575645Sagiri 				ret = ibt_deregister_mr(hcap->hca_hdl,
2585645Sagiri 				    hcap->hca_mrhdl);
2595645Sagiri 				if (ret == IBT_SUCCESS) {
2605645Sagiri 					hcap->hca_mrhdl = NULL;
2615645Sagiri 					hcap->hca_lkey = 0;
2625645Sagiri 					hcap->hca_rkey = 0;
2635645Sagiri 				} else {
2645645Sagiri 					RDS_DPRINTF2(LABEL, "ibt_deregister_mr "
2655645Sagiri 					    "failed: %d, mrhdl: 0x%p", ret,
2665645Sagiri 					    hcap->hca_mrhdl);
2675645Sagiri 				}
2685645Sagiri 				hcap = hcap->hca_nextp;
2695645Sagiri 			}
2705645Sagiri 			kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
2715645Sagiri 			kmem_free(memp, memsize);
2728082SRamaswamy.Tummala@Sun.COM 			rw_exit(&statep->rds_hca_lock);
2735645Sagiri 			mutex_exit(&rds_dpool.pool_lock);
2743302Sagiri 			return (-1);
2753302Sagiri 		}
2763302Sagiri 
2778082SRamaswamy.Tummala@Sun.COM 		hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED;
2783302Sagiri 		hcap->hca_lkey = mem_desc.md_lkey;
2793302Sagiri 		hcap->hca_rkey = mem_desc.md_rkey;
2803302Sagiri 
2813302Sagiri 		hcap = hcap->hca_nextp;
2823302Sagiri 	}
2838082SRamaswamy.Tummala@Sun.COM 	rw_exit(&statep->rds_hca_lock);
2843302Sagiri 
2853302Sagiri 	/* Initialize data pool */
2863302Sagiri 	rds_dpool.pool_memp = memp;
2873302Sagiri 	rds_dpool.pool_memsize = memsize;
2883302Sagiri 	rds_dpool.pool_bufmemp = bufmemp;
2895342Sagiri 	rds_dpool.pool_nbuffers = NDataRX;
2903302Sagiri 	rds_dpool.pool_nbusy = 0;
2915342Sagiri 	rds_dpool.pool_nfree = NDataRX;
2923302Sagiri 
2933302Sagiri 	/* chain the buffers */
2943302Sagiri 	mp = memp;
2953302Sagiri 	bp = bufmemp;
2965342Sagiri 	for (ix = 0; ix < NDataRX; ix++) {
2973302Sagiri 		bp[ix].buf_nextp = &bp[ix + 1];
2983302Sagiri 		bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
2993302Sagiri 		bp[ix].buf_state = RDS_RCVBUF_FREE;
3003302Sagiri 		bp[ix].buf_frtn.free_func = rds_free_mblk;
3013302Sagiri 		bp[ix].buf_frtn.free_arg = (char *)&bp[ix];
3023302Sagiri 		mp = mp + RdsPktSize;
3033302Sagiri 	}
3045342Sagiri 	bp[NDataRX - 1].buf_nextp = NULL;
3053302Sagiri 	rds_dpool.pool_headp = &bp[0];
3065342Sagiri 	rds_dpool.pool_tailp = &bp[NDataRX - 1];
3073302Sagiri 
3083302Sagiri 	/* Initialize ctrl pool */
3093302Sagiri 	rds_cpool.pool_nbuffers = nctrlrx;
3103302Sagiri 	rds_cpool.pool_nbusy = 0;
3113302Sagiri 	rds_cpool.pool_nfree = nctrlrx;
3123302Sagiri 
3133302Sagiri 	/* chain the buffers */
3145342Sagiri 	for (ix = NDataRX; ix < nbuf - 1; ix++) {
3153302Sagiri 		bp[ix].buf_nextp = &bp[ix + 1];
3163302Sagiri 		bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
3173302Sagiri 		mp = mp + RDS_CTRLPKT_SIZE;
3183302Sagiri 	}
3193302Sagiri 	bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
3203302Sagiri 	bp[nbuf - 1].buf_nextp = NULL;
3215342Sagiri 	rds_cpool.pool_headp = &bp[NDataRX];
3223302Sagiri 	rds_cpool.pool_tailp = &bp[nbuf - 1];
3233302Sagiri 
3243302Sagiri 	mutex_exit(&rds_dpool.pool_lock);
3253302Sagiri 
3263302Sagiri 	RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp);
3273302Sagiri 	RDS_DPRINTF4("rds_init_recv_caches", "Return");
3283302Sagiri 	return (0);
3293302Sagiri }
3303302Sagiri 
3318082SRamaswamy.Tummala@Sun.COM rds_hca_t *rds_lkup_hca(ib_guid_t hca_guid);
3328082SRamaswamy.Tummala@Sun.COM 
3333302Sagiri void
rds_free_send_pool(rds_ep_t * ep)3343302Sagiri rds_free_send_pool(rds_ep_t *ep)
3353302Sagiri {
3363302Sagiri 	rds_bufpool_t   *pool;
3373302Sagiri 	rds_hca_t	*hcap;
3383302Sagiri 	int		ret;
3393302Sagiri 
3403302Sagiri 	pool = &ep->ep_sndpool;
3413302Sagiri 
3423302Sagiri 	mutex_enter(&pool->pool_lock);
3433302Sagiri 	if (pool->pool_memp == NULL) {
3443302Sagiri 		mutex_exit(&pool->pool_lock);
3453302Sagiri 		RDS_DPRINTF2("rds_free_send_pool",
3463302Sagiri 		    "EP(%p) DOUBLE Free on Send Pool", ep);
3473302Sagiri 		return;
3483302Sagiri 	}
3493302Sagiri 
3503302Sagiri 	/* get the hcap for the HCA hosting this channel */
3518082SRamaswamy.Tummala@Sun.COM 	hcap = rds_lkup_hca(ep->ep_hca_guid);
3523302Sagiri 	if (hcap == NULL) {
3533302Sagiri 		RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found",
3543302Sagiri 		    ep->ep_hca_guid);
3553302Sagiri 	} else {
3563302Sagiri 		ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl);
3573302Sagiri 		if (ret != IBT_SUCCESS) {
3583302Sagiri 			RDS_DPRINTF2(LABEL,
3593302Sagiri 			    "ibt_deregister_mr failed: %d, mrhdl: 0x%p",
3603302Sagiri 			    ret, ep->ep_snd_mrhdl);
3613302Sagiri 		}
3623302Sagiri 
3633302Sagiri 		if (ep->ep_ack_addr) {
3643302Sagiri 			ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl);
3653302Sagiri 			if (ret != IBT_SUCCESS) {
3663302Sagiri 				RDS_DPRINTF2(LABEL,
3673302Sagiri 				    "ibt_deregister_mr ackhdl failed: %d, "
3683302Sagiri 				    "mrhdl: 0x%p", ret, ep->ep_ackhdl);
3693302Sagiri 			}
3703302Sagiri 
3713302Sagiri 			kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t));
3723302Sagiri 			ep->ep_ack_addr = NULL;
3733302Sagiri 		}
3743302Sagiri 	}
3753302Sagiri 
3763302Sagiri 	kmem_free(pool->pool_memp, pool->pool_memsize);
3773302Sagiri 	kmem_free(pool->pool_bufmemp,
3783302Sagiri 	    pool->pool_nbuffers * sizeof (rds_buf_t));
3793302Sagiri 	pool->pool_memp = NULL;
3803302Sagiri 	pool->pool_bufmemp = NULL;
3813302Sagiri 	mutex_exit(&pool->pool_lock);
3823302Sagiri }
3833302Sagiri 
3843302Sagiri int
rds_init_send_pool(rds_ep_t * ep,ib_guid_t hca_guid)3854467Sagiri rds_init_send_pool(rds_ep_t *ep, ib_guid_t hca_guid)
3863302Sagiri {
3873302Sagiri 	uint8_t		*mp;
3883302Sagiri 	rds_buf_t	*bp;
3893302Sagiri 	rds_hca_t	*hcap;
3903302Sagiri 	uint_t		ix, rcv_len;
3913302Sagiri 	ibt_mr_attr_t   mem_attr;
3923302Sagiri 	ibt_mr_desc_t   mem_desc;
3933302Sagiri 	uint8_t		*memp;
3943302Sagiri 	rds_buf_t	*bufmemp;
3953302Sagiri 	uintptr_t	ack_addr = NULL;
3963302Sagiri 	uint_t		memsize;
3973302Sagiri 	uint_t		nbuf;
3983302Sagiri 	rds_bufpool_t   *spool;
3993302Sagiri 	rds_data_hdr_t	*pktp;
4003302Sagiri 	int		ret;
4013302Sagiri 
4023302Sagiri 	RDS_DPRINTF2("rds_init_send_pool", "Enter");
4033302Sagiri 
4043302Sagiri 	spool = &ep->ep_sndpool;
4053302Sagiri 
4063302Sagiri 	ASSERT(spool->pool_memp == NULL);
4074467Sagiri 	ASSERT(ep->ep_hca_guid == 0);
4083302Sagiri 
4093302Sagiri 	/* get the hcap for the HCA hosting this channel */
4104467Sagiri 	hcap = rds_get_hcap(rdsib_statep, hca_guid);
4113302Sagiri 	if (hcap == NULL) {
4123302Sagiri 		RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found",
4134467Sagiri 		    hca_guid);
4143302Sagiri 		return (-1);
4153302Sagiri 	}
4163302Sagiri 
4173302Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
4183302Sagiri 		spool->pool_nbuffers = MaxDataSendBuffers;
4193302Sagiri 		spool->pool_nbusy = 0;
4203302Sagiri 		spool->pool_nfree = MaxDataSendBuffers;
4213302Sagiri 		memsize = (MaxDataSendBuffers * RdsPktSize) +
4223302Sagiri 		    sizeof (uintptr_t);
4233302Sagiri 		rcv_len = RdsPktSize;
4243302Sagiri 	} else {
4253302Sagiri 		spool->pool_nbuffers = MaxCtrlSendBuffers;
4263302Sagiri 		spool->pool_nbusy = 0;
4273302Sagiri 		spool->pool_nfree = MaxCtrlSendBuffers;
4283302Sagiri 		memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE;
4293302Sagiri 		rcv_len = RDS_CTRLPKT_SIZE;
4303302Sagiri 	}
4313302Sagiri 	nbuf = spool->pool_nbuffers;
4323302Sagiri 
4333302Sagiri 	RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize);
4343302Sagiri 
4353302Sagiri 	memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP);
4363302Sagiri 	if (memp == NULL) {
4373302Sagiri 		RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed");
4383302Sagiri 		return (-1);
4393302Sagiri 	}
4403302Sagiri 
4413302Sagiri 	RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld",
4423302Sagiri 	    nbuf * sizeof (rds_buf_t));
4433302Sagiri 
4443302Sagiri 	/* allocate memory for buffer entries */
4453302Sagiri 	bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t),
4463302Sagiri 	    KM_SLEEP);
4473302Sagiri 
4483302Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
4493302Sagiri 		ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP);
4503302Sagiri 
4513302Sagiri 		/* register the memory with the HCA for this channel */
4523302Sagiri 		mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr;
4533302Sagiri 		mem_attr.mr_len = sizeof (uintptr_t);
4543302Sagiri 		mem_attr.mr_as = NULL;
4553302Sagiri 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
4563302Sagiri 		    IBT_MR_ENABLE_REMOTE_WRITE;
4573302Sagiri 
4583302Sagiri 		ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
4593302Sagiri 		    &mem_attr, &ep->ep_ackhdl, &mem_desc);
4603302Sagiri 		if (ret != IBT_SUCCESS) {
4613302Sagiri 			RDS_DPRINTF2("rds_init_send_pool",
4623302Sagiri 			    "EP(%p): ibt_register_mr for ack failed: %d",
4633302Sagiri 			    ep, ret);
4643302Sagiri 			kmem_free(memp, memsize);
4653302Sagiri 			kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
4663302Sagiri 			kmem_free((void *)ack_addr, sizeof (uintptr_t));
4673302Sagiri 			return (-1);
4683302Sagiri 		}
4693302Sagiri 		ep->ep_ack_rkey = mem_desc.md_rkey;
4703302Sagiri 		ep->ep_ack_addr = ack_addr;
4713302Sagiri 	}
4723302Sagiri 
4733302Sagiri 	/* register the memory with the HCA for this channel */
4743302Sagiri 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp;
4753302Sagiri 	mem_attr.mr_len = memsize;
4763302Sagiri 	mem_attr.mr_as = NULL;
4773302Sagiri 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
4783302Sagiri 
4793302Sagiri 	ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
4803302Sagiri 	    &mem_attr, &ep->ep_snd_mrhdl, &mem_desc);
4813302Sagiri 	if (ret != IBT_SUCCESS) {
4823302Sagiri 		RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr "
4833302Sagiri 		    "failed: %d", ep, ret);
4843302Sagiri 		kmem_free(memp, memsize);
4853302Sagiri 		kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
4863302Sagiri 		if (ack_addr != NULL)
4873302Sagiri 			kmem_free((void *)ack_addr, sizeof (uintptr_t));
4883302Sagiri 		return (-1);
4893302Sagiri 	}
4903302Sagiri 	ep->ep_snd_lkey = mem_desc.md_lkey;
4913302Sagiri 
4923302Sagiri 
4933302Sagiri 	/* Initialize the pool */
4943302Sagiri 	spool->pool_memp = memp;
4953302Sagiri 	spool->pool_memsize = memsize;
4963302Sagiri 	spool->pool_bufmemp = bufmemp;
4973302Sagiri 	spool->pool_sqpoll_pending = B_FALSE;
4983302Sagiri 
4993302Sagiri 	/* chain the buffers and initialize them */
5003302Sagiri 	mp = memp;
5013302Sagiri 	bp = bufmemp;
5023302Sagiri 
5033302Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
5043302Sagiri 		for (ix = 0; ix < nbuf - 1; ix++) {
5053302Sagiri 			bp[ix].buf_nextp = &bp[ix + 1];
5063302Sagiri 			bp[ix].buf_ep = ep;
5073302Sagiri 			bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5083302Sagiri 			bp[ix].buf_ds.ds_key = ep->ep_snd_lkey;
5093302Sagiri 			bp[ix].buf_state = RDS_SNDBUF_FREE;
5103302Sagiri 			pktp = (rds_data_hdr_t *)(uintptr_t)mp;
5113302Sagiri 			pktp->dh_bufid = (uintptr_t)&bp[ix];
5123302Sagiri 			mp = mp + rcv_len;
5133302Sagiri 		}
5143302Sagiri 		bp[nbuf - 1].buf_nextp = NULL;
5153302Sagiri 		bp[nbuf - 1].buf_ep = ep;
5163302Sagiri 		bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5173302Sagiri 		bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey;
5183302Sagiri 		bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE;
5193302Sagiri 		pktp = (rds_data_hdr_t *)(uintptr_t)mp;
5203302Sagiri 		pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1];
5213302Sagiri 
5223302Sagiri 		spool->pool_headp = &bp[0];
5233302Sagiri 		spool->pool_tailp = &bp[nbuf - 1];
5243302Sagiri 
5253302Sagiri 		mp = mp + rcv_len;
5263302Sagiri 		ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5273302Sagiri 		ep->ep_ackds.ds_key = ep->ep_snd_lkey;
5283302Sagiri 		ep->ep_ackds.ds_len = sizeof (uintptr_t);
5293302Sagiri 
5303302Sagiri 		*(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp;
5313302Sagiri 	} else {
5323302Sagiri 		/* control send pool */
5333302Sagiri 		for (ix = 0; ix < nbuf - 1; ix++) {
5343302Sagiri 			bp[ix].buf_nextp = &bp[ix + 1];
5353302Sagiri 			bp[ix].buf_ep = ep;
5363302Sagiri 			bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5373302Sagiri 			bp[ix].buf_ds.ds_key = ep->ep_snd_lkey;
5383302Sagiri 			bp[ix].buf_state = RDS_SNDBUF_FREE;
5393302Sagiri 			mp = mp + rcv_len;
5403302Sagiri 		}
5413302Sagiri 		bp[nbuf - 1].buf_nextp = NULL;
5423302Sagiri 		bp[nbuf - 1].buf_ep = ep;
5433302Sagiri 		bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5443302Sagiri 		bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey;
5453302Sagiri 		bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE;
5463302Sagiri 		spool->pool_headp = &bp[0];
5473302Sagiri 		spool->pool_tailp = &bp[nbuf - 1];
5483302Sagiri 	}
5493302Sagiri 
5503302Sagiri 	RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp);
5513302Sagiri 	RDS_DPRINTF2("rds_init_send_pool", "Return");
5523302Sagiri 
5533302Sagiri 	return (0);
5543302Sagiri }
5553302Sagiri 
5564154Sagiri int
rds_reinit_send_pool(rds_ep_t * ep,ib_guid_t hca_guid)5574154Sagiri rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid)
5584154Sagiri {
5594154Sagiri 	rds_buf_t	*bp;
5604154Sagiri 	rds_hca_t	*hcap;
5614154Sagiri 	ibt_mr_attr_t   mem_attr;
5624154Sagiri 	ibt_mr_desc_t   mem_desc;
5634154Sagiri 	rds_bufpool_t   *spool;
5644154Sagiri 	int		ret;
5654154Sagiri 
5664154Sagiri 	RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep);
5674154Sagiri 
5684154Sagiri 	spool = &ep->ep_sndpool;
5694154Sagiri 	ASSERT(spool->pool_memp != NULL);
5704154Sagiri 
5714154Sagiri 	/* deregister the send pool memory from the previous HCA */
5724154Sagiri 	hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
5734154Sagiri 	if (hcap == NULL) {
5744154Sagiri 		RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found",
5754154Sagiri 		    ep->ep_hca_guid);
5764154Sagiri 	} else {
5774154Sagiri 		if (ep->ep_snd_mrhdl != NULL) {
5784154Sagiri 			(void) ibt_deregister_mr(hcap->hca_hdl,
5794154Sagiri 			    ep->ep_snd_mrhdl);
5804154Sagiri 			ep->ep_snd_mrhdl = NULL;
5814154Sagiri 			ep->ep_snd_lkey = 0;
5824154Sagiri 		}
5834154Sagiri 
5844154Sagiri 		if ((ep->ep_type == RDS_EP_TYPE_DATA) &&
5854154Sagiri 		    (ep->ep_ackhdl != NULL)) {
5864154Sagiri 			(void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl);
5874154Sagiri 			ep->ep_ackhdl = NULL;
5884154Sagiri 			ep->ep_ack_rkey = 0;
5894154Sagiri 		}
5904154Sagiri 
5914154Sagiri 		ep->ep_hca_guid = NULL;
5924154Sagiri 	}
5934154Sagiri 
5944154Sagiri 	/* get the hcap for the new HCA */
5954154Sagiri 	hcap = rds_get_hcap(rdsib_statep, hca_guid);
5964154Sagiri 	if (hcap == NULL) {
5974154Sagiri 		RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found",
5984154Sagiri 		    hca_guid);
5994154Sagiri 		return (-1);
6004154Sagiri 	}
6014154Sagiri 
6024154Sagiri 	/* register the send memory */
6034154Sagiri 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp;
6044154Sagiri 	mem_attr.mr_len = spool->pool_memsize;
6054154Sagiri 	mem_attr.mr_as = NULL;
6064154Sagiri 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
6074154Sagiri 
6084154Sagiri 	ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
6094154Sagiri 	    &mem_attr, &ep->ep_snd_mrhdl, &mem_desc);
6104154Sagiri 	if (ret != IBT_SUCCESS) {
6114154Sagiri 		RDS_DPRINTF2("rds_reinit_send_pool",
6124154Sagiri 		    "EP(%p): ibt_register_mr failed: %d", ep, ret);
6134154Sagiri 		return (-1);
6144154Sagiri 	}
6154154Sagiri 	ep->ep_snd_lkey = mem_desc.md_lkey;
6164154Sagiri 
6174154Sagiri 	/* register the acknowledgement space */
6184154Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
6194154Sagiri 		mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr;
6204154Sagiri 		mem_attr.mr_len = sizeof (uintptr_t);
6214154Sagiri 		mem_attr.mr_as = NULL;
6224154Sagiri 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
6234154Sagiri 		    IBT_MR_ENABLE_REMOTE_WRITE;
6244154Sagiri 
6254154Sagiri 		ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
6264154Sagiri 		    &mem_attr, &ep->ep_ackhdl, &mem_desc);
6274154Sagiri 		if (ret != IBT_SUCCESS) {
6284154Sagiri 			RDS_DPRINTF2("rds_reinit_send_pool",
6294154Sagiri 			    "EP(%p): ibt_register_mr for ack failed: %d",
6304154Sagiri 			    ep, ret);
6314154Sagiri 			(void) ibt_deregister_mr(hcap->hca_hdl,
6324154Sagiri 			    ep->ep_snd_mrhdl);
6334154Sagiri 			ep->ep_snd_mrhdl = NULL;
6344154Sagiri 			ep->ep_snd_lkey = 0;
6354154Sagiri 			return (-1);
6364154Sagiri 		}
6374154Sagiri 		ep->ep_ack_rkey = mem_desc.md_rkey;
6384154Sagiri 
6394154Sagiri 		/* update the LKEY in the acknowledgement WR */
6404154Sagiri 		ep->ep_ackds.ds_key = ep->ep_snd_lkey;
6414154Sagiri 	}
6424154Sagiri 
6434154Sagiri 	/* update the LKEY in each buffer */
6444154Sagiri 	bp = spool->pool_headp;
6454154Sagiri 	while (bp) {
6464154Sagiri 		bp->buf_ds.ds_key = ep->ep_snd_lkey;
6474154Sagiri 		bp = bp->buf_nextp;
6484154Sagiri 	}
6494154Sagiri 
6504154Sagiri 	ep->ep_hca_guid = hca_guid;
6514154Sagiri 
6524154Sagiri 	RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep);
6534154Sagiri 
6544154Sagiri 	return (0);
6554154Sagiri }
6564154Sagiri 
6573302Sagiri void
rds_free_recv_pool(rds_ep_t * ep)6583302Sagiri rds_free_recv_pool(rds_ep_t *ep)
6593302Sagiri {
6603302Sagiri 	rds_bufpool_t *pool;
6613302Sagiri 
6623302Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
6633302Sagiri 		pool = &rds_dpool;
6643302Sagiri 	} else {
6653302Sagiri 		pool = &rds_cpool;
6663302Sagiri 	}
6673302Sagiri 
6683302Sagiri 	mutex_enter(&ep->ep_rcvpool.pool_lock);
6693302Sagiri 	if (ep->ep_rcvpool.pool_nfree != 0) {
6703302Sagiri 		rds_free_buf(pool, ep->ep_rcvpool.pool_headp,
6713302Sagiri 		    ep->ep_rcvpool.pool_nfree);
6723302Sagiri 		ep->ep_rcvpool.pool_nfree = 0;
6733302Sagiri 		ep->ep_rcvpool.pool_headp = NULL;
6743302Sagiri 		ep->ep_rcvpool.pool_tailp = NULL;
6753302Sagiri 	}
6763302Sagiri 	mutex_exit(&ep->ep_rcvpool.pool_lock);
6773302Sagiri }
6783302Sagiri 
6793302Sagiri int
rds_init_recv_pool(rds_ep_t * ep)6803302Sagiri rds_init_recv_pool(rds_ep_t *ep)
6813302Sagiri {
6823302Sagiri 	rds_bufpool_t	*rpool;
6833302Sagiri 	rds_qp_t	*recvqp;
6843302Sagiri 
6853302Sagiri 	recvqp = &ep->ep_recvqp;
6863302Sagiri 	rpool = &ep->ep_rcvpool;
6873302Sagiri 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
6883302Sagiri 		recvqp->qp_depth = MaxDataRecvBuffers;
6893302Sagiri 		recvqp->qp_level = 0;
6903302Sagiri 		recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100;
6913302Sagiri 		recvqp->qp_taskqpending = B_FALSE;
6923302Sagiri 
6933302Sagiri 		rpool->pool_nbuffers = MaxDataRecvBuffers;
6943302Sagiri 		rpool->pool_nbusy = 0;
6953302Sagiri 		rpool->pool_nfree = 0;
6963302Sagiri 	} else {
6973302Sagiri 		recvqp->qp_depth = MaxCtrlRecvBuffers;
6983302Sagiri 		recvqp->qp_level = 0;
6993302Sagiri 		recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100;
7003302Sagiri 		recvqp->qp_taskqpending = B_FALSE;
7013302Sagiri 
7023302Sagiri 		rpool->pool_nbuffers = MaxCtrlRecvBuffers;
7033302Sagiri 		rpool->pool_nbusy = 0;
7043302Sagiri 		rpool->pool_nfree = 0;
7053302Sagiri 	}
7063302Sagiri 
7073302Sagiri 	return (0);
7083302Sagiri }
7093302Sagiri 
7103302Sagiri /* Free buffers to the global pool, either cpool or dpool */
7113302Sagiri void
rds_free_buf(rds_bufpool_t * pool,rds_buf_t * bp,uint_t nbuf)7123302Sagiri rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf)
7133302Sagiri {
7143302Sagiri 	uint_t		ix;
7153302Sagiri 
7163302Sagiri 	RDS_DPRINTF4("rds_free_buf", "Enter");
7173302Sagiri 
7183302Sagiri 	ASSERT(nbuf != 0);
7193302Sagiri 
7203302Sagiri 	mutex_enter(&pool->pool_lock);
7213302Sagiri 
7223302Sagiri 	if (pool->pool_nfree != 0) {
7233302Sagiri 		pool->pool_tailp->buf_nextp = bp;
7243302Sagiri 	} else {
7253302Sagiri 		pool->pool_headp = bp;
7263302Sagiri 	}
7273302Sagiri 
7283302Sagiri 	if (nbuf == 1) {
7293302Sagiri 		ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7303302Sagiri 		bp->buf_ep = NULL;
7313302Sagiri 		bp->buf_nextp = NULL;
7323302Sagiri 		pool->pool_tailp = bp;
7333302Sagiri 	} else {
7343302Sagiri 		for (ix = 1; ix < nbuf; ix++) {
7353302Sagiri 			ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7363302Sagiri 			bp->buf_ep = NULL;
7373302Sagiri 			bp = bp->buf_nextp;
7383302Sagiri 		}
7393302Sagiri 		ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7403302Sagiri 		bp->buf_ep = NULL;
7413302Sagiri 		bp->buf_nextp = NULL;
7423302Sagiri 		pool->pool_tailp = bp;
7433302Sagiri 	}
7443302Sagiri 	/* tail is always the last buffer */
7453302Sagiri 	pool->pool_tailp->buf_nextp = NULL;
7463302Sagiri 
7473302Sagiri 	pool->pool_nfree += nbuf;
7483302Sagiri 	pool->pool_nbusy -= nbuf;
7493302Sagiri 
7503302Sagiri 	mutex_exit(&pool->pool_lock);
7513302Sagiri 
7523302Sagiri 	RDS_DPRINTF4("rds_free_buf", "Return");
7533302Sagiri }
7543302Sagiri 
7553302Sagiri /* Get buffers from the global pools, either cpool or dpool */
7563302Sagiri rds_buf_t *
rds_get_buf(rds_bufpool_t * pool,uint_t nbuf,uint_t * nret)7573302Sagiri rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret)
7583302Sagiri {
7593302Sagiri 	rds_buf_t	*bp = NULL, *bp1;
7603302Sagiri 	uint_t		ix;
7613302Sagiri 
7623302Sagiri 	RDS_DPRINTF4("rds_get_buf", "Enter");
7633302Sagiri 
7643302Sagiri 	mutex_enter(&pool->pool_lock);
7653302Sagiri 
7663302Sagiri 	RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d",
7673302Sagiri 	    pool->pool_nfree, nbuf);
7683302Sagiri 
7693302Sagiri 	if (nbuf < pool->pool_nfree) {
7703302Sagiri 		*nret = nbuf;
7713302Sagiri 
7723302Sagiri 		bp1 = pool->pool_headp;
7733302Sagiri 		for (ix = 1; ix < nbuf; ix++) {
7743302Sagiri 			bp1 = bp1->buf_nextp;
7753302Sagiri 		}
7763302Sagiri 
7773302Sagiri 		bp = pool->pool_headp;
7783302Sagiri 		pool->pool_headp = bp1->buf_nextp;
7793302Sagiri 		bp1->buf_nextp = NULL;
7803302Sagiri 
7813302Sagiri 		pool->pool_nfree -= nbuf;
7823302Sagiri 		pool->pool_nbusy += nbuf;
7833302Sagiri 	} else if (nbuf >= pool->pool_nfree) {
7843302Sagiri 		*nret = pool->pool_nfree;
7853302Sagiri 
7863302Sagiri 		bp = pool->pool_headp;
7873302Sagiri 
7883302Sagiri 		pool->pool_headp = NULL;
7893302Sagiri 		pool->pool_tailp = NULL;
7903302Sagiri 
7913302Sagiri 		pool->pool_nbusy += pool->pool_nfree;
7923302Sagiri 		pool->pool_nfree = 0;
7933302Sagiri 	}
7943302Sagiri 
7953302Sagiri 	mutex_exit(&pool->pool_lock);
7963302Sagiri 
7973302Sagiri 	RDS_DPRINTF4("rds_get_buf", "Return");
7983302Sagiri 
7993302Sagiri 	return (bp);
8003302Sagiri }
8013302Sagiri 
8023302Sagiri boolean_t
rds_is_recvq_empty(rds_ep_t * ep,boolean_t wait)8033302Sagiri rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait)
8043302Sagiri {
8053302Sagiri 	rds_qp_t	*recvqp;
8063302Sagiri 	rds_bufpool_t	*rpool;
8073302Sagiri 	boolean_t ret = B_TRUE;
8083302Sagiri 
8093302Sagiri 	recvqp = &ep->ep_recvqp;
8103302Sagiri 	mutex_enter(&recvqp->qp_lock);
8113302Sagiri 	RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs",
8123302Sagiri 	    ep, recvqp->qp_level);
8133302Sagiri 	if (wait) {
8143302Sagiri 		/* wait until the RQ is empty */
8153302Sagiri 		while (recvqp->qp_level != 0) {
8163302Sagiri 			/* wait one second and try again */
8173302Sagiri 			mutex_exit(&recvqp->qp_lock);
8183302Sagiri 			delay(drv_usectohz(1000000));
8193302Sagiri 			mutex_enter(&recvqp->qp_lock);
8203302Sagiri 		}
8213302Sagiri 	} else if (recvqp->qp_level != 0) {
8223302Sagiri 			ret = B_FALSE;
8233302Sagiri 	}
8243302Sagiri 	mutex_exit(&recvqp->qp_lock);
8253302Sagiri 
8263302Sagiri 	rpool = &ep->ep_rcvpool;
8273302Sagiri 	mutex_enter(&rpool->pool_lock);
82810489SGiri.Adari@Sun.COM 
82910489SGiri.Adari@Sun.COM 	/*
83010489SGiri.Adari@Sun.COM 	 * During failovers/reconnects, the app may still have some buffers
83110489SGiri.Adari@Sun.COM 	 * on thier socket queues. Waiting here for those buffers may
83210489SGiri.Adari@Sun.COM 	 * cause a hang. It seems ok for those buffers to get freed later.
83310489SGiri.Adari@Sun.COM 	 */
83410489SGiri.Adari@Sun.COM 	if (rpool->pool_nbusy != 0) {
83510489SGiri.Adari@Sun.COM 		RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): "
83610489SGiri.Adari@Sun.COM 		    "There are %d pending buffers on sockqs", ep,
83710489SGiri.Adari@Sun.COM 		    rpool->pool_nbusy);
83810489SGiri.Adari@Sun.COM 		ret = B_FALSE;
8393302Sagiri 	}
8403302Sagiri 	mutex_exit(&rpool->pool_lock);
8413302Sagiri 
8423302Sagiri 	return (ret);
8433302Sagiri }
8443302Sagiri 
8453302Sagiri boolean_t
rds_is_sendq_empty(rds_ep_t * ep,uint_t wait)8463302Sagiri rds_is_sendq_empty(rds_ep_t *ep, uint_t wait)
8473302Sagiri {
8483302Sagiri 	rds_bufpool_t	*spool;
8493302Sagiri 	rds_buf_t	*bp;
8503302Sagiri 	boolean_t	ret1 = B_TRUE;
8513302Sagiri 
8523302Sagiri 	/* check if all the sends completed */
8533302Sagiri 	spool = &ep->ep_sndpool;
8543302Sagiri 	mutex_enter(&spool->pool_lock);
8553302Sagiri 	RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
8563302Sagiri 	    "Send Pool contains: %d", ep, spool->pool_nbusy);
8573302Sagiri 	if (wait) {
8583302Sagiri 		while (spool->pool_nbusy != 0) {
8593302Sagiri 			if (rds_no_interrupts) {
8603302Sagiri 				/* wait one second and try again */
8613302Sagiri 				delay(drv_usectohz(1000000));
8623302Sagiri 				rds_poll_send_completions(ep->ep_sendcq, ep,
8633302Sagiri 				    B_TRUE);
8643302Sagiri 			} else {
8653302Sagiri 				/* wait one second and try again */
8663302Sagiri 				mutex_exit(&spool->pool_lock);
8673302Sagiri 				delay(drv_usectohz(1000000));
8683302Sagiri 				mutex_enter(&spool->pool_lock);
8693302Sagiri 			}
8703302Sagiri 		}
8713302Sagiri 
8723302Sagiri 		if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) {
8733302Sagiri 			rds_buf_t	*ackbp;
8748082SRamaswamy.Tummala@Sun.COM 			rds_buf_t	*prev_ackbp;
8753302Sagiri 
8763302Sagiri 			/*
8773302Sagiri 			 * If the last one is acknowledged then everything
8783302Sagiri 			 * is acknowledged
8793302Sagiri 			 */
8803302Sagiri 			bp = spool->pool_tailp;
8813302Sagiri 			ackbp = *(rds_buf_t **)ep->ep_ack_addr;
8828082SRamaswamy.Tummala@Sun.COM 			prev_ackbp = ackbp;
8833302Sagiri 			RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
8843302Sagiri 			    "Checking for acknowledgements", ep);
8853302Sagiri 			while (bp != ackbp) {
8863302Sagiri 				RDS_DPRINTF2("rds_is_sendq_empty",
8873302Sagiri 				    "EP(%p) BP(0x%p/0x%p) last "
8883302Sagiri 				    "sent/acknowledged", ep, bp, ackbp);
8893302Sagiri 				mutex_exit(&spool->pool_lock);
8903302Sagiri 				delay(drv_usectohz(1000000));
8913302Sagiri 				mutex_enter(&spool->pool_lock);
8923302Sagiri 
8933302Sagiri 				bp = spool->pool_tailp;
8943302Sagiri 				ackbp = *(rds_buf_t **)ep->ep_ack_addr;
8958082SRamaswamy.Tummala@Sun.COM 				if (ackbp == prev_ackbp) {
8968082SRamaswamy.Tummala@Sun.COM 					RDS_DPRINTF2("rds_is_sendq_empty",
8978082SRamaswamy.Tummala@Sun.COM 					    "There has been no progress,"
8988082SRamaswamy.Tummala@Sun.COM 					    "give up and proceed");
8998082SRamaswamy.Tummala@Sun.COM 					break;
9008082SRamaswamy.Tummala@Sun.COM 				}
9018082SRamaswamy.Tummala@Sun.COM 				prev_ackbp = ackbp;
9023302Sagiri 			}
9033302Sagiri 		}
9043302Sagiri 	} else if (spool->pool_nbusy != 0) {
9053302Sagiri 			ret1 = B_FALSE;
9063302Sagiri 	}
9073302Sagiri 	mutex_exit(&spool->pool_lock);
9083302Sagiri 
9093302Sagiri 	/* check if all the rdma acks completed */
9103302Sagiri 	mutex_enter(&ep->ep_lock);
9113302Sagiri 	RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
9123302Sagiri 	    "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt);
9133302Sagiri 	if (wait) {
9143302Sagiri 		while (ep->ep_rdmacnt != 0) {
9153302Sagiri 			if (rds_no_interrupts) {
9163302Sagiri 				/* wait one second and try again */
9173302Sagiri 				delay(drv_usectohz(1000000));
9183302Sagiri 				rds_poll_send_completions(ep->ep_sendcq, ep,
9193302Sagiri 				    B_FALSE);
9203302Sagiri 			} else {
9213302Sagiri 				/* wait one second and try again */
9223302Sagiri 				mutex_exit(&ep->ep_lock);
9233302Sagiri 				delay(drv_usectohz(1000000));
9243302Sagiri 				mutex_enter(&ep->ep_lock);
9253302Sagiri 			}
9263302Sagiri 		}
9273302Sagiri 	} else if (ep->ep_rdmacnt != 0) {
9283302Sagiri 			ret1 = B_FALSE;
9293302Sagiri 	}
9303302Sagiri 	mutex_exit(&ep->ep_lock);
9313302Sagiri 
9323302Sagiri 	return (ret1);
9333302Sagiri }
9343302Sagiri 
9353302Sagiri /* Get buffers from the send pool */
9363302Sagiri rds_buf_t *
rds_get_send_buf(rds_ep_t * ep,uint_t nbuf)9373302Sagiri rds_get_send_buf(rds_ep_t *ep, uint_t nbuf)
9383302Sagiri {
9393302Sagiri 	rds_buf_t	*bp = NULL, *bp1;
9403302Sagiri 	rds_bufpool_t	*spool;
9413302Sagiri 	uint_t		waittime = rds_waittime_ms * 1000;
9423302Sagiri 	uint_t		ix;
9433302Sagiri 	int		ret;
9443302Sagiri 
9453302Sagiri 	RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d",
9463302Sagiri 	    ep, nbuf);
9473302Sagiri 
9483302Sagiri 	spool = &ep->ep_sndpool;
9493302Sagiri 	mutex_enter(&spool->pool_lock);
9503302Sagiri 
9513302Sagiri 	if (rds_no_interrupts) {
9523302Sagiri 		if ((spool->pool_sqpoll_pending == B_FALSE) &&
9533302Sagiri 		    (spool->pool_nbusy >
9543302Sagiri 		    (spool->pool_nbuffers * rds_poll_percent_full)/100)) {
9553302Sagiri 			spool->pool_sqpoll_pending = B_TRUE;
9563302Sagiri 			mutex_exit(&spool->pool_lock);
9573302Sagiri 			rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE);
9583302Sagiri 			mutex_enter(&spool->pool_lock);
9593302Sagiri 			spool->pool_sqpoll_pending = B_FALSE;
9603302Sagiri 		}
9613302Sagiri 	}
9623302Sagiri 
9633302Sagiri 	if (spool->pool_nfree < nbuf) {
9643302Sagiri 		/* wait for buffers to become available */
9653302Sagiri 		spool->pool_cv_count += nbuf;
966*11066Srafael.vanoni@sun.com 		ret = cv_reltimedwait_sig(&spool->pool_cv, &spool->pool_lock,
967*11066Srafael.vanoni@sun.com 		    drv_usectohz(waittime), TR_CLOCK_TICK);
9683302Sagiri 		/* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */
9693302Sagiri 		if (ret == 0) {
9703302Sagiri 			/* signal pending */
9713302Sagiri 			spool->pool_cv_count -= nbuf;
9723302Sagiri 			mutex_exit(&spool->pool_lock);
9733302Sagiri 			return (NULL);
9743302Sagiri 		}
9753302Sagiri 
9763302Sagiri 		spool->pool_cv_count -= nbuf;
9773302Sagiri 	}
9783302Sagiri 
9793302Sagiri 	/* Have the number of buffers needed */
9803302Sagiri 	if (spool->pool_nfree > nbuf) {
9813302Sagiri 		bp = spool->pool_headp;
9823302Sagiri 
9833302Sagiri 		if (ep->ep_type == RDS_EP_TYPE_DATA) {
9843302Sagiri 			rds_buf_t *ackbp;
9853302Sagiri 			ackbp = *(rds_buf_t **)ep->ep_ack_addr;
9863302Sagiri 
9873302Sagiri 			/* check if all the needed buffers are acknowledged */
9883302Sagiri 			bp1 = bp;
9893302Sagiri 			for (ix = 0; ix < nbuf; ix++) {
9903302Sagiri 				if ((bp1 == ackbp) ||
9913302Sagiri 				    (bp1->buf_state != RDS_SNDBUF_FREE)) {
9923302Sagiri 					/*
9933302Sagiri 					 * The buffer is not yet signalled or
9943302Sagiri 					 * is not yet acknowledged
9953302Sagiri 					 */
9963302Sagiri 					RDS_DPRINTF5("rds_get_send_buf",
9973302Sagiri 					    "EP(%p) Buffer (%p) not yet "
9983302Sagiri 					    "acked/completed", ep, bp1);
9993302Sagiri 					mutex_exit(&spool->pool_lock);
10003302Sagiri 					return (NULL);
10013302Sagiri 				}
10023302Sagiri 
10033302Sagiri 				bp1 = bp1->buf_nextp;
10043302Sagiri 			}
10053302Sagiri 		}
10063302Sagiri 
10073302Sagiri 		/* mark the buffers as pending */
10083302Sagiri 		bp1 = bp;
10093302Sagiri 		for (ix = 1; ix < nbuf; ix++) {
10103302Sagiri 			ASSERT(bp1->buf_state == RDS_SNDBUF_FREE);
10113302Sagiri 			bp1->buf_state = RDS_SNDBUF_PENDING;
10123302Sagiri 			bp1 = bp1->buf_nextp;
10133302Sagiri 		}
10143302Sagiri 		ASSERT(bp1->buf_state == RDS_SNDBUF_FREE);
10153302Sagiri 		bp1->buf_state = RDS_SNDBUF_PENDING;
10163302Sagiri 
10173302Sagiri 		spool->pool_headp = bp1->buf_nextp;
10183302Sagiri 		bp1->buf_nextp = NULL;
10193302Sagiri 		if (spool->pool_headp == NULL)
10203302Sagiri 			spool->pool_tailp = NULL;
10213302Sagiri 		spool->pool_nfree -= nbuf;
10223302Sagiri 		spool->pool_nbusy += nbuf;
10233302Sagiri 	}
10243302Sagiri 	mutex_exit(&spool->pool_lock);
10253302Sagiri 
10263302Sagiri 	RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d",
10273302Sagiri 	    ep, nbuf);
10283302Sagiri 
10293302Sagiri 	return (bp);
10303302Sagiri }
10313302Sagiri 
10323302Sagiri #define	RDS_MIN_BUF_TO_WAKE_THREADS	10
10333302Sagiri 
10343302Sagiri void
rds_free_send_buf(rds_ep_t * ep,rds_buf_t * headp,rds_buf_t * tailp,uint_t nbuf,boolean_t lock)10353302Sagiri rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf,
10363302Sagiri     boolean_t lock)
10373302Sagiri {
10383302Sagiri 	rds_bufpool_t	*spool;
10393302Sagiri 	rds_buf_t	*tmp;
10403302Sagiri 
10413302Sagiri 	RDS_DPRINTF4("rds_free_send_buf", "Enter");
10423302Sagiri 
10433302Sagiri 	ASSERT(nbuf != 0);
10443302Sagiri 
10453302Sagiri 	if (tailp == NULL) {
10463302Sagiri 		if (nbuf > 1) {
10473302Sagiri 			tmp = headp;
10483302Sagiri 			while (tmp->buf_nextp) {
10493302Sagiri 				tmp = tmp->buf_nextp;
10503302Sagiri 			}
10513302Sagiri 			tailp = tmp;
10523302Sagiri 		} else {
10533302Sagiri 			tailp = headp;
10543302Sagiri 		}
10553302Sagiri 	}
10563302Sagiri 
10573302Sagiri 	spool = &ep->ep_sndpool;
10583302Sagiri 
10593302Sagiri 	if (lock == B_FALSE) {
10603302Sagiri 		/* lock is not held outside */
10613302Sagiri 		mutex_enter(&spool->pool_lock);
10623302Sagiri 	}
10633302Sagiri 
10643302Sagiri 	if (spool->pool_nfree) {
10653302Sagiri 		spool->pool_tailp->buf_nextp = headp;
10663302Sagiri 	} else {
10673302Sagiri 		spool->pool_headp = headp;
10683302Sagiri 	}
10693302Sagiri 	spool->pool_tailp = tailp;
10703302Sagiri 
10713302Sagiri 	spool->pool_nfree += nbuf;
10723302Sagiri 	spool->pool_nbusy -= nbuf;
10733302Sagiri 
10743302Sagiri 	if ((spool->pool_cv_count > 0) &&
10753302Sagiri 	    (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) {
10763302Sagiri 		if (spool->pool_nfree >= spool->pool_cv_count)
10773302Sagiri 			cv_broadcast(&spool->pool_cv);
10783302Sagiri 		else
10793302Sagiri 			cv_signal(&spool->pool_cv);
10803302Sagiri 	}
10813302Sagiri 
10823302Sagiri 	if (lock == B_FALSE) {
10833302Sagiri 		mutex_exit(&spool->pool_lock);
10843302Sagiri 	}
10853302Sagiri 
10863302Sagiri 	RDS_DPRINTF4("rds_free_send_buf", "Return");
10873302Sagiri }
10883302Sagiri 
10893302Sagiri void
rds_free_recv_buf(rds_buf_t * bp,uint_t nbuf)10903302Sagiri rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf)
10913302Sagiri {
10923302Sagiri 	rds_ep_t	*ep;
10933302Sagiri 	rds_bufpool_t	*rpool;
10943302Sagiri 	rds_buf_t	*bp1;
10953302Sagiri 	uint_t		ix;
10963302Sagiri 
10973302Sagiri 	RDS_DPRINTF4("rds_free_recv_buf", "Enter");
10983302Sagiri 
10993302Sagiri 	ASSERT(nbuf != 0);
11003302Sagiri 
11013302Sagiri 	ep = bp->buf_ep;
11023302Sagiri 	rpool = &ep->ep_rcvpool;
11033302Sagiri 
11043302Sagiri 	mutex_enter(&rpool->pool_lock);
11053302Sagiri 
11063302Sagiri 	/* Add the buffers to the local pool */
11073302Sagiri 	if (rpool->pool_tailp == NULL) {
11083302Sagiri 		ASSERT(rpool->pool_headp == NULL);
11093302Sagiri 		ASSERT(rpool->pool_nfree == 0);
11103302Sagiri 		rpool->pool_headp = bp;
11113302Sagiri 		bp1 = bp;
11123302Sagiri 		for (ix = 1; ix < nbuf; ix++) {
11133302Sagiri 			if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) {
11143302Sagiri 				rpool->pool_nbusy--;
11153302Sagiri 			}
11163302Sagiri 			bp1->buf_state = RDS_RCVBUF_FREE;
11173302Sagiri 			bp1 = bp1->buf_nextp;
11183302Sagiri 		}
11193302Sagiri 		bp1->buf_nextp = NULL;
11203302Sagiri 		if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) {
11213302Sagiri 			rpool->pool_nbusy--;
11223302Sagiri 		}
11233302Sagiri 		bp->buf_state = RDS_RCVBUF_FREE;
11243302Sagiri 		rpool->pool_tailp = bp1;
11253302Sagiri 		rpool->pool_nfree += nbuf;
11263302Sagiri 	} else {
11273302Sagiri 		bp1 = bp;
11283302Sagiri 		for (ix = 1; ix < nbuf; ix++) {
11293302Sagiri 			if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) {
11303302Sagiri 				rpool->pool_nbusy--;
11313302Sagiri 			}
11323302Sagiri 			bp1->buf_state = RDS_RCVBUF_FREE;
11333302Sagiri 			bp1 = bp1->buf_nextp;
11343302Sagiri 		}
11353302Sagiri 		bp1->buf_nextp = NULL;
11363302Sagiri 		if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) {
11373302Sagiri 			rpool->pool_nbusy--;
11383302Sagiri 		}
11393302Sagiri 		bp->buf_state = RDS_RCVBUF_FREE;
11403302Sagiri 		rpool->pool_tailp->buf_nextp = bp;
11413302Sagiri 		rpool->pool_tailp = bp1;
11423302Sagiri 		rpool->pool_nfree += nbuf;
11433302Sagiri 	}
11443302Sagiri 
114510489SGiri.Adari@Sun.COM 	if (rpool->pool_nfree >= rds_nbuffers_to_putback) {
11463302Sagiri 		bp = rpool->pool_headp;
11473302Sagiri 		nbuf = rpool->pool_nfree;
11483302Sagiri 		rpool->pool_headp = NULL;
11493302Sagiri 		rpool->pool_tailp = NULL;
11503302Sagiri 		rpool->pool_nfree = 0;
11513302Sagiri 		mutex_exit(&rpool->pool_lock);
11523302Sagiri 
11533302Sagiri 		/* Free the buffers to the global pool */
11543302Sagiri 		if (ep->ep_type == RDS_EP_TYPE_DATA) {
11553302Sagiri 			rds_free_buf(&rds_dpool, bp, nbuf);
11563302Sagiri 		} else {
11573302Sagiri 			rds_free_buf(&rds_cpool, bp, nbuf);
11583302Sagiri 		}
11593302Sagiri 
11603302Sagiri 		return;
11613302Sagiri 	}
11623302Sagiri 	mutex_exit(&rpool->pool_lock);
11633302Sagiri 
11643302Sagiri 	RDS_DPRINTF4("rds_free_recv_buf", "Return");
11653302Sagiri }
1166