13302Sagiri /*
23302Sagiri * CDDL HEADER START
33302Sagiri *
43302Sagiri * The contents of this file are subject to the terms of the
53302Sagiri * Common Development and Distribution License (the "License").
63302Sagiri * You may not use this file except in compliance with the License.
73302Sagiri *
83302Sagiri * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93302Sagiri * or http://www.opensolaris.org/os/licensing.
103302Sagiri * See the License for the specific language governing permissions
113302Sagiri * and limitations under the License.
123302Sagiri *
133302Sagiri * When distributing Covered Code, include this CDDL HEADER in each
143302Sagiri * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153302Sagiri * If applicable, add the following below this CDDL HEADER, with the
163302Sagiri * fields enclosed by brackets "[]" replaced with your own identifying
173302Sagiri * information: Portions Copyright [yyyy] [name of copyright owner]
183302Sagiri *
193302Sagiri * CDDL HEADER END
203302Sagiri */
213302Sagiri /*
2210489SGiri.Adari@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
233302Sagiri * Use is subject to license terms.
243302Sagiri */
253302Sagiri /*
263302Sagiri * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
273302Sagiri *
283302Sagiri * This software is available to you under a choice of one of two
293302Sagiri * licenses. You may choose to be licensed under the terms of the GNU
303302Sagiri * General Public License (GPL) Version 2, available from the file
313302Sagiri * COPYING in the main directory of this source tree, or the
323302Sagiri * OpenIB.org BSD license below:
333302Sagiri *
343302Sagiri * Redistribution and use in source and binary forms, with or
353302Sagiri * without modification, are permitted provided that the following
363302Sagiri * conditions are met:
373302Sagiri *
383302Sagiri * - Redistributions of source code must retain the above
393302Sagiri * copyright notice, this list of conditions and the following
403302Sagiri * disclaimer.
413302Sagiri *
423302Sagiri * - Redistributions in binary form must reproduce the above
433302Sagiri * copyright notice, this list of conditions and the following
443302Sagiri * disclaimer in the documentation and/or other materials
453302Sagiri * provided with the distribution.
463302Sagiri *
473302Sagiri * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
483302Sagiri * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
493302Sagiri * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
503302Sagiri * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
513302Sagiri * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
523302Sagiri * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
533302Sagiri * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
543302Sagiri * SOFTWARE.
553302Sagiri *
563302Sagiri */
573302Sagiri /*
583302Sagiri * Sun elects to include this software in Sun product
593302Sagiri * under the OpenIB BSD license.
603302Sagiri *
613302Sagiri *
623302Sagiri * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
633302Sagiri * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
643302Sagiri * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
653302Sagiri * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
663302Sagiri * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
673302Sagiri * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
683302Sagiri * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
693302Sagiri * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
703302Sagiri * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
713302Sagiri * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
723302Sagiri * POSSIBILITY OF SUCH DAMAGE.
733302Sagiri */
743302Sagiri
753302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
763302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
773302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
783302Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
793302Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
803302Sagiri
813302Sagiri /*
823302Sagiri * This File contains the buffer management code
833302Sagiri */
843302Sagiri
/*
 * Print the RDS parameters (node count, buffer sizes, buffer counts and
 * watermarks) through RDS_DPRINTF3.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; without the wrapper only the first print would
 * be governed by an unbraced if, and an if/else around the macro would
 * not compile.  Invoke it as "DUMP_USER_PARAMS();".
 */
#define	DUMP_USER_PARAMS() \
	do { \
		RDS_DPRINTF3(LABEL, "MaxNodes = %d", MaxNodes); \
		RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \
		RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \
		RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", \
		    MaxDataSendBuffers); \
		RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", \
		    MaxDataRecvBuffers); \
		RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", \
		    MaxCtrlSendBuffers); \
		RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", \
		    MaxCtrlRecvBuffers); \
		RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", \
		    DataRecvBufferLWM); \
		RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", \
		    PendingRxPktsHWM); \
		RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry); \
	} while (0)
963302Sagiri
/*
 * Assigned in rds_init_recv_caches() to
 * min(MaxCtrlRecvBuffers, MaxDataRecvBuffers).  The code that consumes
 * this value is not in this part of the file.
 */
uint_t rds_nbuffers_to_putback;
9810489SGiri.Adari@Sun.COM
993302Sagiri static void
rds_free_mblk(char * arg)1003302Sagiri rds_free_mblk(char *arg)
1013302Sagiri {
1023302Sagiri rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg;
1033302Sagiri
1043302Sagiri /* Free the recv buffer */
1053302Sagiri RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp);
1063302Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ);
1073302Sagiri rds_free_recv_buf(bp, 1);
1083302Sagiri RDS_DECR_RXPKTS_PEND(1);
1093302Sagiri RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp);
1103302Sagiri }
1113302Sagiri
1123302Sagiri void
rds_free_recv_caches(rds_state_t * statep)1133302Sagiri rds_free_recv_caches(rds_state_t *statep)
1143302Sagiri {
1153302Sagiri rds_hca_t *hcap;
1163302Sagiri int ret;
1173302Sagiri
1183302Sagiri RDS_DPRINTF4("rds_free_recv_caches", "Enter");
1193302Sagiri
1203302Sagiri mutex_enter(&rds_dpool.pool_lock);
1213302Sagiri if (rds_dpool.pool_memp == NULL) {
1223302Sagiri RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty");
1233302Sagiri mutex_exit(&rds_dpool.pool_lock);
1243302Sagiri return;
1253302Sagiri }
1263302Sagiri
1273302Sagiri /*
1283302Sagiri * All buffers must have been freed as all sessions are closed
1293302Sagiri * and destroyed
1303302Sagiri */
1313302Sagiri ASSERT(rds_dpool.pool_nbusy == 0);
1323302Sagiri RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has "
1333302Sagiri "pending buffers: %d", rds_dpool.pool_nbusy);
1343302Sagiri while (rds_dpool.pool_nbusy != 0) {
1353302Sagiri mutex_exit(&rds_dpool.pool_lock);
1363302Sagiri delay(drv_usectohz(1000000));
1373302Sagiri mutex_enter(&rds_dpool.pool_lock);
1383302Sagiri }
1393302Sagiri
1403302Sagiri hcap = statep->rds_hcalistp;
1413302Sagiri while (hcap != NULL) {
1423302Sagiri if (hcap->hca_mrhdl != NULL) {
1433302Sagiri ret = ibt_deregister_mr(hcap->hca_hdl,
1443302Sagiri hcap->hca_mrhdl);
1453302Sagiri if (ret == IBT_SUCCESS) {
1463302Sagiri hcap->hca_mrhdl = NULL;
1473302Sagiri hcap->hca_lkey = 0;
1483302Sagiri hcap->hca_rkey = 0;
1493302Sagiri } else {
1503302Sagiri RDS_DPRINTF2(LABEL, "ibt_deregister_mr "
1513302Sagiri "failed: %d, mrhdl: 0x%p", ret,
1523302Sagiri hcap->hca_mrhdl);
1533302Sagiri }
1543302Sagiri }
1553302Sagiri hcap = hcap->hca_nextp;
1563302Sagiri }
1573302Sagiri
1583302Sagiri kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers +
1593302Sagiri rds_cpool.pool_nbuffers) * sizeof (rds_buf_t));
1603302Sagiri rds_dpool.pool_bufmemp = NULL;
1613302Sagiri
1623302Sagiri kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize);
1633302Sagiri rds_dpool.pool_memp = NULL;
1643302Sagiri
1653302Sagiri mutex_exit(&rds_dpool.pool_lock);
1663302Sagiri
1673302Sagiri RDS_DPRINTF4("rds_free_recv_caches", "Return");
1683302Sagiri }
1693302Sagiri
1703302Sagiri int
rds_init_recv_caches(rds_state_t * statep)1713302Sagiri rds_init_recv_caches(rds_state_t *statep)
1723302Sagiri {
1733302Sagiri uint8_t *mp;
1743302Sagiri rds_buf_t *bp;
1753302Sagiri rds_hca_t *hcap;
1763302Sagiri uint32_t nsessions;
1773302Sagiri uint_t ix;
1785342Sagiri uint_t nctrlrx;
1793302Sagiri uint8_t *memp;
1803302Sagiri uint_t memsize, nbuf;
1813302Sagiri rds_buf_t *bufmemp;
1823302Sagiri ibt_mr_attr_t mem_attr;
1833302Sagiri ibt_mr_desc_t mem_desc;
1843302Sagiri int ret;
1853302Sagiri
1863302Sagiri RDS_DPRINTF4("rds_init_recv_caches", "Enter");
1873302Sagiri
1883302Sagiri DUMP_USER_PARAMS();
1893302Sagiri
1903302Sagiri mutex_enter(&rds_dpool.pool_lock);
1913302Sagiri if (rds_dpool.pool_memp != NULL) {
1923302Sagiri RDS_DPRINTF2("rds_init_recv_caches", "Pools are already "
1933302Sagiri "initialized");
1943302Sagiri mutex_exit(&rds_dpool.pool_lock);
1953302Sagiri return (0);
1963302Sagiri }
1973302Sagiri
1983302Sagiri /*
1993302Sagiri * High water mark for the receive buffers in the system. If the
2003302Sagiri * number of buffers used crosses this mark then all sockets in
2013302Sagiri * would be stalled. The port quota for the sockets is set based
2023302Sagiri * on this limit.
2033302Sagiri */
2045342Sagiri rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
2053302Sagiri
20610489SGiri.Adari@Sun.COM rds_nbuffers_to_putback = min(MaxCtrlRecvBuffers, MaxDataRecvBuffers);
20710489SGiri.Adari@Sun.COM
2083302Sagiri /* nsessions can never be less than 1 */
2095342Sagiri nsessions = MaxNodes - 1;
21010489SGiri.Adari@Sun.COM nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers * 2;
2113302Sagiri
2123302Sagiri RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions);
2133302Sagiri
2143302Sagiri /* Add the hdr */
2153302Sagiri RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ;
2163302Sagiri
2175342Sagiri memsize = (NDataRX * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE);
2185342Sagiri nbuf = NDataRX + nctrlrx;
2193302Sagiri RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize);
2203302Sagiri RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf);
2213302Sagiri
2223302Sagiri memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP);
2233302Sagiri if (memp == NULL) {
2243302Sagiri RDS_DPRINTF1(LABEL, "RDS Memory allocation failed");
2253302Sagiri mutex_exit(&rds_dpool.pool_lock);
2263302Sagiri return (-1);
2273302Sagiri }
2283302Sagiri
2293302Sagiri RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld",
2303302Sagiri nbuf * sizeof (rds_buf_t));
2313302Sagiri
2323302Sagiri /* allocate memory for buffer entries */
2333302Sagiri bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t),
2343302Sagiri KM_SLEEP);
2353302Sagiri
2363302Sagiri /* register the memory with all HCAs */
2373302Sagiri mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp;
2383302Sagiri mem_attr.mr_len = memsize;
2393302Sagiri mem_attr.mr_as = NULL;
2403302Sagiri mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
2413302Sagiri
2428082SRamaswamy.Tummala@Sun.COM rw_enter(&statep->rds_hca_lock, RW_WRITER);
2438082SRamaswamy.Tummala@Sun.COM
2443302Sagiri hcap = statep->rds_hcalistp;
2453302Sagiri while (hcap != NULL) {
2468082SRamaswamy.Tummala@Sun.COM if (hcap->hca_state != RDS_HCA_STATE_OPEN) {
2478082SRamaswamy.Tummala@Sun.COM hcap = hcap->hca_nextp;
2488082SRamaswamy.Tummala@Sun.COM continue;
2498082SRamaswamy.Tummala@Sun.COM }
2508082SRamaswamy.Tummala@Sun.COM
2513302Sagiri ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
2523302Sagiri &mem_attr, &hcap->hca_mrhdl, &mem_desc);
2533302Sagiri if (ret != IBT_SUCCESS) {
2543302Sagiri RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret);
2555645Sagiri hcap = statep->rds_hcalistp;
2565645Sagiri while ((hcap) && (hcap->hca_mrhdl != NULL)) {
2575645Sagiri ret = ibt_deregister_mr(hcap->hca_hdl,
2585645Sagiri hcap->hca_mrhdl);
2595645Sagiri if (ret == IBT_SUCCESS) {
2605645Sagiri hcap->hca_mrhdl = NULL;
2615645Sagiri hcap->hca_lkey = 0;
2625645Sagiri hcap->hca_rkey = 0;
2635645Sagiri } else {
2645645Sagiri RDS_DPRINTF2(LABEL, "ibt_deregister_mr "
2655645Sagiri "failed: %d, mrhdl: 0x%p", ret,
2665645Sagiri hcap->hca_mrhdl);
2675645Sagiri }
2685645Sagiri hcap = hcap->hca_nextp;
2695645Sagiri }
2705645Sagiri kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
2715645Sagiri kmem_free(memp, memsize);
2728082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
2735645Sagiri mutex_exit(&rds_dpool.pool_lock);
2743302Sagiri return (-1);
2753302Sagiri }
2763302Sagiri
2778082SRamaswamy.Tummala@Sun.COM hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED;
2783302Sagiri hcap->hca_lkey = mem_desc.md_lkey;
2793302Sagiri hcap->hca_rkey = mem_desc.md_rkey;
2803302Sagiri
2813302Sagiri hcap = hcap->hca_nextp;
2823302Sagiri }
2838082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
2843302Sagiri
2853302Sagiri /* Initialize data pool */
2863302Sagiri rds_dpool.pool_memp = memp;
2873302Sagiri rds_dpool.pool_memsize = memsize;
2883302Sagiri rds_dpool.pool_bufmemp = bufmemp;
2895342Sagiri rds_dpool.pool_nbuffers = NDataRX;
2903302Sagiri rds_dpool.pool_nbusy = 0;
2915342Sagiri rds_dpool.pool_nfree = NDataRX;
2923302Sagiri
2933302Sagiri /* chain the buffers */
2943302Sagiri mp = memp;
2953302Sagiri bp = bufmemp;
2965342Sagiri for (ix = 0; ix < NDataRX; ix++) {
2973302Sagiri bp[ix].buf_nextp = &bp[ix + 1];
2983302Sagiri bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
2993302Sagiri bp[ix].buf_state = RDS_RCVBUF_FREE;
3003302Sagiri bp[ix].buf_frtn.free_func = rds_free_mblk;
3013302Sagiri bp[ix].buf_frtn.free_arg = (char *)&bp[ix];
3023302Sagiri mp = mp + RdsPktSize;
3033302Sagiri }
3045342Sagiri bp[NDataRX - 1].buf_nextp = NULL;
3053302Sagiri rds_dpool.pool_headp = &bp[0];
3065342Sagiri rds_dpool.pool_tailp = &bp[NDataRX - 1];
3073302Sagiri
3083302Sagiri /* Initialize ctrl pool */
3093302Sagiri rds_cpool.pool_nbuffers = nctrlrx;
3103302Sagiri rds_cpool.pool_nbusy = 0;
3113302Sagiri rds_cpool.pool_nfree = nctrlrx;
3123302Sagiri
3133302Sagiri /* chain the buffers */
3145342Sagiri for (ix = NDataRX; ix < nbuf - 1; ix++) {
3153302Sagiri bp[ix].buf_nextp = &bp[ix + 1];
3163302Sagiri bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
3173302Sagiri mp = mp + RDS_CTRLPKT_SIZE;
3183302Sagiri }
3193302Sagiri bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
3203302Sagiri bp[nbuf - 1].buf_nextp = NULL;
3215342Sagiri rds_cpool.pool_headp = &bp[NDataRX];
3223302Sagiri rds_cpool.pool_tailp = &bp[nbuf - 1];
3233302Sagiri
3243302Sagiri mutex_exit(&rds_dpool.pool_lock);
3253302Sagiri
3263302Sagiri RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp);
3273302Sagiri RDS_DPRINTF4("rds_init_recv_caches", "Return");
3283302Sagiri return (0);
3293302Sagiri }
3303302Sagiri
3318082SRamaswamy.Tummala@Sun.COM rds_hca_t *rds_lkup_hca(ib_guid_t hca_guid);
3328082SRamaswamy.Tummala@Sun.COM
3333302Sagiri void
rds_free_send_pool(rds_ep_t * ep)3343302Sagiri rds_free_send_pool(rds_ep_t *ep)
3353302Sagiri {
3363302Sagiri rds_bufpool_t *pool;
3373302Sagiri rds_hca_t *hcap;
3383302Sagiri int ret;
3393302Sagiri
3403302Sagiri pool = &ep->ep_sndpool;
3413302Sagiri
3423302Sagiri mutex_enter(&pool->pool_lock);
3433302Sagiri if (pool->pool_memp == NULL) {
3443302Sagiri mutex_exit(&pool->pool_lock);
3453302Sagiri RDS_DPRINTF2("rds_free_send_pool",
3463302Sagiri "EP(%p) DOUBLE Free on Send Pool", ep);
3473302Sagiri return;
3483302Sagiri }
3493302Sagiri
3503302Sagiri /* get the hcap for the HCA hosting this channel */
3518082SRamaswamy.Tummala@Sun.COM hcap = rds_lkup_hca(ep->ep_hca_guid);
3523302Sagiri if (hcap == NULL) {
3533302Sagiri RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found",
3543302Sagiri ep->ep_hca_guid);
3553302Sagiri } else {
3563302Sagiri ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl);
3573302Sagiri if (ret != IBT_SUCCESS) {
3583302Sagiri RDS_DPRINTF2(LABEL,
3593302Sagiri "ibt_deregister_mr failed: %d, mrhdl: 0x%p",
3603302Sagiri ret, ep->ep_snd_mrhdl);
3613302Sagiri }
3623302Sagiri
3633302Sagiri if (ep->ep_ack_addr) {
3643302Sagiri ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl);
3653302Sagiri if (ret != IBT_SUCCESS) {
3663302Sagiri RDS_DPRINTF2(LABEL,
3673302Sagiri "ibt_deregister_mr ackhdl failed: %d, "
3683302Sagiri "mrhdl: 0x%p", ret, ep->ep_ackhdl);
3693302Sagiri }
3703302Sagiri
3713302Sagiri kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t));
3723302Sagiri ep->ep_ack_addr = NULL;
3733302Sagiri }
3743302Sagiri }
3753302Sagiri
3763302Sagiri kmem_free(pool->pool_memp, pool->pool_memsize);
3773302Sagiri kmem_free(pool->pool_bufmemp,
3783302Sagiri pool->pool_nbuffers * sizeof (rds_buf_t));
3793302Sagiri pool->pool_memp = NULL;
3803302Sagiri pool->pool_bufmemp = NULL;
3813302Sagiri mutex_exit(&pool->pool_lock);
3823302Sagiri }
3833302Sagiri
3843302Sagiri int
rds_init_send_pool(rds_ep_t * ep,ib_guid_t hca_guid)3854467Sagiri rds_init_send_pool(rds_ep_t *ep, ib_guid_t hca_guid)
3863302Sagiri {
3873302Sagiri uint8_t *mp;
3883302Sagiri rds_buf_t *bp;
3893302Sagiri rds_hca_t *hcap;
3903302Sagiri uint_t ix, rcv_len;
3913302Sagiri ibt_mr_attr_t mem_attr;
3923302Sagiri ibt_mr_desc_t mem_desc;
3933302Sagiri uint8_t *memp;
3943302Sagiri rds_buf_t *bufmemp;
3953302Sagiri uintptr_t ack_addr = NULL;
3963302Sagiri uint_t memsize;
3973302Sagiri uint_t nbuf;
3983302Sagiri rds_bufpool_t *spool;
3993302Sagiri rds_data_hdr_t *pktp;
4003302Sagiri int ret;
4013302Sagiri
4023302Sagiri RDS_DPRINTF2("rds_init_send_pool", "Enter");
4033302Sagiri
4043302Sagiri spool = &ep->ep_sndpool;
4053302Sagiri
4063302Sagiri ASSERT(spool->pool_memp == NULL);
4074467Sagiri ASSERT(ep->ep_hca_guid == 0);
4083302Sagiri
4093302Sagiri /* get the hcap for the HCA hosting this channel */
4104467Sagiri hcap = rds_get_hcap(rdsib_statep, hca_guid);
4113302Sagiri if (hcap == NULL) {
4123302Sagiri RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found",
4134467Sagiri hca_guid);
4143302Sagiri return (-1);
4153302Sagiri }
4163302Sagiri
4173302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
4183302Sagiri spool->pool_nbuffers = MaxDataSendBuffers;
4193302Sagiri spool->pool_nbusy = 0;
4203302Sagiri spool->pool_nfree = MaxDataSendBuffers;
4213302Sagiri memsize = (MaxDataSendBuffers * RdsPktSize) +
4223302Sagiri sizeof (uintptr_t);
4233302Sagiri rcv_len = RdsPktSize;
4243302Sagiri } else {
4253302Sagiri spool->pool_nbuffers = MaxCtrlSendBuffers;
4263302Sagiri spool->pool_nbusy = 0;
4273302Sagiri spool->pool_nfree = MaxCtrlSendBuffers;
4283302Sagiri memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE;
4293302Sagiri rcv_len = RDS_CTRLPKT_SIZE;
4303302Sagiri }
4313302Sagiri nbuf = spool->pool_nbuffers;
4323302Sagiri
4333302Sagiri RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize);
4343302Sagiri
4353302Sagiri memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP);
4363302Sagiri if (memp == NULL) {
4373302Sagiri RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed");
4383302Sagiri return (-1);
4393302Sagiri }
4403302Sagiri
4413302Sagiri RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld",
4423302Sagiri nbuf * sizeof (rds_buf_t));
4433302Sagiri
4443302Sagiri /* allocate memory for buffer entries */
4453302Sagiri bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t),
4463302Sagiri KM_SLEEP);
4473302Sagiri
4483302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
4493302Sagiri ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP);
4503302Sagiri
4513302Sagiri /* register the memory with the HCA for this channel */
4523302Sagiri mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr;
4533302Sagiri mem_attr.mr_len = sizeof (uintptr_t);
4543302Sagiri mem_attr.mr_as = NULL;
4553302Sagiri mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
4563302Sagiri IBT_MR_ENABLE_REMOTE_WRITE;
4573302Sagiri
4583302Sagiri ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
4593302Sagiri &mem_attr, &ep->ep_ackhdl, &mem_desc);
4603302Sagiri if (ret != IBT_SUCCESS) {
4613302Sagiri RDS_DPRINTF2("rds_init_send_pool",
4623302Sagiri "EP(%p): ibt_register_mr for ack failed: %d",
4633302Sagiri ep, ret);
4643302Sagiri kmem_free(memp, memsize);
4653302Sagiri kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
4663302Sagiri kmem_free((void *)ack_addr, sizeof (uintptr_t));
4673302Sagiri return (-1);
4683302Sagiri }
4693302Sagiri ep->ep_ack_rkey = mem_desc.md_rkey;
4703302Sagiri ep->ep_ack_addr = ack_addr;
4713302Sagiri }
4723302Sagiri
4733302Sagiri /* register the memory with the HCA for this channel */
4743302Sagiri mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp;
4753302Sagiri mem_attr.mr_len = memsize;
4763302Sagiri mem_attr.mr_as = NULL;
4773302Sagiri mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
4783302Sagiri
4793302Sagiri ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
4803302Sagiri &mem_attr, &ep->ep_snd_mrhdl, &mem_desc);
4813302Sagiri if (ret != IBT_SUCCESS) {
4823302Sagiri RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr "
4833302Sagiri "failed: %d", ep, ret);
4843302Sagiri kmem_free(memp, memsize);
4853302Sagiri kmem_free(bufmemp, nbuf * sizeof (rds_buf_t));
4863302Sagiri if (ack_addr != NULL)
4873302Sagiri kmem_free((void *)ack_addr, sizeof (uintptr_t));
4883302Sagiri return (-1);
4893302Sagiri }
4903302Sagiri ep->ep_snd_lkey = mem_desc.md_lkey;
4913302Sagiri
4923302Sagiri
4933302Sagiri /* Initialize the pool */
4943302Sagiri spool->pool_memp = memp;
4953302Sagiri spool->pool_memsize = memsize;
4963302Sagiri spool->pool_bufmemp = bufmemp;
4973302Sagiri spool->pool_sqpoll_pending = B_FALSE;
4983302Sagiri
4993302Sagiri /* chain the buffers and initialize them */
5003302Sagiri mp = memp;
5013302Sagiri bp = bufmemp;
5023302Sagiri
5033302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
5043302Sagiri for (ix = 0; ix < nbuf - 1; ix++) {
5053302Sagiri bp[ix].buf_nextp = &bp[ix + 1];
5063302Sagiri bp[ix].buf_ep = ep;
5073302Sagiri bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5083302Sagiri bp[ix].buf_ds.ds_key = ep->ep_snd_lkey;
5093302Sagiri bp[ix].buf_state = RDS_SNDBUF_FREE;
5103302Sagiri pktp = (rds_data_hdr_t *)(uintptr_t)mp;
5113302Sagiri pktp->dh_bufid = (uintptr_t)&bp[ix];
5123302Sagiri mp = mp + rcv_len;
5133302Sagiri }
5143302Sagiri bp[nbuf - 1].buf_nextp = NULL;
5153302Sagiri bp[nbuf - 1].buf_ep = ep;
5163302Sagiri bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5173302Sagiri bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey;
5183302Sagiri bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE;
5193302Sagiri pktp = (rds_data_hdr_t *)(uintptr_t)mp;
5203302Sagiri pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1];
5213302Sagiri
5223302Sagiri spool->pool_headp = &bp[0];
5233302Sagiri spool->pool_tailp = &bp[nbuf - 1];
5243302Sagiri
5253302Sagiri mp = mp + rcv_len;
5263302Sagiri ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5273302Sagiri ep->ep_ackds.ds_key = ep->ep_snd_lkey;
5283302Sagiri ep->ep_ackds.ds_len = sizeof (uintptr_t);
5293302Sagiri
5303302Sagiri *(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp;
5313302Sagiri } else {
5323302Sagiri /* control send pool */
5333302Sagiri for (ix = 0; ix < nbuf - 1; ix++) {
5343302Sagiri bp[ix].buf_nextp = &bp[ix + 1];
5353302Sagiri bp[ix].buf_ep = ep;
5363302Sagiri bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5373302Sagiri bp[ix].buf_ds.ds_key = ep->ep_snd_lkey;
5383302Sagiri bp[ix].buf_state = RDS_SNDBUF_FREE;
5393302Sagiri mp = mp + rcv_len;
5403302Sagiri }
5413302Sagiri bp[nbuf - 1].buf_nextp = NULL;
5423302Sagiri bp[nbuf - 1].buf_ep = ep;
5433302Sagiri bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp;
5443302Sagiri bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey;
5453302Sagiri bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE;
5463302Sagiri spool->pool_headp = &bp[0];
5473302Sagiri spool->pool_tailp = &bp[nbuf - 1];
5483302Sagiri }
5493302Sagiri
5503302Sagiri RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp);
5513302Sagiri RDS_DPRINTF2("rds_init_send_pool", "Return");
5523302Sagiri
5533302Sagiri return (0);
5543302Sagiri }
5553302Sagiri
5564154Sagiri int
rds_reinit_send_pool(rds_ep_t * ep,ib_guid_t hca_guid)5574154Sagiri rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid)
5584154Sagiri {
5594154Sagiri rds_buf_t *bp;
5604154Sagiri rds_hca_t *hcap;
5614154Sagiri ibt_mr_attr_t mem_attr;
5624154Sagiri ibt_mr_desc_t mem_desc;
5634154Sagiri rds_bufpool_t *spool;
5644154Sagiri int ret;
5654154Sagiri
5664154Sagiri RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep);
5674154Sagiri
5684154Sagiri spool = &ep->ep_sndpool;
5694154Sagiri ASSERT(spool->pool_memp != NULL);
5704154Sagiri
5714154Sagiri /* deregister the send pool memory from the previous HCA */
5724154Sagiri hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
5734154Sagiri if (hcap == NULL) {
5744154Sagiri RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found",
5754154Sagiri ep->ep_hca_guid);
5764154Sagiri } else {
5774154Sagiri if (ep->ep_snd_mrhdl != NULL) {
5784154Sagiri (void) ibt_deregister_mr(hcap->hca_hdl,
5794154Sagiri ep->ep_snd_mrhdl);
5804154Sagiri ep->ep_snd_mrhdl = NULL;
5814154Sagiri ep->ep_snd_lkey = 0;
5824154Sagiri }
5834154Sagiri
5844154Sagiri if ((ep->ep_type == RDS_EP_TYPE_DATA) &&
5854154Sagiri (ep->ep_ackhdl != NULL)) {
5864154Sagiri (void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl);
5874154Sagiri ep->ep_ackhdl = NULL;
5884154Sagiri ep->ep_ack_rkey = 0;
5894154Sagiri }
5904154Sagiri
5914154Sagiri ep->ep_hca_guid = NULL;
5924154Sagiri }
5934154Sagiri
5944154Sagiri /* get the hcap for the new HCA */
5954154Sagiri hcap = rds_get_hcap(rdsib_statep, hca_guid);
5964154Sagiri if (hcap == NULL) {
5974154Sagiri RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found",
5984154Sagiri hca_guid);
5994154Sagiri return (-1);
6004154Sagiri }
6014154Sagiri
6024154Sagiri /* register the send memory */
6034154Sagiri mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp;
6044154Sagiri mem_attr.mr_len = spool->pool_memsize;
6054154Sagiri mem_attr.mr_as = NULL;
6064154Sagiri mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
6074154Sagiri
6084154Sagiri ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
6094154Sagiri &mem_attr, &ep->ep_snd_mrhdl, &mem_desc);
6104154Sagiri if (ret != IBT_SUCCESS) {
6114154Sagiri RDS_DPRINTF2("rds_reinit_send_pool",
6124154Sagiri "EP(%p): ibt_register_mr failed: %d", ep, ret);
6134154Sagiri return (-1);
6144154Sagiri }
6154154Sagiri ep->ep_snd_lkey = mem_desc.md_lkey;
6164154Sagiri
6174154Sagiri /* register the acknowledgement space */
6184154Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
6194154Sagiri mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr;
6204154Sagiri mem_attr.mr_len = sizeof (uintptr_t);
6214154Sagiri mem_attr.mr_as = NULL;
6224154Sagiri mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
6234154Sagiri IBT_MR_ENABLE_REMOTE_WRITE;
6244154Sagiri
6254154Sagiri ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl,
6264154Sagiri &mem_attr, &ep->ep_ackhdl, &mem_desc);
6274154Sagiri if (ret != IBT_SUCCESS) {
6284154Sagiri RDS_DPRINTF2("rds_reinit_send_pool",
6294154Sagiri "EP(%p): ibt_register_mr for ack failed: %d",
6304154Sagiri ep, ret);
6314154Sagiri (void) ibt_deregister_mr(hcap->hca_hdl,
6324154Sagiri ep->ep_snd_mrhdl);
6334154Sagiri ep->ep_snd_mrhdl = NULL;
6344154Sagiri ep->ep_snd_lkey = 0;
6354154Sagiri return (-1);
6364154Sagiri }
6374154Sagiri ep->ep_ack_rkey = mem_desc.md_rkey;
6384154Sagiri
6394154Sagiri /* update the LKEY in the acknowledgement WR */
6404154Sagiri ep->ep_ackds.ds_key = ep->ep_snd_lkey;
6414154Sagiri }
6424154Sagiri
6434154Sagiri /* update the LKEY in each buffer */
6444154Sagiri bp = spool->pool_headp;
6454154Sagiri while (bp) {
6464154Sagiri bp->buf_ds.ds_key = ep->ep_snd_lkey;
6474154Sagiri bp = bp->buf_nextp;
6484154Sagiri }
6494154Sagiri
6504154Sagiri ep->ep_hca_guid = hca_guid;
6514154Sagiri
6524154Sagiri RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep);
6534154Sagiri
6544154Sagiri return (0);
6554154Sagiri }
6564154Sagiri
6573302Sagiri void
rds_free_recv_pool(rds_ep_t * ep)6583302Sagiri rds_free_recv_pool(rds_ep_t *ep)
6593302Sagiri {
6603302Sagiri rds_bufpool_t *pool;
6613302Sagiri
6623302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
6633302Sagiri pool = &rds_dpool;
6643302Sagiri } else {
6653302Sagiri pool = &rds_cpool;
6663302Sagiri }
6673302Sagiri
6683302Sagiri mutex_enter(&ep->ep_rcvpool.pool_lock);
6693302Sagiri if (ep->ep_rcvpool.pool_nfree != 0) {
6703302Sagiri rds_free_buf(pool, ep->ep_rcvpool.pool_headp,
6713302Sagiri ep->ep_rcvpool.pool_nfree);
6723302Sagiri ep->ep_rcvpool.pool_nfree = 0;
6733302Sagiri ep->ep_rcvpool.pool_headp = NULL;
6743302Sagiri ep->ep_rcvpool.pool_tailp = NULL;
6753302Sagiri }
6763302Sagiri mutex_exit(&ep->ep_rcvpool.pool_lock);
6773302Sagiri }
6783302Sagiri
6793302Sagiri int
rds_init_recv_pool(rds_ep_t * ep)6803302Sagiri rds_init_recv_pool(rds_ep_t *ep)
6813302Sagiri {
6823302Sagiri rds_bufpool_t *rpool;
6833302Sagiri rds_qp_t *recvqp;
6843302Sagiri
6853302Sagiri recvqp = &ep->ep_recvqp;
6863302Sagiri rpool = &ep->ep_rcvpool;
6873302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
6883302Sagiri recvqp->qp_depth = MaxDataRecvBuffers;
6893302Sagiri recvqp->qp_level = 0;
6903302Sagiri recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100;
6913302Sagiri recvqp->qp_taskqpending = B_FALSE;
6923302Sagiri
6933302Sagiri rpool->pool_nbuffers = MaxDataRecvBuffers;
6943302Sagiri rpool->pool_nbusy = 0;
6953302Sagiri rpool->pool_nfree = 0;
6963302Sagiri } else {
6973302Sagiri recvqp->qp_depth = MaxCtrlRecvBuffers;
6983302Sagiri recvqp->qp_level = 0;
6993302Sagiri recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100;
7003302Sagiri recvqp->qp_taskqpending = B_FALSE;
7013302Sagiri
7023302Sagiri rpool->pool_nbuffers = MaxCtrlRecvBuffers;
7033302Sagiri rpool->pool_nbusy = 0;
7043302Sagiri rpool->pool_nfree = 0;
7053302Sagiri }
7063302Sagiri
7073302Sagiri return (0);
7083302Sagiri }
7093302Sagiri
7103302Sagiri /* Free buffers to the global pool, either cpool or dpool */
7113302Sagiri void
rds_free_buf(rds_bufpool_t * pool,rds_buf_t * bp,uint_t nbuf)7123302Sagiri rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf)
7133302Sagiri {
7143302Sagiri uint_t ix;
7153302Sagiri
7163302Sagiri RDS_DPRINTF4("rds_free_buf", "Enter");
7173302Sagiri
7183302Sagiri ASSERT(nbuf != 0);
7193302Sagiri
7203302Sagiri mutex_enter(&pool->pool_lock);
7213302Sagiri
7223302Sagiri if (pool->pool_nfree != 0) {
7233302Sagiri pool->pool_tailp->buf_nextp = bp;
7243302Sagiri } else {
7253302Sagiri pool->pool_headp = bp;
7263302Sagiri }
7273302Sagiri
7283302Sagiri if (nbuf == 1) {
7293302Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7303302Sagiri bp->buf_ep = NULL;
7313302Sagiri bp->buf_nextp = NULL;
7323302Sagiri pool->pool_tailp = bp;
7333302Sagiri } else {
7343302Sagiri for (ix = 1; ix < nbuf; ix++) {
7353302Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7363302Sagiri bp->buf_ep = NULL;
7373302Sagiri bp = bp->buf_nextp;
7383302Sagiri }
7393302Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_FREE);
7403302Sagiri bp->buf_ep = NULL;
7413302Sagiri bp->buf_nextp = NULL;
7423302Sagiri pool->pool_tailp = bp;
7433302Sagiri }
7443302Sagiri /* tail is always the last buffer */
7453302Sagiri pool->pool_tailp->buf_nextp = NULL;
7463302Sagiri
7473302Sagiri pool->pool_nfree += nbuf;
7483302Sagiri pool->pool_nbusy -= nbuf;
7493302Sagiri
7503302Sagiri mutex_exit(&pool->pool_lock);
7513302Sagiri
7523302Sagiri RDS_DPRINTF4("rds_free_buf", "Return");
7533302Sagiri }
7543302Sagiri
7553302Sagiri /* Get buffers from the global pools, either cpool or dpool */
7563302Sagiri rds_buf_t *
rds_get_buf(rds_bufpool_t * pool,uint_t nbuf,uint_t * nret)7573302Sagiri rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret)
7583302Sagiri {
7593302Sagiri rds_buf_t *bp = NULL, *bp1;
7603302Sagiri uint_t ix;
7613302Sagiri
7623302Sagiri RDS_DPRINTF4("rds_get_buf", "Enter");
7633302Sagiri
7643302Sagiri mutex_enter(&pool->pool_lock);
7653302Sagiri
7663302Sagiri RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d",
7673302Sagiri pool->pool_nfree, nbuf);
7683302Sagiri
7693302Sagiri if (nbuf < pool->pool_nfree) {
7703302Sagiri *nret = nbuf;
7713302Sagiri
7723302Sagiri bp1 = pool->pool_headp;
7733302Sagiri for (ix = 1; ix < nbuf; ix++) {
7743302Sagiri bp1 = bp1->buf_nextp;
7753302Sagiri }
7763302Sagiri
7773302Sagiri bp = pool->pool_headp;
7783302Sagiri pool->pool_headp = bp1->buf_nextp;
7793302Sagiri bp1->buf_nextp = NULL;
7803302Sagiri
7813302Sagiri pool->pool_nfree -= nbuf;
7823302Sagiri pool->pool_nbusy += nbuf;
7833302Sagiri } else if (nbuf >= pool->pool_nfree) {
7843302Sagiri *nret = pool->pool_nfree;
7853302Sagiri
7863302Sagiri bp = pool->pool_headp;
7873302Sagiri
7883302Sagiri pool->pool_headp = NULL;
7893302Sagiri pool->pool_tailp = NULL;
7903302Sagiri
7913302Sagiri pool->pool_nbusy += pool->pool_nfree;
7923302Sagiri pool->pool_nfree = 0;
7933302Sagiri }
7943302Sagiri
7953302Sagiri mutex_exit(&pool->pool_lock);
7963302Sagiri
7973302Sagiri RDS_DPRINTF4("rds_get_buf", "Return");
7983302Sagiri
7993302Sagiri return (bp);
8003302Sagiri }
8013302Sagiri
8023302Sagiri boolean_t
rds_is_recvq_empty(rds_ep_t * ep,boolean_t wait)8033302Sagiri rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait)
8043302Sagiri {
8053302Sagiri rds_qp_t *recvqp;
8063302Sagiri rds_bufpool_t *rpool;
8073302Sagiri boolean_t ret = B_TRUE;
8083302Sagiri
8093302Sagiri recvqp = &ep->ep_recvqp;
8103302Sagiri mutex_enter(&recvqp->qp_lock);
8113302Sagiri RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs",
8123302Sagiri ep, recvqp->qp_level);
8133302Sagiri if (wait) {
8143302Sagiri /* wait until the RQ is empty */
8153302Sagiri while (recvqp->qp_level != 0) {
8163302Sagiri /* wait one second and try again */
8173302Sagiri mutex_exit(&recvqp->qp_lock);
8183302Sagiri delay(drv_usectohz(1000000));
8193302Sagiri mutex_enter(&recvqp->qp_lock);
8203302Sagiri }
8213302Sagiri } else if (recvqp->qp_level != 0) {
8223302Sagiri ret = B_FALSE;
8233302Sagiri }
8243302Sagiri mutex_exit(&recvqp->qp_lock);
8253302Sagiri
8263302Sagiri rpool = &ep->ep_rcvpool;
8273302Sagiri mutex_enter(&rpool->pool_lock);
82810489SGiri.Adari@Sun.COM
82910489SGiri.Adari@Sun.COM /*
83010489SGiri.Adari@Sun.COM * During failovers/reconnects, the app may still have some buffers
83110489SGiri.Adari@Sun.COM * on thier socket queues. Waiting here for those buffers may
83210489SGiri.Adari@Sun.COM * cause a hang. It seems ok for those buffers to get freed later.
83310489SGiri.Adari@Sun.COM */
83410489SGiri.Adari@Sun.COM if (rpool->pool_nbusy != 0) {
83510489SGiri.Adari@Sun.COM RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): "
83610489SGiri.Adari@Sun.COM "There are %d pending buffers on sockqs", ep,
83710489SGiri.Adari@Sun.COM rpool->pool_nbusy);
83810489SGiri.Adari@Sun.COM ret = B_FALSE;
8393302Sagiri }
8403302Sagiri mutex_exit(&rpool->pool_lock);
8413302Sagiri
8423302Sagiri return (ret);
8433302Sagiri }
8443302Sagiri
8453302Sagiri boolean_t
rds_is_sendq_empty(rds_ep_t * ep,uint_t wait)8463302Sagiri rds_is_sendq_empty(rds_ep_t *ep, uint_t wait)
8473302Sagiri {
8483302Sagiri rds_bufpool_t *spool;
8493302Sagiri rds_buf_t *bp;
8503302Sagiri boolean_t ret1 = B_TRUE;
8513302Sagiri
8523302Sagiri /* check if all the sends completed */
8533302Sagiri spool = &ep->ep_sndpool;
8543302Sagiri mutex_enter(&spool->pool_lock);
8553302Sagiri RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
8563302Sagiri "Send Pool contains: %d", ep, spool->pool_nbusy);
8573302Sagiri if (wait) {
8583302Sagiri while (spool->pool_nbusy != 0) {
8593302Sagiri if (rds_no_interrupts) {
8603302Sagiri /* wait one second and try again */
8613302Sagiri delay(drv_usectohz(1000000));
8623302Sagiri rds_poll_send_completions(ep->ep_sendcq, ep,
8633302Sagiri B_TRUE);
8643302Sagiri } else {
8653302Sagiri /* wait one second and try again */
8663302Sagiri mutex_exit(&spool->pool_lock);
8673302Sagiri delay(drv_usectohz(1000000));
8683302Sagiri mutex_enter(&spool->pool_lock);
8693302Sagiri }
8703302Sagiri }
8713302Sagiri
8723302Sagiri if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) {
8733302Sagiri rds_buf_t *ackbp;
8748082SRamaswamy.Tummala@Sun.COM rds_buf_t *prev_ackbp;
8753302Sagiri
8763302Sagiri /*
8773302Sagiri * If the last one is acknowledged then everything
8783302Sagiri * is acknowledged
8793302Sagiri */
8803302Sagiri bp = spool->pool_tailp;
8813302Sagiri ackbp = *(rds_buf_t **)ep->ep_ack_addr;
8828082SRamaswamy.Tummala@Sun.COM prev_ackbp = ackbp;
8833302Sagiri RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
8843302Sagiri "Checking for acknowledgements", ep);
8853302Sagiri while (bp != ackbp) {
8863302Sagiri RDS_DPRINTF2("rds_is_sendq_empty",
8873302Sagiri "EP(%p) BP(0x%p/0x%p) last "
8883302Sagiri "sent/acknowledged", ep, bp, ackbp);
8893302Sagiri mutex_exit(&spool->pool_lock);
8903302Sagiri delay(drv_usectohz(1000000));
8913302Sagiri mutex_enter(&spool->pool_lock);
8923302Sagiri
8933302Sagiri bp = spool->pool_tailp;
8943302Sagiri ackbp = *(rds_buf_t **)ep->ep_ack_addr;
8958082SRamaswamy.Tummala@Sun.COM if (ackbp == prev_ackbp) {
8968082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rds_is_sendq_empty",
8978082SRamaswamy.Tummala@Sun.COM "There has been no progress,"
8988082SRamaswamy.Tummala@Sun.COM "give up and proceed");
8998082SRamaswamy.Tummala@Sun.COM break;
9008082SRamaswamy.Tummala@Sun.COM }
9018082SRamaswamy.Tummala@Sun.COM prev_ackbp = ackbp;
9023302Sagiri }
9033302Sagiri }
9043302Sagiri } else if (spool->pool_nbusy != 0) {
9053302Sagiri ret1 = B_FALSE;
9063302Sagiri }
9073302Sagiri mutex_exit(&spool->pool_lock);
9083302Sagiri
9093302Sagiri /* check if all the rdma acks completed */
9103302Sagiri mutex_enter(&ep->ep_lock);
9113302Sagiri RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): "
9123302Sagiri "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt);
9133302Sagiri if (wait) {
9143302Sagiri while (ep->ep_rdmacnt != 0) {
9153302Sagiri if (rds_no_interrupts) {
9163302Sagiri /* wait one second and try again */
9173302Sagiri delay(drv_usectohz(1000000));
9183302Sagiri rds_poll_send_completions(ep->ep_sendcq, ep,
9193302Sagiri B_FALSE);
9203302Sagiri } else {
9213302Sagiri /* wait one second and try again */
9223302Sagiri mutex_exit(&ep->ep_lock);
9233302Sagiri delay(drv_usectohz(1000000));
9243302Sagiri mutex_enter(&ep->ep_lock);
9253302Sagiri }
9263302Sagiri }
9273302Sagiri } else if (ep->ep_rdmacnt != 0) {
9283302Sagiri ret1 = B_FALSE;
9293302Sagiri }
9303302Sagiri mutex_exit(&ep->ep_lock);
9313302Sagiri
9323302Sagiri return (ret1);
9333302Sagiri }
9343302Sagiri
9353302Sagiri /* Get buffers from the send pool */
9363302Sagiri rds_buf_t *
rds_get_send_buf(rds_ep_t * ep,uint_t nbuf)9373302Sagiri rds_get_send_buf(rds_ep_t *ep, uint_t nbuf)
9383302Sagiri {
9393302Sagiri rds_buf_t *bp = NULL, *bp1;
9403302Sagiri rds_bufpool_t *spool;
9413302Sagiri uint_t waittime = rds_waittime_ms * 1000;
9423302Sagiri uint_t ix;
9433302Sagiri int ret;
9443302Sagiri
9453302Sagiri RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d",
9463302Sagiri ep, nbuf);
9473302Sagiri
9483302Sagiri spool = &ep->ep_sndpool;
9493302Sagiri mutex_enter(&spool->pool_lock);
9503302Sagiri
9513302Sagiri if (rds_no_interrupts) {
9523302Sagiri if ((spool->pool_sqpoll_pending == B_FALSE) &&
9533302Sagiri (spool->pool_nbusy >
9543302Sagiri (spool->pool_nbuffers * rds_poll_percent_full)/100)) {
9553302Sagiri spool->pool_sqpoll_pending = B_TRUE;
9563302Sagiri mutex_exit(&spool->pool_lock);
9573302Sagiri rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE);
9583302Sagiri mutex_enter(&spool->pool_lock);
9593302Sagiri spool->pool_sqpoll_pending = B_FALSE;
9603302Sagiri }
9613302Sagiri }
9623302Sagiri
9633302Sagiri if (spool->pool_nfree < nbuf) {
9643302Sagiri /* wait for buffers to become available */
9653302Sagiri spool->pool_cv_count += nbuf;
966*11066Srafael.vanoni@sun.com ret = cv_reltimedwait_sig(&spool->pool_cv, &spool->pool_lock,
967*11066Srafael.vanoni@sun.com drv_usectohz(waittime), TR_CLOCK_TICK);
9683302Sagiri /* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */
9693302Sagiri if (ret == 0) {
9703302Sagiri /* signal pending */
9713302Sagiri spool->pool_cv_count -= nbuf;
9723302Sagiri mutex_exit(&spool->pool_lock);
9733302Sagiri return (NULL);
9743302Sagiri }
9753302Sagiri
9763302Sagiri spool->pool_cv_count -= nbuf;
9773302Sagiri }
9783302Sagiri
9793302Sagiri /* Have the number of buffers needed */
9803302Sagiri if (spool->pool_nfree > nbuf) {
9813302Sagiri bp = spool->pool_headp;
9823302Sagiri
9833302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
9843302Sagiri rds_buf_t *ackbp;
9853302Sagiri ackbp = *(rds_buf_t **)ep->ep_ack_addr;
9863302Sagiri
9873302Sagiri /* check if all the needed buffers are acknowledged */
9883302Sagiri bp1 = bp;
9893302Sagiri for (ix = 0; ix < nbuf; ix++) {
9903302Sagiri if ((bp1 == ackbp) ||
9913302Sagiri (bp1->buf_state != RDS_SNDBUF_FREE)) {
9923302Sagiri /*
9933302Sagiri * The buffer is not yet signalled or
9943302Sagiri * is not yet acknowledged
9953302Sagiri */
9963302Sagiri RDS_DPRINTF5("rds_get_send_buf",
9973302Sagiri "EP(%p) Buffer (%p) not yet "
9983302Sagiri "acked/completed", ep, bp1);
9993302Sagiri mutex_exit(&spool->pool_lock);
10003302Sagiri return (NULL);
10013302Sagiri }
10023302Sagiri
10033302Sagiri bp1 = bp1->buf_nextp;
10043302Sagiri }
10053302Sagiri }
10063302Sagiri
10073302Sagiri /* mark the buffers as pending */
10083302Sagiri bp1 = bp;
10093302Sagiri for (ix = 1; ix < nbuf; ix++) {
10103302Sagiri ASSERT(bp1->buf_state == RDS_SNDBUF_FREE);
10113302Sagiri bp1->buf_state = RDS_SNDBUF_PENDING;
10123302Sagiri bp1 = bp1->buf_nextp;
10133302Sagiri }
10143302Sagiri ASSERT(bp1->buf_state == RDS_SNDBUF_FREE);
10153302Sagiri bp1->buf_state = RDS_SNDBUF_PENDING;
10163302Sagiri
10173302Sagiri spool->pool_headp = bp1->buf_nextp;
10183302Sagiri bp1->buf_nextp = NULL;
10193302Sagiri if (spool->pool_headp == NULL)
10203302Sagiri spool->pool_tailp = NULL;
10213302Sagiri spool->pool_nfree -= nbuf;
10223302Sagiri spool->pool_nbusy += nbuf;
10233302Sagiri }
10243302Sagiri mutex_exit(&spool->pool_lock);
10253302Sagiri
10263302Sagiri RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d",
10273302Sagiri ep, nbuf);
10283302Sagiri
10293302Sagiri return (bp);
10303302Sagiri }
10313302Sagiri
10323302Sagiri #define RDS_MIN_BUF_TO_WAKE_THREADS 10
10333302Sagiri
10343302Sagiri void
rds_free_send_buf(rds_ep_t * ep,rds_buf_t * headp,rds_buf_t * tailp,uint_t nbuf,boolean_t lock)10353302Sagiri rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf,
10363302Sagiri boolean_t lock)
10373302Sagiri {
10383302Sagiri rds_bufpool_t *spool;
10393302Sagiri rds_buf_t *tmp;
10403302Sagiri
10413302Sagiri RDS_DPRINTF4("rds_free_send_buf", "Enter");
10423302Sagiri
10433302Sagiri ASSERT(nbuf != 0);
10443302Sagiri
10453302Sagiri if (tailp == NULL) {
10463302Sagiri if (nbuf > 1) {
10473302Sagiri tmp = headp;
10483302Sagiri while (tmp->buf_nextp) {
10493302Sagiri tmp = tmp->buf_nextp;
10503302Sagiri }
10513302Sagiri tailp = tmp;
10523302Sagiri } else {
10533302Sagiri tailp = headp;
10543302Sagiri }
10553302Sagiri }
10563302Sagiri
10573302Sagiri spool = &ep->ep_sndpool;
10583302Sagiri
10593302Sagiri if (lock == B_FALSE) {
10603302Sagiri /* lock is not held outside */
10613302Sagiri mutex_enter(&spool->pool_lock);
10623302Sagiri }
10633302Sagiri
10643302Sagiri if (spool->pool_nfree) {
10653302Sagiri spool->pool_tailp->buf_nextp = headp;
10663302Sagiri } else {
10673302Sagiri spool->pool_headp = headp;
10683302Sagiri }
10693302Sagiri spool->pool_tailp = tailp;
10703302Sagiri
10713302Sagiri spool->pool_nfree += nbuf;
10723302Sagiri spool->pool_nbusy -= nbuf;
10733302Sagiri
10743302Sagiri if ((spool->pool_cv_count > 0) &&
10753302Sagiri (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) {
10763302Sagiri if (spool->pool_nfree >= spool->pool_cv_count)
10773302Sagiri cv_broadcast(&spool->pool_cv);
10783302Sagiri else
10793302Sagiri cv_signal(&spool->pool_cv);
10803302Sagiri }
10813302Sagiri
10823302Sagiri if (lock == B_FALSE) {
10833302Sagiri mutex_exit(&spool->pool_lock);
10843302Sagiri }
10853302Sagiri
10863302Sagiri RDS_DPRINTF4("rds_free_send_buf", "Return");
10873302Sagiri }
10883302Sagiri
10893302Sagiri void
rds_free_recv_buf(rds_buf_t * bp,uint_t nbuf)10903302Sagiri rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf)
10913302Sagiri {
10923302Sagiri rds_ep_t *ep;
10933302Sagiri rds_bufpool_t *rpool;
10943302Sagiri rds_buf_t *bp1;
10953302Sagiri uint_t ix;
10963302Sagiri
10973302Sagiri RDS_DPRINTF4("rds_free_recv_buf", "Enter");
10983302Sagiri
10993302Sagiri ASSERT(nbuf != 0);
11003302Sagiri
11013302Sagiri ep = bp->buf_ep;
11023302Sagiri rpool = &ep->ep_rcvpool;
11033302Sagiri
11043302Sagiri mutex_enter(&rpool->pool_lock);
11053302Sagiri
11063302Sagiri /* Add the buffers to the local pool */
11073302Sagiri if (rpool->pool_tailp == NULL) {
11083302Sagiri ASSERT(rpool->pool_headp == NULL);
11093302Sagiri ASSERT(rpool->pool_nfree == 0);
11103302Sagiri rpool->pool_headp = bp;
11113302Sagiri bp1 = bp;
11123302Sagiri for (ix = 1; ix < nbuf; ix++) {
11133302Sagiri if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) {
11143302Sagiri rpool->pool_nbusy--;
11153302Sagiri }
11163302Sagiri bp1->buf_state = RDS_RCVBUF_FREE;
11173302Sagiri bp1 = bp1->buf_nextp;
11183302Sagiri }
11193302Sagiri bp1->buf_nextp = NULL;
11203302Sagiri if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) {
11213302Sagiri rpool->pool_nbusy--;
11223302Sagiri }
11233302Sagiri bp->buf_state = RDS_RCVBUF_FREE;
11243302Sagiri rpool->pool_tailp = bp1;
11253302Sagiri rpool->pool_nfree += nbuf;
11263302Sagiri } else {
11273302Sagiri bp1 = bp;
11283302Sagiri for (ix = 1; ix < nbuf; ix++) {
11293302Sagiri if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) {
11303302Sagiri rpool->pool_nbusy--;
11313302Sagiri }
11323302Sagiri bp1->buf_state = RDS_RCVBUF_FREE;
11333302Sagiri bp1 = bp1->buf_nextp;
11343302Sagiri }
11353302Sagiri bp1->buf_nextp = NULL;
11363302Sagiri if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) {
11373302Sagiri rpool->pool_nbusy--;
11383302Sagiri }
11393302Sagiri bp->buf_state = RDS_RCVBUF_FREE;
11403302Sagiri rpool->pool_tailp->buf_nextp = bp;
11413302Sagiri rpool->pool_tailp = bp1;
11423302Sagiri rpool->pool_nfree += nbuf;
11433302Sagiri }
11443302Sagiri
114510489SGiri.Adari@Sun.COM if (rpool->pool_nfree >= rds_nbuffers_to_putback) {
11463302Sagiri bp = rpool->pool_headp;
11473302Sagiri nbuf = rpool->pool_nfree;
11483302Sagiri rpool->pool_headp = NULL;
11493302Sagiri rpool->pool_tailp = NULL;
11503302Sagiri rpool->pool_nfree = 0;
11513302Sagiri mutex_exit(&rpool->pool_lock);
11523302Sagiri
11533302Sagiri /* Free the buffers to the global pool */
11543302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
11553302Sagiri rds_free_buf(&rds_dpool, bp, nbuf);
11563302Sagiri } else {
11573302Sagiri rds_free_buf(&rds_cpool, bp, nbuf);
11583302Sagiri }
11593302Sagiri
11603302Sagiri return;
11613302Sagiri }
11623302Sagiri mutex_exit(&rpool->pool_lock);
11633302Sagiri
11643302Sagiri RDS_DPRINTF4("rds_free_recv_buf", "Return");
11653302Sagiri }
1166