13302Sagiri /*
23302Sagiri * CDDL HEADER START
33302Sagiri *
43302Sagiri * The contents of this file are subject to the terms of the
53302Sagiri * Common Development and Distribution License (the "License").
63302Sagiri * You may not use this file except in compliance with the License.
73302Sagiri *
83302Sagiri * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93302Sagiri * or http://www.opensolaris.org/os/licensing.
103302Sagiri * See the License for the specific language governing permissions
113302Sagiri * and limitations under the License.
123302Sagiri *
133302Sagiri * When distributing Covered Code, include this CDDL HEADER in each
143302Sagiri * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153302Sagiri * If applicable, add the following below this CDDL HEADER, with the
163302Sagiri * fields enclosed by brackets "[]" replaced with your own identifying
173302Sagiri * information: Portions Copyright [yyyy] [name of copyright owner]
183302Sagiri *
193302Sagiri * CDDL HEADER END
203302Sagiri */
213302Sagiri /*
22*12089SSuhasini.Peddada@Sun.COM * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
233302Sagiri */
243302Sagiri /*
253302Sagiri * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
263302Sagiri *
273302Sagiri * This software is available to you under a choice of one of two
283302Sagiri * licenses. You may choose to be licensed under the terms of the GNU
293302Sagiri * General Public License (GPL) Version 2, available from the file
303302Sagiri * COPYING in the main directory of this source tree, or the
313302Sagiri * OpenIB.org BSD license below:
323302Sagiri *
333302Sagiri * Redistribution and use in source and binary forms, with or
343302Sagiri * without modification, are permitted provided that the following
353302Sagiri * conditions are met:
363302Sagiri *
373302Sagiri * - Redistributions of source code must retain the above
383302Sagiri * copyright notice, this list of conditions and the following
393302Sagiri * disclaimer.
403302Sagiri *
413302Sagiri * - Redistributions in binary form must reproduce the above
423302Sagiri * copyright notice, this list of conditions and the following
433302Sagiri * disclaimer in the documentation and/or other materials
443302Sagiri * provided with the distribution.
453302Sagiri *
463302Sagiri * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
473302Sagiri * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
483302Sagiri * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
493302Sagiri * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
503302Sagiri * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
513302Sagiri * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
523302Sagiri * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
533302Sagiri * SOFTWARE.
543302Sagiri *
553302Sagiri */
563302Sagiri /*
573302Sagiri * Sun elects to include this software in Sun product
583302Sagiri * under the OpenIB BSD license.
593302Sagiri *
603302Sagiri *
613302Sagiri * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
623302Sagiri * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
633302Sagiri * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
643302Sagiri * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
653302Sagiri * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
663302Sagiri * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
673302Sagiri * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
683302Sagiri * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
693302Sagiri * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
703302Sagiri * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
713302Sagiri * POSSIBILITY OF SUCH DAMAGE.
723302Sagiri */
733302Sagiri
743302Sagiri #include <sys/types.h>
753302Sagiri #include <sys/ddi.h>
763302Sagiri #include <sys/sunddi.h>
773302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
783302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
793302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
803302Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
813302Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
823302Sagiri
/*
 * Callback installed in rds_ib_modinfo below.  Its definition is not in
 * the part of the file shown here; judging by the ibt_async_code_t and
 * ibt_async_event_t parameters it receives IBTF asynchronous events.
 */
static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/*
 * Client registration information for this driver.  A pointer to this
 * structure is passed to ibt_attach() by rdsib_initialize_ib().
 */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V_CURR,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};
933302Sagiri
/*
 * performance tunables
 *
 * NOTE(review): none of these are read by the functions visible in this
 * part of the file; check their consumers before changing a default.
 */
uint_t rds_no_interrupts = 0;
uint_t rds_poll_percent_full = 25;
uint_t rds_wc_signal = IBT_NEXT_SOLICITED;
uint_t rds_waittime_ms = 100; /* ms */

/* dev_info pointer handed to ibt_attach() by rdsib_initialize_ib() */
extern dev_info_t *rdsib_dev_info;
/*
 * Declared without a parameter list; rdsib_deinitialize_ib() calls it
 * with a single NULL argument.
 */
extern void rds_close_sessions();
1023302Sagiri
1033302Sagiri static void
rdsib_validate_chan_sizes(ibt_hca_attr_t * hattrp)1043302Sagiri rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
1053302Sagiri {
1063302Sagiri /* The SQ size should not be more than that supported by the HCA */
1073302Sagiri if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
1083302Sagiri ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
1096702Sagiri RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
1103302Sagiri "than that supported by the HCA driver "
1113302Sagiri "(%d + %d > %d or %d), lowering it to a supported value.",
1123302Sagiri RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
1133302Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
1143302Sagiri
1153302Sagiri MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
1163302Sagiri hattrp->hca_max_cq_sz) ?
1173302Sagiri hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
1183302Sagiri hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
1193302Sagiri }
1203302Sagiri
1213302Sagiri /* The RQ size should not be more than that supported by the HCA */
1223302Sagiri if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
1233302Sagiri (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
1246702Sagiri RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
1253302Sagiri "supported by the HCA driver (%d > %d or %d), lowering it "
1263302Sagiri "to a supported value.", MaxDataRecvBuffers,
1273302Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
1283302Sagiri
1293302Sagiri MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
1303302Sagiri hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
1313302Sagiri hattrp->hca_max_chan_sz;
1323302Sagiri }
1333302Sagiri
1343302Sagiri /* The SQ size should not be more than that supported by the HCA */
1353302Sagiri if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
1363302Sagiri (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
1376702Sagiri RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
1383302Sagiri "supported by the HCA driver (%d > %d or %d), lowering it "
1393302Sagiri "to a supported value.", MaxCtrlSendBuffers,
1403302Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
1413302Sagiri
1423302Sagiri MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
1433302Sagiri hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
1443302Sagiri hattrp->hca_max_chan_sz;
1453302Sagiri }
1463302Sagiri
1473302Sagiri /* The RQ size should not be more than that supported by the HCA */
1483302Sagiri if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
1493302Sagiri (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
1506702Sagiri RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
1513302Sagiri "supported by the HCA driver (%d > %d or %d), lowering it "
1523302Sagiri "to a supported value.", MaxCtrlRecvBuffers,
1533302Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
1543302Sagiri
1553302Sagiri MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
1563302Sagiri hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
1573302Sagiri hattrp->hca_max_chan_sz;
1583302Sagiri }
1593302Sagiri
1603302Sagiri /* The MaxRecvMemory should be less than that supported by the HCA */
1615342Sagiri if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
1626702Sagiri RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
1633302Sagiri "supported by the HCA driver (%d > %d), lowering it to %d",
1645342Sagiri NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
1653302Sagiri hattrp->hca_max_memr_len);
1663302Sagiri
1675342Sagiri NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
1683302Sagiri }
1693302Sagiri }
1703302Sagiri
1718082SRamaswamy.Tummala@Sun.COM /* Return hcap, given the hca guid */
1728082SRamaswamy.Tummala@Sun.COM rds_hca_t *
rds_lkup_hca(ib_guid_t hca_guid)1738082SRamaswamy.Tummala@Sun.COM rds_lkup_hca(ib_guid_t hca_guid)
1748082SRamaswamy.Tummala@Sun.COM {
1758082SRamaswamy.Tummala@Sun.COM rds_hca_t *hcap;
1768082SRamaswamy.Tummala@Sun.COM
1778082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p "
1788082SRamaswamy.Tummala@Sun.COM "guid: %llx", rdsib_statep, hca_guid);
1798082SRamaswamy.Tummala@Sun.COM
1808082SRamaswamy.Tummala@Sun.COM rw_enter(&rdsib_statep->rds_hca_lock, RW_READER);
1818082SRamaswamy.Tummala@Sun.COM
1828082SRamaswamy.Tummala@Sun.COM hcap = rdsib_statep->rds_hcalistp;
1838082SRamaswamy.Tummala@Sun.COM while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
1848082SRamaswamy.Tummala@Sun.COM hcap = hcap->hca_nextp;
1858082SRamaswamy.Tummala@Sun.COM }
1868082SRamaswamy.Tummala@Sun.COM
1878082SRamaswamy.Tummala@Sun.COM rw_exit(&rdsib_statep->rds_hca_lock);
1888082SRamaswamy.Tummala@Sun.COM
1898082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF4("rds_lkup_hca", "return");
1908082SRamaswamy.Tummala@Sun.COM
1918082SRamaswamy.Tummala@Sun.COM return (hcap);
1928082SRamaswamy.Tummala@Sun.COM }
1938082SRamaswamy.Tummala@Sun.COM
/*
 * Not defined in the part of the file shown here.  rdsib_init_hca() calls
 * it once per HCA "to minimize stale connections after ungraceful reboots".
 */
void rds_randomize_qps(rds_hca_t *hcap);
1958082SRamaswamy.Tummala@Sun.COM
1968082SRamaswamy.Tummala@Sun.COM static rds_hca_t *
rdsib_init_hca(ib_guid_t hca_guid)1978082SRamaswamy.Tummala@Sun.COM rdsib_init_hca(ib_guid_t hca_guid)
1988082SRamaswamy.Tummala@Sun.COM {
1998082SRamaswamy.Tummala@Sun.COM rds_hca_t *hcap;
2008082SRamaswamy.Tummala@Sun.COM boolean_t alloc = B_FALSE;
2018082SRamaswamy.Tummala@Sun.COM int ret;
2028082SRamaswamy.Tummala@Sun.COM
2038082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid);
2048082SRamaswamy.Tummala@Sun.COM
2058082SRamaswamy.Tummala@Sun.COM /* Do a HCA lookup */
2068082SRamaswamy.Tummala@Sun.COM hcap = rds_lkup_hca(hca_guid);
2078082SRamaswamy.Tummala@Sun.COM
2088082SRamaswamy.Tummala@Sun.COM if (hcap != NULL && hcap->hca_hdl != NULL) {
2098082SRamaswamy.Tummala@Sun.COM /*
2108082SRamaswamy.Tummala@Sun.COM * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA
2118082SRamaswamy.Tummala@Sun.COM * that we have already opened. Just return NULL so that
2128082SRamaswamy.Tummala@Sun.COM * we'll not end up reinitializing the HCA again.
2138082SRamaswamy.Tummala@Sun.COM */
2148082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized");
2158082SRamaswamy.Tummala@Sun.COM return (NULL);
2168082SRamaswamy.Tummala@Sun.COM }
2178082SRamaswamy.Tummala@Sun.COM
2188082SRamaswamy.Tummala@Sun.COM if (hcap == NULL) {
2198082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca", "New HCA is added");
2208082SRamaswamy.Tummala@Sun.COM hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);
2218082SRamaswamy.Tummala@Sun.COM alloc = B_TRUE;
2228082SRamaswamy.Tummala@Sun.COM }
2238082SRamaswamy.Tummala@Sun.COM
2248082SRamaswamy.Tummala@Sun.COM hcap->hca_guid = hca_guid;
2258082SRamaswamy.Tummala@Sun.COM ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid,
2268082SRamaswamy.Tummala@Sun.COM &hcap->hca_hdl);
2278082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
2288082SRamaswamy.Tummala@Sun.COM if (ret == IBT_HCA_IN_USE) {
2298082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca",
2308082SRamaswamy.Tummala@Sun.COM "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE",
2318082SRamaswamy.Tummala@Sun.COM hca_guid);
2328082SRamaswamy.Tummala@Sun.COM } else {
2338082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca",
2348082SRamaswamy.Tummala@Sun.COM "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret);
2358082SRamaswamy.Tummala@Sun.COM }
2368082SRamaswamy.Tummala@Sun.COM if (alloc == B_TRUE) {
2378082SRamaswamy.Tummala@Sun.COM kmem_free(hcap, sizeof (rds_hca_t));
2388082SRamaswamy.Tummala@Sun.COM }
2398082SRamaswamy.Tummala@Sun.COM return (NULL);
2408082SRamaswamy.Tummala@Sun.COM }
2418082SRamaswamy.Tummala@Sun.COM
2428082SRamaswamy.Tummala@Sun.COM ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
2438082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
2448082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca",
2458082SRamaswamy.Tummala@Sun.COM "Query HCA: 0x%llx failed: %d", hca_guid, ret);
2468082SRamaswamy.Tummala@Sun.COM ret = ibt_close_hca(hcap->hca_hdl);
2478082SRamaswamy.Tummala@Sun.COM ASSERT(ret == IBT_SUCCESS);
2488082SRamaswamy.Tummala@Sun.COM if (alloc == B_TRUE) {
2498082SRamaswamy.Tummala@Sun.COM kmem_free(hcap, sizeof (rds_hca_t));
2508082SRamaswamy.Tummala@Sun.COM } else {
2518082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
2528082SRamaswamy.Tummala@Sun.COM }
2538082SRamaswamy.Tummala@Sun.COM return (NULL);
2548082SRamaswamy.Tummala@Sun.COM }
2558082SRamaswamy.Tummala@Sun.COM
2568082SRamaswamy.Tummala@Sun.COM ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
2578082SRamaswamy.Tummala@Sun.COM &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
2588082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
2598082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca",
2608082SRamaswamy.Tummala@Sun.COM "Query HCA 0x%llx ports failed: %d", hca_guid,
2618082SRamaswamy.Tummala@Sun.COM ret);
2628082SRamaswamy.Tummala@Sun.COM ret = ibt_close_hca(hcap->hca_hdl);
2638082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
2648082SRamaswamy.Tummala@Sun.COM ASSERT(ret == IBT_SUCCESS);
2658082SRamaswamy.Tummala@Sun.COM if (alloc == B_TRUE) {
2668082SRamaswamy.Tummala@Sun.COM kmem_free(hcap, sizeof (rds_hca_t));
2678082SRamaswamy.Tummala@Sun.COM } else {
2688082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
2698082SRamaswamy.Tummala@Sun.COM }
2708082SRamaswamy.Tummala@Sun.COM return (NULL);
2718082SRamaswamy.Tummala@Sun.COM }
2728082SRamaswamy.Tummala@Sun.COM
2738082SRamaswamy.Tummala@Sun.COM /* Only one PD per HCA is allocated, so do it here */
2748082SRamaswamy.Tummala@Sun.COM ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
2758082SRamaswamy.Tummala@Sun.COM &hcap->hca_pdhdl);
2768082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
2778082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca",
2788082SRamaswamy.Tummala@Sun.COM "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret);
2798082SRamaswamy.Tummala@Sun.COM (void) ibt_free_portinfo(hcap->hca_pinfop,
2808082SRamaswamy.Tummala@Sun.COM hcap->hca_pinfo_sz);
2818082SRamaswamy.Tummala@Sun.COM ret = ibt_close_hca(hcap->hca_hdl);
2828082SRamaswamy.Tummala@Sun.COM ASSERT(ret == IBT_SUCCESS);
2838082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
2848082SRamaswamy.Tummala@Sun.COM if (alloc == B_TRUE) {
2858082SRamaswamy.Tummala@Sun.COM kmem_free(hcap, sizeof (rds_hca_t));
2868082SRamaswamy.Tummala@Sun.COM } else {
2878082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
2888082SRamaswamy.Tummala@Sun.COM }
2898082SRamaswamy.Tummala@Sun.COM return (NULL);
2908082SRamaswamy.Tummala@Sun.COM }
2918082SRamaswamy.Tummala@Sun.COM
2928082SRamaswamy.Tummala@Sun.COM rdsib_validate_chan_sizes(&hcap->hca_attr);
2938082SRamaswamy.Tummala@Sun.COM
29410489SGiri.Adari@Sun.COM /* To minimize stale connections after ungraceful reboots */
29510489SGiri.Adari@Sun.COM rds_randomize_qps(hcap);
29610489SGiri.Adari@Sun.COM
2978082SRamaswamy.Tummala@Sun.COM rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
2988082SRamaswamy.Tummala@Sun.COM hcap->hca_state = RDS_HCA_STATE_OPEN;
2998082SRamaswamy.Tummala@Sun.COM if (alloc == B_TRUE) {
3008082SRamaswamy.Tummala@Sun.COM /* this is a new HCA, add it to the list */
3018082SRamaswamy.Tummala@Sun.COM rdsib_statep->rds_nhcas++;
3028082SRamaswamy.Tummala@Sun.COM hcap->hca_nextp = rdsib_statep->rds_hcalistp;
3038082SRamaswamy.Tummala@Sun.COM rdsib_statep->rds_hcalistp = hcap;
3048082SRamaswamy.Tummala@Sun.COM }
3058082SRamaswamy.Tummala@Sun.COM rw_exit(&rdsib_statep->rds_hca_lock);
3068082SRamaswamy.Tummala@Sun.COM
3078082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid);
3088082SRamaswamy.Tummala@Sun.COM
3098082SRamaswamy.Tummala@Sun.COM return (hcap);
3108082SRamaswamy.Tummala@Sun.COM }
3118082SRamaswamy.Tummala@Sun.COM
3123302Sagiri /*
3136438Sagiri * Called from attach
3143302Sagiri */
3153302Sagiri int
rdsib_initialize_ib()3166438Sagiri rdsib_initialize_ib()
3173302Sagiri {
3183302Sagiri ib_guid_t *guidp;
3198082SRamaswamy.Tummala@Sun.COM rds_hca_t *hcap;
3203302Sagiri uint_t ix, hcaix, nhcas;
3213302Sagiri int ret;
3223302Sagiri
3236438Sagiri RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep);
3243302Sagiri
3253302Sagiri ASSERT(rdsib_statep != NULL);
3263302Sagiri if (rdsib_statep == NULL) {
3276438Sagiri RDS_DPRINTF1("rdsib_initialize_ib",
3286438Sagiri "RDS Statep not initialized");
3293302Sagiri return (-1);
3303302Sagiri }
3313302Sagiri
3323302Sagiri /* How many hcas are there? */
3333302Sagiri nhcas = ibt_get_hca_list(&guidp);
3343302Sagiri if (nhcas == 0) {
3356438Sagiri RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available");
3363302Sagiri return (-1);
3373302Sagiri }
3383302Sagiri
3396438Sagiri RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas);
3403302Sagiri
3413302Sagiri /* Register with IBTF */
3423302Sagiri ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
3433302Sagiri &rdsib_statep->rds_ibhdl);
3443302Sagiri if (ret != IBT_SUCCESS) {
3456438Sagiri RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d",
3466438Sagiri ret);
3473302Sagiri (void) ibt_free_hca_list(guidp, nhcas);
3483302Sagiri return (-1);
3493302Sagiri }
3503302Sagiri
3513302Sagiri /*
3523302Sagiri * Open each HCA and gather its information. Don't care about HCAs
3533302Sagiri * that cannot be opened. It is OK as long as atleast one HCA can be
3543302Sagiri * opened.
3553302Sagiri * Initialize a HCA only if all the information is available.
3563302Sagiri */
3573302Sagiri for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
3583302Sagiri RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);
3593302Sagiri
3608082SRamaswamy.Tummala@Sun.COM hcap = rdsib_init_hca(guidp[ix]);
3618082SRamaswamy.Tummala@Sun.COM if (hcap != NULL) hcaix++;
3623302Sagiri }
3633302Sagiri
3643302Sagiri /* free the HCA list, we are done with it */
3653302Sagiri (void) ibt_free_hca_list(guidp, nhcas);
3663302Sagiri
3673302Sagiri if (hcaix == 0) {
3683302Sagiri /* Failed to Initialize even one HCA */
3696438Sagiri RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized");
3703302Sagiri (void) ibt_detach(rdsib_statep->rds_ibhdl);
3713302Sagiri rdsib_statep->rds_ibhdl = NULL;
3723302Sagiri return (-1);
3733302Sagiri }
3743302Sagiri
3753302Sagiri if (hcaix < nhcas) {
3763302Sagiri RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
3773302Sagiri (nhcas - hcaix), nhcas);
3783302Sagiri }
3793302Sagiri
3806438Sagiri RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep);
3813302Sagiri
3823302Sagiri return (0);
3833302Sagiri }
3843302Sagiri
3853302Sagiri /*
3866438Sagiri * Called from detach
3873302Sagiri */
3883302Sagiri void
rdsib_deinitialize_ib()3896438Sagiri rdsib_deinitialize_ib()
3903302Sagiri {
3914154Sagiri rds_hca_t *hcap, *nextp;
3923302Sagiri int ret;
3933302Sagiri
3946438Sagiri RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep);
3953302Sagiri
3963302Sagiri /* close and destroy all the sessions */
3973302Sagiri rds_close_sessions(NULL);
3983302Sagiri
3994154Sagiri /* Release all HCA resources */
4004154Sagiri rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
4018082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d",
4028082SRamaswamy.Tummala@Sun.COM rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas);
4033302Sagiri hcap = rdsib_statep->rds_hcalistp;
4044154Sagiri rdsib_statep->rds_hcalistp = NULL;
4054154Sagiri rdsib_statep->rds_nhcas = 0;
4064154Sagiri rw_exit(&rdsib_statep->rds_hca_lock);
4074154Sagiri
4083302Sagiri while (hcap != NULL) {
4094154Sagiri nextp = hcap->hca_nextp;
4103302Sagiri
4118082SRamaswamy.Tummala@Sun.COM if (hcap->hca_hdl != NULL) {
4128082SRamaswamy.Tummala@Sun.COM ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
4138082SRamaswamy.Tummala@Sun.COM ASSERT(ret == IBT_SUCCESS);
4143302Sagiri
4158082SRamaswamy.Tummala@Sun.COM (void) ibt_free_portinfo(hcap->hca_pinfop,
4168082SRamaswamy.Tummala@Sun.COM hcap->hca_pinfo_sz);
4173302Sagiri
4188082SRamaswamy.Tummala@Sun.COM ret = ibt_close_hca(hcap->hca_hdl);
4198082SRamaswamy.Tummala@Sun.COM ASSERT(ret == IBT_SUCCESS);
4208082SRamaswamy.Tummala@Sun.COM }
4213302Sagiri
4223302Sagiri kmem_free(hcap, sizeof (rds_hca_t));
4234154Sagiri hcap = nextp;
4243302Sagiri }
4253302Sagiri
4263302Sagiri /* Deregister with IBTF */
4273302Sagiri if (rdsib_statep->rds_ibhdl != NULL) {
4283302Sagiri (void) ibt_detach(rdsib_statep->rds_ibhdl);
4293302Sagiri rdsib_statep->rds_ibhdl = NULL;
4303302Sagiri }
4313302Sagiri
4326438Sagiri RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p",
4336438Sagiri rdsib_statep);
4346438Sagiri }
4356438Sagiri
4366438Sagiri /*
4376438Sagiri * Called on open of first RDS socket
4386438Sagiri */
4396438Sagiri int
rdsib_open_ib()4406438Sagiri rdsib_open_ib()
4416438Sagiri {
4426438Sagiri int ret;
4436438Sagiri
4446438Sagiri RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep);
4456438Sagiri
4466438Sagiri /* Enable incoming connection requests */
4476438Sagiri if (rdsib_statep->rds_srvhdl == NULL) {
4486438Sagiri rdsib_statep->rds_srvhdl =
4496438Sagiri rds_register_service(rdsib_statep->rds_ibhdl);
4506438Sagiri if (rdsib_statep->rds_srvhdl == NULL) {
4516438Sagiri RDS_DPRINTF2("rdsib_open_ib",
4526438Sagiri "Service registration failed");
4536438Sagiri return (-1);
4546438Sagiri } else {
4556438Sagiri /* bind the service on all available ports */
4566438Sagiri ret = rds_bind_service(rdsib_statep);
4576438Sagiri if (ret != 0) {
4586438Sagiri RDS_DPRINTF2("rdsib_open_ib",
4596438Sagiri "Bind service failed: %d", ret);
4606438Sagiri }
4616438Sagiri }
4626438Sagiri }
4636438Sagiri
4646438Sagiri RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep);
4656438Sagiri
4666438Sagiri return (0);
4676438Sagiri }
4686438Sagiri
4696438Sagiri /*
4706438Sagiri * Called when all ports are closed.
4716438Sagiri */
4726438Sagiri void
rdsib_close_ib()4736438Sagiri rdsib_close_ib()
4746438Sagiri {
4756438Sagiri int ret;
4766438Sagiri
4776438Sagiri RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep);
4786438Sagiri
4796438Sagiri /* Disable incoming connection requests */
4806438Sagiri if (rdsib_statep->rds_srvhdl != NULL) {
4816438Sagiri ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
4826438Sagiri if (ret != 0) {
4836438Sagiri RDS_DPRINTF2("rdsib_close_ib",
4846438Sagiri "ibt_unbind_all_services failed: %d\n", ret);
4856438Sagiri }
4866438Sagiri ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
4876438Sagiri rdsib_statep->rds_srvhdl);
4886438Sagiri if (ret != 0) {
4896438Sagiri RDS_DPRINTF2("rdsib_close_ib",
4906438Sagiri "ibt_deregister_service failed: %d\n", ret);
4916438Sagiri } else {
4926438Sagiri rdsib_statep->rds_srvhdl = NULL;
4936438Sagiri }
4946438Sagiri }
4956438Sagiri
4966438Sagiri RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep);
4973302Sagiri }
4983302Sagiri
4993302Sagiri /* Return hcap, given the hca guid */
5003302Sagiri rds_hca_t *
rds_get_hcap(rds_state_t * statep,ib_guid_t hca_guid)5013302Sagiri rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
5023302Sagiri {
5033302Sagiri rds_hca_t *hcap;
5043302Sagiri
5053302Sagiri RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
5063302Sagiri "guid: %llx", statep, hca_guid);
5073302Sagiri
5083302Sagiri rw_enter(&statep->rds_hca_lock, RW_READER);
5093302Sagiri
5103302Sagiri hcap = statep->rds_hcalistp;
5113302Sagiri while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
5123302Sagiri hcap = hcap->hca_nextp;
5133302Sagiri }
5143302Sagiri
5158082SRamaswamy.Tummala@Sun.COM /*
5168082SRamaswamy.Tummala@Sun.COM * don't let anyone use this HCA until the RECV memory
5178082SRamaswamy.Tummala@Sun.COM * is registered with this HCA
5188082SRamaswamy.Tummala@Sun.COM */
5198082SRamaswamy.Tummala@Sun.COM if ((hcap != NULL) &&
5208082SRamaswamy.Tummala@Sun.COM (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) {
5218082SRamaswamy.Tummala@Sun.COM ASSERT(hcap->hca_mrhdl != NULL);
5228082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
5238082SRamaswamy.Tummala@Sun.COM return (hcap);
5248082SRamaswamy.Tummala@Sun.COM }
5258082SRamaswamy.Tummala@Sun.COM
5268082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rds_get_hcap",
5278082SRamaswamy.Tummala@Sun.COM "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid);
5283302Sagiri rw_exit(&statep->rds_hca_lock);
5293302Sagiri
5303302Sagiri RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");
5313302Sagiri
5328082SRamaswamy.Tummala@Sun.COM return (NULL);
5333302Sagiri }
5343302Sagiri
5353302Sagiri /* Return hcap, given a gid */
5363302Sagiri rds_hca_t *
rds_gid_to_hcap(rds_state_t * statep,ib_gid_t gid)5373302Sagiri rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
5383302Sagiri {
5394154Sagiri rds_hca_t *hcap;
5404154Sagiri uint_t ix;
5413302Sagiri
5423302Sagiri RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
5433302Sagiri statep, gid.gid_prefix, gid.gid_guid);
5443302Sagiri
5454154Sagiri rw_enter(&statep->rds_hca_lock, RW_READER);
5464154Sagiri
5474154Sagiri hcap = statep->rds_hcalistp;
5484154Sagiri while (hcap != NULL) {
5498082SRamaswamy.Tummala@Sun.COM
5508082SRamaswamy.Tummala@Sun.COM /*
5518082SRamaswamy.Tummala@Sun.COM * don't let anyone use this HCA until the RECV memory
5528082SRamaswamy.Tummala@Sun.COM * is registered with this HCA
5538082SRamaswamy.Tummala@Sun.COM */
5548082SRamaswamy.Tummala@Sun.COM if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) {
5558082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF3("rds_gid_to_hcap",
5568082SRamaswamy.Tummala@Sun.COM "HCA (0x%p, 0x%llx) is not initialized",
5578082SRamaswamy.Tummala@Sun.COM hcap, gid.gid_guid);
5588082SRamaswamy.Tummala@Sun.COM hcap = hcap->hca_nextp;
5598082SRamaswamy.Tummala@Sun.COM continue;
5608082SRamaswamy.Tummala@Sun.COM }
5618082SRamaswamy.Tummala@Sun.COM
5624154Sagiri for (ix = 0; ix < hcap->hca_nports; ix++) {
5634154Sagiri if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
5644154Sagiri gid.gid_prefix) &&
5654154Sagiri (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
5664154Sagiri gid.gid_guid)) {
5674154Sagiri RDS_DPRINTF4("rds_gid_to_hcap",
5684154Sagiri "gid found in hcap: 0x%p", hcap);
5694154Sagiri rw_exit(&statep->rds_hca_lock);
5704154Sagiri return (hcap);
5714154Sagiri }
5724154Sagiri }
5734154Sagiri hcap = hcap->hca_nextp;
5743302Sagiri }
5753302Sagiri
5764154Sagiri rw_exit(&statep->rds_hca_lock);
5774154Sagiri
5784154Sagiri return (NULL);
5793302Sagiri }
5803302Sagiri
5813302Sagiri /* This is called from the send CQ handler */
5823302Sagiri void
rds_send_acknowledgement(rds_ep_t * ep)5833302Sagiri rds_send_acknowledgement(rds_ep_t *ep)
5843302Sagiri {
5853302Sagiri int ret;
5863302Sagiri uint_t ix;
5873302Sagiri
5883302Sagiri RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);
5893302Sagiri
5903302Sagiri mutex_enter(&ep->ep_lock);
5913302Sagiri
5923302Sagiri ASSERT(ep->ep_rdmacnt != 0);
5933302Sagiri
5943302Sagiri /*
5953302Sagiri * The previous ACK completed successfully, send the next one
5963302Sagiri * if more messages were received after sending the last ACK
5973302Sagiri */
5983302Sagiri if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
5993302Sagiri *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
6003302Sagiri mutex_exit(&ep->ep_lock);
6013302Sagiri
6023302Sagiri /* send acknowledgement */
6033302Sagiri RDS_INCR_TXACKS();
6043302Sagiri ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
6053302Sagiri if (ret != IBT_SUCCESS) {
6066702Sagiri RDS_DPRINTF2("rds_send_acknowledgement",
6073302Sagiri "EP(%p): ibt_post_send for acknowledgement "
6083302Sagiri "failed: %d, SQ depth: %d",
6093302Sagiri ep, ret, ep->ep_sndpool.pool_nbusy);
6103302Sagiri mutex_enter(&ep->ep_lock);
6113302Sagiri ep->ep_rdmacnt--;
6123302Sagiri mutex_exit(&ep->ep_lock);
6133302Sagiri }
6143302Sagiri } else {
6153302Sagiri /* ACKed all messages, no more to ACK */
6163302Sagiri ep->ep_rdmacnt--;
6173302Sagiri mutex_exit(&ep->ep_lock);
6183302Sagiri return;
6193302Sagiri }
6203302Sagiri
6213302Sagiri RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
6223302Sagiri }
6233302Sagiri
6243302Sagiri static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq,rds_ep_t * ep)6253302Sagiri rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
6263302Sagiri {
6273302Sagiri ibt_wc_t wc;
6283302Sagiri uint_t npolled;
6293302Sagiri rds_buf_t *bp;
6303302Sagiri rds_ctrl_pkt_t *cpkt;
6313302Sagiri rds_qp_t *recvqp;
6323302Sagiri int ret = IBT_SUCCESS;
6333302Sagiri
6343302Sagiri RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);
6353302Sagiri
6363302Sagiri bzero(&wc, sizeof (ibt_wc_t));
6373302Sagiri ret = ibt_poll_cq(cq, &wc, 1, &npolled);
6383302Sagiri if (ret != IBT_SUCCESS) {
6393302Sagiri if (ret != IBT_CQ_EMPTY) {
6403302Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
6413302Sagiri "returned: %d", ep, cq, ret);
6423302Sagiri } else {
6433302Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
6443302Sagiri "returned: IBT_CQ_EMPTY", ep, cq);
6453302Sagiri }
6463302Sagiri return (ret);
6473302Sagiri }
6483302Sagiri
6493302Sagiri bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
6503302Sagiri
6513302Sagiri if (wc.wc_status != IBT_WC_SUCCESS) {
6523302Sagiri mutex_enter(&ep->ep_recvqp.qp_lock);
6533302Sagiri ep->ep_recvqp.qp_level--;
6543302Sagiri mutex_exit(&ep->ep_recvqp.qp_lock);
6553302Sagiri
6563302Sagiri /* Free the buffer */
6573302Sagiri bp->buf_state = RDS_RCVBUF_FREE;
6583302Sagiri rds_free_recv_buf(bp, 1);
6593302Sagiri
6603302Sagiri /* Receive completion failure */
6613302Sagiri if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
6623302Sagiri RDS_DPRINTF2("rds_poll_ctrl_completions",
6633302Sagiri "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
6643302Sagiri ep, cq, wc.wc_id, wc.wc_status);
6653302Sagiri }
6663302Sagiri return (ret);
6673302Sagiri }
6683302Sagiri
6693302Sagiri /* there is one less in the RQ */
6703302Sagiri recvqp = &ep->ep_recvqp;
6713302Sagiri mutex_enter(&recvqp->qp_lock);
6723302Sagiri recvqp->qp_level--;
6733302Sagiri if ((recvqp->qp_taskqpending == B_FALSE) &&
6743302Sagiri (recvqp->qp_level <= recvqp->qp_lwm)) {
6753302Sagiri /* Time to post more buffers into the RQ */
6763302Sagiri recvqp->qp_taskqpending = B_TRUE;
6773302Sagiri mutex_exit(&recvqp->qp_lock);
6783302Sagiri
6793302Sagiri ret = ddi_taskq_dispatch(rds_taskq,
6803302Sagiri rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
6813302Sagiri if (ret != DDI_SUCCESS) {
6826702Sagiri RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
6833302Sagiri ret);
6843302Sagiri mutex_enter(&recvqp->qp_lock);
6853302Sagiri recvqp->qp_taskqpending = B_FALSE;
6863302Sagiri mutex_exit(&recvqp->qp_lock);
6873302Sagiri }
6883302Sagiri } else {
6893302Sagiri mutex_exit(&recvqp->qp_lock);
6903302Sagiri }
6913302Sagiri
6923302Sagiri cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
6933302Sagiri rds_handle_control_message(ep->ep_sp, cpkt);
6943302Sagiri
6953302Sagiri bp->buf_state = RDS_RCVBUF_FREE;
6963302Sagiri rds_free_recv_buf(bp, 1);
6973302Sagiri
6983302Sagiri RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);
6993302Sagiri
7003302Sagiri return (ret);
7013302Sagiri }
7023302Sagiri
7033302Sagiri #define RDS_POST_FEW_ATATIME 100
7043302Sagiri /* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */
7053302Sagiri void
rds_post_recv_buf(void * arg)7063302Sagiri rds_post_recv_buf(void *arg)
7073302Sagiri {
7083302Sagiri ibt_channel_hdl_t chanhdl;
7093302Sagiri rds_ep_t *ep;
7103302Sagiri rds_session_t *sp;
7113302Sagiri rds_qp_t *recvqp;
7123302Sagiri rds_bufpool_t *gp;
7133302Sagiri rds_buf_t *bp, *bp1;
7143302Sagiri ibt_recv_wr_t *wrp, wr[RDS_POST_FEW_ATATIME];
7153302Sagiri rds_hca_t *hcap;
7163302Sagiri uint_t npost, nspace, rcv_len;
7173302Sagiri uint_t ix, jx, kx;
7183302Sagiri int ret;
7193302Sagiri
7203302Sagiri chanhdl = (ibt_channel_hdl_t)arg;
7213302Sagiri RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
7223302Sagiri RDS_INCR_POST_RCV_BUF_CALLS();
7233302Sagiri
7243302Sagiri ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
7253302Sagiri ASSERT(ep != NULL);
7263302Sagiri sp = ep->ep_sp;
7273302Sagiri recvqp = &ep->ep_recvqp;
7283302Sagiri
7293302Sagiri RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);
7303302Sagiri
7313302Sagiri /* get the hcap for the HCA hosting this channel */
7328082SRamaswamy.Tummala@Sun.COM hcap = rds_lkup_hca(ep->ep_hca_guid);
7333302Sagiri if (hcap == NULL) {
7343302Sagiri RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
7353302Sagiri ep->ep_hca_guid);
7363302Sagiri return;
7373302Sagiri }
7383302Sagiri
7393302Sagiri /* Make sure the session is still connected */
7403302Sagiri rw_enter(&sp->session_lock, RW_READER);
7413302Sagiri if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
7428082SRamaswamy.Tummala@Sun.COM (sp->session_state != RDS_SESSION_STATE_CONNECTED) &&
7438082SRamaswamy.Tummala@Sun.COM (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) {
7443302Sagiri RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
7453302Sagiri "in active state (%d)", ep, sp->session_state);
7463302Sagiri rw_exit(&sp->session_lock);
7473302Sagiri return;
7483302Sagiri }
7493302Sagiri rw_exit(&sp->session_lock);
7503302Sagiri
7513302Sagiri /* how many can be posted */
7523302Sagiri mutex_enter(&recvqp->qp_lock);
7533302Sagiri nspace = recvqp->qp_depth - recvqp->qp_level;
7543302Sagiri if (nspace == 0) {
7553302Sagiri RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
7563302Sagiri recvqp->qp_taskqpending = B_FALSE;
7573302Sagiri mutex_exit(&recvqp->qp_lock);
7583302Sagiri return;
7593302Sagiri }
7603302Sagiri mutex_exit(&recvqp->qp_lock);
7613302Sagiri
7623302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
7633302Sagiri gp = &rds_dpool;
7643302Sagiri rcv_len = RdsPktSize;
7653302Sagiri } else {
7663302Sagiri gp = &rds_cpool;
7673302Sagiri rcv_len = RDS_CTRLPKT_SIZE;
7683302Sagiri }
7693302Sagiri
7703302Sagiri bp = rds_get_buf(gp, nspace, &jx);
7713302Sagiri if (bp == NULL) {
7723302Sagiri RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
7733302Sagiri /* try again later */
7743302Sagiri ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
7759124SGiri.Adari@Sun.COM (void *)chanhdl, DDI_NOSLEEP);
7763302Sagiri if (ret != DDI_SUCCESS) {
7776702Sagiri RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
7783302Sagiri ret);
7793302Sagiri mutex_enter(&recvqp->qp_lock);
7803302Sagiri recvqp->qp_taskqpending = B_FALSE;
7813302Sagiri mutex_exit(&recvqp->qp_lock);
7823302Sagiri }
7833302Sagiri return;
7843302Sagiri }
7853302Sagiri
7863302Sagiri if (jx != nspace) {
7873302Sagiri RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
7883302Sagiri "needed: %d available: %d", ep, nspace, jx);
7893302Sagiri nspace = jx;
7903302Sagiri }
7913302Sagiri
7923302Sagiri bp1 = bp;
7933302Sagiri for (ix = 0; ix < nspace; ix++) {
7943302Sagiri bp1->buf_ep = ep;
7953302Sagiri ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
7963302Sagiri bp1->buf_state = RDS_RCVBUF_POSTED;
7973302Sagiri bp1->buf_ds.ds_key = hcap->hca_lkey;
7983302Sagiri bp1->buf_ds.ds_len = rcv_len;
7993302Sagiri bp1 = bp1->buf_nextp;
8003302Sagiri }
8013302Sagiri
8023302Sagiri #if 0
8033302Sagiri wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
8043302Sagiri KM_SLEEP);
8053302Sagiri #else
8063302Sagiri wrp = &wr[0];
8073302Sagiri #endif
8083302Sagiri
8093302Sagiri npost = nspace;
8103302Sagiri while (npost) {
8113302Sagiri jx = (npost > RDS_POST_FEW_ATATIME) ?
8123302Sagiri RDS_POST_FEW_ATATIME : npost;
8133302Sagiri for (ix = 0; ix < jx; ix++) {
8143302Sagiri wrp[ix].wr_id = (uintptr_t)bp;
8153302Sagiri wrp[ix].wr_nds = 1;
8163302Sagiri wrp[ix].wr_sgl = &bp->buf_ds;
8173302Sagiri bp = bp->buf_nextp;
8183302Sagiri }
8193302Sagiri
8203302Sagiri ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
8213302Sagiri if ((ret != IBT_SUCCESS) || (kx != jx)) {
8226702Sagiri RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
8233302Sagiri "%d", npost, ret);
8243302Sagiri npost -= kx;
8253302Sagiri break;
8263302Sagiri }
8273302Sagiri
8283302Sagiri npost -= jx;
8293302Sagiri }
8303302Sagiri
8313302Sagiri mutex_enter(&recvqp->qp_lock);
8323302Sagiri if (npost != 0) {
8333302Sagiri RDS_DPRINTF2("rds_post_recv_buf",
8343302Sagiri "EP(%p) Failed to post %d WRs", ep, npost);
8353302Sagiri recvqp->qp_level += (nspace - npost);
8363302Sagiri } else {
8373302Sagiri recvqp->qp_level += nspace;
8383302Sagiri }
8393302Sagiri
8403302Sagiri /*
8413302Sagiri * sometimes, the recv WRs can get consumed as soon as they are
8423302Sagiri * posted. In that case, taskq thread to post more WRs to the RQ will
8433302Sagiri * not be scheduled as the taskqpending flag is still set.
8443302Sagiri */
8453302Sagiri if (recvqp->qp_level == 0) {
8463302Sagiri mutex_exit(&recvqp->qp_lock);
8473302Sagiri ret = ddi_taskq_dispatch(rds_taskq,
8489124SGiri.Adari@Sun.COM rds_post_recv_buf, (void *)chanhdl, DDI_NOSLEEP);
8493302Sagiri if (ret != DDI_SUCCESS) {
8506702Sagiri RDS_DPRINTF2("rds_post_recv_buf",
8513302Sagiri "ddi_taskq_dispatch failed: %d", ret);
8523302Sagiri mutex_enter(&recvqp->qp_lock);
8533302Sagiri recvqp->qp_taskqpending = B_FALSE;
8543302Sagiri mutex_exit(&recvqp->qp_lock);
8553302Sagiri }
8563302Sagiri } else {
8573302Sagiri recvqp->qp_taskqpending = B_FALSE;
8583302Sagiri mutex_exit(&recvqp->qp_lock);
8593302Sagiri }
8603302Sagiri
8613302Sagiri #if 0
8623302Sagiri kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
8633302Sagiri #endif
8643302Sagiri
8653302Sagiri RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
8663302Sagiri }
8673302Sagiri
8683302Sagiri static int
rds_poll_data_completions(ibt_cq_hdl_t cq,rds_ep_t * ep)8693302Sagiri rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
8703302Sagiri {
8713302Sagiri ibt_wc_t wc;
8723302Sagiri rds_buf_t *bp;
8733302Sagiri rds_data_hdr_t *pktp;
8743302Sagiri rds_qp_t *recvqp;
8753302Sagiri uint_t npolled;
8763302Sagiri int ret = IBT_SUCCESS;
8773302Sagiri
8783302Sagiri
8793302Sagiri RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);
8803302Sagiri
8813302Sagiri bzero(&wc, sizeof (ibt_wc_t));
8823302Sagiri ret = ibt_poll_cq(cq, &wc, 1, &npolled);
8833302Sagiri if (ret != IBT_SUCCESS) {
8843302Sagiri if (ret != IBT_CQ_EMPTY) {
8853302Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
8863302Sagiri "returned: %d", ep, cq, ret);
8873302Sagiri } else {
8883302Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
8893302Sagiri "returned: IBT_CQ_EMPTY", ep, cq);
8903302Sagiri }
8913302Sagiri return (ret);
8923302Sagiri }
8933302Sagiri
8943302Sagiri bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
8953302Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
8963302Sagiri bp->buf_state = RDS_RCVBUF_ONSOCKQ;
8973302Sagiri bp->buf_nextp = NULL;
8983302Sagiri
8993302Sagiri if (wc.wc_status != IBT_WC_SUCCESS) {
9003302Sagiri mutex_enter(&ep->ep_recvqp.qp_lock);
9013302Sagiri ep->ep_recvqp.qp_level--;
9023302Sagiri mutex_exit(&ep->ep_recvqp.qp_lock);
9033302Sagiri
9043302Sagiri /* free the buffer */
9053302Sagiri bp->buf_state = RDS_RCVBUF_FREE;
9063302Sagiri rds_free_recv_buf(bp, 1);
9073302Sagiri
9083302Sagiri /* Receive completion failure */
9093302Sagiri if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
9103302Sagiri RDS_DPRINTF2("rds_poll_data_completions",
9113302Sagiri "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
9123302Sagiri ep, cq, wc.wc_id, wc.wc_status);
9133302Sagiri RDS_INCR_RXERRS();
9143302Sagiri }
9153302Sagiri return (ret);
9163302Sagiri }
9173302Sagiri
9183302Sagiri /* there is one less in the RQ */
9193302Sagiri recvqp = &ep->ep_recvqp;
9203302Sagiri mutex_enter(&recvqp->qp_lock);
9213302Sagiri recvqp->qp_level--;
9223302Sagiri if ((recvqp->qp_taskqpending == B_FALSE) &&
9233302Sagiri (recvqp->qp_level <= recvqp->qp_lwm)) {
9243302Sagiri /* Time to post more buffers into the RQ */
9253302Sagiri recvqp->qp_taskqpending = B_TRUE;
9263302Sagiri mutex_exit(&recvqp->qp_lock);
9273302Sagiri
9283302Sagiri ret = ddi_taskq_dispatch(rds_taskq,
9293302Sagiri rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
9303302Sagiri if (ret != DDI_SUCCESS) {
9316702Sagiri RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
9323302Sagiri ret);
9333302Sagiri mutex_enter(&recvqp->qp_lock);
9343302Sagiri recvqp->qp_taskqpending = B_FALSE;
9353302Sagiri mutex_exit(&recvqp->qp_lock);
9363302Sagiri }
9373302Sagiri } else {
9383302Sagiri mutex_exit(&recvqp->qp_lock);
9393302Sagiri }
9403302Sagiri
9413302Sagiri pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
9423302Sagiri ASSERT(pktp->dh_datalen != 0);
9433302Sagiri
9443302Sagiri RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
9453302Sagiri "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
9463302Sagiri ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
9473302Sagiri pktp->dh_npkts, pktp->dh_psn);
9483302Sagiri
9493302Sagiri RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
9503302Sagiri pktp->dh_npkts, pktp->dh_psn);
9513302Sagiri
9523302Sagiri if (pktp->dh_npkts == 1) {
9533302Sagiri /* single pkt or last packet */
9543302Sagiri if (pktp->dh_psn != 0) {
9553302Sagiri /* last packet of a segmented message */
9563302Sagiri ASSERT(ep->ep_seglbp != NULL);
9573302Sagiri ep->ep_seglbp->buf_nextp = bp;
9583302Sagiri ep->ep_seglbp = bp;
9593302Sagiri rds_received_msg(ep, ep->ep_segfbp);
9603302Sagiri ep->ep_segfbp = NULL;
9613302Sagiri ep->ep_seglbp = NULL;
9623302Sagiri } else {
9633302Sagiri /* single packet */
9643302Sagiri rds_received_msg(ep, bp);
9653302Sagiri }
9663302Sagiri } else {
9673302Sagiri /* multi-pkt msg */
9683302Sagiri if (pktp->dh_psn == 0) {
9693302Sagiri /* first packet */
9703302Sagiri ASSERT(ep->ep_segfbp == NULL);
9713302Sagiri ep->ep_segfbp = bp;
9723302Sagiri ep->ep_seglbp = bp;
9733302Sagiri } else {
9743302Sagiri /* intermediate packet */
9753302Sagiri ASSERT(ep->ep_segfbp != NULL);
9763302Sagiri ep->ep_seglbp->buf_nextp = bp;
9773302Sagiri ep->ep_seglbp = bp;
9783302Sagiri }
9793302Sagiri }
9803302Sagiri
9813302Sagiri RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);
9823302Sagiri
9833302Sagiri return (ret);
9843302Sagiri }
9853302Sagiri
9863302Sagiri void
rds_recvcq_handler(ibt_cq_hdl_t cq,void * arg)9873302Sagiri rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
9883302Sagiri {
9893302Sagiri rds_ep_t *ep;
9903302Sagiri int ret = IBT_SUCCESS;
9913302Sagiri int (*func)(ibt_cq_hdl_t, rds_ep_t *);
9923302Sagiri
9933302Sagiri ep = (rds_ep_t *)arg;
9943302Sagiri
9953302Sagiri RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);
9963302Sagiri
9973302Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) {
9983302Sagiri func = rds_poll_data_completions;
9993302Sagiri } else {
10003302Sagiri func = rds_poll_ctrl_completions;
10013302Sagiri }
10023302Sagiri
10033302Sagiri do {
10043302Sagiri ret = func(cq, ep);
10053302Sagiri } while (ret != IBT_CQ_EMPTY);
10063302Sagiri
10073302Sagiri /* enable the CQ */
10083302Sagiri ret = ibt_enable_cq_notify(cq, rds_wc_signal);
10093302Sagiri if (ret != IBT_SUCCESS) {
10103302Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
10113302Sagiri "failed: %d", ep, cq, ret);
10123302Sagiri return;
10133302Sagiri }
10143302Sagiri
10153302Sagiri do {
10163302Sagiri ret = func(cq, ep);
10173302Sagiri } while (ret != IBT_CQ_EMPTY);
10183302Sagiri
10193302Sagiri RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
10203302Sagiri }
10213302Sagiri
10223302Sagiri void
rds_poll_send_completions(ibt_cq_hdl_t cq,rds_ep_t * ep,boolean_t lock)10233302Sagiri rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
10243302Sagiri {
10253302Sagiri ibt_wc_t wc[RDS_NUM_DATA_SEND_WCS];
10263302Sagiri uint_t npolled, nret, send_error = 0;
10273302Sagiri rds_buf_t *headp, *tailp, *bp;
10283302Sagiri int ret, ix;
10293302Sagiri
10303302Sagiri RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);
10313302Sagiri
10323302Sagiri headp = NULL;
10333302Sagiri tailp = NULL;
10343302Sagiri npolled = 0;
10353302Sagiri do {
10363302Sagiri ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
10373302Sagiri if (ret != IBT_SUCCESS) {
10383302Sagiri if (ret != IBT_CQ_EMPTY) {
10393302Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
10403302Sagiri "ibt_poll_cq returned: %d", ep, cq, ret);
10413302Sagiri } else {
10423302Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
10433302Sagiri "ibt_poll_cq returned: IBT_CQ_EMPTY",
10443302Sagiri ep, cq);
10453302Sagiri }
10463302Sagiri
10473302Sagiri break;
10483302Sagiri }
10493302Sagiri
10503302Sagiri for (ix = 0; ix < nret; ix++) {
10513302Sagiri if (wc[ix].wc_status == IBT_WC_SUCCESS) {
10523302Sagiri if (wc[ix].wc_type == IBT_WRC_RDMAW) {
10533302Sagiri rds_send_acknowledgement(ep);
10543302Sagiri continue;
10553302Sagiri }
10563302Sagiri
10573302Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
10583302Sagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
10593302Sagiri bp->buf_state = RDS_SNDBUF_FREE;
10603302Sagiri } else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
10613302Sagiri RDS_INCR_TXERRS();
10623302Sagiri RDS_DPRINTF5("rds_poll_send_completions",
10633302Sagiri "EP(%p): WC ID: %p ERROR: %d", ep,
10643302Sagiri wc[ix].wc_id, wc[ix].wc_status);
10653302Sagiri
1066*12089SSuhasini.Peddada@Sun.COM send_error = 1;
1067*12089SSuhasini.Peddada@Sun.COM
10683302Sagiri if (wc[ix].wc_id == RDS_RDMAW_WRID) {
10693302Sagiri mutex_enter(&ep->ep_lock);
10703302Sagiri ep->ep_rdmacnt--;
10713302Sagiri mutex_exit(&ep->ep_lock);
10723302Sagiri continue;
10733302Sagiri }
10743302Sagiri
10753302Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
10766438Sagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
10776438Sagiri bp->buf_state = RDS_SNDBUF_FREE;
10783302Sagiri } else {
10793302Sagiri RDS_INCR_TXERRS();
10803302Sagiri RDS_DPRINTF2("rds_poll_send_completions",
10813302Sagiri "EP(%p): WC ID: %p ERROR: %d", ep,
10823302Sagiri wc[ix].wc_id, wc[ix].wc_status);
10833302Sagiri if (send_error == 0) {
10843302Sagiri rds_session_t *sp = ep->ep_sp;
10853302Sagiri
10863302Sagiri /* don't let anyone send anymore */
10873302Sagiri rw_enter(&sp->session_lock, RW_WRITER);
10883302Sagiri if (sp->session_state !=
10893302Sagiri RDS_SESSION_STATE_ERROR) {
10903302Sagiri sp->session_state =
10913302Sagiri RDS_SESSION_STATE_ERROR;
10923302Sagiri /* Make this the active end */
10933302Sagiri sp->session_type =
10943302Sagiri RDS_SESSION_ACTIVE;
10953302Sagiri }
10963302Sagiri rw_exit(&sp->session_lock);
10973302Sagiri }
10983302Sagiri
1099*12089SSuhasini.Peddada@Sun.COM send_error = 1;
11003302Sagiri
11013302Sagiri if (wc[ix].wc_id == RDS_RDMAW_WRID) {
11023302Sagiri mutex_enter(&ep->ep_lock);
11033302Sagiri ep->ep_rdmacnt--;
11043302Sagiri mutex_exit(&ep->ep_lock);
11053302Sagiri continue;
11063302Sagiri }
11073302Sagiri
11083302Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
11096438Sagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
11106438Sagiri bp->buf_state = RDS_SNDBUF_FREE;
11113302Sagiri }
11123302Sagiri
11133302Sagiri bp->buf_nextp = NULL;
11143302Sagiri if (headp) {
11153302Sagiri tailp->buf_nextp = bp;
11163302Sagiri tailp = bp;
11173302Sagiri } else {
11183302Sagiri headp = bp;
11193302Sagiri tailp = bp;
11203302Sagiri }
11213302Sagiri
11223302Sagiri npolled++;
11233302Sagiri }
11243302Sagiri
11253302Sagiri if (rds_no_interrupts && (npolled > 100)) {
11263302Sagiri break;
11273302Sagiri }
11283302Sagiri
11293302Sagiri if (rds_no_interrupts == 1) {
11303302Sagiri break;
11313302Sagiri }
11323302Sagiri } while (ret != IBT_CQ_EMPTY);
11333302Sagiri
11343302Sagiri RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
11353302Sagiri npolled, send_error);
11363302Sagiri
11373302Sagiri /* put the buffers to the pool */
11383302Sagiri if (npolled != 0) {
11393302Sagiri rds_free_send_buf(ep, headp, tailp, npolled, lock);
11403302Sagiri }
11413302Sagiri
11423302Sagiri if (send_error != 0) {
11433302Sagiri rds_handle_send_error(ep);
11443302Sagiri }
11453302Sagiri
11463302Sagiri RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
11473302Sagiri }
11483302Sagiri
11493302Sagiri void
rds_sendcq_handler(ibt_cq_hdl_t cq,void * arg)11503302Sagiri rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
11513302Sagiri {
11523302Sagiri rds_ep_t *ep;
11533302Sagiri int ret;
11543302Sagiri
11553302Sagiri ep = (rds_ep_t *)arg;
11563302Sagiri
11573302Sagiri RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);
11583302Sagiri
11593302Sagiri /* enable the CQ */
11603302Sagiri ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
11613302Sagiri if (ret != IBT_SUCCESS) {
11623302Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
11633302Sagiri "failed: %d", ep, cq, ret);
11643302Sagiri return;
11653302Sagiri }
11663302Sagiri
11673302Sagiri rds_poll_send_completions(cq, ep, B_FALSE);
11683302Sagiri
11693302Sagiri RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
11703302Sagiri }
11713302Sagiri
11723302Sagiri void
rds_ep_free_rc_channel(rds_ep_t * ep)11733302Sagiri rds_ep_free_rc_channel(rds_ep_t *ep)
11743302Sagiri {
11753302Sagiri int ret;
11763302Sagiri
11773302Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);
11783302Sagiri
11793302Sagiri ASSERT(mutex_owned(&ep->ep_lock));
11803302Sagiri
11813302Sagiri /* free the QP */
11823302Sagiri if (ep->ep_chanhdl != NULL) {
11833302Sagiri /* wait until the RQ is empty */
11843302Sagiri (void) ibt_flush_channel(ep->ep_chanhdl);
11853302Sagiri (void) rds_is_recvq_empty(ep, B_TRUE);
11863302Sagiri ret = ibt_free_channel(ep->ep_chanhdl);
11873302Sagiri if (ret != IBT_SUCCESS) {
11886702Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
11893302Sagiri "ibt_free_channel returned: %d", ep, ret);
11903302Sagiri }
11913302Sagiri ep->ep_chanhdl = NULL;
11923302Sagiri } else {
11933302Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel",
11943302Sagiri "EP(%p) Channel is ALREADY FREE", ep);
11953302Sagiri }
11963302Sagiri
11973302Sagiri /* free the Send CQ */
11983302Sagiri if (ep->ep_sendcq != NULL) {
11993302Sagiri ret = ibt_free_cq(ep->ep_sendcq);
12003302Sagiri if (ret != IBT_SUCCESS) {
12016702Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel",
12023302Sagiri "EP(%p) - for sendcq, ibt_free_cq returned %d",
12033302Sagiri ep, ret);
12043302Sagiri }
12053302Sagiri ep->ep_sendcq = NULL;
12063302Sagiri } else {
12073302Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel",
12083302Sagiri "EP(%p) SendCQ is ALREADY FREE", ep);
12093302Sagiri }
12103302Sagiri
12113302Sagiri /* free the Recv CQ */
12123302Sagiri if (ep->ep_recvcq != NULL) {
12133302Sagiri ret = ibt_free_cq(ep->ep_recvcq);
12143302Sagiri if (ret != IBT_SUCCESS) {
12156702Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel",
12163302Sagiri "EP(%p) - for recvcq, ibt_free_cq returned %d",
12173302Sagiri ep, ret);
12183302Sagiri }
12193302Sagiri ep->ep_recvcq = NULL;
12203302Sagiri } else {
12213302Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel",
12223302Sagiri "EP(%p) RecvCQ is ALREADY FREE", ep);
12233302Sagiri }
12243302Sagiri
12253302Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
12263302Sagiri }
12273302Sagiri
12283302Sagiri /* Allocate resources for RC channel */
12293302Sagiri ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t * ep,uint8_t hca_port)12303302Sagiri rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
12313302Sagiri {
12323302Sagiri int ret = IBT_SUCCESS;
12333302Sagiri ibt_cq_attr_t scqattr, rcqattr;
12343302Sagiri ibt_rc_chan_alloc_args_t chanargs;
12353302Sagiri ibt_channel_hdl_t chanhdl;
12364154Sagiri rds_session_t *sp;
12373302Sagiri rds_hca_t *hcap;
12383302Sagiri
12393302Sagiri RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
12403302Sagiri ep, hca_port);
12413302Sagiri
12424154Sagiri /* Update the EP with the right IP address and HCA guid */
12434154Sagiri sp = ep->ep_sp;
12444154Sagiri ASSERT(sp != NULL);
12454154Sagiri rw_enter(&sp->session_lock, RW_READER);
12464154Sagiri mutex_enter(&ep->ep_lock);
12474154Sagiri ep->ep_myip = sp->session_myip;
12484154Sagiri ep->ep_remip = sp->session_remip;
12494154Sagiri hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
12504154Sagiri ep->ep_hca_guid = hcap->hca_guid;
12514154Sagiri mutex_exit(&ep->ep_lock);
12524154Sagiri rw_exit(&sp->session_lock);
12533302Sagiri
12543302Sagiri /* reset taskqpending flag here */
12553302Sagiri ep->ep_recvqp.qp_taskqpending = B_FALSE;
12563302Sagiri
12573302Sagiri if (ep->ep_type == RDS_EP_TYPE_CTRL) {
12583302Sagiri scqattr.cq_size = MaxCtrlSendBuffers;
12593302Sagiri scqattr.cq_sched = NULL;
12603302Sagiri scqattr.cq_flags = IBT_CQ_NO_FLAGS;
12613302Sagiri
12623302Sagiri rcqattr.cq_size = MaxCtrlRecvBuffers;
12633302Sagiri rcqattr.cq_sched = NULL;
12643302Sagiri rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
12653302Sagiri
12663302Sagiri chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
12673302Sagiri chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
12683302Sagiri chanargs.rc_sizes.cs_sq_sgl = 1;
12693302Sagiri chanargs.rc_sizes.cs_rq_sgl = 1;
12703302Sagiri } else {
12713302Sagiri scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
12723302Sagiri scqattr.cq_sched = NULL;
12733302Sagiri scqattr.cq_flags = IBT_CQ_NO_FLAGS;
12743302Sagiri
12753302Sagiri rcqattr.cq_size = MaxDataRecvBuffers;
12763302Sagiri rcqattr.cq_sched = NULL;
12773302Sagiri rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
12783302Sagiri
12793302Sagiri chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
12803302Sagiri chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
12813302Sagiri chanargs.rc_sizes.cs_sq_sgl = 1;
12823302Sagiri chanargs.rc_sizes.cs_rq_sgl = 1;
12833302Sagiri }
12843302Sagiri
12855342Sagiri mutex_enter(&ep->ep_lock);
12863302Sagiri if (ep->ep_sendcq == NULL) {
12873302Sagiri /* returned size is always greater than the requested size */
12883302Sagiri ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
12893302Sagiri &ep->ep_sendcq, NULL);
12903302Sagiri if (ret != IBT_SUCCESS) {
12913302Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
12923302Sagiri "failed, size = %d: %d", scqattr.cq_size, ret);
12935342Sagiri mutex_exit(&ep->ep_lock);
12943302Sagiri return (NULL);
12953302Sagiri }
12963302Sagiri
12973302Sagiri (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
12983302Sagiri ep);
12993302Sagiri
13003302Sagiri if (rds_no_interrupts == 0) {
13013302Sagiri ret = ibt_enable_cq_notify(ep->ep_sendcq,
13023302Sagiri IBT_NEXT_COMPLETION);
13033302Sagiri if (ret != IBT_SUCCESS) {
13043302Sagiri RDS_DPRINTF2(LABEL,
13053302Sagiri "ibt_enable_cq_notify failed: %d", ret);
13063302Sagiri (void) ibt_free_cq(ep->ep_sendcq);
13073302Sagiri ep->ep_sendcq = NULL;
13085342Sagiri mutex_exit(&ep->ep_lock);
13093302Sagiri return (NULL);
13103302Sagiri }
13113302Sagiri }
13123302Sagiri }
13133302Sagiri
13143302Sagiri if (ep->ep_recvcq == NULL) {
13153302Sagiri /* returned size is always greater than the requested size */
13163302Sagiri ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
13173302Sagiri &ep->ep_recvcq, NULL);
13183302Sagiri if (ret != IBT_SUCCESS) {
13193302Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
13203302Sagiri "failed, size = %d: %d", rcqattr.cq_size, ret);
13213302Sagiri (void) ibt_free_cq(ep->ep_sendcq);
13223302Sagiri ep->ep_sendcq = NULL;
13235342Sagiri mutex_exit(&ep->ep_lock);
13243302Sagiri return (NULL);
13253302Sagiri }
13263302Sagiri
13273302Sagiri (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
13283302Sagiri ep);
13293302Sagiri
13303302Sagiri ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
13313302Sagiri if (ret != IBT_SUCCESS) {
13323302Sagiri RDS_DPRINTF2(LABEL,
13333302Sagiri "ibt_enable_cq_notify failed: %d", ret);
13343302Sagiri (void) ibt_free_cq(ep->ep_recvcq);
13353302Sagiri ep->ep_recvcq = NULL;
13363302Sagiri (void) ibt_free_cq(ep->ep_sendcq);
13373302Sagiri ep->ep_sendcq = NULL;
13385342Sagiri mutex_exit(&ep->ep_lock);
13393302Sagiri return (NULL);
13403302Sagiri }
13413302Sagiri }
13423302Sagiri
13433302Sagiri chanargs.rc_flags = IBT_ALL_SIGNALED;
13443302Sagiri chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
13453302Sagiri IBT_CEP_ATOMIC;
13463302Sagiri chanargs.rc_hca_port_num = hca_port;
13473302Sagiri chanargs.rc_scq = ep->ep_sendcq;
13483302Sagiri chanargs.rc_rcq = ep->ep_recvcq;
13493302Sagiri chanargs.rc_pd = hcap->hca_pdhdl;
13503302Sagiri chanargs.rc_srq = NULL;
13513302Sagiri
13523302Sagiri ret = ibt_alloc_rc_channel(hcap->hca_hdl,
13533302Sagiri IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
13543302Sagiri if (ret != IBT_SUCCESS) {
13553302Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d",
13563302Sagiri ret);
13573302Sagiri (void) ibt_free_cq(ep->ep_recvcq);
13583302Sagiri ep->ep_recvcq = NULL;
13593302Sagiri (void) ibt_free_cq(ep->ep_sendcq);
13603302Sagiri ep->ep_sendcq = NULL;
13615342Sagiri mutex_exit(&ep->ep_lock);
13623302Sagiri return (NULL);
13633302Sagiri }
13645342Sagiri mutex_exit(&ep->ep_lock);
13653302Sagiri
13663302Sagiri /* Chan private should contain the ep */
13673302Sagiri (void) ibt_set_chan_private(chanhdl, ep);
13683302Sagiri
13693302Sagiri RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl);
13703302Sagiri
13713302Sagiri return (chanhdl);
13723302Sagiri }
13733302Sagiri
13743302Sagiri
#if 0

/*
 * Return node guid given a port gid.
 * Yields 0 when ibt_gid_to_node_info() fails for the gid.
 */
ib_guid_t
rds_gid_to_node_guid(ib_gid_t gid)
{
	ibt_node_info_t	info;
	int		status;

	RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx",
	    gid.gid_prefix, gid.gid_guid);

	status = ibt_gid_to_node_info(gid, &info);
	if (status == IBT_SUCCESS) {
		RDS_DPRINTF4("rds_gid_to_node_guid",
		    "Return: Node guid: %llx", info.n_node_guid);
		return (info.n_node_guid);
	}

	RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx failed",
	    gid.gid_prefix, gid.gid_guid);
	return (0LL);
}

#endif
14013302Sagiri
14023302Sagiri static void
rds_handle_portup_event(rds_state_t * statep,ibt_hca_hdl_t hdl,ibt_async_event_t * event)14033302Sagiri rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl,
14043302Sagiri ibt_async_event_t *event)
14053302Sagiri {
14063302Sagiri rds_hca_t *hcap;
14073302Sagiri ibt_hca_portinfo_t *newpinfop, *oldpinfop;
14083302Sagiri uint_t newsize, oldsize, nport;
14093302Sagiri ib_gid_t gid;
14103302Sagiri int ret;
14113302Sagiri
14124154Sagiri RDS_DPRINTF2("rds_handle_portup_event",
14134154Sagiri "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep);
14143302Sagiri
14158082SRamaswamy.Tummala@Sun.COM rw_enter(&statep->rds_hca_lock, RW_WRITER);
14168082SRamaswamy.Tummala@Sun.COM
14178082SRamaswamy.Tummala@Sun.COM hcap = statep->rds_hcalistp;
14188082SRamaswamy.Tummala@Sun.COM while ((hcap != NULL) && (hcap->hca_guid != event->ev_hca_guid)) {
14198082SRamaswamy.Tummala@Sun.COM hcap = hcap->hca_nextp;
14206438Sagiri }
14216438Sagiri
14224154Sagiri if (hcap == NULL) {
14234154Sagiri RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is "
14244154Sagiri "not in our list", event->ev_hca_guid);
14258082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
14264154Sagiri return;
14274154Sagiri }
14283302Sagiri
14293302Sagiri ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize);
14303302Sagiri if (ret != IBT_SUCCESS) {
14313302Sagiri RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret);
14328082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
14333302Sagiri return;
14343302Sagiri }
14353302Sagiri
14363302Sagiri oldpinfop = hcap->hca_pinfop;
14373302Sagiri oldsize = hcap->hca_pinfo_sz;
14383302Sagiri hcap->hca_pinfop = newpinfop;
14393302Sagiri hcap->hca_pinfo_sz = newsize;
14403302Sagiri
14418082SRamaswamy.Tummala@Sun.COM (void) ibt_free_portinfo(oldpinfop, oldsize);
14423302Sagiri
14438082SRamaswamy.Tummala@Sun.COM /* If RDS service is not registered then no bind is needed */
14448082SRamaswamy.Tummala@Sun.COM if (statep->rds_srvhdl == NULL) {
14458082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rds_handle_portup_event",
14468082SRamaswamy.Tummala@Sun.COM "RDS Service is not registered, so no action needed");
14478082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
14488082SRamaswamy.Tummala@Sun.COM return;
14493302Sagiri }
14503302Sagiri
14518082SRamaswamy.Tummala@Sun.COM /*
14528082SRamaswamy.Tummala@Sun.COM * If the service was previously bound on this port and
14538082SRamaswamy.Tummala@Sun.COM * if this port has changed state down and now up, we do not
14548082SRamaswamy.Tummala@Sun.COM * need to bind the service again. The bind is expected to
14558082SRamaswamy.Tummala@Sun.COM * persist across state changes. If the service was never bound
14568082SRamaswamy.Tummala@Sun.COM * before then we bind it this time.
14578082SRamaswamy.Tummala@Sun.COM */
14588082SRamaswamy.Tummala@Sun.COM if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) {
14598082SRamaswamy.Tummala@Sun.COM
14608082SRamaswamy.Tummala@Sun.COM /* structure copy */
14618082SRamaswamy.Tummala@Sun.COM gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0];
14628082SRamaswamy.Tummala@Sun.COM
14638082SRamaswamy.Tummala@Sun.COM /* bind RDS service on the port, pass statep as cm_private */
14648082SRamaswamy.Tummala@Sun.COM ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep,
14658082SRamaswamy.Tummala@Sun.COM &hcap->hca_bindhdl[event->ev_port - 1]);
14668082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
14678082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rds_handle_portup_event",
14688082SRamaswamy.Tummala@Sun.COM "Bind service for HCA: 0x%llx Port: %d "
14698082SRamaswamy.Tummala@Sun.COM "gid %llx:%llx returned: %d", event->ev_hca_guid,
14708082SRamaswamy.Tummala@Sun.COM event->ev_port, gid.gid_prefix, gid.gid_guid, ret);
14718082SRamaswamy.Tummala@Sun.COM }
14728082SRamaswamy.Tummala@Sun.COM }
14738082SRamaswamy.Tummala@Sun.COM
14748082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
14753302Sagiri
14763302Sagiri RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx",
14773302Sagiri event->ev_hca_guid);
14783302Sagiri }
14793302Sagiri
14803302Sagiri static void
rdsib_add_hca(ib_guid_t hca_guid)14818082SRamaswamy.Tummala@Sun.COM rdsib_add_hca(ib_guid_t hca_guid)
14828082SRamaswamy.Tummala@Sun.COM {
14838082SRamaswamy.Tummala@Sun.COM rds_hca_t *hcap;
14848082SRamaswamy.Tummala@Sun.COM ibt_mr_attr_t mem_attr;
14858082SRamaswamy.Tummala@Sun.COM ibt_mr_desc_t mem_desc;
14868082SRamaswamy.Tummala@Sun.COM int ret;
14878082SRamaswamy.Tummala@Sun.COM
14888082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid);
14898082SRamaswamy.Tummala@Sun.COM
14908082SRamaswamy.Tummala@Sun.COM hcap = rdsib_init_hca(hca_guid);
14918082SRamaswamy.Tummala@Sun.COM if (hcap == NULL)
14928082SRamaswamy.Tummala@Sun.COM return;
14938082SRamaswamy.Tummala@Sun.COM
14948082SRamaswamy.Tummala@Sun.COM /* register the recv memory with this hca */
14958082SRamaswamy.Tummala@Sun.COM mutex_enter(&rds_dpool.pool_lock);
14968082SRamaswamy.Tummala@Sun.COM if (rds_dpool.pool_memp == NULL) {
14978082SRamaswamy.Tummala@Sun.COM /* no memory to register */
14988082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_add_hca", "No memory to register");
14998082SRamaswamy.Tummala@Sun.COM mutex_exit(&rds_dpool.pool_lock);
15008082SRamaswamy.Tummala@Sun.COM return;
15018082SRamaswamy.Tummala@Sun.COM }
15028082SRamaswamy.Tummala@Sun.COM
15038082SRamaswamy.Tummala@Sun.COM mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp;
15048082SRamaswamy.Tummala@Sun.COM mem_attr.mr_len = rds_dpool.pool_memsize;
15058082SRamaswamy.Tummala@Sun.COM mem_attr.mr_as = NULL;
15068082SRamaswamy.Tummala@Sun.COM mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
15078082SRamaswamy.Tummala@Sun.COM
15088082SRamaswamy.Tummala@Sun.COM ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr,
15098082SRamaswamy.Tummala@Sun.COM &hcap->hca_mrhdl, &mem_desc);
15108082SRamaswamy.Tummala@Sun.COM
15118082SRamaswamy.Tummala@Sun.COM mutex_exit(&rds_dpool.pool_lock);
15128082SRamaswamy.Tummala@Sun.COM
15138082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
15148082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d",
15158082SRamaswamy.Tummala@Sun.COM ret);
15168082SRamaswamy.Tummala@Sun.COM } else {
15178082SRamaswamy.Tummala@Sun.COM rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
15188082SRamaswamy.Tummala@Sun.COM hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED;
15198082SRamaswamy.Tummala@Sun.COM hcap->hca_lkey = mem_desc.md_lkey;
15208082SRamaswamy.Tummala@Sun.COM hcap->hca_rkey = mem_desc.md_rkey;
15218082SRamaswamy.Tummala@Sun.COM rw_exit(&rdsib_statep->rds_hca_lock);
15228082SRamaswamy.Tummala@Sun.COM }
15238082SRamaswamy.Tummala@Sun.COM
15248082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid);
15258082SRamaswamy.Tummala@Sun.COM }
15268082SRamaswamy.Tummala@Sun.COM
15278082SRamaswamy.Tummala@Sun.COM void rds_close_this_session(rds_session_t *sp, uint8_t wait);
15288082SRamaswamy.Tummala@Sun.COM int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port);
15298082SRamaswamy.Tummala@Sun.COM
15308082SRamaswamy.Tummala@Sun.COM static void
rdsib_del_hca(rds_state_t * statep,ib_guid_t hca_guid)15318082SRamaswamy.Tummala@Sun.COM rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid)
15328082SRamaswamy.Tummala@Sun.COM {
15338082SRamaswamy.Tummala@Sun.COM rds_session_t *sp;
15348082SRamaswamy.Tummala@Sun.COM rds_hca_t *hcap;
15358082SRamaswamy.Tummala@Sun.COM rds_hca_state_t saved_state;
15368082SRamaswamy.Tummala@Sun.COM int ret, ix;
15378082SRamaswamy.Tummala@Sun.COM
15388082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid);
15398082SRamaswamy.Tummala@Sun.COM
15408082SRamaswamy.Tummala@Sun.COM /*
15418082SRamaswamy.Tummala@Sun.COM * This should be a write lock as we don't want anyone to get access
15428082SRamaswamy.Tummala@Sun.COM * to the hcap while we are modifing its contents
15438082SRamaswamy.Tummala@Sun.COM */
15448082SRamaswamy.Tummala@Sun.COM rw_enter(&statep->rds_hca_lock, RW_WRITER);
15458082SRamaswamy.Tummala@Sun.COM
15468082SRamaswamy.Tummala@Sun.COM hcap = statep->rds_hcalistp;
15478082SRamaswamy.Tummala@Sun.COM while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
15488082SRamaswamy.Tummala@Sun.COM hcap = hcap->hca_nextp;
15498082SRamaswamy.Tummala@Sun.COM }
15508082SRamaswamy.Tummala@Sun.COM
15518082SRamaswamy.Tummala@Sun.COM /* Prevent initiating any new activity on this HCA */
15528082SRamaswamy.Tummala@Sun.COM ASSERT(hcap != NULL);
15538082SRamaswamy.Tummala@Sun.COM saved_state = hcap->hca_state;
15548082SRamaswamy.Tummala@Sun.COM hcap->hca_state = RDS_HCA_STATE_STOPPING;
15558082SRamaswamy.Tummala@Sun.COM
15568082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
15578082SRamaswamy.Tummala@Sun.COM
15588082SRamaswamy.Tummala@Sun.COM /*
15598082SRamaswamy.Tummala@Sun.COM * stop the outgoing traffic and close any active sessions on this hca.
15608082SRamaswamy.Tummala@Sun.COM * Any pending messages in the SQ will be allowed to complete.
15618082SRamaswamy.Tummala@Sun.COM */
15628082SRamaswamy.Tummala@Sun.COM rw_enter(&statep->rds_sessionlock, RW_READER);
15638082SRamaswamy.Tummala@Sun.COM sp = statep->rds_sessionlistp;
15648082SRamaswamy.Tummala@Sun.COM while (sp) {
15658082SRamaswamy.Tummala@Sun.COM if (sp->session_hca_guid != hca_guid) {
15668082SRamaswamy.Tummala@Sun.COM sp = sp->session_nextp;
15678082SRamaswamy.Tummala@Sun.COM continue;
15688082SRamaswamy.Tummala@Sun.COM }
15698082SRamaswamy.Tummala@Sun.COM
15708082SRamaswamy.Tummala@Sun.COM rw_enter(&sp->session_lock, RW_WRITER);
15718082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
15728082SRamaswamy.Tummala@Sun.COM sp->session_state);
15738082SRamaswamy.Tummala@Sun.COM /*
15748082SRamaswamy.Tummala@Sun.COM * We are changing the session state in advance. This prevents
15758082SRamaswamy.Tummala@Sun.COM * further messages to be posted to the SQ. We then
15768082SRamaswamy.Tummala@Sun.COM * send a control message to the remote and tell it close
15778082SRamaswamy.Tummala@Sun.COM * the session.
15788082SRamaswamy.Tummala@Sun.COM */
15798082SRamaswamy.Tummala@Sun.COM sp->session_state = RDS_SESSION_STATE_HCA_CLOSING;
15808082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
15818082SRamaswamy.Tummala@Sun.COM "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
15828082SRamaswamy.Tummala@Sun.COM rw_exit(&sp->session_lock);
15838082SRamaswamy.Tummala@Sun.COM
15848082SRamaswamy.Tummala@Sun.COM /*
15858082SRamaswamy.Tummala@Sun.COM * wait until the sendq is empty then tell the remote to
15868082SRamaswamy.Tummala@Sun.COM * close this session. This enables for graceful shutdown of
15878082SRamaswamy.Tummala@Sun.COM * the session
15888082SRamaswamy.Tummala@Sun.COM */
158910489SGiri.Adari@Sun.COM (void) rds_is_sendq_empty(&sp->session_dataep, 2);
15908082SRamaswamy.Tummala@Sun.COM (void) rds_post_control_message(sp,
15918082SRamaswamy.Tummala@Sun.COM RDS_CTRL_CODE_CLOSE_SESSION, 0);
15928082SRamaswamy.Tummala@Sun.COM
15938082SRamaswamy.Tummala@Sun.COM sp = sp->session_nextp;
15948082SRamaswamy.Tummala@Sun.COM }
15958082SRamaswamy.Tummala@Sun.COM
15968082SRamaswamy.Tummala@Sun.COM /* wait until all the sessions are off this HCA */
15978082SRamaswamy.Tummala@Sun.COM sp = statep->rds_sessionlistp;
15988082SRamaswamy.Tummala@Sun.COM while (sp) {
15998082SRamaswamy.Tummala@Sun.COM if (sp->session_hca_guid != hca_guid) {
16008082SRamaswamy.Tummala@Sun.COM sp = sp->session_nextp;
16018082SRamaswamy.Tummala@Sun.COM continue;
16028082SRamaswamy.Tummala@Sun.COM }
16038082SRamaswamy.Tummala@Sun.COM
16048082SRamaswamy.Tummala@Sun.COM rw_enter(&sp->session_lock, RW_READER);
16058082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
16068082SRamaswamy.Tummala@Sun.COM sp->session_state);
16078082SRamaswamy.Tummala@Sun.COM
16088082SRamaswamy.Tummala@Sun.COM while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) ||
16098082SRamaswamy.Tummala@Sun.COM (sp->session_state == RDS_SESSION_STATE_ERROR) ||
16108082SRamaswamy.Tummala@Sun.COM (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) ||
16118082SRamaswamy.Tummala@Sun.COM (sp->session_state == RDS_SESSION_STATE_CLOSED)) {
16128082SRamaswamy.Tummala@Sun.COM rw_exit(&sp->session_lock);
16138082SRamaswamy.Tummala@Sun.COM delay(drv_usectohz(1000000));
16148082SRamaswamy.Tummala@Sun.COM rw_enter(&sp->session_lock, RW_READER);
16158082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
16168082SRamaswamy.Tummala@Sun.COM sp->session_state);
16178082SRamaswamy.Tummala@Sun.COM }
16188082SRamaswamy.Tummala@Sun.COM
16198082SRamaswamy.Tummala@Sun.COM rw_exit(&sp->session_lock);
16208082SRamaswamy.Tummala@Sun.COM
16218082SRamaswamy.Tummala@Sun.COM sp = sp->session_nextp;
16228082SRamaswamy.Tummala@Sun.COM }
16238082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_sessionlock);
16248082SRamaswamy.Tummala@Sun.COM
16258082SRamaswamy.Tummala@Sun.COM /*
16268082SRamaswamy.Tummala@Sun.COM * if rdsib_close_ib was called before this, then that would have
16278082SRamaswamy.Tummala@Sun.COM * unbound the service on all ports. In that case, the HCA structs
16288082SRamaswamy.Tummala@Sun.COM * will contain stale bindhdls. Hence, we do not call unbind unless
16298082SRamaswamy.Tummala@Sun.COM * the service is still registered.
16308082SRamaswamy.Tummala@Sun.COM */
16318082SRamaswamy.Tummala@Sun.COM if (statep->rds_srvhdl != NULL) {
16328082SRamaswamy.Tummala@Sun.COM /* unbind RDS service on all ports on this HCA */
16338082SRamaswamy.Tummala@Sun.COM for (ix = 0; ix < hcap->hca_nports; ix++) {
16348082SRamaswamy.Tummala@Sun.COM if (hcap->hca_bindhdl[ix] == NULL) {
16358082SRamaswamy.Tummala@Sun.COM continue;
16368082SRamaswamy.Tummala@Sun.COM }
16378082SRamaswamy.Tummala@Sun.COM
16388082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca",
16398082SRamaswamy.Tummala@Sun.COM "Unbinding Service: port: %d, bindhdl: %p",
16408082SRamaswamy.Tummala@Sun.COM ix + 1, hcap->hca_bindhdl[ix]);
16418082SRamaswamy.Tummala@Sun.COM (void) ibt_unbind_service(rdsib_statep->rds_srvhdl,
16428082SRamaswamy.Tummala@Sun.COM hcap->hca_bindhdl[ix]);
16438082SRamaswamy.Tummala@Sun.COM hcap->hca_bindhdl[ix] = NULL;
16448082SRamaswamy.Tummala@Sun.COM }
16458082SRamaswamy.Tummala@Sun.COM }
16468082SRamaswamy.Tummala@Sun.COM
16478082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap,
16488082SRamaswamy.Tummala@Sun.COM hcap->hca_state);
16498082SRamaswamy.Tummala@Sun.COM
16508082SRamaswamy.Tummala@Sun.COM switch (saved_state) {
16518082SRamaswamy.Tummala@Sun.COM case RDS_HCA_STATE_MEM_REGISTERED:
16528082SRamaswamy.Tummala@Sun.COM ASSERT(hcap->hca_mrhdl != NULL);
16538082SRamaswamy.Tummala@Sun.COM ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl);
16548082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
16558082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca",
16568082SRamaswamy.Tummala@Sun.COM "ibt_deregister_mr failed: %d", ret);
16578082SRamaswamy.Tummala@Sun.COM return;
16588082SRamaswamy.Tummala@Sun.COM }
16598082SRamaswamy.Tummala@Sun.COM hcap->hca_mrhdl = NULL;
16608082SRamaswamy.Tummala@Sun.COM /* FALLTHRU */
16618082SRamaswamy.Tummala@Sun.COM case RDS_HCA_STATE_OPEN:
16628082SRamaswamy.Tummala@Sun.COM ASSERT(hcap->hca_hdl != NULL);
16638082SRamaswamy.Tummala@Sun.COM ASSERT(hcap->hca_pdhdl != NULL);
16648082SRamaswamy.Tummala@Sun.COM
16658082SRamaswamy.Tummala@Sun.COM
16668082SRamaswamy.Tummala@Sun.COM ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
16678082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
16688082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca",
16698082SRamaswamy.Tummala@Sun.COM "ibt_free_pd failed: %d", ret);
16708082SRamaswamy.Tummala@Sun.COM }
16718082SRamaswamy.Tummala@Sun.COM
16728082SRamaswamy.Tummala@Sun.COM (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);
16738082SRamaswamy.Tummala@Sun.COM
16748082SRamaswamy.Tummala@Sun.COM ret = ibt_close_hca(hcap->hca_hdl);
16758082SRamaswamy.Tummala@Sun.COM if (ret != IBT_SUCCESS) {
16768082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca",
16778082SRamaswamy.Tummala@Sun.COM "ibt_close_hca failed: %d", ret);
16788082SRamaswamy.Tummala@Sun.COM }
16798082SRamaswamy.Tummala@Sun.COM
16808082SRamaswamy.Tummala@Sun.COM hcap->hca_hdl = NULL;
16818082SRamaswamy.Tummala@Sun.COM hcap->hca_pdhdl = NULL;
16828082SRamaswamy.Tummala@Sun.COM hcap->hca_lkey = 0;
16838082SRamaswamy.Tummala@Sun.COM hcap->hca_rkey = 0;
16848082SRamaswamy.Tummala@Sun.COM }
16858082SRamaswamy.Tummala@Sun.COM
16868082SRamaswamy.Tummala@Sun.COM /*
16878082SRamaswamy.Tummala@Sun.COM * This should be a write lock as we don't want anyone to get access
16888082SRamaswamy.Tummala@Sun.COM * to the hcap while we are modifing its contents
16898082SRamaswamy.Tummala@Sun.COM */
16908082SRamaswamy.Tummala@Sun.COM rw_enter(&statep->rds_hca_lock, RW_WRITER);
16918082SRamaswamy.Tummala@Sun.COM hcap->hca_state = RDS_HCA_STATE_REMOVED;
16928082SRamaswamy.Tummala@Sun.COM rw_exit(&statep->rds_hca_lock);
16938082SRamaswamy.Tummala@Sun.COM
16948082SRamaswamy.Tummala@Sun.COM RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid);
16958082SRamaswamy.Tummala@Sun.COM }
16968082SRamaswamy.Tummala@Sun.COM
16978082SRamaswamy.Tummala@Sun.COM static void
rds_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)16983302Sagiri rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
16993302Sagiri ibt_async_event_t *event)
17003302Sagiri {
17018082SRamaswamy.Tummala@Sun.COM rds_state_t *statep = (rds_state_t *)clntp;
17023302Sagiri
17033302Sagiri RDS_DPRINTF2("rds_async_handler", "Async code: %d", code);
17043302Sagiri
17053302Sagiri switch (code) {
17063302Sagiri case IBT_EVENT_PORT_UP:
17073302Sagiri rds_handle_portup_event(statep, hdl, event);
17083302Sagiri break;
17098082SRamaswamy.Tummala@Sun.COM case IBT_HCA_ATTACH_EVENT:
17108082SRamaswamy.Tummala@Sun.COM /*
17118082SRamaswamy.Tummala@Sun.COM * NOTE: In some error recovery paths, it is possible to
17128082SRamaswamy.Tummala@Sun.COM * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
17138082SRamaswamy.Tummala@Sun.COM */
17148082SRamaswamy.Tummala@Sun.COM (void) rdsib_add_hca(event->ev_hca_guid);
17158082SRamaswamy.Tummala@Sun.COM break;
17168082SRamaswamy.Tummala@Sun.COM case IBT_HCA_DETACH_EVENT:
17178082SRamaswamy.Tummala@Sun.COM (void) rdsib_del_hca(statep, event->ev_hca_guid);
17188082SRamaswamy.Tummala@Sun.COM break;
17193302Sagiri
17203302Sagiri default:
17213302Sagiri RDS_DPRINTF2(LABEL, "Async event: %d not handled", code);
17223302Sagiri }
17233302Sagiri
17243302Sagiri RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code);
17253302Sagiri }
172610489SGiri.Adari@Sun.COM
172710489SGiri.Adari@Sun.COM /*
172810489SGiri.Adari@Sun.COM * This routine exists to minimize stale connections across ungraceful
172910489SGiri.Adari@Sun.COM * reboots of nodes in a cluster.
173010489SGiri.Adari@Sun.COM */
173110489SGiri.Adari@Sun.COM void
rds_randomize_qps(rds_hca_t * hcap)173210489SGiri.Adari@Sun.COM rds_randomize_qps(rds_hca_t *hcap)
173310489SGiri.Adari@Sun.COM {
173410489SGiri.Adari@Sun.COM ibt_cq_attr_t cqattr;
173510489SGiri.Adari@Sun.COM ibt_rc_chan_alloc_args_t chanargs;
173610489SGiri.Adari@Sun.COM ibt_channel_hdl_t qp1, qp2;
173710489SGiri.Adari@Sun.COM ibt_cq_hdl_t cq_hdl;
173810489SGiri.Adari@Sun.COM hrtime_t nsec;
173910489SGiri.Adari@Sun.COM uint8_t i, j, rand1, rand2;
174010489SGiri.Adari@Sun.COM int ret;
174110489SGiri.Adari@Sun.COM
174210489SGiri.Adari@Sun.COM bzero(&cqattr, sizeof (ibt_cq_attr_t));
174310489SGiri.Adari@Sun.COM cqattr.cq_size = 1;
174410489SGiri.Adari@Sun.COM cqattr.cq_sched = NULL;
174510489SGiri.Adari@Sun.COM cqattr.cq_flags = IBT_CQ_NO_FLAGS;
174610489SGiri.Adari@Sun.COM ret = ibt_alloc_cq(hcap->hca_hdl, &cqattr, &cq_hdl, NULL);
174710489SGiri.Adari@Sun.COM if (ret != IBT_SUCCESS) {
174810489SGiri.Adari@Sun.COM RDS_DPRINTF2("rds_randomize_qps",
174910489SGiri.Adari@Sun.COM "ibt_alloc_cq failed: %d", ret);
175010489SGiri.Adari@Sun.COM return;
175110489SGiri.Adari@Sun.COM }
175210489SGiri.Adari@Sun.COM
175310489SGiri.Adari@Sun.COM bzero(&chanargs, sizeof (ibt_rc_chan_alloc_args_t));
175410489SGiri.Adari@Sun.COM chanargs.rc_flags = IBT_ALL_SIGNALED;
175510489SGiri.Adari@Sun.COM chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
175610489SGiri.Adari@Sun.COM IBT_CEP_ATOMIC;
175710489SGiri.Adari@Sun.COM chanargs.rc_hca_port_num = 1;
175810489SGiri.Adari@Sun.COM chanargs.rc_scq = cq_hdl;
175910489SGiri.Adari@Sun.COM chanargs.rc_rcq = cq_hdl;
176010489SGiri.Adari@Sun.COM chanargs.rc_pd = hcap->hca_pdhdl;
176110489SGiri.Adari@Sun.COM chanargs.rc_srq = NULL;
176210489SGiri.Adari@Sun.COM
176310489SGiri.Adari@Sun.COM nsec = gethrtime();
176410489SGiri.Adari@Sun.COM rand1 = (nsec & 0xF);
176510489SGiri.Adari@Sun.COM rand2 = (nsec >> 4) & 0xF;
176610489SGiri.Adari@Sun.COM RDS_DPRINTF2("rds_randomize_qps", "rand1: %d rand2: %d",
176710489SGiri.Adari@Sun.COM rand1, rand2);
176810489SGiri.Adari@Sun.COM
176910489SGiri.Adari@Sun.COM for (i = 0; i < rand1 + 3; i++) {
177010489SGiri.Adari@Sun.COM if (ibt_alloc_rc_channel(hcap->hca_hdl,
177110489SGiri.Adari@Sun.COM IBT_ACHAN_NO_FLAGS, &chanargs, &qp1, NULL) !=
177210489SGiri.Adari@Sun.COM IBT_SUCCESS) {
177310489SGiri.Adari@Sun.COM RDS_DPRINTF2("rds_randomize_qps",
177410489SGiri.Adari@Sun.COM "Bailing at i: %d", i);
177510489SGiri.Adari@Sun.COM (void) ibt_free_cq(cq_hdl);
177610489SGiri.Adari@Sun.COM return;
177710489SGiri.Adari@Sun.COM }
177810489SGiri.Adari@Sun.COM for (j = 0; j < rand2 + 3; j++) {
177910489SGiri.Adari@Sun.COM if (ibt_alloc_rc_channel(hcap->hca_hdl,
178010489SGiri.Adari@Sun.COM IBT_ACHAN_NO_FLAGS, &chanargs, &qp2,
178110489SGiri.Adari@Sun.COM NULL) != IBT_SUCCESS) {
178210489SGiri.Adari@Sun.COM RDS_DPRINTF2("rds_randomize_qps",
178310489SGiri.Adari@Sun.COM "Bailing at i: %d j: %d", i, j);
178410489SGiri.Adari@Sun.COM (void) ibt_free_channel(qp1);
178510489SGiri.Adari@Sun.COM (void) ibt_free_cq(cq_hdl);
178610489SGiri.Adari@Sun.COM return;
178710489SGiri.Adari@Sun.COM }
178810489SGiri.Adari@Sun.COM (void) ibt_free_channel(qp2);
178910489SGiri.Adari@Sun.COM }
179010489SGiri.Adari@Sun.COM (void) ibt_free_channel(qp1);
179110489SGiri.Adari@Sun.COM }
179210489SGiri.Adari@Sun.COM
179310489SGiri.Adari@Sun.COM (void) ibt_free_cq(cq_hdl);
179410489SGiri.Adari@Sun.COM }
1795