/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
4c0dd49bdSEiji Ota
/*
 * This file contains code imported from the OFED rds source file ib.c
 * Oracle elects to have and use the contents of ib.c under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */
1116e76cddSagiri
/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
44c0dd49bdSEiji Ota #include <sys/sysmacros.h>
45c0dd49bdSEiji Ota #include <sys/rds.h>
46c0dd49bdSEiji Ota
47c0dd49bdSEiji Ota #include <sys/ib/ibtl/ibti.h>
48c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3.h>
49c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/ib.h>
50c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51c0dd49bdSEiji Ota
52c0dd49bdSEiji Ota unsigned int rdsv3_ib_retry_count = RDSV3_IB_DEFAULT_RETRY_COUNT;
53c0dd49bdSEiji Ota
54c0dd49bdSEiji Ota struct list rdsv3_ib_devices;
55c0dd49bdSEiji Ota
56c0dd49bdSEiji Ota /* NOTE: if also grabbing ibdev lock, grab this first */
57c0dd49bdSEiji Ota kmutex_t ib_nodev_conns_lock;
58c0dd49bdSEiji Ota list_t ib_nodev_conns;
59c0dd49bdSEiji Ota
60d2b539e7Sagiri extern int rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags);
61d2b539e7Sagiri extern void rdsv3_ib_frag_destructor(void *buf, void *arg);
62d2b539e7Sagiri
63c0dd49bdSEiji Ota void
rdsv3_ib_add_one(ib_device_t * device)64c0dd49bdSEiji Ota rdsv3_ib_add_one(ib_device_t *device)
65c0dd49bdSEiji Ota {
66c0dd49bdSEiji Ota struct rdsv3_ib_device *rds_ibdev;
67c0dd49bdSEiji Ota ibt_hca_attr_t *dev_attr;
68d2b539e7Sagiri char name[64];
69c0dd49bdSEiji Ota
70b27516f5Sagiri RDSV3_DPRINTF2("rdsv3_ib_add_one", "device: %p", device);
71c0dd49bdSEiji Ota
72c0dd49bdSEiji Ota /* Only handle IB (no iWARP) devices */
73c0dd49bdSEiji Ota if (device->node_type != RDMA_NODE_IB_CA)
74c0dd49bdSEiji Ota return;
75c0dd49bdSEiji Ota
76c0dd49bdSEiji Ota dev_attr = (ibt_hca_attr_t *)kmem_alloc(sizeof (*dev_attr),
77c0dd49bdSEiji Ota KM_NOSLEEP);
78c0dd49bdSEiji Ota if (!dev_attr)
79c0dd49bdSEiji Ota return;
80c0dd49bdSEiji Ota
81c0dd49bdSEiji Ota if (ibt_query_hca(ib_get_ibt_hca_hdl(device), dev_attr)) {
82d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_add_one",
83c0dd49bdSEiji Ota "Query device failed for %s", device->name);
84c0dd49bdSEiji Ota goto free_attr;
85c0dd49bdSEiji Ota }
86c0dd49bdSEiji Ota
87c0dd49bdSEiji Ota /* We depend on Reserved Lkey */
88c0dd49bdSEiji Ota if (!(dev_attr->hca_flags2 & IBT_HCA2_RES_LKEY)) {
89d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_add_one",
90c0dd49bdSEiji Ota "Reserved Lkey support is required: %s",
91c0dd49bdSEiji Ota device->name);
92c0dd49bdSEiji Ota goto free_attr;
93c0dd49bdSEiji Ota }
94c0dd49bdSEiji Ota
95c0dd49bdSEiji Ota rds_ibdev = kmem_zalloc(sizeof (*rds_ibdev), KM_NOSLEEP);
96c0dd49bdSEiji Ota if (!rds_ibdev)
97c0dd49bdSEiji Ota goto free_attr;
98c0dd49bdSEiji Ota
995d5562f5SEiji Ota rds_ibdev->ibt_hca_hdl = ib_get_ibt_hca_hdl(device);
1005d5562f5SEiji Ota rds_ibdev->hca_attr = *dev_attr;
1015d5562f5SEiji Ota
1025d5562f5SEiji Ota rw_init(&rds_ibdev->rwlock, NULL, RW_DRIVER, NULL);
103c0dd49bdSEiji Ota mutex_init(&rds_ibdev->spinlock, NULL, MUTEX_DRIVER, NULL);
104c0dd49bdSEiji Ota
105c0dd49bdSEiji Ota rds_ibdev->max_wrs = dev_attr->hca_max_chan_sz;
106c0dd49bdSEiji Ota rds_ibdev->max_sge = min(dev_attr->hca_max_sgl, RDSV3_IB_MAX_SGE);
107c0dd49bdSEiji Ota
1085d5562f5SEiji Ota rds_ibdev->max_initiator_depth = (uint_t)dev_attr->hca_max_rdma_in_qp;
1095d5562f5SEiji Ota rds_ibdev->max_responder_resources =
1105d5562f5SEiji Ota (uint_t)dev_attr->hca_max_rdma_in_qp;
1115d5562f5SEiji Ota
112c0dd49bdSEiji Ota rds_ibdev->dev = device;
113c0dd49bdSEiji Ota rds_ibdev->pd = ib_alloc_pd(device);
114c0dd49bdSEiji Ota if (IS_ERR(rds_ibdev->pd))
115c0dd49bdSEiji Ota goto free_dev;
116c0dd49bdSEiji Ota
117c0dd49bdSEiji Ota if (rdsv3_ib_create_mr_pool(rds_ibdev) != 0) {
118c0dd49bdSEiji Ota goto free_dev;
119c0dd49bdSEiji Ota }
120c0dd49bdSEiji Ota
1215d5562f5SEiji Ota if (rdsv3_ib_create_inc_pool(rds_ibdev) != 0) {
1225d5562f5SEiji Ota rdsv3_ib_destroy_mr_pool(rds_ibdev);
1235d5562f5SEiji Ota goto free_dev;
1245d5562f5SEiji Ota }
1255d5562f5SEiji Ota
126d2b539e7Sagiri (void) snprintf(name, 64, "RDSV3_IB_FRAG_%llx",
127d2b539e7Sagiri (longlong_t)htonll(dev_attr->hca_node_guid));
128d2b539e7Sagiri rds_ibdev->ib_frag_slab = kmem_cache_create(name,
129d2b539e7Sagiri sizeof (struct rdsv3_page_frag), 0, rdsv3_ib_frag_constructor,
130d2b539e7Sagiri rdsv3_ib_frag_destructor, NULL, (void *)rds_ibdev, NULL, 0);
131d2b539e7Sagiri if (rds_ibdev->ib_frag_slab == NULL) {
132d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_add_one",
133d2b539e7Sagiri "kmem_cache_create for ib_frag_slab failed for device: %s",
134d2b539e7Sagiri device->name);
135d2b539e7Sagiri rdsv3_ib_destroy_mr_pool(rds_ibdev);
1365d5562f5SEiji Ota rdsv3_ib_destroy_inc_pool(rds_ibdev);
137d2b539e7Sagiri goto free_dev;
138d2b539e7Sagiri }
139d2b539e7Sagiri
1405d5562f5SEiji Ota rds_ibdev->aft_hcagp = rdsv3_af_grp_create(rds_ibdev->ibt_hca_hdl,
1415d5562f5SEiji Ota (uint64_t)rds_ibdev->hca_attr.hca_node_guid);
1425d5562f5SEiji Ota if (rds_ibdev->aft_hcagp == NULL) {
1435d5562f5SEiji Ota rdsv3_ib_destroy_mr_pool(rds_ibdev);
1445d5562f5SEiji Ota rdsv3_ib_destroy_inc_pool(rds_ibdev);
1455d5562f5SEiji Ota kmem_cache_destroy(rds_ibdev->ib_frag_slab);
1465d5562f5SEiji Ota goto free_dev;
1475d5562f5SEiji Ota }
1485d5562f5SEiji Ota rds_ibdev->fmr_soft_cq = rdsv3_af_thr_create(rdsv3_ib_drain_mrlist_fn,
1495d5562f5SEiji Ota (void *)rds_ibdev->fmr_pool, SCQ_HCA_BIND_CPU,
1505d5562f5SEiji Ota rds_ibdev->aft_hcagp);
1515d5562f5SEiji Ota if (rds_ibdev->fmr_soft_cq == NULL) {
1525d5562f5SEiji Ota rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
1535d5562f5SEiji Ota rdsv3_ib_destroy_mr_pool(rds_ibdev);
1545d5562f5SEiji Ota rdsv3_ib_destroy_inc_pool(rds_ibdev);
1555d5562f5SEiji Ota kmem_cache_destroy(rds_ibdev->ib_frag_slab);
1565d5562f5SEiji Ota goto free_dev;
1575d5562f5SEiji Ota }
1585d5562f5SEiji Ota
1595d5562f5SEiji Ota rds_ibdev->inc_soft_cq = rdsv3_af_thr_create(rdsv3_ib_drain_inclist,
1605d5562f5SEiji Ota (void *)rds_ibdev->inc_pool, SCQ_HCA_BIND_CPU,
1615d5562f5SEiji Ota rds_ibdev->aft_hcagp);
1625d5562f5SEiji Ota if (rds_ibdev->inc_soft_cq == NULL) {
1635d5562f5SEiji Ota rdsv3_af_thr_destroy(rds_ibdev->fmr_soft_cq);
1645d5562f5SEiji Ota rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
1655d5562f5SEiji Ota rdsv3_ib_destroy_mr_pool(rds_ibdev);
1665d5562f5SEiji Ota rdsv3_ib_destroy_inc_pool(rds_ibdev);
1675d5562f5SEiji Ota kmem_cache_destroy(rds_ibdev->ib_frag_slab);
1685d5562f5SEiji Ota goto free_dev;
1695d5562f5SEiji Ota }
170d2b539e7Sagiri
171c0dd49bdSEiji Ota list_create(&rds_ibdev->ipaddr_list, sizeof (struct rdsv3_ib_ipaddr),
172c0dd49bdSEiji Ota offsetof(struct rdsv3_ib_ipaddr, list));
173c0dd49bdSEiji Ota list_create(&rds_ibdev->conn_list, sizeof (struct rdsv3_ib_connection),
174c0dd49bdSEiji Ota offsetof(struct rdsv3_ib_connection, ib_node));
175c0dd49bdSEiji Ota
176c0dd49bdSEiji Ota list_insert_tail(&rdsv3_ib_devices, rds_ibdev);
177c0dd49bdSEiji Ota
178c0dd49bdSEiji Ota ib_set_client_data(device, &rdsv3_ib_client, rds_ibdev);
179c0dd49bdSEiji Ota
180b27516f5Sagiri RDSV3_DPRINTF2("rdsv3_ib_add_one", "Return: device: %p", device);
181c0dd49bdSEiji Ota
182c0dd49bdSEiji Ota goto free_attr;
183c0dd49bdSEiji Ota
184c0dd49bdSEiji Ota err_pd:
185c0dd49bdSEiji Ota (void) ib_dealloc_pd(rds_ibdev->pd);
186c0dd49bdSEiji Ota free_dev:
1875d5562f5SEiji Ota mutex_destroy(&rds_ibdev->spinlock);
1885d5562f5SEiji Ota rw_destroy(&rds_ibdev->rwlock);
189c0dd49bdSEiji Ota kmem_free(rds_ibdev, sizeof (*rds_ibdev));
190c0dd49bdSEiji Ota free_attr:
191c0dd49bdSEiji Ota kmem_free(dev_attr, sizeof (*dev_attr));
192c0dd49bdSEiji Ota }
193c0dd49bdSEiji Ota
194c0dd49bdSEiji Ota void
rdsv3_ib_remove_one(struct ib_device * device)195c0dd49bdSEiji Ota rdsv3_ib_remove_one(struct ib_device *device)
196c0dd49bdSEiji Ota {
197c0dd49bdSEiji Ota struct rdsv3_ib_device *rds_ibdev;
198c0dd49bdSEiji Ota struct rdsv3_ib_ipaddr *i_ipaddr, *i_next;
199c0dd49bdSEiji Ota
200b27516f5Sagiri RDSV3_DPRINTF2("rdsv3_ib_remove_one", "device: %p", device);
201c0dd49bdSEiji Ota
202c0dd49bdSEiji Ota rds_ibdev = ib_get_client_data(device, &rdsv3_ib_client);
203c0dd49bdSEiji Ota if (!rds_ibdev)
204c0dd49bdSEiji Ota return;
205c0dd49bdSEiji Ota
206c0dd49bdSEiji Ota RDSV3_FOR_EACH_LIST_NODE_SAFE(i_ipaddr, i_next, &rds_ibdev->ipaddr_list,
207c0dd49bdSEiji Ota list) {
208c0dd49bdSEiji Ota list_remove_node(&i_ipaddr->list);
209c0dd49bdSEiji Ota kmem_free(i_ipaddr, sizeof (*i_ipaddr));
210c0dd49bdSEiji Ota }
211c0dd49bdSEiji Ota
212c0dd49bdSEiji Ota rdsv3_ib_destroy_conns(rds_ibdev);
213c0dd49bdSEiji Ota
2145d5562f5SEiji Ota if (rds_ibdev->fmr_soft_cq)
2155d5562f5SEiji Ota rdsv3_af_thr_destroy(rds_ibdev->fmr_soft_cq);
2165d5562f5SEiji Ota if (rds_ibdev->inc_soft_cq)
2175d5562f5SEiji Ota rdsv3_af_thr_destroy(rds_ibdev->inc_soft_cq);
2185d5562f5SEiji Ota
219c0dd49bdSEiji Ota rdsv3_ib_destroy_mr_pool(rds_ibdev);
2205d5562f5SEiji Ota rdsv3_ib_destroy_inc_pool(rds_ibdev);
221c0dd49bdSEiji Ota
222d2b539e7Sagiri kmem_cache_destroy(rds_ibdev->ib_frag_slab);
223d2b539e7Sagiri
2245d5562f5SEiji Ota rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
2255d5562f5SEiji Ota
226c0dd49bdSEiji Ota #if 0
227c0dd49bdSEiji Ota while (ib_dealloc_pd(rds_ibdev->pd)) {
228c0dd49bdSEiji Ota #ifndef __lock_lint
229c0dd49bdSEiji Ota RDSV3_DPRINTF5("rdsv3_ib_remove_one",
230c0dd49bdSEiji Ota "%s-%d Failed to dealloc pd %p",
231c0dd49bdSEiji Ota __func__, __LINE__, rds_ibdev->pd);
232c0dd49bdSEiji Ota #endif
233c0dd49bdSEiji Ota delay(drv_usectohz(1000));
234c0dd49bdSEiji Ota }
235c0dd49bdSEiji Ota #else
236c0dd49bdSEiji Ota if (ib_dealloc_pd(rds_ibdev->pd)) {
237c0dd49bdSEiji Ota #ifndef __lock_lint
238c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_ib_remove_one",
239cadbfdc3SEiji Ota "Failed to dealloc pd %p\n", rds_ibdev->pd);
240c0dd49bdSEiji Ota #endif
241c0dd49bdSEiji Ota }
242c0dd49bdSEiji Ota #endif
243c0dd49bdSEiji Ota
244c0dd49bdSEiji Ota list_destroy(&rds_ibdev->ipaddr_list);
245c0dd49bdSEiji Ota list_destroy(&rds_ibdev->conn_list);
246c0dd49bdSEiji Ota list_remove_node(&rds_ibdev->list);
2475d5562f5SEiji Ota mutex_destroy(&rds_ibdev->spinlock);
2485d5562f5SEiji Ota rw_destroy(&rds_ibdev->rwlock);
249c0dd49bdSEiji Ota kmem_free(rds_ibdev, sizeof (*rds_ibdev));
250c0dd49bdSEiji Ota
251b27516f5Sagiri RDSV3_DPRINTF2("rdsv3_ib_remove_one", "Return: device: %p", device);
252c0dd49bdSEiji Ota }
253c0dd49bdSEiji Ota
254c0dd49bdSEiji Ota #ifndef __lock_lint
255c0dd49bdSEiji Ota struct ib_client rdsv3_ib_client = {
256c0dd49bdSEiji Ota .name = "rdsv3_ib",
257c0dd49bdSEiji Ota .add = rdsv3_ib_add_one,
258c0dd49bdSEiji Ota .remove = rdsv3_ib_remove_one,
259c0dd49bdSEiji Ota .clnt_hdl = NULL,
260c0dd49bdSEiji Ota .state = IB_CLNT_UNINITIALIZED
261c0dd49bdSEiji Ota };
262c0dd49bdSEiji Ota #else
263c0dd49bdSEiji Ota struct ib_client rdsv3_ib_client = {
264c0dd49bdSEiji Ota "rdsv3_ib",
265c0dd49bdSEiji Ota rdsv3_ib_add_one,
266c0dd49bdSEiji Ota rdsv3_ib_remove_one,
267c0dd49bdSEiji Ota NULL,
268c0dd49bdSEiji Ota NULL,
269c0dd49bdSEiji Ota IB_CLNT_UNINITIALIZED
270c0dd49bdSEiji Ota };
271c0dd49bdSEiji Ota #endif
272c0dd49bdSEiji Ota
273c0dd49bdSEiji Ota static int
rds_ib_conn_info_visitor(struct rdsv3_connection * conn,void * buffer)274c0dd49bdSEiji Ota rds_ib_conn_info_visitor(struct rdsv3_connection *conn,
275c0dd49bdSEiji Ota void *buffer)
276c0dd49bdSEiji Ota {
277fe817b60SEiji Ota struct rds_info_rdma_connection *iinfo = buffer;
278c0dd49bdSEiji Ota struct rdsv3_ib_connection *ic;
279c0dd49bdSEiji Ota
280c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_conn_info_visitor", "conn: %p buffer: %p",
281c0dd49bdSEiji Ota conn, buffer);
282c0dd49bdSEiji Ota
283c0dd49bdSEiji Ota /* We will only ever look at IB transports */
284c0dd49bdSEiji Ota if (conn->c_trans != &rdsv3_ib_transport)
285c0dd49bdSEiji Ota return (0);
286c0dd49bdSEiji Ota
287c0dd49bdSEiji Ota iinfo->src_addr = conn->c_laddr;
288c0dd49bdSEiji Ota iinfo->dst_addr = conn->c_faddr;
289c0dd49bdSEiji Ota
290c0dd49bdSEiji Ota (void) memset(&iinfo->src_gid, 0, sizeof (iinfo->src_gid));
291c0dd49bdSEiji Ota (void) memset(&iinfo->dst_gid, 0, sizeof (iinfo->dst_gid));
292c0dd49bdSEiji Ota if (rdsv3_conn_state(conn) == RDSV3_CONN_UP) {
293c0dd49bdSEiji Ota struct rdsv3_ib_device *rds_ibdev;
294c0dd49bdSEiji Ota struct rdma_dev_addr *dev_addr;
295c0dd49bdSEiji Ota
296c0dd49bdSEiji Ota ic = conn->c_transport_data;
297c0dd49bdSEiji Ota dev_addr = &ic->i_cm_id->route.addr.dev_addr;
298c0dd49bdSEiji Ota
299c0dd49bdSEiji Ota ib_addr_get_sgid(dev_addr, (union ib_gid *)&iinfo->src_gid);
300c0dd49bdSEiji Ota ib_addr_get_dgid(dev_addr, (union ib_gid *)&iinfo->dst_gid);
301c0dd49bdSEiji Ota
302c0dd49bdSEiji Ota rds_ibdev = ib_get_client_data(ic->i_cm_id->device,
303c0dd49bdSEiji Ota &rdsv3_ib_client);
304c0dd49bdSEiji Ota iinfo->max_send_wr = ic->i_send_ring.w_nr;
305c0dd49bdSEiji Ota iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
306c0dd49bdSEiji Ota iinfo->max_send_sge = rds_ibdev->max_sge;
307c0dd49bdSEiji Ota }
308c0dd49bdSEiji Ota
309c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_conn_info_visitor", "conn: %p buffer: %p",
310c0dd49bdSEiji Ota conn, buffer);
311c0dd49bdSEiji Ota return (1);
312c0dd49bdSEiji Ota }
313c0dd49bdSEiji Ota
314c0dd49bdSEiji Ota static void
rds_ib_ic_info(struct rsock * sock,unsigned int len,struct rdsv3_info_iterator * iter,struct rdsv3_info_lengths * lens)315c0dd49bdSEiji Ota rds_ib_ic_info(struct rsock *sock, unsigned int len,
316c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter,
317c0dd49bdSEiji Ota struct rdsv3_info_lengths *lens)
318c0dd49bdSEiji Ota {
319c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_ic_info", "sk: %p iter: %p, lens: %p, len: %d",
320c0dd49bdSEiji Ota sock, iter, lens, len);
321c0dd49bdSEiji Ota
322c0dd49bdSEiji Ota rdsv3_for_each_conn_info(sock, len, iter, lens,
323c0dd49bdSEiji Ota rds_ib_conn_info_visitor,
324fe817b60SEiji Ota sizeof (struct rds_info_rdma_connection));
325c0dd49bdSEiji Ota }
326c0dd49bdSEiji Ota
327c0dd49bdSEiji Ota /*
328c0dd49bdSEiji Ota * Early RDS/IB was built to only bind to an address if there is an IPoIB
329c0dd49bdSEiji Ota * device with that address set.
330c0dd49bdSEiji Ota *
331c0dd49bdSEiji Ota * If it were me, I'd advocate for something more flexible. Sending and
332c0dd49bdSEiji Ota * receiving should be device-agnostic. Transports would try and maintain
333c0dd49bdSEiji Ota * connections between peers who have messages queued. Userspace would be
334c0dd49bdSEiji Ota * allowed to influence which paths have priority. We could call userspace
335c0dd49bdSEiji Ota * asserting this policy "routing".
336c0dd49bdSEiji Ota */
337c0dd49bdSEiji Ota static int
rds_ib_laddr_check(uint32_be_t addr)338c0dd49bdSEiji Ota rds_ib_laddr_check(uint32_be_t addr)
339c0dd49bdSEiji Ota {
340c0dd49bdSEiji Ota int ret;
341c0dd49bdSEiji Ota struct rdma_cm_id *cm_id;
342c0dd49bdSEiji Ota struct sockaddr_in sin;
343c0dd49bdSEiji Ota
344c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_laddr_check", "addr: %x", ntohl(addr));
345c0dd49bdSEiji Ota
346c0dd49bdSEiji Ota /*
347c0dd49bdSEiji Ota * Create a CMA ID and try to bind it. This catches both
348c0dd49bdSEiji Ota * IB and iWARP capable NICs.
349c0dd49bdSEiji Ota */
350c0dd49bdSEiji Ota cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
351cadbfdc3SEiji Ota if (!cm_id)
352cadbfdc3SEiji Ota return (-EADDRNOTAVAIL);
353c0dd49bdSEiji Ota
354c0dd49bdSEiji Ota (void) memset(&sin, 0, sizeof (sin));
355c0dd49bdSEiji Ota sin.sin_family = AF_INET;
356c0dd49bdSEiji Ota sin.sin_addr.s_addr = rdsv3_scaddr_to_ibaddr(addr);
357c0dd49bdSEiji Ota
358c0dd49bdSEiji Ota /* rdma_bind_addr will only succeed for IB & iWARP devices */
359c0dd49bdSEiji Ota ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
360c0dd49bdSEiji Ota /*
361c0dd49bdSEiji Ota * due to this, we will claim to support iWARP devices unless we
362c0dd49bdSEiji Ota * check node_type.
363c0dd49bdSEiji Ota */
364c0dd49bdSEiji Ota if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
365c0dd49bdSEiji Ota ret = -EADDRNOTAVAIL;
366c0dd49bdSEiji Ota
367c0dd49bdSEiji Ota RDSV3_DPRINTF5("rds_ib_laddr_check",
368c0dd49bdSEiji Ota "addr %u.%u.%u.%u ret %d node type %d",
369c0dd49bdSEiji Ota NIPQUAD(addr), ret,
370c0dd49bdSEiji Ota cm_id->device ? cm_id->device->node_type : -1);
371c0dd49bdSEiji Ota
372c0dd49bdSEiji Ota rdma_destroy_id(cm_id);
373c0dd49bdSEiji Ota
374c0dd49bdSEiji Ota return (ret);
375c0dd49bdSEiji Ota }
376c0dd49bdSEiji Ota
377c0dd49bdSEiji Ota void
rdsv3_ib_exit(void)378c0dd49bdSEiji Ota rdsv3_ib_exit(void)
379c0dd49bdSEiji Ota {
380c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_exit", "Enter");
381c0dd49bdSEiji Ota
382fe817b60SEiji Ota rdsv3_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
383c0dd49bdSEiji Ota rdsv3_ib_destroy_nodev_conns();
384c0dd49bdSEiji Ota ib_unregister_client(&rdsv3_ib_client);
385c0dd49bdSEiji Ota rdsv3_ib_sysctl_exit();
386c0dd49bdSEiji Ota rdsv3_ib_recv_exit();
387c0dd49bdSEiji Ota rdsv3_trans_unregister(&rdsv3_ib_transport);
388*a530e0a9Sagiri kmem_free(rdsv3_ib_stats,
389*a530e0a9Sagiri nr_cpus * sizeof (struct rdsv3_ib_statistics));
390c0dd49bdSEiji Ota mutex_destroy(&ib_nodev_conns_lock);
391c0dd49bdSEiji Ota list_destroy(&ib_nodev_conns);
392c0dd49bdSEiji Ota list_destroy(&rdsv3_ib_devices);
393c0dd49bdSEiji Ota
394c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_exit", "Return");
395c0dd49bdSEiji Ota }
396c0dd49bdSEiji Ota
397c0dd49bdSEiji Ota #ifndef __lock_lint
398c0dd49bdSEiji Ota struct rdsv3_transport rdsv3_ib_transport = {
399c0dd49bdSEiji Ota .laddr_check = rds_ib_laddr_check,
400c0dd49bdSEiji Ota .xmit_complete = rdsv3_ib_xmit_complete,
401c0dd49bdSEiji Ota .xmit = rdsv3_ib_xmit,
402c0dd49bdSEiji Ota .xmit_cong_map = NULL,
403c0dd49bdSEiji Ota .xmit_rdma = rdsv3_ib_xmit_rdma,
404c0dd49bdSEiji Ota .recv = rdsv3_ib_recv,
405c0dd49bdSEiji Ota .conn_alloc = rdsv3_ib_conn_alloc,
406c0dd49bdSEiji Ota .conn_free = rdsv3_ib_conn_free,
407c0dd49bdSEiji Ota .conn_connect = rdsv3_ib_conn_connect,
408c0dd49bdSEiji Ota .conn_shutdown = rdsv3_ib_conn_shutdown,
409c0dd49bdSEiji Ota .inc_copy_to_user = rdsv3_ib_inc_copy_to_user,
410c0dd49bdSEiji Ota .inc_free = rdsv3_ib_inc_free,
411c0dd49bdSEiji Ota .cm_initiate_connect = rdsv3_ib_cm_initiate_connect,
412c0dd49bdSEiji Ota .cm_handle_connect = rdsv3_ib_cm_handle_connect,
413c0dd49bdSEiji Ota .cm_connect_complete = rdsv3_ib_cm_connect_complete,
414c0dd49bdSEiji Ota .stats_info_copy = rdsv3_ib_stats_info_copy,
415c0dd49bdSEiji Ota .exit = rdsv3_ib_exit,
416c0dd49bdSEiji Ota .get_mr = rdsv3_ib_get_mr,
417c0dd49bdSEiji Ota .sync_mr = rdsv3_ib_sync_mr,
418c0dd49bdSEiji Ota .free_mr = rdsv3_ib_free_mr,
419c0dd49bdSEiji Ota .flush_mrs = rdsv3_ib_flush_mrs,
420c0dd49bdSEiji Ota .t_name = "infiniband",
421cadbfdc3SEiji Ota .t_type = RDS_TRANS_IB
422c0dd49bdSEiji Ota };
423c0dd49bdSEiji Ota #else
424c0dd49bdSEiji Ota struct rdsv3_transport rdsv3_ib_transport;
425c0dd49bdSEiji Ota #endif
426c0dd49bdSEiji Ota
427c0dd49bdSEiji Ota int
rdsv3_ib_init(void)428c0dd49bdSEiji Ota rdsv3_ib_init(void)
429c0dd49bdSEiji Ota {
430c0dd49bdSEiji Ota int ret;
431c0dd49bdSEiji Ota
432c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_init", "Enter");
433c0dd49bdSEiji Ota
434c0dd49bdSEiji Ota list_create(&rdsv3_ib_devices, sizeof (struct rdsv3_ib_device),
435c0dd49bdSEiji Ota offsetof(struct rdsv3_ib_device, list));
436c0dd49bdSEiji Ota list_create(&ib_nodev_conns, sizeof (struct rdsv3_ib_connection),
437c0dd49bdSEiji Ota offsetof(struct rdsv3_ib_connection, ib_node));
438c0dd49bdSEiji Ota mutex_init(&ib_nodev_conns_lock, NULL, MUTEX_DRIVER, NULL);
439c0dd49bdSEiji Ota
440*a530e0a9Sagiri /* allocate space for ib statistics */
441*a530e0a9Sagiri ASSERT(rdsv3_ib_stats == NULL);
442*a530e0a9Sagiri rdsv3_ib_stats = kmem_zalloc(nr_cpus *
443*a530e0a9Sagiri sizeof (struct rdsv3_ib_statistics), KM_SLEEP);
444*a530e0a9Sagiri
445c0dd49bdSEiji Ota rdsv3_ib_client.dip = rdsv3_dev_info;
446c0dd49bdSEiji Ota ret = ib_register_client(&rdsv3_ib_client);
447c0dd49bdSEiji Ota if (ret)
448c0dd49bdSEiji Ota goto out;
449c0dd49bdSEiji Ota
450c0dd49bdSEiji Ota ret = rdsv3_ib_sysctl_init();
451c0dd49bdSEiji Ota if (ret)
452c0dd49bdSEiji Ota goto out_ibreg;
453c0dd49bdSEiji Ota
454c0dd49bdSEiji Ota ret = rdsv3_ib_recv_init();
455c0dd49bdSEiji Ota if (ret)
456c0dd49bdSEiji Ota goto out_sysctl;
457c0dd49bdSEiji Ota
458c0dd49bdSEiji Ota ret = rdsv3_trans_register(&rdsv3_ib_transport);
459c0dd49bdSEiji Ota if (ret)
460c0dd49bdSEiji Ota goto out_recv;
461c0dd49bdSEiji Ota
462fe817b60SEiji Ota rdsv3_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
463c0dd49bdSEiji Ota
464c0dd49bdSEiji Ota RDSV3_DPRINTF4("rds_ib_init", "Return");
465c0dd49bdSEiji Ota
466c0dd49bdSEiji Ota return (0);
467c0dd49bdSEiji Ota
468c0dd49bdSEiji Ota out_recv:
469c0dd49bdSEiji Ota rdsv3_ib_recv_exit();
470c0dd49bdSEiji Ota out_sysctl:
471c0dd49bdSEiji Ota rdsv3_ib_sysctl_exit();
472c0dd49bdSEiji Ota out_ibreg:
473c0dd49bdSEiji Ota ib_unregister_client(&rdsv3_ib_client);
474c0dd49bdSEiji Ota out:
475*a530e0a9Sagiri kmem_free(rdsv3_ib_stats,
476*a530e0a9Sagiri nr_cpus * sizeof (struct rdsv3_ib_statistics));
477c0dd49bdSEiji Ota mutex_destroy(&ib_nodev_conns_lock);
478c0dd49bdSEiji Ota list_destroy(&ib_nodev_conns);
479c0dd49bdSEiji Ota list_destroy(&rdsv3_ib_devices);
480c0dd49bdSEiji Ota return (ret);
481c0dd49bdSEiji Ota }
482