xref: /spdk/lib/rdma_provider/rdma_provider_mlx5_dv.c (revision d1c46ed8e5f61500a9ef69d922f8d3f89a4e9cb3)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2020 Intel Corporation. All rights reserved.
 *   Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include <rdma/rdma_cma.h>
#include <infiniband/mlx5dv.h>

#include "spdk/stdinc.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/dma.h"

#include "spdk_internal/rdma_provider.h"
#include "spdk_internal/mlx5.h"
#include "spdk/log.h"
#include "spdk/util.h"

struct spdk_rdma_mlx5_dv_qp {
	struct spdk_rdma_provider_qp common;
	struct spdk_memory_domain_rdma_ctx domain_ctx;
	struct ibv_qp_ex *qpex;
};

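/*
 * Move the queue pair through the INIT -> RTR -> RTS state transitions.
 * rdma_init_qp_attr() fills in the attributes required for each state from
 * the rdma_cm connection parameters and ibv_modify_qp() applies them. This
 * must be done by hand because the QP is created via mlx5dv rather than
 * rdma_cm, so rdma_cm does not drive its state machine for us.
 */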
static int
rdma_mlx5_dv_init_qpair(struct spdk_rdma_mlx5_dv_qp *mlx5_qp)
{
	struct ibv_qp_attr qp_attr;
	int qp_attr_mask, rc;

	qp_attr.qp_state = IBV_QPS_INIT;
	rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("Failed to init attr IBV_QPS_INIT, errno %s (%d)\n", spdk_strerror(errno), errno);
		return rc;
	}

	rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_INIT) failed, rc %d\n", rc);
		return rc;
	}

	qp_attr.qp_state = IBV_QPS_RTR;
	rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("Failed to init attr IBV_QPS_RTR, errno %s (%d)\n", spdk_strerror(errno), errno);
		return rc;
	}

	rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_RTR) failed, rc %d\n", rc);
		return rc;
	}

	qp_attr.qp_state = IBV_QPS_RTS;
	rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("Failed to init attr IBV_QPS_RTS, errno %s (%d)\n", spdk_strerror(errno), errno);
		return rc;
	}

	rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask);
	if (rc) {
		SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_RTS) failed, rc %d\n", rc);
	}

	return rc;
}

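/*
 * Create an RC queue pair through the mlx5 direct-verbs API and wrap it in
 * the provider-agnostic spdk_rdma_provider_qp. The extended QP interface
 * (ibv_qp_ex) is used later to post work requests, and an RDMA memory
 * domain is created so upper layers can translate buffers for this PD.
 *
 * A minimal creation sketch (hypothetical caller; assumes `cm_id`, `cq` and
 * `pd` already exist, error handling omitted):
 *
 *	struct spdk_rdma_provider_qp_init_attr init_attr = {
 *		.pd = pd,
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.cap = { .max_send_wr = 32, .max_recv_wr = 32,
 *			 .max_send_sge = 1, .max_recv_sge = 1 },
 *	};
 *	struct spdk_rdma_provider_qp *qp = spdk_rdma_provider_qp_create(cm_id, &init_attr);
 */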
struct spdk_rdma_provider_qp *
spdk_rdma_provider_qp_create(struct rdma_cm_id *cm_id,
			     struct spdk_rdma_provider_qp_init_attr *qp_attr)
{
	assert(cm_id);
	assert(qp_attr);

	struct ibv_qp *qp;
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;
	struct ibv_qp_init_attr_ex dv_qp_attr = {
		.qp_context = qp_attr->qp_context,
		.send_cq = qp_attr->send_cq,
		.recv_cq = qp_attr->recv_cq,
		.srq = qp_attr->srq,
		.cap = qp_attr->cap,
		.qp_type = IBV_QPT_RC,
		.comp_mask = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
		.pd = qp_attr->pd ? qp_attr->pd : cm_id->pd
	};
	struct spdk_memory_domain_ctx ctx = {};
	int rc;

	assert(dv_qp_attr.pd);

	mlx5_qp = calloc(1, sizeof(*mlx5_qp));
	if (!mlx5_qp) {
		SPDK_ERRLOG("qp memory allocation failed\n");
		return NULL;
	}

	if (qp_attr->stats) {
		mlx5_qp->common.stats = qp_attr->stats;
		mlx5_qp->common.shared_stats = true;
	} else {
		mlx5_qp->common.stats = calloc(1, sizeof(*mlx5_qp->common.stats));
		if (!mlx5_qp->common.stats) {
			SPDK_ERRLOG("qp statistics memory allocation failed\n");
			free(mlx5_qp);
			return NULL;
		}
	}

	qp = mlx5dv_create_qp(cm_id->verbs, &dv_qp_attr, NULL);

	if (!qp) {
		SPDK_ERRLOG("Failed to create qpair, errno %s (%d)\n", spdk_strerror(errno), errno);
		/* Don't leak the per-qpair stats allocated above */
		if (!mlx5_qp->common.shared_stats) {
			free(mlx5_qp->common.stats);
		}
		free(mlx5_qp);
		return NULL;
	}

	mlx5_qp->common.qp = qp;
	mlx5_qp->common.cm_id = cm_id;
	mlx5_qp->qpex = ibv_qp_to_qp_ex(qp);

	if (!mlx5_qp->qpex) {
		SPDK_ERRLOG("Failed to get extended qp\n");
		spdk_rdma_provider_qp_destroy(&mlx5_qp->common);
		return NULL;
	}
	mlx5_qp->domain_ctx.size = sizeof(mlx5_qp->domain_ctx);
	/* Record the PD actually attached to the QP (may fall back to cm_id->pd) */
	mlx5_qp->domain_ctx.ibv_pd = dv_qp_attr.pd;
	ctx.size = sizeof(ctx);
	ctx.user_ctx = &mlx5_qp->domain_ctx;
	ctx.user_ctx_size = mlx5_qp->domain_ctx.size;
	rc = spdk_memory_domain_create(&mlx5_qp->common.domain, SPDK_DMA_DEVICE_TYPE_RDMA, &ctx,
				       SPDK_RDMA_DMA_DEVICE);
	if (rc) {
		SPDK_ERRLOG("Failed to create memory domain\n");
		spdk_rdma_provider_qp_destroy(&mlx5_qp->common);
		return NULL;
	}
	if (qp_attr->domain_transfer) {
		if (!spdk_rdma_provider_accel_sequence_supported()) {
			SPDK_ERRLOG("Data transfer functionality is not supported\n");
			spdk_rdma_provider_qp_destroy(&mlx5_qp->common);
			return NULL;
		}
		spdk_memory_domain_set_data_transfer(mlx5_qp->common.domain, qp_attr->domain_transfer);
	}

	qp_attr->cap = dv_qp_attr.cap;

	return &mlx5_qp->common;
}

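/*
 * Server-side connection establishment. The qpair must be moved to RTS
 * before rdma_accept() because it was not created through rdma_cm.
 */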
int
spdk_rdma_provider_qp_accept(struct spdk_rdma_provider_qp *spdk_rdma_qp,
			     struct rdma_conn_param *conn_param)
{
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;

	assert(spdk_rdma_qp != NULL);
	assert(spdk_rdma_qp->cm_id != NULL);

	mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common);

	/* NVMEoF target must move qpair to RTS state */
	if (rdma_mlx5_dv_init_qpair(mlx5_qp) != 0) {
		SPDK_ERRLOG("Failed to initialize qpair\n");
		/* Set errno to be compliant with rdma_accept behaviour */
		errno = ECONNABORTED;
		return -1;
	}

	return rdma_accept(spdk_rdma_qp->cm_id, conn_param);
}

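/*
 * Client-side counterpart of spdk_rdma_provider_qp_accept(): move the qpair
 * to RTS ourselves, then use rdma_establish() to complete the rdma_cm
 * connection for a QP that rdma_cm did not create.
 */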
int
spdk_rdma_provider_qp_complete_connect(struct spdk_rdma_provider_qp *spdk_rdma_qp)
{
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;
	int rc;

	assert(spdk_rdma_qp);

	mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common);

	rc = rdma_mlx5_dv_init_qpair(mlx5_qp);
	if (rc) {
		SPDK_ERRLOG("Failed to initialize qpair\n");
		return rc;
	}

	rc = rdma_establish(mlx5_qp->common.cm_id);
	if (rc) {
		SPDK_ERRLOG("rdma_establish failed, errno %s (%d)\n", spdk_strerror(errno), errno);
	}

	return rc;
}

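/*
 * Release everything allocated in spdk_rdma_provider_qp_create(): the
 * per-qpair statistics (unless they are shared), the ibv QP and the
 * memory domain.
 */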
void
spdk_rdma_provider_qp_destroy(struct spdk_rdma_provider_qp *spdk_rdma_qp)
{
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;
	int rc;

	assert(spdk_rdma_qp != NULL);

	mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common);

	if (spdk_rdma_qp->send_wrs.first != NULL) {
		SPDK_WARNLOG("Destroying qpair with queued Work Requests\n");
	}

	if (!mlx5_qp->common.shared_stats) {
		free(mlx5_qp->common.stats);
	}

	if (mlx5_qp->common.qp) {
		rc = ibv_destroy_qp(mlx5_qp->common.qp);
		if (rc) {
			SPDK_ERRLOG("Failed to destroy ibv qp %p, rc %d\n", mlx5_qp->common.qp, rc);
		}
	}
	if (spdk_rdma_qp->domain) {
		spdk_memory_domain_destroy(spdk_rdma_qp->domain);
	}

	free(mlx5_qp);
}

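/*
 * Move the QP to the ERR state so that outstanding work requests are
 * completed with a flush error, then start the rdma_cm disconnect flow.
 */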
int
spdk_rdma_provider_qp_disconnect(struct spdk_rdma_provider_qp *spdk_rdma_qp)
{
	int rc = 0;

	assert(spdk_rdma_qp != NULL);

	if (spdk_rdma_qp->qp) {
		struct ibv_qp_attr qp_attr = {.qp_state = IBV_QPS_ERR};

		rc = ibv_modify_qp(spdk_rdma_qp->qp, &qp_attr, IBV_QP_STATE);
		if (rc) {
			SPDK_ERRLOG("Failed to modify ibv qp %p state to ERR, rc %d\n", spdk_rdma_qp->qp, rc);
			return rc;
		}
	}

	if (spdk_rdma_qp->cm_id) {
		rc = rdma_disconnect(spdk_rdma_qp->cm_id);
		if (rc) {
			SPDK_ERRLOG("rdma_disconnect failed, errno %s (%d)\n", spdk_strerror(errno), errno);
		}
	}

	return rc;
}

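/*
 * Translate a chain of ibv_send_wr entries into extended-QP work requests.
 * The first call after a flush opens a batch with ibv_wr_start(); subsequent
 * calls append to it. Nothing is posted to the NIC until
 * spdk_rdma_provider_qp_flush_send_wrs() completes the batch.
 *
 * A minimal usage sketch (hypothetical caller; assumes `qp` came from
 * spdk_rdma_provider_qp_create() and `wr` is a fully initialized
 * ibv_send_wr chain):
 *
 *	struct ibv_send_wr *bad_wr = NULL;
 *	int rc;
 *
 *	spdk_rdma_provider_qp_queue_send_wrs(qp, wr);
 *	rc = spdk_rdma_provider_qp_flush_send_wrs(qp, &bad_wr);
 *	if (rc != 0) {
 *		...nothing was posted; resubmit starting from bad_wr...
 *	}
 */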
bool
spdk_rdma_provider_qp_queue_send_wrs(struct spdk_rdma_provider_qp *spdk_rdma_qp,
				     struct ibv_send_wr *first)
{
	struct ibv_send_wr *tmp;
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;
	bool is_first;

	assert(spdk_rdma_qp);
	assert(first);

	is_first = spdk_rdma_qp->send_wrs.first == NULL;
	mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common);

	if (is_first) {
		ibv_wr_start(mlx5_qp->qpex);
		spdk_rdma_qp->send_wrs.first = first;
	} else {
		spdk_rdma_qp->send_wrs.last->next = first;
	}

	for (tmp = first; tmp != NULL; tmp = tmp->next) {
		mlx5_qp->qpex->wr_id = tmp->wr_id;
		mlx5_qp->qpex->wr_flags = tmp->send_flags;

		switch (tmp->opcode) {
		case IBV_WR_SEND:
			ibv_wr_send(mlx5_qp->qpex);
			break;
		case IBV_WR_SEND_WITH_INV:
			ibv_wr_send_inv(mlx5_qp->qpex, tmp->invalidate_rkey);
			break;
		case IBV_WR_RDMA_READ:
			ibv_wr_rdma_read(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr);
			break;
		case IBV_WR_RDMA_WRITE:
			ibv_wr_rdma_write(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr);
			break;
		default:
			SPDK_ERRLOG("Unexpected opcode %d\n", tmp->opcode);
			assert(0);
		}

		ibv_wr_set_sge_list(mlx5_qp->qpex, tmp->num_sge, tmp->sg_list);

		spdk_rdma_qp->send_wrs.last = tmp;
		spdk_rdma_qp->stats->send.num_submitted_wrs++;
	}

	return is_first;
}

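/*
 * Post all queued send WRs to the NIC with a single doorbell update via
 * ibv_wr_complete(). On failure none of the queued WRs were posted, and
 * *bad_wr is set to the head of the queued chain.
 */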
int
spdk_rdma_provider_qp_flush_send_wrs(struct spdk_rdma_provider_qp *spdk_rdma_qp,
				     struct ibv_send_wr **bad_wr)
{
	struct spdk_rdma_mlx5_dv_qp *mlx5_qp;
	int rc;

	assert(bad_wr);
	assert(spdk_rdma_qp);

	mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common);

	if (spdk_unlikely(spdk_rdma_qp->send_wrs.first == NULL)) {
		return 0;
	}

	rc = ibv_wr_complete(mlx5_qp->qpex);

	if (spdk_unlikely(rc)) {
		/* If ibv_wr_complete() fails, none of the queued WRs were posted to the NIC */
		*bad_wr = spdk_rdma_qp->send_wrs.first;
	}

	spdk_rdma_qp->send_wrs.first = NULL;
	spdk_rdma_qp->stats->send.doorbell_updates++;

	return rc;
}

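/*
 * Memory-domain data transfers are built on top of mlx5 UMRs, so the
 * capability is only reported when a UMR implementer (e.g. SPDK's mlx5
 * accel module) has registered itself.
 */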
bool
spdk_rdma_provider_accel_sequence_supported(void)
{
	return spdk_mlx5_umr_implementer_is_registered();
}