xref: /spdk/module/accel/mlx5/accel_mlx5.c (revision bf30e09abe1667ae2769aa367cde39c550bcac00)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_ALLOC_REQS_IN_BATCH (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
35 struct accel_mlx5_io_channel;
36 struct accel_mlx5_task;
37 
38 struct accel_mlx5_crypto_dev_ctx {
39 	struct spdk_mempool *requests_pool;
40 	struct ibv_context *context;
41 	struct ibv_pd *pd;
42 	struct spdk_memory_domain *domain;
43 	TAILQ_ENTRY(accel_mlx5_crypto_dev_ctx) link;
44 };
45 
46 struct accel_mlx5_module {
47 	struct spdk_accel_module_if module;
48 	struct accel_mlx5_crypto_dev_ctx *crypto_ctxs;
49 	uint32_t num_crypto_ctxs;
50 	struct accel_mlx5_attr attr;
51 	char **allowed_devs;
52 	size_t allowed_devs_count;
53 	bool enabled;
54 };
55 
56 enum accel_mlx5_wrid_type {
57 	ACCEL_MLX5_WRID_MKEY,
58 	ACCEL_MLX5_WRID_WRITE,
59 };
60 
61 struct accel_mlx5_wrid {
62 	uint8_t wrid;
63 };
64 
65 struct accel_mlx5_req {
66 	struct accel_mlx5_task *task;
67 	struct mlx5dv_mkey *mkey;
68 	struct ibv_sge src_sg[ACCEL_MLX5_MAX_SGE];
69 	struct ibv_sge dst_sg[ACCEL_MLX5_MAX_SGE];
70 	uint16_t src_sg_count;
71 	uint16_t dst_sg_count;
72 	struct accel_mlx5_wrid mkey_wrid;
73 	struct accel_mlx5_wrid write_wrid;
74 	TAILQ_ENTRY(accel_mlx5_req) link;
75 };
76 
77 struct accel_mlx5_task {
78 	struct spdk_accel_task base;
79 	struct accel_mlx5_dev *dev;
80 	TAILQ_HEAD(, accel_mlx5_req) reqs;
81 	uint32_t num_reqs;
82 	uint32_t num_completed_reqs;
83 	uint32_t num_submitted_reqs;
84 	int rc;
85 	struct spdk_iov_sgl src;
86 	struct spdk_iov_sgl dst;
87 	struct accel_mlx5_req *cur_req;
88 	/* If set, memory data will be encrypted during TX and wire data will be
89 	 * decrypted during RX.
90 	 * If not set, memory data will be decrypted during TX and wire data will
91 	 * be encrypted during RX. */
92 	bool encrypt_on_tx;
93 	bool inplace;
94 	TAILQ_ENTRY(accel_mlx5_task) link;
95 };
96 
97 struct accel_mlx5_qp {
98 	struct ibv_qp *qp;
99 	struct ibv_qp_ex *qpex;
100 	struct mlx5dv_qp_ex *mqpx; /* mlx5dv extended QP, used to post MKEY configure WRs */
101 	struct ibv_cq *cq;
102 	struct accel_mlx5_io_channel *ch;
103 	bool wr_started;
104 	uint16_t num_reqs;
105 	uint16_t num_free_reqs;
106 };
107 
108 struct accel_mlx5_dev {
109 	struct accel_mlx5_qp *qp;
110 	struct ibv_cq *cq;
111 	struct spdk_rdma_utils_mem_map *mmap;
112 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
113 	uint32_t reqs_submitted;
114 	uint32_t max_reqs;
115 	/* Pending tasks waiting for request resources */
116 	TAILQ_HEAD(, accel_mlx5_task) nomem;
117 	/* Tasks submitted to HW. A task cannot be completed, even on error, until
118 	 * completions for all of its submitted requests have been reaped */
119 	TAILQ_HEAD(, accel_mlx5_task) in_hw;
120 	/* Tasks with WRs posted between ibv_wr_start() and ibv_wr_complete() */
121 	TAILQ_HEAD(, accel_mlx5_task) before_submit;
122 	TAILQ_ENTRY(accel_mlx5_dev) link;
123 };
124 
125 struct accel_mlx5_io_channel {
126 	struct accel_mlx5_dev *devs;
127 	struct spdk_poller *poller;
128 	uint32_t num_devs;
129 	/* Index in \b devs to be used for crypto in a round-robin fashion */
130 	uint32_t dev_idx;
131 };
132 
133 struct accel_mlx5_req_init_ctx {
134 	struct ibv_pd *pd;
135 	int rc;
136 };
137 
138 static struct accel_mlx5_module g_accel_mlx5;
139 
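/* Move a freshly created RC QP through the INIT -> RTR -> RTS states.
 * The QP is connected to itself (dest_qp_num is the caller's own qp_num),
 * forming the loopback connection used to post RDMA_WRITEs to locally
 * configured MKEYs. Most attributes are copied from the values reported by
 * ibv_query_qp(); the address vector is filled in from the local port and GID. */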
140 static int
141 mlx5_qp_init_2_rts(struct ibv_qp *qp, uint32_t dest_qp_num)
142 {
143 	struct ibv_qp_attr cur_attr = {}, attr = {};
144 	struct ibv_qp_init_attr init_attr = {};
145 	struct ibv_port_attr port_attr = {};
146 	union ibv_gid gid = {};
147 	int rc;
148 	uint8_t port;
149 	int attr_mask = IBV_QP_PKEY_INDEX |
150 			IBV_QP_PORT |
151 			IBV_QP_ACCESS_FLAGS |
152 			IBV_QP_PATH_MTU |
153 			IBV_QP_AV |
154 			IBV_QP_DEST_QPN |
155 			IBV_QP_RQ_PSN |
156 			IBV_QP_MAX_DEST_RD_ATOMIC |
157 			IBV_QP_MIN_RNR_TIMER |
158 			IBV_QP_TIMEOUT |
159 			IBV_QP_RETRY_CNT |
160 			IBV_QP_RNR_RETRY |
161 			IBV_QP_SQ_PSN |
162 			IBV_QP_MAX_QP_RD_ATOMIC;
163 
164 	if (!qp) {
165 		return -EINVAL;
166 	}
167 
168 	rc = ibv_query_qp(qp, &cur_attr, attr_mask, &init_attr);
169 	if (rc) {
170 		SPDK_ERRLOG("Failed to query qp %p %u\n", qp, qp->qp_num);
171 		return rc;
172 	}
173 
174 	port = cur_attr.port_num;
175 	rc = ibv_query_port(qp->context, port, &port_attr);
176 	if (rc) {
177 		SPDK_ERRLOG("Failed to query port num %d\n", port);
178 		return rc;
179 	}
180 
181 	if (port_attr.state != IBV_PORT_ARMED && port_attr.state != IBV_PORT_ACTIVE) {
182 		SPDK_ERRLOG("Wrong port %d state %d\n", port, port_attr.state);
183 		return -ENETUNREACH;
184 	}
185 
186 	rc = ibv_query_gid(qp->context, port, 0, &gid);
187 	if (rc) {
188 		SPDK_ERRLOG("Failed to get GID on port %d, rc %d\n", port, rc);
189 		return rc;
190 	}
191 
192 	attr.qp_state = IBV_QPS_INIT;
193 	attr.pkey_index = cur_attr.pkey_index;
194 	attr.port_num = cur_attr.port_num;
195 	attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
196 	attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
197 
198 	rc = ibv_modify_qp(qp, &attr, attr_mask);
199 	if (rc) {
200 		SPDK_ERRLOG("Failed to modify qp %p %u to INIT state, rc %d\n", qp, qp->qp_num, rc);
201 		return rc;
202 	}
203 
204 	attr.qp_state = IBV_QPS_RTR;
205 	attr.path_mtu = cur_attr.path_mtu;
206 	/* dest_qp_num == qp_num means a self-loopback connection */
207 	attr.dest_qp_num = dest_qp_num;
208 	attr.rq_psn = cur_attr.rq_psn;
209 	attr.max_dest_rd_atomic = cur_attr.max_dest_rd_atomic;
210 	attr.min_rnr_timer = cur_attr.min_rnr_timer;
211 	attr.ah_attr = cur_attr.ah_attr;
212 	attr.ah_attr.dlid = port_attr.lid;
213 	attr.ah_attr.sl = 0;
214 	attr.ah_attr.src_path_bits = 0;
215 
216 	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
217 		/* Ethernet requires the GRH to be set */
218 		attr.ah_attr.is_global = 1;
219 		attr.ah_attr.grh.hop_limit = 1;
220 		attr.ah_attr.grh.dgid = gid;
221 	} else {
222 		attr.ah_attr.is_global = 0;
223 	}
224 
225 	assert(attr.ah_attr.port_num == port);
226 
227 	attr_mask = IBV_QP_STATE | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
228 		    IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER | IBV_QP_AV;
229 
230 	rc = ibv_modify_qp(qp, &attr, attr_mask);
231 	if (rc) {
232 		SPDK_ERRLOG("Failed to modify qp %p %u to RTR state, rc %d\n", qp, qp->qp_num, rc);
233 		return rc;
234 	}
235 
236 	memset(&attr, 0, sizeof(attr));
237 	attr.qp_state = IBV_QPS_RTS;
238 	attr.timeout = cur_attr.timeout;
239 	attr.retry_cnt = cur_attr.retry_cnt;
240 	attr.sq_psn = cur_attr.sq_psn;
241 	attr.rnr_retry = cur_attr.rnr_retry;
242 	attr.max_rd_atomic = cur_attr.max_rd_atomic;
243 	attr_mask = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_SQ_PSN | IBV_QP_RNR_RETRY |
244 		    IBV_QP_MAX_QP_RD_ATOMIC;
245 
246 	rc = ibv_modify_qp(qp, &attr, attr_mask);
247 	if (rc) {
248 		SPDK_ERRLOG("Failed to modify qp %p %u to RTS state, rc %d\n", qp, qp->qp_num, rc);
249 		return rc;
250 	}
251 
252 	return 0;
253 }
254 
255 static inline enum ibv_qp_state
256 accel_mlx5_get_qp_state(struct ibv_qp *qp) {
257 	struct ibv_qp_attr qp_attr;
258 	struct ibv_qp_init_attr init_attr;
259 
260 	ibv_query_qp(qp, &qp_attr, IBV_QP_STATE, &init_attr);
261 
262 	return qp_attr.qp_state;
263 }
264 
265 static inline void
266 accel_mlx5_task_complete(struct accel_mlx5_task *task)
267 {
268 	struct accel_mlx5_req *req;
269 
270 	assert(task->num_reqs == task->num_completed_reqs);
271 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code);
272 
273 	TAILQ_FOREACH(req, &task->reqs, link) {
274 		spdk_mempool_put(task->dev->dev_ctx->requests_pool, req);
275 	}
276 	spdk_accel_task_complete(&task->base, task->rc);
277 }
278 
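/* Ring the doorbell for all WRs accumulated since the last ibv_wr_start().
 * On success the tasks collected on the before_submit list move to in_hw to
 * await completions; on failure they are completed immediately with the error. */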
279 static inline int
280 accel_mlx5_flush_wrs(struct accel_mlx5_dev *dev)
281 {
282 	struct accel_mlx5_task *task;
283 	struct accel_mlx5_qp *qp = dev->qp;
284 	int rc;
285 
286 	if (spdk_unlikely(!qp->wr_started)) {
287 		return 0;
288 	}
289 
290 	SPDK_DEBUGLOG(accel_mlx5, "Completing WRs on dev %s\n", dev->dev_ctx->context->device->name);
291 	rc = ibv_wr_complete(qp->qpex);
292 	if (spdk_unlikely(rc)) {
293 		SPDK_ERRLOG("ibv_wr_complete rc %d\n", rc);
294 		/* Complete all affected requests */
295 		TAILQ_FOREACH(task, &dev->before_submit, link) {
296 			task->rc = rc;
297 			accel_mlx5_task_complete(task);
298 		}
299 		TAILQ_INIT(&dev->before_submit);
300 	} else {
301 		TAILQ_CONCAT(&dev->in_hw, &dev->before_submit, link);
302 	}
303 
304 	qp->wr_started = false;
305 
306 	return rc;
307 }
308 
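/* Translate a data buffer into an ibv_sge (addr, length, lkey). If the buffer
 * belongs to a caller-provided memory domain, translation is delegated to that
 * domain; otherwise the lkey is looked up in the module's own memory map for
 * the device's PD. */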
309 static int
310 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
311 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
312 {
313 	struct spdk_rdma_utils_memory_translation map_translation;
314 	struct spdk_memory_domain_translation_result domain_translation;
315 	struct spdk_memory_domain_translation_ctx local_ctx;
316 	int rc;
317 
318 	if (domain) {
319 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
320 		local_ctx.size = sizeof(local_ctx);
321 		local_ctx.rdma.ibv_qp = dev->qp->qp;
322 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
323 						       &local_ctx, addr, size, &domain_translation);
324 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
325 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
326 				    domain_translation.iov_count);
327 			if (rc == 0) {
328 				rc = -EINVAL;
329 			}
330 
331 			return rc;
332 		}
333 		sge->lkey = domain_translation.rdma.lkey;
334 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
335 		sge->length = domain_translation.iov.iov_len;
336 	} else {
337 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
338 						     &map_translation);
339 		if (spdk_unlikely(rc)) {
340 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
341 			return rc;
342 		}
343 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
344 		sge->addr = (uint64_t)addr;
345 		sge->length = size;
346 	}
347 
348 	return 0;
349 }
350 
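/* Fill SGEs covering exactly one crypto block (task->base.block_size bytes)
 * starting at the current position of the iov SGL. Returns the number of SGEs
 * used (up to ACCEL_MLX5_MAX_SGE) or a negative errno on translation failure. */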
351 static inline int
352 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct accel_mlx5_req *req,
353 			  struct ibv_sge *sge,
354 			  struct spdk_iov_sgl *iovs, struct spdk_memory_domain *domain, void *domain_ctx)
355 {
356 	void *addr;
357 	uint32_t remaining = req->task->base.block_size;
358 	uint32_t size;
359 	int i = 0;
360 	int rc;
361 
362 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
363 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
364 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
365 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
366 		if (spdk_unlikely(rc)) {
367 			return rc;
368 		}
369 		spdk_iov_sgl_advance(iovs, size);
370 		i++;
371 		assert(remaining >= size);
372 		remaining -= size;
373 	}
374 	assert(remaining == 0);
375 
376 	return i;
377 }
378 
379 static inline bool
380 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
381 {
382 	uint32_t i;
383 
384 	for (i = 0; i < iovcnt; i++) {
385 		if (v1[i].iov_base != v2[i].iov_base || v1[i].iov_len != v2[i].iov_len) {
386 			return false;
387 		}
388 	}
389 
390 	return true;
391 }
392 
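/* Allocate requests for the task from the per-device mempool, one request per
 * remaining crypto block, limited by the free slots in the device QP. Requests
 * are taken in batches of ACCEL_MLX5_ALLOC_REQS_IN_BATCH; returns the number
 * actually allocated, which may be less than requested if the pool runs dry. */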
393 static inline uint32_t
394 accel_mlx5_task_alloc_reqs(struct accel_mlx5_task *task)
395 {
396 	struct accel_mlx5_req *reqs_tmp[ACCEL_MLX5_ALLOC_REQS_IN_BATCH], *req;
397 	uint32_t i, num_reqs, allocated_reqs = 0;
398 	uint32_t remaining_reqs = task->num_reqs - task->num_completed_reqs;
399 	uint32_t qp_slot = task->dev->max_reqs - task->dev->reqs_submitted;
400 	int rc;
401 
402 	assert(task->num_reqs >= task->num_completed_reqs);
403 	remaining_reqs = spdk_min(remaining_reqs, qp_slot);
404 
405 	while (remaining_reqs) {
406 		num_reqs = spdk_min(ACCEL_MLX5_ALLOC_REQS_IN_BATCH, remaining_reqs);
407 		rc = spdk_mempool_get_bulk(task->dev->dev_ctx->requests_pool, (void **)reqs_tmp, num_reqs);
408 		if (spdk_unlikely(rc)) {
409 			return allocated_reqs;
410 		}
411 		for (i = 0; i < num_reqs; i++) {
412 			req = reqs_tmp[i];
413 			req->src_sg_count = 0;
414 			req->dst_sg_count = 0;
415 			req->task = task;
416 			TAILQ_INSERT_TAIL(&task->reqs, req, link);
417 		}
418 		allocated_reqs += num_reqs;
419 		remaining_reqs -= num_reqs;
420 	}
421 
422 	return allocated_reqs;
423 }
424 
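/* Submit as many of the task's requests as the QP allows. Each request is two
 * WRs posted to the loopback QP:
 *  1. An unsignaled, inline MKEY configure WR that programs an indirect MKEY
 *     with the destination layout (src SGEs for in-place tasks, dst SGEs
 *     otherwise) and the AES-XTS crypto attributes (key, block size, per-block IV).
 *  2. A signaled RDMA_WRITE of the source SGEs to offset 0 of that MKEY, so
 *     the device encrypts or decrypts the data while moving it.
 * WRs are only queued here; they are pushed to HW by accel_mlx5_flush_wrs(). */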
425 static inline int
426 accel_mlx5_task_process(struct accel_mlx5_task *mlx5_task)
427 {
428 	struct spdk_accel_task *task = &mlx5_task->base;
429 	struct accel_mlx5_dev *dev = mlx5_task->dev;
430 	struct accel_mlx5_qp *qp = dev->qp;
431 	struct ibv_qp_ex *qpx = qp->qpex;
432 	struct mlx5dv_qp_ex *mqpx = qp->mqpx;
433 	struct mlx5dv_mkey_conf_attr mkey_attr = {};
434 	struct mlx5dv_crypto_attr cattr;
435 	struct accel_mlx5_req *req;
436 	uint64_t iv;
437 	uint32_t num_setters = 3; /* access flags, layout, crypto */
438 	int rc;
439 
440 	iv = task->iv + mlx5_task->num_completed_reqs;
441 
442 	if (!qp->wr_started) {
443 		ibv_wr_start(qpx);
444 		qp->wr_started = true;
445 	}
446 
447 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, reqs: total %u, submitted %u, completed %u\n",
448 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
449 
450 	while (mlx5_task->cur_req && dev->reqs_submitted < dev->max_reqs) {
451 		req = mlx5_task->cur_req;
452 		rc = accel_mlx5_fill_block_sge(dev, req, req->src_sg, &mlx5_task->src, task->src_domain,
453 					       task->src_domain_ctx);
454 		if (spdk_unlikely(rc <= 0)) {
455 			if (rc == 0) {
456 				rc = -EINVAL;
457 			}
458 			SPDK_ERRLOG("failed to set src sge, rc %d\n", rc);
			mlx5_task->rc = rc;
459 			goto err_out;
460 		}
461 		req->src_sg_count = rc;
462 
463 		/* prepare memory key - destination for WRITE operation */
464 		qpx->wr_flags = IBV_SEND_INLINE;
465 		qpx->wr_id = (uint64_t)&req->mkey_wrid;
466 		mlx5dv_wr_mkey_configure(mqpx, req->mkey, num_setters, &mkey_attr);
467 		mlx5dv_wr_set_mkey_access_flags(mqpx,
468 						IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
469 		if (mlx5_task->inplace) {
470 			mlx5dv_wr_set_mkey_layout_list(mqpx, req->src_sg_count, req->src_sg);
471 		} else {
472 			rc = accel_mlx5_fill_block_sge(dev, req, req->dst_sg, &mlx5_task->dst, task->dst_domain,
473 						       task->dst_domain_ctx);
474 			if (spdk_unlikely(rc <= 0)) {
475 				if (rc == 0) {
476 					rc = -EINVAL;
477 				}
478 				SPDK_ERRLOG("failed to set dst sge, rc %d\n", rc);
479 				mlx5_task->rc = rc;
480 				goto err_out;
481 			}
482 			req->dst_sg_count = rc;
483 			mlx5dv_wr_set_mkey_layout_list(mqpx, req->dst_sg_count, req->dst_sg);
484 		}
485 		SPDK_DEBUGLOG(accel_mlx5, "req %p, task %p crypto_attr: bs %u, iv %"PRIu64", enc_on_tx %d\n",
486 			      req, req->task, task->block_size, iv, mlx5_task->encrypt_on_tx);
487 		rc = spdk_mlx5_crypto_set_attr(&cattr, task->crypto_key->priv, dev->dev_ctx->pd, task->block_size,
488 					       iv++, mlx5_task->encrypt_on_tx);
489 		if (spdk_unlikely(rc)) {
490 			SPDK_ERRLOG("failed to set crypto attr, rc %d\n", rc);
491 			mlx5_task->rc = rc;
492 			goto err_out;
493 		}
494 		mlx5dv_wr_set_mkey_crypto(mqpx, &cattr);
495 
496 		/* Prepare WRITE, use rkey from mkey, remote addr is always 0 - start of the mkey */
497 		qpx->wr_flags = IBV_SEND_SIGNALED;
498 		qpx->wr_id = (uint64_t)&req->write_wrid;
499 		ibv_wr_rdma_write(qpx, req->mkey->rkey, 0);
500 		/* local buffers, SG is already filled */
501 		ibv_wr_set_sge_list(qpx, req->src_sg_count, req->src_sg);
502 
503 		mlx5_task->num_submitted_reqs++;
504 		assert(mlx5_task->num_submitted_reqs <= mlx5_task->num_reqs);
505 		dev->reqs_submitted++;
506 		mlx5_task->cur_req = TAILQ_NEXT(mlx5_task->cur_req, link);
507 	}
508 
509 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
510 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
511 
512 	TAILQ_INSERT_TAIL(&dev->before_submit, mlx5_task, link);
513 
514 	return 0;
515 
516 err_out:
517 	/* Abort all WRs submitted since last wr_start */
518 	ibv_wr_abort(qpx);
519 	accel_mlx5_task_complete(mlx5_task);
520 	TAILQ_FOREACH(mlx5_task, &dev->before_submit, link) {
521 		mlx5_task->rc = rc;
522 		accel_mlx5_task_complete(mlx5_task);
523 	}
524 	TAILQ_INIT(&dev->before_submit);
525 
526 	return rc;
527 
528 }
529 
530 static inline int
531 accel_mlx5_task_continue(struct accel_mlx5_task *task)
532 {
533 	struct accel_mlx5_req *req;
534 
535 	TAILQ_FOREACH(req, &task->reqs, link) {
536 		spdk_mempool_put(task->dev->dev_ctx->requests_pool, req);
537 	}
538 	TAILQ_INIT(&task->reqs);
539 
540 	if (spdk_unlikely(task->rc)) {
541 		accel_mlx5_task_complete(task);
542 		return 0;
543 	}
544 
545 	if (spdk_unlikely(!accel_mlx5_task_alloc_reqs(task))) {
546 		/* Pool is empty, queue this task */
547 		TAILQ_INSERT_TAIL(&task->dev->nomem, task, link);
548 		return -ENOMEM;
549 	}
550 	task->cur_req = TAILQ_FIRST(&task->reqs);
551 
552 	return accel_mlx5_task_process(task);
553 }
554 
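/* Prepare an accel task for submission on the given device: pick the crypto
 * direction from the opcode, verify that src and dst have the same total size
 * and that the size is a multiple of the block size, decide whether the task
 * can be done in place, and allocate one request per crypto block. Returns
 * -ENOMEM if the request pool is exhausted so the caller can queue the task. */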
555 static inline int
556 accel_mlx5_task_init(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_dev *dev)
557 {
558 	struct spdk_accel_task *task = &mlx5_task->base;
559 	size_t src_nbytes = 0, dst_nbytes = 0;
560 	uint32_t i;
561 
562 	switch (task->op_code) {
563 	case SPDK_ACCEL_OPC_ENCRYPT:
564 		mlx5_task->encrypt_on_tx = true;
565 		break;
566 	case SPDK_ACCEL_OPC_DECRYPT:
567 		mlx5_task->encrypt_on_tx = false;
568 		break;
569 	default:
570 		SPDK_ERRLOG("Unsupported accel opcode %d\n", task->op_code);
571 		return -ENOTSUP;
572 	}
573 
574 	for (i = 0; i < task->s.iovcnt; i++) {
575 		src_nbytes += task->s.iovs[i].iov_len;
576 	}
577 
578 	for (i = 0; i < task->d.iovcnt; i++) {
579 		dst_nbytes += task->d.iovs[i].iov_len;
580 	}
581 
582 	if (spdk_unlikely(src_nbytes != dst_nbytes)) {
583 		return -EINVAL;
584 	}
585 	if (spdk_unlikely(src_nbytes % mlx5_task->base.block_size != 0)) {
586 		return -EINVAL;
587 	}
588 
589 	mlx5_task->dev = dev;
590 	mlx5_task->rc = 0;
591 	mlx5_task->num_completed_reqs = 0;
592 	mlx5_task->num_submitted_reqs = 0;
593 	mlx5_task->cur_req = NULL;
594 	mlx5_task->num_reqs = src_nbytes / mlx5_task->base.block_size;
595 	spdk_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt, 0);
596 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
597 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
598 		mlx5_task->inplace = true;
599 	} else {
600 		mlx5_task->inplace = false;
601 		spdk_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt, 0);
602 	}
603 
604 	TAILQ_INIT(&mlx5_task->reqs);
605 	if (spdk_unlikely(!accel_mlx5_task_alloc_reqs(mlx5_task))) {
606 		/* Pool is empty, queue this task */
607 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n",
608 			      mlx5_task->dev->dev_ctx->context->device->name);
609 		return -ENOMEM;
610 	}
611 	mlx5_task->cur_req = TAILQ_FIRST(&mlx5_task->reqs);
612 
613 	SPDK_DEBUGLOG(accel_mlx5, "task %p, inplace %d, num_reqs %d\n", mlx5_task, mlx5_task->inplace,
614 		      mlx5_task->num_reqs);
615 
616 	return 0;
617 }
618 
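/* Module entry point for task submission. Devices of the channel are used in a
 * round-robin fashion; if the request pool is exhausted the task is parked on
 * the device's nomem list and resubmitted from the poller. */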
619 static int
620 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task)
621 {
622 	struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch);
623 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
624 	struct accel_mlx5_dev *dev;
625 	int rc;
626 
627 	if (!g_accel_mlx5.enabled || !task->crypto_key ||
628 	    task->crypto_key->module_if != &g_accel_mlx5.module ||
629 	    !task->crypto_key->priv) {
630 		return -EINVAL;
631 	}
632 	dev = &ch->devs[ch->dev_idx];
633 	ch->dev_idx++;
634 	if (ch->dev_idx == ch->num_devs) {
635 		ch->dev_idx = 0;
636 	}
637 
638 	rc = accel_mlx5_task_init(mlx5_task, dev);
639 	if (spdk_unlikely(rc)) {
640 		if (rc == -ENOMEM) {
641 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
642 				      mlx5_task->num_reqs);
643 			TAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
644 			return 0;
645 		}
646 		return rc;
647 	}
648 
649 	return accel_mlx5_task_process(mlx5_task);
650 }
651 
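/* Reap up to ACCEL_MLX5_MAX_WC completions from the device CQ. MKEY configure
 * WRs are unsignaled, so a completion with that wr_id only appears on error and
 * is just logged. RDMA_WRITE completions account for request completion: a task
 * is finished once all of its requests are done, or continued via
 * accel_mlx5_task_continue() if it still has unsubmitted requests. */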
652 static inline int64_t
653 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
654 {
655 	struct ibv_wc wc[ACCEL_MLX5_MAX_WC];
656 	struct accel_mlx5_task *task;
657 	struct accel_mlx5_req *req;
658 	struct accel_mlx5_wrid *wr;
659 	int reaped, i, rc;
660 
661 	reaped = ibv_poll_cq(dev->cq, ACCEL_MLX5_MAX_WC, wc);
662 	if (spdk_unlikely(reaped < 0)) {
663 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
664 		return reaped;
665 	} else if (reaped == 0) {
666 		return 0;
667 	}
668 
669 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
670 		      dev->dev_ctx->context->device->name);
671 
672 	for (i = 0; i < reaped; i++) {
673 		wr = (struct accel_mlx5_wrid *)wc[i].wr_id;
674 
675 		switch (wr->wrid) {
676 		case ACCEL_MLX5_WRID_MKEY:
677 			/* We only get this completion in the error case */
678 			req = SPDK_CONTAINEROF(wr, struct accel_mlx5_req, mkey_wrid);
679 			if (!wc[i].status) {
680 				SPDK_ERRLOG("Got unexpected cpl for mkey configure, req %p, qp %p, state %d\n",
681 					    req, dev->qp->qp, accel_mlx5_get_qp_state(dev->qp->qp));
682 			} else {
683 				SPDK_ERRLOG("MKEY: qp %p, state %d, req %p, task %p WC status %d\n",
684 					    dev->qp->qp, accel_mlx5_get_qp_state(dev->qp->qp), req, req->task, wc[i].status);
685 			}
686 			break;
687 		case ACCEL_MLX5_WRID_WRITE:
688 			req = SPDK_CONTAINEROF(wr, struct accel_mlx5_req, write_wrid);
689 			task = req->task;
690 			if (wc[i].status) {
691 				assert(req->task);
692 				SPDK_ERRLOG("WRITE: qp %p, state %d, req %p, task %p WC status %d\n", dev->qp->qp,
693 					    accel_mlx5_get_qp_state(dev->qp->qp), req, req->task, wc[i].status);
694 				if (!task->rc) {
695 					task->rc = -EIO;
696 				}
697 			}
698 
699 			task->num_completed_reqs++;
700 			assert(dev->reqs_submitted);
701 			dev->reqs_submitted--;
702 			SPDK_DEBUGLOG(accel_mlx5, "req %p, task %p, remaining %u\n", req, task,
703 				      task->num_reqs - task->num_completed_reqs);
704 			if (task->num_completed_reqs == task->num_reqs) {
705 				TAILQ_REMOVE(&dev->in_hw, task, link);
706 				accel_mlx5_task_complete(task);
707 			} else if (task->num_completed_reqs == task->num_submitted_reqs) {
708 				assert(task->num_submitted_reqs < task->num_reqs);
709 				TAILQ_REMOVE(&dev->in_hw, task, link);
710 				rc = accel_mlx5_task_continue(task);
711 				if (spdk_unlikely(rc)) {
712 					if (rc != -ENOMEM) {
713 						task->rc = rc;
714 						accel_mlx5_task_complete(task);
715 					}
716 				}
717 			}
718 			break;
719 		}
720 	}
721 
722 	return reaped;
723 }
724 
725 static inline void
726 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
727 {
728 	struct accel_mlx5_task *task, *tmp;
729 	int rc;
730 
731 	TAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
732 		TAILQ_REMOVE(&dev->nomem, task, link);
733 		rc = accel_mlx5_task_continue(task);
734 		if (rc) {
735 			if (rc == -ENOMEM) {
736 				break;
737 			} else {
738 				task->rc = rc;
739 				accel_mlx5_task_complete(task);
740 			}
741 		}
742 	}
743 }
744 
745 static int
746 accel_mlx5_poller(void *ctx)
747 {
748 	struct accel_mlx5_io_channel *ch = ctx;
749 	struct accel_mlx5_dev *dev;
750 
751 	int64_t completions = 0, rc;
752 	uint32_t i;
753 
754 	for (i = 0; i < ch->num_devs; i++) {
755 		dev = &ch->devs[i];
756 		if (dev->reqs_submitted) {
757 			rc = accel_mlx5_poll_cq(dev);
758 			if (spdk_unlikely(rc < 0)) {
759 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
760 			}
761 			completions += rc;
762 			accel_mlx5_flush_wrs(dev);
763 		}
764 		if (!TAILQ_EMPTY(&dev->nomem)) {
765 			accel_mlx5_resubmit_nomem_tasks(dev);
766 		}
767 	}
768 
769 	return !!completions;
770 }
771 
772 static bool
773 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
774 {
775 	assert(g_accel_mlx5.enabled);
776 
777 	switch (opc) {
778 	case SPDK_ACCEL_OPC_ENCRYPT:
779 	case SPDK_ACCEL_OPC_DECRYPT:
780 		return true;
781 	default:
782 		return false;
783 	}
784 }
785 
786 static struct spdk_io_channel *
787 accel_mlx5_get_io_channel(void)
788 {
789 	assert(g_accel_mlx5.enabled);
790 	return spdk_get_io_channel(&g_accel_mlx5);
791 }
792 
793 static void
794 accel_mlx5_qp_destroy(struct accel_mlx5_qp *qp)
795 {
796 	if (!qp) {
797 		return;
798 	}
799 
800 	if (qp->qp) {
801 		ibv_destroy_qp(qp->qp);
802 		qp->qp = NULL;
803 	}
804 
805 	free(qp);
806 }
807 
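/* Create an RC QP on the given CQ/PD with the mlx5dv MKEY_CONFIGURE send op
 * enabled, then connect it to itself via mlx5_qp_init_2_rts(). The extended
 * QP handles (ibv_qp_ex/mlx5dv_qp_ex) are cached for posting WRs later. */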
808 static struct accel_mlx5_qp *
809 accel_mlx5_qp_create(struct ibv_cq *cq, struct accel_mlx5_io_channel *ch, struct ibv_pd *pd,
810 		     int qp_size)
811 {
812 	struct accel_mlx5_qp *qp;
813 	struct ibv_qp_init_attr_ex dv_qp_attr = {
814 		.qp_context = ch,
815 		.cap = {
816 			.max_send_wr = qp_size,
817 			.max_recv_wr = 0,
818 			.max_send_sge = ACCEL_MLX5_MAX_SGE,
819 			.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE,
820 		},
821 		.qp_type = IBV_QPT_RC,
822 		.comp_mask = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
823 		.pd = pd,
824 		.send_ops_flags = IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_BIND_MW,
825 		.send_cq = cq,
826 		.recv_cq = cq,
827 	};
828 	/* Attrs required for MKEY registration */
829 	struct mlx5dv_qp_init_attr mlx5_qp_attr = {
830 		.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
831 		.send_ops_flags = MLX5DV_QP_EX_WITH_MKEY_CONFIGURE
832 	};
833 	int rc;
834 
835 	if (!dv_qp_attr.send_cq || !dv_qp_attr.recv_cq) {
836 		return NULL;
837 	}
838 
839 	qp = calloc(1, sizeof(*qp));
840 	if (!qp) {
841 		return NULL;
842 	}
843 
844 	qp->qp = mlx5dv_create_qp(cq->context, &dv_qp_attr, &mlx5_qp_attr);
845 	if (!qp->qp) {
846 		SPDK_ERRLOG("Failed to create qpair, errno %s (%d)\n", spdk_strerror(errno), errno);
847 		free(qp);
848 		return NULL;
849 	}
850 
851 	rc = mlx5_qp_init_2_rts(qp->qp, qp->qp->qp_num);
852 	if (rc) {
853 		SPDK_ERRLOG("Failed to create loopback connection, qp_num %u\n", qp->qp->qp_num);
854 		accel_mlx5_qp_destroy(qp);
855 		return NULL;
856 	}
857 
858 	qp->qpex = ibv_qp_to_qp_ex(qp->qp);
859 	if (!qp->qpex) {
860 		SPDK_ERRLOG("Failed to get qpex\n");
861 		accel_mlx5_qp_destroy(qp);
862 		return NULL;
863 	}
864 
865 	qp->mqpx = mlx5dv_qp_ex_from_ibv_qp_ex(qp->qpex);
866 	if (!qp->mqpx) {
867 		SPDK_ERRLOG("Failed to get mqpx\n");
868 		accel_mlx5_qp_destroy(qp);
869 		return NULL;
870 	}
871 
872 	qp->num_reqs = qp_size;
873 	qp->cq = cq;
874 
875 	return qp;
876 }
877 
878 static void
879 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
880 {
881 	struct accel_mlx5_io_channel *ch = ctx_buf;
882 	struct accel_mlx5_dev *dev;
883 	uint32_t i;
884 
885 	spdk_poller_unregister(&ch->poller);
886 	for (i = 0; i < ch->num_devs; i++) {
887 		dev = &ch->devs[i];
888 		accel_mlx5_qp_destroy(dev->qp);
889 		if (dev->cq) {
890 			ibv_destroy_cq(dev->cq);
891 			dev->cq = NULL;
892 		}
893 		spdk_rdma_utils_free_mem_map(&dev->mmap);
894 	}
895 	free(ch->devs);
896 }
897 
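/* io_channel constructor: for every crypto-capable device create a CQ, a
 * loopback QP and a memory map, then register the channel poller. */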
898 static int
899 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
900 {
901 	struct accel_mlx5_io_channel *ch = ctx_buf;
902 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
903 	struct accel_mlx5_dev *dev;
904 	uint32_t i;
905 	int rc;
906 
907 	ch->devs = calloc(g_accel_mlx5.num_crypto_ctxs, sizeof(*ch->devs));
908 	if (!ch->devs) {
909 		SPDK_ERRLOG("Memory allocation failed\n");
910 		return -ENOMEM;
911 	}
912 
913 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
914 		dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
915 		dev = &ch->devs[i];
916 		dev->dev_ctx = dev_ctx;
917 		ch->num_devs++;
918 		dev->cq = ibv_create_cq(dev_ctx->context, g_accel_mlx5.attr.qp_size, ch, NULL, 0);
919 		if (!dev->cq) {
920 			SPDK_ERRLOG("Failed to create CQ on dev %s\n", dev_ctx->context->device->name);
921 			rc = -ENOMEM;
922 			goto err_out;
923 		}
924 
925 		dev->qp = accel_mlx5_qp_create(dev->cq, ch, dev_ctx->pd, g_accel_mlx5.attr.qp_size);
926 		if (!dev->qp) {
927 			SPDK_ERRLOG("Failed to create QP on dev %s\n", dev_ctx->context->device->name);
928 			rc = -ENOMEM;
929 			goto err_out;
930 		}
931 
932 		TAILQ_INIT(&dev->nomem);
933 		TAILQ_INIT(&dev->in_hw);
934 		TAILQ_INIT(&dev->before_submit);
935 		/* Each request consumes 2 WQEs - MKEY configure and RDMA_WRITE. The MKEY WQE is unsignaled, so we
936 		 * count only RDMA_WRITE completions. Divide the user-defined qp_size by two for simplicity */
937 		dev->max_reqs = g_accel_mlx5.attr.qp_size / 2;
938 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
939 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
940 		if (!dev->mmap) {
941 			SPDK_ERRLOG("Failed to create memory map\n");
942 			rc = -ENOMEM;
943 			goto err_out;
944 		}
945 	}
946 
947 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
948 
949 	return 0;
950 
951 err_out:
952 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
953 	return rc;
954 }
955 
956 void
957 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
958 {
959 	assert(attr);
960 
961 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
962 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
963 	attr->allowed_devs = NULL;
964 }
965 
966 static void
967 accel_mlx5_allowed_devs_free(void)
968 {
969 	size_t i;
970 
971 	if (!g_accel_mlx5.allowed_devs) {
972 		return;
973 	}
974 
975 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
976 		free(g_accel_mlx5.allowed_devs[i]);
977 	}
978 	free(g_accel_mlx5.attr.allowed_devs);
979 	free(g_accel_mlx5.allowed_devs);
980 	g_accel_mlx5.attr.allowed_devs = NULL;
981 	g_accel_mlx5.allowed_devs = NULL;
982 	g_accel_mlx5.allowed_devs_count = 0;
983 }
984 
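/* Parse a comma-separated list of device names (e.g. "mlx5_0,mlx5_1") into the
 * g_accel_mlx5.allowed_devs array; the previous list, if any, is freed first. */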
985 static int
986 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
987 {
988 	char *str, *tmp, *tok;
989 	size_t devs_count = 0;
990 
991 	str = strdup(allowed_devs);
992 	if (!str) {
993 		return -ENOMEM;
994 	}
995 
996 	accel_mlx5_allowed_devs_free();
997 
998 	tmp = str;
999 	while ((tmp = strchr(tmp, ',')) != NULL) {
1000 		tmp++;
1001 		devs_count++;
1002 	}
1003 	devs_count++;
1004 
1005 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
1006 	if (!g_accel_mlx5.allowed_devs) {
1007 		free(str);
1008 		return -ENOMEM;
1009 	}
1010 
1011 	devs_count = 0;
1012 	tok = strtok(str, ",");
1013 	while (tok) {
1014 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
1015 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
1016 			free(str);
1017 			accel_mlx5_allowed_devs_free();
1018 			return -ENOMEM;
1019 		}
1020 		tok = strtok(NULL, ",");
1021 		devs_count++;
1022 		g_accel_mlx5.allowed_devs_count++;
1023 	}
1024 
1025 	free(str);
1026 
1027 	return 0;
1028 }
1029 
1030 int
1031 accel_mlx5_enable(struct accel_mlx5_attr *attr)
1032 {
1033 	int rc;
1034 
1035 	if (g_accel_mlx5.enabled) {
1036 		return -EEXIST;
1037 	}
1038 	if (attr) {
1039 		g_accel_mlx5.attr = *attr;
1040 		g_accel_mlx5.attr.allowed_devs = NULL;
1041 
1042 		if (attr->allowed_devs) {
1043 			/* Contains a copy of user's string */
1044 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
1045 			if (!g_accel_mlx5.attr.allowed_devs) {
1046 				return -ENOMEM;
1047 			}
1048 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
1049 			if (rc) {
1050 				return rc;
1051 			}
1052 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
1053 							 g_accel_mlx5.allowed_devs_count);
1054 			if (rc) {
1055 				accel_mlx5_allowed_devs_free();
1056 				return rc;
1057 			}
1058 		}
1059 	} else {
1060 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
1061 	}
1062 
1063 	g_accel_mlx5.enabled = true;
1064 	spdk_accel_module_list_add(&g_accel_mlx5.module);
1065 
1066 	return 0;
1067 }
1068 
1069 static void
1070 accel_mlx5_release_crypto_req(struct spdk_mempool *mp, void *cb_arg, void *_req, unsigned obj_idx)
1071 {
1072 	struct accel_mlx5_req *req = _req;
1073 
1074 	if (req->mkey) {
1075 		mlx5dv_destroy_mkey(req->mkey);
1076 	}
1077 }
1078 
1079 static void
1080 accel_mlx5_release_reqs(struct accel_mlx5_crypto_dev_ctx *dev_ctx)
1081 {
1082 	if (!dev_ctx->requests_pool) {
1083 		return;
1084 	}
1085 
1086 	spdk_mempool_obj_iter(dev_ctx->requests_pool, accel_mlx5_release_crypto_req, NULL);
1087 }
1088 
1089 static void
1090 accel_mlx5_free_resources(void)
1091 {
1092 	uint32_t i;
1093 
1094 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
1095 		accel_mlx5_release_reqs(&g_accel_mlx5.crypto_ctxs[i]);
1096 		spdk_rdma_utils_put_pd(g_accel_mlx5.crypto_ctxs[i].pd);
1097 		spdk_rdma_utils_put_memory_domain(g_accel_mlx5.crypto_ctxs[i].domain);
1098 	}
1099 
1100 	free(g_accel_mlx5.crypto_ctxs);
1101 	g_accel_mlx5.crypto_ctxs = NULL;
1102 }
1103 
1104 static void
1105 accel_mlx5_deinit_cb(void *ctx)
1106 {
1107 	accel_mlx5_free_resources();
1108 	spdk_accel_module_finish();
1109 }
1110 
1111 static void
1112 accel_mlx5_deinit(void *ctx)
1113 {
1114 	if (g_accel_mlx5.allowed_devs) {
1115 		accel_mlx5_allowed_devs_free();
1116 	}
1117 	spdk_mlx5_crypto_devs_allow(NULL, 0);
1118 	if (g_accel_mlx5.crypto_ctxs) {
1119 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
1120 	} else {
1121 		spdk_accel_module_finish();
1122 	}
1123 }
1124 
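/* Mempool constructor for requests: create an indirect MKEY with crypto support
 * for each request and tag the two wr_id descriptors so completions can be told
 * apart in accel_mlx5_poll_cq(). Any failure is reported through ctx->rc. */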
1125 static void
1126 accel_mlx5_configure_crypto_req(struct spdk_mempool *mp, void *cb_arg, void *_req, unsigned obj_idx)
1127 {
1128 	struct accel_mlx5_req *req = _req;
1129 	struct accel_mlx5_req_init_ctx *ctx = cb_arg;
1130 	struct mlx5dv_mkey_init_attr mkey_attr = {
1131 		.pd = ctx->pd,
1132 		.max_entries = ACCEL_MLX5_MAX_SGE, /* This MKEY refers to N base MKEYs/buffers */
1133 		.create_flags = MLX5DV_MKEY_INIT_ATTR_FLAGS_INDIRECT | /* This MKEY refers to other MKEYs */
1134 		MLX5DV_MKEY_INIT_ATTR_FLAGS_CRYPTO
1135 	};
1136 
1137 	memset(req, 0, sizeof(*req));
1138 	if (ctx->rc) {
1139 		return;
1140 	}
1141 
1142 	req->mkey = mlx5dv_create_mkey(&mkey_attr);
1143 	if (!req->mkey) {
1144 		SPDK_ERRLOG("Failed to create mkey on dev %s, errno %d\n", ctx->pd->context->device->name, errno);
1145 		ctx->rc = errno;
1146 		return;
1147 	}
1148 
1149 	req->mkey_wrid.wrid = ACCEL_MLX5_WRID_MKEY;
1150 	req->write_wrid.wrid = ACCEL_MLX5_WRID_WRITE;
1151 }
1152 
1153 static int
1154 accel_mlx5_crypto_ctx_mempool_create(struct accel_mlx5_crypto_dev_ctx *crypto_dev_ctx,
1155 				     size_t num_entries)
1156 {
1157 	struct accel_mlx5_req_init_ctx init_ctx = {.pd = crypto_dev_ctx->pd };
1158 	char pool_name[32];
1159 	int rc;
1160 
1161 	/* The compiler may produce a warning like
1162 	 * warning: '%s' directive output may be truncated writing up to 63 bytes into a region of size 21
1163 	 * [-Wformat-truncation=]
1164 	 * That is expected because the ibv device name can be up to 64 bytes while the DPDK mempool API
1165 	 * limits the pool name to 32 bytes.
1166 	 * To suppress this warning, check the value returned by snprintf */
1167 	rc = snprintf(pool_name, 32, "accel_mlx5_%s", crypto_dev_ctx->context->device->name);
1168 	if (rc < 0) {
1169 		assert(0);
1170 		return -EINVAL;
1171 	}
1172 	crypto_dev_ctx->requests_pool = spdk_mempool_create_ctor(pool_name, num_entries,
1173 					sizeof(struct accel_mlx5_req),
1174 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, SPDK_ENV_SOCKET_ID_ANY,
1175 					accel_mlx5_configure_crypto_req, &init_ctx);
1176 	if (!crypto_dev_ctx->requests_pool || init_ctx.rc) {
1177 		SPDK_ERRLOG("Failed to create memory pool\n");
1178 		return init_ctx.rc ? : -ENOMEM;
1179 	}
1180 
1181 	return 0;
1182 }
1183 
1184 static int
1185 accel_mlx5_init(void)
1186 {
1187 	struct accel_mlx5_crypto_dev_ctx *crypto_dev_ctx;
1188 	struct ibv_context **rdma_devs, *dev;
1189 	struct ibv_pd *pd;
1190 	int num_devs = 0, rc = 0, i;
1191 
1192 	if (!g_accel_mlx5.enabled) {
1193 		return -EINVAL;
1194 	}
1195 
1196 	rdma_devs = spdk_mlx5_crypto_devs_get(&num_devs);
1197 	if (!rdma_devs || !num_devs) {
1198 		return -ENODEV;
1199 	}
1200 
1201 	g_accel_mlx5.crypto_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.crypto_ctxs));
1202 	if (!g_accel_mlx5.crypto_ctxs) {
1203 		SPDK_ERRLOG("Memory allocation failed\n");
1204 		rc = -ENOMEM;
1205 		goto cleanup;
1206 	}
1207 
1208 	for (i = 0; i < num_devs; i++) {
1209 		crypto_dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
1210 		dev = rdma_devs[i];
1211 		pd = spdk_rdma_utils_get_pd(dev);
1212 		if (!pd) {
1213 			SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
1214 			rc = -EINVAL;
1215 			goto cleanup;
1216 		}
1217 		crypto_dev_ctx->context = dev;
1218 		crypto_dev_ctx->pd = pd;
1219 		crypto_dev_ctx->domain = spdk_rdma_utils_get_memory_domain(crypto_dev_ctx->pd);
1220 		if (!crypto_dev_ctx->domain) {
1221 			SPDK_ERRLOG("Failed to get memory domain\n");
1222 			rc = -ENOMEM;
1223 			goto cleanup;
1224 		}
1225 
1226 		g_accel_mlx5.num_crypto_ctxs++;
1227 		rc = accel_mlx5_crypto_ctx_mempool_create(crypto_dev_ctx, g_accel_mlx5.attr.num_requests);
1228 		if (rc) {
1229 			goto cleanup;
1230 		}
1231 	}
1232 
1233 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
1234 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
1235 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
1236 
1237 	spdk_mlx5_crypto_devs_release(rdma_devs);
1238 
1239 	return rc;
1240 
1241 cleanup:
1242 	spdk_mlx5_crypto_devs_release(rdma_devs);
1243 	accel_mlx5_free_resources();
1244 
1245 	return rc;
1246 }
1247 
1248 static void
1249 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
1250 {
1251 	if (g_accel_mlx5.enabled) {
1252 		spdk_json_write_object_begin(w);
1253 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
1254 		spdk_json_write_named_object_begin(w, "params");
1255 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
1256 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
1257 		if (g_accel_mlx5.attr.allowed_devs) {
1258 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
1259 		}
1260 		spdk_json_write_object_end(w);
1261 		spdk_json_write_object_end(w);
1262 	}
1263 }
1264 
1265 static size_t
1266 accel_mlx5_get_ctx_size(void)
1267 {
1268 	return sizeof(struct accel_mlx5_task);
1269 }
1270 
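/* Register an accel crypto key with the mlx5 layer: the two AES-XTS key parts
 * are concatenated into a single DEK buffer, a keytag is created from it via
 * spdk_mlx5_crypto_keytag_create() and stored in key->priv, and the temporary
 * DEK copy is zeroed before being freed. */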
1271 static int
1272 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
1273 {
1274 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
1275 	struct spdk_mlx5_crypto_keytag *keytag;
1276 	int rc;
1277 
1278 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
1279 		return -EINVAL;
1280 	}
1281 
1282 	attr.dek = calloc(1, key->key_size + key->key2_size);
1283 	if (!attr.dek) {
1284 		return -ENOMEM;
1285 	}
1286 
1287 	memcpy(attr.dek, key->key, key->key_size);
1288 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
1289 	attr.dek_len = key->key_size + key->key2_size;
1290 
1291 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
1292 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
1293 	free(attr.dek);
1294 	if (rc) {
1295 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
1296 		return rc;
1297 	}
1298 
1299 	key->priv = keytag;
1300 
1301 	return 0;
1302 }
1303 
1304 static void
1305 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
1306 {
1307 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
1308 		return;
1309 	}
1310 
1311 	spdk_mlx5_crypto_keytag_destroy(key->priv);
1312 }
1313 
1314 static bool
1315 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
1316 {
1317 	switch (cipher) {
1318 	case SPDK_ACCEL_CIPHER_AES_XTS:
1319 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
1320 	default:
1321 		return false;
1322 	}
1323 }
1324 
1325 static int
1326 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
1327 {
1328 	int i, size;
1329 
1330 	if (!domains || !array_size) {
1331 		return (int)g_accel_mlx5.num_crypto_ctxs;
1332 	}
1333 
1334 	size = spdk_min(array_size, (int)g_accel_mlx5.num_crypto_ctxs);
1335 
1336 	for (i = 0; i < size; i++) {
1337 		domains[i] = g_accel_mlx5.crypto_ctxs[i].domain;
1338 	}
1339 
1340 	return (int)g_accel_mlx5.num_crypto_ctxs;
1341 }
1342 
1343 static struct accel_mlx5_module g_accel_mlx5 = {
1344 	.module = {
1345 		.module_init		= accel_mlx5_init,
1346 		.module_fini		= accel_mlx5_deinit,
1347 		.write_config_json	= accel_mlx5_write_config_json,
1348 		.get_ctx_size		= accel_mlx5_get_ctx_size,
1349 		.name			= "mlx5",
1350 		.supports_opcode	= accel_mlx5_supports_opcode,
1351 		.get_io_channel		= accel_mlx5_get_io_channel,
1352 		.submit_tasks		= accel_mlx5_submit_tasks,
1353 		.crypto_key_init	= accel_mlx5_crypto_key_init,
1354 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
1355 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
1356 		.get_memory_domains	= accel_mlx5_get_memory_domains,
1357 	}
1358 };
1359 
1360 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
1361