xref: /spdk/module/accel/mlx5/accel_mlx5.c (revision be633ff15f6aa19cb8347cc3f133f74dcf5ed629)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_ALLOC_REQS_IN_BATCH (16u)
31 
32 struct accel_mlx5_io_channel;
33 struct accel_mlx5_task;
34 
35 struct accel_mlx5_crypto_dev_ctx {
36 	struct spdk_mempool *requests_pool;
37 	struct ibv_context *context;
38 	struct ibv_pd *pd;
39 	struct spdk_memory_domain *domain;
40 	TAILQ_ENTRY(accel_mlx5_crypto_dev_ctx) link;
41 };
42 
43 struct accel_mlx5_module {
44 	struct spdk_accel_module_if module;
45 	struct accel_mlx5_crypto_dev_ctx *crypto_ctxs;
46 	uint32_t num_crypto_ctxs;
47 	struct accel_mlx5_attr attr;
48 	bool enabled;
49 };
50 
51 enum accel_mlx5_wrid_type {
52 	ACCEL_MLX5_WRID_MKEY,
53 	ACCEL_MLX5_WRID_WRITE,
54 };
55 
56 struct accel_mlx5_wrid {
57 	uint8_t wrid;
58 };
59 
60 struct accel_mlx5_req {
61 	struct accel_mlx5_task *task;
62 	struct mlx5dv_mkey *mkey;
63 	struct ibv_sge src_sg[ACCEL_MLX5_MAX_SGE];
64 	struct ibv_sge dst_sg[ACCEL_MLX5_MAX_SGE];
65 	uint16_t src_sg_count;
66 	uint16_t dst_sg_count;
67 	struct accel_mlx5_wrid mkey_wrid;
68 	struct accel_mlx5_wrid write_wrid;
69 	TAILQ_ENTRY(accel_mlx5_req) link;
70 };
71 
72 struct accel_mlx5_task {
73 	struct spdk_accel_task base;
74 	struct accel_mlx5_dev *dev;
75 	TAILQ_HEAD(, accel_mlx5_req) reqs;
76 	uint32_t num_reqs;
77 	uint32_t num_completed_reqs;
78 	uint32_t num_submitted_reqs;
79 	int rc;
80 	struct spdk_iov_sgl src;
81 	struct spdk_iov_sgl dst;
82 	struct accel_mlx5_req *cur_req;
83 	/* If set, memory data will be encrypted during TX and wire data will be
84 	 * decrypted during RX.
85 	 * If not set, memory data will be decrypted during TX and wire data will
86 	 * be encrypted during RX. */
87 	bool encrypt_on_tx;
88 	bool inplace;
89 	TAILQ_ENTRY(accel_mlx5_task) link;
90 };
91 
92 struct accel_mlx5_qp {
93 	struct ibv_qp *qp;
94 	struct ibv_qp_ex *qpex;
95 	struct mlx5dv_qp_ex *mqpx; /* mlx5dv extension of the QP, used to configure MKEYs */
96 	struct ibv_cq *cq;
97 	struct accel_mlx5_io_channel *ch;
98 	bool wr_started;
99 	uint16_t num_reqs;
100 	uint16_t num_free_reqs;
101 };
102 
103 struct accel_mlx5_dev {
104 	struct accel_mlx5_qp *qp;
105 	struct ibv_cq *cq;
106 	struct spdk_rdma_utils_mem_map *mmap;
107 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
108 	uint32_t reqs_submitted;
109 	uint32_t max_reqs;
110 	/* Pending tasks waiting for requests resources */
111 	TAILQ_HEAD(, accel_mlx5_task) nomem;
112 	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap
113 	 * completions for all submitted requests */
114 	TAILQ_HEAD(, accel_mlx5_task) in_hw;
115 	/* tasks between wr_start and wr_complete */
116 	TAILQ_HEAD(, accel_mlx5_task) before_submit;
117 	TAILQ_ENTRY(accel_mlx5_dev) link;
118 };
119 
120 struct accel_mlx5_io_channel {
121 	struct accel_mlx5_dev *devs;
122 	struct spdk_poller *poller;
123 	uint32_t num_devs;
124 	/* Index in \b devs to be used for crypto in a round-robin way */
125 	uint32_t dev_idx;
126 };
127 
128 struct accel_mlx5_req_init_ctx {
129 	struct ibv_pd *pd;
130 	int rc;
131 };
132 
133 static struct accel_mlx5_module g_accel_mlx5;
134 
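/*
 * Bring a freshly created RC QP through the INIT -> RTR -> RTS transitions. The destination QP
 * number normally equals the QP's own number, which forms a self-loopback connection on the
 * local port.
 */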
135 static int
136 mlx5_qp_init_2_rts(struct ibv_qp *qp, uint32_t dest_qp_num)
137 {
138 	struct ibv_qp_attr cur_attr = {}, attr = {};
139 	struct ibv_qp_init_attr init_attr = {};
140 	struct ibv_port_attr port_attr = {};
141 	union ibv_gid gid = {};
142 	int rc;
143 	uint8_t port;
144 	int attr_mask = IBV_QP_PKEY_INDEX |
145 			IBV_QP_PORT |
146 			IBV_QP_ACCESS_FLAGS |
147 			IBV_QP_PATH_MTU |
148 			IBV_QP_AV |
149 			IBV_QP_DEST_QPN |
150 			IBV_QP_RQ_PSN |
151 			IBV_QP_MAX_DEST_RD_ATOMIC |
152 			IBV_QP_MIN_RNR_TIMER |
153 			IBV_QP_TIMEOUT |
154 			IBV_QP_RETRY_CNT |
155 			IBV_QP_RNR_RETRY |
156 			IBV_QP_SQ_PSN |
157 			IBV_QP_MAX_QP_RD_ATOMIC;
158 
159 	if (!qp) {
160 		return -EINVAL;
161 	}
162 
163 	rc = ibv_query_qp(qp, &cur_attr, attr_mask, &init_attr);
164 	if (rc) {
165 		SPDK_ERRLOG("Failed to query qp %p %u, rc %d\n", qp, qp->qp_num, rc);
166 		return rc;
167 	}
168 
169 	port = cur_attr.port_num;
170 	rc = ibv_query_port(qp->context, port, &port_attr);
171 	if (rc) {
172 		SPDK_ERRLOG("Failed to query port num %d\n", port);
173 		return rc;
174 	}
175 
176 	if (port_attr.state != IBV_PORT_ARMED && port_attr.state != IBV_PORT_ACTIVE) {
177 		SPDK_ERRLOG("Wrong port %d state %d\n", port, port_attr.state);
178 		return -ENETUNREACH;
179 	}
180 
181 	rc = ibv_query_gid(qp->context, port, 0, &gid);
182 	if (rc) {
183 		SPDK_ERRLOG("Failed to get GID on port %d, rc %d\n", port, rc);
184 		return rc;
185 	}
186 
187 	attr.qp_state = IBV_QPS_INIT;
188 	attr.pkey_index = cur_attr.pkey_index;
189 	attr.port_num = cur_attr.port_num;
190 	attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
191 	attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
192 
193 	rc = ibv_modify_qp(qp, &attr, attr_mask);
194 	if (rc) {
195 		SPDK_ERRLOG("Failed to modify qp %p %u to INIT state, rc %d\n", qp, qp->qp_num, rc);
196 		return rc;
197 	}
198 
199 	attr.qp_state = IBV_QPS_RTR;
200 	attr.path_mtu = cur_attr.path_mtu;
201 	/* dest_qp_num == qp_num - self loopback connection */
202 	attr.dest_qp_num = dest_qp_num;
203 	attr.rq_psn = cur_attr.rq_psn;
204 	attr.max_dest_rd_atomic = cur_attr.max_dest_rd_atomic;
205 	attr.min_rnr_timer = cur_attr.min_rnr_timer;
206 	attr.ah_attr = cur_attr.ah_attr;
207 	attr.ah_attr.dlid = port_attr.lid;
208 	attr.ah_attr.sl = 0;
209 	attr.ah_attr.src_path_bits = 0;
210 
211 	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
212 		/* Ethernet requires the GRH to be set */
213 		attr.ah_attr.is_global = 1;
214 		attr.ah_attr.grh.hop_limit = 1;
215 		attr.ah_attr.grh.dgid = gid;
216 	} else {
217 		attr.ah_attr.is_global = 0;
218 	}
219 
220 	assert(attr.ah_attr.port_num == port);
221 
222 	attr_mask = IBV_QP_STATE | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
223 		    IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER | IBV_QP_AV;
224 
225 	rc = ibv_modify_qp(qp, &attr, attr_mask);
226 	if (rc) {
227 		SPDK_ERRLOG("Failed to modify qp %p %u to RTR state, rc %d\n", qp, qp->qp_num, rc);
228 		return rc;
229 	}
230 
231 	memset(&attr, 0, sizeof(attr));
232 	attr.qp_state = IBV_QPS_RTS;
233 	attr.timeout = cur_attr.timeout;
234 	attr.retry_cnt = cur_attr.retry_cnt;
235 	attr.sq_psn = cur_attr.sq_psn;
236 	attr.rnr_retry = cur_attr.rnr_retry;
237 	attr.max_rd_atomic = cur_attr.max_rd_atomic;
238 	attr_mask = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_SQ_PSN | IBV_QP_RNR_RETRY |
239 		    IBV_QP_MAX_QP_RD_ATOMIC;
240 
241 	rc = ibv_modify_qp(qp, &attr, attr_mask);
242 	if (rc) {
243 		SPDK_ERRLOG("Failed to modify qp %p %u to RTS state, rc %d\n", qp, qp->qp_num, rc);
244 		return rc;
245 	}
246 
247 	return 0;
248 }
249 
250 static inline enum ibv_qp_state
251 accel_mlx5_get_qp_state(struct ibv_qp *qp) {
252 	struct ibv_qp_attr qp_attr;
253 	struct ibv_qp_init_attr init_attr;
254 
255 	ibv_query_qp(qp, &qp_attr, IBV_QP_STATE, &init_attr);
256 
257 	return qp_attr.qp_state;
258 }
259 
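/*
 * Return all requests owned by the task to the per-device mempool and complete the accel task
 * with the accumulated return code.
 */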
260 static inline void
261 accel_mlx5_task_complete(struct accel_mlx5_task *task)
262 {
263 	struct accel_mlx5_req *req;
264 
265 	assert(task->num_reqs == task->num_completed_reqs);
266 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code);
267 
268 	TAILQ_FOREACH(req, &task->reqs, link) {
269 		spdk_mempool_put(task->dev->dev_ctx->requests_pool, req);
270 	}
271 	spdk_accel_task_complete(&task->base, task->rc);
272 }
273 
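/*
 * Submit all work requests batched on the QP since ibv_wr_start(). On failure, every task waiting
 * in the before_submit list is completed with the error code; on success those tasks move to the
 * in_hw list until their completions are reaped.
 */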
274 static inline int
275 accel_mlx5_flush_wrs(struct accel_mlx5_dev *dev)
276 {
277 	struct accel_mlx5_task *task;
278 	struct accel_mlx5_qp *qp = dev->qp;
279 	int rc;
280 
281 	if (spdk_unlikely(!qp->wr_started)) {
282 		return 0;
283 	}
284 
285 	SPDK_DEBUGLOG(accel_mlx5, "Completing WRs on dev %s\n", dev->dev_ctx->context->device->name);
286 	rc = ibv_wr_complete(qp->qpex);
287 	if (spdk_unlikely(rc)) {
288 		SPDK_ERRLOG("ibv_wr_complete rc %d\n", rc);
289 		/* Complete all affected tasks */
290 		TAILQ_FOREACH(task, &dev->before_submit, link) {
291 			task->rc = rc;
292 			accel_mlx5_task_complete(task);
293 		}
294 		TAILQ_INIT(&dev->before_submit);
295 	} else {
296 		TAILQ_CONCAT(&dev->in_hw, &dev->before_submit, link);
297 	}
298 
299 	qp->wr_started = false;
300 
301 	return rc;
302 }
303 
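/*
 * Fill an SGE for the given address: translate it either through the caller-provided memory
 * domain or through the device memory map to get the lkey.
 */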
304 static int
305 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
306 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
307 {
308 	struct spdk_rdma_utils_memory_translation map_translation;
309 	struct spdk_memory_domain_translation_result domain_translation;
310 	struct spdk_memory_domain_translation_ctx local_ctx;
311 	int rc;
312 
313 	if (domain) {
314 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
315 		local_ctx.size = sizeof(local_ctx);
316 		local_ctx.rdma.ibv_qp = dev->qp->qp;
317 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
318 						       &local_ctx, addr, size, &domain_translation);
319 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
320 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
321 				    domain_translation.iov_count);
322 			if (rc == 0) {
323 				rc = -EINVAL;
324 			}
325 
326 			return rc;
327 		}
328 		sge->lkey = domain_translation.rdma.lkey;
329 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
330 		sge->length = domain_translation.iov.iov_len;
331 	} else {
332 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
333 						     &map_translation);
334 		if (spdk_unlikely(rc)) {
335 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
336 			return rc;
337 		}
338 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
339 		sge->addr = (uint64_t)addr;
340 		sge->length = size;
341 	}
342 
343 	return 0;
344 }
345 
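/*
 * Build SGEs covering one block_size worth of data from the iov iterator. Returns the number of
 * SGEs filled or a negative errno on translation failure.
 */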
346 static inline int
347 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct accel_mlx5_req *req,
348 			  struct ibv_sge *sge,
349 			  struct spdk_iov_sgl *iovs, struct spdk_memory_domain *domain, void *domain_ctx)
350 {
351 	void *addr;
352 	uint32_t remaining = req->task->base.block_size;
353 	uint32_t size;
354 	int i = 0;
355 	int rc;
356 
357 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
358 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
359 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
360 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
361 		if (spdk_unlikely(rc)) {
362 			return rc;
363 		}
364 		spdk_iov_sgl_advance(iovs, size);
365 		i++;
366 		assert(remaining >= size);
367 		remaining -= size;
368 	}
369 	assert(remaining == 0);
370 
371 	return i;
372 }
373 
374 static inline bool
375 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
376 {
377 	uint32_t i;
378 
379 	for (i = 0; i < iovcnt; i++) {
380 		if (v1[i].iov_base != v2[i].iov_base || v1[i].iov_len != v2[i].iov_len) {
381 			return false;
382 		}
383 	}
384 
385 	return true;
386 }
387 
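/*
 * Allocate requests for the task from the per-device mempool in batches, bounded by the number of
 * requests still needed and by the free QP slots. Returns the number of requests allocated.
 */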
388 static inline uint32_t
389 accel_mlx5_task_alloc_reqs(struct accel_mlx5_task *task)
390 {
391 	struct accel_mlx5_req *reqs_tmp[ACCEL_MLX5_ALLOC_REQS_IN_BATCH], *req;
392 	uint32_t i, num_reqs, allocated_reqs = 0;
393 	uint32_t remaining_reqs = task->num_reqs - task->num_completed_reqs;
394 	uint32_t qp_slot = task->dev->max_reqs - task->dev->reqs_submitted;
395 	int rc;
396 
397 	assert(task->num_reqs >= task->num_completed_reqs);
398 	remaining_reqs = spdk_min(remaining_reqs, qp_slot);
399 
400 	while (remaining_reqs) {
401 		num_reqs = spdk_min(ACCEL_MLX5_ALLOC_REQS_IN_BATCH, remaining_reqs);
402 		rc = spdk_mempool_get_bulk(task->dev->dev_ctx->requests_pool, (void **)reqs_tmp, num_reqs);
403 		if (spdk_unlikely(rc)) {
404 			return allocated_reqs;
405 		}
406 		for (i = 0; i < num_reqs; i++) {
407 			req = reqs_tmp[i];
408 			req->src_sg_count = 0;
409 			req->dst_sg_count = 0;
410 			req->task = task;
411 			TAILQ_INSERT_TAIL(&task->reqs, req, link);
412 		}
413 		allocated_reqs += num_reqs;
414 		remaining_reqs -= num_reqs;
415 	}
416 
417 	return allocated_reqs;
418 }
419 
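/*
 * For each allocated request, post two work requests: an unsignaled MKEY configure WR that sets
 * the data layout (src or dst buffers) and the crypto attributes, and a signaled RDMA_WRITE WR
 * that pushes the source buffers through the crypto MKEY. The WRs are only batched here; the
 * doorbell is rung in accel_mlx5_flush_wrs().
 */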
420 static inline int
421 accel_mlx5_task_process(struct accel_mlx5_task *mlx5_task)
422 {
423 	struct spdk_accel_task *task = &mlx5_task->base;
424 	struct accel_mlx5_dev *dev = mlx5_task->dev;
425 	struct accel_mlx5_qp *qp = dev->qp;
426 	struct ibv_qp_ex *qpx = qp->qpex;
427 	struct mlx5dv_qp_ex *mqpx = qp->mqpx;
428 	struct mlx5dv_mkey_conf_attr mkey_attr = {};
429 	struct mlx5dv_crypto_attr cattr;
430 	struct accel_mlx5_req *req;
431 	uint64_t iv;
432 	uint32_t num_setters = 3; /* access flags, layout, crypto */
433 	int rc;
434 
435 	iv = task->iv + mlx5_task->num_completed_reqs;
436 
437 	if (!qp->wr_started) {
438 		ibv_wr_start(qpx);
439 		qp->wr_started = true;
440 	}
441 
442 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
443 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
444 
445 	while (mlx5_task->cur_req && dev->reqs_submitted < dev->max_reqs) {
446 		req = mlx5_task->cur_req;
447 		rc = accel_mlx5_fill_block_sge(dev, req, req->src_sg, &mlx5_task->src, task->src_domain,
448 					       task->src_domain_ctx);
449 		if (spdk_unlikely(rc <= 0)) {
450 			if (rc == 0) {
451 				rc = -EINVAL;
452 			}
453 			SPDK_ERRLOG("failed to set src sge, rc %d\n", rc);
454 			goto err_out;
455 		}
456 		req->src_sg_count = rc;
457 
458 		/* prepare memory key - destination for WRITE operation */
459 		qpx->wr_flags = IBV_SEND_INLINE;
460 		qpx->wr_id = (uint64_t)&req->mkey_wrid;
461 		mlx5dv_wr_mkey_configure(mqpx, req->mkey, num_setters, &mkey_attr);
462 		mlx5dv_wr_set_mkey_access_flags(mqpx,
463 						IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
464 		if (mlx5_task->inplace) {
465 			mlx5dv_wr_set_mkey_layout_list(mqpx, req->src_sg_count, req->src_sg);
466 		} else {
467 			rc = accel_mlx5_fill_block_sge(dev, req, req->dst_sg, &mlx5_task->dst, task->dst_domain,
468 						       task->dst_domain_ctx);
469 			if (spdk_unlikely(rc <= 0)) {
470 				if (rc == 0) {
471 					rc = -EINVAL;
472 				}
473 				SPDK_ERRLOG("failed to set dst sge, rc %d\n", rc);
474 				mlx5_task->rc = rc;
475 				goto err_out;
476 			}
477 			req->dst_sg_count = rc;
478 			mlx5dv_wr_set_mkey_layout_list(mqpx, req->dst_sg_count, req->dst_sg);
479 		}
480 		SPDK_DEBUGLOG(accel_mlx5, "req %p, task %p crypto_attr: bs %u, iv %"PRIu64", enc_on_tx %d\n",
481 			      req, req->task, task->block_size, iv, mlx5_task->encrypt_on_tx);
482 		rc = spdk_mlx5_crypto_set_attr(&cattr, task->crypto_key->priv, dev->dev_ctx->pd, task->block_size,
483 					       iv++, mlx5_task->encrypt_on_tx);
484 		if (spdk_unlikely(rc)) {
485 			SPDK_ERRLOG("failed to set crypto attr, rc %d\n", rc);
486 			mlx5_task->rc = rc;
487 			goto err_out;
488 		}
489 		mlx5dv_wr_set_mkey_crypto(mqpx, &cattr);
490 
491 		/* Prepare WRITE, use rkey from mkey, remote addr is always 0 - start of the mkey */
492 		qpx->wr_flags = IBV_SEND_SIGNALED;
493 		qpx->wr_id = (uint64_t)&req->write_wrid;
494 		ibv_wr_rdma_write(qpx, req->mkey->rkey, 0);
495 		/* local buffers, SG is already filled */
496 		ibv_wr_set_sge_list(qpx, req->src_sg_count, req->src_sg);
497 
498 		mlx5_task->num_submitted_reqs++;
499 		assert(mlx5_task->num_submitted_reqs <= mlx5_task->num_reqs);
500 		dev->reqs_submitted++;
501 		mlx5_task->cur_req = TAILQ_NEXT(mlx5_task->cur_req, link);
502 	}
503 
504 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
505 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
506 
507 	TAILQ_INSERT_TAIL(&dev->before_submit, mlx5_task, link);
508 
509 	return 0;
510 
511 err_out:
512 	/* Abort all WRs submitted since last wr_start */
513 	ibv_wr_abort(qpx);
514 	accel_mlx5_task_complete(mlx5_task);
515 	TAILQ_FOREACH(mlx5_task, &dev->before_submit, link) {
516 		mlx5_task->rc = rc;
517 		accel_mlx5_task_complete(mlx5_task);
518 	}
519 	TAILQ_INIT(&dev->before_submit);
520 
521 	return rc;
522 
523 }
524 
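/*
 * Resume a partially processed task: return its old requests to the pool, allocate a fresh batch
 * and submit the remaining blocks, or queue the task again if the pool is empty.
 */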
525 static inline int
526 accel_mlx5_task_continue(struct accel_mlx5_task *task)
527 {
528 	struct accel_mlx5_req *req;
529 
530 	TAILQ_FOREACH(req, &task->reqs, link) {
531 		spdk_mempool_put(task->dev->dev_ctx->requests_pool, req);
532 	}
533 	TAILQ_INIT(&task->reqs);
534 
535 	if (spdk_unlikely(task->rc)) {
536 		accel_mlx5_task_complete(task);
537 		return 0;
538 	}
539 
540 	if (spdk_unlikely(!accel_mlx5_task_alloc_reqs(task))) {
541 		/* Pool is empty, queue this task */
542 		TAILQ_INSERT_TAIL(&task->dev->nomem, task, link);
543 		return -ENOMEM;
544 	}
545 	task->cur_req = TAILQ_FIRST(&task->reqs);
546 
547 	return accel_mlx5_task_process(task);
548 }
549 
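/*
 * Validate the task (opcode, equal src/dst sizes, multiple of the block size), decide whether it
 * can be processed in place and allocate one request per crypto block.
 */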
550 static inline int
551 accel_mlx5_task_init(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_dev *dev)
552 {
553 	struct spdk_accel_task *task = &mlx5_task->base;
554 	size_t src_nbytes = 0, dst_nbytes = 0;
555 	uint32_t i;
556 
557 	switch (task->op_code) {
558 	case SPDK_ACCEL_OPC_ENCRYPT:
559 		mlx5_task->encrypt_on_tx = true;
560 		break;
561 	case SPDK_ACCEL_OPC_DECRYPT:
562 		mlx5_task->encrypt_on_tx = false;
563 		break;
564 	default:
565 		SPDK_ERRLOG("Unsupported accel opcode %d\n", task->op_code);
566 		return -ENOTSUP;
567 	}
568 
569 	for (i = 0; i < task->s.iovcnt; i++) {
570 		src_nbytes += task->s.iovs[i].iov_len;
571 	}
572 
573 	for (i = 0; i < task->d.iovcnt; i++) {
574 		dst_nbytes += task->d.iovs[i].iov_len;
575 	}
576 
577 	if (spdk_unlikely(src_nbytes != dst_nbytes)) {
578 		return -EINVAL;
579 	}
580 	if (spdk_unlikely(src_nbytes % mlx5_task->base.block_size != 0)) {
581 		return -EINVAL;
582 	}
583 
584 	mlx5_task->dev = dev;
585 	mlx5_task->rc = 0;
586 	mlx5_task->num_completed_reqs = 0;
587 	mlx5_task->num_submitted_reqs = 0;
588 	mlx5_task->cur_req = NULL;
589 	mlx5_task->num_reqs = src_nbytes / mlx5_task->base.block_size;
590 	spdk_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt, 0);
591 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
592 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
593 		mlx5_task->inplace = true;
594 	} else {
595 		mlx5_task->inplace = false;
596 		spdk_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt, 0);
597 	}
598 
599 	TAILQ_INIT(&mlx5_task->reqs);
600 	if (spdk_unlikely(!accel_mlx5_task_alloc_reqs(mlx5_task))) {
601 		/* Pool is empty, queue this task */
602 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n",
603 			      mlx5_task->dev->dev_ctx->context->device->name);
604 		return -ENOMEM;
605 	}
606 	mlx5_task->cur_req = TAILQ_FIRST(&mlx5_task->reqs);
607 
608 	SPDK_DEBUGLOG(accel_mlx5, "task %p, inplace %d, num_reqs %d\n", mlx5_task, mlx5_task->inplace,
609 		      mlx5_task->num_reqs);
610 
611 	return 0;
612 }
613 
614 static int
615 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task)
616 {
617 	struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch);
618 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
619 	struct accel_mlx5_dev *dev;
620 	int rc;
621 
622 	if (!g_accel_mlx5.enabled || !task->crypto_key ||
623 	    task->crypto_key->module_if != &g_accel_mlx5.module ||
624 	    !task->crypto_key->priv) {
625 		return -EINVAL;
626 	}
627 	dev = &ch->devs[ch->dev_idx];
628 	ch->dev_idx++;
629 	if (ch->dev_idx == ch->num_devs) {
630 		ch->dev_idx = 0;
631 	}
632 
633 	rc = accel_mlx5_task_init(mlx5_task, dev);
634 	if (spdk_unlikely(rc)) {
635 		if (rc == -ENOMEM) {
636 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
637 				      mlx5_task->num_reqs);
638 			TAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
639 			return 0;
640 		}
641 		return rc;
642 	}
643 
644 	return accel_mlx5_task_process(mlx5_task);
645 }
646 
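/*
 * Reap completions on the device CQ. Only RDMA_WRITE WRs are signaled on the fast path; an MKEY
 * completion indicates an error. A task is completed once all of its requests are done, or
 * resubmitted via accel_mlx5_task_continue() if it still has blocks left.
 */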
647 static inline int64_t
648 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
649 {
650 	struct ibv_wc wc[ACCEL_MLX5_MAX_WC];
651 	struct accel_mlx5_task *task;
652 	struct accel_mlx5_req *req;
653 	struct accel_mlx5_wrid *wr;
654 	int reaped, i, rc;
655 
656 	reaped = ibv_poll_cq(dev->cq, ACCEL_MLX5_MAX_WC, wc);
657 	if (spdk_unlikely(reaped < 0)) {
658 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
659 		return reaped;
660 	} else if (reaped == 0) {
661 		return 0;
662 	}
663 
664 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
665 		      dev->dev_ctx->context->device->name);
666 
667 	for (i = 0; i < reaped; i++) {
668 		wr = (struct accel_mlx5_wrid *)wc[i].wr_id;
669 
670 		switch (wr->wrid) {
671 		case ACCEL_MLX5_WRID_MKEY:
672 			/* We only get this completion in the error case */
673 			req = SPDK_CONTAINEROF(wr, struct accel_mlx5_req, mkey_wrid);
674 			if (!wc[i].status) {
675 				SPDK_ERRLOG("Got unexpected cpl for mkey configure, req %p, qp %p, state %d\n",
676 					    req, dev->qp->qp, accel_mlx5_get_qp_state(dev->qp->qp));
677 			} else {
678 				SPDK_ERRLOG("MKEY: qp %p, state %d, req %p, task %p WC status %d\n",
679 					    dev->qp->qp, accel_mlx5_get_qp_state(dev->qp->qp), req, req->task, wc[i].status);
680 			}
681 			break;
682 		case ACCEL_MLX5_WRID_WRITE:
683 			req = SPDK_CONTAINEROF(wr, struct accel_mlx5_req, write_wrid);
684 			task = req->task;
685 			if (wc[i].status) {
686 				assert(req->task);
687 				SPDK_ERRLOG("WRITE: qp %p, state %d, req %p, task %p WC status %d\n", dev->qp->qp,
688 					    accel_mlx5_get_qp_state(dev->qp->qp), req, req->task, wc[i].status);
689 				if (!task->rc) {
690 					task->rc = -EIO;
691 				}
692 			}
693 
694 			task->num_completed_reqs++;
695 			assert(dev->reqs_submitted);
696 			dev->reqs_submitted--;
697 			SPDK_DEBUGLOG(accel_mlx5, "req %p, task %p, remaining %u\n", req, task,
698 				      task->num_reqs - task->num_completed_reqs);
699 			if (task->num_completed_reqs == task->num_reqs) {
700 				TAILQ_REMOVE(&dev->in_hw, task, link);
701 				accel_mlx5_task_complete(task);
702 			} else if (task->num_completed_reqs == task->num_submitted_reqs) {
703 				assert(task->num_submitted_reqs < task->num_reqs);
704 				TAILQ_REMOVE(&dev->in_hw, task, link);
705 				rc = accel_mlx5_task_continue(task);
706 				if (spdk_unlikely(rc)) {
707 					if (rc != -ENOMEM) {
708 						task->rc = rc;
709 						accel_mlx5_task_complete(task);
710 					}
711 				}
712 			}
713 			break;
714 		}
715 	}
716 
717 	return reaped;
718 }
719 
720 static inline void
721 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
722 {
723 	struct accel_mlx5_task *task, *tmp;
724 	int rc;
725 
726 	TAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
727 		TAILQ_REMOVE(&dev->nomem, task, link);
728 		rc = accel_mlx5_task_continue(task);
729 		if (rc) {
730 			if (rc == -ENOMEM) {
731 				break;
732 			} else {
733 				task->rc = rc;
734 				accel_mlx5_task_complete(task);
735 			}
736 		}
737 	}
738 }
739 
740 static int
741 accel_mlx5_poller(void *ctx)
742 {
743 	struct accel_mlx5_io_channel *ch = ctx;
744 	struct accel_mlx5_dev *dev;
745 
746 	int64_t completions = 0, rc;
747 	uint32_t i;
748 
749 	for (i = 0; i < ch->num_devs; i++) {
750 		dev = &ch->devs[i];
751 		if (dev->reqs_submitted) {
752 			rc = accel_mlx5_poll_cq(dev);
753 			if (spdk_unlikely(rc < 0)) {
754 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
755 			}
756 			completions += rc;
757 			accel_mlx5_flush_wrs(dev);
758 		}
759 		if (!TAILQ_EMPTY(&dev->nomem)) {
760 			accel_mlx5_resubmit_nomem_tasks(dev);
761 		}
762 	}
763 
764 	return !!completions;
765 }
766 
767 static bool
768 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
769 {
770 	assert(g_accel_mlx5.enabled);
771 
772 	switch (opc) {
773 	case SPDK_ACCEL_OPC_ENCRYPT:
774 	case SPDK_ACCEL_OPC_DECRYPT:
775 		return true;
776 	default:
777 		return false;
778 	}
779 }
780 
781 static struct spdk_io_channel *
782 accel_mlx5_get_io_channel(void)
783 {
784 	assert(g_accel_mlx5.enabled);
785 	return spdk_get_io_channel(&g_accel_mlx5);
786 }
787 
788 static void
789 accel_mlx5_qp_destroy(struct accel_mlx5_qp *qp)
790 {
791 	if (!qp) {
792 		return;
793 	}
794 
795 	if (qp->qp) {
796 		ibv_destroy_qp(qp->qp);
797 		qp->qp = NULL;
798 	}
799 
800 	free(qp);
801 }
802 
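/*
 * Create an RC QP with the mlx5dv MKEY configure capability, connect it to itself via
 * mlx5_qp_init_2_rts() and cache the extended ibv/mlx5dv QP handles used for posting WRs.
 */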
803 static struct accel_mlx5_qp *
804 accel_mlx5_qp_create(struct ibv_cq *cq, struct accel_mlx5_io_channel *ch, struct ibv_pd *pd,
805 		     int qp_size)
806 {
807 	struct accel_mlx5_qp *qp;
808 	struct ibv_qp_init_attr_ex dv_qp_attr = {
809 		.qp_context = ch,
810 		.cap = {
811 			.max_send_wr = qp_size,
812 			.max_recv_wr = 0,
813 			.max_send_sge = ACCEL_MLX5_MAX_SGE,
814 			.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE,
815 		},
816 		.qp_type = IBV_QPT_RC,
817 		.comp_mask = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
818 		.pd = pd,
819 		.send_ops_flags = IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_BIND_MW,
820 		.send_cq = cq,
821 		.recv_cq = cq,
822 	};
823 	/* Attrs required for MKEYs registration */
824 	struct mlx5dv_qp_init_attr mlx5_qp_attr = {
825 		.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
826 		.send_ops_flags = MLX5DV_QP_EX_WITH_MKEY_CONFIGURE
827 	};
828 	int rc;
829 
830 	if (!dv_qp_attr.send_cq || !dv_qp_attr.recv_cq) {
831 		return NULL;
832 	}
833 
834 	qp = calloc(1, sizeof(*qp));
835 	if (!qp) {
836 		return NULL;
837 	}
838 
839 	qp->qp = mlx5dv_create_qp(cq->context, &dv_qp_attr, &mlx5_qp_attr);
840 	if (!qp->qp) {
841 		SPDK_ERRLOG("Failed to create qpair, errno %s (%d)\n", spdk_strerror(errno), errno);
842 		free(qp);
843 		return NULL;
844 	}
845 
846 	rc = mlx5_qp_init_2_rts(qp->qp, qp->qp->qp_num);
847 	if (rc) {
848 		SPDK_ERRLOG("Failed to create loopback connection, qp_num %u\n", qp->qp->qp_num);
849 		accel_mlx5_qp_destroy(qp);
850 		return NULL;
851 	}
852 
853 	qp->qpex = ibv_qp_to_qp_ex(qp->qp);
854 	if (!qp->qpex) {
855 		SPDK_ERRLOG("Failed to get qpex\n");
856 		accel_mlx5_qp_destroy(qp);
857 		return NULL;
858 	}
859 
860 	qp->mqpx = mlx5dv_qp_ex_from_ibv_qp_ex(qp->qpex);
861 	if (!qp->mqpx) {
862 		SPDK_ERRLOG("Failed to get mqpx\n");
863 		accel_mlx5_qp_destroy(qp);
864 		return NULL;
865 	}
866 
867 	qp->num_reqs = qp_size;
868 	qp->cq = cq;
869 
870 	return qp;
871 }
872 
873 static void
874 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
875 {
876 	struct accel_mlx5_io_channel *ch = ctx_buf;
877 	struct accel_mlx5_dev *dev;
878 	uint32_t i;
879 
880 	spdk_poller_unregister(&ch->poller);
881 	for (i = 0; i < ch->num_devs; i++) {
882 		dev = &ch->devs[i];
883 		accel_mlx5_qp_destroy(dev->qp);
884 		if (dev->cq) {
885 			ibv_destroy_cq(dev->cq);
886 			dev->cq = NULL;
887 		}
888 		spdk_rdma_utils_free_mem_map(&dev->mmap);
889 	}
890 	free(ch->devs);
891 }
892 
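/*
 * Per-channel initialization: for every crypto-capable device create a CQ, a loopback QP and a
 * memory map, then register the completion poller.
 */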
893 static int
894 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
895 {
896 	struct accel_mlx5_io_channel *ch = ctx_buf;
897 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
898 	struct accel_mlx5_dev *dev;
899 	uint32_t i;
900 	int rc;
901 
902 	ch->devs = calloc(g_accel_mlx5.num_crypto_ctxs, sizeof(*ch->devs));
903 	if (!ch->devs) {
904 		SPDK_ERRLOG("Memory allocation failed\n");
905 		return -ENOMEM;
906 	}
907 
908 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
909 		dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
910 		dev = &ch->devs[i];
911 		dev->dev_ctx = dev_ctx;
912 		ch->num_devs++;
913 		dev->cq = ibv_create_cq(dev_ctx->context, g_accel_mlx5.attr.qp_size, ch, NULL, 0);
914 		if (!dev->cq) {
915 			SPDK_ERRLOG("Failed to create CQ on dev %s\n", dev_ctx->context->device->name);
916 			rc = -ENOMEM;
917 			goto err_out;
918 		}
919 
920 		dev->qp = accel_mlx5_qp_create(dev->cq, ch, dev_ctx->pd, g_accel_mlx5.attr.qp_size);
921 		if (!dev->qp) {
922 			SPDK_ERRLOG("Failed to create QP on dev %s\n", dev_ctx->context->device->name);
923 			rc = -ENOMEM;
924 			goto err_out;
925 		}
926 
927 		TAILQ_INIT(&dev->nomem);
928 		TAILQ_INIT(&dev->in_hw);
929 		TAILQ_INIT(&dev->before_submit);
930 		/* Each request consumes 2 WQEs - MKEY and RDMA_WRITE. MKEY is unsignaled, so we count only RDMA_WRITE completions.
931 		 * Divide the user-defined qp_size by two for simplicity */
932 		dev->max_reqs = g_accel_mlx5.attr.qp_size / 2;
933 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
934 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
935 		if (!dev->mmap) {
936 			SPDK_ERRLOG("Failed to create memory map\n");
937 			rc = -ENOMEM;
938 			goto err_out;
939 		}
940 	}
941 
942 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
943 
944 	return 0;
945 
946 err_out:
947 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
948 	return rc;
949 }
950 
951 void
952 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
953 {
954 	assert(attr);
955 
956 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
957 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
958 }
959 
960 int
961 accel_mlx5_enable(struct accel_mlx5_attr *attr)
962 {
963 	if (g_accel_mlx5.enabled) {
964 		return -EEXIST;
965 	}
966 	if (attr) {
967 		g_accel_mlx5.attr = *attr;
968 	} else {
969 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
970 	}
971 
972 	g_accel_mlx5.enabled = true;
973 	spdk_accel_module_list_add(&g_accel_mlx5.module);
974 
975 	return 0;
976 }
977 
978 static void
979 accel_mlx5_release_crypto_req(struct spdk_mempool *mp, void *cb_arg, void *_req, unsigned obj_idx)
980 {
981 	struct accel_mlx5_req *req = _req;
982 
983 	if (req->mkey) {
984 		mlx5dv_destroy_mkey(req->mkey);
985 	}
986 }
987 
988 static void
989 accel_mlx5_release_reqs(struct accel_mlx5_crypto_dev_ctx *dev_ctx)
990 {
991 	if (!dev_ctx->requests_pool) {
992 		return;
993 	}
994 
995 	spdk_mempool_obj_iter(dev_ctx->requests_pool, accel_mlx5_release_crypto_req, NULL);
996 }
997 
998 static void
999 accel_mlx5_free_resources(void)
1000 {
1001 	uint32_t i;
1002 
1003 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
1004 		accel_mlx5_release_reqs(&g_accel_mlx5.crypto_ctxs[i]);
1005 		spdk_rdma_utils_put_pd(g_accel_mlx5.crypto_ctxs[i].pd);
1006 		spdk_rdma_utils_put_memory_domain(g_accel_mlx5.crypto_ctxs[i].domain);
1007 	}
1008 
1009 	free(g_accel_mlx5.crypto_ctxs);
1010 	g_accel_mlx5.crypto_ctxs = NULL;
1011 }
1012 
1013 static void
1014 accel_mlx5_deinit_cb(void *ctx)
1015 {
1016 	accel_mlx5_free_resources();
1017 	spdk_accel_module_finish();
1018 }
1019 
1020 static void
1021 accel_mlx5_deinit(void *ctx)
1022 {
1023 	if (g_accel_mlx5.crypto_ctxs) {
1024 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
1025 	} else {
1026 		spdk_accel_module_finish();
1027 	}
1028 }
1029 
1030 static void
1031 accel_mlx5_configure_crypto_req(struct spdk_mempool *mp, void *cb_arg, void *_req, unsigned obj_idx)
1032 {
1033 	struct accel_mlx5_req *req = _req;
1034 	struct accel_mlx5_req_init_ctx *ctx = cb_arg;
1035 	struct mlx5dv_mkey_init_attr mkey_attr = {
1036 		.pd = ctx->pd,
1037 		.max_entries = ACCEL_MLX5_MAX_SGE, /* This MKEY refers to N base MKEYs/buffers */
1038 		.create_flags = MLX5DV_MKEY_INIT_ATTR_FLAGS_INDIRECT | /* This MKEY refers to other MKEYs */
1039 		MLX5DV_MKEY_INIT_ATTR_FLAGS_CRYPTO
1040 	};
1041 
1042 	memset(req, 0, sizeof(*req));
1043 	if (ctx->rc) {
1044 		return;
1045 	}
1046 
1047 	req->mkey = mlx5dv_create_mkey(&mkey_attr);
1048 	if (!req->mkey) {
1049 		SPDK_ERRLOG("Failed to create mkey on dev %s, errno %d\n", ctx->pd->context->device->name, errno);
1050 		ctx->rc = errno;
1051 		return;
1052 	}
1053 
1054 	req->mkey_wrid.wrid = ACCEL_MLX5_WRID_MKEY;
1055 	req->write_wrid.wrid = ACCEL_MLX5_WRID_WRITE;
1056 }
1057 
1058 static int
1059 accel_mlx5_crypto_ctx_mempool_create(struct accel_mlx5_crypto_dev_ctx *crypto_dev_ctx,
1060 				     size_t num_entries)
1061 {
1062 	struct accel_mlx5_req_init_ctx init_ctx = {.pd = crypto_dev_ctx->pd };
1063 	char pool_name[32];
1064 	int rc;
1065 
1066 	/* Compiler may produce a warning like
1067 	 * warning: ‘%s’ directive output may be truncated writing up to 63 bytes into a region of size 21
1068 	 * [-Wformat-truncation=]
1069 	 * That is expected because the ibv device name can be up to 64 bytes while the DPDK mempool API
1070 	 * limits the name to 32 bytes.
1071 	 * To suppress this warning, check the value returned by snprintf */
1072 	rc = snprintf(pool_name, 32, "accel_mlx5_%s", crypto_dev_ctx->context->device->name);
1073 	if (rc < 0) {
1074 		assert(0);
1075 		return -EINVAL;
1076 	}
1077 	crypto_dev_ctx->requests_pool = spdk_mempool_create_ctor(pool_name, num_entries,
1078 					sizeof(struct accel_mlx5_req),
1079 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, SPDK_ENV_SOCKET_ID_ANY,
1080 					accel_mlx5_configure_crypto_req, &init_ctx);
1081 	if (!crypto_dev_ctx->requests_pool || init_ctx.rc) {
1082 		SPDK_ERRLOG("Failed to create memory pool\n");
1083 		return init_ctx.rc ? : -ENOMEM;
1084 	}
1085 
1086 	return 0;
1087 }
1088 
1089 static int
1090 accel_mlx5_init(void)
1091 {
1092 	struct accel_mlx5_crypto_dev_ctx *crypto_dev_ctx;
1093 	struct ibv_context **rdma_devs, *dev;
1094 	struct ibv_pd *pd;
1095 	int num_devs = 0, rc = 0, i;
1096 
1097 	if (!g_accel_mlx5.enabled) {
1098 		return -EINVAL;
1099 	}
1100 
1101 	rdma_devs = spdk_mlx5_crypto_devs_get(&num_devs);
1102 	if (!rdma_devs || !num_devs) {
1103 		return -ENODEV;
1104 	}
1105 
1106 	g_accel_mlx5.crypto_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.crypto_ctxs));
1107 	if (!g_accel_mlx5.crypto_ctxs) {
1108 		SPDK_ERRLOG("Memory allocation failed\n");
1109 		rc = -ENOMEM;
1110 		goto cleanup;
1111 	}
1112 
1113 	for (i = 0; i < num_devs; i++) {
1114 		crypto_dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
1115 		dev = rdma_devs[i];
1116 		pd = spdk_rdma_utils_get_pd(dev);
1117 		if (!pd) {
1118 			SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
1119 			rc = -EINVAL;
1120 			goto cleanup;
1121 		}
1122 		crypto_dev_ctx->context = dev;
1123 		crypto_dev_ctx->pd = pd;
1124 		crypto_dev_ctx->domain = spdk_rdma_utils_get_memory_domain(crypto_dev_ctx->pd);
1125 		if (!crypto_dev_ctx->domain) {
1126 			SPDK_ERRLOG("Failed to get memory domain\n");
1127 			rc = -ENOMEM;
1128 			goto cleanup;
1129 		}
1130 
1131 		g_accel_mlx5.num_crypto_ctxs++;
1132 		rc = accel_mlx5_crypto_ctx_mempool_create(crypto_dev_ctx, g_accel_mlx5.attr.num_requests);
1133 		if (rc) {
1134 			goto cleanup;
1135 		}
1136 	}
1137 
1138 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
1139 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
1140 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
1141 
1142 	spdk_mlx5_crypto_devs_release(rdma_devs);
1143 
1144 	return rc;
1145 
1146 cleanup:
1147 	spdk_mlx5_crypto_devs_release(rdma_devs);
1148 	accel_mlx5_free_resources();
1149 
1150 	return rc;
1151 }
1152 
1153 static void
1154 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
1155 {
1156 	if (g_accel_mlx5.enabled) {
1157 		spdk_json_write_object_begin(w);
1158 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
1159 		spdk_json_write_named_object_begin(w, "params");
1160 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
1161 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
1162 		spdk_json_write_object_end(w);
1163 		spdk_json_write_object_end(w);
1164 	}
1165 }
1166 
1167 static size_t
1168 accel_mlx5_get_ctx_size(void)
1169 {
1170 	return sizeof(struct accel_mlx5_task);
1171 }
1172 
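/*
 * Concatenate the two AES-XTS key parts into a single DEK buffer and create an mlx5 crypto keytag
 * from it. The plaintext copy of the keys is zeroed before being freed.
 */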
1173 static int
1174 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
1175 {
1176 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
1177 	struct spdk_mlx5_crypto_keytag *keytag;
1178 	int rc;
1179 
1180 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
1181 		return -EINVAL;
1182 	}
1183 
1184 	attr.dek = calloc(1, key->key_size + key->key2_size);
1185 	if (!attr.dek) {
1186 		return -ENOMEM;
1187 	}
1188 
1189 	memcpy(attr.dek, key->key, key->key_size);
1190 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
1191 	attr.dek_len = key->key_size + key->key2_size;
1192 
1193 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
1194 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
1195 	free(attr.dek);
1196 	if (rc) {
1197 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
1198 		return rc;
1199 	}
1200 
1201 	key->priv = keytag;
1202 
1203 	return 0;
1204 }
1205 
1206 static void
1207 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
1208 {
1209 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
1210 		return;
1211 	}
1212 
1213 	spdk_mlx5_crypto_keytag_destroy(key->priv);
1214 }
1215 
1216 static bool
1217 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
1218 {
1219 	switch (cipher) {
1220 	case SPDK_ACCEL_CIPHER_AES_XTS:
1221 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
1222 	default:
1223 		return false;
1224 	}
1225 }
1226 
1227 static int
1228 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
1229 {
1230 	int i, size;
1231 
1232 	if (!domains || !array_size) {
1233 		return (int)g_accel_mlx5.num_crypto_ctxs;
1234 	}
1235 
1236 	size = spdk_min(array_size, (int)g_accel_mlx5.num_crypto_ctxs);
1237 
1238 	for (i = 0; i < size; i++) {
1239 		domains[i] = g_accel_mlx5.crypto_ctxs[i].domain;
1240 	}
1241 
1242 	return (int)g_accel_mlx5.num_crypto_ctxs;
1243 }
1244 
1245 static struct accel_mlx5_module g_accel_mlx5 = {
1246 	.module = {
1247 		.module_init		= accel_mlx5_init,
1248 		.module_fini		= accel_mlx5_deinit,
1249 		.write_config_json	= accel_mlx5_write_config_json,
1250 		.get_ctx_size		= accel_mlx5_get_ctx_size,
1251 		.name			= "mlx5",
1252 		.supports_opcode	= accel_mlx5_supports_opcode,
1253 		.get_io_channel		= accel_mlx5_get_io_channel,
1254 		.submit_tasks		= accel_mlx5_submit_tasks,
1255 		.crypto_key_init	= accel_mlx5_crypto_key_init,
1256 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
1257 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
1258 		.get_memory_domains	= accel_mlx5_get_memory_domains,
1259 	}
1260 };
1261 
1262 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
1263