1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
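/* Account for a work request submitted to the qp: it consumes one qp slot and is added to
 * the task's count of outstanding WRs */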
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	assert((task)->num_wrs < UINT16_MAX);		\
40 	(task)->num_wrs++;				\
41 } while (0)
42 
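/* Same accounting as above, but for a signaled WR, which also consumes one CQ slot on the device */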
43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
44 do {									\
45 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
46 	(dev)->wrs_in_cq++;						\
47 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
48 	(qp)->wrs_submitted++;						\
49 	assert((task)->num_wrs < UINT16_MAX);				\
50 	(task)->num_wrs++;						\
51 } while (0)
52 
53 struct accel_mlx5_io_channel;
54 struct accel_mlx5_task;
55 
56 struct accel_mlx5_crypto_dev_ctx {
57 	struct ibv_context *context;
58 	struct ibv_pd *pd;
59 	struct spdk_memory_domain *domain;
60 	TAILQ_ENTRY(accel_mlx5_crypto_dev_ctx) link;
61 	bool crypto_mkeys;
62 	bool crypto_multi_block;
63 };
64 
65 struct accel_mlx5_module {
66 	struct spdk_accel_module_if module;
67 	struct accel_mlx5_crypto_dev_ctx *crypto_ctxs;
68 	uint32_t num_crypto_ctxs;
69 	struct accel_mlx5_attr attr;
70 	char **allowed_devs;
71 	size_t allowed_devs_count;
72 	bool initialized;
73 	bool enabled;
74 	bool crypto_supported;
75 };
76 
77 struct accel_mlx5_sge {
78 	uint32_t src_sge_count;
79 	uint32_t dst_sge_count;
80 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
81 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
82 };
83 
84 struct accel_mlx5_iov_sgl {
85 	struct iovec	*iov;
86 	uint32_t	iovcnt;
87 	uint32_t	iov_offset;
88 };
89 
90 struct accel_mlx5_task {
91 	struct spdk_accel_task base;
92 	struct accel_mlx5_iov_sgl src;
93 	struct accel_mlx5_iov_sgl dst;
94 	struct accel_mlx5_qp *qp;
95 	STAILQ_ENTRY(accel_mlx5_task) link;
96 	uint16_t num_reqs;
97 	uint16_t num_completed_reqs;
98 	uint16_t num_submitted_reqs;
99 	uint16_t num_ops; /* number of allocated mkeys */
100 	uint16_t blocks_per_req;
101 	uint16_t num_processed_blocks;
102 	uint16_t num_blocks;
103 	uint16_t num_wrs; /* Number of outstanding operations which consume a qp slot */
104 	union {
105 		uint8_t raw;
106 		struct {
107 			uint8_t inplace : 1;
108 			uint8_t enc_order : 2;
109 		};
110 	};
111 	/* Keep this array last since not all elements might be accessed; this reduces the amount of
112 	 * data to be cached */
113 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
114 };
115 
116 struct accel_mlx5_qp {
117 	struct spdk_mlx5_qp *qp;
118 	struct ibv_qp *verbs_qp;
119 	struct accel_mlx5_dev *dev;
120 	struct accel_mlx5_io_channel *ch;
121 	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap
122 	 * completions for all submitted requests */
123 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
124 	uint16_t wrs_submitted;
125 	uint16_t wrs_max;
126 };
127 
128 struct accel_mlx5_dev {
129 	struct accel_mlx5_qp qp;
130 	struct spdk_mlx5_cq *cq;
131 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
132 	struct spdk_rdma_utils_mem_map *mmap;
133 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
134 	uint16_t wrs_in_cq;
135 	uint16_t wrs_in_cq_max;
136 	uint16_t crypto_split_blocks;
137 	bool crypto_multi_block;
138 	/* Pending tasks waiting for request resources */
139 	STAILQ_HEAD(, accel_mlx5_task) nomem;
140 	TAILQ_ENTRY(accel_mlx5_dev) link;
141 };
142 
143 struct accel_mlx5_io_channel {
144 	struct accel_mlx5_dev *devs;
145 	struct spdk_poller *poller;
146 	uint32_t num_devs;
147 	/* Index in \b devs to be used for crypto in a round-robin manner */
148 	uint32_t dev_idx;
149 };
150 
151 static struct accel_mlx5_module g_accel_mlx5;
152 
153 static inline void
154 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
155 {
156 	s->iov = iov;
157 	s->iovcnt = iovcnt;
158 	s->iov_offset = 0;
159 }
160 
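/* Advance the iov sgl by `step` bytes, skipping over iov entries that become fully consumed */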
161 static inline void
162 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
163 {
164 	s->iov_offset += step;
165 	while (s->iovcnt > 0) {
166 		assert(s->iov != NULL);
167 		if (s->iov_offset < s->iov->iov_len) {
168 			break;
169 		}
170 
171 		s->iov_offset -= s->iov->iov_len;
172 		s->iov++;
173 		s->iovcnt--;
174 	}
175 }
176 
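/* Return the task's mkeys to the pool and complete the accel task with success */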
177 static inline void
178 accel_mlx5_task_complete(struct accel_mlx5_task *task)
179 {
180 	struct accel_mlx5_dev *dev = task->qp->dev;
181 
182 	assert(task->num_reqs == task->num_completed_reqs);
183 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code);
184 
185 	if (task->num_ops) {
186 		spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
187 	}
188 	spdk_accel_task_complete(&task->base, 0);
189 }
190 
191 static inline void
192 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
193 {
194 	struct accel_mlx5_dev *dev = task->qp->dev;
195 
196 	assert(task->num_reqs == task->num_completed_reqs);
197 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
198 
199 	if (task->num_ops) {
200 		spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
201 	}
202 	spdk_accel_task_complete(&task->base, rc);
203 }
204 
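/* Translate a buffer into an ibv_sge, either through the caller's memory domain (if given) or
 * through the device's local memory map */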
205 static int
206 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
207 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
208 {
209 	struct spdk_rdma_utils_memory_translation map_translation;
210 	struct spdk_memory_domain_translation_result domain_translation;
211 	struct spdk_memory_domain_translation_ctx local_ctx;
212 	int rc;
213 
214 	if (domain) {
215 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
216 		local_ctx.size = sizeof(local_ctx);
217 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
218 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
219 						       &local_ctx, addr, size, &domain_translation);
220 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
221 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
222 				    domain_translation.iov_count);
223 			if (rc == 0) {
224 				rc = -EINVAL;
225 			}
226 
227 			return rc;
228 		}
229 		sge->lkey = domain_translation.rdma.lkey;
230 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
231 		sge->length = domain_translation.iov.iov_len;
232 	} else {
233 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
234 						     &map_translation);
235 		if (spdk_unlikely(rc)) {
236 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
237 			return rc;
238 		}
239 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
240 		sge->addr = (uint64_t)addr;
241 		sge->length = size;
242 	}
243 
244 	return 0;
245 }
246 
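/* Fill up to ACCEL_MLX5_MAX_SGE entries covering `len` bytes taken from the iov sgl.
 * Returns the number of SGEs filled or a negative errno on translation failure */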
247 static inline int
248 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
249 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, struct spdk_memory_domain *domain, void *domain_ctx)
250 {
251 	void *addr;
252 	uint32_t remaining = len;
253 	uint32_t size;
254 	int i = 0;
255 	int rc;
256 
257 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
258 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
259 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
260 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
261 		if (spdk_unlikely(rc)) {
262 			return rc;
263 		}
264 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
265 			      sge[i].length, sge[i].addr);
266 		accel_mlx5_iov_sgl_advance(iovs, size);
267 		i++;
268 		assert(remaining >= size);
269 		remaining -= size;
270 	}
271 	assert(remaining == 0);
272 
273 	return i;
274 }
275 
276 static inline bool
277 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
278 {
279 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
280 }
281 
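/* Number of send WRs that can still be submitted to the qp; returns 0 if the CQ is full */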
282 static inline uint16_t
283 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
284 {
285 	assert(qp->wrs_max >= qp->wrs_submitted);
286 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
287 
288 	/* Each task produces only 1 CQE, so only 1 CQ slot is needed */
289 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
290 		return 0;
291 	}
292 
293 	return qp->wrs_max - qp->wrs_submitted;
294 }
295 
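/* Allocate mkeys for the task's remaining requests, limited by the free qp slots and
 * ACCEL_MLX5_MAX_MKEYS_IN_TASK. Returns the number of mkeys allocated, or 0 if no slots are
 * available or the mkey pool is empty */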
296 static inline uint32_t
297 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task)
298 {
299 	struct accel_mlx5_qp *qp = task->qp;
300 	struct accel_mlx5_dev *dev = qp->dev;
301 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
302 	uint32_t num_ops;
303 	int rc;
304 
305 	assert(task->num_reqs > task->num_completed_reqs);
306 	num_ops = task->num_reqs - task->num_completed_reqs;
307 	num_ops = spdk_min(num_ops, qp_slot);
308 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
309 	if (!num_ops) {
310 		return 0;
311 	}
312 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, num_ops);
313 	if (spdk_unlikely(rc)) {
314 		return 0;
315 	}
316 	assert(num_ops <= UINT16_MAX);
317 	task->num_ops = num_ops;
318 
319 	return num_ops;
320 }
321 
322 static inline uint8_t
323 bs_to_bs_selector(uint32_t bs)
324 {
325 	switch (bs) {
326 	case 512:
327 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
328 	case 520:
329 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
330 	case 4096:
331 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
332 	case 4160:
333 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
334 	default:
335 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
336 	}
337 }
338 
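/* Fill the src SGEs (and dst SGEs for non-inplace tasks) covering `num_blocks` blocks and post
 * a UMR that configures `mkey` for crypto with the task's DEK, tweak mode, IV and block size */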
339 static inline int
340 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
341 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data)
342 {
343 	struct spdk_mlx5_umr_crypto_attr cattr;
344 	struct spdk_mlx5_umr_attr umr_attr;
345 	struct accel_mlx5_qp *qp = mlx5_task->qp;
346 	struct accel_mlx5_dev *dev = qp->dev;
347 	struct spdk_accel_task *task = &mlx5_task->base;
348 	uint32_t length;
349 	int rc;
350 
351 	length = num_blocks * task->block_size;
352 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
353 		      num_blocks);
354 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src,  length, task->src_domain,
355 				       task->src_domain_ctx);
356 	if (spdk_unlikely(rc <= 0)) {
357 		if (rc == 0) {
358 			rc = -EINVAL;
359 		}
360 		SPDK_ERRLOG("failed to set src sge, rc %d\n", rc);
361 		return rc;
362 	}
363 	sge->src_sge_count = rc;
364 
365 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
366 	cattr.keytag = 0;
367 	cattr.dek_obj_id = dek_data->dek_obj_id;
368 	cattr.tweak_mode = dek_data->tweak_mode;
369 	cattr.enc_order = mlx5_task->enc_order;
370 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
371 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
372 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
373 		return -EINVAL;
374 	}
375 	umr_attr.mkey = mkey;
376 	umr_attr.sge = sge->src_sge;
377 
378 	if (!mlx5_task->inplace) {
379 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
380 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, task->dst_domain,
381 					       task->dst_domain_ctx);
382 		if (spdk_unlikely(rc <= 0)) {
383 			if (rc == 0) {
384 				rc = -EINVAL;
385 			}
386 			SPDK_ERRLOG("failed to set dst sge, rc %d\n", rc);
387 			return rc;
388 		}
389 		sge->dst_sge_count = rc;
390 	}
391 
392 	SPDK_DEBUGLOG(accel_mlx5,
393 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
394 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
395 		      num_blocks);
396 
397 	umr_attr.sge_count = sge->src_sge_count;
398 	umr_attr.umr_len = length;
399 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
400 	mlx5_task->num_processed_blocks += num_blocks;
401 
402 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0);
403 
404 	return rc;
405 }
406 
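/* Submit up to num_ops requests: one crypto UMR per request followed by an RDMA_READ through
 * the configured mkey. Only the last RDMA_READ is signaled and carries the task as wr_id */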
407 static inline int
408 accel_mlx5_task_process(struct accel_mlx5_task *mlx5_task)
409 {
410 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
411 	struct spdk_mlx5_crypto_dek_data dek_data;
412 	struct accel_mlx5_qp *qp = mlx5_task->qp;
413 	struct accel_mlx5_dev *dev = qp->dev;
414 	/* First RDMA after UMR must have a SMALL_FENCE */
415 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
416 	uint32_t num_blocks;
417 	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
418 				    mlx5_task->num_ops);
419 	uint32_t i;
420 	int rc;
421 
422 	if (spdk_unlikely(!num_ops)) {
423 		return -EINVAL;
424 	}
425 
426 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
427 	if (spdk_unlikely(rc)) {
428 		return rc;
429 	}
430 
431 	mlx5_task->num_wrs = 0;
432 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, reqs: total %u, submitted %u, completed %u\n",
433 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
434 	for (i = 0; i < num_ops; i++) {
435 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
436 			/* The last request may consume fewer blocks than calculated if crypto_multi_block is true */
437 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
438 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
439 		} else {
440 			num_blocks = mlx5_task->blocks_per_req;
441 		}
442 
443 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
444 						     &dek_data);
445 		if (spdk_unlikely(rc)) {
446 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
447 			return rc;
448 		}
449 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
450 	}
451 
452 	/* Loop over num_ops - 1 requests to simplify flags handling */
453 	for (i = 0; i < num_ops - 1; i++) {
454 		/* The RDMA_READ goes through the UMR mkey - data flows from the UMR to the sge */
455 		if (mlx5_task->inplace) {
456 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
457 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
458 		} else {
459 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
460 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
461 		}
462 		if (spdk_unlikely(rc)) {
463 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
464 			return rc;
465 		}
466 
467 		first_rdma_fence = 0;
468 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
469 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
470 		mlx5_task->num_submitted_reqs++;
471 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
472 	}
473 
474 	if (mlx5_task->inplace) {
475 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
476 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
477 	} else {
478 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
479 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
480 	}
481 	if (spdk_unlikely(rc)) {
482 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
483 		return rc;
484 	}
485 
486 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
487 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
488 	mlx5_task->num_submitted_reqs++;
489 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
490 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
491 
492 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
493 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
494 
495 	return 0;
496 }
497 
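/* Resume a task that could not be fully submitted: allocate mkeys if none are held and
 * re-queue the task to the nomem list if resources are still unavailable */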
498 static inline int
499 accel_mlx5_task_continue(struct accel_mlx5_task *task)
500 {
501 	struct accel_mlx5_qp *qp = task->qp;
502 	struct accel_mlx5_dev *dev = qp->dev;
503 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
504 	uint32_t num_ops;
505 
506 	assert(task->num_reqs > task->num_completed_reqs);
507 	num_ops = task->num_reqs - task->num_completed_reqs;
508 	if (task->num_ops == 0) {
509 		/* No mkeys allocated, try to allocate now */
510 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task))) {
511 			/* Pool is empty, queue this task */
512 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
513 			return -ENOMEM;
514 		}
515 	}
516 
517 	num_ops = spdk_min(num_ops, task->num_ops);
518 	if (spdk_unlikely(num_ops > qp_slot)) {
519 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
520 		return -ENOMEM;
521 	}
522 
523 	return accel_mlx5_task_process(task);
524 }
525 
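/* Prepare a task for submission: pick the encryption order from the opcode, validate the
 * length, detect inplace operation, split the task into requests according to the device's
 * multi-block crypto capabilities and allocate mkeys */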
526 static inline int
527 accel_mlx5_task_init(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_dev *dev)
528 {
529 	struct spdk_accel_task *task = &mlx5_task->base;
530 	uint64_t src_nbytes = task->nbytes;
531 #ifdef DEBUG
532 	uint64_t dst_nbytes;
533 	uint32_t i;
534 #endif
535 	switch (task->op_code) {
536 	case SPDK_ACCEL_OPC_ENCRYPT:
537 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
538 		break;
539 	case SPDK_ACCEL_OPC_DECRYPT:
540 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
541 		break;
542 	default:
543 		SPDK_ERRLOG("Unsupported accel opcode %d\n", task->op_code);
544 		return -ENOTSUP;
545 	}
546 
547 	if (spdk_unlikely(src_nbytes % mlx5_task->base.block_size != 0)) {
548 		return -EINVAL;
549 	}
550 
551 	mlx5_task->qp = &dev->qp;
552 	mlx5_task->num_completed_reqs = 0;
553 	mlx5_task->num_submitted_reqs = 0;
554 	mlx5_task->num_ops = 0;
555 	mlx5_task->num_processed_blocks = 0;
556 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
557 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
558 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
559 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
560 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
561 		mlx5_task->inplace = 1;
562 	} else {
563 #ifdef DEBUG
564 		dst_nbytes = 0;
565 		for (i = 0; i < task->d.iovcnt; i++) {
566 			dst_nbytes += task->d.iovs[i].iov_len;
567 		}
568 
569 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
570 			return -EINVAL;
571 		}
572 #endif
573 		mlx5_task->inplace = 0;
574 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
575 	}
576 
577 	if (dev->crypto_multi_block) {
578 		if (dev->crypto_split_blocks) {
579 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
580 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
581 			/* The last req may consume fewer blocks */
582 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
583 		} else {
584 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
585 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
586 
587 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
588 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
589 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
590 			} else {
591 				mlx5_task->num_reqs = 1;
592 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
593 			}
594 		}
595 	} else {
596 		mlx5_task->num_reqs = mlx5_task->num_blocks;
597 		mlx5_task->blocks_per_req = 1;
598 	}
599 
600 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task))) {
601 		/* Pool is empty, queue this task */
602 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
603 		return -ENOMEM;
604 	}
605 
606 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
607 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
608 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
609 
610 	return 0;
611 }
612 
613 static int
614 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task)
615 {
616 	struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch);
617 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
618 	struct accel_mlx5_dev *dev;
619 	int rc;
620 
621 	if (!g_accel_mlx5.enabled || !task->crypto_key ||
622 	    task->crypto_key->module_if != &g_accel_mlx5.module ||
623 	    !task->crypto_key->priv) {
624 		return -EINVAL;
625 	}
626 	dev = &ch->devs[ch->dev_idx];
627 	ch->dev_idx++;
628 	if (ch->dev_idx == ch->num_devs) {
629 		ch->dev_idx = 0;
630 	}
631 
632 	rc = accel_mlx5_task_init(mlx5_task, dev);
633 	if (spdk_unlikely(rc)) {
634 		if (rc == -ENOMEM) {
635 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
636 				      mlx5_task->num_reqs);
637 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
638 			return 0;
639 		}
640 		return rc;
641 	}
642 
643 	return accel_mlx5_task_process(mlx5_task);
644 }
645 
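/* Reap completions on the device CQ. A single CQE covers all of the task's submitted but not
 * yet completed requests; fully completed tasks are finished, partially completed ones are
 * continued */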
646 static inline int64_t
647 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
648 {
649 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
650 	struct accel_mlx5_task *task;
651 	struct accel_mlx5_qp *qp;
652 	int reaped, i, rc;
653 	uint16_t completed;
654 
655 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
656 	if (spdk_unlikely(reaped < 0)) {
657 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
658 		return reaped;
659 	} else if (reaped == 0) {
660 		return 0;
661 	}
662 
663 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
664 		      dev->dev_ctx->context->device->name);
665 
666 	for (i = 0; i < reaped; i++) {
667 		if (spdk_unlikely(!wc[i].wr_id)) {
668 			/* Unsignaled completion with error, ignore */
669 			continue;
670 		}
671 		task = (struct accel_mlx5_task *)wc[i].wr_id;
672 		qp = task->qp;
673 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
674 		assert(task->num_submitted_reqs > task->num_completed_reqs);
675 		completed = task->num_submitted_reqs - task->num_completed_reqs;
676 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
677 		task->num_completed_reqs += completed;
678 		assert(qp->wrs_submitted >= task->num_wrs);
679 		qp->wrs_submitted -= task->num_wrs;
680 		assert(dev->wrs_in_cq > 0);
681 		dev->wrs_in_cq--;
682 
683 		if (wc[i].status) {
684 			SPDK_ERRLOG("qp %p, task %p WC status %d\n", qp, task, wc[i].status);
685 			if (task->num_completed_reqs == task->num_reqs) {
686 				STAILQ_REMOVE_HEAD(&qp->in_hw, link);
687 				accel_mlx5_task_fail(task, -EIO);
688 			}
689 			continue;
690 		}
691 
692 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
693 			      task->num_reqs - task->num_completed_reqs);
694 		if (task->num_completed_reqs == task->num_reqs) {
695 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
696 			accel_mlx5_task_complete(task);
697 		} else {
698 			assert(task->num_submitted_reqs < task->num_reqs);
699 			assert(task->num_completed_reqs == task->num_submitted_reqs);
700 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
701 			rc = accel_mlx5_task_continue(task);
702 			if (spdk_unlikely(rc)) {
703 				if (rc != -ENOMEM) {
704 					accel_mlx5_task_fail(task, rc);
705 				}
706 			}
707 		}
708 	}
709 
710 	return reaped;
711 }
712 
713 static inline void
714 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
715 {
716 	struct accel_mlx5_task *task, *tmp;
717 	int rc;
718 
719 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
720 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
721 		rc = accel_mlx5_task_continue(task);
722 		if (rc) {
723 			if (rc == -ENOMEM) {
724 				break;
725 			} else {
726 				accel_mlx5_task_fail(task, rc);
727 			}
728 		}
729 	}
730 }
731 
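/* Per-channel poller: poll the CQ of every device with WRs in flight, flush pending WQEs via
 * spdk_mlx5_qp_complete_send() and retry tasks queued on the nomem list */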
732 static int
733 accel_mlx5_poller(void *ctx)
734 {
735 	struct accel_mlx5_io_channel *ch = ctx;
736 	struct accel_mlx5_dev *dev;
737 
738 	int64_t completions = 0, rc;
739 	uint32_t i;
740 
741 	for (i = 0; i < ch->num_devs; i++) {
742 		dev = &ch->devs[i];
743 		if (dev->wrs_in_cq) {
744 			rc = accel_mlx5_poll_cq(dev);
745 			if (spdk_unlikely(rc < 0)) {
746 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
747 			}
748 			completions += rc;
749 			if (dev->qp.wrs_submitted) {
750 				spdk_mlx5_qp_complete_send(dev->qp.qp);
751 			}
752 		}
753 		if (!STAILQ_EMPTY(&dev->nomem)) {
754 			accel_mlx5_resubmit_nomem_tasks(dev);
755 		}
756 	}
757 
758 	return !!completions;
759 }
760 
761 static bool
762 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
763 {
764 	assert(g_accel_mlx5.enabled);
765 
766 	switch (opc) {
767 	case SPDK_ACCEL_OPC_ENCRYPT:
768 	case SPDK_ACCEL_OPC_DECRYPT:
769 		return g_accel_mlx5.crypto_supported;
770 	default:
771 		return false;
772 	}
773 }
774 
775 static struct spdk_io_channel *
776 accel_mlx5_get_io_channel(void)
777 {
778 	assert(g_accel_mlx5.enabled);
779 	return spdk_get_io_channel(&g_accel_mlx5);
780 }
781 
782 static int
783 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
784 {
785 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
786 	int rc;
787 
788 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
789 	mlx5_qp_attr.cap.max_recv_wr = 0;
790 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
791 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
792 
793 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
794 	if (rc) {
795 		return rc;
796 	}
797 
798 	STAILQ_INIT(&qp->in_hw);
799 	qp->dev = dev;
800 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
801 	assert(qp->verbs_qp);
802 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
803 
804 	return 0;
805 }
806 
807 static void
808 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
809 {
810 	struct accel_mlx5_io_channel *ch = ctx_buf;
811 	struct accel_mlx5_dev *dev;
812 	uint32_t i;
813 
814 	spdk_poller_unregister(&ch->poller);
815 	for (i = 0; i < ch->num_devs; i++) {
816 		dev = &ch->devs[i];
817 		spdk_mlx5_qp_destroy(dev->qp.qp);
818 		if (dev->cq) {
819 			spdk_mlx5_cq_destroy(dev->cq);
820 		}
821 		if (dev->crypto_mkeys) {
822 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
823 		}
824 		spdk_rdma_utils_free_mem_map(&dev->mmap);
825 	}
826 	free(ch->devs);
827 }
828 
829 static int
830 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
831 {
832 	struct spdk_mlx5_cq_attr cq_attr = {};
833 	struct accel_mlx5_io_channel *ch = ctx_buf;
834 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
835 	struct accel_mlx5_dev *dev;
836 	uint32_t i;
837 	int rc;
838 
839 	ch->devs = calloc(g_accel_mlx5.num_crypto_ctxs, sizeof(*ch->devs));
840 	if (!ch->devs) {
841 		SPDK_ERRLOG("Memory allocation failed\n");
842 		return -ENOMEM;
843 	}
844 
845 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
846 		dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
847 		dev = &ch->devs[i];
848 		dev->dev_ctx = dev_ctx;
849 
850 		if (dev_ctx->crypto_mkeys) {
851 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
852 			if (!dev->crypto_mkeys) {
853 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
854 				/* Should not happen since the mkey pool is created during accel_mlx5 initialization.
855 				 * We should not be here if pool creation failed */
856 				assert(0);
857 				goto err_out;
858 			}
859 		}
860 
861 		memset(&cq_attr, 0, sizeof(cq_attr));
862 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
863 		cq_attr.cqe_size = 64;
864 		cq_attr.cq_context = dev;
865 
866 		ch->num_devs++;
867 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
868 		if (rc) {
869 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
870 			goto err_out;
871 		}
872 
873 		rc = accel_mlx5_create_qp(dev, &dev->qp);
874 		if (rc) {
875 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
876 			goto err_out;
877 		}
878 
879 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
880 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
881 		if (!dev->mmap) {
882 			SPDK_ERRLOG("Failed to create memory map\n");
883 			rc = -ENOMEM;
884 			goto err_out;
885 		}
886 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
887 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
888 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
889 		STAILQ_INIT(&dev->nomem);
890 	}
891 
892 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
893 
894 	return 0;
895 
896 err_out:
897 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
898 	return rc;
899 }
900 
901 void
902 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
903 {
904 	assert(attr);
905 
906 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
907 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
908 	attr->allowed_devs = NULL;
909 	attr->crypto_split_blocks = 0;
910 }
911 
912 static void
913 accel_mlx5_allowed_devs_free(void)
914 {
915 	size_t i;
916 
917 	if (!g_accel_mlx5.allowed_devs) {
918 		return;
919 	}
920 
921 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
922 		free(g_accel_mlx5.allowed_devs[i]);
923 	}
924 	free(g_accel_mlx5.attr.allowed_devs);
925 	free(g_accel_mlx5.allowed_devs);
926 	g_accel_mlx5.attr.allowed_devs = NULL;
927 	g_accel_mlx5.allowed_devs = NULL;
928 	g_accel_mlx5.allowed_devs_count = 0;
929 }
930 
931 static int
932 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
933 {
934 	char *str, *tmp, *tok;
935 	size_t devs_count = 0;
936 
937 	str = strdup(allowed_devs);
938 	if (!str) {
939 		return -ENOMEM;
940 	}
941 
942 	accel_mlx5_allowed_devs_free();
943 
944 	tmp = str;
945 	while ((tmp = strchr(tmp, ',')) != NULL) {
946 		tmp++;
947 		devs_count++;
948 	}
949 	devs_count++;
950 
951 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
952 	if (!g_accel_mlx5.allowed_devs) {
953 		free(str);
954 		return -ENOMEM;
955 	}
956 
957 	devs_count = 0;
958 	tok = strtok(str, ",");
959 	while (tok) {
960 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
961 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
962 			free(str);
963 			accel_mlx5_allowed_devs_free();
964 			return -ENOMEM;
965 		}
966 		tok = strtok(NULL, ",");
967 		devs_count++;
968 		g_accel_mlx5.allowed_devs_count++;
969 	}
970 
971 	free(str);
972 
973 	return 0;
974 }
975 
976 int
977 accel_mlx5_enable(struct accel_mlx5_attr *attr)
978 {
979 	int rc;
980 
981 	if (g_accel_mlx5.enabled) {
982 		return -EEXIST;
983 	}
984 	if (attr) {
985 		g_accel_mlx5.attr = *attr;
986 		g_accel_mlx5.attr.allowed_devs = NULL;
987 
988 		if (attr->allowed_devs) {
989 			/* Keep a copy of the user's string */
990 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
991 			if (!g_accel_mlx5.attr.allowed_devs) {
992 				return -ENOMEM;
993 			}
994 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
995 			if (rc) {
996 				return rc;
997 			}
998 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
999 							 g_accel_mlx5.allowed_devs_count);
1000 			if (rc) {
1001 				accel_mlx5_allowed_devs_free();
1002 				return rc;
1003 			}
1004 		}
1005 	} else {
1006 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
1007 	}
1008 
1009 	g_accel_mlx5.enabled = true;
1010 	spdk_accel_module_list_add(&g_accel_mlx5.module);
1011 
1012 	return 0;
1013 }
1014 
1015 static void
1016 accel_mlx5_free_resources(void)
1017 {
1018 	struct accel_mlx5_crypto_dev_ctx *dev_ctx;
1019 	uint32_t i;
1020 
1021 	for (i = 0; i < g_accel_mlx5.num_crypto_ctxs; i++) {
1022 		dev_ctx = &g_accel_mlx5.crypto_ctxs[i];
1023 		if (dev_ctx->pd) {
1024 			if (dev_ctx->crypto_mkeys) {
1025 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
1026 			}
1027 			spdk_rdma_utils_put_pd(dev_ctx->pd);
1028 		}
1029 		if (dev_ctx->domain) {
1030 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
1031 		}
1032 	}
1033 
1034 	free(g_accel_mlx5.crypto_ctxs);
1035 	g_accel_mlx5.crypto_ctxs = NULL;
1036 	g_accel_mlx5.initialized = false;
1037 }
1038 
1039 static void
1040 accel_mlx5_deinit_cb(void *ctx)
1041 {
1042 	accel_mlx5_free_resources();
1043 	spdk_accel_module_finish();
1044 }
1045 
1046 static void
1047 accel_mlx5_deinit(void *ctx)
1048 {
1049 	if (g_accel_mlx5.allowed_devs) {
1050 		accel_mlx5_allowed_devs_free();
1051 	}
1052 	spdk_mlx5_crypto_devs_allow(NULL, 0);
1053 	if (g_accel_mlx5.initialized) {
1054 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
1055 	} else {
1056 		spdk_accel_module_finish();
1057 	}
1058 }
1059 
1060 static int
1061 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
1062 {
1063 	struct spdk_mlx5_mkey_pool_param pool_param = {};
1064 
1065 	pool_param.mkey_count = num_mkeys;
1066 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
1067 	pool_param.flags = flags;
1068 
1069 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
1070 }
1071 
1072 static int
1073 accel_mlx5_dev_ctx_init(struct accel_mlx5_crypto_dev_ctx *dev_ctx, struct ibv_context *dev,
1074 			struct spdk_mlx5_device_caps *caps)
1075 {
1076 	struct ibv_pd *pd;
1077 	int rc;
1078 
1079 	pd = spdk_rdma_utils_get_pd(dev);
1080 	if (!pd) {
1081 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
1082 		return -EINVAL;
1083 	}
1084 	dev_ctx->context = dev;
1085 	dev_ctx->pd = pd;
1086 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
1087 	if (!dev_ctx->domain) {
1088 		return -ENOMEM;
1089 	}
1090 
1091 	if (g_accel_mlx5.crypto_supported) {
1092 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
1093 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
1094 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
1095 				     dev->device->name);
1096 		}
1097 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
1098 		if (rc) {
1099 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
1100 			return rc;
1101 		}
1102 		dev_ctx->crypto_mkeys = true;
1103 	}
1104 
1105 	return 0;
1106 }
1107 
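/* Return a NULL-terminated array of Mellanox RDMA devices, filtered by the allowed_devs list
 * when one is configured */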
1108 static struct ibv_context **
1109 accel_mlx5_get_devices(int *_num_devs)
1110 {
1111 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
1112 	struct ibv_device_attr dev_attr;
1113 	size_t j;
1114 	int num_devs = 0, i, rc;
1115 	int num_devs_out = 0;
1116 	bool dev_allowed;
1117 
1118 	rdma_devs = rdma_get_devices(&num_devs);
1119 	if (!rdma_devs || !num_devs) {
1120 		*_num_devs = 0;
1121 		return NULL;
1122 	}
1123 
1124 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
1125 	if (!rdma_devs_out) {
1126 		SPDK_ERRLOG("Memory allocation failed\n");
1127 		rdma_free_devices(rdma_devs);
1128 		*_num_devs = 0;
1129 		return NULL;
1130 	}
1131 
1132 	for (i = 0; i < num_devs; i++) {
1133 		dev = rdma_devs[i];
1134 		rc = ibv_query_device(dev, &dev_attr);
1135 		if (rc) {
1136 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
1137 			continue;
1138 		}
1139 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
1140 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
1141 			continue;
1142 		}
1143 
1144 		if (g_accel_mlx5.allowed_devs_count) {
1145 			dev_allowed = false;
1146 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
1147 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
1148 					dev_allowed = true;
1149 					break;
1150 				}
1151 			}
1152 			if (!dev_allowed) {
1153 				continue;
1154 			}
1155 		}
1156 
1157 		rdma_devs_out[num_devs_out] = dev;
1158 		num_devs_out++;
1159 	}
1160 
1161 	rdma_free_devices(rdma_devs);
1162 	*_num_devs = num_devs_out;
1163 
1164 	return rdma_devs_out;
1165 }
1166 
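/* Crypto offload requires a device that does not use the wrapped import method for AES-XTS keys
 * and supports at least one known tweak mode */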
1167 static inline bool
1168 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
1169 {
1170 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
1171 	       (caps->crypto.single_block_le_tweak ||
1172 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
1173 }
1174 
1175 static int
1176 accel_mlx5_init(void)
1177 {
1178 	struct spdk_mlx5_device_caps *caps;
1179 	struct ibv_context **rdma_devs, *dev;
1180 	int num_devs = 0,  rc = 0, i;
1181 	int best_dev = -1, first_dev = 0;
1182 	bool supports_crypto;
1183 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
1184 
1185 	if (!g_accel_mlx5.enabled) {
1186 		return -EINVAL;
1187 	}
1188 
1189 	rdma_devs = accel_mlx5_get_devices(&num_devs);
1190 	if (!rdma_devs || !num_devs) {
1191 		return -ENODEV;
1192 	}
1193 	caps = calloc(num_devs, sizeof(*caps));
1194 	if (!caps) {
1195 		rc = -ENOMEM;
1196 		goto cleanup;
1197 	}
1198 
1199 	g_accel_mlx5.crypto_supported = true;
1200 	g_accel_mlx5.num_crypto_ctxs = 0;
1201 
1202 	/* Iterate over devices. We support an offload only if all devices support it */
1203 	for (i = 0; i < num_devs; i++) {
1204 		dev = rdma_devs[i];
1205 
1206 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
1207 		if (rc) {
1208 			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
1209 			goto cleanup;
1210 		}
1211 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
1212 		if (!supports_crypto) {
1213 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
1214 				      rdma_devs[i]->device->name);
1215 			g_accel_mlx5.crypto_supported = false;
1216 		}
1217 		if (find_best_dev) {
1218 			if (supports_crypto && best_dev == -1) {
1219 				best_dev = i;
1220 			}
1221 		}
1222 	}
1223 
1224 	/* User didn't specify devices to use, try to select the best one */
1225 	if (find_best_dev) {
1226 		if (best_dev == -1) {
1227 			best_dev = 0;
1228 		}
1229 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
1230 		SPDK_NOTICELOG("Select dev %s, crypto %d\n", rdma_devs[best_dev]->device->name, supports_crypto);
1231 		g_accel_mlx5.crypto_supported = supports_crypto;
1232 		first_dev = best_dev;
1233 		num_devs = 1;
1234 		if (supports_crypto) {
1235 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
1236 			/* Let the mlx5 library know which device to use */
1237 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
1238 		}
1239 	} else {
1240 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
1241 	}
1242 
1243 	if (!g_accel_mlx5.crypto_supported) {
1244 		/* Currently accel_mlx5 supports only crypto; exit if no device supports crypto offload */
1245 		rc = -ENODEV;
1246 		goto cleanup;
1247 	}
1248 
1249 	g_accel_mlx5.crypto_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.crypto_ctxs));
1250 	if (!g_accel_mlx5.crypto_ctxs) {
1251 		SPDK_ERRLOG("Memory allocation failed\n");
1252 		rc = -ENOMEM;
1253 		goto cleanup;
1254 	}
1255 
1256 	for (i = first_dev; i < first_dev + num_devs; i++) {
1257 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.crypto_ctxs[g_accel_mlx5.num_crypto_ctxs++],
1258 					     rdma_devs[i], &caps[i]);
1259 		if (rc) {
1260 			goto cleanup;
1261 		}
1262 	}
1263 
1264 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
1265 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
1266 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
1267 	g_accel_mlx5.initialized = true;
1268 	free(rdma_devs);
1269 	free(caps);
1270 
1271 	return 0;
1272 
1273 cleanup:
1274 	free(rdma_devs);
1275 	free(caps);
1276 	accel_mlx5_free_resources();
1277 
1278 	return rc;
1279 }
1280 
1281 static void
1282 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
1283 {
1284 	if (g_accel_mlx5.enabled) {
1285 		spdk_json_write_object_begin(w);
1286 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
1287 		spdk_json_write_named_object_begin(w, "params");
1288 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
1289 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
1290 		if (g_accel_mlx5.attr.allowed_devs) {
1291 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
1292 		}
1293 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
1294 		spdk_json_write_object_end(w);
1295 		spdk_json_write_object_end(w);
1296 	}
1297 }
1298 
1299 static size_t
1300 accel_mlx5_get_ctx_size(void)
1301 {
1302 	return sizeof(struct accel_mlx5_task);
1303 }
1304 
1305 static int
1306 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
1307 {
1308 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
1309 	struct spdk_mlx5_crypto_keytag *keytag;
1310 	int rc;
1311 
1312 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
1313 		return -EINVAL;
1314 	}
1315 
1316 	attr.dek = calloc(1, key->key_size + key->key2_size);
1317 	if (!attr.dek) {
1318 		return -ENOMEM;
1319 	}
1320 
1321 	memcpy(attr.dek, key->key, key->key_size);
1322 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
1323 	attr.dek_len = key->key_size + key->key2_size;
1324 
1325 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
1326 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
1327 	free(attr.dek);
1328 	if (rc) {
1329 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
1330 		return rc;
1331 	}
1332 
1333 	key->priv = keytag;
1334 
1335 	return 0;
1336 }
1337 
1338 static void
1339 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
1340 {
1341 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
1342 		return;
1343 	}
1344 
1345 	spdk_mlx5_crypto_keytag_destroy(key->priv);
1346 }
1347 
1348 static bool
1349 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
1350 {
1351 	switch (cipher) {
1352 	case SPDK_ACCEL_CIPHER_AES_XTS:
1353 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
1354 	default:
1355 		return false;
1356 	}
1357 }
1358 
1359 static int
1360 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
1361 {
1362 	int i, size;
1363 
1364 	if (!domains || !array_size) {
1365 		return (int)g_accel_mlx5.num_crypto_ctxs;
1366 	}
1367 
1368 	size = spdk_min(array_size, (int)g_accel_mlx5.num_crypto_ctxs);
1369 
1370 	for (i = 0; i < size; i++) {
1371 		domains[i] = g_accel_mlx5.crypto_ctxs[i].domain;
1372 	}
1373 
1374 	return (int)g_accel_mlx5.num_crypto_ctxs;
1375 }
1376 
1377 static struct accel_mlx5_module g_accel_mlx5 = {
1378 	.module = {
1379 		.module_init		= accel_mlx5_init,
1380 		.module_fini		= accel_mlx5_deinit,
1381 		.write_config_json	= accel_mlx5_write_config_json,
1382 		.get_ctx_size		= accel_mlx5_get_ctx_size,
1383 		.name			= "mlx5",
1384 		.supports_opcode	= accel_mlx5_supports_opcode,
1385 		.get_io_channel		= accel_mlx5_get_io_channel,
1386 		.submit_tasks		= accel_mlx5_submit_tasks,
1387 		.crypto_key_init	= accel_mlx5_crypto_key_init,
1388 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
1389 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
1390 		.get_memory_domains	= accel_mlx5_get_memory_domains,
1391 	}
1392 };
1393 
1394 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
1395