xref: /spdk/module/accel/mlx5/accel_mlx5.c (revision cfa0a248e28dc42bd51b24c4d4ab64e0b5dd7854)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	assert((task)->num_wrs < UINT16_MAX);		\
40 	(task)->num_wrs++;				\
41 } while (0)
42 
43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
44 do {									\
45 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
46 	(dev)->wrs_in_cq++;						\
47 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
48 	(qp)->wrs_submitted++;						\
49 	assert((task)->num_wrs < UINT16_MAX);				\
50 	(task)->num_wrs++;						\
51 } while (0)
52 
53 struct accel_mlx5_io_channel;
54 struct accel_mlx5_task;
55 
56 struct accel_mlx5_dev_ctx {
57 	struct ibv_context *context;
58 	struct ibv_pd *pd;
59 	struct spdk_memory_domain *domain;
60 	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
61 	bool crypto_mkeys;
62 	bool crypto_multi_block;
63 };
64 
65 struct accel_mlx5_module {
66 	struct spdk_accel_module_if module;
67 	struct accel_mlx5_dev_ctx *dev_ctxs;
68 	uint32_t num_ctxs;
69 	struct accel_mlx5_attr attr;
70 	char **allowed_devs;
71 	size_t allowed_devs_count;
72 	bool initialized;
73 	bool enabled;
74 	bool crypto_supported;
75 };
76 
77 struct accel_mlx5_sge {
78 	uint32_t src_sge_count;
79 	uint32_t dst_sge_count;
80 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
81 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
82 };
83 
84 struct accel_mlx5_iov_sgl {
85 	struct iovec	*iov;
86 	uint32_t	iovcnt;
87 	uint32_t	iov_offset;
88 };
89 
90 enum accel_mlx5_opcode {
91 	ACCEL_MLX5_OPC_COPY,
92 	ACCEL_MLX5_OPC_CRYPTO,
93 	ACCEL_MLX5_OPC_LAST
94 };
95 
96 struct accel_mlx5_task {
97 	struct spdk_accel_task base;
98 	struct accel_mlx5_iov_sgl src;
99 	struct accel_mlx5_iov_sgl dst;
100 	struct accel_mlx5_qp *qp;
101 	STAILQ_ENTRY(accel_mlx5_task) link;
102 	uint16_t num_reqs;
103 	uint16_t num_completed_reqs;
104 	uint16_t num_submitted_reqs;
105 	uint16_t num_ops; /* number of allocated mkeys or number of operations */
106 	uint16_t blocks_per_req;
107 	uint16_t num_processed_blocks;
108 	uint16_t num_blocks;
109 	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
110 	union {
111 		uint8_t raw;
112 		struct {
113 			uint8_t inplace : 1;
114 			uint8_t enc_order : 2;
115 			uint8_t mlx5_opcode : 5;
116 		};
117 	};
118 	/* Keep this array last since not all elements might be accessed; this reduces the amount of
119 	 * data to be cached */
120 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
121 };
122 
123 struct accel_mlx5_qp {
124 	struct spdk_mlx5_qp *qp;
125 	struct ibv_qp *verbs_qp;
126 	struct accel_mlx5_dev *dev;
127 	struct accel_mlx5_io_channel *ch;
128 	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap
129 	 * completions for all submitted requests */
130 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
131 	uint16_t wrs_submitted;
132 	uint16_t wrs_max;
133 	bool recovering;
134 	struct spdk_poller *recover_poller;
135 };
136 
137 struct accel_mlx5_dev {
138 	struct accel_mlx5_qp qp;
139 	struct spdk_mlx5_cq *cq;
140 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
141 	struct spdk_rdma_utils_mem_map *mmap;
142 	struct accel_mlx5_dev_ctx *dev_ctx;
143 	uint16_t wrs_in_cq;
144 	uint16_t wrs_in_cq_max;
145 	uint16_t crypto_split_blocks;
146 	bool crypto_multi_block;
147 	/* Pending tasks waiting for request resources */
148 	STAILQ_HEAD(, accel_mlx5_task) nomem;
149 	TAILQ_ENTRY(accel_mlx5_dev) link;
150 };
151 
152 struct accel_mlx5_io_channel {
153 	struct accel_mlx5_dev *devs;
154 	struct spdk_poller *poller;
155 	uint32_t num_devs;
156 	/* Index in \b devs to be used for operations in a round-robin fashion */
157 	uint32_t dev_idx;
158 };
159 
160 struct accel_mlx5_task_operations {
161 	int (*init)(struct accel_mlx5_task *task);
162 	int (*process)(struct accel_mlx5_task *task);
163 	int (*cont)(struct accel_mlx5_task *task);
164 	void (*complete)(struct accel_mlx5_task *task);
165 };
166 
167 static struct accel_mlx5_module g_accel_mlx5;
168 
169 static inline void
170 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
171 {
172 	s->iov = iov;
173 	s->iovcnt = iovcnt;
174 	s->iov_offset = 0;
175 }
176 
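/* Advance the iov scatter-gather state by 'step' bytes: the offset within the current iovec grows
 * and fully consumed iovecs are skipped by moving to the next entry and decrementing iovcnt. */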
177 static inline void
178 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
179 {
180 	s->iov_offset += step;
181 	while (s->iovcnt > 0) {
182 		assert(s->iov != NULL);
183 		if (s->iov_offset < s->iov->iov_len) {
184 			break;
185 		}
186 
187 		s->iov_offset -= s->iov->iov_len;
188 		s->iov++;
189 		s->iovcnt--;
190 	}
191 }
192 
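/* Rewind the iov scatter-gather state by 'step' bytes, stepping back through previous iovecs when
 * needed; 'max_iovs' bounds how far back we may go. Used when a UMR had to be shrunk to the
 * previous block boundary. */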
193 static inline void
194 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
195 {
196 	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
197 		      max_iovs, s->iov_offset, step);
198 	while (s->iovcnt <= max_iovs) {
199 		assert(s->iov != NULL);
200 		if (s->iov_offset >= step) {
201 			s->iov_offset -= step;
202 			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
203 				      s->iov_offset);
204 			return;
205 		}
206 		step -= s->iov_offset;
207 		s->iov--;
208 		s->iovcnt++;
209 		s->iov_offset = s->iov->iov_len;
210 		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
211 			      s->iov_offset, step);
212 	}
213 
214 	SPDK_ERRLOG("Can't unwind iovs, remaining  %u\n", step);
215 	assert(0);
216 }
217 
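/* Drop 'step' bytes from the tail of an SGE array by shrinking or removing its last entries.
 * Returns the updated number of SGE entries. */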
218 static inline int
219 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
220 {
221 	int i;
222 
223 	assert(sge_count > 0);
224 	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
225 	for (i = (int)sge_count - 1; i >= 0; i--) {
226 		if (sge[i].length > step) {
227 			sge[i].length -= step;
228 			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
229 			return (int)i + 1;
230 		}
231 		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
232 		step -= sge[i].length;
233 	}
234 
235 	SPDK_ERRLOG("Can't unwind sge, remaining  %u\n", step);
236 	assert(step == 0);
237 
238 	return 0;
239 }
240 
241 static inline void
242 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
243 {
244 	struct accel_mlx5_dev *dev = task->qp->dev;
245 
246 	assert(task->num_ops);
247 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
248 	spdk_accel_task_complete(&task->base, 0);
249 }
250 
251 static inline void
252 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
253 {
254 	struct accel_mlx5_dev *dev = task->qp->dev;
255 
256 	assert(task->num_reqs == task->num_completed_reqs);
257 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
258 
259 	if (task->num_ops) {
260 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO) {
261 			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
262 		}
263 	}
264 	spdk_accel_task_complete(&task->base, rc);
265 }
266 
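/* Translate a buffer into an ibv_sge (lkey, addr, length). If a memory domain is provided, the
 * translation is delegated to that domain; otherwise the device's local memory map is used. */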
267 static int
268 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
269 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
270 {
271 	struct spdk_rdma_utils_memory_translation map_translation;
272 	struct spdk_memory_domain_translation_result domain_translation;
273 	struct spdk_memory_domain_translation_ctx local_ctx;
274 	int rc;
275 
276 	if (domain) {
277 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
278 		local_ctx.size = sizeof(local_ctx);
279 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
280 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
281 						       &local_ctx, addr, size, &domain_translation);
282 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
283 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
284 				    domain_translation.iov_count);
285 			if (rc == 0) {
286 				rc = -EINVAL;
287 			}
288 
289 			return rc;
290 		}
291 		sge->lkey = domain_translation.rdma.lkey;
292 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
293 		sge->length = domain_translation.iov.iov_len;
294 	} else {
295 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
296 						     &map_translation);
297 		if (spdk_unlikely(rc)) {
298 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
299 			return rc;
300 		}
301 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
302 		sge->addr = (uint64_t)addr;
303 		sge->length = size;
304 	}
305 
306 	return 0;
307 }
308 
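/* Fill up to ACCEL_MLX5_MAX_SGE entries describing 'len' bytes taken from the iov sgl, translating
 * each chunk to an lkey. Returns the number of SGEs filled; bytes that did not fit are reported via
 * '_remaining' (non-zero means the payload is too fragmented to fit into a single SGE list). */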
309 static inline int
310 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
311 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
312 			  struct spdk_memory_domain *domain, void *domain_ctx)
313 {
314 	void *addr;
315 	uint32_t remaining = len;
316 	uint32_t size;
317 	int i = 0;
318 	int rc;
319 
320 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
321 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
322 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
323 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
324 		if (spdk_unlikely(rc)) {
325 			return rc;
326 		}
327 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
328 			      sge[i].length, sge[i].addr);
329 		accel_mlx5_iov_sgl_advance(iovs, size);
330 		i++;
331 		assert(remaining >= size);
332 		remaining -= size;
333 	}
334 	*_remaining = remaining;
335 
336 	return i;
337 }
338 
339 static inline bool
340 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
341 {
342 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
343 }
344 
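/* Return the number of send WRs that may still be posted on the qp. Returns 0 when the CQ budget
 * is exhausted, since each batch of WRs posted for a task produces a single CQE. */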
345 static inline uint16_t
346 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
347 {
348 	assert(qp->wrs_max >= qp->wrs_submitted);
349 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
350 
351 	/* Each time we produce only 1 CQE, so we need 1 CQ slot */
352 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
353 		return 0;
354 	}
355 
356 	return qp->wrs_max - qp->wrs_submitted;
357 }
358 
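/* Reserve one crypto mkey per remaining request, up to ACCEL_MLX5_MAX_MKEYS_IN_TASK. Returns the
 * number of mkeys obtained or 0 if the pool is exhausted. */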
359 static inline uint32_t
360 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task)
361 {
362 	struct accel_mlx5_dev *dev = task->qp->dev;
363 	uint32_t num_ops;
364 	int rc;
365 
366 	assert(task->num_reqs > task->num_completed_reqs);
367 	num_ops = task->num_reqs - task->num_completed_reqs;
368 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
369 	if (!num_ops) {
370 		return 0;
371 	}
372 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, num_ops);
373 	if (spdk_unlikely(rc)) {
374 		return 0;
375 	}
376 	assert(num_ops <= UINT16_MAX);
377 	task->num_ops = num_ops;
378 
379 	return num_ops;
380 }
381 
382 static inline uint8_t
383 bs_to_bs_selector(uint32_t bs)
384 {
385 	switch (bs) {
386 	case 512:
387 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
388 	case 520:
389 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
390 	case 4096:
391 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
392 	case 4160:
393 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
394 	default:
395 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
396 	}
397 }
398 
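/* Build and post a UMR WQE that maps 'num_blocks' data blocks behind 'mkey' with the AES-XTS
 * attributes derived from the task. If the src/dst buffers are too fragmented to cover the whole
 * range with ACCEL_MLX5_MAX_SGE entries, the UMR is shrunk to the previous block boundary and the
 * iov/sge state is unwound accordingly; the caller later issues an extra request for the
 * remaining blocks. */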
399 static inline int
400 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
401 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data)
402 {
403 	struct spdk_mlx5_umr_crypto_attr cattr;
404 	struct spdk_mlx5_umr_attr umr_attr;
405 	struct accel_mlx5_qp *qp = mlx5_task->qp;
406 	struct accel_mlx5_dev *dev = qp->dev;
407 	struct spdk_accel_task *task = &mlx5_task->base;
408 	uint32_t length, remaining = 0, block_size = task->block_size;
409 	int rc;
410 
411 	length = num_blocks * block_size;
412 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
413 		      num_blocks);
414 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
415 				       task->src_domain, task->src_domain_ctx);
416 	if (spdk_unlikely(rc <= 0)) {
417 		if (rc == 0) {
418 			rc = -EINVAL;
419 		}
420 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
421 		return rc;
422 	}
423 	sge->src_sge_count = rc;
424 	if (spdk_unlikely(remaining)) {
425 		uint32_t new_len = length - remaining;
426 		uint32_t aligned_len, updated_num_blocks;
427 
428 		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
429 		if (new_len < block_size) {
430 			/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
431 			 * anything */
432 			return -ERANGE;
433 		}
434 
435 		/* Regular integer division; we need to round down to the previous block boundary */
436 		updated_num_blocks = new_len / block_size;
437 		assert(updated_num_blocks);
438 		assert(updated_num_blocks < num_blocks);
439 		aligned_len = updated_num_blocks * block_size;
440 
441 		if (aligned_len < new_len) {
442 			uint32_t dt = new_len - aligned_len;
443 
444 			/* We can't process a partial block; we need to unwind the src iov_sgl and sge to the
445 			 * previous block boundary */
446 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
447 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
448 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
449 			if (!sge->src_sge_count) {
450 				return -ERANGE;
451 			}
452 		}
453 		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
454 		length = aligned_len;
455 		num_blocks = updated_num_blocks;
456 	}
457 
458 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
459 	cattr.keytag = 0;
460 	cattr.dek_obj_id = dek_data->dek_obj_id;
461 	cattr.tweak_mode = dek_data->tweak_mode;
462 	cattr.enc_order = mlx5_task->enc_order;
463 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
464 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
465 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
466 		return -EINVAL;
467 	}
468 	umr_attr.mkey = mkey;
469 	umr_attr.sge = sge->src_sge;
470 
471 	if (!mlx5_task->inplace) {
472 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
473 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
474 					       task->dst_domain, task->dst_domain_ctx);
475 		if (spdk_unlikely(rc <= 0)) {
476 			if (rc == 0) {
477 				rc = -EINVAL;
478 			}
479 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
480 			return rc;
481 		}
482 		sge->dst_sge_count = rc;
483 		if (spdk_unlikely(remaining)) {
484 			uint32_t new_len = length - remaining;
485 			uint32_t aligned_len, updated_num_blocks, dt;
486 
487 			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
488 			if (new_len < block_size) {
489 				/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
490 				 * anything */
491 				return -ERANGE;
492 			}
493 
494 			/* Regular integer division; we need to round down to the previous block boundary */
495 			updated_num_blocks = new_len / block_size;
496 			assert(updated_num_blocks);
497 			assert(updated_num_blocks < num_blocks);
498 			aligned_len = updated_num_blocks * block_size;
499 
500 			if (aligned_len < new_len) {
501 				dt = new_len - aligned_len;
502 				assert(dt > 0 && dt < length);
503 				/* We can't process a partial block; we need to unwind the src and dst iov_sgl and sge to the
504 				 * previous block boundary */
505 				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
506 				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
507 				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
508 				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
509 				if (!sge->dst_sge_count) {
510 					return -ERANGE;
511 				}
512 			}
513 			assert(length > aligned_len);
514 			dt = length - aligned_len;
515 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
516 			/* The same for the src iov_sgl and sge. In the worst case we can unwind SRC twice */
517 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
518 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
519 			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
520 			if (!sge->src_sge_count) {
521 				return -ERANGE;
522 			}
523 			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
524 			length = aligned_len;
525 			num_blocks = updated_num_blocks;
526 		}
527 	}
528 
529 	SPDK_DEBUGLOG(accel_mlx5,
530 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
531 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
532 		      num_blocks);
533 
534 	umr_attr.sge_count = sge->src_sge_count;
535 	umr_attr.umr_len = length;
536 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
537 	mlx5_task->num_processed_blocks += num_blocks;
538 
539 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, 0, 0);
540 
541 	return rc;
542 }
543 
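/* Submit as many crypto requests as allocated mkeys and free qp slots allow. Each request consumes
 * two WQEs: a UMR that configures the crypto mkey and an RDMA_READ from that mkey into the
 * destination (or in place). Only the last RDMA_READ is signaled and carries the task as wr_id. */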
544 static inline int
545 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
546 {
547 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
548 	struct spdk_mlx5_crypto_dek_data dek_data;
549 	struct accel_mlx5_qp *qp = mlx5_task->qp;
550 	struct accel_mlx5_dev *dev = qp->dev;
551 	/* First RDMA after UMR must have a SMALL_FENCE */
552 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
553 	uint16_t num_blocks;
554 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
555 				    mlx5_task->num_ops);
556 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
557 	uint16_t i;
558 	int rc;
559 
560 	assert(qp_slot > 1);
561 	num_ops = spdk_min(num_ops, qp_slot >> 1);
562 	if (spdk_unlikely(!num_ops)) {
563 		return -EINVAL;
564 	}
565 
566 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
567 	if (spdk_unlikely(rc)) {
568 		return rc;
569 	}
570 
571 	mlx5_task->num_wrs = 0;
572 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
573 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
574 	for (i = 0; i < num_ops; i++) {
575 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
576 			/* Last request may consume fewer blocks than calculated if crypto_multi_block is true */
577 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
578 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
579 		} else {
580 			num_blocks = mlx5_task->blocks_per_req;
581 		}
582 
583 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
584 						     &dek_data);
585 		if (spdk_unlikely(rc)) {
586 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
587 			return rc;
588 		}
589 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
590 	}
591 
592 	/* Loop over the first `num_ops - 1` operations for easy flags handling; the last one is signaled */
593 	for (i = 0; i < num_ops - 1; i++) {
594 		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
595 		if (mlx5_task->inplace) {
596 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
597 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
598 		} else {
599 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
600 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
601 		}
602 		if (spdk_unlikely(rc)) {
603 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
604 			return rc;
605 		}
606 
607 		first_rdma_fence = 0;
608 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
609 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
610 		mlx5_task->num_submitted_reqs++;
611 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
612 	}
613 
614 	if (mlx5_task->inplace) {
615 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
616 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
617 	} else {
618 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
619 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
620 	}
621 	if (spdk_unlikely(rc)) {
622 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
623 		return rc;
624 	}
625 
626 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
627 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
628 	mlx5_task->num_submitted_reqs++;
629 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
630 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
631 
632 	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
633 			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
634 		/* We hit the "out of sge entries" case with a highly fragmented payload.
635 		 * In that case the accel_mlx5_configure_crypto_umr function handled fewer
636 		 * data blocks than expected. That means we need at least 1 more request to
637 		 * complete this task; this request will be executed once all submitted ones
638 		 * are completed */
639 		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
640 			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
641 		mlx5_task->num_reqs++;
642 	}
643 
644 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
645 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
646 
647 	return 0;
648 }
649 
650 static inline int
651 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
652 {
653 	struct accel_mlx5_qp *qp = task->qp;
654 	struct accel_mlx5_dev *dev = qp->dev;
655 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
656 
657 	assert(task->num_reqs > task->num_completed_reqs);
658 	if (task->num_ops == 0) {
659 		/* No mkeys allocated, try to allocate now */
660 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task))) {
661 			/* Pool is empty, queue this task */
662 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
663 			return -ENOMEM;
664 		}
665 	}
666 	/* We need to post at least 1 UMR and 1 RDMA operation */
667 	if (spdk_unlikely(qp_slot < 2)) {
668 		/* QP is full, queue this task */
669 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
670 		return -ENOMEM;
671 	}
672 
673 	return accel_mlx5_crypto_task_process(task);
674 }
675 
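/* Validate a crypto task, compute the number of data blocks and split them into requests: one
 * request per block on single-block devices, or groups of blocks bounded by crypto_split_blocks
 * and ACCEL_MLX5_MAX_SGE on multi-block devices. Also detects whether the operation is in-place. */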
676 static inline int
677 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
678 {
679 	struct spdk_accel_task *task = &mlx5_task->base;
680 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
681 	uint64_t src_nbytes = task->nbytes;
682 #ifdef DEBUG
683 	uint64_t dst_nbytes;
684 	uint32_t i;
685 #endif
686 	bool crypto_key_ok;
687 
688 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
689 			 task->crypto_key->priv);
690 	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
691 		if (crypto_key_ok) {
692 			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
693 				    mlx5_task->base.block_size);
694 		} else {
695 			SPDK_ERRLOG("Wrong crypto key provided\n");
696 		}
697 		return -EINVAL;
698 	}
699 
700 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
701 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
702 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
703 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
704 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
705 		mlx5_task->inplace = 1;
706 	} else {
707 #ifdef DEBUG
708 		dst_nbytes = 0;
709 		for (i = 0; i < task->d.iovcnt; i++) {
710 			dst_nbytes += task->d.iovs[i].iov_len;
711 		}
712 
713 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
714 			return -EINVAL;
715 		}
716 #endif
717 		mlx5_task->inplace = 0;
718 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
719 	}
720 
721 	if (dev->crypto_multi_block) {
722 		if (dev->crypto_split_blocks) {
723 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
724 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
725 			/* The last req may consume fewer blocks */
726 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
727 		} else {
728 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
729 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
730 
731 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
732 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
733 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
734 			} else {
735 				mlx5_task->num_reqs = 1;
736 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
737 			}
738 		}
739 	} else {
740 		mlx5_task->num_reqs = mlx5_task->num_blocks;
741 		mlx5_task->blocks_per_req = 1;
742 	}
743 
744 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task))) {
745 		/* Pool is empty, queue this task */
746 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
747 		return -ENOMEM;
748 	}
749 	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
750 		/* Queue is full, queue this task */
751 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
752 			      mlx5_task->qp);
753 		return -ENOMEM;
754 	}
755 
756 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
757 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
758 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
759 
760 	return 0;
761 }
762 
763 static inline void
764 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
765 {
766 	spdk_accel_task_complete(&mlx5_task->base, 0);
767 }
768 
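/* Post a single RDMA_WRITE that copies into the current dst iovec. The transfer length is capped
 * by the space left in that dst entry, so a copy task may need several such writes. */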
769 static inline int
770 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
771 				 uint64_t wrid, uint32_t fence)
772 {
773 	struct spdk_accel_task *task = &mlx5_task->base;
774 	struct accel_mlx5_sge sge;
775 	uint32_t remaining;
776 	uint32_t dst_len;
777 	int rc;
778 
779 	/* Limit one RDMA_WRITE by the length of the dst buffer. Not all src buffers may fit into one dst
780 	 * buffer due to the ACCEL_MLX5_MAX_SGE limitation. If this is the case then remaining is not zero */
781 	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
782 	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
783 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
784 				       task->src_domain, task->src_domain_ctx);
785 	if (spdk_unlikely(rc <= 0)) {
786 		if (rc == 0) {
787 			rc = -EINVAL;
788 		}
789 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
790 		return rc;
791 	}
792 	sge.src_sge_count = rc;
793 	assert(dst_len > remaining);
794 	dst_len -= remaining;
795 
796 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
797 				       task->dst_domain, task->dst_domain_ctx);
798 	if (spdk_unlikely(rc != 1)) {
799 		/* We use a single dst entry; any result other than 1 is an error */
800 		if (rc == 0) {
801 			rc = -EINVAL;
802 		}
803 		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
804 		return rc;
805 	}
806 	if (spdk_unlikely(remaining)) {
807 		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
808 		assert(0);
809 		return -EINVAL;
810 	}
811 
812 	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
813 				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
814 	if (spdk_unlikely(rc)) {
815 		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
816 		return rc;
817 	}
818 
819 	return 0;
820 }
821 
822 static inline int
823 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
824 {
826 	struct accel_mlx5_qp *qp = mlx5_task->qp;
827 	struct accel_mlx5_dev *dev = qp->dev;
828 	uint16_t i;
829 	int rc;
830 
831 	mlx5_task->num_wrs = 0;
832 	assert(mlx5_task->num_reqs > 0);
833 	assert(mlx5_task->num_ops > 0);
834 
835 	/* Handle n-1 reqs in order to simplify wrid and fence handling */
836 	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
837 		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
838 		if (spdk_unlikely(rc)) {
839 			return rc;
840 		}
841 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
842 		mlx5_task->num_submitted_reqs++;
843 	}
844 
845 	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
846 					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
847 	if (spdk_unlikely(rc)) {
848 		return rc;
849 	}
850 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
851 	mlx5_task->num_submitted_reqs++;
852 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
853 
854 	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);
855 
856 	return 0;
857 }
858 
859 static inline int
860 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
861 {
862 	struct accel_mlx5_qp *qp = task->qp;
863 	struct accel_mlx5_dev *dev = qp->dev;
864 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
865 
866 	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
867 	if (spdk_unlikely(task->num_ops == 0)) {
868 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
869 		return -ENOMEM;
870 	}
871 	return accel_mlx5_copy_task_process(task);
872 }
873 
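/* Walk the src and dst iovecs in lockstep to count how many RDMA_WRITE operations the copy needs.
 * A new operation starts whenever a dst entry is filled or the src SGE limit is reached. */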
874 static inline uint32_t
875 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
876 			       struct iovec *dst_iov, uint32_t dst_iovcnt)
877 {
878 	uint32_t src = 0;
879 	uint32_t dst = 0;
880 	uint64_t src_offset = 0;
881 	uint64_t dst_offset = 0;
882 	uint32_t num_ops = 0;
883 	uint32_t src_sge_count = 0;
884 
885 	while (src < src_iovcnt && dst < dst_iovcnt) {
886 		uint64_t src_len = src_iov[src].iov_len - src_offset;
887 		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;
888 
889 		if (dst_len < src_len) {
890 			dst_offset = 0;
891 			src_offset += dst_len;
892 			dst++;
893 			num_ops++;
894 			src_sge_count = 0;
895 		} else if (src_len < dst_len) {
896 			dst_offset += src_len;
897 			src_offset = 0;
898 			src++;
899 			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
900 				num_ops++;
901 				src_sge_count = 0;
902 			}
903 		} else {
904 			dst_offset = 0;
905 			src_offset = 0;
906 			dst++;
907 			src++;
908 			num_ops++;
909 			src_sge_count = 0;
910 		}
911 	}
912 
913 	assert(src == src_iovcnt);
914 	assert(dst == dst_iovcnt);
915 	assert(src_offset == 0);
916 	assert(dst_offset == 0);
917 	return num_ops;
918 }
919 
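/* Split a copy task into RDMA_WRITE requests based on the src/dst iovec layout and the SGE limit,
 * then record how many of them can be submitted with the currently available qp slots. */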
920 static inline int
921 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
922 {
923 	struct spdk_accel_task *task = &mlx5_task->base;
924 	struct accel_mlx5_qp *qp = mlx5_task->qp;
925 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
926 
927 	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
928 		mlx5_task->num_reqs = task->d.iovcnt;
929 	} else if (task->d.iovcnt == 1) {
930 		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
931 	} else {
932 		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
933 				      task->d.iovs, task->d.iovcnt);
934 	}
935 	mlx5_task->inplace = 0;
936 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
937 	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
938 	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
939 	if (spdk_unlikely(!mlx5_task->num_ops)) {
940 		return -ENOMEM;
941 	}
942 	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
943 		      mlx5_task->num_ops);
944 
945 	return 0;
946 }
947 
948 static int
949 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
950 {
951 	SPDK_ERRLOG("wrong function called\n");
952 	SPDK_UNREACHABLE();
953 }
954 
955 static void
956 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
957 {
958 	SPDK_ERRLOG("wrong function called\n");
959 	SPDK_UNREACHABLE();
960 }
961 
962 static int
963 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
964 {
965 	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);
966 
967 	return -ENOTSUP;
968 }
969 
970 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
971 	[ACCEL_MLX5_OPC_COPY] = {
972 		.init = accel_mlx5_copy_task_init,
973 		.process = accel_mlx5_copy_task_process,
974 		.cont = accel_mlx5_copy_task_continue,
975 		.complete = accel_mlx5_copy_task_complete,
976 	},
977 	[ACCEL_MLX5_OPC_CRYPTO] = {
978 		.init = accel_mlx5_crypto_task_init,
979 		.process = accel_mlx5_crypto_task_process,
980 		.cont = accel_mlx5_crypto_task_continue,
981 		.complete = accel_mlx5_crypto_task_complete,
982 	},
983 	[ACCEL_MLX5_OPC_LAST] = {
984 		.init = accel_mlx5_task_op_not_supported,
985 		.process = accel_mlx5_task_op_not_implemented,
986 		.cont = accel_mlx5_task_op_not_implemented,
987 		.complete = accel_mlx5_task_op_not_implemented_v
988 	},
989 };
990 
991 static inline void
992 accel_mlx5_task_complete(struct accel_mlx5_task *task)
993 {
994 	assert(task->num_reqs == task->num_completed_reqs);
995 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->base.op_code);
996 
997 	g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task);
998 }
999 
1000 static inline int
1001 accel_mlx5_task_continue(struct accel_mlx5_task *task)
1002 {
1003 	struct accel_mlx5_qp *qp = task->qp;
1004 	struct accel_mlx5_dev *dev = qp->dev;
1005 
1006 	if (spdk_unlikely(qp->recovering)) {
1007 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1008 		return 0;
1009 	}
1010 
1011 	return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task);
1012 }

1013 static inline void
1014 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task)
1015 {
1016 	uint8_t base_opcode = mlx5_task->base.op_code;
1017 
1018 	switch (base_opcode) {
1019 	case SPDK_ACCEL_OPC_COPY:
1020 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY;
1021 		break;
1022 	case SPDK_ACCEL_OPC_ENCRYPT:
1023 		assert(g_accel_mlx5.crypto_supported);
1024 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
1025 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
1026 		break;
1027 	case SPDK_ACCEL_OPC_DECRYPT:
1028 		assert(g_accel_mlx5.crypto_supported);
1029 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
1030 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
1031 		break;
1032 	default:
1033 		SPDK_ERRLOG("wrong opcode %d\n", base_opcode);
1034 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST;
1035 	}
1036 }
1037 
1038 static inline void
1039 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task)
1040 {
1041 	mlx5_task->num_completed_reqs = 0;
1042 	mlx5_task->num_submitted_reqs = 0;
1043 	mlx5_task->num_ops = 0;
1044 	mlx5_task->num_processed_blocks = 0;
1045 	mlx5_task->raw = 0;
1046 }
1047 
1048 static int
1049 accel_mlx5_submit_tasks(struct spdk_io_channel *_ch, struct spdk_accel_task *task)
1050 {
1051 	struct accel_mlx5_io_channel *ch = spdk_io_channel_get_ctx(_ch);
1052 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
1053 	struct accel_mlx5_dev *dev;
1054 	int rc;
1055 
1056 	/* We should not receive any tasks if the module was not enabled */
1057 	assert(g_accel_mlx5.enabled);
1058 
1059 	dev = &ch->devs[ch->dev_idx];
1060 	ch->dev_idx++;
1061 	if (ch->dev_idx == ch->num_devs) {
1062 		ch->dev_idx = 0;
1063 	}
1064 
1065 	mlx5_task->qp = &dev->qp;
1066 	accel_mlx5_task_reset(mlx5_task);
1067 	accel_mlx5_task_init_opcode(mlx5_task);
1068 
1069 	rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task);
1070 	if (spdk_unlikely(rc)) {
1071 		if (rc == -ENOMEM) {
1072 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
1073 				      mlx5_task->num_reqs);
1074 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
1075 			return 0;
1076 		}
1077 		SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc);
1078 		return rc;
1079 	}
1080 
1081 	if (spdk_unlikely(mlx5_task->qp->recovering)) {
1082 		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
1083 		return 0;
1084 	}
1085 
1086 	return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task);
1087 }
1088 
1089 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);
1090 
1091 static int
1092 accel_mlx5_recover_qp_poller(void *arg)
1093 {
1094 	struct accel_mlx5_qp *qp = arg;
1095 
1096 	spdk_poller_unregister(&qp->recover_poller);
1097 	accel_mlx5_recover_qp(qp);
1098 	return SPDK_POLLER_BUSY;
1099 }
1100 
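/* Recreate the qp after a fatal completion error: the broken qp is destroyed and a new one is
 * created with the configured attributes. On failure, a poller retries the recovery periodically. */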
1101 static void
1102 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
1103 {
1104 	struct accel_mlx5_dev *dev = qp->dev;
1105 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
1106 	int rc;
1107 
1108 	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
1109 	if (qp->qp) {
1110 		spdk_mlx5_qp_destroy(qp->qp);
1111 		qp->qp = NULL;
1112 	}
1113 
1114 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
1115 	mlx5_qp_attr.cap.max_recv_wr = 0;
1116 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
1117 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
1118 
1119 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
1120 	if (rc) {
1121 		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
1122 			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
1123 		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
1124 				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
1125 		return;
1126 	}
1127 
1128 	qp->recovering = false;
1129 }
1130 
1131 static inline void
1132 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
1133 {
1134 	struct accel_mlx5_qp *qp = task->qp;
1135 
1136 	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
1137 		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
1138 			     qp, task, wc->status, spdk_env_get_current_core());
1139 	} else {
1140 		SPDK_DEBUGLOG(accel_mlx5,
1141 			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
1142 			      qp, task, wc->status, spdk_env_get_current_core());
1143 	}
1144 
1145 	qp->recovering = true;
1146 	assert(task->num_completed_reqs <= task->num_submitted_reqs);
1147 	if (task->num_completed_reqs == task->num_submitted_reqs) {
1148 		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
1149 		accel_mlx5_task_fail(task, -EIO);
1150 	}
1151 }
1152 
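/* Reap up to ACCEL_MLX5_MAX_WC completions. Each signaled completion accounts for all requests
 * submitted so far for its task; completed tasks are finished, partially processed tasks are
 * continued, and errors mark the qp for recovery. */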
1153 static inline int64_t
1154 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
1155 {
1156 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
1157 	struct accel_mlx5_task *task;
1158 	struct accel_mlx5_qp *qp;
1159 	int reaped, i, rc;
1160 	uint16_t completed;
1161 
1162 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
1163 	if (spdk_unlikely(reaped < 0)) {
1164 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
1165 		return reaped;
1166 	} else if (reaped == 0) {
1167 		return 0;
1168 	}
1169 
1170 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
1171 		      dev->dev_ctx->context->device->name);
1172 
1173 	for (i = 0; i < reaped; i++) {
1174 		if (spdk_unlikely(!wc[i].wr_id)) {
1175 			/* Unsignaled completion with error, ignore */
1176 			continue;
1177 		}
1178 		task = (struct accel_mlx5_task *)wc[i].wr_id;
1179 		qp = task->qp;
1180 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
1181 		assert(task->num_submitted_reqs > task->num_completed_reqs);
1182 		completed = task->num_submitted_reqs - task->num_completed_reqs;
1183 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
1184 		task->num_completed_reqs += completed;
1185 		assert(qp->wrs_submitted >= task->num_wrs);
1186 		qp->wrs_submitted -= task->num_wrs;
1187 		assert(dev->wrs_in_cq > 0);
1188 		dev->wrs_in_cq--;
1189 
1190 		if (wc[i].status) {
1191 			accel_mlx5_process_error_cpl(&wc[i], task);
1192 			if (qp->wrs_submitted == 0) {
1193 				assert(STAILQ_EMPTY(&qp->in_hw));
1194 				accel_mlx5_recover_qp(qp);
1195 			}
1196 			continue;
1197 		}
1198 
1199 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
1200 			      task->num_reqs - task->num_completed_reqs);
1201 		if (task->num_completed_reqs == task->num_reqs) {
1202 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
1203 			accel_mlx5_task_complete(task);
1204 		} else {
1205 			assert(task->num_submitted_reqs < task->num_reqs);
1206 			assert(task->num_completed_reqs == task->num_submitted_reqs);
1207 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
1208 			rc = accel_mlx5_task_continue(task);
1209 			if (spdk_unlikely(rc)) {
1210 				if (rc != -ENOMEM) {
1211 					accel_mlx5_task_fail(task, rc);
1212 				}
1213 			}
1214 		}
1215 	}
1216 
1217 	return reaped;
1218 }
1219 
1220 static inline void
1221 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
1222 {
1223 	struct accel_mlx5_task *task, *tmp, *last;
1224 	int rc;
1225 
1226 	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
1227 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
1228 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
1229 		rc = accel_mlx5_task_continue(task);
1230 		if (spdk_unlikely(rc)) {
1231 			if (rc != -ENOMEM) {
1232 				accel_mlx5_task_fail(task, rc);
1233 			}
1234 			break;
1235 		}
1236 		/* If the qpair is recovering, the task is added back to the nomem list and 0 is returned. In that
1237 		 * case we need a special condition to iterate the list only once and stop this FOREACH loop */
1238 		if (task == last) {
1239 			break;
1240 		}
1241 	}
1242 }
1243 
1244 static int
1245 accel_mlx5_poller(void *ctx)
1246 {
1247 	struct accel_mlx5_io_channel *ch = ctx;
1248 	struct accel_mlx5_dev *dev;
1249 
1250 	int64_t completions = 0, rc;
1251 	uint32_t i;
1252 
1253 	for (i = 0; i < ch->num_devs; i++) {
1254 		dev = &ch->devs[i];
1255 		if (dev->wrs_in_cq) {
1256 			rc = accel_mlx5_poll_cq(dev);
1257 			if (spdk_unlikely(rc < 0)) {
1258 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
1259 			}
1260 			completions += rc;
1261 			if (dev->qp.wrs_submitted) {
1262 				spdk_mlx5_qp_complete_send(dev->qp.qp);
1263 			}
1264 		}
1265 		if (!STAILQ_EMPTY(&dev->nomem)) {
1266 			accel_mlx5_resubmit_nomem_tasks(dev);
1267 		}
1268 	}
1269 
1270 	return !!completions;
1271 }
1272 
1273 static bool
1274 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
1275 {
1276 	assert(g_accel_mlx5.enabled);
1277 
1278 	switch (opc) {
1279 	case SPDK_ACCEL_OPC_COPY:
1280 		return true;
1281 	case SPDK_ACCEL_OPC_ENCRYPT:
1282 	case SPDK_ACCEL_OPC_DECRYPT:
1283 		return g_accel_mlx5.crypto_supported;
1284 	default:
1285 		return false;
1286 	}
1287 }
1288 
1289 static struct spdk_io_channel *
1290 accel_mlx5_get_io_channel(void)
1291 {
1292 	assert(g_accel_mlx5.enabled);
1293 	return spdk_get_io_channel(&g_accel_mlx5);
1294 }
1295 
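/* Create the mlx5 DMA qp used by a channel: no receive WRs, up to ACCEL_MLX5_MAX_SGE send SGEs and
 * inline data space sized for a full SGE list. The WR budget matches the configured qp_size. */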
1296 static int
1297 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
1298 {
1299 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
1300 	int rc;
1301 
1302 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
1303 	mlx5_qp_attr.cap.max_recv_wr = 0;
1304 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
1305 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
1306 
1307 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
1308 	if (rc) {
1309 		return rc;
1310 	}
1311 
1312 	STAILQ_INIT(&qp->in_hw);
1313 	qp->dev = dev;
1314 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
1315 	assert(qp->verbs_qp);
1316 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
1317 
1318 	return 0;
1319 }
1320 
1321 static void
1322 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
1323 {
1324 	struct accel_mlx5_io_channel *ch = ctx_buf;
1325 	struct accel_mlx5_dev *dev;
1326 	uint32_t i;
1327 
1328 	spdk_poller_unregister(&ch->poller);
1329 	for (i = 0; i < ch->num_devs; i++) {
1330 		dev = &ch->devs[i];
1331 		spdk_mlx5_qp_destroy(dev->qp.qp);
1332 		if (dev->cq) {
1333 			spdk_mlx5_cq_destroy(dev->cq);
1334 		}
1335 		spdk_poller_unregister(&dev->qp.recover_poller);
1336 		if (dev->crypto_mkeys) {
1337 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
1338 		}
1339 		spdk_rdma_utils_free_mem_map(&dev->mmap);
1340 	}
1341 	free(ch->devs);
1342 }
1343 
1344 static int
1345 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
1346 {
1347 	struct spdk_mlx5_cq_attr cq_attr = {};
1348 	struct accel_mlx5_io_channel *ch = ctx_buf;
1349 	struct accel_mlx5_dev_ctx *dev_ctx;
1350 	struct accel_mlx5_dev *dev;
1351 	uint32_t i;
1352 	int rc;
1353 
1354 	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
1355 	if (!ch->devs) {
1356 		SPDK_ERRLOG("Memory allocation failed\n");
1357 		return -ENOMEM;
1358 	}
1359 
1360 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
1361 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
1362 		dev = &ch->devs[i];
1363 		dev->dev_ctx = dev_ctx;
1364 
1365 		if (dev_ctx->crypto_mkeys) {
1366 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
1367 			if (!dev->crypto_mkeys) {
1368 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
1369 				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
1370 				 * We should not be here if pool creation failed */
1371 				assert(0);
1372 				goto err_out;
1373 			}
1374 		}
1375 
1376 		memset(&cq_attr, 0, sizeof(cq_attr));
1377 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
1378 		cq_attr.cqe_size = 64;
1379 		cq_attr.cq_context = dev;
1380 
1381 		ch->num_devs++;
1382 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
1383 		if (rc) {
1384 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
1385 			goto err_out;
1386 		}
1387 
1388 		rc = accel_mlx5_create_qp(dev, &dev->qp);
1389 		if (rc) {
1390 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
1391 			goto err_out;
1392 		}
1393 
1394 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
1395 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
1396 		if (!dev->mmap) {
1397 			SPDK_ERRLOG("Failed to create memory map\n");
1398 			rc = -ENOMEM;
1399 			goto err_out;
1400 		}
1401 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
1402 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
1403 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
1404 		STAILQ_INIT(&dev->nomem);
1405 	}
1406 
1407 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
1408 
1409 	return 0;
1410 
1411 err_out:
1412 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
1413 	return rc;
1414 }
1415 
1416 void
1417 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
1418 {
1419 	assert(attr);
1420 
1421 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
1422 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
1423 	attr->allowed_devs = NULL;
1424 	attr->crypto_split_blocks = 0;
1425 }
1426 
1427 static void
1428 accel_mlx5_allowed_devs_free(void)
1429 {
1430 	size_t i;
1431 
1432 	if (!g_accel_mlx5.allowed_devs) {
1433 		return;
1434 	}
1435 
1436 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
1437 		free(g_accel_mlx5.allowed_devs[i]);
1438 	}
1439 	free(g_accel_mlx5.attr.allowed_devs);
1440 	free(g_accel_mlx5.allowed_devs);
1441 	g_accel_mlx5.attr.allowed_devs = NULL;
1442 	g_accel_mlx5.allowed_devs = NULL;
1443 	g_accel_mlx5.allowed_devs_count = 0;
1444 }
1445 
1446 static int
1447 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
1448 {
1449 	char *str, *tmp, *tok;
1450 	size_t devs_count = 0;
1451 
1452 	str = strdup(allowed_devs);
1453 	if (!str) {
1454 		return -ENOMEM;
1455 	}
1456 
1457 	accel_mlx5_allowed_devs_free();
1458 
1459 	tmp = str;
1460 	while ((tmp = strchr(tmp, ',')) != NULL) {
1461 		tmp++;
1462 		devs_count++;
1463 	}
1464 	devs_count++;
1465 
1466 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
1467 	if (!g_accel_mlx5.allowed_devs) {
1468 		free(str);
1469 		return -ENOMEM;
1470 	}
1471 
1472 	devs_count = 0;
1473 	tok = strtok(str, ",");
1474 	while (tok) {
1475 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
1476 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
1477 			free(str);
1478 			accel_mlx5_allowed_devs_free();
1479 			return -ENOMEM;
1480 		}
1481 		tok = strtok(NULL, ",");
1482 		devs_count++;
1483 		g_accel_mlx5.allowed_devs_count++;
1484 	}
1485 
1486 	free(str);
1487 
1488 	return 0;
1489 }
1490 
1491 int
1492 accel_mlx5_enable(struct accel_mlx5_attr *attr)
1493 {
1494 	int rc;
1495 
1496 	if (g_accel_mlx5.enabled) {
1497 		return -EEXIST;
1498 	}
1499 	if (attr) {
1500 		g_accel_mlx5.attr = *attr;
1501 		g_accel_mlx5.attr.allowed_devs = NULL;
1502 
1503 		if (attr->allowed_devs) {
1504 			/* Contains a copy of the user's string */
1505 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
1506 			if (!g_accel_mlx5.attr.allowed_devs) {
1507 				return -ENOMEM;
1508 			}
1509 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
1510 			if (rc) {
1511 				return rc;
1512 			}
1513 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
1514 							 g_accel_mlx5.allowed_devs_count);
1515 			if (rc) {
1516 				accel_mlx5_allowed_devs_free();
1517 				return rc;
1518 			}
1519 		}
1520 	} else {
1521 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
1522 	}
1523 
1524 	g_accel_mlx5.enabled = true;
1525 	spdk_accel_module_list_add(&g_accel_mlx5.module);
1526 
1527 	return 0;
1528 }
1529 
1530 static void
1531 accel_mlx5_free_resources(void)
1532 {
1533 	struct accel_mlx5_dev_ctx *dev_ctx;
1534 	uint32_t i;
1535 
1536 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
1537 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
1538 		if (dev_ctx->pd) {
1539 			if (dev_ctx->crypto_mkeys) {
1540 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
1541 			}
1542 			spdk_rdma_utils_put_pd(dev_ctx->pd);
1543 		}
1544 		if (dev_ctx->domain) {
1545 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
1546 		}
1547 	}
1548 
1549 	free(g_accel_mlx5.dev_ctxs);
1550 	g_accel_mlx5.dev_ctxs = NULL;
1551 	g_accel_mlx5.initialized = false;
1552 }
1553 
1554 static void
1555 accel_mlx5_deinit_cb(void *ctx)
1556 {
1557 	accel_mlx5_free_resources();
1558 	spdk_accel_module_finish();
1559 }
1560 
1561 static void
1562 accel_mlx5_deinit(void *ctx)
1563 {
1564 	if (g_accel_mlx5.allowed_devs) {
1565 		accel_mlx5_allowed_devs_free();
1566 	}
1567 	spdk_mlx5_crypto_devs_allow(NULL, 0);
1568 	if (g_accel_mlx5.initialized) {
1569 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
1570 	} else {
1571 		spdk_accel_module_finish();
1572 	}
1573 }
1574 
1575 static int
1576 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
1577 {
1578 	struct spdk_mlx5_mkey_pool_param pool_param = {};
1579 
1580 	pool_param.mkey_count = num_mkeys;
1581 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
1582 	pool_param.flags = flags;
1583 
1584 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
1585 }
1586 
1587 static int
1588 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
1589 			struct spdk_mlx5_device_caps *caps)
1590 {
1591 	struct ibv_pd *pd;
1592 	int rc;
1593 
1594 	pd = spdk_rdma_utils_get_pd(dev);
1595 	if (!pd) {
1596 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
1597 		return -EINVAL;
1598 	}
1599 	dev_ctx->context = dev;
1600 	dev_ctx->pd = pd;
1601 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
1602 	if (!dev_ctx->domain) {
1603 		return -ENOMEM;
1604 	}
1605 
1606 	if (g_accel_mlx5.crypto_supported) {
1607 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
1608 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
1609 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
1610 				     dev->device->name);
1611 		}
1612 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
1613 		if (rc) {
1614 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
1615 			return rc;
1616 		}
1617 		dev_ctx->crypto_mkeys = true;
1618 	}
1619 
1620 	return 0;
1621 }
1622 
1623 static struct ibv_context **
1624 accel_mlx5_get_devices(int *_num_devs)
1625 {
1626 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
1627 	struct ibv_device_attr dev_attr;
1628 	size_t j;
1629 	int num_devs = 0, i, rc;
1630 	int num_devs_out = 0;
1631 	bool dev_allowed;
1632 
1633 	rdma_devs = rdma_get_devices(&num_devs);
1634 	if (!rdma_devs || !num_devs) {
1635 		*_num_devs = 0;
1636 		return NULL;
1637 	}
1638 
1639 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
1640 	if (!rdma_devs_out) {
1641 		SPDK_ERRLOG("Memory allocation failed\n");
1642 		rdma_free_devices(rdma_devs);
1643 		*_num_devs = 0;
1644 		return NULL;
1645 	}
1646 
1647 	for (i = 0; i < num_devs; i++) {
1648 		dev = rdma_devs[i];
1649 		rc = ibv_query_device(dev, &dev_attr);
1650 		if (rc) {
1651 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
1652 			continue;
1653 		}
1654 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
1655 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
1656 			continue;
1657 		}
1658 
1659 		if (g_accel_mlx5.allowed_devs_count) {
1660 			dev_allowed = false;
1661 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
1662 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
1663 					dev_allowed = true;
1664 					break;
1665 				}
1666 			}
1667 			if (!dev_allowed) {
1668 				continue;
1669 			}
1670 		}
1671 
1672 		rdma_devs_out[num_devs_out] = dev;
1673 		num_devs_out++;
1674 	}
1675 
1676 	rdma_free_devices(rdma_devs);
1677 	*_num_devs = num_devs_out;
1678 
1679 	return rdma_devs_out;
1680 }
1681 
1682 static inline bool
1683 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
1684 {
1685 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
1686 	       (caps->crypto.single_block_le_tweak ||
1687 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
1688 }
1689 
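/* Module init: enumerate allowed mlx5 devices, query crypto capabilities (crypto is enabled only
 * if every selected device supports it), set up the per-device PD, memory domain and mkey pool,
 * and register the io_device. If no devices were explicitly allowed, a single best device is
 * picked automatically. */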
1690 static int
1691 accel_mlx5_init(void)
1692 {
1693 	struct spdk_mlx5_device_caps *caps;
1694 	struct ibv_context **rdma_devs, *dev;
1695 	int num_devs = 0, rc = 0, i;
1696 	int best_dev = -1, first_dev = 0;
1697 	bool supports_crypto;
1698 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
1699 
1700 	if (!g_accel_mlx5.enabled) {
1701 		return -EINVAL;
1702 	}
1703 
1704 	rdma_devs = accel_mlx5_get_devices(&num_devs);
1705 	if (!rdma_devs || !num_devs) {
1706 		return -ENODEV;
1707 	}
1708 	caps = calloc(num_devs, sizeof(*caps));
1709 	if (!caps) {
1710 		rc = -ENOMEM;
1711 		goto cleanup;
1712 	}
1713 
1714 	g_accel_mlx5.crypto_supported = true;
1715 	g_accel_mlx5.num_ctxs = 0;
1716 
1717 	/* Iterate devices. We support an offload if all devices support it */
1718 	for (i = 0; i < num_devs; i++) {
1719 		dev = rdma_devs[i];
1720 
1721 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
1722 		if (rc) {
1723 			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
1724 			goto cleanup;
1725 		}
1726 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
1727 		if (!supports_crypto) {
1728 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
1729 				      rdma_devs[i]->device->name);
1730 			g_accel_mlx5.crypto_supported = false;
1731 		}
1732 		if (find_best_dev) {
1733 			if (supports_crypto && best_dev == -1) {
1734 				best_dev = i;
1735 			}
1736 		}
1737 	}
1738 
1739 	/* User didn't specify devices to use, try to select the best one */
1740 	if (find_best_dev) {
1741 		if (best_dev == -1) {
1742 			best_dev = 0;
1743 		}
1744 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
1745 		SPDK_NOTICELOG("Select dev %s, crypto %d\n", rdma_devs[best_dev]->device->name, supports_crypto);
1746 		g_accel_mlx5.crypto_supported = supports_crypto;
1747 		first_dev = best_dev;
1748 		num_devs = 1;
1749 		if (supports_crypto) {
1750 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
1751 			/* Let mlx5 library know which device to use */
1752 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
1753 		}
1754 	} else {
1755 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
1756 	}
1757 
1758 	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
1759 	if (!g_accel_mlx5.dev_ctxs) {
1760 		SPDK_ERRLOG("Memory allocation failed\n");
1761 		rc = -ENOMEM;
1762 		goto cleanup;
1763 	}
1764 
1765 	for (i = first_dev; i < first_dev + num_devs; i++) {
1766 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
1767 					     rdma_devs[i], &caps[i]);
1768 		if (rc) {
1769 			goto cleanup;
1770 		}
1771 	}
1772 
1773 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
1774 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
1775 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
1776 	g_accel_mlx5.initialized = true;
1777 	free(rdma_devs);
1778 	free(caps);
1779 
1780 	return 0;
1781 
1782 cleanup:
1783 	free(rdma_devs);
1784 	free(caps);
1785 	accel_mlx5_free_resources();
1786 
1787 	return rc;
1788 }
1789 
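/* Persist the module configuration as an "mlx5_scan_accel_module" RPC call, e.g. with the default
 * (illustrative) values:
 *   {
 *     "method": "mlx5_scan_accel_module",
 *     "params": { "qp_size": 256, "num_requests": 2047, "crypto_split_blocks": 0 }
 *   }
 */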
1790 static void
1791 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
1792 {
1793 	if (g_accel_mlx5.enabled) {
1794 		spdk_json_write_object_begin(w);
1795 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
1796 		spdk_json_write_named_object_begin(w, "params");
1797 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
1798 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
1799 		if (g_accel_mlx5.attr.allowed_devs) {
1800 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
1801 		}
1802 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
1803 		spdk_json_write_object_end(w);
1804 		spdk_json_write_object_end(w);
1805 	}
1806 }
1807 
1808 static size_t
1809 accel_mlx5_get_ctx_size(void)
1810 {
1811 	return sizeof(struct accel_mlx5_task);
1812 }
1813 
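/* Create an mlx5 DEK from the accel crypto key: key and key2 are concatenated into a single
 * AES-XTS DEK buffer, which is wiped once the keytag is created. The resulting keytag is stored in
 * key->priv. */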
1814 static int
1815 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
1816 {
1817 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
1818 	struct spdk_mlx5_crypto_keytag *keytag;
1819 	int rc;
1820 
1821 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
1822 		return -EINVAL;
1823 	}
1824 
1825 	attr.dek = calloc(1, key->key_size + key->key2_size);
1826 	if (!attr.dek) {
1827 		return -ENOMEM;
1828 	}
1829 
1830 	memcpy(attr.dek, key->key, key->key_size);
1831 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
1832 	attr.dek_len = key->key_size + key->key2_size;
1833 
1834 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
1835 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
1836 	free(attr.dek);
1837 	if (rc) {
1838 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
1839 		return rc;
1840 	}
1841 
1842 	key->priv = keytag;
1843 
1844 	return 0;
1845 }
1846 
1847 static void
1848 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
1849 {
1850 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
1851 		return;
1852 	}
1853 
1854 	spdk_mlx5_crypto_keytag_destroy(key->priv);
1855 }
1856 
1857 static bool
1858 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
1859 {
1860 	switch (cipher) {
1861 	case SPDK_ACCEL_CIPHER_AES_XTS:
1862 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
1863 	default:
1864 		return false;
1865 	}
1866 }
1867 
1868 static int
1869 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
1870 {
1871 	int i, size;
1872 
1873 	if (!domains || !array_size) {
1874 		return (int)g_accel_mlx5.num_ctxs;
1875 	}
1876 
1877 	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);
1878 
1879 	for (i = 0; i < size; i++) {
1880 		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
1881 	}
1882 
1883 	return (int)g_accel_mlx5.num_ctxs;
1884 }
1885 
1886 static struct accel_mlx5_module g_accel_mlx5 = {
1887 	.module = {
1888 		.module_init		= accel_mlx5_init,
1889 		.module_fini		= accel_mlx5_deinit,
1890 		.write_config_json	= accel_mlx5_write_config_json,
1891 		.get_ctx_size		= accel_mlx5_get_ctx_size,
1892 		.name			= "mlx5",
1893 		.supports_opcode	= accel_mlx5_supports_opcode,
1894 		.get_io_channel		= accel_mlx5_get_io_channel,
1895 		.submit_tasks		= accel_mlx5_submit_tasks,
1896 		.crypto_key_init	= accel_mlx5_crypto_key_init,
1897 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
1898 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
1899 		.get_memory_domains	= accel_mlx5_get_memory_domains,
1900 	}
1901 };
1902 
1903 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
1904