1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	assert((task)->num_wrs < UINT16_MAX);		\
40 	(task)->num_wrs++;				\
41 } while (0)
42 
43 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
44 do {									\
45 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
46 	(dev)->wrs_in_cq++;						\
47 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
48 	(qp)->wrs_submitted++;						\
49 	assert((task)->num_wrs < UINT16_MAX);				\
50 	(task)->num_wrs++;						\
51 } while (0)
52 
53 struct accel_mlx5_io_channel;
54 struct accel_mlx5_task;
55 
56 struct accel_mlx5_dev_ctx {
57 	struct ibv_context *context;
58 	struct ibv_pd *pd;
59 	struct spdk_memory_domain *domain;
60 	struct spdk_mempool *psv_pool;
61 	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
62 	struct spdk_mlx5_psv **psvs;
63 	bool crypto_mkeys;
64 	bool sig_mkeys;
65 	bool crypto_multi_block;
66 };
67 
68 enum accel_mlx5_opcode {
69 	ACCEL_MLX5_OPC_COPY,
70 	ACCEL_MLX5_OPC_CRYPTO,
71 	ACCEL_MLX5_OPC_CRC32C,
72 	ACCEL_MLX5_OPC_CRYPTO_MKEY,
73 	ACCEL_MLX5_OPC_LAST
74 };
75 
76 SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
77 		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");
78 
79 struct accel_mlx5_stats {
80 	uint64_t crypto_umrs;
81 	uint64_t sig_umrs;
82 	uint64_t rdma_reads;
83 	uint64_t rdma_writes;
84 	uint64_t polls;
85 	uint64_t idle_polls;
86 	uint64_t completions;
87 	uint64_t nomem_qdepth;
88 	uint64_t nomem_mkey;
89 	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
90 };
91 
92 struct accel_mlx5_module {
93 	struct spdk_accel_module_if module;
94 	struct accel_mlx5_stats stats;
95 	struct spdk_spinlock lock;
96 	struct accel_mlx5_dev_ctx *dev_ctxs;
97 	uint32_t num_ctxs;
98 	struct accel_mlx5_attr attr;
99 	char **allowed_devs;
100 	size_t allowed_devs_count;
101 	bool initialized;
102 	bool enabled;
103 	bool crypto_supported;
104 	bool crc32c_supported;
105 };
106 
107 struct accel_mlx5_sge {
108 	uint32_t src_sge_count;
109 	uint32_t dst_sge_count;
110 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
111 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
112 };
113 
114 struct accel_mlx5_iov_sgl {
115 	struct iovec	*iov;
116 	uint32_t	iovcnt;
117 	uint32_t	iov_offset;
118 };
119 
120 struct accel_mlx5_psv_wrapper {
121 	uint32_t psv_index;
122 	struct {
123 		uint32_t error : 1;
124 		uint32_t reserved : 31;
125 	} bits;
126 	/* The mlx5 engine requires DMA-able memory. Use this member to hold a copy of the user's crc value
127 	 * since we don't know what kind of memory the user's buffer resides in */
128 	uint32_t crc;
129 	uint32_t crc_lkey;
130 };
131 
132 struct accel_mlx5_task {
133 	struct spdk_accel_task base;
134 	struct accel_mlx5_iov_sgl src;
135 	struct accel_mlx5_iov_sgl dst;
136 	struct accel_mlx5_qp *qp;
137 	STAILQ_ENTRY(accel_mlx5_task) link;
138 	uint16_t num_reqs;
139 	uint16_t num_completed_reqs;
140 	uint16_t num_submitted_reqs;
141 	uint16_t num_ops; /* number of allocated mkeys or number of operations */
142 	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
143 	union {
144 		struct {
145 			uint16_t blocks_per_req;
146 			uint16_t num_processed_blocks;
147 			uint16_t num_blocks;
148 		};
149 		struct {
150 			struct accel_mlx5_psv_wrapper *psv;
151 			uint32_t last_umr_len;
152 			uint8_t last_mkey_idx;
153 		};
154 	};
155 	union {
156 		uint16_t raw;
157 		struct {
158 			uint16_t inplace : 1;
159 			uint16_t driver_seq : 1;
160 			uint16_t needs_data_transfer : 1;
161 			uint16_t enc_order : 2;
162 			uint16_t mlx5_opcode: 4;
163 		};
164 	};
165 	/* Keep this array last since not all elements might be accessed; this reduces the amount of data to
166 	 * be cached */
167 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
168 };
169 
170 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");
171 
172 struct accel_mlx5_qp {
173 	struct spdk_mlx5_qp *qp;
174 	struct ibv_qp *verbs_qp;
175 	struct accel_mlx5_dev *dev;
176 	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap completions
177 	 * for all submitted requests */
178 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
179 	uint16_t wrs_submitted;
180 	uint16_t wrs_max;
181 	bool recovering;
182 	struct spdk_poller *recover_poller;
183 };
184 
185 struct accel_mlx5_dev {
186 	struct accel_mlx5_qp qp;
187 	struct spdk_mlx5_cq *cq;
188 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
189 	struct spdk_mlx5_mkey_pool *sig_mkeys;
190 	struct spdk_rdma_utils_mem_map *mmap;
191 	struct accel_mlx5_dev_ctx *dev_ctx;
192 	struct spdk_io_channel *ch;
193 	uint16_t wrs_in_cq;
194 	uint16_t wrs_in_cq_max;
195 	uint16_t crypto_split_blocks;
196 	bool crypto_multi_block;
197 	/* Pending tasks waiting for request resources */
198 	STAILQ_HEAD(, accel_mlx5_task) nomem;
199 	TAILQ_ENTRY(accel_mlx5_dev) link;
200 	struct accel_mlx5_stats stats;
201 };
202 
203 struct accel_mlx5_io_channel {
204 	struct accel_mlx5_dev *devs;
205 	struct spdk_poller *poller;
206 	uint32_t num_devs;
207 	/* Index in \b devs to be used for operations in a round-robin manner */
208 	uint32_t dev_idx;
209 };
210 
211 struct accel_mlx5_task_operations {
212 	int (*init)(struct accel_mlx5_task *task);
213 	int (*process)(struct accel_mlx5_task *task);
214 	int (*cont)(struct accel_mlx5_task *task);
215 	void (*complete)(struct accel_mlx5_task *task);
216 };
217 
218 struct accel_mlx5_psv_pool_iter_cb_args {
219 	struct accel_mlx5_dev_ctx *dev;
220 	struct spdk_rdma_utils_mem_map *map;
221 	int rc;
222 };
223 
224 struct accel_mlx5_dump_stats_ctx {
225 	struct accel_mlx5_stats total;
226 	struct spdk_json_write_ctx *w;
227 	enum accel_mlx5_dump_state_level level;
228 	accel_mlx5_dump_stat_done_cb cb;
229 	void *ctx;
230 };
231 
232 static struct accel_mlx5_module g_accel_mlx5;
233 static struct spdk_accel_driver g_accel_mlx5_driver;
234 
235 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
236 		struct spdk_accel_sequence *seq);
237 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task);
238 
239 static inline void
240 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
241 {
242 	s->iov = iov;
243 	s->iovcnt = iovcnt;
244 	s->iov_offset = 0;
245 }
246 
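/* Advance the iov sgl by 'step' bytes, skipping over any iov entries that become fully consumed. */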
247 static inline void
248 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
249 {
250 	s->iov_offset += step;
251 	while (s->iovcnt > 0) {
252 		assert(s->iov != NULL);
253 		if (s->iov_offset < s->iov->iov_len) {
254 			break;
255 		}
256 
257 		s->iov_offset -= s->iov->iov_len;
258 		s->iov++;
259 		s->iovcnt--;
260 	}
261 }
262 
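/* Move the iov sgl back by 'step' bytes; used to roll back to a block boundary when an operation
 * consumed fewer bytes than planned. */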
263 static inline void
264 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
265 {
266 	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
267 		      max_iovs, s->iov_offset, step);
268 	while (s->iovcnt <= max_iovs) {
269 		assert(s->iov != NULL);
270 		if (s->iov_offset >= step) {
271 			s->iov_offset -= step;
272 			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
273 				      s->iov_offset);
274 			return;
275 		}
276 		step -= s->iov_offset;
277 		s->iov--;
278 		s->iovcnt++;
279 		s->iov_offset = s->iov->iov_len;
280 		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
281 			      s->iov_offset, step);
282 	}
283 
284 	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
285 	assert(0);
286 }
287 
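/* Trim 'step' bytes from the tail of the sge list and return the number of sge entries still in use. */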
288 static inline int
289 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
290 {
291 	int i;
292 
293 	assert(sge_count > 0);
294 	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
295 	for (i = (int)sge_count - 1; i >= 0; i--) {
296 		if (sge[i].length > step) {
297 			sge[i].length -= step;
298 			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
299 			return (int)i + 1;
300 		}
301 		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
302 		step -= sge[i].length;
303 	}
304 
305 	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
306 	assert(step == 0);
307 
308 	return 0;
309 }
310 
311 static inline void
312 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
313 {
314 	struct accel_mlx5_dev *dev = task->qp->dev;
315 
316 	assert(task->num_ops);
317 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
318 	spdk_accel_task_complete(&task->base, 0);
319 }
320 
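/* Release any mkeys and PSV held by the task, complete it with an error and, for driver-owned
 * sequences, resume processing of the accel sequence. */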
321 static inline void
322 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
323 {
324 	struct accel_mlx5_dev *dev = task->qp->dev;
325 	struct spdk_accel_task *next;
326 	struct spdk_accel_sequence *seq;
327 	bool driver_seq;
328 
329 	assert(task->num_reqs == task->num_completed_reqs);
330 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
331 
332 	if (task->num_ops) {
333 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
334 			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
335 		}
336 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
337 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
338 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
339 		}
340 	}
341 	next = spdk_accel_sequence_next_task(&task->base);
342 	seq = task->base.seq;
343 	driver_seq = task->driver_seq;
344 
345 	assert(task->num_reqs == task->num_completed_reqs);
346 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc);
347 	spdk_accel_task_complete(&task->base, rc);
348 
349 	if (driver_seq) {
350 		struct spdk_io_channel *ch = task->qp->dev->ch;
351 
352 		assert(seq);
353 		if (next) {
354 			accel_mlx5_execute_sequence(ch, seq);
355 		} else {
356 			spdk_accel_sequence_continue(seq);
357 		}
358 	}
359 }
360 
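/* Translate a buffer address into an ibv_sge (addr, length, lkey) using either the caller's memory
 * domain or the device's memory map. */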
361 static int
362 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
363 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
364 {
365 	struct spdk_rdma_utils_memory_translation map_translation;
366 	struct spdk_memory_domain_translation_result domain_translation;
367 	struct spdk_memory_domain_translation_ctx local_ctx;
368 	int rc;
369 
370 	if (domain) {
371 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
372 		local_ctx.size = sizeof(local_ctx);
373 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
374 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
375 						       &local_ctx, addr, size, &domain_translation);
376 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
377 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
378 				    domain_translation.iov_count);
379 			if (rc == 0) {
380 				rc = -EINVAL;
381 			}
382 
383 			return rc;
384 		}
385 		sge->lkey = domain_translation.rdma.lkey;
386 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
387 		sge->length = domain_translation.iov.iov_len;
388 	} else {
389 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
390 						     &map_translation);
391 		if (spdk_unlikely(rc)) {
392 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
393 			return rc;
394 		}
395 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
396 		sge->addr = (uint64_t)addr;
397 		sge->length = size;
398 	}
399 
400 	return 0;
401 }
402 
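/* Fill up to ACCEL_MLX5_MAX_SGE sge entries covering up to 'len' bytes from the iov sgl. Returns the
 * number of entries filled; bytes that did not fit are reported via *_remaining. */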
403 static inline int
404 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
405 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
406 			  struct spdk_memory_domain *domain, void *domain_ctx)
407 {
408 	void *addr;
409 	uint32_t remaining = len;
410 	uint32_t size;
411 	int i = 0;
412 	int rc;
413 
414 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
415 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
416 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
417 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
418 		if (spdk_unlikely(rc)) {
419 			return rc;
420 		}
421 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
422 			      sge[i].length, sge[i].addr);
423 		accel_mlx5_iov_sgl_advance(iovs, size);
424 		i++;
425 		assert(remaining >= size);
426 		remaining -= size;
427 	}
428 	*_remaining = remaining;
429 
430 	return i;
431 }
432 
433 static inline bool
434 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
435 {
436 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
437 }
438 
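/* Return the number of free WQE slots in the qp, or 0 if the CQ cannot accommodate one more completion. */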
439 static inline uint16_t
440 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
441 {
442 	assert(qp->wrs_max >= qp->wrs_submitted);
443 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
444 
445 	/* Each task produces only 1 CQE, so we need only 1 free CQ slot */
446 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
447 		return 0;
448 	}
449 
450 	return qp->wrs_max - qp->wrs_submitted;
451 }
452 
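/* Allocate mkeys for the task's remaining requests, up to ACCEL_MLX5_MAX_MKEYS_IN_TASK. Returns the
 * number of mkeys allocated, or 0 if the pool is empty. */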
453 static inline uint32_t
454 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
455 {
456 	uint32_t num_ops;
457 	int rc;
458 
459 	assert(task->num_reqs > task->num_completed_reqs);
460 	num_ops = task->num_reqs - task->num_completed_reqs;
461 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
462 	if (!num_ops) {
463 		return 0;
464 	}
465 	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
466 	if (spdk_unlikely(rc)) {
467 		return 0;
468 	}
469 	assert(num_ops <= UINT16_MAX);
470 	task->num_ops = num_ops;
471 
472 	return num_ops;
473 }
474 
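/* Map a crypto block size in bytes to the mlx5 block size selector; returns the RESERVED selector for
 * unsupported sizes. */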
475 static inline uint8_t
476 bs_to_bs_selector(uint32_t bs)
477 {
478 	switch (bs) {
479 	case 512:
480 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
481 	case 520:
482 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
483 	case 4096:
484 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
485 	case 4160:
486 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
487 	default:
488 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
489 	}
490 }
491 
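/* Build the src (and, for out-of-place tasks, dst) sges and post a crypto UMR covering 'num_blocks'
 * blocks. If the sges cannot cover the whole length, the UMR is shrunk to the previous block boundary
 * and fewer blocks are processed. */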
492 static inline int
493 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
494 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
495 				uint64_t wr_id, uint32_t flags)
496 {
497 	struct spdk_mlx5_umr_crypto_attr cattr;
498 	struct spdk_mlx5_umr_attr umr_attr;
499 	struct accel_mlx5_qp *qp = mlx5_task->qp;
500 	struct accel_mlx5_dev *dev = qp->dev;
501 	struct spdk_accel_task *task = &mlx5_task->base;
502 	uint32_t length, remaining = 0, block_size = task->block_size;
503 	int rc;
504 
505 	length = num_blocks * block_size;
506 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
507 		      num_blocks);
508 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
509 				       task->src_domain, task->src_domain_ctx);
510 	if (spdk_unlikely(rc <= 0)) {
511 		if (rc == 0) {
512 			rc = -EINVAL;
513 		}
514 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
515 		return rc;
516 	}
517 	sge->src_sge_count = rc;
518 	if (spdk_unlikely(remaining)) {
519 		uint32_t new_len = length - remaining;
520 		uint32_t aligned_len, updated_num_blocks;
521 
522 		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
523 		if (new_len < block_size) {
524 			/* We need to process at least 1 block. If buffer is too fragmented, we can't do
525 			 * anything */
526 			return -ERANGE;
527 		}
528 
529 		/* Regular integer division; we need to round down to the previous block boundary */
530 		updated_num_blocks = new_len / block_size;
531 		assert(updated_num_blocks);
532 		assert(updated_num_blocks < num_blocks);
533 		aligned_len = updated_num_blocks * block_size;
534 
535 		if (aligned_len < new_len) {
536 			uint32_t dt = new_len - aligned_len;
537 
538 			/* We can't process a partial block, so we need to unwind src iov_sgl and sge to the
539 			 * previous block boundary */
540 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
541 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
542 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
543 			if (!sge->src_sge_count) {
544 				return -ERANGE;
545 			}
546 		}
547 		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
548 		length = aligned_len;
549 		num_blocks = updated_num_blocks;
550 	}
551 
552 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
553 	cattr.keytag = 0;
554 	cattr.dek_obj_id = dek_data->dek_obj_id;
555 	cattr.tweak_mode = dek_data->tweak_mode;
556 	cattr.enc_order = mlx5_task->enc_order;
557 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
558 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
559 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
560 		return -EINVAL;
561 	}
562 	umr_attr.mkey = mkey;
563 	umr_attr.sge = sge->src_sge;
564 
565 	if (!mlx5_task->inplace) {
566 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
567 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
568 					       task->dst_domain, task->dst_domain_ctx);
569 		if (spdk_unlikely(rc <= 0)) {
570 			if (rc == 0) {
571 				rc = -EINVAL;
572 			}
573 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
574 			return rc;
575 		}
576 		sge->dst_sge_count = rc;
577 		if (spdk_unlikely(remaining)) {
578 			uint32_t new_len = length - remaining;
579 			uint32_t aligned_len, updated_num_blocks, dt;
580 
581 			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
582 			if (new_len < block_size) {
583 				/* We need to process at least 1 block. If buffer is too fragmented, we can't do
584 				 * anything */
585 				return -ERANGE;
586 			}
587 
588 			/* Regular integer division; we need to round down to the previous block boundary */
589 			updated_num_blocks = new_len / block_size;
590 			assert(updated_num_blocks);
591 			assert(updated_num_blocks < num_blocks);
592 			aligned_len = updated_num_blocks * block_size;
593 
594 			if (aligned_len < new_len) {
595 				dt = new_len - aligned_len;
596 				assert(dt > 0 && dt < length);
597 				/* We can't process a partial block, so we need to unwind src and dst iov_sgl and sge to
598 				 * the previous block boundary */
599 				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
600 				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
601 				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
602 				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
603 				if (!sge->dst_sge_count) {
604 					return -ERANGE;
605 				}
606 			}
607 			assert(length > aligned_len);
608 			dt = length - aligned_len;
609 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
610 			/* The same applies to the src iov_sgl and sge. In the worst case we unwind SRC twice */
611 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
612 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
613 			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
614 			if (!sge->src_sge_count) {
615 				return -ERANGE;
616 			}
617 			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
618 			length = aligned_len;
619 			num_blocks = updated_num_blocks;
620 		}
621 	}
622 
623 	SPDK_DEBUGLOG(accel_mlx5,
624 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
625 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
626 		      num_blocks);
627 
628 	umr_attr.sge_count = sge->src_sge_count;
629 	umr_attr.umr_len = length;
630 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
631 	mlx5_task->num_processed_blocks += num_blocks;
632 
633 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);
634 
635 	return rc;
636 }
637 
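/* Post a UMR + RDMA_READ pair for each request of the crypto task; only the last RDMA_READ is signaled
 * so the whole task produces a single CQE. */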
638 static inline int
639 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
640 {
641 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
642 	struct spdk_mlx5_crypto_dek_data dek_data;
643 	struct accel_mlx5_qp *qp = mlx5_task->qp;
644 	struct accel_mlx5_dev *dev = qp->dev;
645 	/* First RDMA after UMR must have a SMALL_FENCE */
646 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
647 	uint16_t num_blocks;
648 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
649 				    mlx5_task->num_ops);
650 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
651 	uint16_t i;
652 	int rc;
653 
654 	assert(qp_slot > 1);
655 	num_ops = spdk_min(num_ops, qp_slot >> 1);
656 	if (spdk_unlikely(!num_ops)) {
657 		return -EINVAL;
658 	}
659 
660 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
661 	if (spdk_unlikely(rc)) {
662 		return rc;
663 	}
664 
665 	mlx5_task->num_wrs = 0;
666 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
667 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
668 	for (i = 0; i < num_ops; i++) {
669 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
670 			/* The last request may consume fewer blocks than calculated if crypto_multi_block is true */
671 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
672 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
673 		} else {
674 			num_blocks = mlx5_task->blocks_per_req;
675 		}
676 
677 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
678 						     &dek_data, 0, 0);
679 		if (spdk_unlikely(rc)) {
680 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
681 			return rc;
682 		}
683 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
684 		dev->stats.crypto_umrs++;
685 	}
686 
687 	/* Loop over `num_ops - 1` operations to simplify flags handling; the last one is posted separately below */
688 	for (i = 0; i < num_ops - 1; i++) {
689 		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
690 		if (mlx5_task->inplace) {
691 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
692 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
693 		} else {
694 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
695 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
696 		}
697 		if (spdk_unlikely(rc)) {
698 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
699 			return rc;
700 		}
701 
702 		first_rdma_fence = 0;
703 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
704 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
705 		mlx5_task->num_submitted_reqs++;
706 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
707 		dev->stats.rdma_reads++;
708 	}
709 
710 	if (mlx5_task->inplace) {
711 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
712 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
713 	} else {
714 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
715 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
716 	}
717 	if (spdk_unlikely(rc)) {
718 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
719 		return rc;
720 	}
721 
722 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
723 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
724 	mlx5_task->num_submitted_reqs++;
725 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
726 	dev->stats.rdma_reads++;
727 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
728 
729 	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
730 			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
731 		/* We hit the "out of sge entries" case with a highly
732 		 * fragmented payload. In that case the accel_mlx5_configure_crypto_umr
733 		 * function handled fewer data blocks than expected.
734 		 * That means we need at least 1 more request to complete this task; this request will be
735 		 * executed once all submitted ones are completed */
736 		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
737 			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
738 		mlx5_task->num_reqs++;
739 	}
740 
741 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
742 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
743 
744 	return 0;
745 }
746 
747 static inline int
748 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
749 {
750 	struct accel_mlx5_qp *qp = task->qp;
751 	struct accel_mlx5_dev *dev = qp->dev;
752 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
753 
754 	assert(task->num_reqs > task->num_completed_reqs);
755 	if (task->num_ops == 0) {
756 		/* No mkeys allocated, try to allocate now */
757 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
758 			/* Pool is empty, queue this task */
759 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
760 			dev->stats.nomem_mkey++;
761 			return -ENOMEM;
762 		}
763 	}
764 	/* We need to post at least 1 UMR and 1 RDMA operation */
765 	if (spdk_unlikely(qp_slot < 2)) {
766 		/* QP is full, queue this task */
767 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
768 		task->qp->dev->stats.nomem_qdepth++;
769 		return -ENOMEM;
770 	}
771 
772 	return accel_mlx5_crypto_task_process(task);
773 }
774 
775 static inline int
776 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
777 {
778 	struct spdk_accel_task *task = &mlx5_task->base;
779 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
780 	uint64_t src_nbytes = task->nbytes;
781 #ifdef DEBUG
782 	uint64_t dst_nbytes;
783 	uint32_t i;
784 #endif
785 	bool crypto_key_ok;
786 
787 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
788 			 task->crypto_key->priv);
789 	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
790 		if (crypto_key_ok) {
791 			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
792 				    mlx5_task->base.block_size);
793 		} else {
794 			SPDK_ERRLOG("Wrong crypto key provided\n");
795 		}
796 		return -EINVAL;
797 	}
798 
799 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
800 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
801 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
802 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
803 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
804 		mlx5_task->inplace = 1;
805 	} else {
806 #ifdef DEBUG
807 		dst_nbytes = 0;
808 		for (i = 0; i < task->d.iovcnt; i++) {
809 			dst_nbytes += task->d.iovs[i].iov_len;
810 		}
811 
812 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
813 			return -EINVAL;
814 		}
815 #endif
816 		mlx5_task->inplace = 0;
817 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
818 	}
819 
820 	if (dev->crypto_multi_block) {
821 		if (dev->crypto_split_blocks) {
822 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
823 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
824 			/* The last req may consume fewer blocks */
825 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
826 		} else {
827 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
828 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
829 
830 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
831 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
832 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
833 			} else {
834 				mlx5_task->num_reqs = 1;
835 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
836 			}
837 		}
838 	} else {
839 		mlx5_task->num_reqs = mlx5_task->num_blocks;
840 		mlx5_task->blocks_per_req = 1;
841 	}
842 
843 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) {
844 		/* Pool is empty, queue this task */
845 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
846 		dev->stats.nomem_mkey++;
847 		return -ENOMEM;
848 	}
849 	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
850 		/* Queue is full, queue this task */
851 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
852 			      mlx5_task->qp);
853 		dev->stats.nomem_qdepth++;
854 		return -ENOMEM;
855 	}
856 
857 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
858 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
859 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
860 
861 	return 0;
862 }
863 
864 static inline void
865 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
866 {
867 	spdk_accel_task_complete(&mlx5_task->base, 0);
868 }
869 
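/* Post a single RDMA_WRITE that copies as many src bytes as fit into the current dst iov entry. */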
870 static inline int
871 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
872 				 uint64_t wrid, uint32_t fence)
873 {
874 	struct spdk_accel_task *task = &mlx5_task->base;
875 	struct accel_mlx5_sge sge;
876 	uint32_t remaining = 0;
877 	uint32_t dst_len;
878 	int rc;
879 
880 	/* Limit one RDMA_WRITE to the length of the dst buffer. Not all src buffers may fit into one dst buffer
881 	 * due to the ACCEL_MLX5_MAX_SGE limitation. If this is the case then remaining is not zero */
882 	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
883 	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
884 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
885 				       task->src_domain, task->src_domain_ctx);
886 	if (spdk_unlikely(rc <= 0)) {
887 		if (rc == 0) {
888 			rc = -EINVAL;
889 		}
890 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
891 		return rc;
892 	}
893 	sge.src_sge_count = rc;
894 	assert(dst_len > remaining);
895 	dst_len -= remaining;
896 
897 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
898 				       task->dst_domain, task->dst_domain_ctx);
899 	if (spdk_unlikely(rc != 1)) {
900 		/* We use a single dst entry; any result other than 1 is an error */
901 		if (rc == 0) {
902 			rc = -EINVAL;
903 		}
904 		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
905 		return rc;
906 	}
907 	if (spdk_unlikely(remaining)) {
908 		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
909 		assert(0);
910 		return -EINVAL;
911 	}
912 
913 	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
914 				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
915 	if (spdk_unlikely(rc)) {
916 		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
917 		return rc;
918 	}
919 	qp->dev->stats.rdma_writes++;
920 
921 	return 0;
922 }
923 
924 static inline int
925 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
926 {
927 
928 	struct accel_mlx5_qp *qp = mlx5_task->qp;
929 	struct accel_mlx5_dev *dev = qp->dev;
930 	uint16_t i;
931 	int rc;
932 
933 	mlx5_task->num_wrs = 0;
934 	assert(mlx5_task->num_reqs > 0);
935 	assert(mlx5_task->num_ops > 0);
936 
937 	/* Handle n-1 reqs in order to simplify wrid and fence handling */
938 	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
939 		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
940 		if (spdk_unlikely(rc)) {
941 			return rc;
942 		}
943 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
944 		mlx5_task->num_submitted_reqs++;
945 	}
946 
947 	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
948 					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
949 	if (spdk_unlikely(rc)) {
950 		return rc;
951 	}
952 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
953 	mlx5_task->num_submitted_reqs++;
954 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
955 
956 	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);
957 
958 	return 0;
959 }
960 
961 static inline int
962 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
963 {
964 	struct accel_mlx5_qp *qp = task->qp;
965 	struct accel_mlx5_dev *dev = qp->dev;
966 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
967 
968 	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
969 	if (spdk_unlikely(task->num_ops == 0)) {
970 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
971 		dev->stats.nomem_qdepth++;
972 		return -ENOMEM;
973 	}
974 	return accel_mlx5_copy_task_process(task);
975 }
976 
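/* Calculate how many RDMA_WRITE operations are needed to copy the src iovs into the dst iovs given the
 * ACCEL_MLX5_MAX_SGE limit. */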
977 static inline uint32_t
978 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
979 			       struct iovec *dst_iov, uint32_t dst_iovcnt)
980 {
981 	uint32_t src = 0;
982 	uint32_t dst = 0;
983 	uint64_t src_offset = 0;
984 	uint64_t dst_offset = 0;
985 	uint32_t num_ops = 0;
986 	uint32_t src_sge_count = 0;
987 
988 	while (src < src_iovcnt && dst < dst_iovcnt) {
989 		uint64_t src_len = src_iov[src].iov_len - src_offset;
990 		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;
991 
992 		if (dst_len < src_len) {
993 			dst_offset = 0;
994 			src_offset += dst_len;
995 			dst++;
996 			num_ops++;
997 			src_sge_count = 0;
998 		} else if (src_len < dst_len) {
999 			dst_offset += src_len;
1000 			src_offset = 0;
1001 			src++;
1002 			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
1003 				num_ops++;
1004 				src_sge_count = 0;
1005 			}
1006 		} else {
1007 			dst_offset = 0;
1008 			src_offset = 0;
1009 			dst++;
1010 			src++;
1011 			num_ops++;
1012 			src_sge_count = 0;
1013 		}
1014 	}
1015 
1016 	assert(src == src_iovcnt);
1017 	assert(dst == dst_iovcnt);
1018 	assert(src_offset == 0);
1019 	assert(dst_offset == 0);
1020 	return num_ops;
1021 }
1022 
1023 static inline int
1024 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
1025 {
1026 	struct spdk_accel_task *task = &mlx5_task->base;
1027 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1028 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1029 
1030 	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
1031 		mlx5_task->num_reqs = task->d.iovcnt;
1032 	} else if (task->d.iovcnt == 1) {
1033 		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
1034 	} else {
1035 		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
1036 				      task->d.iovs, task->d.iovcnt);
1037 	}
1038 	mlx5_task->inplace = 0;
1039 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1040 	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1041 	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
1042 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1043 		qp->dev->stats.nomem_qdepth++;
1044 		return -ENOMEM;
1045 	}
1046 	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
1047 		      mlx5_task->num_ops);
1048 
1049 	return 0;
1050 }
1051 
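/* Consume up to *len bytes from the iov array starting at *iov_offset. Updates *iov_offset and *len and
 * returns the number of fully consumed iov entries. */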
1052 static inline uint32_t
1053 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
1054 {
1055 	uint32_t i;
1056 	size_t iov_len;
1057 
1058 	for (i = 0; *len != 0 && i < iovcnt; i++) {
1059 		iov_len = iov[i].iov_len - *iov_offset;
1060 
1061 		if (iov_len < *len) {
1062 			*iov_offset = 0;
1063 			*len -= iov_len;
1064 			continue;
1065 		}
1066 		if (iov_len == *len) {
1067 			*iov_offset = 0;
1068 			i++;
1069 		} else { /* iov_len > *len */
1070 			*iov_offset += *len;
1071 		}
1072 		*len = 0;
1073 		break;
1074 	}
1075 
1076 	return i;
1077 }
1078 
1079 static inline void
1080 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
1081 {
1082 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1083 
1084 	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
1085 	/* Normal task completion without allocated mkeys is not possible */
1086 	assert(mlx5_task->num_ops);
1087 	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
1088 	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
1089 	spdk_accel_task_complete(&mlx5_task->base, 0);
1090 }
1091 
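/* Post a signature UMR that accumulates CRC32C over 'umr_len' bytes described by 'sge' into the task's PSV. */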
1092 static inline int
1093 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
1094 				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
1095 				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
1096 				  bool sig_init, bool sig_check_gen)
1097 {
1098 	struct spdk_mlx5_umr_sig_attr sattr = {
1099 		.seed = mlx5_task->base.seed ^ UINT32_MAX,
1100 		.psv_index = mlx5_task->psv->psv_index,
1101 		.domain = sig_domain,
1102 		.sigerr_count = mkey->sig.sigerr_count,
1103 		.raw_data_size = umr_len,
1104 		.init = sig_init,
1105 		.check_gen = sig_check_gen,
1106 	};
1107 	struct spdk_mlx5_umr_attr umr_attr = {
1108 		.mkey = mkey->mkey,
1109 		.umr_len = umr_len,
1110 		.sge_count = sge_count,
1111 		.sge = sge,
1112 	};
1113 
1114 	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
1115 }
1116 
1117 static inline int
1118 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
1119 {
1120 	struct spdk_accel_task *task = &mlx5_task->base;
1121 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1122 	struct accel_mlx5_dev *dev = qp->dev;
1123 	uint32_t remaining;
1124 	int rc;
1125 
1126 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
1127 				       task->src_domain, task->src_domain_ctx);
1128 	if (spdk_unlikely(rc <= 0)) {
1129 		if (rc == 0) {
1130 			rc = -EINVAL;
1131 		}
1132 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
1133 		return rc;
1134 	}
1135 	assert(remaining == 0);
1136 	sge->src_sge_count = rc;
1137 
1138 	if (!mlx5_task->inplace) {
1139 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
1140 					       task->dst_domain, task->dst_domain_ctx);
1141 		if (spdk_unlikely(rc <= 0)) {
1142 			if (rc == 0) {
1143 				rc = -EINVAL;
1144 			}
1145 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
1146 			return rc;
1147 		}
1148 		assert(remaining == 0);
1149 		sge->dst_sge_count = rc;
1150 	}
1151 
1152 	return 0;
1153 }
1154 
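/* Handle a CRC task that fits into a single UMR + RDMA_READ pair; the CRC destination is appended as the
 * last sge. */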
1155 static inline int
1156 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
1157 {
1158 	struct accel_mlx5_sge sges;
1159 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1160 	struct accel_mlx5_dev *dev = qp->dev;
1161 	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1162 				    mlx5_task->num_ops);
1163 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1164 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1165 	struct ibv_sge *sge;
1166 	int rc;
1167 	uint16_t sge_count;
1168 
1169 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1170 	if (spdk_unlikely(!num_ops)) {
1171 		return -EINVAL;
1172 	}
1173 
1174 	mlx5_task->num_wrs = 0;
1175 	/* At this moment we have as many requests as can be submitted to a qp */
1176 	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
1177 	if (spdk_unlikely(rc)) {
1178 		return rc;
1179 	}
1180 	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
1181 					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true);
1182 	if (spdk_unlikely(rc)) {
1183 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1184 		return rc;
1185 	}
1186 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1187 	dev->stats.sig_umrs++;
1188 
1189 	if (mlx5_task->inplace) {
1190 		sge = sges.src_sge;
1191 		sge_count = sges.src_sge_count;
1192 	} else {
1193 		sge = sges.dst_sge;
1194 		sge_count = sges.dst_sge_count;
1195 	}
1196 
1197 	/*
1198 	 * Add the crc destination to the end of sges. A free entry must be available for CRC
1199 	 * because the task init function reserved it.
1200 	 */
1201 	assert(sge_count < ACCEL_MLX5_MAX_SGE);
1202 	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
1203 	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1204 	sge[sge_count++].length = sizeof(uint32_t);
1205 
1206 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1207 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1208 		if (spdk_unlikely(rc)) {
1209 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1210 			return rc;
1211 		}
1212 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1213 	}
1214 
1215 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
1216 				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1217 	if (spdk_unlikely(rc)) {
1218 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1219 		return rc;
1220 	}
1221 	mlx5_task->num_submitted_reqs++;
1222 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1223 	dev->stats.rdma_reads++;
1224 
1225 	return 0;
1226 }
1227 
1228 static inline int
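/* Build UMR sges from umr_iovs such that the covered length can also be described by at most
 * ACCEL_MLX5_MAX_SGE entries of rdma_iovs. Both sgls are advanced by the resulting length, which is
 * returned via *len; the return value is the number of sges filled. */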
1229 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
1230 				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
1231 				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
1232 {
1233 	int umr_idx = 0;
1234 	int rdma_idx = 0;
1235 	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1236 	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1237 	size_t umr_iov_offset;
1238 	size_t rdma_iov_offset;
1239 	size_t umr_len = 0;
1240 	void *sge_addr;
1241 	size_t sge_len;
1242 	size_t umr_sge_len;
1243 	size_t rdma_sge_len;
1244 	int rc;
1245 
1246 	umr_iov_offset = umr_iovs->iov_offset;
1247 	rdma_iov_offset = rdma_iovs->iov_offset;
1248 
1249 	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
1250 		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
1251 		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
1252 		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;
1253 
1254 		if (umr_sge_len == rdma_sge_len) {
1255 			rdma_idx++;
1256 			umr_iov_offset = 0;
1257 			rdma_iov_offset = 0;
1258 			sge_len = umr_sge_len;
1259 		} else if (umr_sge_len < rdma_sge_len) {
1260 			umr_iov_offset = 0;
1261 			rdma_iov_offset += umr_sge_len;
1262 			sge_len = umr_sge_len;
1263 		} else {
1264 			size_t remaining;
1265 
1266 			remaining = umr_sge_len - rdma_sge_len;
1267 			while (remaining) {
1268 				rdma_idx++;
1269 				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
1270 					break;
1271 				}
1272 				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
1273 				if (remaining == rdma_sge_len) {
1274 					rdma_idx++;
1275 					rdma_iov_offset = 0;
1276 					umr_iov_offset = 0;
1277 					remaining = 0;
1278 					break;
1279 				}
1280 				if (remaining < rdma_sge_len) {
1281 					rdma_iov_offset = remaining;
1282 					umr_iov_offset = 0;
1283 					remaining = 0;
1284 					break;
1285 				}
1286 				remaining -= rdma_sge_len;
1287 			}
1288 			sge_len = umr_sge_len - remaining;
1289 		}
1290 		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
1291 		if (spdk_unlikely(rc)) {
1292 			return -EINVAL;
1293 		}
1294 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
1295 			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
1296 		umr_len += sge_len;
1297 		umr_idx++;
1298 	}
1299 	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
1300 	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
1301 	*len = umr_len;
1302 
1303 	return umr_idx;
1304 }
1305 
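/* Handle a CRC task that requires several UMR + RDMA_READ pairs. The last request may carry only the
 * CRC and then reuses the mkey and length of the previous UMR. */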
1306 static inline int
1307 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
1308 {
1309 	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
1310 	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
1311 	struct spdk_accel_task *task = &mlx5_task->base;
1312 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1313 	struct accel_mlx5_dev *dev = qp->dev;
1314 	struct accel_mlx5_iov_sgl umr_sgl;
1315 	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
1316 	struct accel_mlx5_iov_sgl rdma_sgl;
1317 	uint64_t umr_offset;
1318 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
1319 	int sge_count;
1320 	uint32_t remaining;
1321 	int rc;
1322 	uint16_t i;
1323 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1324 				    mlx5_task->num_ops);
1325 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1326 	bool sig_init, sig_check_gen = false;
1327 
1328 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1329 	if (spdk_unlikely(!num_ops)) {
1330 		return -EINVAL;
1331 	}
1332 	/* Init signature on the first UMR */
1333 	sig_init = !mlx5_task->num_submitted_reqs;
1334 
1335 	/*
1336 	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
1337 	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
1338 	 *
1339 	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
1340 	 * the source accel_mlx5_iov_sgl.
1341 	 *
1342 	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
1343 	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
1344 	 */
1345 	if (mlx5_task->inplace) {
1346 		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1347 		umr_sgl_ptr = &umr_sgl;
1348 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1349 	} else {
1350 		umr_sgl_ptr = &mlx5_task->src;
1351 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1352 	}
1353 	mlx5_task->num_wrs = 0;
1354 	for (i = 0; i < num_ops; i++) {
1355 		/*
1356 		 * The last request may have only CRC. Skip UMR in this case because the MKey from
1357 		 * the previous request is used.
1358 		 */
1359 		if (umr_sgl_ptr->iovcnt == 0) {
1360 			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
1361 			break;
1362 		}
1363 		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
1364 				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
1365 		if (spdk_unlikely(sge_count <= 0)) {
1366 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1367 			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
1368 			return rc;
1369 		}
1370 		if (umr_sgl_ptr->iovcnt == 0) {
1371 			/*
1372 			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
1373 			 * the last UMR in this case. Since the last request can be postponed to the next
1374 			 * call of this function, we must save the MKey to the task structure.
1375 			 */
1376 			mlx5_task->last_umr_len = umr_len[i];
1377 			mlx5_task->last_mkey_idx = i;
1378 			sig_check_gen = true;
1379 		}
1380 		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
1381 						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
1382 						       sig_check_gen);
1383 		if (spdk_unlikely(rc)) {
1384 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1385 			return rc;
1386 		}
1387 		sig_init = false;
1388 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1389 		dev->stats.sig_umrs++;
1390 	}
1391 
1392 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1393 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1394 		if (spdk_unlikely(rc)) {
1395 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1396 			return rc;
1397 		}
1398 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1399 	}
1400 
1401 	for (i = 0; i < num_ops - 1; i++) {
1402 		if (mlx5_task->inplace) {
1403 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1404 							      task->src_domain, task->src_domain_ctx);
1405 		} else {
1406 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1407 							      task->dst_domain, task->dst_domain_ctx);
1408 		}
1409 		if (spdk_unlikely(sge_count <= 0)) {
1410 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1411 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1412 			return rc;
1413 		}
1414 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
1415 					    0, rdma_fence);
1416 		if (spdk_unlikely(rc)) {
1417 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1418 			return rc;
1419 		}
1420 		mlx5_task->num_submitted_reqs++;
1421 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1422 		dev->stats.rdma_reads++;
1423 		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1424 	}
1425 	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace &&
1426 			mlx5_task->dst.iovcnt == 0)) {
1427 		/*
1428 		 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey.
1429 		 * The CRC is handled in the previous MKey in this case.
1430 		 */
1431 		sge_count = 0;
1432 		umr_offset = mlx5_task->last_umr_len;
1433 	} else {
1434 		umr_offset = 0;
1435 		mlx5_task->last_mkey_idx = i;
1436 		if (mlx5_task->inplace) {
1437 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1438 							      task->src_domain, task->src_domain_ctx);
1439 		} else {
1440 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1441 							      task->dst_domain, task->dst_domain_ctx);
1442 		}
1443 		if (spdk_unlikely(sge_count <= 0)) {
1444 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1445 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1446 			return rc;
1447 		}
1448 		assert(remaining == 0);
1449 	}
1450 	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
1451 		/* Ensure that there is a free sge for the CRC destination. */
1452 		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
1453 		/* Add the crc destination to the end of sges. */
1454 		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
1455 		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1456 		sges[sge_count++].length = sizeof(uint32_t);
1457 	}
1458 	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
1459 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
1460 				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
1461 				    (uint64_t)mlx5_task, rdma_fence);
1462 	if (spdk_unlikely(rc)) {
1463 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1464 		return rc;
1465 	}
1466 	mlx5_task->num_submitted_reqs++;
1467 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1468 	dev->stats.rdma_reads++;
1469 
1470 	return 0;
1471 }
1472 
1473 static inline int
1474 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
1475 {
1476 	int rc;
1477 
1478 	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);
1479 
1480 	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1481 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
1482 
1483 	if (mlx5_task->num_reqs == 1) {
1484 		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
1485 	} else {
1486 		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
1487 	}
1488 
1489 	if (rc == 0) {
1490 		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
1491 		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1492 			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
1493 			      mlx5_task->num_completed_reqs);
1494 	}
1495 
1496 	return rc;
1497 }
1498 
1499 static inline int
1500 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
1501 {
1502 	struct accel_mlx5_qp *qp = task->qp;
1503 	struct accel_mlx5_dev *dev = qp->dev;
1504 
1505 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
1506 		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
1507 			      dev->dev_ctx->context->device->name);
1508 		dev->stats.nomem_mkey++;
1509 		return -ENOMEM;
1510 	}
1511 	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
1512 	if (spdk_unlikely(!task->psv)) {
1513 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
1514 		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1515 		task->num_ops = 0;
1516 		dev->stats.nomem_mkey++;
1517 		return -ENOMEM;
1518 	}
1519 	/* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */
1520 	if (spdk_unlikely(task->psv->bits.error)) {
1521 		uint32_t n_slots = task->num_ops * 2 + 1;
1522 
1523 		if (qp_slot < n_slots) {
1524 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
1525 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1526 			dev->stats.nomem_qdepth++;
1527 			task->num_ops = 0;
1528 			return -ENOMEM;
1529 		}
1530 	}
1531 
1532 	return 0;
1533 }
1534 
1535 static inline int
1536 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
1537 {
1538 	struct accel_mlx5_qp *qp = task->qp;
1539 	struct accel_mlx5_dev *dev = qp->dev;
1540 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1541 	int rc;
1542 
1543 	assert(task->num_reqs > task->num_completed_reqs);
1544 	if (task->num_ops == 0) {
1545 		/* No mkeys allocated, try to allocate now. */
1546 		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
1547 		if (spdk_unlikely(rc)) {
1548 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1549 			return -ENOMEM;
1550 		}
1551 	}
1552 	/* We need to post at least 1 UMR and 1 RDMA operation */
1553 	if (spdk_unlikely(qp_slot < 2)) {
1554 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1555 		dev->stats.nomem_qdepth++;
1556 		return -ENOMEM;
1557 	}
1558 
1559 	return accel_mlx5_crc_task_process(task);
1560 }
1561 
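/* Calculate the number of UMR + RDMA requests needed for an out-of-place CRC task, reserving one dst
 * sge for the CRC value. */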
1562 static inline uint32_t
1563 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
1564 			      uint32_t dst_iovcnt)
1565 {
1566 	uint32_t src_idx = 0;
1567 	uint32_t dst_idx = 0;
1568 	uint32_t num_ops = 1;
1569 	uint32_t num_src_sge = 1;
1570 	uint32_t num_dst_sge = 1;
1571 	size_t src_offset = 0;
1572 	size_t dst_offset = 0;
1573 	uint32_t num_sge;
1574 	size_t src_len;
1575 	size_t dst_len;
1576 
1577 	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
1578 	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
1579 		return 1;
1580 	}
1581 
1582 	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
1583 		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1584 			num_ops++;
1585 			num_src_sge = 1;
1586 			num_dst_sge = 1;
1587 		}
1588 		src_len = src_iov[src_idx].iov_len - src_offset;
1589 		dst_len = dst_iov[dst_idx].iov_len - dst_offset;
1590 
1591 		if (src_len == dst_len) {
1592 			num_src_sge++;
1593 			num_dst_sge++;
1594 			src_offset = 0;
1595 			dst_offset = 0;
1596 			src_idx++;
1597 			dst_idx++;
1598 			continue;
1599 		}
1600 		if (src_len < dst_len) {
1601 			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
1602 			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
1603 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
1604 									   src_iovcnt - src_idx),
1605 							   &src_offset, &dst_len);
1606 			src_idx += num_sge;
1607 			num_src_sge += num_sge;
1608 			if (dst_len != 0) {
1609 				/*
1610 				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
1611 				 * are left on the current dst_iov.
1612 				 */
1613 				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
1614 			} else {
1615 				/* The src_iov advance is completed, shift to the next dst_iov. */
1616 				dst_idx++;
1617 				num_dst_sge++;
1618 				dst_offset = 0;
1619 			}
1620 		} else { /* src_len > dst_len */
1621 			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
1622 			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
1623 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
1624 									   dst_iovcnt - dst_idx),
1625 							   &dst_offset, &src_len);
1626 			dst_idx += num_sge;
1627 			num_dst_sge += num_sge;
1628 			if (src_len != 0) {
1629 				/*
1630 				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
1631 				 * are left on the current src_iov.
1632 				 */
1633 				src_offset = src_iov[src_idx].iov_len - src_len;
1634 			} else {
1635 				/* The dst_iov advance is completed, shift to the next src_iov. */
1636 				src_idx++;
1637 				num_src_sge++;
1638 				src_offset = 0;
1639 			}
1640 		}
1641 	}
1642 	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
1643 	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1644 		num_ops++;
1645 	}
1646 
1647 	/* The above loop must reach the end of both iovs simultaneously because their size is the same. */
1648 	assert(src_idx == src_iovcnt);
1649 	assert(dst_idx == dst_iovcnt);
1650 	assert(src_offset == 0);
1651 	assert(dst_offset == 0);
1652 
1653 	return num_ops;
1654 }
1655 
1656 static inline int
1657 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
1658 {
1659 	struct spdk_accel_task *task = &mlx5_task->base;
1660 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1661 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1662 	int rc;
1663 
1664 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1665 	if (mlx5_task->inplace) {
1666 		/* One entry is reserved for CRC */
1667 		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
1668 	} else {
1669 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1670 		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
1671 				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1672 	}
1673 
1674 	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
1675 	if (spdk_unlikely(rc)) {
1676 		return rc;
1677 	}
1678 
1679 	if (spdk_unlikely(qp_slot < 2)) {
1680 		/* Queue is full, queue this task */
1681 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
1682 			      mlx5_task->qp);
1683 		qp->dev->stats.nomem_qdepth++;
1684 		return -ENOMEM;
1685 	}
1686 	return 0;
1687 }
1688 
1689 static inline int
1690 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
1691 {
1692 	struct spdk_accel_task *task = &mlx5_task->base;
1693 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1694 	struct accel_mlx5_dev *dev = qp->dev;
1695 	uint32_t num_blocks;
1696 	int rc;
1697 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1698 	bool crypto_key_ok;
1699 
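	/* The external mkey path registers a single UMR covering the whole task, so validate its constraints up front. */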
1700 	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
1701 		/* With `external mkey` we can't split task or register several UMRs */
1702 		SPDK_ERRLOG("src buffer is too fragmented\n");
1703 		return -EINVAL;
1704 	}
1705 	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
1706 		SPDK_ERRLOG("accel domain is not supported\n");
1707 		return -ENOTSUP;
1708 	}
1709 	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
1710 		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
1711 		return -ENOTSUP;
1712 	}
1713 
1714 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
1715 			 task->crypto_key->priv);
1716 	if (spdk_unlikely(!crypto_key_ok)) {
1717 		SPDK_ERRLOG("Wrong crypto key provided\n");
1718 		return -EINVAL;
1719 	}
1720 	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
1721 		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
1722 			    mlx5_task->base.block_size);
1723 		return -EINVAL;
1724 	}
1725 
1726 	num_blocks = task->nbytes / mlx5_task->base.block_size;
1727 	if (dev->crypto_multi_block) {
1728 		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
1729 				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
1730 			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
1731 				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
1732 			return -E2BIG;
1733 		}
1734 	} else if (num_blocks != 1) {
1735 		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
1736 		return -E2BIG;
1737 	}
1738 
1739 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1740 	mlx5_task->num_blocks = num_blocks;
1741 	mlx5_task->num_processed_blocks = 0;
1742 	mlx5_task->num_reqs = 1;
1743 	mlx5_task->blocks_per_req = num_blocks;
1744 
1745 	if (spdk_unlikely(qp_slot == 0)) {
1746 		mlx5_task->num_ops = 0;
1747 		dev->stats.nomem_qdepth++;
1748 		return -ENOMEM;
1749 	}
1750 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1751 	if (spdk_unlikely(rc)) {
1752 		mlx5_task->num_ops = 0;
1753 		dev->stats.nomem_mkey++;
1754 		return -ENOMEM;
1755 	}
1756 	mlx5_task->num_ops = 1;
1757 
1758 	SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks,
1759 		      task->nbytes);
1760 
1761 	return 0;
1762 }
1763 
1764 static inline int
1765 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task)
1766 {
1767 	struct accel_mlx5_sge sge;
1768 	struct spdk_accel_task *task = &mlx5_task->base;
1769 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1770 	struct accel_mlx5_dev *dev = qp->dev;
1771 	struct spdk_mlx5_crypto_dek_data dek_data;
1772 	int rc;
1773 
1774 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1775 		return -EINVAL;
1776 	}
1777 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1778 
1779 	mlx5_task->num_wrs = 0;
1780 	rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data);
1781 	if (spdk_unlikely(rc)) {
1782 		return rc;
1783 	}
1784 
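	/* The whole task fits into one UMR; CE_CQ_UPDATE makes this single WQE generate a completion. */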
1785 	rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey,
1786 					     mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1787 	if (spdk_unlikely(rc)) {
1788 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1789 		return rc;
1790 	}
1791 	dev->stats.crypto_umrs++;
1792 	mlx5_task->num_submitted_reqs++;
1793 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1794 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
1795 
1796 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1797 
1798 	return 0;
1799 }
1800 
1801 static inline int
1802 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task)
1803 {
1804 	struct accel_mlx5_qp *qp = task->qp;
1805 	struct accel_mlx5_dev *dev = qp->dev;
1806 	int rc;
1807 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1808 
1809 	if (task->num_ops == 0) {
1810 		rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1);
1811 		if (spdk_unlikely(rc)) {
1812 			dev->stats.nomem_mkey++;
1813 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1814 			return -ENOMEM;
1815 		}
1816 		task->num_ops = 1;
1817 	}
1818 	if (spdk_unlikely(qp_slot == 0)) {
1819 		dev->stats.nomem_qdepth++;
1820 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1821 		return -ENOMEM;
1822 	}
1823 	return accel_mlx5_crypto_mkey_task_process(task);
1824 }
1825 
1826 static inline void
1827 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
1828 {
1829 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1830 
1831 	assert(mlx5_task->num_ops);
1832 	assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks);
1833 	assert(mlx5_task->base.seq);
1834 
1835 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1836 	spdk_accel_task_complete(&mlx5_task->base, 0);
1837 }
1838 
1839 static int
1840 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
1841 {
1842 	SPDK_ERRLOG("wrong function called\n");
1843 	SPDK_UNREACHABLE();
1844 }
1845 
1846 static void
1847 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
1848 {
1849 	SPDK_ERRLOG("wrong function called\n");
1850 	SPDK_UNREACHABLE();
1851 }
1852 
1853 static int
1854 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
1855 {
1856 	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);
1857 
1858 	return -ENOTSUP;
1859 }
1860 
1861 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
1862 	[ACCEL_MLX5_OPC_COPY] = {
1863 		.init = accel_mlx5_copy_task_init,
1864 		.process = accel_mlx5_copy_task_process,
1865 		.cont = accel_mlx5_copy_task_continue,
1866 		.complete = accel_mlx5_copy_task_complete,
1867 	},
1868 	[ACCEL_MLX5_OPC_CRYPTO] = {
1869 		.init = accel_mlx5_crypto_task_init,
1870 		.process = accel_mlx5_crypto_task_process,
1871 		.cont = accel_mlx5_crypto_task_continue,
1872 		.complete = accel_mlx5_crypto_task_complete,
1873 	},
1874 	[ACCEL_MLX5_OPC_CRC32C] = {
1875 		.init = accel_mlx5_crc_task_init,
1876 		.process = accel_mlx5_crc_task_process,
1877 		.cont = accel_mlx5_crc_task_continue,
1878 		.complete = accel_mlx5_crc_task_complete,
1879 	},
1880 	[ACCEL_MLX5_OPC_CRYPTO_MKEY] = {
1881 		.init = accel_mlx5_crypto_mkey_task_init,
1882 		.process = accel_mlx5_crypto_mkey_task_process,
1883 		.cont = accel_mlx5_crypto_mkey_task_continue,
1884 		.complete = accel_mlx5_crypto_mkey_task_complete,
1885 	},
1886 	[ACCEL_MLX5_OPC_LAST] = {
1887 		.init = accel_mlx5_task_op_not_supported,
1888 		.process = accel_mlx5_task_op_not_implemented,
1889 		.cont = accel_mlx5_task_op_not_implemented,
1890 		.complete = accel_mlx5_task_op_not_implemented_v
1891 	},
1892 };
1893 
1894 static void
1895 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc)
1896 {
1897 	struct accel_mlx5_task *task = ctx;
1898 
1899 	assert(task->needs_data_transfer);
1900 	task->needs_data_transfer = 0;
1901 
1902 	if (spdk_likely(!rc)) {
1903 		SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task);
1904 		accel_mlx5_task_complete(task);
1905 	} else {
1906 		SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc);
1907 		accel_mlx5_task_fail(task, rc);
1908 	}
1909 }
1910 
1911 static inline void
1912 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
1913 {
1914 	struct spdk_memory_domain_translation_result translation;
1915 	struct spdk_accel_task *base = &task->base;
1916 	struct accel_mlx5_dev *dev = task->qp->dev;
1917 	int rc;
1918 
1919 	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY);
1920 	/* The UMR exposes the data as offsets within its own address space, so the start address is 0 */
1921 	translation.iov.iov_base = NULL;
1922 	translation.iov.iov_len = base->nbytes;
1923 	translation.iov_count = 1;
1924 	translation.size = sizeof(translation);
1925 	translation.rdma.rkey = task->mkeys[0]->mkey;
1926 	translation.rdma.lkey = task->mkeys[0]->mkey;
1927 
1928 	SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task,
1929 		      task->base.dst_domain_ctx, task->mkeys[0]->mkey);
1930 	rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1,
1931 					      dev->dev_ctx->domain, task, &translation.iov, 1, &translation,
1932 					      accel_mlx5_memory_domain_transfer_cpl, task);
1933 	if (spdk_unlikely(rc)) {
1934 		SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc);
1935 		accel_mlx5_task_fail(task, rc);
1936 	}
1937 }
1938 
1939 static inline void
1940 accel_mlx5_task_complete(struct accel_mlx5_task *task)
1941 {
1942 	struct spdk_accel_sequence *seq = task->base.seq;
1943 	struct spdk_accel_task *next;
1944 	bool driver_seq;
1945 
1946 	if (task->needs_data_transfer) {
1947 		accel_mlx5_memory_domain_transfer(task);
1948 		return;
1949 	}
1950 
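	/* Capture the sequence state before .complete() below returns the task to the accel framework. */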
1951 	next = spdk_accel_sequence_next_task(&task->base);
1952 	driver_seq = task->driver_seq;
1953 
1954 	assert(task->num_reqs == task->num_completed_reqs);
1955 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->mlx5_opcode);
1956 
1957 	g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task);
1958 
1959 	if (driver_seq) {
1960 		struct spdk_io_channel *ch = task->qp->dev->ch;
1961 
1962 		assert(seq);
1963 		if (next) {
1964 			accel_mlx5_execute_sequence(ch, seq);
1965 		} else {
1966 			spdk_accel_sequence_continue(seq);
1967 		}
1968 	}
1969 }
1970 
1971 static inline int
1972 accel_mlx5_task_continue(struct accel_mlx5_task *task)
1973 {
1974 	struct accel_mlx5_qp *qp = task->qp;
1975 	struct accel_mlx5_dev *dev = qp->dev;
1976 
1977 	if (spdk_unlikely(qp->recovering)) {
1978 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1979 		return 0;
1980 	}
1981 
1982 	return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task);
1983 }

1984 static inline void
1985 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task)
1986 {
1987 	uint8_t base_opcode = mlx5_task->base.op_code;
1988 
1989 	switch (base_opcode) {
1990 	case SPDK_ACCEL_OPC_COPY:
1991 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY;
1992 		break;
1993 	case SPDK_ACCEL_OPC_ENCRYPT:
1994 		assert(g_accel_mlx5.crypto_supported);
1995 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
1996 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
1997 		break;
1998 	case SPDK_ACCEL_OPC_DECRYPT:
1999 		assert(g_accel_mlx5.crypto_supported);
2000 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
2001 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2002 		break;
2003 	case SPDK_ACCEL_OPC_CRC32C:
2004 		mlx5_task->inplace = 1;
2005 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2006 		break;
2007 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2008 		mlx5_task->inplace = 0;
2009 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2010 		break;
2011 	default:
2012 		SPDK_ERRLOG("wrong opcode %d\n", base_opcode);
2013 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST;
2014 	}
2015 }
2016 
2017 static inline int
2018 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task)
2019 {
2020 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2021 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
2022 	int rc;
2023 
2024 	/* We should not receive any tasks if the module was not enabled */
2025 	assert(g_accel_mlx5.enabled);
2026 
2027 	dev->stats.opcodes[mlx5_task->mlx5_opcode]++;
2028 	rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task);
2029 	if (spdk_unlikely(rc)) {
2030 		if (rc == -ENOMEM) {
2031 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
2032 				      mlx5_task->num_reqs);
2033 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2034 			return 0;
2035 		}
2036 		SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc);
2037 		return rc;
2038 	}
2039 
2040 	if (spdk_unlikely(mlx5_task->qp->recovering)) {
2041 		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2042 		return 0;
2043 	}
2044 
2045 	return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task);
2046 }
2047 
2048 static inline void
2049 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch)
2050 {
2051 	struct accel_mlx5_dev *dev;
2052 
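	/* Round-robin the channel's devices across submitted tasks. */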
2053 	dev = &accel_ch->devs[accel_ch->dev_idx];
2054 	accel_ch->dev_idx++;
2055 	if (accel_ch->dev_idx == accel_ch->num_devs) {
2056 		accel_ch->dev_idx = 0;
2057 	}
2058 
2059 	mlx5_task->qp = &dev->qp;
2060 }
2061 
2062 static inline void
2063 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task)
2064 {
2065 	mlx5_task->num_completed_reqs = 0;
2066 	mlx5_task->num_submitted_reqs = 0;
2067 	mlx5_task->num_ops = 0;
2068 	mlx5_task->num_processed_blocks = 0;
2069 	mlx5_task->raw = 0;
2070 }
2071 
2072 static int
2073 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task)
2074 {
2075 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2076 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
2077 
2078 	accel_mlx5_task_assign_qp(mlx5_task, accel_ch);
2079 	accel_mlx5_task_reset(mlx5_task);
2080 	accel_mlx5_task_init_opcode(mlx5_task);
2081 
2082 	return _accel_mlx5_submit_tasks(accel_ch, task);
2083 }
2084 
2085 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);
2086 
2087 static int
2088 accel_mlx5_recover_qp_poller(void *arg)
2089 {
2090 	struct accel_mlx5_qp *qp = arg;
2091 
2092 	spdk_poller_unregister(&qp->recover_poller);
2093 	accel_mlx5_recover_qp(qp);
2094 	return SPDK_POLLER_BUSY;
2095 }
2096 
2097 static void
2098 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
2099 {
2100 	struct accel_mlx5_dev *dev = qp->dev;
2101 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2102 	int rc;
2103 
2104 	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
2105 	if (qp->qp) {
2106 		spdk_mlx5_qp_destroy(qp->qp);
2107 		qp->qp = NULL;
2108 	}
2109 
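	/* Recreate the QP with the original attributes; on failure, retry from a delayed poller. */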
2110 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2111 	mlx5_qp_attr.cap.max_recv_wr = 0;
2112 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2113 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2114 
2115 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2116 	if (rc) {
2117 		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
2118 			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2119 		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
2120 				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2121 		return;
2122 	}
2123 
2124 	qp->recovering = false;
2125 }
2126 
2127 static inline void
2128 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
2129 {
2130 	struct accel_mlx5_qp *qp = task->qp;
2131 
2132 	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
2133 		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
2134 			     qp, task, wc->status, spdk_env_get_current_core());
2135 	} else {
2136 		SPDK_DEBUGLOG(accel_mlx5,
2137 			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
2138 			      qp, task, wc->status, spdk_env_get_current_core());
2139 	}
2140 
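	/* Mark the qp for recovery; the task is failed only once all of its submitted WRs have completed. */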
2141 	qp->recovering = true;
2142 	assert(task->num_completed_reqs <= task->num_submitted_reqs);
2143 	if (task->num_completed_reqs == task->num_submitted_reqs) {
2144 		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2145 		accel_mlx5_task_fail(task, -EIO);
2146 	}
2147 }
2148 
2149 static inline int64_t
2150 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
2151 {
2152 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
2153 	struct accel_mlx5_task *task;
2154 	struct accel_mlx5_qp *qp;
2155 	int reaped, i, rc;
2156 	uint16_t completed;
2157 
2158 	dev->stats.polls++;
2159 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
2160 	if (spdk_unlikely(reaped < 0)) {
2161 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
2162 		return reaped;
2163 	} else if (reaped == 0) {
2164 		dev->stats.idle_polls++;
2165 		return 0;
2166 	}
2167 	dev->stats.completions += reaped;
2168 
2169 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
2170 		      dev->dev_ctx->context->device->name);
2171 
2172 	for (i = 0; i < reaped; i++) {
2173 		if (spdk_unlikely(!wc[i].wr_id)) {
2174 			/* Unsignaled completion with error, ignore */
2175 			continue;
2176 		}
2177 		task = (struct accel_mlx5_task *)wc[i].wr_id;
2178 		qp = task->qp;
2179 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
2180 		assert(task->num_submitted_reqs > task->num_completed_reqs);
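		/* A signaled completion covers every request submitted for this task since the previous one. */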
2181 		completed = task->num_submitted_reqs - task->num_completed_reqs;
2182 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
2183 		task->num_completed_reqs += completed;
2184 		assert(qp->wrs_submitted >= task->num_wrs);
2185 		qp->wrs_submitted -= task->num_wrs;
2186 		assert(dev->wrs_in_cq > 0);
2187 		dev->wrs_in_cq--;
2188 
2189 		if (spdk_unlikely(wc[i].status)) {
2190 			accel_mlx5_process_error_cpl(&wc[i], task);
2191 			if (qp->wrs_submitted == 0) {
2192 				assert(STAILQ_EMPTY(&qp->in_hw));
2193 				accel_mlx5_recover_qp(qp);
2194 			}
2195 			continue;
2196 		}
2197 
2198 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
2199 			      task->num_reqs - task->num_completed_reqs);
2200 		if (task->num_completed_reqs == task->num_reqs) {
2201 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2202 			accel_mlx5_task_complete(task);
2203 		} else {
2204 			assert(task->num_submitted_reqs < task->num_reqs);
2205 			assert(task->num_completed_reqs == task->num_submitted_reqs);
2206 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2207 			rc = accel_mlx5_task_continue(task);
2208 			if (spdk_unlikely(rc)) {
2209 				if (rc != -ENOMEM) {
2210 					accel_mlx5_task_fail(task, rc);
2211 				}
2212 			}
2213 		}
2214 	}
2215 
2216 	return reaped;
2217 }
2218 
2219 static inline void
2220 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
2221 {
2222 	struct accel_mlx5_task *task, *tmp, *last;
2223 	int rc;
2224 
2225 	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
2226 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
2227 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
2228 		rc = accel_mlx5_task_continue(task);
2229 		if (spdk_unlikely(rc)) {
2230 			if (rc != -ENOMEM) {
2231 				accel_mlx5_task_fail(task, rc);
2232 			}
2233 			break;
2234 		}
2235 		/* If the qpair is recovering, the task is put back on the nomem list and 0 is returned. Compare
2236 		 * against the saved tail so the list is walked only once and this FOREACH loop terminates. */
2237 		if (task == last) {
2238 			break;
2239 		}
2240 	}
2241 }
2242 
2243 static int
2244 accel_mlx5_poller(void *ctx)
2245 {
2246 	struct accel_mlx5_io_channel *ch = ctx;
2247 	struct accel_mlx5_dev *dev;
2248 
2249 	int64_t completions = 0, rc;
2250 	uint32_t i;
2251 
2252 	for (i = 0; i < ch->num_devs; i++) {
2253 		dev = &ch->devs[i];
2254 		if (dev->wrs_in_cq) {
2255 			rc = accel_mlx5_poll_cq(dev);
2256 			if (spdk_unlikely(rc < 0)) {
2257 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
2258 			}
2259 			completions += rc;
2260 			if (dev->qp.wrs_submitted) {
2261 				spdk_mlx5_qp_complete_send(dev->qp.qp);
2262 			}
2263 		}
2264 		if (!STAILQ_EMPTY(&dev->nomem)) {
2265 			accel_mlx5_resubmit_nomem_tasks(dev);
2266 		}
2267 	}
2268 
2269 	return !!completions;
2270 }
2271 
2272 static bool
2273 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
2274 {
2275 	assert(g_accel_mlx5.enabled);
2276 
2277 	switch (opc) {
2278 	case SPDK_ACCEL_OPC_COPY:
2279 		return true;
2280 	case SPDK_ACCEL_OPC_ENCRYPT:
2281 	case SPDK_ACCEL_OPC_DECRYPT:
2282 		return g_accel_mlx5.crypto_supported;
2283 	case SPDK_ACCEL_OPC_CRC32C:
2284 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2285 		return g_accel_mlx5.crc32c_supported;
2286 	default:
2287 		return false;
2288 	}
2289 }
2290 
2291 static struct spdk_io_channel *
2292 accel_mlx5_get_io_channel(void)
2293 {
2294 	assert(g_accel_mlx5.enabled);
2295 	return spdk_get_io_channel(&g_accel_mlx5);
2296 }
2297 
2298 static int
2299 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
2300 {
2301 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2302 	int rc;
2303 
2304 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2305 	mlx5_qp_attr.cap.max_recv_wr = 0;
2306 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2307 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2308 
2309 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2310 	if (rc) {
2311 		return rc;
2312 	}
2313 
2314 	STAILQ_INIT(&qp->in_hw);
2315 	qp->dev = dev;
2316 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
2317 	assert(qp->verbs_qp);
2318 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
2319 
2320 	return 0;
2321 }
2322 
2323 static void
2324 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add)
2325 {
2326 	int i;
2327 
2328 	stats->crypto_umrs += to_add->crypto_umrs;
2329 	stats->sig_umrs += to_add->sig_umrs;
2330 	stats->rdma_reads += to_add->rdma_reads;
2331 	stats->rdma_writes += to_add->rdma_writes;
2332 	stats->polls += to_add->polls;
2333 	stats->idle_polls += to_add->idle_polls;
2334 	stats->completions += to_add->completions;
2335 	stats->nomem_qdepth += to_add->nomem_qdepth;
2336 	stats->nomem_mkey += to_add->nomem_mkey;
2337 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
2338 		stats->opcodes[i] += to_add->opcodes[i];
2339 	}
2340 }
2341 
2342 static void
2343 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
2344 {
2345 	struct accel_mlx5_io_channel *ch = ctx_buf;
2346 	struct accel_mlx5_dev *dev;
2347 	uint32_t i;
2348 
2349 	spdk_poller_unregister(&ch->poller);
2350 	for (i = 0; i < ch->num_devs; i++) {
2351 		dev = &ch->devs[i];
2352 		spdk_mlx5_qp_destroy(dev->qp.qp);
2353 		if (dev->cq) {
2354 			spdk_mlx5_cq_destroy(dev->cq);
2355 		}
2356 		spdk_poller_unregister(&dev->qp.recover_poller);
2357 		if (dev->crypto_mkeys) {
2358 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
2359 		}
2360 		if (dev->sig_mkeys) {
2361 			spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys);
2362 		}
2363 		spdk_rdma_utils_free_mem_map(&dev->mmap);
2364 		spdk_spin_lock(&g_accel_mlx5.lock);
2365 		accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats);
2366 		spdk_spin_unlock(&g_accel_mlx5.lock);
2367 	}
2368 	free(ch->devs);
2369 }
2370 
2371 static int
2372 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
2373 {
2374 	struct spdk_mlx5_cq_attr cq_attr = {};
2375 	struct accel_mlx5_io_channel *ch = ctx_buf;
2376 	struct accel_mlx5_dev_ctx *dev_ctx;
2377 	struct accel_mlx5_dev *dev;
2378 	uint32_t i;
2379 	int rc;
2380 
2381 	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
2382 	if (!ch->devs) {
2383 		SPDK_ERRLOG("Memory allocation failed\n");
2384 		return -ENOMEM;
2385 	}
2386 
2387 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2388 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2389 		dev = &ch->devs[i];
2390 		dev->dev_ctx = dev_ctx;
2391 
2392 		if (dev_ctx->crypto_mkeys) {
2393 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2394 			if (!dev->crypto_mkeys) {
2395 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2396 				/* Must not happen: the mkey pool is created during accel_mlx5 initialization,
2397 				 * and initialization fails if pool creation fails */
2398 				assert(0);
2399 				goto err_out;
2400 			}
2401 		}
2402 		if (dev_ctx->sig_mkeys) {
2403 			dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2404 			if (!dev->sig_mkeys) {
2405 				SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2406 				/* Must not happen: the mkey pool is created during accel_mlx5 initialization,
2407 				 * and initialization fails if pool creation fails */
2408 				assert(0);
2409 				goto err_out;
2410 			}
2411 		}
2412 
2413 		memset(&cq_attr, 0, sizeof(cq_attr));
2414 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
2415 		cq_attr.cqe_size = 64;
2416 		cq_attr.cq_context = dev;
2417 
2418 		ch->num_devs++;
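		/* Count the device up front so accel_mlx5_destroy_cb can release partially created resources on error. */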
2419 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
2420 		if (rc) {
2421 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
2422 			goto err_out;
2423 		}
2424 
2425 		rc = accel_mlx5_create_qp(dev, &dev->qp);
2426 		if (rc) {
2427 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
2428 			goto err_out;
2429 		}
2430 
2431 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2432 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2433 		if (!dev->mmap) {
2434 			SPDK_ERRLOG("Failed to create memory map\n");
2435 			rc = -ENOMEM;
2436 			goto err_out;
2437 		}
2438 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
2439 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
2440 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
2441 		dev->ch = spdk_io_channel_from_ctx(ctx_buf);
2442 		STAILQ_INIT(&dev->nomem);
2443 	}
2444 
2445 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
2446 
2447 	return 0;
2448 
2449 err_out:
2450 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
2451 	return rc;
2452 }
2453 
2454 void
2455 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
2456 {
2457 	assert(attr);
2458 
2459 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
2460 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
2461 	attr->allowed_devs = NULL;
2462 	attr->crypto_split_blocks = 0;
2463 	attr->enable_driver = false;
2464 }
2465 
2466 static void
2467 accel_mlx5_allowed_devs_free(void)
2468 {
2469 	size_t i;
2470 
2471 	if (!g_accel_mlx5.allowed_devs) {
2472 		return;
2473 	}
2474 
2475 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
2476 		free(g_accel_mlx5.allowed_devs[i]);
2477 	}
2478 	free(g_accel_mlx5.attr.allowed_devs);
2479 	free(g_accel_mlx5.allowed_devs);
2480 	g_accel_mlx5.attr.allowed_devs = NULL;
2481 	g_accel_mlx5.allowed_devs = NULL;
2482 	g_accel_mlx5.allowed_devs_count = 0;
2483 }
2484 
2485 static int
2486 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
2487 {
2488 	char *str, *tmp, *tok, *sp = NULL;
2489 	size_t devs_count = 0;
2490 
2491 	str = strdup(allowed_devs);
2492 	if (!str) {
2493 		return -ENOMEM;
2494 	}
2495 
2496 	accel_mlx5_allowed_devs_free();
2497 
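	/* First pass: count the comma separators to size the array; second pass: strtok_r() copies each name. */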
2498 	tmp = str;
2499 	while ((tmp = strchr(tmp, ',')) != NULL) {
2500 		tmp++;
2501 		devs_count++;
2502 	}
2503 	devs_count++;
2504 
2505 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
2506 	if (!g_accel_mlx5.allowed_devs) {
2507 		free(str);
2508 		return -ENOMEM;
2509 	}
2510 
2511 	devs_count = 0;
2512 	tok = strtok_r(str, ",", &sp);
2513 	while (tok) {
2514 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
2515 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
2516 			free(str);
2517 			accel_mlx5_allowed_devs_free();
2518 			return -ENOMEM;
2519 		}
2520 		tok = strtok_r(NULL, ",", &sp);
2521 		devs_count++;
2522 		g_accel_mlx5.allowed_devs_count++;
2523 	}
2524 
2525 	free(str);
2526 
2527 	return 0;
2528 }
2529 
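/*
 * Usage sketch (illustrative values): enable the module with a custom qp size and an explicit
 * device list, typically from the mlx5_scan_accel_module RPC before the accel framework starts.
 *
 *	struct accel_mlx5_attr attr;
 *
 *	accel_mlx5_get_default_attr(&attr);
 *	attr.qp_size = 512;
 *	attr.allowed_devs = "mlx5_0,mlx5_1";
 *	rc = accel_mlx5_enable(&attr);
 */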
2530 int
2531 accel_mlx5_enable(struct accel_mlx5_attr *attr)
2532 {
2533 	int rc;
2534 
2535 	if (g_accel_mlx5.enabled) {
2536 		return -EEXIST;
2537 	}
2538 	if (attr) {
2539 		if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) {
2540 			SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n",
2541 				    ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count());
2542 			return -EINVAL;
2543 		}
2544 		if (attr->qp_size < 8) {
2545 			SPDK_ERRLOG("qp_size must be at least 8\n");
2546 			return -EINVAL;
2547 		}
2548 		g_accel_mlx5.attr = *attr;
2549 		g_accel_mlx5.attr.allowed_devs = NULL;
2550 
2551 		if (attr->allowed_devs) {
2552 			/* Contains a copy of user's string */
2553 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
2554 			if (!g_accel_mlx5.attr.allowed_devs) {
2555 				return -ENOMEM;
2556 			}
2557 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
2558 			if (rc) {
2559 				return rc;
2560 			}
2561 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
2562 							 g_accel_mlx5.allowed_devs_count);
2563 			if (rc) {
2564 				accel_mlx5_allowed_devs_free();
2565 				return rc;
2566 			}
2567 		}
2568 	} else {
2569 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
2570 	}
2571 
2572 	g_accel_mlx5.enabled = true;
2573 	spdk_accel_module_list_add(&g_accel_mlx5.module);
2574 
2575 	return 0;
2576 }
2577 
2578 static void
2579 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx)
2580 {
2581 	uint32_t i, num_psvs, num_psvs_in_pool;
2582 
2583 	if (!dev_ctx->psvs) {
2584 		return;
2585 	}
2586 
2587 	num_psvs = g_accel_mlx5.attr.num_requests;
2588 
2589 	for (i = 0; i < num_psvs; i++) {
2590 		if (dev_ctx->psvs[i]) {
2591 			spdk_mlx5_destroy_psv(dev_ctx->psvs[i]);
2592 			dev_ctx->psvs[i] = NULL;
2593 		}
2594 	}
2595 	free(dev_ctx->psvs);
2596 
2597 	if (!dev_ctx->psv_pool) {
2598 		return;
2599 	}
2600 	num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool);
2601 	if (num_psvs_in_pool != num_psvs) {
2602 		SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool);
2603 	}
2604 	spdk_mempool_free(dev_ctx->psv_pool);
2605 }
2606 
2607 static void
2608 accel_mlx5_free_resources(void)
2609 {
2610 	struct accel_mlx5_dev_ctx *dev_ctx;
2611 	uint32_t i;
2612 
2613 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2614 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2615 		accel_mlx5_psvs_release(dev_ctx);
2616 		if (dev_ctx->pd) {
2617 			if (dev_ctx->crypto_mkeys) {
2618 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
2619 			}
2620 			if (dev_ctx->sig_mkeys) {
2621 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd);
2622 			}
2623 			spdk_rdma_utils_put_pd(dev_ctx->pd);
2624 		}
2625 		if (dev_ctx->domain) {
2626 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
2627 		}
2628 	}
2629 
2630 	free(g_accel_mlx5.dev_ctxs);
2631 	g_accel_mlx5.dev_ctxs = NULL;
2632 	g_accel_mlx5.initialized = false;
2633 }
2634 
2635 static void
2636 accel_mlx5_deinit_cb(void *ctx)
2637 {
2638 	accel_mlx5_free_resources();
2639 	spdk_spin_destroy(&g_accel_mlx5.lock);
2640 	spdk_mlx5_umr_implementer_register(false);
2641 	spdk_accel_module_finish();
2642 }
2643 
2644 static void
2645 accel_mlx5_deinit(void *ctx)
2646 {
2647 	if (g_accel_mlx5.allowed_devs) {
2648 		accel_mlx5_allowed_devs_free();
2649 	}
2650 	spdk_mlx5_crypto_devs_allow(NULL, 0);
2651 	if (g_accel_mlx5.initialized) {
2652 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
2653 	} else {
2654 		spdk_accel_module_finish();
2655 	}
2656 }
2657 
2658 static int
2659 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
2660 {
2661 	struct spdk_mlx5_mkey_pool_param pool_param = {};
2662 
2663 	pool_param.mkey_count = num_mkeys;
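	/* Reserve roughly 3/4 of the mkeys for per-thread caches, split evenly across cores. */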
2664 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
2665 	pool_param.flags = flags;
2666 
2667 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
2668 }
2669 
2670 static void
2671 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx)
2672 {
2673 	struct spdk_rdma_utils_memory_translation translation = {};
2674 	struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg;
2675 	struct accel_mlx5_psv_wrapper *wrapper = _psv;
2676 	struct accel_mlx5_dev_ctx *dev_ctx = args->dev;
2677 	int rc;
2678 
2679 	if (args->rc) {
2680 		return;
2681 	}
2682 	assert(obj_idx < g_accel_mlx5.attr.num_requests);
2683 	assert(dev_ctx->psvs[obj_idx] != NULL);
2684 	memset(wrapper, 0, sizeof(*wrapper));
2685 	wrapper->psv_index = dev_ctx->psvs[obj_idx]->index;
2686 
2687 	rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation);
2688 	if (rc) {
2689 		SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t));
2690 		args->rc = -EINVAL;
2691 	} else {
2692 		wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation);
2693 	}
2694 }
2695 
2696 static int
2697 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx)
2698 {
2699 	struct accel_mlx5_psv_pool_iter_cb_args args = {
2700 		.dev = dev_ctx
2701 	};
2702 	char pool_name[32];
2703 	uint32_t i;
2704 	uint32_t num_psvs = g_accel_mlx5.attr.num_requests;
2705 	uint32_t cache_size;
2706 	int rc;
2707 
2708 	dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *)));
2709 	if (!dev_ctx->psvs) {
2710 		SPDK_ERRLOG("Failed to alloc PSVs array\n");
2711 		return -ENOMEM;
2712 	}
2713 	for (i = 0; i < num_psvs; i++) {
2714 		dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd);
2715 		if (!dev_ctx->psvs[i]) {
2716 			SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name);
2717 			return -EINVAL;
2718 		}
2719 	}
2720 
2721 	rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name);
2722 	if (rc < 0) {
2723 		assert(0);
2724 		return -EINVAL;
2725 	}
2726 	cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count();
2727 	args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2728 			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2729 	if (!args.map) {
2730 		return -ENOMEM;
2731 	}
2732 	dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs,
2733 			    sizeof(struct accel_mlx5_psv_wrapper),
2734 			    cache_size, SPDK_ENV_SOCKET_ID_ANY,
2735 			    accel_mlx5_set_psv_in_pool, &args);
2736 	spdk_rdma_utils_free_mem_map(&args.map);
2737 	if (!dev_ctx->psv_pool) {
2738 		SPDK_ERRLOG("Failed to create PSV memory pool\n");
2739 		return -ENOMEM;
2740 	}
2741 	if (args.rc) {
2742 		SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc);
2743 		return args.rc;
2744 	}
2745 
2746 	return 0;
2747 }
2748 
2749 
2750 static int
2751 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
2752 			struct spdk_mlx5_device_caps *caps)
2753 {
2754 	struct ibv_pd *pd;
2755 	int rc;
2756 
2757 	pd = spdk_rdma_utils_get_pd(dev);
2758 	if (!pd) {
2759 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
2760 		return -EINVAL;
2761 	}
2762 	dev_ctx->context = dev;
2763 	dev_ctx->pd = pd;
2764 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
2765 	if (!dev_ctx->domain) {
2766 		return -ENOMEM;
2767 	}
2768 
2769 	if (g_accel_mlx5.crypto_supported) {
2770 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
2771 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
2772 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
2773 				     dev->device->name);
2774 		}
2775 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2776 		if (rc) {
2777 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2778 			return rc;
2779 		}
2780 		dev_ctx->crypto_mkeys = true;
2781 	}
2782 	if (g_accel_mlx5.crc32c_supported) {
2783 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests,
2784 					     SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2785 		if (rc) {
2786 			SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2787 			return rc;
2788 		}
2789 		dev_ctx->sig_mkeys = true;
2790 		rc = accel_mlx5_psvs_create(dev_ctx);
2791 		if (rc) {
2792 			SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name);
2793 			return rc;
2794 		}
2795 	}
2796 
2797 	return 0;
2798 }
2799 
2800 static struct ibv_context **
2801 accel_mlx5_get_devices(int *_num_devs)
2802 {
2803 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
2804 	struct ibv_device_attr dev_attr;
2805 	size_t j;
2806 	int num_devs = 0, i, rc;
2807 	int num_devs_out = 0;
2808 	bool dev_allowed;
2809 
2810 	rdma_devs = rdma_get_devices(&num_devs);
2811 	if (!rdma_devs || !num_devs) {
2812 		*_num_devs = 0;
2813 		return NULL;
2814 	}
2815 
2816 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
2817 	if (!rdma_devs_out) {
2818 		SPDK_ERRLOG("Memory allocation failed\n");
2819 		rdma_free_devices(rdma_devs);
2820 		*_num_devs = 0;
2821 		return NULL;
2822 	}
2823 
2824 	for (i = 0; i < num_devs; i++) {
2825 		dev = rdma_devs[i];
2826 		rc = ibv_query_device(dev, &dev_attr);
2827 		if (rc) {
2828 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
2829 			continue;
2830 		}
2831 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
2832 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
2833 			continue;
2834 		}
2835 
2836 		if (g_accel_mlx5.allowed_devs_count) {
2837 			dev_allowed = false;
2838 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
2839 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
2840 					dev_allowed = true;
2841 					break;
2842 				}
2843 			}
2844 			if (!dev_allowed) {
2845 				continue;
2846 			}
2847 		}
2848 
2849 		rdma_devs_out[num_devs_out] = dev;
2850 		num_devs_out++;
2851 	}
2852 
2853 	rdma_free_devices(rdma_devs);
2854 	*_num_devs = num_devs_out;
2855 
2856 	return rdma_devs_out;
2857 }
2858 
2859 static inline bool
2860 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
2861 {
2862 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
2863 	       (caps->crypto.single_block_le_tweak ||
2864 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
2865 }
2866 
2867 static int
2868 accel_mlx5_init(void)
2869 {
2870 	struct spdk_mlx5_device_caps *caps;
2871 	struct ibv_context **rdma_devs, *dev;
2872 	int num_devs = 0, rc = 0, i;
2873 	int best_dev = -1, first_dev = 0;
2874 	int best_dev_stat = 0, dev_stat;
2875 	bool supports_crypto;
2876 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
2877 
2878 	if (!g_accel_mlx5.enabled) {
2879 		return -EINVAL;
2880 	}
2881 
2882 	spdk_spin_init(&g_accel_mlx5.lock);
2883 	rdma_devs = accel_mlx5_get_devices(&num_devs);
2884 	if (!rdma_devs || !num_devs) {
2885 		return -ENODEV;
2886 	}
2887 	caps = calloc(num_devs, sizeof(*caps));
2888 	if (!caps) {
2889 		rc = -ENOMEM;
2890 		goto cleanup;
2891 	}
2892 
2893 	g_accel_mlx5.crypto_supported = true;
2894 	g_accel_mlx5.crc32c_supported = true;
2895 	g_accel_mlx5.num_ctxs = 0;
2896 
2897 	/* Iterate devices. An offload is enabled only if every device supports it */
2898 	for (i = 0; i < num_devs; i++) {
2899 		dev = rdma_devs[i];
2900 
2901 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
2902 		if (rc) {
2903 			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
2904 			goto cleanup;
2905 		}
2906 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
2907 		if (!supports_crypto) {
2908 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
2909 				      rdma_devs[i]->device->name);
2910 			g_accel_mlx5.crypto_supported = false;
2911 		}
2912 		if (!caps[i].crc32c_supported) {
2913 			SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n",
2914 				      rdma_devs[i]->device->name);
2915 			g_accel_mlx5.crc32c_supported = false;
2916 		}
2917 		if (find_best_dev) {
2918 			/* Find the device that supports the most offloads */
2919 			dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported;
2920 			if (dev_stat > best_dev_stat) {
2921 				best_dev_stat = dev_stat;
2922 				best_dev = i;
2923 			}
2924 		}
2925 	}
2926 
2927 	/* User didn't specify devices to use, try to select the best one */
2928 	if (find_best_dev) {
2929 		if (best_dev == -1) {
2930 			best_dev = 0;
2931 		}
2932 		g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
2933 		g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported;
2934 		SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name,
2935 			       g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported);
2936 		first_dev = best_dev;
2937 		num_devs = 1;
2938 		if (g_accel_mlx5.crypto_supported) {
2939 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
2940 			/* Let mlx5 library know which device to use */
2941 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
2942 		}
2943 	} else {
2944 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
2945 	}
2946 
2947 	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
2948 	if (!g_accel_mlx5.dev_ctxs) {
2949 		SPDK_ERRLOG("Memory allocation failed\n");
2950 		rc = -ENOMEM;
2951 		goto cleanup;
2952 	}
2953 
2954 	for (i = first_dev; i < first_dev + num_devs; i++) {
2955 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
2956 					     rdma_devs[i], &caps[i]);
2957 		if (rc) {
2958 			goto cleanup;
2959 		}
2960 	}
2961 
2962 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
2963 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
2964 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
2965 	g_accel_mlx5.initialized = true;
2966 	free(rdma_devs);
2967 	free(caps);
2968 
2969 	if (g_accel_mlx5.attr.enable_driver) {
2970 		SPDK_NOTICELOG("Enabling mlx5 platform driver\n");
2971 		spdk_accel_driver_register(&g_accel_mlx5_driver);
2972 		spdk_accel_set_driver(g_accel_mlx5_driver.name);
2973 		spdk_mlx5_umr_implementer_register(true);
2974 	}
2975 
2976 	return 0;
2977 
2978 cleanup:
2979 	free(rdma_devs);
2980 	free(caps);
2981 	accel_mlx5_free_resources();
2982 	spdk_spin_destroy(&g_accel_mlx5.lock);
2983 
2984 	return rc;
2985 }
2986 
2987 static void
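/*
 * Example of the emitted configuration (illustrative values; "allowed_devs" is omitted when unset):
 *
 *	{
 *	  "method": "mlx5_scan_accel_module",
 *	  "params": {
 *	    "qp_size": 256,
 *	    "num_requests": 2047,
 *	    "crypto_split_blocks": 0,
 *	    "enable_driver": false
 *	  }
 *	}
 */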
2988 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
2989 {
2990 	if (g_accel_mlx5.enabled) {
2991 		spdk_json_write_object_begin(w);
2992 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
2993 		spdk_json_write_named_object_begin(w, "params");
2994 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
2995 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
2996 		if (g_accel_mlx5.attr.allowed_devs) {
2997 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
2998 		}
2999 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
3000 		spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver);
3001 		spdk_json_write_object_end(w);
3002 		spdk_json_write_object_end(w);
3003 	}
3004 }
3005 
3006 static size_t
3007 accel_mlx5_get_ctx_size(void)
3008 {
3009 	return sizeof(struct accel_mlx5_task);
3010 }
3011 
3012 static int
3013 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
3014 {
3015 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
3016 	struct spdk_mlx5_crypto_keytag *keytag;
3017 	int rc;
3018 
3019 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
3020 		return -EINVAL;
3021 	}
3022 
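	/* The DEK is the concatenation of both AES-XTS key halves; it is zeroed and freed after keytag creation. */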
3023 	attr.dek = calloc(1, key->key_size + key->key2_size);
3024 	if (!attr.dek) {
3025 		return -ENOMEM;
3026 	}
3027 
3028 	memcpy(attr.dek, key->key, key->key_size);
3029 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
3030 	attr.dek_len = key->key_size + key->key2_size;
3031 
3032 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
3033 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
3034 	free(attr.dek);
3035 	if (rc) {
3036 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
3037 		return rc;
3038 	}
3039 
3040 	key->priv = keytag;
3041 
3042 	return 0;
3043 }
3044 
3045 static void
3046 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
3047 {
3048 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
3049 		return;
3050 	}
3051 
3052 	spdk_mlx5_crypto_keytag_destroy(key->priv);
3053 }
3054 
3055 static void
3056 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
3057 			   const struct accel_mlx5_stats *stats)
3058 {
3059 	double idle_polls_percentage = 0;
3060 	double cpls_per_poll = 0;
3061 	uint64_t total_tasks = 0;
3062 	int i;
3063 
3064 	if (stats->polls) {
3065 		idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls;
3066 	}
3067 	if (stats->polls > stats->idle_polls) {
3068 		cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls);
3069 	}
3070 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
3071 		total_tasks += stats->opcodes[i];
3072 	}
3073 
3074 	spdk_json_write_named_object_begin(w, header);
3075 
3076 	spdk_json_write_named_object_begin(w, "umrs");
3077 	spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs);
3078 	spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs);
3079 	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs);
3080 	spdk_json_write_object_end(w);
3081 
3082 	spdk_json_write_named_object_begin(w, "rdma");
3083 	spdk_json_write_named_uint64(w, "read", stats->rdma_reads);
3084 	spdk_json_write_named_uint64(w, "write", stats->rdma_writes);
3085 	spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes);
3086 	spdk_json_write_object_end(w);
3087 
3088 	spdk_json_write_named_object_begin(w, "polling");
3089 	spdk_json_write_named_uint64(w, "polls", stats->polls);
3090 	spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls);
3091 	spdk_json_write_named_uint64(w, "completions", stats->completions);
3092 	spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage);
3093 	spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll);
3094 	spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth);
3095 	spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey);
3096 	spdk_json_write_object_end(w);
3097 
3098 	spdk_json_write_named_object_begin(w, "tasks");
3099 	spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]);
3100 	spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]);
3101 	spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]);
3102 	spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]);
3103 	spdk_json_write_named_uint64(w, "total", total_tasks);
3104 	spdk_json_write_object_end(w);
3105 
3106 	spdk_json_write_object_end(w);
3107 }
3108 
3109 static void
3110 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i)
3111 {
3112 	struct accel_mlx5_stats ch_stat = {};
3113 	struct accel_mlx5_dump_stats_ctx *ctx;
3114 	struct spdk_io_channel *_ch;
3115 	struct accel_mlx5_io_channel *ch;
3116 	struct accel_mlx5_dev *dev;
3117 	uint32_t j;
3118 
3119 	ctx = spdk_io_channel_iter_get_ctx(i);
3120 	_ch = spdk_io_channel_iter_get_channel(i);
3121 	ch = spdk_io_channel_get_ctx(_ch);
3122 
3123 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3124 		spdk_json_write_object_begin(ctx->w);
3125 		spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread()));
3126 	}
3127 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3128 		spdk_json_write_named_array_begin(ctx->w, "devices");
3129 	}
3130 
3131 	for (j = 0; j < ch->num_devs; j++) {
3132 		dev = &ch->devs[j];
3133 		/* Save grand total and channel stats */
3134 		accel_mlx5_add_stats(&ctx->total, &dev->stats);
3135 		accel_mlx5_add_stats(&ch_stat, &dev->stats);
3136 		if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3137 			spdk_json_write_object_begin(ctx->w);
3138 			accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats);
3139 			spdk_json_write_object_end(ctx->w);
3140 		}
3141 	}
3142 
3143 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3144 		spdk_json_write_array_end(ctx->w);
3145 	}
3146 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3147 		accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat);
3148 		spdk_json_write_object_end(ctx->w);
3149 		spdk_json_write_object_end(ctx->w);
3150 	}
3151 
3152 	spdk_for_each_channel_continue(i, 0);
3153 }
3154 
3155 static void
3156 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status)
3157 {
3158 	struct accel_mlx5_dump_stats_ctx *ctx;
3159 
3160 	ctx = spdk_io_channel_iter_get_ctx(i);
3161 
3162 	spdk_spin_lock(&g_accel_mlx5.lock);
3163 	/* Add statistics from destroyed channels */
3164 	accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats);
3165 	spdk_spin_unlock(&g_accel_mlx5.lock);
3166 
3167 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3168 		/* channels[] */
3169 		spdk_json_write_array_end(ctx->w);
3170 	}
3171 
3172 	accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total);
3173 
3174 	/* Ends the whole response which was begun in accel_mlx5_dump_stats */
3175 	spdk_json_write_object_end(ctx->w);
3176 
3177 	ctx->cb(ctx->ctx, 0);
3178 	free(ctx);
3179 }
3180 
3181 int
3182 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level,
3183 		      accel_mlx5_dump_stat_done_cb cb, void *ctx)
3184 {
3185 	struct accel_mlx5_dump_stats_ctx *stat_ctx;
3186 
3187 	if (!w || !cb) {
3188 		return -EINVAL;
3189 	}
3190 	if (!g_accel_mlx5.initialized) {
3191 		return -ENODEV;
3192 	}
3193 
3194 	stat_ctx = calloc(1, sizeof(*stat_ctx));
3195 	if (!stat_ctx) {
3196 		return -ENOMEM;
3197 	}
3198 	stat_ctx->cb = cb;
3199 	stat_ctx->ctx = ctx;
3200 	stat_ctx->level = level;
3201 	stat_ctx->w = w;
3202 
3203 	spdk_json_write_object_begin(w);
3204 
3205 	if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3206 		spdk_json_write_named_array_begin(w, "channels");
3207 	}
3208 
3209 	spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
3210 			      accel_mlx5_dump_channel_stat_done);
3211 
3212 	return 0;
3213 }
3214 
3215 static bool
3216 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
3217 {
3218 	switch (cipher) {
3219 	case SPDK_ACCEL_CIPHER_AES_XTS:
3220 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
3221 	default:
3222 		return false;
3223 	}
3224 }
3225 
3226 static int
3227 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
3228 {
3229 	int i, size;
3230 
3231 	if (!domains || !array_size) {
3232 		return (int)g_accel_mlx5.num_ctxs;
3233 	}
3234 
3235 	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);
3236 
3237 	for (i = 0; i < size; i++) {
3238 		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
3239 	}
3240 
3241 	return (int)g_accel_mlx5.num_ctxs;
3242 }
3243 
3244 static inline struct accel_mlx5_dev *
3245 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
3246 {
3247 	uint32_t i;
3248 
3249 	for (i = 0; i < accel_ch->num_devs; i++) {
3250 		if (accel_ch->devs[i].dev_ctx->pd == pd) {
3251 			return &accel_ch->devs[i];
3252 		}
3253 	}
3254 
3255 	return NULL;
3256 }
3257 
3258 static inline int
3259 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
3260 				       struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
3261 {
3262 	struct spdk_memory_domain_rdma_ctx *domain_ctx;
3263 	struct accel_mlx5_dev *dev;
3264 	struct ibv_pd *domain_pd;
3265 	size_t ctx_size;
3266 
3267 	domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
3268 	if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
3269 		SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
3270 		return -ENOTSUP;
3271 	}
3272 	domain_pd = domain_ctx->ibv_pd;
3273 	if (spdk_unlikely(!domain_pd)) {
3274 		SPDK_ERRLOG("no destination domain PD, task %p\n", task);
3275 		return -ENOTSUP;
3276 	}
3277 	dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
3278 	if (spdk_unlikely(!dev)) {
3279 		SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name);
3280 		return -ENODEV;
3281 	}
3282 
3286 	task->qp = &dev->qp;
3287 
3288 	return 0;
3289 }
3290 
3291 static inline int
3292 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
3293 				   struct accel_mlx5_io_channel *accel_ch)
3294 {
3295 	struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
3296 	struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
3297 	struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
3298 	struct accel_mlx5_task *next;
3299 	int rc;
3300 
3301 	accel_mlx5_task_reset(first);
3302 	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
3303 		      next_base, next_base ? next_base->op_code : -1);
3304 	if (next_base) {
3305 		switch (first_base->op_code) {
3306 		case SPDK_ACCEL_OPC_COPY:
3307 			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
3308 			    first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
3309 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3310 				next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
3311 				rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
3312 				if (spdk_unlikely(rc)) {
3313 					return rc;
3314 				}
3315 				/* Update decrypt task memory domain, complete copy task */
3316 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next);
3317 				next_base->dst_domain = first_base->dst_domain;
3318 				next_base->dst_domain_ctx = first_base->dst_domain_ctx;
3319 				accel_mlx5_task_reset(next);
3320 				next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3321 				next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3322 				next->needs_data_transfer = 1;
3323 				next->inplace = 1;
3324 				spdk_accel_task_complete(first_base, 0);
3325 				return 0;
3326 			}
3327 			break;
3328 		case SPDK_ACCEL_OPC_ENCRYPT:
3329 			if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
3330 			    next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
3331 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3332 				rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
3333 				if (spdk_unlikely(rc)) {
3334 					return rc;
3335 				}
3336 
3337 				/* Update encrypt task memory domain, complete copy task */
3338 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n",
3339 					      SPDK_CONTAINEROF(next_base,
3340 							       struct accel_mlx5_task, base), first);
3341 				first_base->dst_domain = next_base->dst_domain;
3342 				first_base->dst_domain_ctx = next_base->dst_domain_ctx;
3343 				first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3344 				first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3345 				first->needs_data_transfer = 1;
3346 				first->inplace = 1;
3347 				spdk_accel_task_complete(next_base, 0);
3348 				return 0;
3349 			}
3350 			break;
3351 
3352 		default:
3353 			break;
3354 		}
3355 	}
3356 
3357 	SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base);
3358 	/* Nothing to merge, execute tasks one by one */
3359 	accel_mlx5_task_assign_qp(first, accel_ch);
3360 	accel_mlx5_task_init_opcode(first);
3361 
3362 	return 0;
3363 }
3364 
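/*
 * Driver entry point for a sequence: examine it for merge opportunities, mark the resulting
 * first task as driver-owned and submit it.
 */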
3365 static inline int
3366 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
3367 {
3368 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
3369 	struct spdk_accel_task *task;
3370 	struct accel_mlx5_task *mlx5_task;
3371 	int rc;
3372 
3373 	rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
3374 	if (spdk_unlikely(rc)) {
3375 		return rc;
3376 	}
3377 	task = spdk_accel_sequence_first_task(seq);
3378 	assert(task);
3379 	mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
3380 	mlx5_task->driver_seq = 1;
3381 
3382 	SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);
3383 
3384 	return _accel_mlx5_submit_tasks(accel_ch, task);
3385 }
3386 
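/* Accel module interface registered with the accel framework. */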
3387 static struct accel_mlx5_module g_accel_mlx5 = {
3388 	.module = {
3389 		.module_init		= accel_mlx5_init,
3390 		.module_fini		= accel_mlx5_deinit,
3391 		.write_config_json	= accel_mlx5_write_config_json,
3392 		.get_ctx_size		= accel_mlx5_get_ctx_size,
3393 		.name			= "mlx5",
3394 		.supports_opcode	= accel_mlx5_supports_opcode,
3395 		.get_io_channel		= accel_mlx5_get_io_channel,
3396 		.submit_tasks		= accel_mlx5_submit_tasks,
3397 		.crypto_key_init	= accel_mlx5_crypto_key_init,
3398 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
3399 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
3400 		.get_memory_domains	= accel_mlx5_get_memory_domains,
3401 	}
3402 };
3403 
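/* Accel driver interface used to execute whole sequences on mlx5 devices. */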
3404 static struct spdk_accel_driver g_accel_mlx5_driver = {
3405 	.name			= "mlx5",
3406 	.execute_sequence	= accel_mlx5_execute_sequence,
3407 	.get_io_channel		= accel_mlx5_get_io_channel
3408 };
3409 
3410 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
3411