1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	(qp)->ring_db = true;				\
40 	assert((task)->num_wrs < UINT16_MAX);		\
41 	(task)->num_wrs++;				\
42 } while (0)
43 
44 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
45 do {									\
46 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
47 	(dev)->wrs_in_cq++;						\
48 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
49 	(qp)->wrs_submitted++;						\
50 	(qp)->ring_db = true;						\
51 	assert((task)->num_wrs < UINT16_MAX);				\
52 	(task)->num_wrs++;						\
53 } while (0)
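
/* Bookkeeping convention for the two macros above: every WR consumes one qp
 * send slot (wrs_submitted), but only a signaled WR produces a CQE and thus
 * also consumes a CQ slot (wrs_in_cq). Task chains are built so that only
 * the last WR is signaled; e.g. a crypto request posting one UMR plus one
 * RDMA_READ takes 2 qp slots but only 1 CQ slot. */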
54 
55 struct accel_mlx5_io_channel;
56 struct accel_mlx5_task;
57 
58 struct accel_mlx5_dev_ctx {
59 	struct ibv_context *context;
60 	struct ibv_pd *pd;
61 	struct spdk_memory_domain *domain;
62 	struct spdk_mempool *psv_pool;
63 	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
64 	struct spdk_mlx5_psv **psvs;
65 	bool mkeys;
66 	bool crypto_mkeys;
67 	bool sig_mkeys;
68 	bool crypto_multi_block;
69 };
70 
71 enum accel_mlx5_opcode {
72 	ACCEL_MLX5_OPC_COPY,
73 	ACCEL_MLX5_OPC_CRYPTO,
74 	ACCEL_MLX5_OPC_CRC32C,
75 	ACCEL_MLX5_OPC_CRYPTO_MKEY,
76 	ACCEL_MLX5_OPC_MKEY,
77 	ACCEL_MLX5_OPC_LAST
78 };
79 
80 SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
81 		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");
82 
83 struct accel_mlx5_stats {
84 	uint64_t crypto_umrs;
85 	uint64_t sig_umrs;
86 	uint64_t umrs;
87 	uint64_t rdma_reads;
88 	uint64_t rdma_writes;
89 	uint64_t polls;
90 	uint64_t idle_polls;
91 	uint64_t completions;
92 	uint64_t nomem_qdepth;
93 	uint64_t nomem_mkey;
94 	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
95 };
96 
97 struct accel_mlx5_module {
98 	struct spdk_accel_module_if module;
99 	struct accel_mlx5_stats stats;
100 	struct spdk_spinlock lock;
101 	struct accel_mlx5_dev_ctx *dev_ctxs;
102 	uint32_t num_ctxs;
103 	struct accel_mlx5_attr attr;
104 	char **allowed_devs;
105 	size_t allowed_devs_count;
106 	bool initialized;
107 	bool enabled;
108 	bool crypto_supported;
109 	bool crc32c_supported;
110 };
111 
112 struct accel_mlx5_sge {
113 	uint32_t src_sge_count;
114 	uint32_t dst_sge_count;
115 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
116 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
117 };
118 
119 struct accel_mlx5_iov_sgl {
120 	struct iovec	*iov;
121 	uint32_t	iovcnt;
122 	uint32_t	iov_offset;
123 };
124 
125 struct accel_mlx5_psv_wrapper {
126 	uint32_t psv_index;
127 	struct {
128 		uint32_t error : 1;
129 		uint32_t reserved : 31;
130 	} bits;
131 	/* The mlx5 engine requires DMA-able memory; use this member to hold a copy of the user's crc value
132 	 * since we don't know what kind of memory the user's buffer resides in */
133 	uint32_t crc;
134 	uint32_t crc_lkey;
135 };
136 
137 struct accel_mlx5_task {
138 	struct spdk_accel_task base;
139 	struct accel_mlx5_iov_sgl src;
140 	struct accel_mlx5_iov_sgl dst;
141 	struct accel_mlx5_qp *qp;
142 	STAILQ_ENTRY(accel_mlx5_task) link;
143 	uint16_t num_reqs;
144 	uint16_t num_completed_reqs;
145 	uint16_t num_submitted_reqs;
146 	uint16_t num_ops; /* number of allocated mkeys or number of operations */
147 	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
148 	union {
149 		struct {
150 			uint16_t blocks_per_req;
151 			uint16_t num_processed_blocks;
152 			uint16_t num_blocks;
153 		};
154 		struct {
155 			struct accel_mlx5_psv_wrapper *psv;
156 			uint32_t last_umr_len;
157 			uint8_t last_mkey_idx;
158 		};
159 	};
160 	union {
161 		uint16_t raw;
162 		struct {
163 			uint16_t inplace : 1;
164 			uint16_t driver_seq : 1;
165 			uint16_t needs_data_transfer : 1;
166 			uint16_t enc_order : 2;
167 			uint16_t mlx5_opcode: 4;
168 		};
169 	};
170 	/* Keep this array last since not all elements might be accessed; this reduces the amount of data
171 	 * to be cached */
172 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
173 };
174 
175 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");
176 
177 struct accel_mlx5_qp {
178 	struct spdk_mlx5_qp *qp;
179 	struct ibv_qp *verbs_qp;
180 	struct accel_mlx5_dev *dev;
181 	/* Tasks submitted to HW. Even in the error case, we can't complete a task until we reap the
182 	 * completions for all submitted requests */
183 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
184 	uint16_t wrs_submitted;
185 	uint16_t wrs_max;
186 	bool ring_db;
187 	bool recovering;
188 	struct spdk_poller *recover_poller;
189 };
190 
191 struct accel_mlx5_dev {
192 	struct accel_mlx5_qp qp;
193 	struct spdk_mlx5_cq *cq;
194 	struct spdk_mlx5_mkey_pool *mkeys;
195 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
196 	struct spdk_mlx5_mkey_pool *sig_mkeys;
197 	struct spdk_rdma_utils_mem_map *mmap;
198 	struct accel_mlx5_dev_ctx *dev_ctx;
199 	struct spdk_io_channel *ch;
200 	uint16_t wrs_in_cq;
201 	uint16_t wrs_in_cq_max;
202 	uint16_t crypto_split_blocks;
203 	bool crypto_multi_block;
204 	/* Pending tasks waiting for resources (mkeys or qp slots) */
205 	STAILQ_HEAD(, accel_mlx5_task) nomem;
206 	TAILQ_ENTRY(accel_mlx5_dev) link;
207 	struct accel_mlx5_stats stats;
208 };
209 
210 struct accel_mlx5_io_channel {
211 	struct accel_mlx5_dev *devs;
212 	struct spdk_poller *poller;
213 	uint16_t num_devs;
214 	/* Index in \b devs to be used for operations in a round-robin manner */
215 	uint16_t dev_idx;
216 	bool poller_handler_registered;
217 };
218 
219 struct accel_mlx5_task_operations {
220 	int (*init)(struct accel_mlx5_task *task);
221 	int (*process)(struct accel_mlx5_task *task);
222 	int (*cont)(struct accel_mlx5_task *task);
223 	void (*complete)(struct accel_mlx5_task *task);
224 };
225 
226 struct accel_mlx5_psv_pool_iter_cb_args {
227 	struct accel_mlx5_dev_ctx *dev;
228 	struct spdk_rdma_utils_mem_map *map;
229 	int rc;
230 };
231 
232 struct accel_mlx5_dump_stats_ctx {
233 	struct accel_mlx5_stats total;
234 	struct spdk_json_write_ctx *w;
235 	enum accel_mlx5_dump_state_level level;
236 	accel_mlx5_dump_stat_done_cb cb;
237 	void *ctx;
238 };
239 
240 static struct accel_mlx5_module g_accel_mlx5;
241 static struct spdk_accel_driver g_accel_mlx5_driver;
242 
243 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
244 		struct spdk_accel_sequence *seq);
245 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task);
246 
247 static inline void
248 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
249 {
250 	s->iov = iov;
251 	s->iovcnt = iovcnt;
252 	s->iov_offset = 0;
253 }
254 
255 static inline void
256 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
257 {
258 	s->iov_offset += step;
259 	while (s->iovcnt > 0) {
260 		assert(s->iov != NULL);
261 		if (s->iov_offset < s->iov->iov_len) {
262 			break;
263 		}
264 
265 		s->iov_offset -= s->iov->iov_len;
266 		s->iov++;
267 		s->iovcnt--;
268 	}
269 }
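
/* Illustrative example: with iov = [{len 4096}, {len 4096}], iovcnt = 2 and
 * iov_offset = 0, accel_mlx5_iov_sgl_advance(s, 6144) leaves the sgl pointing
 * at the second entry with iov_offset = 2048 and iovcnt = 1. */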
270 
271 static inline void
272 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
273 {
274 	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
275 		      max_iovs, s->iov_offset, step);
276 	while (s->iovcnt <= max_iovs) {
277 		assert(s->iov != NULL);
278 		if (s->iov_offset >= step) {
279 			s->iov_offset -= step;
280 			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
281 				      s->iov_offset);
282 			return;
283 		}
284 		step -= s->iov_offset;
285 		s->iov--;
286 		s->iovcnt++;
287 		s->iov_offset = s->iov->iov_len;
288 		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
289 			      s->iov_offset, step);
290 	}
291 
292 	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
293 	assert(0);
294 }
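
/* Unwind is the inverse of advance: it walks backwards by `step` bytes,
 * restoring iov, iovcnt and iov_offset. Continuing the example above,
 * accel_mlx5_iov_sgl_unwind(s, 2, 6144) returns the sgl to the first entry
 * with iov_offset = 0 and iovcnt = 2. */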
295 
296 static inline int
297 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
298 {
299 	int i;
300 
301 	assert(sge_count > 0);
302 	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
303 	for (i = (int)sge_count - 1; i >= 0; i--) {
304 		if (sge[i].length > step) {
305 			sge[i].length -= step;
306 			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
307 			return (int)i + 1;
308 		}
309 		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
310 		step -= sge[i].length;
311 	}
312 
313 	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
314 	assert(step == 0);
315 
316 	return 0;
317 }
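
/* Illustrative example: for sge = [{length 4096}, {length 4096}] and
 * step = 2048, the last entry is trimmed to 2048 bytes and the function
 * returns 2. With step = 4096 the last entry is dropped completely and the
 * function returns 1. */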
318 
319 static inline void
320 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
321 {
322 	struct accel_mlx5_dev *dev = task->qp->dev;
323 
324 	assert(task->num_ops);
325 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
326 	spdk_accel_task_complete(&task->base, 0);
327 }
328 
329 static inline void
330 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
331 {
332 	struct accel_mlx5_dev *dev = task->qp->dev;
333 	struct spdk_accel_task *next;
334 	struct spdk_accel_sequence *seq;
335 	bool driver_seq;
336 
337 	assert(task->num_reqs == task->num_completed_reqs);
338 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
339 
340 	if (task->num_ops) {
341 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
342 			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
343 		}
344 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
345 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
346 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
347 		}
348 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY) {
349 			spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, task->mkeys, task->num_ops);
350 		}
351 	}
352 	next = spdk_accel_sequence_next_task(&task->base);
353 	seq = task->base.seq;
354 	driver_seq = task->driver_seq;
355 
356 	assert(task->num_reqs == task->num_completed_reqs);
357 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc);
358 	spdk_accel_task_complete(&task->base, rc);
359 
360 	if (driver_seq) {
361 		struct spdk_io_channel *ch = task->qp->dev->ch;
362 
363 		assert(seq);
364 		if (next) {
365 			accel_mlx5_execute_sequence(ch, seq);
366 		} else {
367 			spdk_accel_sequence_continue(seq);
368 		}
369 	}
370 }
371 
372 static int
373 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
374 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
375 {
376 	struct spdk_rdma_utils_memory_translation map_translation;
377 	struct spdk_memory_domain_translation_result domain_translation;
378 	struct spdk_memory_domain_translation_ctx local_ctx;
379 	int rc;
380 
381 	if (domain) {
382 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
383 		local_ctx.size = sizeof(local_ctx);
384 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
385 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
386 						       &local_ctx, addr, size, &domain_translation);
387 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
388 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
389 				    domain_translation.iov_count);
390 			if (rc == 0) {
391 				rc = -EINVAL;
392 			}
393 
394 			return rc;
395 		}
396 		sge->lkey = domain_translation.rdma.lkey;
397 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
398 		sge->length = domain_translation.iov.iov_len;
399 	} else {
400 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
401 						     &map_translation);
402 		if (spdk_unlikely(rc)) {
403 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
404 			return rc;
405 		}
406 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
407 		sge->addr = (uint64_t)addr;
408 		sge->length = size;
409 	}
410 
411 	return 0;
412 }
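
/* Two translation paths: when the caller supplies a memory domain, the
 * address is translated with spdk_memory_domain_translate_data(); otherwise
 * the lkey is looked up in the device's local memory map (dev->mmap).
 * Either way the result is a single ibv_sge usable on this qp. */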
413 
414 static inline int
415 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
416 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
417 			  struct spdk_memory_domain *domain, void *domain_ctx)
418 {
419 	void *addr;
420 	uint32_t remaining = len;
421 	uint32_t size;
422 	int i = 0;
423 	int rc;
424 
425 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
426 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
427 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
428 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
429 		if (spdk_unlikely(rc)) {
430 			return rc;
431 		}
432 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
433 			      sge[i].length, sge[i].addr);
434 		accel_mlx5_iov_sgl_advance(iovs, size);
435 		i++;
436 		assert(remaining >= size);
437 		remaining -= size;
438 	}
439 	*_remaining = remaining;
440 
441 	return i;
442 }
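
/* Returns the number of sges filled (positive) or a negative errno. If the
 * requested len does not fit into ACCEL_MLX5_MAX_SGE entries, the shortfall
 * is reported via *_remaining and the caller decides how to split the task. */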
443 
444 static inline bool
445 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
446 {
447 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
448 }
449 
450 static inline uint16_t
451 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
452 {
453 	assert(qp->wrs_max >= qp->wrs_submitted);
454 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
455 
456 	/* We produce only 1 CQE per task submission, so we need 1 free CQ slot */
457 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
458 		return 0;
459 	}
460 
461 	return qp->wrs_max - qp->wrs_submitted;
462 }
463 
464 static inline uint32_t
465 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
466 {
467 	uint32_t num_ops;
468 	int rc;
469 
470 	assert(task->num_reqs > task->num_completed_reqs);
471 	num_ops = task->num_reqs - task->num_completed_reqs;
472 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
473 	if (!num_ops) {
474 		return 0;
475 	}
476 	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
477 	if (spdk_unlikely(rc)) {
478 		return 0;
479 	}
480 	assert(num_ops <= UINT16_MAX);
481 	task->num_ops = num_ops;
482 
483 	return num_ops;
484 }
485 
486 static inline uint8_t
487 bs_to_bs_selector(uint32_t bs)
488 {
489 	switch (bs) {
490 	case 512:
491 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
492 	case 520:
493 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
494 	case 4096:
495 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
496 	case 4160:
497 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
498 	default:
499 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
500 	}
501 }
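
/* Only the block sizes commonly used by storage protocols (512, 520, 4096
 * and 4160 bytes) have hardware selectors. Any other size maps to the
 * RESERVED selector, which callers treat as unsupported and fail the task
 * with -EINVAL. */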
502 
503 static inline int
504 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
505 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
506 				uint64_t wr_id, uint32_t flags)
507 {
508 	struct spdk_mlx5_umr_crypto_attr cattr;
509 	struct spdk_mlx5_umr_attr umr_attr;
510 	struct accel_mlx5_qp *qp = mlx5_task->qp;
511 	struct accel_mlx5_dev *dev = qp->dev;
512 	struct spdk_accel_task *task = &mlx5_task->base;
513 	uint32_t length, remaining = 0, block_size = task->block_size;
514 	int rc;
515 
516 	length = num_blocks * block_size;
517 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
518 		      num_blocks);
519 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
520 				       task->src_domain, task->src_domain_ctx);
521 	if (spdk_unlikely(rc <= 0)) {
522 		if (rc == 0) {
523 			rc = -EINVAL;
524 		}
525 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
526 		return rc;
527 	}
528 	sge->src_sge_count = rc;
529 	if (spdk_unlikely(remaining)) {
530 		uint32_t new_len = length - remaining;
531 		uint32_t aligned_len, updated_num_blocks;
532 
533 		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
534 		if (new_len < block_size) {
535 			/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
536 			 * anything */
537 			return -ERANGE;
538 		}
539 
540 		/* Regular integer division; we need to round down to the previous block boundary */
541 		updated_num_blocks = new_len / block_size;
542 		assert(updated_num_blocks);
543 		assert(updated_num_blocks < num_blocks);
544 		aligned_len = updated_num_blocks * block_size;
545 
546 		if (aligned_len < new_len) {
547 			uint32_t dt = new_len - aligned_len;
548 
549 			/* We can't process a partial block; we need to unwind the src iov_sgl and sge to the
550 			 * previous block boundary */
551 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
552 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
553 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
554 			if (!sge->src_sge_count) {
555 				return -ERANGE;
556 			}
557 		}
558 		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
559 		length = aligned_len;
560 		num_blocks = updated_num_blocks;
561 	}
562 
563 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
564 	cattr.keytag = 0;
565 	cattr.dek_obj_id = dek_data->dek_obj_id;
566 	cattr.tweak_mode = dek_data->tweak_mode;
567 	cattr.enc_order = mlx5_task->enc_order;
568 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
569 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
570 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
571 		return -EINVAL;
572 	}
573 	umr_attr.mkey = mkey;
574 	umr_attr.sge = sge->src_sge;
575 
576 	if (!mlx5_task->inplace) {
577 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
578 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
579 					       task->dst_domain, task->dst_domain_ctx);
580 		if (spdk_unlikely(rc <= 0)) {
581 			if (rc == 0) {
582 				rc = -EINVAL;
583 			}
584 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
585 			return rc;
586 		}
587 		sge->dst_sge_count = rc;
588 		if (spdk_unlikely(remaining)) {
589 			uint32_t new_len = length - remaining;
590 			uint32_t aligned_len, updated_num_blocks, dt;
591 
592 			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
593 			if (new_len < block_size) {
594 				/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
595 				 * anything */
596 				return -ERANGE;
597 			}
598 
599 			/* Regular integer division; we need to round down to the previous block boundary */
600 			updated_num_blocks = new_len / block_size;
601 			assert(updated_num_blocks);
602 			assert(updated_num_blocks < num_blocks);
603 			aligned_len = updated_num_blocks * block_size;
604 
605 			if (aligned_len < new_len) {
606 				dt = new_len - aligned_len;
607 				assert(dt > 0 && dt < length);
608 				/* We can't process a partial block; we need to unwind the src and dst iov_sgl and sge
609 				 * to the previous block boundary */
610 				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
611 				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
612 				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
613 				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
614 				if (!sge->dst_sge_count) {
615 					return -ERANGE;
616 				}
617 			}
618 			assert(length > aligned_len);
619 			dt = length - aligned_len;
620 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
621 			/* Do the same for the src iov_sgl and sge. In the worst case we may unwind SRC twice */
622 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
623 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
624 			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
625 			if (!sge->src_sge_count) {
626 				return -ERANGE;
627 			}
628 			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
629 			length = aligned_len;
630 			num_blocks = updated_num_blocks;
631 		}
632 	}
633 
634 	SPDK_DEBUGLOG(accel_mlx5,
635 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
636 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
637 		      num_blocks);
638 
639 	umr_attr.sge_count = sge->src_sge_count;
640 	umr_attr.umr_len = length;
641 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
642 	mlx5_task->num_processed_blocks += num_blocks;
643 
644 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);
645 
646 	return rc;
647 }
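
/* Fragmentation handling above, by example: with block_size = 512 and
 * num_blocks = 8 (length 4096), suppose only 3000 bytes fit into
 * ACCEL_MLX5_MAX_SGE src entries (remaining = 1096). Then new_len = 3000,
 * updated_num_blocks = 5, aligned_len = 2560, and the extra 440 bytes are
 * unwound from both the iov sgl and the sges so the UMR covers exactly 5
 * whole blocks. The out-of-place path repeats this for dst and may unwind
 * src a second time to keep both sides block-aligned. */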
648 
649 static inline int
650 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
651 {
652 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
653 	struct spdk_mlx5_crypto_dek_data dek_data;
654 	struct accel_mlx5_qp *qp = mlx5_task->qp;
655 	struct accel_mlx5_dev *dev = qp->dev;
656 	/* First RDMA after UMR must have a SMALL_FENCE */
657 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
658 	uint16_t num_blocks;
659 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
660 				    mlx5_task->num_ops);
661 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
662 	uint16_t i;
663 	int rc;
664 
665 	assert(qp_slot > 1);
666 	num_ops = spdk_min(num_ops, qp_slot >> 1);
667 	if (spdk_unlikely(!num_ops)) {
668 		return -EINVAL;
669 	}
670 
671 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
672 	if (spdk_unlikely(rc)) {
673 		return rc;
674 	}
675 
676 	mlx5_task->num_wrs = 0;
677 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
678 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
679 	for (i = 0; i < num_ops; i++) {
680 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
681 			/* The last request may consume fewer blocks than calculated if crypto_multi_block is true */
682 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
683 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
684 		} else {
685 			num_blocks = mlx5_task->blocks_per_req;
686 		}
687 
688 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
689 						     &dek_data, 0, 0);
690 		if (spdk_unlikely(rc)) {
691 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
692 			return rc;
693 		}
694 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
695 		dev->stats.crypto_umrs++;
696 	}
697 
698 	/* Loop over `num_ops - 1` requests to simplify flags handling; the last request is signaled separately */
699 	for (i = 0; i < num_ops - 1; i++) {
700 		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
701 		if (mlx5_task->inplace) {
702 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
703 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
704 		} else {
705 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
706 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
707 		}
708 		if (spdk_unlikely(rc)) {
709 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
710 			return rc;
711 		}
712 
713 		first_rdma_fence = 0;
714 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
715 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
716 		mlx5_task->num_submitted_reqs++;
717 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
718 		dev->stats.rdma_reads++;
719 	}
720 
721 	if (mlx5_task->inplace) {
722 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
723 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
724 	} else {
725 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
726 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
727 	}
728 	if (spdk_unlikely(rc)) {
729 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
730 		return rc;
731 	}
732 
733 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
734 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
735 	mlx5_task->num_submitted_reqs++;
736 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
737 	dev->stats.rdma_reads++;
738 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
739 
740 	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
741 			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
742 		/* We hit the "out of sge entries" case with a highly fragmented
743 		 * payload. In that case the accel_mlx5_configure_crypto_umr function
744 		 * handled fewer data blocks than expected.
745 		 * That means we need at least 1 more request to complete this task; this request will be
746 		 * executed once all submitted ones are completed */
747 		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
748 			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
749 		mlx5_task->num_reqs++;
750 	}
751 
752 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
753 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
754 
755 	return 0;
756 }
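
/* The WQE chain built above is: num_ops UMRs followed by num_ops RDMA_READs.
 * The first RDMA_READ after the UMRs carries a small fence, and only the
 * last one is signaled (SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) with the task
 * pointer as wr_id, so the whole batch completes with a single CQE. */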
757 
758 static inline int
759 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
760 {
761 	struct accel_mlx5_qp *qp = task->qp;
762 	struct accel_mlx5_dev *dev = qp->dev;
763 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
764 
765 	assert(task->num_reqs > task->num_completed_reqs);
766 	if (task->num_ops == 0) {
767 		/* No mkeys allocated, try to allocate now */
768 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
769 			/* Pool is empty, queue this task */
770 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
771 			dev->stats.nomem_mkey++;
772 			return -ENOMEM;
773 		}
774 	}
775 	/* We need to post at least 1 UMR and 1 RDMA operation */
776 	if (spdk_unlikely(qp_slot < 2)) {
777 		/* QP is full, queue this task */
778 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
779 		task->qp->dev->stats.nomem_qdepth++;
780 		return -ENOMEM;
781 	}
782 
783 	return accel_mlx5_crypto_task_process(task);
784 }
785 
786 static inline int
787 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
788 {
789 	struct spdk_accel_task *task = &mlx5_task->base;
790 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
791 	uint64_t src_nbytes = task->nbytes;
792 #ifdef DEBUG
793 	uint64_t dst_nbytes;
794 	uint32_t i;
795 #endif
796 	bool crypto_key_ok;
797 
798 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
799 			 task->crypto_key->priv);
800 	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
801 		if (crypto_key_ok) {
802 			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
803 				    mlx5_task->base.block_size);
804 		} else {
805 			SPDK_ERRLOG("Wrong crypto key provided\n");
806 		}
807 		return -EINVAL;
808 	}
809 
810 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
811 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
812 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
813 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
814 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
815 		mlx5_task->inplace = 1;
816 	} else {
817 #ifdef DEBUG
818 		dst_nbytes = 0;
819 		for (i = 0; i < task->d.iovcnt; i++) {
820 			dst_nbytes += task->d.iovs[i].iov_len;
821 		}
822 
823 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
824 			return -EINVAL;
825 		}
826 #endif
827 		mlx5_task->inplace = 0;
828 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
829 	}
830 
831 	if (dev->crypto_multi_block) {
832 		if (dev->crypto_split_blocks) {
833 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
834 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
835 			/* The last req may consume fewer blocks */
836 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
837 		} else {
838 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
839 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
840 
841 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
842 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
843 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
844 			} else {
845 				mlx5_task->num_reqs = 1;
846 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
847 			}
848 		}
849 	} else {
850 		mlx5_task->num_reqs = mlx5_task->num_blocks;
851 		mlx5_task->blocks_per_req = 1;
852 	}
853 
854 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) {
855 		/* Pool is empty, queue this task */
856 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
857 		dev->stats.nomem_mkey++;
858 		return -ENOMEM;
859 	}
860 	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
861 		/* Queue is full, queue this task */
862 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
863 			      mlx5_task->qp);
864 		dev->stats.nomem_qdepth++;
865 		return -ENOMEM;
866 	}
867 
868 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
869 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
870 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
871 
872 	return 0;
873 }
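
/* Request splitting, by example: with crypto_multi_block enabled and
 * crypto_split_blocks = 8, a 20-block task becomes num_reqs = 3 with
 * blocks_per_req = 8 (the last request carries the remaining 4 blocks).
 * Without multi-block support every block becomes its own request. */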
874 
875 static inline void
876 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
877 {
878 	spdk_accel_task_complete(&mlx5_task->base, 0);
879 }
880 
881 static inline int
882 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
883 				 uint64_t wrid, uint32_t fence)
884 {
885 	struct spdk_accel_task *task = &mlx5_task->base;
886 	struct accel_mlx5_sge sge;
887 	uint32_t remaining = 0;
888 	uint32_t dst_len;
889 	int rc;
890 
891 	/* Limit each RDMA_WRITE to the length of the dst buffer. Not all src buffers may fit into one dst
892 	 * buffer due to the ACCEL_MLX5_MAX_SGE limitation. If that is the case, remaining is not zero */
893 	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
894 	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
895 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
896 				       task->src_domain, task->src_domain_ctx);
897 	if (spdk_unlikely(rc <= 0)) {
898 		if (rc == 0) {
899 			rc = -EINVAL;
900 		}
901 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
902 		return rc;
903 	}
904 	sge.src_sge_count = rc;
905 	assert(dst_len > remaining);
906 	dst_len -= remaining;
907 
908 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
909 				       task->dst_domain, task->dst_domain_ctx);
910 	if (spdk_unlikely(rc != 1)) {
911 		/* We use a single dst entry; any result other than 1 is an error */
912 		if (rc == 0) {
913 			rc = -EINVAL;
914 		}
915 		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
916 		return rc;
917 	}
918 	if (spdk_unlikely(remaining)) {
919 		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
920 		assert(0);
921 		return -EINVAL;
922 	}
923 
924 	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
925 				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
926 	if (spdk_unlikely(rc)) {
927 		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
928 		return rc;
929 	}
930 	qp->dev->stats.rdma_writes++;
931 
932 	return 0;
933 }
934 
935 static inline int
936 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
937 {
939 	struct accel_mlx5_qp *qp = mlx5_task->qp;
940 	struct accel_mlx5_dev *dev = qp->dev;
941 	uint16_t i;
942 	int rc;
943 
944 	mlx5_task->num_wrs = 0;
945 	assert(mlx5_task->num_reqs > 0);
946 	assert(mlx5_task->num_ops > 0);
947 
948 	/* Handle n-1 reqs in order to simplify wrid and fence handling */
949 	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
950 		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
951 		if (spdk_unlikely(rc)) {
952 			return rc;
953 		}
954 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
955 		mlx5_task->num_submitted_reqs++;
956 	}
957 
958 	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
959 					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
960 	if (spdk_unlikely(rc)) {
961 		return rc;
962 	}
963 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
964 	mlx5_task->num_submitted_reqs++;
965 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
966 
967 	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);
968 
969 	return 0;
970 }
971 
972 static inline int
973 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
974 {
975 	struct accel_mlx5_qp *qp = task->qp;
976 	struct accel_mlx5_dev *dev = qp->dev;
977 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
978 
979 	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
980 	if (spdk_unlikely(task->num_ops == 0)) {
981 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
982 		dev->stats.nomem_qdepth++;
983 		return -ENOMEM;
984 	}
985 	return accel_mlx5_copy_task_process(task);
986 }
987 
988 static inline uint32_t
989 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
990 			       struct iovec *dst_iov, uint32_t dst_iovcnt)
991 {
992 	uint32_t src = 0;
993 	uint32_t dst = 0;
994 	uint64_t src_offset = 0;
995 	uint64_t dst_offset = 0;
996 	uint32_t num_ops = 0;
997 	uint32_t src_sge_count = 0;
998 
999 	while (src < src_iovcnt && dst < dst_iovcnt) {
1000 		uint64_t src_len = src_iov[src].iov_len - src_offset;
1001 		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;
1002 
1003 		if (dst_len < src_len) {
1004 			dst_offset = 0;
1005 			src_offset += dst_len;
1006 			dst++;
1007 			num_ops++;
1008 			src_sge_count = 0;
1009 		} else if (src_len < dst_len) {
1010 			dst_offset += src_len;
1011 			src_offset = 0;
1012 			src++;
1013 			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
1014 				num_ops++;
1015 				src_sge_count = 0;
1016 			}
1017 		} else {
1018 			dst_offset = 0;
1019 			src_offset = 0;
1020 			dst++;
1021 			src++;
1022 			num_ops++;
1023 			src_sge_count = 0;
1024 		}
1025 	}
1026 
1027 	assert(src == src_iovcnt);
1028 	assert(dst == dst_iovcnt);
1029 	assert(src_offset == 0);
1030 	assert(dst_offset == 0);
1031 	return num_ops;
1032 }
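
/* Illustrative example: src iovs of [2K, 2K, 2K] copied to dst iovs of
 * [3K, 3K] yields num_ops = 2: each dst entry is filled by one RDMA_WRITE
 * whose source sge list crosses a src iov boundary. */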
1033 
1034 static inline int
1035 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
1036 {
1037 	struct spdk_accel_task *task = &mlx5_task->base;
1038 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1039 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1040 
1041 	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
1042 		mlx5_task->num_reqs = task->d.iovcnt;
1043 	} else if (task->d.iovcnt == 1) {
1044 		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
1045 	} else {
1046 		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
1047 				      task->d.iovs, task->d.iovcnt);
1048 	}
1049 	mlx5_task->inplace = 0;
1050 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1051 	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1052 	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
1053 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1054 		qp->dev->stats.nomem_qdepth++;
1055 		return -ENOMEM;
1056 	}
1057 	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
1058 		      mlx5_task->num_ops);
1059 
1060 	return 0;
1061 }
1062 
1063 static inline uint32_t
1064 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
1065 {
1066 	uint32_t i;
1067 	size_t iov_len;
1068 
1069 	for (i = 0; *len != 0 && i < iovcnt; i++) {
1070 		iov_len = iov[i].iov_len - *iov_offset;
1071 
1072 		if (iov_len < *len) {
1073 			*iov_offset = 0;
1074 			*len -= iov_len;
1075 			continue;
1076 		}
1077 		if (iov_len == *len) {
1078 			*iov_offset = 0;
1079 			i++;
1080 		} else { /* iov_len > *len */
1081 			*iov_offset += *len;
1082 		}
1083 		*len = 0;
1084 		break;
1085 	}
1086 
1087 	return i;
1088 }
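
/* Advances through the iov array by up to *len bytes. Returns the number of
 * fully consumed entries; when the advance stops mid-entry, *iov_offset is
 * the offset inside that entry. Any bytes that did not fit are left in *len. */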
1089 
1090 static inline void
1091 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
1092 {
1093 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1094 
1095 	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
1096 	/* Normal task completion without allocated mkeys is not possible */
1097 	assert(mlx5_task->num_ops);
1098 	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
1099 	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
1100 	spdk_accel_task_complete(&mlx5_task->base, 0);
1101 }
1102 
1103 static inline int
1104 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
1105 				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
1106 				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
1107 				  bool sig_init, bool sig_check_gen)
1108 {
1109 	struct spdk_mlx5_umr_sig_attr sattr = {
1110 		.seed = mlx5_task->base.seed ^ UINT32_MAX,
1111 		.psv_index = mlx5_task->psv->psv_index,
1112 		.domain = sig_domain,
1113 		.sigerr_count = mkey->sig.sigerr_count,
1114 		.raw_data_size = umr_len,
1115 		.init = sig_init,
1116 		.check_gen = sig_check_gen,
1117 	};
1118 	struct spdk_mlx5_umr_attr umr_attr = {
1119 		.mkey = mkey->mkey,
1120 		.umr_len = umr_len,
1121 		.sge_count = sge_count,
1122 		.sge = sge,
1123 	};
1124 
1125 	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
1126 }
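
/* Note that both the seed above and the final CRC read back in
 * accel_mlx5_crc_task_complete() are XORed with UINT32_MAX: the value the
 * signature engine operates on is the bit-inverted form of the CRC32C value
 * exposed through the accel API. */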
1127 
1128 static inline int
1129 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
1130 {
1131 	struct spdk_accel_task *task = &mlx5_task->base;
1132 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1133 	struct accel_mlx5_dev *dev = qp->dev;
1134 	uint32_t remaining;
1135 	int rc;
1136 
1137 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
1138 				       task->src_domain, task->src_domain_ctx);
1139 	if (spdk_unlikely(rc <= 0)) {
1140 		if (rc == 0) {
1141 			rc = -EINVAL;
1142 		}
1143 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
1144 		return rc;
1145 	}
1146 	assert(remaining == 0);
1147 	sge->src_sge_count = rc;
1148 
1149 	if (!mlx5_task->inplace) {
1150 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
1151 					       task->dst_domain, task->dst_domain_ctx);
1152 		if (spdk_unlikely(rc <= 0)) {
1153 			if (rc == 0) {
1154 				rc = -EINVAL;
1155 			}
1156 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
1157 			return rc;
1158 		}
1159 		assert(remaining == 0);
1160 		sge->dst_sge_count = rc;
1161 	}
1162 
1163 	return 0;
1164 }
1165 
1166 static inline int
1167 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
1168 {
1169 	struct accel_mlx5_sge sges;
1170 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1171 	struct accel_mlx5_dev *dev = qp->dev;
1172 	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1173 				    mlx5_task->num_ops);
1174 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1175 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1176 	struct ibv_sge *sge;
1177 	int rc;
1178 	uint16_t sge_count;
1179 
1180 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1181 	if (spdk_unlikely(!num_ops)) {
1182 		return -EINVAL;
1183 	}
1184 
1185 	mlx5_task->num_wrs = 0;
1186 	/* At this moment we have as many requests as can be submitted to a qp */
1187 	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
1188 	if (spdk_unlikely(rc)) {
1189 		return rc;
1190 	}
1191 	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
1192 					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true);
1193 	if (spdk_unlikely(rc)) {
1194 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1195 		return rc;
1196 	}
1197 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1198 	dev->stats.sig_umrs++;
1199 
1200 	if (mlx5_task->inplace) {
1201 		sge = sges.src_sge;
1202 		sge_count = sges.src_sge_count;
1203 	} else {
1204 		sge = sges.dst_sge;
1205 		sge_count = sges.dst_sge_count;
1206 	}
1207 
1208 	/*
1209 	 * Add the crc destination to the end of sges. A free entry must be available for CRC
1210 	 * because the task init function reserved it.
1211 	 */
1212 	assert(sge_count < ACCEL_MLX5_MAX_SGE);
1213 	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
1214 	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1215 	sge[sge_count++].length = sizeof(uint32_t);
1216 
1217 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1218 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1219 		if (spdk_unlikely(rc)) {
1220 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1221 			return rc;
1222 		}
1223 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1224 	}
1225 
1226 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
1227 				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1228 	if (spdk_unlikely(rc)) {
1229 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1230 		return rc;
1231 	}
1232 	mlx5_task->num_submitted_reqs++;
1233 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1234 	dev->stats.rdma_reads++;
1235 
1236 	return 0;
1237 }
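
/* Single-request CRC chain: one signature UMR over the payload, an optional
 * SET_PSV to clear a previous PSV error, and one signaled RDMA_READ whose
 * sge list gets the 4-byte CRC destination appended at the end. */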
1238 
1239 static inline int
1240 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
1241 				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
1242 				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
1243 {
1244 	int umr_idx = 0;
1245 	int rdma_idx = 0;
1246 	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1247 	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1248 	size_t umr_iov_offset;
1249 	size_t rdma_iov_offset;
1250 	size_t umr_len = 0;
1251 	void *sge_addr;
1252 	size_t sge_len;
1253 	size_t umr_sge_len;
1254 	size_t rdma_sge_len;
1255 	int rc;
1256 
1257 	umr_iov_offset = umr_iovs->iov_offset;
1258 	rdma_iov_offset = rdma_iovs->iov_offset;
1259 
1260 	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
1261 		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
1262 		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
1263 		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;
1264 
1265 		if (umr_sge_len == rdma_sge_len) {
1266 			rdma_idx++;
1267 			umr_iov_offset = 0;
1268 			rdma_iov_offset = 0;
1269 			sge_len = umr_sge_len;
1270 		} else if (umr_sge_len < rdma_sge_len) {
1271 			umr_iov_offset = 0;
1272 			rdma_iov_offset += umr_sge_len;
1273 			sge_len = umr_sge_len;
1274 		} else {
1275 			size_t remaining;
1276 
1277 			remaining = umr_sge_len - rdma_sge_len;
1278 			while (remaining) {
1279 				rdma_idx++;
1280 				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
1281 					break;
1282 				}
1283 				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
1284 				if (remaining == rdma_sge_len) {
1285 					rdma_idx++;
1286 					rdma_iov_offset = 0;
1287 					umr_iov_offset = 0;
1288 					remaining = 0;
1289 					break;
1290 				}
1291 				if (remaining < rdma_sge_len) {
1292 					rdma_iov_offset = remaining;
1293 					umr_iov_offset = 0;
1294 					remaining = 0;
1295 					break;
1296 				}
1297 				remaining -= rdma_sge_len;
1298 			}
1299 			sge_len = umr_sge_len - remaining;
1300 		}
1301 		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
1302 		if (spdk_unlikely(rc)) {
1303 			return -EINVAL;
1304 		}
1305 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
1306 			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
1307 		umr_len += sge_len;
1308 		umr_idx++;
1309 	}
1310 	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
1311 	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
1312 	*len = umr_len;
1313 
1314 	return umr_idx;
1315 }
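
/* Fills UMR sges from umr_iovs while walking rdma_iovs in lockstep, so that
 * the length covered by this UMR never exceeds what ACCEL_MLX5_MAX_SGE RDMA
 * entries can later transfer. Both sgls are advanced by the covered length,
 * which is returned in *len along with the sge count. */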
1316 
1317 static inline int
1318 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
1319 {
1320 	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
1321 	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
1322 	struct spdk_accel_task *task = &mlx5_task->base;
1323 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1324 	struct accel_mlx5_dev *dev = qp->dev;
1325 	struct accel_mlx5_iov_sgl umr_sgl;
1326 	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
1327 	struct accel_mlx5_iov_sgl rdma_sgl;
1328 	uint64_t umr_offset;
1329 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
1330 	int sge_count;
1331 	uint32_t remaining;
1332 	int rc;
1333 	uint16_t i;
1334 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1335 				    mlx5_task->num_ops);
1336 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1337 	bool sig_init, sig_check_gen = false;
1338 
1339 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1340 	if (spdk_unlikely(!num_ops)) {
1341 		return -EINVAL;
1342 	}
1343 	/* Init signature on the first UMR */
1344 	sig_init = !mlx5_task->num_submitted_reqs;
1345 
1346 	/*
1347 	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
1348 	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
1349 	 *
1350 	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
1351 	 * the source accel_mlx5_iov_sgl.
1352 	 *
1353 	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
1354 	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
1355 	 */
1356 	if (mlx5_task->inplace) {
1357 		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1358 		umr_sgl_ptr = &umr_sgl;
1359 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1360 	} else {
1361 		umr_sgl_ptr = &mlx5_task->src;
1362 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1363 	}
1364 	mlx5_task->num_wrs = 0;
1365 	for (i = 0; i < num_ops; i++) {
1366 		/*
1367 		 * The last request may have only CRC. Skip UMR in this case because the MKey from
1368 		 * the previous request is used.
1369 		 */
1370 		if (umr_sgl_ptr->iovcnt == 0) {
1371 			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
1372 			break;
1373 		}
1374 		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
1375 				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
1376 		if (spdk_unlikely(sge_count <= 0)) {
1377 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1378 			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
1379 			return rc;
1380 		}
1381 		if (umr_sgl_ptr->iovcnt == 0) {
1382 			/*
1383 			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
1384 			 * the last UMR in this case. Since the last request can be postponed to the next
1385 			 * call of this function, we must save the MKey to the task structure.
1386 			 */
1387 			mlx5_task->last_umr_len = umr_len[i];
1388 			mlx5_task->last_mkey_idx = i;
1389 			sig_check_gen = true;
1390 		}
1391 		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
1392 						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
1393 						       sig_check_gen);
1394 		if (spdk_unlikely(rc)) {
1395 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1396 			return rc;
1397 		}
1398 		sig_init = false;
1399 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1400 		dev->stats.sig_umrs++;
1401 	}
1402 
1403 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1404 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1405 		if (spdk_unlikely(rc)) {
1406 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1407 			return rc;
1408 		}
1409 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1410 	}
1411 
1412 	for (i = 0; i < num_ops - 1; i++) {
1413 		if (mlx5_task->inplace) {
1414 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1415 							      task->src_domain, task->src_domain_ctx);
1416 		} else {
1417 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1418 							      task->dst_domain, task->dst_domain_ctx);
1419 		}
1420 		if (spdk_unlikely(sge_count <= 0)) {
1421 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1422 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1423 			return rc;
1424 		}
1425 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
1426 					    0, rdma_fence);
1427 		if (spdk_unlikely(rc)) {
1428 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1429 			return rc;
1430 		}
1431 		mlx5_task->num_submitted_reqs++;
1432 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1433 		dev->stats.rdma_reads++;
1434 		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1435 	}
1436 	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace &&
1437 			mlx5_task->dst.iovcnt == 0)) {
1438 		/*
1439 		 * The last RDMA does not have any data, only CRC. It also does not have a paired MKey.
1440 		 * The CRC is handled in the previous MKey in this case.
1441 		 */
1442 		sge_count = 0;
1443 		umr_offset = mlx5_task->last_umr_len;
1444 	} else {
1445 		umr_offset = 0;
1446 		mlx5_task->last_mkey_idx = i;
1447 		if (mlx5_task->inplace) {
1448 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1449 							      task->src_domain, task->src_domain_ctx);
1450 		} else {
1451 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1452 							      task->dst_domain, task->dst_domain_ctx);
1453 		}
1454 		if (spdk_unlikely(sge_count <= 0)) {
1455 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1456 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1457 			return rc;
1458 		}
1459 		assert(remaining == 0);
1460 	}
1461 	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
1462 		/* Ensure that there is a free sge for the CRC destination. */
1463 		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
1464 		/* Add the crc destination to the end of sges. */
1465 		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
1466 		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1467 		sges[sge_count++].length = sizeof(uint32_t);
1468 	}
1469 	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
1470 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
1471 				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
1472 				    (uint64_t)mlx5_task, rdma_fence);
1473 	if (spdk_unlikely(rc)) {
1474 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1475 		return rc;
1476 	}
1477 	mlx5_task->num_submitted_reqs++;
1478 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1479 	dev->stats.rdma_reads++;
1480 
1481 	return 0;
1482 }
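
/* Multi-request CRC chain: one signature UMR per MKey (the signature is
 * initialized on the first UMR and checked on the last), followed by one
 * RDMA_READ per MKey; only the final RDMA_READ is signaled. If the payload
 * ends exactly on an MKey boundary, the final RDMA_READ carries only the
 * 4-byte CRC sge, read at offset last_umr_len inside the previous MKey. */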
1483 
1484 static inline int
1485 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
1486 {
1487 	int rc;
1488 
1489 	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);
1490 
1491 	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1492 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
1493 
1494 	if (mlx5_task->num_reqs == 1) {
1495 		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
1496 	} else {
1497 		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
1498 	}
1499 
1500 	if (rc == 0) {
1501 		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
1502 		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1503 			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
1504 			      mlx5_task->num_completed_reqs);
1505 	}
1506 
1507 	return rc;
1508 }
1509 
1510 static inline int
1511 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
1512 {
1513 	struct accel_mlx5_qp *qp = task->qp;
1514 	struct accel_mlx5_dev *dev = qp->dev;
1515 
1516 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
1517 		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
1518 			      dev->dev_ctx->context->device->name);
1519 		dev->stats.nomem_mkey++;
1520 		return -ENOMEM;
1521 	}
1522 	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
1523 	if (spdk_unlikely(!task->psv)) {
1524 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
1525 		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1526 		task->num_ops = 0;
1527 		dev->stats.nomem_mkey++;
1528 		return -ENOMEM;
1529 	}
1530 	/* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */
1531 	if (spdk_unlikely(task->psv->bits.error)) {
1532 		uint32_t n_slots = task->num_ops * 2 + 1;
1533 
1534 		if (qp_slot < n_slots) {
1535 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
1536 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1537 			dev->stats.nomem_qdepth++;
1538 			task->num_ops = 0;
1539 			return -ENOMEM;
1540 		}
1541 	}
1542 
1543 	return 0;
1544 }
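
/* The error path above sizes the qp requirement as num_ops * 2 + 1: one UMR
 * and one RDMA_READ per operation plus a single SET_PSV WQE that resets the
 * PSV error state before the new signature pipeline starts. */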
1545 
1546 static inline int
1547 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
1548 {
1549 	struct accel_mlx5_qp *qp = task->qp;
1550 	struct accel_mlx5_dev *dev = qp->dev;
1551 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1552 	int rc;
1553 
1554 	assert(task->num_reqs > task->num_completed_reqs);
1555 	if (task->num_ops == 0) {
1556 		/* No mkeys allocated, try to allocate now. */
1557 		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
1558 		if (spdk_unlikely(rc)) {
1559 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1560 			return -ENOMEM;
1561 		}
1562 	}
1563 	/* We need to post at least 1 UMR and 1 RDMA operation */
1564 	if (spdk_unlikely(qp_slot < 2)) {
1565 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1566 		dev->stats.nomem_qdepth++;
1567 		return -ENOMEM;
1568 	}
1569 
1570 	return accel_mlx5_crc_task_process(task);
1571 }
1572 
1573 static inline uint32_t
1574 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
1575 			      uint32_t dst_iovcnt)
1576 {
1577 	uint32_t src_idx = 0;
1578 	uint32_t dst_idx = 0;
1579 	uint32_t num_ops = 1;
1580 	uint32_t num_src_sge = 1;
1581 	uint32_t num_dst_sge = 1;
1582 	size_t src_offset = 0;
1583 	size_t dst_offset = 0;
1584 	uint32_t num_sge;
1585 	size_t src_len;
1586 	size_t dst_len;
1587 
1588 	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
1589 	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
1590 		return 1;
1591 	}
1592 
1593 	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
1594 		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1595 			num_ops++;
1596 			num_src_sge = 1;
1597 			num_dst_sge = 1;
1598 		}
1599 		src_len = src_iov[src_idx].iov_len - src_offset;
1600 		dst_len = dst_iov[dst_idx].iov_len - dst_offset;
1601 
1602 		if (src_len == dst_len) {
1603 			num_src_sge++;
1604 			num_dst_sge++;
1605 			src_offset = 0;
1606 			dst_offset = 0;
1607 			src_idx++;
1608 			dst_idx++;
1609 			continue;
1610 		}
1611 		if (src_len < dst_len) {
1612 			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
1613 			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
1614 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
1615 									   src_iovcnt - src_idx),
1616 							   &src_offset, &dst_len);
1617 			src_idx += num_sge;
1618 			num_src_sge += num_sge;
1619 			if (dst_len != 0) {
1620 				/*
1621 				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
1622 				 * are left on the current dst_iov.
1623 				 */
1624 				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
1625 			} else {
1626 				/* The src_iov advance is completed, shift to the next dst_iov. */
1627 				dst_idx++;
1628 				num_dst_sge++;
1629 				dst_offset = 0;
1630 			}
1631 		} else { /* src_len > dst_len */
1632 			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
1633 			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
1634 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
1635 									   dst_iovcnt - dst_idx),
1636 							   &dst_offset, &src_len);
1637 			dst_idx += num_sge;
1638 			num_dst_sge += num_sge;
1639 			if (src_len != 0) {
1640 				/*
1641 				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
1642 				 * are left on the current src_iov.
1643 				 */
1644 				src_offset = src_iov[src_idx].iov_len - src_len;
1645 			} else {
1646 				/* The dst_iov advance is completed, shift to the next src_iov. */
1647 				src_idx++;
1648 				num_src_sge++;
1649 				src_offset = 0;
1650 			}
1651 		}
1652 	}
1653 	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
1654 	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1655 		num_ops++;
1656 	}
1657 
1658 	/* The above loop must reach the end of both iovs simultaneously because their total lengths are equal. */
1659 	assert(src_idx == src_iovcnt);
1660 	assert(dst_idx == dst_iovcnt);
1661 	assert(src_offset == 0);
1662 	assert(dst_offset == 0);
1663 
1664 	return num_ops;
1665 }
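
/*
 * Example for the computation above with ACCEL_MLX5_MAX_SGE = 16:
 * src_iovcnt = 8 and dst_iovcnt = 15 fit in a single operation (15 dst
 * entries + 1 CRC SGE = 16). With 32 equal-length entries on each side, two
 * operations consume 16 SGEs each, and the post-loop check adds a third
 * because the CRC SGE no longer fits in the last one.
 */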
1666 
1667 static inline int
1668 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
1669 {
1670 	struct spdk_accel_task *task = &mlx5_task->base;
1671 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1672 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1673 	int rc;
1674 
1675 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1676 	if (mlx5_task->inplace) {
1677 		/* One entry is reserved for CRC */
1678 		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
1679 	} else {
1680 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1681 		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
1682 				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1683 	}
1684 
1685 	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
1686 	if (spdk_unlikely(rc)) {
1687 		return rc;
1688 	}
1689 
1690 	if (spdk_unlikely(qp_slot < 2)) {
1691 		/* Queue is full, queue this task */
1692 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
1693 			      mlx5_task->qp);
1694 		qp->dev->stats.nomem_qdepth++;
1695 		return -ENOMEM;
1696 	}
1697 	return 0;
1698 }
1699 
1700 static inline int
1701 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
1702 {
1703 	struct spdk_accel_task *task = &mlx5_task->base;
1704 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1705 	struct accel_mlx5_dev *dev = qp->dev;
1706 	uint32_t num_blocks;
1707 	int rc;
1708 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1709 	bool crypto_key_ok;
1710 
1711 	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
1712 		/* With `external mkey` we can't split the task or register several UMRs */
1713 		SPDK_ERRLOG("src buffer is too fragmented\n");
1714 		return -EINVAL;
1715 	}
1716 	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
1717 		SPDK_ERRLOG("accel domain is not supported\n");
1718 		return -ENOTSUP;
1719 	}
1720 	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
1721 		SPDK_ERRLOG("Mkey registration is only supported for a single task\n");
1722 		return -ENOTSUP;
1723 	}
1724 
1725 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
1726 			 task->crypto_key->priv);
1727 	if (spdk_unlikely(!crypto_key_ok)) {
1728 		SPDK_ERRLOG("Wrong crypto key provided\n");
1729 		return -EINVAL;
1730 	}
1731 	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
1732 		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
1733 			    mlx5_task->base.block_size);
1734 		return -EINVAL;
1735 	}
1736 
1737 	num_blocks = task->nbytes / mlx5_task->base.block_size;
1738 	if (dev->crypto_multi_block) {
1739 		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
1740 				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
1741 			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
1742 				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
1743 			return -E2BIG;
1744 		}
1745 	} else if (num_blocks != 1) {
1746 		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
1747 		return -E2BIG;
1748 	}
1749 
1750 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1751 	mlx5_task->num_blocks = num_blocks;
1752 	mlx5_task->num_processed_blocks = 0;
1753 	mlx5_task->num_reqs = 1;
1754 	mlx5_task->blocks_per_req = num_blocks;
1755 
1756 	if (spdk_unlikely(qp_slot == 0)) {
1757 		mlx5_task->num_ops = 0;
1758 		dev->stats.nomem_qdepth++;
1759 		return -ENOMEM;
1760 	}
1761 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1762 	if (spdk_unlikely(rc)) {
1763 		mlx5_task->num_ops = 0;
1764 		dev->stats.nomem_mkey++;
1765 		return -ENOMEM;
1766 	}
1767 	mlx5_task->num_ops = 1;
1768 
1769 	SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks,
1770 		      task->nbytes);
1771 
1772 	return 0;
1773 }
1774 
1775 static inline int
1776 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task)
1777 {
1778 	struct accel_mlx5_sge sge;
1779 	struct spdk_accel_task *task = &mlx5_task->base;
1780 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1781 	struct accel_mlx5_dev *dev = qp->dev;
1782 	struct spdk_mlx5_crypto_dek_data dek_data;
1783 	int rc;
1784 
1785 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1786 		return -EINVAL;
1787 	}
1788 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1789 
1790 	mlx5_task->num_wrs = 0;
1791 	rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data);
1792 	if (spdk_unlikely(rc)) {
1793 		return rc;
1794 	}
1795 
1796 	rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey,
1797 					     mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1798 	if (spdk_unlikely(rc)) {
1799 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1800 		return rc;
1801 	}
1802 	dev->stats.crypto_umrs++;
1803 	mlx5_task->num_submitted_reqs++;
1804 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1805 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
1806 
1807 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1808 
1809 	return 0;
1810 }
1811 
1812 static inline int
1813 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task)
1814 {
1815 	struct accel_mlx5_qp *qp = task->qp;
1816 	struct accel_mlx5_dev *dev = qp->dev;
1817 	int rc;
1818 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1819 
1820 	if (task->num_ops == 0) {
1821 		rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1);
1822 		if (spdk_unlikely(rc)) {
1823 			dev->stats.nomem_mkey++;
1824 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1825 			return -ENOMEM;
1826 		}
1827 		task->num_ops = 1;
1828 	}
1829 	if (spdk_unlikely(qp_slot == 0)) {
1830 		dev->stats.nomem_qdepth++;
1831 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1832 		return -ENOMEM;
1833 	}
1834 	return accel_mlx5_crypto_mkey_task_process(task);
1835 }
1836 
1837 static inline void
1838 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
1839 {
1840 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1841 
1842 	assert(mlx5_task->num_ops);
1843 	assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks);
1844 	assert(mlx5_task->base.seq);
1845 
1846 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1847 	spdk_accel_task_complete(&mlx5_task->base, 0);
1848 }
1849 
1850 static inline int
1851 accel_mlx5_mkey_task_init(struct accel_mlx5_task *mlx5_task)
1852 {
1853 	struct spdk_accel_task *task = &mlx5_task->base;
1854 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1855 	struct accel_mlx5_dev *dev = qp->dev;
1856 	int rc;
1857 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1858 
1859 	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
1860 		/* With `external mkey` we can't split the task or register several UMRs */
1861 		SPDK_ERRLOG("src buffer is too fragmented\n");
1862 		return -EINVAL;
1863 	}
1864 	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
1865 		SPDK_ERRLOG("accel domain is not supported\n");
1866 		return -EINVAL;
1867 	}
1868 	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
1869 		SPDK_ERRLOG("Mkey registration is only supported for a single task\n");
1870 		return -ENOTSUP;
1871 	}
1872 
1873 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1874 	mlx5_task->num_reqs = 1;
1875 
1876 	if (spdk_unlikely(qp_slot == 0)) {
1877 		mlx5_task->num_ops = 0;
1878 		dev->stats.nomem_qdepth++;
1879 		return -ENOMEM;
1880 	}
1881 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, mlx5_task->mkeys, 1);
1882 	if (spdk_unlikely(rc)) {
1883 		mlx5_task->num_ops = 0;
1884 		dev->stats.nomem_mkey++;
1885 		return -ENOMEM;
1886 	}
1887 	mlx5_task->num_ops = 1;
1888 
1889 	SPDK_DEBUGLOG(accel_mlx5, "mkey task num_reqs %u, src_len %zu\n", mlx5_task->num_reqs,
1890 		      task->nbytes);
1891 
1892 	return 0;
1893 }
1894 
1895 static inline int
1896 accel_mlx5_mkey_task_process(struct accel_mlx5_task *mlx5_task)
1897 {
1898 	struct spdk_mlx5_umr_attr umr_attr;
1899 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
1900 	struct spdk_accel_task *task = &mlx5_task->base;
1901 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1902 	struct accel_mlx5_dev *dev = qp->dev;
1903 	uint32_t remaining = 0;
1904 	int rc;
1905 
1906 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1907 		return -EINVAL;
1908 	}
1909 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1910 
1911 	mlx5_task->num_wrs = 0;
1912 
1913 	rc = accel_mlx5_fill_block_sge(dev, src_sge, &mlx5_task->src, task->nbytes, &remaining,
1914 				       task->src_domain, task->src_domain_ctx);
1915 	if (spdk_unlikely(rc <= 0 || remaining)) {
1916 		rc = rc ? rc : -EINVAL;
1917 		SPDK_ERRLOG("Failed to set src sge, rc %d, remaining %u\n", rc, remaining);
1918 		return rc;
1919 	}
1920 	umr_attr.mkey = mlx5_task->mkeys[0]->mkey;
1921 	umr_attr.sge = src_sge;
1922 	umr_attr.sge_count = rc;
1923 	umr_attr.umr_len = task->nbytes;
1924 
1925 	rc = spdk_mlx5_umr_configure(qp->qp, &umr_attr, (uint64_t)mlx5_task,
1926 				     SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1927 	if (spdk_unlikely(rc)) {
1928 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1929 		return rc;
1930 	}
1931 	dev->stats.umrs++;
1932 	mlx5_task->num_submitted_reqs++;
1933 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1934 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
1935 
1936 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1937 
1938 	return 0;
1939 }
1940 
1941 static inline int
1942 accel_mlx5_mkey_task_continue(struct accel_mlx5_task *task)
1943 {
1944 	struct accel_mlx5_qp *qp = task->qp;
1945 	struct accel_mlx5_dev *dev = qp->dev;
1946 	int rc;
1947 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1948 
1949 	if (task->num_ops == 0) {
1950 		rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, task->mkeys, 1);
1951 		if (spdk_unlikely(rc)) {
1952 			dev->stats.nomem_mkey++;
1953 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1954 			return -ENOMEM;
1955 		}
1956 		task->num_ops = 1;
1957 	}
1958 	if (spdk_unlikely(qp_slot == 0)) {
1959 		dev->stats.nomem_qdepth++;
1960 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1961 		return -ENOMEM;
1962 	}
1963 	return accel_mlx5_mkey_task_process(task);
1964 }
1965 
1966 static inline void
1967 accel_mlx5_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
1968 {
1969 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1970 
1971 	assert(mlx5_task->num_ops);
1972 	assert(mlx5_task->base.seq);
1973 
1974 	spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, mlx5_task->mkeys, 1);
1975 	spdk_accel_task_complete(&mlx5_task->base, 0);
1976 }
1977 
1978 static int
1979 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
1980 {
1981 	SPDK_ERRLOG("wrong function called\n");
1982 	SPDK_UNREACHABLE();
1983 }
1984 
1985 static void
1986 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
1987 {
1988 	SPDK_ERRLOG("wrong function called\n");
1989 	SPDK_UNREACHABLE();
1990 }
1991 
1992 static int
1993 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
1994 {
1995 	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);
1996 
1997 	return -ENOTSUP;
1998 }
1999 
2000 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
2001 	[ACCEL_MLX5_OPC_COPY] = {
2002 		.init = accel_mlx5_copy_task_init,
2003 		.process = accel_mlx5_copy_task_process,
2004 		.cont = accel_mlx5_copy_task_continue,
2005 		.complete = accel_mlx5_copy_task_complete,
2006 	},
2007 	[ACCEL_MLX5_OPC_CRYPTO] = {
2008 		.init = accel_mlx5_crypto_task_init,
2009 		.process = accel_mlx5_crypto_task_process,
2010 		.cont = accel_mlx5_crypto_task_continue,
2011 		.complete = accel_mlx5_crypto_task_complete,
2012 	},
2013 	[ACCEL_MLX5_OPC_CRC32C] = {
2014 		.init = accel_mlx5_crc_task_init,
2015 		.process = accel_mlx5_crc_task_process,
2016 		.cont = accel_mlx5_crc_task_continue,
2017 		.complete = accel_mlx5_crc_task_complete,
2018 	},
2019 	[ACCEL_MLX5_OPC_CRYPTO_MKEY] = {
2020 		.init = accel_mlx5_crypto_mkey_task_init,
2021 		.process = accel_mlx5_crypto_mkey_task_process,
2022 		.cont = accel_mlx5_crypto_mkey_task_continue,
2023 		.complete = accel_mlx5_crypto_mkey_task_complete,
2024 	},
2025 	[ACCEL_MLX5_OPC_MKEY] = {
2026 		.init = accel_mlx5_mkey_task_init,
2027 		.process = accel_mlx5_mkey_task_process,
2028 		.cont = accel_mlx5_mkey_task_continue,
2029 		.complete = accel_mlx5_mkey_task_complete,
2030 	},
2031 	[ACCEL_MLX5_OPC_LAST] = {
2032 		.init = accel_mlx5_task_op_not_supported,
2033 		.process = accel_mlx5_task_op_not_implemented,
2034 		.cont = accel_mlx5_task_op_not_implemented,
2035 		.complete = accel_mlx5_task_op_not_implemented_v
2036 	},
2037 };
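
/*
 * Sketch of how the table above drives a task's lifecycle (see
 * _accel_mlx5_submit_tasks() and accel_mlx5_poll_cq() below):
 *
 *   ops = &g_accel_mlx5_tasks_ops[task->mlx5_opcode];
 *   rc = ops->init(task);                       // validate, reserve mkeys/slots
 *   if (rc == 0) { rc = ops->process(task); }   // post WQEs to the qp
 *   // On CQ completion, ops->cont() resumes a partially submitted task and
 *   // ops->complete() releases resources and completes the base accel task.
 *
 * ACCEL_MLX5_OPC_LAST is a sentinel: tasks that reach it fail in init() with
 * -ENOTSUP, so the remaining callbacks are unreachable.
 */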
2038 
2039 static void
2040 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc)
2041 {
2042 	struct accel_mlx5_task *task = ctx;
2043 
2044 	assert(task->needs_data_transfer);
2045 	task->needs_data_transfer = 0;
2046 
2047 	if (spdk_likely(!rc)) {
2048 		SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task);
2049 		accel_mlx5_task_complete(task);
2050 	} else {
2051 		SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc);
2052 		accel_mlx5_task_fail(task, rc);
2053 	}
2054 }
2055 
2056 static inline void
2057 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
2058 {
2059 	struct spdk_memory_domain_translation_result translation;
2060 	struct spdk_accel_task *base = &task->base;
2061 	struct accel_mlx5_dev *dev = task->qp->dev;
2062 	int rc;
2063 
2064 	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY || task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY);
2065 	/* UMR is an offset in the address space, so the start address is 0 */
2066 	translation.iov.iov_base = NULL;
2067 	translation.iov.iov_len = base->nbytes;
2068 	translation.iov_count = 1;
2069 	translation.size = sizeof(translation);
2070 	translation.rdma.rkey = task->mkeys[0]->mkey;
2071 	translation.rdma.lkey = task->mkeys[0]->mkey;
2072 
2073 	SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task,
2074 		      task->base.dst_domain_ctx, task->mkeys[0]->mkey);
2075 	rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1,
2076 					      dev->dev_ctx->domain, task, &translation.iov, 1, &translation,
2077 					      accel_mlx5_memory_domain_transfer_cpl, task);
2078 	if (spdk_unlikely(rc)) {
2079 		SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc);
2080 		accel_mlx5_task_fail(task, rc);
2081 	}
2082 }
2083 
2084 static inline void
2085 accel_mlx5_task_complete(struct accel_mlx5_task *task)
2086 {
2087 	struct spdk_accel_sequence *seq = task->base.seq;
2088 	struct spdk_accel_task *next;
2089 	bool driver_seq;
2090 
2091 	if (task->needs_data_transfer) {
2092 		accel_mlx5_memory_domain_transfer(task);
2093 		return;
2094 	}
2095 
2096 	next = spdk_accel_sequence_next_task(&task->base);
2097 	driver_seq = task->driver_seq;
2098 
2099 	assert(task->num_reqs == task->num_completed_reqs);
2100 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->mlx5_opcode);
2101 
2102 	g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task);
2103 
2104 	if (driver_seq) {
2105 		struct spdk_io_channel *ch = task->qp->dev->ch;
2106 
2107 		assert(seq);
2108 		if (next) {
2109 			accel_mlx5_execute_sequence(ch, seq);
2110 		} else {
2111 			spdk_accel_sequence_continue(seq);
2112 		}
2113 	}
2114 }
2115 
2116 static inline int
2117 accel_mlx5_task_continue(struct accel_mlx5_task *task)
2118 {
2119 	struct accel_mlx5_qp *qp = task->qp;
2120 	struct accel_mlx5_dev *dev = qp->dev;
2121 
2122 	if (spdk_unlikely(qp->recovering)) {
2123 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
2124 		return 0;
2125 	}
2126 
2127 	return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task);
2128 }

2129 static inline void
2130 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task)
2131 {
2132 	uint8_t base_opcode = mlx5_task->base.op_code;
2133 
2134 	switch (base_opcode) {
2135 	case SPDK_ACCEL_OPC_COPY:
2136 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY;
2137 		break;
2138 	case SPDK_ACCEL_OPC_ENCRYPT:
2139 		assert(g_accel_mlx5.crypto_supported);
2140 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
2141 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2142 		break;
2143 	case SPDK_ACCEL_OPC_DECRYPT:
2144 		assert(g_accel_mlx5.crypto_supported);
2145 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
2146 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2147 		break;
2148 	case SPDK_ACCEL_OPC_CRC32C:
2149 		mlx5_task->inplace = 1;
2150 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2151 		break;
2152 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2153 		mlx5_task->inplace = 0;
2154 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2155 		break;
2156 	default:
2157 		SPDK_ERRLOG("wrong opcode %d\n", base_opcode);
2158 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST;
2159 	}
2160 }
2161 
2162 static void
2163 accel_mlx5_post_poller_handler(void *fn_arg)
2164 {
2165 	struct accel_mlx5_io_channel *ch = fn_arg;
2166 	struct accel_mlx5_dev *dev;
2167 	uint32_t i;
2168 
2169 	for (i = 0; i < ch->num_devs; i++) {
2170 		dev = &ch->devs[i];
2171 
2172 		if (dev->qp.ring_db) {
2173 			spdk_mlx5_qp_complete_send(dev->qp.qp);
2174 			dev->qp.ring_db = false;
2175 		}
2176 	}
2177 
2178 	ch->poller_handler_registered = false;
2179 }
2180 
2181 static inline int
2182 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task)
2183 {
2184 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2185 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
2186 	int rc;
2187 
2188 	/* We should not receive any tasks if the module was not enabled */
2189 	assert(g_accel_mlx5.enabled);
2190 
2191 	dev->stats.opcodes[mlx5_task->mlx5_opcode]++;
2192 	rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task);
2193 	if (spdk_unlikely(rc)) {
2194 		if (rc == -ENOMEM) {
2195 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
2196 				      mlx5_task->num_reqs);
2197 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2198 			return 0;
2199 		}
2200 		SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc);
2201 		return rc;
2202 	}
2203 
2204 	if (spdk_unlikely(mlx5_task->qp->recovering)) {
2205 		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2206 		return 0;
2207 	}
2208 
2209 	if (!accel_ch->poller_handler_registered) {
2210 		spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch);
2211 		/* The function above may fail to register our handler; in that case we ring doorbells on the
2212 		 * next polling cycle. That is less efficient but still works. */
2213 		accel_ch->poller_handler_registered = true;
2214 	}
2215 
2216 	return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task);
2217 }
2218 
2219 static inline void
2220 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch)
2221 {
2222 	struct accel_mlx5_dev *dev;
2223 
2224 	dev = &accel_ch->devs[accel_ch->dev_idx];
2225 	accel_ch->dev_idx++;
2226 	if (accel_ch->dev_idx == accel_ch->num_devs) {
2227 		accel_ch->dev_idx = 0;
2228 	}
2229 
2230 	mlx5_task->qp = &dev->qp;
2231 }
2232 
2233 static inline void
2234 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task)
2235 {
2236 	mlx5_task->num_completed_reqs = 0;
2237 	mlx5_task->num_submitted_reqs = 0;
2238 	mlx5_task->num_ops = 0;
2239 	mlx5_task->num_processed_blocks = 0;
2240 	mlx5_task->raw = 0;
2241 }
2242 
2243 static int
2244 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task)
2245 {
2246 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2247 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
2248 
2249 	accel_mlx5_task_assign_qp(mlx5_task, accel_ch);
2250 	accel_mlx5_task_reset(mlx5_task);
2251 	accel_mlx5_task_init_opcode(mlx5_task);
2252 
2253 	return _accel_mlx5_submit_tasks(accel_ch, task);
2254 }
2255 
2256 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);
2257 
2258 static int
2259 accel_mlx5_recover_qp_poller(void *arg)
2260 {
2261 	struct accel_mlx5_qp *qp = arg;
2262 
2263 	spdk_poller_unregister(&qp->recover_poller);
2264 	accel_mlx5_recover_qp(qp);
2265 	return SPDK_POLLER_BUSY;
2266 }
2267 
2268 static void
2269 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
2270 {
2271 	struct accel_mlx5_dev *dev = qp->dev;
2272 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2273 	int rc;
2274 
2275 	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
2276 	if (qp->qp) {
2277 		spdk_mlx5_qp_destroy(qp->qp);
2278 		qp->qp = NULL;
2279 	}
2280 
2281 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2282 	mlx5_qp_attr.cap.max_recv_wr = 0;
2283 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2284 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2285 
2286 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2287 	if (rc) {
2288 		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
2289 			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2290 		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
2291 				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2292 		return;
2293 	}
2294 
2295 	qp->recovering = false;
2296 }
2297 
2298 static inline void
2299 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
2300 {
2301 	struct accel_mlx5_qp *qp = task->qp;
2302 
2303 	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
2304 		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
2305 			     qp, task, wc->status, spdk_env_get_current_core());
2306 	} else {
2307 		SPDK_DEBUGLOG(accel_mlx5,
2308 			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
2309 			      qp, task, wc->status, spdk_env_get_current_core());
2310 	}
2311 
2312 	qp->recovering = true;
2313 	assert(task->num_completed_reqs <= task->num_submitted_reqs);
2314 	if (task->num_completed_reqs == task->num_submitted_reqs) {
2315 		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2316 		accel_mlx5_task_fail(task, -EIO);
2317 	}
2318 }
2319 
2320 static inline int64_t
2321 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
2322 {
2323 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
2324 	struct accel_mlx5_task *task;
2325 	struct accel_mlx5_qp *qp;
2326 	int reaped, i, rc;
2327 	uint16_t completed;
2328 
2329 	dev->stats.polls++;
2330 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
2331 	if (spdk_unlikely(reaped < 0)) {
2332 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
2333 		return reaped;
2334 	} else if (reaped == 0) {
2335 		dev->stats.idle_polls++;
2336 		return 0;
2337 	}
2338 	dev->stats.completions += reaped;
2339 
2340 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
2341 		      dev->dev_ctx->context->device->name);
2342 
2343 	for (i = 0; i < reaped; i++) {
2344 		if (spdk_unlikely(!wc[i].wr_id)) {
2345 			/* Unsignaled completion with error, ignore */
2346 			continue;
2347 		}
2348 		task = (struct accel_mlx5_task *)wc[i].wr_id;
2349 		qp = task->qp;
2350 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
2351 		assert(task->num_submitted_reqs > task->num_completed_reqs);
2352 		completed = task->num_submitted_reqs - task->num_completed_reqs;
2353 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
2354 		task->num_completed_reqs += completed;
2355 		assert(qp->wrs_submitted >= task->num_wrs);
2356 		qp->wrs_submitted -= task->num_wrs;
2357 		assert(dev->wrs_in_cq > 0);
2358 		dev->wrs_in_cq--;
2359 
2360 		if (spdk_unlikely(wc[i].status)) {
2361 			accel_mlx5_process_error_cpl(&wc[i], task);
2362 			if (qp->wrs_submitted == 0) {
2363 				assert(STAILQ_EMPTY(&qp->in_hw));
2364 				accel_mlx5_recover_qp(qp);
2365 			}
2366 			continue;
2367 		}
2368 
2369 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
2370 			      task->num_reqs - task->num_completed_reqs);
2371 		if (task->num_completed_reqs == task->num_reqs) {
2372 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2373 			accel_mlx5_task_complete(task);
2374 		} else {
2375 			assert(task->num_submitted_reqs < task->num_reqs);
2376 			assert(task->num_completed_reqs == task->num_submitted_reqs);
2377 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2378 			rc = accel_mlx5_task_continue(task);
2379 			if (spdk_unlikely(rc)) {
2380 				if (rc != -ENOMEM) {
2381 					accel_mlx5_task_fail(task, rc);
2382 				}
2383 			}
2384 		}
2385 	}
2386 
2387 	return reaped;
2388 }
2389 
2390 static inline void
2391 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
2392 {
2393 	struct accel_mlx5_task *task, *tmp, *last;
2394 	int rc;
2395 
2396 	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
2397 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
2398 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
2399 		rc = accel_mlx5_task_continue(task);
2400 		if (spdk_unlikely(rc)) {
2401 			if (rc != -ENOMEM) {
2402 				accel_mlx5_task_fail(task, rc);
2403 			}
2404 			break;
2405 		}
2406 		/* If the qpair is recovering, the task is added back to the nomem list and 0 is returned. In
2407 		 * that case the `last` sentinel lets us iterate the list only once and stop this FOREACH loop. */
2408 		if (task == last) {
2409 			break;
2410 		}
2411 	}
2412 }
2413 
2414 static int
2415 accel_mlx5_poller(void *ctx)
2416 {
2417 	struct accel_mlx5_io_channel *ch = ctx;
2418 	struct accel_mlx5_dev *dev;
2419 
2420 	int64_t completions = 0, rc;
2421 	uint32_t i;
2422 
2423 	/* Reaped completions may register a post-poller handler, which makes no sense in the scope of our own poller. */
2424 	ch->poller_handler_registered = true;
2425 	for (i = 0; i < ch->num_devs; i++) {
2426 		dev = &ch->devs[i];
2427 		if (dev->wrs_in_cq) {
2428 			rc = accel_mlx5_poll_cq(dev);
2429 			if (spdk_unlikely(rc < 0)) {
2430 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
2431 			}
2432 			completions += rc;
2433 			if (dev->qp.ring_db) {
2434 				spdk_mlx5_qp_complete_send(dev->qp.qp);
2435 				dev->qp.ring_db = false;
2436 			}
2437 		}
2438 		if (!STAILQ_EMPTY(&dev->nomem)) {
2439 			accel_mlx5_resubmit_nomem_tasks(dev);
2440 		}
2441 	}
2442 	ch->poller_handler_registered = false;
2443 
2444 	return !!completions;
2445 }
2446 
2447 static bool
2448 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
2449 {
2450 	assert(g_accel_mlx5.enabled);
2451 
2452 	switch (opc) {
2453 	case SPDK_ACCEL_OPC_COPY:
2454 		return true;
2455 	case SPDK_ACCEL_OPC_ENCRYPT:
2456 	case SPDK_ACCEL_OPC_DECRYPT:
2457 		return g_accel_mlx5.crypto_supported;
2458 	case SPDK_ACCEL_OPC_CRC32C:
2459 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2460 		return g_accel_mlx5.crc32c_supported;
2461 	default:
2462 		return false;
2463 	}
2464 }
2465 
2466 static struct spdk_io_channel *
2467 accel_mlx5_get_io_channel(void)
2468 {
2469 	assert(g_accel_mlx5.enabled);
2470 	return spdk_get_io_channel(&g_accel_mlx5);
2471 }
2472 
2473 static int
2474 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
2475 {
2476 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2477 	int rc;
2478 
2479 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2480 	mlx5_qp_attr.cap.max_recv_wr = 0;
2481 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2482 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2483 
2484 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2485 	if (rc) {
2486 		return rc;
2487 	}
2488 
2489 	STAILQ_INIT(&qp->in_hw);
2490 	qp->dev = dev;
2491 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
2492 	assert(qp->verbs_qp);
2493 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
2494 
2495 	return 0;
2496 }
2497 
2498 static void
2499 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add)
2500 {
2501 	int i;
2502 
2503 	stats->crypto_umrs += to_add->crypto_umrs;
2504 	stats->sig_umrs += to_add->sig_umrs;
2505 	stats->umrs += to_add->umrs;
2506 	stats->rdma_reads += to_add->rdma_reads;
2507 	stats->rdma_writes += to_add->rdma_writes;
2508 	stats->polls += to_add->polls;
2509 	stats->idle_polls += to_add->idle_polls;
2510 	stats->completions += to_add->completions;
2511 	stats->nomem_qdepth += to_add->nomem_qdepth;
2512 	stats->nomem_mkey += to_add->nomem_mkey;
2513 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
2514 		stats->opcodes[i] += to_add->opcodes[i];
2515 	}
2516 }
2517 
2518 static void
2519 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
2520 {
2521 	struct accel_mlx5_io_channel *ch = ctx_buf;
2522 	struct accel_mlx5_dev *dev;
2523 	uint32_t i;
2524 
2525 	spdk_poller_unregister(&ch->poller);
2526 	for (i = 0; i < ch->num_devs; i++) {
2527 		dev = &ch->devs[i];
2528 		spdk_mlx5_qp_destroy(dev->qp.qp);
2529 		if (dev->cq) {
2530 			spdk_mlx5_cq_destroy(dev->cq);
2531 		}
2532 		spdk_poller_unregister(&dev->qp.recover_poller);
2533 		if (dev->mkeys) {
2534 			spdk_mlx5_mkey_pool_put_ref(dev->mkeys);
2535 		}
2536 		if (dev->crypto_mkeys) {
2537 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
2538 		}
2539 		if (dev->sig_mkeys) {
2540 			spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys);
2541 		}
2542 		spdk_rdma_utils_free_mem_map(&dev->mmap);
2543 		spdk_spin_lock(&g_accel_mlx5.lock);
2544 		accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats);
2545 		spdk_spin_unlock(&g_accel_mlx5.lock);
2546 	}
2547 	free(ch->devs);
2548 }
2549 
2550 static int
2551 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
2552 {
2553 	struct spdk_mlx5_cq_attr cq_attr = {};
2554 	struct accel_mlx5_io_channel *ch = ctx_buf;
2555 	struct accel_mlx5_dev_ctx *dev_ctx;
2556 	struct accel_mlx5_dev *dev;
2557 	uint32_t i;
2558 	int rc;
2559 
2560 	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
2561 	if (!ch->devs) {
2562 		SPDK_ERRLOG("Memory allocation failed\n");
2563 		return -ENOMEM;
2564 	}
2565 
2566 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2567 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2568 		dev = &ch->devs[i];
2569 		dev->dev_ctx = dev_ctx;
2570 
2571 		assert(dev_ctx->mkeys);
2572 		dev->mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, 0);
2573 		if (!dev->mkeys) {
2574 			SPDK_ERRLOG("Failed to get mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2575 			/* Should not happen since the mkey pool is created during accel_mlx5 initialization;
2576 			 * we cannot get here if pool creation failed. */
2577 			assert(0);
2578 			goto err_out;
2579 		}
2580 
2581 		if (dev_ctx->crypto_mkeys) {
2582 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2583 			if (!dev->crypto_mkeys) {
2584 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2585 				/* Should not happen since the mkey pool is created during accel_mlx5 initialization;
2586 				 * we cannot get here if pool creation failed. */
2587 				assert(0);
2588 				goto err_out;
2589 			}
2590 		}
2591 		if (dev_ctx->sig_mkeys) {
2592 			dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2593 			if (!dev->sig_mkeys) {
2594 				SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2595 				/* Should not happen since the mkey pool is created during accel_mlx5 initialization;
2596 				 * we cannot get here if pool creation failed. */
2597 				assert(0);
2598 				goto err_out;
2599 			}
2600 		}
2601 
2602 		memset(&cq_attr, 0, sizeof(cq_attr));
2603 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
2604 		cq_attr.cqe_size = 64;
2605 		cq_attr.cq_context = dev;
2606 
2607 		ch->num_devs++;
2608 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
2609 		if (rc) {
2610 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
2611 			goto err_out;
2612 		}
2613 
2614 		rc = accel_mlx5_create_qp(dev, &dev->qp);
2615 		if (rc) {
2616 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
2617 			goto err_out;
2618 		}
2619 
2620 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2621 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2622 		if (!dev->mmap) {
2623 			SPDK_ERRLOG("Failed to create memory map\n");
2624 			rc = -ENOMEM;
2625 			goto err_out;
2626 		}
2627 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
2628 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
2629 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
2630 		dev->ch = spdk_io_channel_from_ctx(ctx_buf);
2631 		STAILQ_INIT(&dev->nomem);
2632 	}
2633 
2634 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
2635 
2636 	return 0;
2637 
2638 err_out:
2639 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
2640 	return rc;
2641 }
2642 
2643 void
2644 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
2645 {
2646 	assert(attr);
2647 
2648 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
2649 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
2650 	attr->allowed_devs = NULL;
2651 	attr->crypto_split_blocks = 0;
2652 	attr->enable_driver = false;
2653 }
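
/*
 * Usage sketch (hypothetical caller such as an RPC handler): start from the
 * defaults and override selected fields before enabling the module.
 *
 *   struct accel_mlx5_attr attr;
 *   int rc;
 *
 *   accel_mlx5_get_default_attr(&attr);
 *   attr.qp_size = 512;            // must be at least 8
 *   attr.allowed_devs = "mlx5_0";  // optional comma-separated device filter
 *   rc = accel_mlx5_enable(&attr); // -EEXIST if the module is already enabled
 */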
2654 
2655 static void
2656 accel_mlx5_allowed_devs_free(void)
2657 {
2658 	size_t i;
2659 
2660 	if (!g_accel_mlx5.allowed_devs) {
2661 		return;
2662 	}
2663 
2664 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
2665 		free(g_accel_mlx5.allowed_devs[i]);
2666 	}
2667 	free(g_accel_mlx5.attr.allowed_devs);
2668 	free(g_accel_mlx5.allowed_devs);
2669 	g_accel_mlx5.attr.allowed_devs = NULL;
2670 	g_accel_mlx5.allowed_devs = NULL;
2671 	g_accel_mlx5.allowed_devs_count = 0;
2672 }
2673 
2674 static int
2675 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
2676 {
2677 	char *str, *tmp, *tok, *sp = NULL;
2678 	size_t devs_count = 0;
2679 
2680 	str = strdup(allowed_devs);
2681 	if (!str) {
2682 		return -ENOMEM;
2683 	}
2684 
2685 	accel_mlx5_allowed_devs_free();
2686 
2687 	tmp = str;
2688 	while ((tmp = strchr(tmp, ',')) != NULL) {
2689 		tmp++;
2690 		devs_count++;
2691 	}
2692 	devs_count++;
2693 
2694 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
2695 	if (!g_accel_mlx5.allowed_devs) {
2696 		free(str);
2697 		return -ENOMEM;
2698 	}
2699 
2700 	devs_count = 0;
2701 	tok = strtok_r(str, ",", &sp);
2702 	while (tok) {
2703 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
2704 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
2705 			free(str);
2706 			accel_mlx5_allowed_devs_free();
2707 			return -ENOMEM;
2708 		}
2709 		tok = strtok_r(NULL, ",", &sp);
2710 		devs_count++;
2711 		g_accel_mlx5.allowed_devs_count++;
2712 	}
2713 
2714 	free(str);
2715 
2716 	return 0;
2717 }
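
/*
 * Example: parsing "mlx5_0,mlx5_1" yields g_accel_mlx5.allowed_devs =
 * { "mlx5_0", "mlx5_1" } and g_accel_mlx5.allowed_devs_count = 2; a string
 * without commas yields a single-entry array.
 */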
2718 
2719 int
2720 accel_mlx5_enable(struct accel_mlx5_attr *attr)
2721 {
2722 	int rc;
2723 
2724 	if (g_accel_mlx5.enabled) {
2725 		return -EEXIST;
2726 	}
2727 	if (attr) {
2728 		if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) {
2729 			SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n",
2730 				    ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count());
2731 			return -EINVAL;
2732 		}
2733 		if (attr->qp_size < 8) {
2734 			SPDK_ERRLOG("qp_size must be at least 8\n");
2735 			return -EINVAL;
2736 		}
2737 		g_accel_mlx5.attr = *attr;
2738 		g_accel_mlx5.attr.allowed_devs = NULL;
2739 
2740 		if (attr->allowed_devs) {
2741 			/* Contains a copy of the user's string */
2742 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
2743 			if (!g_accel_mlx5.attr.allowed_devs) {
2744 				return -ENOMEM;
2745 			}
2746 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
2747 			if (rc) {
2748 				return rc;
2749 			}
2750 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
2751 							 g_accel_mlx5.allowed_devs_count);
2752 			if (rc) {
2753 				accel_mlx5_allowed_devs_free();
2754 				return rc;
2755 			}
2756 		}
2757 	} else {
2758 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
2759 	}
2760 
2761 	g_accel_mlx5.enabled = true;
2762 	spdk_accel_module_list_add(&g_accel_mlx5.module);
2763 
2764 	return 0;
2765 }
2766 
2767 static void
2768 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx)
2769 {
2770 	uint32_t i, num_psvs, num_psvs_in_pool;
2771 
2772 	if (!dev_ctx->psvs) {
2773 		return;
2774 	}
2775 
2776 	num_psvs = g_accel_mlx5.attr.num_requests;
2777 
2778 	for (i = 0; i < num_psvs; i++) {
2779 		if (dev_ctx->psvs[i]) {
2780 			spdk_mlx5_destroy_psv(dev_ctx->psvs[i]);
2781 			dev_ctx->psvs[i] = NULL;
2782 		}
2783 	}
2784 	free(dev_ctx->psvs);
2785 
2786 	if (!dev_ctx->psv_pool) {
2787 		return;
2788 	}
2789 	num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool);
2790 	if (num_psvs_in_pool != num_psvs) {
2791 		SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool);
2792 	}
2793 	spdk_mempool_free(dev_ctx->psv_pool);
2794 }
2795 
2796 static void
2797 accel_mlx5_free_resources(void)
2798 {
2799 	struct accel_mlx5_dev_ctx *dev_ctx;
2800 	uint32_t i;
2801 
2802 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2803 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2804 		accel_mlx5_psvs_release(dev_ctx);
2805 		if (dev_ctx->pd) {
2806 			if (dev_ctx->mkeys) {
2807 				spdk_mlx5_mkey_pool_destroy(0, dev_ctx->pd);
2808 			}
2809 			if (dev_ctx->crypto_mkeys) {
2810 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
2811 			}
2812 			if (dev_ctx->sig_mkeys) {
2813 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd);
2814 			}
2815 			spdk_rdma_utils_put_pd(dev_ctx->pd);
2816 		}
2817 		if (dev_ctx->domain) {
2818 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
2819 		}
2820 	}
2821 
2822 	free(g_accel_mlx5.dev_ctxs);
2823 	g_accel_mlx5.dev_ctxs = NULL;
2824 	g_accel_mlx5.initialized = false;
2825 }
2826 
2827 static void
2828 accel_mlx5_deinit_cb(void *ctx)
2829 {
2830 	accel_mlx5_free_resources();
2831 	spdk_spin_destroy(&g_accel_mlx5.lock);
2832 	spdk_mlx5_umr_implementer_register(false);
2833 	spdk_accel_module_finish();
2834 }
2835 
2836 static void
2837 accel_mlx5_deinit(void *ctx)
2838 {
2839 	if (g_accel_mlx5.allowed_devs) {
2840 		accel_mlx5_allowed_devs_free();
2841 	}
2842 	spdk_mlx5_crypto_devs_allow(NULL, 0);
2843 	if (g_accel_mlx5.initialized) {
2844 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
2845 	} else {
2846 		spdk_accel_module_finish();
2847 	}
2848 }
2849 
2850 static int
2851 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
2852 {
2853 	struct spdk_mlx5_mkey_pool_param pool_param = {};
2854 
2855 	pool_param.mkey_count = num_mkeys;
2856 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
2857 	pool_param.flags = flags;
2858 
2859 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
2860 }
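
/*
 * Example of the cache sizing above: with num_mkeys = 2047
 * (ACCEL_MLX5_NUM_REQUESTS) and 8 cores, cache_per_thread =
 * 2047 * 3 / 4 / 8 = 191, i.e. roughly three quarters of the mkeys are spread
 * across per-thread caches while the remainder stays in the shared pool.
 */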
2861 
2862 static void
2863 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx)
2864 {
2865 	struct spdk_rdma_utils_memory_translation translation = {};
2866 	struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg;
2867 	struct accel_mlx5_psv_wrapper *wrapper = _psv;
2868 	struct accel_mlx5_dev_ctx *dev_ctx = args->dev;
2869 	int rc;
2870 
2871 	if (args->rc) {
2872 		return;
2873 	}
2874 	assert(obj_idx < g_accel_mlx5.attr.num_requests);
2875 	assert(dev_ctx->psvs[obj_idx] != NULL);
2876 	memset(wrapper, 0, sizeof(*wrapper));
2877 	wrapper->psv_index = dev_ctx->psvs[obj_idx]->index;
2878 
2879 	rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation);
2880 	if (rc) {
2881 		SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t));
2882 		args->rc = -EINVAL;
2883 	} else {
2884 		wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation);
2885 	}
2886 }
2887 
2888 static int
2889 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx)
2890 {
2891 	struct accel_mlx5_psv_pool_iter_cb_args args = {
2892 		.dev = dev_ctx
2893 	};
2894 	char pool_name[32];
2895 	uint32_t i;
2896 	uint32_t num_psvs = g_accel_mlx5.attr.num_requests;
2897 	uint32_t cache_size;
2898 	int rc;
2899 
2900 	dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *)));
2901 	if (!dev_ctx->psvs) {
2902 		SPDK_ERRLOG("Failed to alloc PSVs array\n");
2903 		return -ENOMEM;
2904 	}
2905 	for (i = 0; i < num_psvs; i++) {
2906 		dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd);
2907 		if (!dev_ctx->psvs[i]) {
2908 			SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name);
2909 			return -EINVAL;
2910 		}
2911 	}
2912 
2913 	rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name);
2914 	if (rc < 0) {
2915 		assert(0);
2916 		return -EINVAL;
2917 	}
2918 	cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count();
2919 	args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2920 			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2921 	if (!args.map) {
2922 		return -ENOMEM;
2923 	}
2924 	dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs,
2925 			    sizeof(struct accel_mlx5_psv_wrapper),
2926 			    cache_size, SPDK_ENV_SOCKET_ID_ANY,
2927 			    accel_mlx5_set_psv_in_pool, &args);
2928 	spdk_rdma_utils_free_mem_map(&args.map);
2929 	if (!dev_ctx->psv_pool) {
2930 		SPDK_ERRLOG("Failed to create PSV memory pool\n");
2931 		return -ENOMEM;
2932 	}
2933 	if (args.rc) {
2934 		SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc);
2935 		return args.rc;
2936 	}
2937 
2938 	return 0;
2939 }
2940 
2941 
2942 static int
2943 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
2944 			struct spdk_mlx5_device_caps *caps)
2945 {
2946 	struct ibv_pd *pd;
2947 	int rc;
2948 
2949 	pd = spdk_rdma_utils_get_pd(dev);
2950 	if (!pd) {
2951 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
2952 		return -EINVAL;
2953 	}
2954 	dev_ctx->context = dev;
2955 	dev_ctx->pd = pd;
2956 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
2957 	if (!dev_ctx->domain) {
2958 		return -ENOMEM;
2959 	}
2960 
2961 	rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 0);
2962 	if (rc) {
2963 		SPDK_ERRLOG("Failed to create mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2964 		return rc;
2965 	}
2966 	dev_ctx->mkeys = true;
2967 
2968 	if (g_accel_mlx5.crypto_supported) {
2969 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
2970 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
2971 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
2972 				     dev->device->name);
2973 		}
2974 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2975 		if (rc) {
2976 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2977 			return rc;
2978 		}
2979 		dev_ctx->crypto_mkeys = true;
2980 	}
2981 	if (g_accel_mlx5.crc32c_supported) {
2982 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests,
2983 					     SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2984 		if (rc) {
2985 			SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2986 			return rc;
2987 		}
2988 		dev_ctx->sig_mkeys = true;
2989 		rc = accel_mlx5_psvs_create(dev_ctx);
2990 		if (rc) {
2991 			SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name);
2992 			return rc;
2993 		}
2994 	}
2995 
2996 	return 0;
2997 }
2998 
2999 static struct ibv_context **
3000 accel_mlx5_get_devices(int *_num_devs)
3001 {
3002 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
3003 	struct ibv_device_attr dev_attr;
3004 	size_t j;
3005 	int num_devs = 0, i, rc;
3006 	int num_devs_out = 0;
3007 	bool dev_allowed;
3008 
3009 	rdma_devs = rdma_get_devices(&num_devs);
3010 	if (!rdma_devs || !num_devs) {
3011 		*_num_devs = 0;
3012 		return NULL;
3013 	}
3014 
3015 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
3016 	if (!rdma_devs_out) {
3017 		SPDK_ERRLOG("Memory allocation failed\n");
3018 		rdma_free_devices(rdma_devs);
3019 		*_num_devs = 0;
3020 		return NULL;
3021 	}
3022 
3023 	for (i = 0; i < num_devs; i++) {
3024 		dev = rdma_devs[i];
3025 		rc = ibv_query_device(dev, &dev_attr);
3026 		if (rc) {
3027 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
3028 			continue;
3029 		}
3030 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
3031 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
3032 			continue;
3033 		}
3034 
3035 		if (g_accel_mlx5.allowed_devs_count) {
3036 			dev_allowed = false;
3037 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
3038 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
3039 					dev_allowed = true;
3040 					break;
3041 				}
3042 			}
3043 			if (!dev_allowed) {
3044 				continue;
3045 			}
3046 		}
3047 
3048 		rdma_devs_out[num_devs_out] = dev;
3049 		num_devs_out++;
3050 	}
3051 
3052 	rdma_free_devices(rdma_devs);
3053 	*_num_devs = num_devs_out;
3054 
3055 	return rdma_devs_out;
3056 }
3057 
3058 static inline bool
3059 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
3060 {
3061 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
3062 	       (caps->crypto.single_block_le_tweak ||
3063 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
3064 }
3065 
3066 static int
3067 accel_mlx5_init(void)
3068 {
3069 	struct spdk_mlx5_device_caps *caps;
3070 	struct ibv_context **rdma_devs, *dev;
3071 	int num_devs = 0, rc = 0, i;
3072 	int best_dev = -1, first_dev = 0;
3073 	int best_dev_stat = 0, dev_stat;
3074 	bool supports_crypto;
3075 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
3076 
3077 	if (!g_accel_mlx5.enabled) {
3078 		return -EINVAL;
3079 	}
3080 
3081 	spdk_spin_init(&g_accel_mlx5.lock);
3082 	rdma_devs = accel_mlx5_get_devices(&num_devs);
3083 	if (!rdma_devs || !num_devs) {
3084 		return -ENODEV;
3085 	}
3086 	caps = calloc(num_devs, sizeof(*caps));
3087 	if (!caps) {
3088 		rc = -ENOMEM;
3089 		goto cleanup;
3090 	}
3091 
3092 	g_accel_mlx5.crypto_supported = true;
3093 	g_accel_mlx5.crc32c_supported = true;
3094 	g_accel_mlx5.num_ctxs = 0;
3095 
3096 	/* Iterate devices. We support an offload if all devices support it */
3097 	for (i = 0; i < num_devs; i++) {
3098 		dev = rdma_devs[i];
3099 
3100 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
3101 		if (rc) {
3102 			SPDK_ERRLOG("Failed to query device caps, dev %s\n", dev->device->name);
3103 			goto cleanup;
3104 		}
3105 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
3106 		if (!supports_crypto) {
3107 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
3108 				      rdma_devs[i]->device->name);
3109 			g_accel_mlx5.crypto_supported = false;
3110 		}
3111 		if (!caps[i].crc32c_supported) {
3112 			SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n",
3113 				      rdma_devs[i]->device->name);
3114 			g_accel_mlx5.crc32c_supported = false;
3115 		}
3116 		if (find_best_dev) {
3117 			/* Find device which supports max number of offloads */
3118 			dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported;
3119 			if (dev_stat > best_dev_stat) {
3120 				best_dev_stat = dev_stat;
3121 				best_dev = i;
3122 			}
3123 		}
3124 	}
3125 
3126 	/* User didn't specify devices to use, try to select the best one */
3127 	if (find_best_dev) {
3128 		if (best_dev == -1) {
3129 			best_dev = 0;
3130 		}
3131 		g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
3132 		g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported;
3133 		SPDK_NOTICELOG("Selected dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name,
3134 			       g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported);
3135 		first_dev = best_dev;
3136 		num_devs = 1;
3137 		if (g_accel_mlx5.crypto_supported) {
3138 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
3139 			/* Let mlx5 library know which device to use */
3140 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
3141 		}
3142 	} else {
3143 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
3144 	}
3145 
3146 	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
3147 	if (!g_accel_mlx5.dev_ctxs) {
3148 		SPDK_ERRLOG("Memory allocation failed\n");
3149 		rc = -ENOMEM;
3150 		goto cleanup;
3151 	}
3152 
3153 	for (i = first_dev; i < first_dev + num_devs; i++) {
3154 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
3155 					     rdma_devs[i], &caps[i]);
3156 		if (rc) {
3157 			goto cleanup;
3158 		}
3159 	}
3160 
3161 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
3162 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
3163 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
3164 	g_accel_mlx5.initialized = true;
3165 	free(rdma_devs);
3166 	free(caps);
3167 
3168 	if (g_accel_mlx5.attr.enable_driver) {
3169 		SPDK_NOTICELOG("Enabling mlx5 platform driver\n");
3170 		spdk_accel_driver_register(&g_accel_mlx5_driver);
3171 		spdk_accel_set_driver(g_accel_mlx5_driver.name);
3172 		spdk_mlx5_umr_implementer_register(true);
3173 	}
3174 
3175 	return 0;
3176 
3177 cleanup:
3178 	free(rdma_devs);
3179 	free(caps);
3180 	accel_mlx5_free_resources();
3181 	spdk_spin_destroy(&g_accel_mlx5.lock);
3182 
3183 	return rc;
3184 }
3185 
3186 static void
3187 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
3188 {
3189 	if (g_accel_mlx5.enabled) {
3190 		spdk_json_write_object_begin(w);
3191 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
3192 		spdk_json_write_named_object_begin(w, "params");
3193 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
3194 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
3195 		if (g_accel_mlx5.attr.allowed_devs) {
3196 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
3197 		}
3198 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
3199 		spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver);
3200 		spdk_json_write_object_end(w);
3201 		spdk_json_write_object_end(w);
3202 	}
3203 }
3204 
3205 static size_t
3206 accel_mlx5_get_ctx_size(void)
3207 {
3208 	return sizeof(struct accel_mlx5_task);
3209 }
3210 
3211 static int
3212 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
3213 {
3214 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
3215 	struct spdk_mlx5_crypto_keytag *keytag;
3216 	int rc;
3217 
3218 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
3219 		return -EINVAL;
3220 	}
3221 
3222 	attr.dek = calloc(1, key->key_size + key->key2_size);
3223 	if (!attr.dek) {
3224 		return -ENOMEM;
3225 	}
3226 
3227 	memcpy(attr.dek, key->key, key->key_size);
3228 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
3229 	attr.dek_len = key->key_size + key->key2_size;
3230 
3231 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
3232 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
3233 	free(attr.dek);
3234 	if (rc) {
3235 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
3236 		return rc;
3237 	}
3238 
3239 	key->priv = keytag;
3240 
3241 	return 0;
3242 }
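
/*
 * Example DEK layout for AES-XTS in the function above: key and key2 are
 * concatenated, so two 32-byte keys form a 64-byte DEK (AES-256-XTS):
 *
 *   attr.dek = [ key (32 B) | key2 (32 B) ], attr.dek_len = 64
 *
 * The plaintext copy is zeroed with spdk_memset_s() and freed as soon as
 * spdk_mlx5_crypto_keytag_create() has consumed it.
 */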
3243 
3244 static void
3245 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
3246 {
3247 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
3248 		return;
3249 	}
3250 
3251 	spdk_mlx5_crypto_keytag_destroy(key->priv);
3252 }
3253 
3254 static void
3255 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
3256 			   const struct accel_mlx5_stats *stats)
3257 {
3258 	double idle_polls_percentage = 0;
3259 	double cpls_per_poll = 0;
3260 	uint64_t total_tasks = 0;
3261 	int i;
3262 
3263 	if (stats->polls) {
3264 		idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls;
3265 	}
3266 	if (stats->polls > stats->idle_polls) {
3267 		cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls);
3268 	}
3269 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
3270 		total_tasks += stats->opcodes[i];
3271 	}
3272 
3273 	spdk_json_write_named_object_begin(w, header);
3274 
3275 	spdk_json_write_named_object_begin(w, "umrs");
3276 	spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs);
3277 	spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs);
3278 	spdk_json_write_named_uint64(w, "umrs", stats->umrs);
3279 	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs + stats->umrs);
3280 	spdk_json_write_object_end(w);
3281 
3282 	spdk_json_write_named_object_begin(w, "rdma");
3283 	spdk_json_write_named_uint64(w, "read", stats->rdma_reads);
3284 	spdk_json_write_named_uint64(w, "write", stats->rdma_writes);
3285 	spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes);
3286 	spdk_json_write_object_end(w);
3287 
3288 	spdk_json_write_named_object_begin(w, "polling");
3289 	spdk_json_write_named_uint64(w, "polls", stats->polls);
3290 	spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls);
3291 	spdk_json_write_named_uint64(w, "completions", stats->completions);
3292 	spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage);
3293 	spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll);
3294 	spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth);
3295 	spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey);
3296 	spdk_json_write_object_end(w);
3297 
3298 	spdk_json_write_named_object_begin(w, "tasks");
3299 	spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]);
3300 	spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]);
3301 	spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]);
3302 	spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]);
3303 	spdk_json_write_named_uint64(w, "mkey", stats->opcodes[ACCEL_MLX5_OPC_MKEY]);
3304 	spdk_json_write_named_uint64(w, "total", total_tasks);
3305 	spdk_json_write_object_end(w);
3306 
3307 	spdk_json_write_object_end(w);
3308 }
3309 
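/*
 * spdk_for_each_channel() callback, executed on the thread that owns each
 * io channel. Every device's counters are folded into two accumulators:
 * ctx->total (reported once the iteration completes) and a per-channel
 * total. Output depends on the requested level: TOTAL prints nothing here,
 * higher levels print a per-thread object, and DEV additionally prints a
 * "devices" array with one entry per device.
 */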
3310 static void
3311 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i)
3312 {
3313 	struct accel_mlx5_stats ch_stat = {};
3314 	struct accel_mlx5_dump_stats_ctx *ctx;
3315 	struct spdk_io_channel *_ch;
3316 	struct accel_mlx5_io_channel *ch;
3317 	struct accel_mlx5_dev *dev;
3318 	uint32_t j;
3319 
3320 	ctx = spdk_io_channel_iter_get_ctx(i);
3321 	_ch = spdk_io_channel_iter_get_channel(i);
3322 	ch = spdk_io_channel_get_ctx(_ch);
3323 
3324 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3325 		spdk_json_write_object_begin(ctx->w);
3326 		spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread()));
3327 	}
3328 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3329 		spdk_json_write_named_array_begin(ctx->w, "devices");
3330 	}
3331 
3332 	for (j = 0; j < ch->num_devs; j++) {
3333 		dev = &ch->devs[j];
3334 		/* Save grand total and channel stats */
3335 		accel_mlx5_add_stats(&ctx->total, &dev->stats);
3336 		accel_mlx5_add_stats(&ch_stat, &dev->stats);
3337 		if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3338 			spdk_json_write_object_begin(ctx->w);
3339 			accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats);
3340 			spdk_json_write_object_end(ctx->w);
3341 		}
3342 	}
3343 
3344 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3345 		spdk_json_write_array_end(ctx->w);
3346 	}
3347 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3348 		accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat);
3349 		spdk_json_write_object_end(ctx->w);
3350 		spdk_json_write_object_end(ctx->w);
3351 	}
3352 
3353 	spdk_for_each_channel_continue(i, 0);
3354 }
3355 
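/*
 * Completion callback of the channel iteration. Counters of channels that
 * were destroyed earlier are preserved in g_accel_mlx5.stats (hence the
 * spinlock) and are merged into the grand total before the response object
 * opened in accel_mlx5_dump_stats() is closed and the user callback fires.
 */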
3356 static void
3357 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status)
3358 {
3359 	struct accel_mlx5_dump_stats_ctx *ctx;
3360 
3361 	ctx = spdk_io_channel_iter_get_ctx(i);
3362 
3363 	spdk_spin_lock(&g_accel_mlx5.lock);
3364 	/* Add statistics from destroyed channels */
3365 	accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats);
3366 	spdk_spin_unlock(&g_accel_mlx5.lock);
3367 
3368 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3369 		/* channels[] */
3370 		spdk_json_write_array_end(ctx->w);
3371 	}
3372 
3373 	accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total);
3374 
3375 	/* Ends the whole response which was begun in accel_mlx5_dump_stats */
3376 	spdk_json_write_object_end(ctx->w);
3377 
3378 	ctx->cb(ctx->ctx, 0);
3379 	free(ctx);
3380 }
3381 
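/*
 * Entry point for stats retrieval (used by the module's RPC). It opens the
 * response object, optionally opens a "channels" array, and then walks all
 * io channels; accel_mlx5_dump_channel_stat_done() finishes the response
 * asynchronously. A caller sketch (rpc_dump_done and request are
 * hypothetical names):
 *
 *     rc = accel_mlx5_dump_stats(w, ACCEL_MLX5_DUMP_STAT_LEVEL_DEV,
 *                                rpc_dump_done, request);
 *     if (rc) {
 *             // complete the RPC with rc; the callback will not be invoked
 *     }
 */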
3382 int
3383 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level,
3384 		      accel_mlx5_dump_stat_done_cb cb, void *ctx)
3385 {
3386 	struct accel_mlx5_dump_stats_ctx *stat_ctx;
3387 
3388 	if (!w || !cb) {
3389 		return -EINVAL;
3390 	}
3391 	if (!g_accel_mlx5.initialized) {
3392 		return -ENODEV;
3393 	}
3394 
3395 	stat_ctx = calloc(1, sizeof(*stat_ctx));
3396 	if (!stat_ctx) {
3397 		return -ENOMEM;
3398 	}
3399 	stat_ctx->cb = cb;
3400 	stat_ctx->ctx = ctx;
3401 	stat_ctx->level = level;
3402 	stat_ctx->w = w;
3403 
3404 	spdk_json_write_object_begin(w);
3405 
3406 	if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3407 		spdk_json_write_named_array_begin(w, "channels");
3408 	}
3409 
3410 	spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
3411 			      accel_mlx5_dump_channel_stat_done);
3412 
3413 	return 0;
3414 }
3415 
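/*
 * The crypto engine supports only AES-XTS; key_size is the size of a
 * single XTS half-key, i.e. 16 bytes for AES-128 and 32 bytes for AES-256.
 */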
3416 static bool
3417 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
3418 {
3419 	switch (cipher) {
3420 	case SPDK_ACCEL_CIPHER_AES_XTS:
3421 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
3422 	default:
3423 		return false;
3424 	}
3425 }
3426 
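/*
 * Standard accel module contract: report how many memory domains the module
 * owns and, if a buffer is given, fill in up to array_size of them (one RDMA
 * domain per discovered device). The return value is always the total count,
 * so a caller can size its array accordingly and retry.
 */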
3427 static int
3428 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
3429 {
3430 	int i, size;
3431 
3432 	if (!domains || !array_size) {
3433 		return (int)g_accel_mlx5.num_ctxs;
3434 	}
3435 
3436 	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);
3437 
3438 	for (i = 0; i < size; i++) {
3439 		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
3440 	}
3441 
3442 	return (int)g_accel_mlx5.num_ctxs;
3443 }
3444 
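/* Find the channel's device whose protection domain matches pd, if any. */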
3445 static inline struct accel_mlx5_dev *
3446 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
3447 {
3448 	uint32_t i;
3449 
3450 	for (i = 0; i < accel_ch->num_devs; i++) {
3451 		if (accel_ch->devs[i].dev_ctx->pd == pd) {
3452 			return &accel_ch->devs[i];
3453 		}
3454 	}
3455 
3456 	return NULL;
3457 }
3458 
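/*
 * Looks up the ibv_pd recorded in an RDMA memory domain's user context and
 * binds the task to the qp of the channel device created on that same PD,
 * so that mkeys produced for this task are valid on the domain's device.
 */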
3459 static inline int
3460 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
3461 				       struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
3462 {
3463 	struct spdk_memory_domain_rdma_ctx *domain_ctx;
3464 	struct accel_mlx5_dev *dev;
3465 	struct ibv_pd *domain_pd;
3466 	size_t ctx_size;
3467 
3468 	domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
3469 	if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
3470 		SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
3471 		return -ENOTSUP;
3472 	}
3473 	domain_pd = domain_ctx->ibv_pd;
3474 	if (spdk_unlikely(!domain_pd)) {
3475 		SPDK_ERRLOG("no destination domain PD, task %p\n", task);
3476 		return -ENOTSUP;
3477 	}
3478 	dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
3479 	if (spdk_unlikely(!dev)) {
3480 		SPDK_ERRLOG("No device found for PD %p, device %s\n", domain_pd, domain_pd->context->device->name);
3481 		return -ENODEV;
3482 	}
3483 
3487 	task->qp = &dev->qp;
3488 
3489 	return 0;
3490 }
3491 
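/*
 * Examines the first two tasks of a sequence and collapses them into a
 * single mlx5 task when the pattern allows it:
 *
 *     COPY (dst in RDMA domain, src iovs == dst iovs)  -> MKEY
 *     COPY -> DECRYPT (copy dst in RDMA domain)        -> CRYPTO_MKEY
 *     ENCRYPT -> COPY (copy dst in RDMA domain)        -> CRYPTO_MKEY
 *
 * Merged tasks only register an mkey over the data; the transfer itself is
 * driven by the initiator of the sequence, hence needs_data_transfer. Any
 * other pattern falls back to executing the tasks one by one.
 */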
3492 static inline int
3493 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
3494 				   struct accel_mlx5_io_channel *accel_ch)
3495 {
3496 	struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
3497 	struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
3498 	struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
3499 	struct accel_mlx5_task *next;
3500 	int rc;
3501 
3502 	accel_mlx5_task_reset(first);
3503 	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
3504 		      next_base, next_base ? next_base->op_code : -1);
3505 	if (!next_base) {
3506 		if (first_base->op_code == SPDK_ACCEL_OPC_COPY && first_base->dst_domain &&
3507 		    spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
3508 		    SPDK_DMA_DEVICE_TYPE_RDMA &&
3509 		    accel_mlx5_compare_iovs(first_base->d.iovs, first_base->s.iovs, first_base->s.iovcnt)) {
3510 			SPDK_DEBUGLOG(accel_mlx5, "MKEY task %p\n", first);
3511 			rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, first_base->dst_domain);
3512 			if (spdk_unlikely(rc)) {
3513 				return rc;
3514 			}
3515 			first->mlx5_opcode = ACCEL_MLX5_OPC_MKEY;
3516 			first->needs_data_transfer = 1;
3517 			first->inplace = 1;
3518 			return 0;
3519 		}
3520 	} else {
3521 		switch (first_base->op_code) {
3522 		case SPDK_ACCEL_OPC_COPY:
3523 			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
3524 			    first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
3525 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3526 				next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
3527 				rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
3528 				if (spdk_unlikely(rc)) {
3529 					return rc;
3530 				}
3531 				/* Update decrypt task memory domain, complete copy task */
3532 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next);
3533 				next_base->dst_domain = first_base->dst_domain;
3534 				next_base->dst_domain_ctx = first_base->dst_domain_ctx;
3535 				accel_mlx5_task_reset(next);
3536 				next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3537 				next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3538 				next->needs_data_transfer = 1;
3539 				next->inplace = 1;
3540 				spdk_accel_task_complete(first_base, 0);
3541 				return 0;
3542 			}
3543 			break;
3544 		case SPDK_ACCEL_OPC_ENCRYPT:
3545 			if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
3546 			    next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
3547 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3548 				rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
3549 				if (spdk_unlikely(rc)) {
3550 					return rc;
3551 				}
3552 
3553 				/* Update encrypt task memory domain, complete copy task */
3554 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n",
3555 					      SPDK_CONTAINEROF(next_base,
3556 							       struct accel_mlx5_task, base), first);
3557 				first_base->dst_domain = next_base->dst_domain;
3558 				first_base->dst_domain_ctx = next_base->dst_domain_ctx;
3559 				first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3560 				first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3561 				first->needs_data_transfer = 1;
3562 				first->inplace = 1;
3563 				spdk_accel_task_complete(next_base, 0);
3564 				return 0;
3565 			}
3566 			break;
3567 
3568 		default:
3569 			break;
3570 		}
3571 	}
3572 
3573 	SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base);
3574 	/* Nothing to merge, execute tasks one by one */
3575 	accel_mlx5_task_assign_qp(first, accel_ch);
3576 	accel_mlx5_task_init_opcode(first);
3577 
3578 	return 0;
3579 }
3580 
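/*
 * spdk_accel_driver entry point. After examine/merge, the first task is
 * flagged as part of a driver-owned sequence and submitted through the
 * regular task submission path.
 */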
3581 static inline int
3582 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
3583 {
3584 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
3585 	struct spdk_accel_task *task;
3586 	struct accel_mlx5_task *mlx5_task;
3587 	int rc;
3588 
3589 	rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
3590 	if (spdk_unlikely(rc)) {
3591 		return rc;
3592 	}
3593 	task = spdk_accel_sequence_first_task(seq);
3594 	assert(task);
3595 	mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
3596 	mlx5_task->driver_seq = 1;
3597 
3598 	SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);
3599 
3600 	return _accel_mlx5_submit_tasks(accel_ch, task);
3601 }
3602 
3603 static struct accel_mlx5_module g_accel_mlx5 = {
3604 	.module = {
3605 		.module_init		= accel_mlx5_init,
3606 		.module_fini		= accel_mlx5_deinit,
3607 		.write_config_json	= accel_mlx5_write_config_json,
3608 		.get_ctx_size		= accel_mlx5_get_ctx_size,
3609 		.name			= "mlx5",
3610 		.supports_opcode	= accel_mlx5_supports_opcode,
3611 		.get_io_channel		= accel_mlx5_get_io_channel,
3612 		.submit_tasks		= accel_mlx5_submit_tasks,
3613 		.crypto_key_init	= accel_mlx5_crypto_key_init,
3614 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
3615 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
3616 		.get_memory_domains	= accel_mlx5_get_memory_domains,
3617 	}
3618 };
3619 
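/*
 * Platform driver interface: lets the accel framework hand complete
 * sequences to this module instead of dispatching tasks individually.
 */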
3620 static struct spdk_accel_driver g_accel_mlx5_driver = {
3621 	.name			= "mlx5",
3622 	.execute_sequence	= accel_mlx5_execute_sequence,
3623 	.get_io_channel		= accel_mlx5_get_io_channel
3624 };
3625 
3626 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
3627