xref: /spdk/module/accel/mlx5/accel_mlx5.c (revision c7acbd6bef98bb47b167bdcbe5efc40128fa190b)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	(qp)->ring_db = true;				\
40 	assert((task)->num_wrs < UINT16_MAX);		\
41 	(task)->num_wrs++;				\
42 } while (0)
43 
44 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
45 do {									\
46 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
47 	(dev)->wrs_in_cq++;						\
48 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
49 	(qp)->wrs_submitted++;						\
50 	(qp)->ring_db = true;						\
51 	assert((task)->num_wrs < UINT16_MAX);				\
52 	(task)->num_wrs++;						\
53 } while (0)
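
/*
 * Usage sketch (illustrative, mirrors the submission paths below, not a new code path): every
 * WQE that is posted without requesting a completion is accounted with the plain macro, while
 * the last WQE of a task submission is posted with SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE and accounted
 * with the _SIGNALED variant, since it also consumes a CQ slot:
 *
 *   ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task);                 // unsignaled UMR or RDMA WQE
 *   ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task);   // last, signaled WQE of the task
 */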
54 
55 struct accel_mlx5_io_channel;
56 struct accel_mlx5_task;
57 
58 struct accel_mlx5_dev_ctx {
59 	struct ibv_context *context;
60 	struct ibv_pd *pd;
61 	struct spdk_memory_domain *domain;
62 	struct spdk_mempool *psv_pool;
63 	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
64 	struct spdk_mlx5_psv **psvs;
65 	bool crypto_mkeys;
66 	bool sig_mkeys;
67 	bool crypto_multi_block;
68 };
69 
70 enum accel_mlx5_opcode {
71 	ACCEL_MLX5_OPC_COPY,
72 	ACCEL_MLX5_OPC_CRYPTO,
73 	ACCEL_MLX5_OPC_CRC32C,
74 	ACCEL_MLX5_OPC_CRYPTO_MKEY,
75 	ACCEL_MLX5_OPC_LAST
76 };
77 
78 SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
79 		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");
80 
81 struct accel_mlx5_stats {
82 	uint64_t crypto_umrs;
83 	uint64_t sig_umrs;
84 	uint64_t rdma_reads;
85 	uint64_t rdma_writes;
86 	uint64_t polls;
87 	uint64_t idle_polls;
88 	uint64_t completions;
89 	uint64_t nomem_qdepth;
90 	uint64_t nomem_mkey;
91 	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
92 };
93 
94 struct accel_mlx5_module {
95 	struct spdk_accel_module_if module;
96 	struct accel_mlx5_stats stats;
97 	struct spdk_spinlock lock;
98 	struct accel_mlx5_dev_ctx *dev_ctxs;
99 	uint32_t num_ctxs;
100 	struct accel_mlx5_attr attr;
101 	char **allowed_devs;
102 	size_t allowed_devs_count;
103 	bool initialized;
104 	bool enabled;
105 	bool crypto_supported;
106 	bool crc32c_supported;
107 };
108 
109 struct accel_mlx5_sge {
110 	uint32_t src_sge_count;
111 	uint32_t dst_sge_count;
112 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
113 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
114 };
115 
116 struct accel_mlx5_iov_sgl {
117 	struct iovec	*iov;
118 	uint32_t	iovcnt;
119 	uint32_t	iov_offset;
120 };
121 
122 struct accel_mlx5_psv_wrapper {
123 	uint32_t psv_index;
124 	struct {
125 		uint32_t error : 1;
126 		uint32_t reserved : 31;
127 	} bits;
128 	/* The mlx5 engine requires DMA-able memory. Use this member to hold a copy of the user's crc value
129 	 * since we don't know which kind of memory the user's buffer resides in */
130 	uint32_t crc;
131 	uint32_t crc_lkey;
132 };
133 
134 struct accel_mlx5_task {
135 	struct spdk_accel_task base;
136 	struct accel_mlx5_iov_sgl src;
137 	struct accel_mlx5_iov_sgl dst;
138 	struct accel_mlx5_qp *qp;
139 	STAILQ_ENTRY(accel_mlx5_task) link;
140 	uint16_t num_reqs;
141 	uint16_t num_completed_reqs;
142 	uint16_t num_submitted_reqs;
143 	uint16_t num_ops; /* number of allocated mkeys or number of operations */
144 	uint16_t num_wrs; /* Number of outstanding operations which consume a qp slot */
145 	union {
146 		struct {
147 			uint16_t blocks_per_req;
148 			uint16_t num_processed_blocks;
149 			uint16_t num_blocks;
150 		};
151 		struct {
152 			struct accel_mlx5_psv_wrapper *psv;
153 			uint32_t last_umr_len;
154 			uint8_t last_mkey_idx;
155 		};
156 	};
157 	union {
158 		uint16_t raw;
159 		struct {
160 			uint16_t inplace : 1;
161 			uint16_t driver_seq : 1;
162 			uint16_t needs_data_transfer : 1;
163 			uint16_t enc_order : 2;
164 			uint16_t mlx5_opcode: 4;
165 		};
166 	};
167 	/* Keep this array last since not all of its elements might be accessed; this reduces the amount
168 	 * of data to be cached */
169 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
170 };
171 
172 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");
173 
174 struct accel_mlx5_qp {
175 	struct spdk_mlx5_qp *qp;
176 	struct ibv_qp *verbs_qp;
177 	struct accel_mlx5_dev *dev;
178 	/* Tasks submitted to HW. We can't complete a task, even in the error case, until we reap
179 	 * completions for all submitted requests */
180 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
181 	uint16_t wrs_submitted;
182 	uint16_t wrs_max;
183 	bool ring_db;
184 	bool recovering;
185 	struct spdk_poller *recover_poller;
186 };
187 
188 struct accel_mlx5_dev {
189 	struct accel_mlx5_qp qp;
190 	struct spdk_mlx5_cq *cq;
191 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
192 	struct spdk_mlx5_mkey_pool *sig_mkeys;
193 	struct spdk_rdma_utils_mem_map *mmap;
194 	struct accel_mlx5_dev_ctx *dev_ctx;
195 	struct spdk_io_channel *ch;
196 	uint16_t wrs_in_cq;
197 	uint16_t wrs_in_cq_max;
198 	uint16_t crypto_split_blocks;
199 	bool crypto_multi_block;
200 	/* Pending tasks waiting for request resources */
201 	STAILQ_HEAD(, accel_mlx5_task) nomem;
202 	TAILQ_ENTRY(accel_mlx5_dev) link;
203 	struct accel_mlx5_stats stats;
204 };
205 
206 struct accel_mlx5_io_channel {
207 	struct accel_mlx5_dev *devs;
208 	struct spdk_poller *poller;
209 	uint16_t num_devs;
210 	/* Index in \b devs to be used for operations in a round-robin fashion */
211 	uint16_t dev_idx;
212 	bool poller_handler_registered;
213 };
214 
215 struct accel_mlx5_task_operations {
216 	int (*init)(struct accel_mlx5_task *task);
217 	int (*process)(struct accel_mlx5_task *task);
218 	int (*cont)(struct accel_mlx5_task *task);
219 	void (*complete)(struct accel_mlx5_task *task);
220 };
221 
222 struct accel_mlx5_psv_pool_iter_cb_args {
223 	struct accel_mlx5_dev_ctx *dev;
224 	struct spdk_rdma_utils_mem_map *map;
225 	int rc;
226 };
227 
228 struct accel_mlx5_dump_stats_ctx {
229 	struct accel_mlx5_stats total;
230 	struct spdk_json_write_ctx *w;
231 	enum accel_mlx5_dump_state_level level;
232 	accel_mlx5_dump_stat_done_cb cb;
233 	void *ctx;
234 };
235 
236 static struct accel_mlx5_module g_accel_mlx5;
237 static struct spdk_accel_driver g_accel_mlx5_driver;
238 
239 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
240 		struct spdk_accel_sequence *seq);
241 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task);
242 
243 static inline void
244 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
245 {
246 	s->iov = iov;
247 	s->iovcnt = iovcnt;
248 	s->iov_offset = 0;
249 }
250 
251 static inline void
252 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
253 {
254 	s->iov_offset += step;
255 	while (s->iovcnt > 0) {
256 		assert(s->iov != NULL);
257 		if (s->iov_offset < s->iov->iov_len) {
258 			break;
259 		}
260 
261 		s->iov_offset -= s->iov->iov_len;
262 		s->iov++;
263 		s->iovcnt--;
264 	}
265 }
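
/*
 * Illustrative usage sketch (assumed values, not exercised by the module itself):
 *
 *   struct iovec iovs[3] = { { .iov_len = 1024 }, { .iov_len = 2048 }, { .iov_len = 4096 } };
 *   struct accel_mlx5_iov_sgl s;
 *
 *   accel_mlx5_iov_sgl_init(&s, iovs, 3);
 *   accel_mlx5_iov_sgl_advance(&s, 2560);
 *   // now s.iov == &iovs[1], s.iov_offset == 1536, s.iovcnt == 2
 *
 * Advancing by the remaining 512 + 4096 bytes would leave iovcnt == 0 and iov_offset == 0.
 */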
266 
267 static inline void
268 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
269 {
270 	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
271 		      max_iovs, s->iov_offset, step);
272 	while (s->iovcnt <= max_iovs) {
273 		assert(s->iov != NULL);
274 		if (s->iov_offset >= step) {
275 			s->iov_offset -= step;
276 			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
277 				      s->iov_offset);
278 			return;
279 		}
280 		step -= s->iov_offset;
281 		s->iov--;
282 		s->iovcnt++;
283 		s->iov_offset = s->iov->iov_len;
284 		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
285 			      s->iov_offset, step);
286 	}
287 
288 	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
289 	assert(0);
290 }
291 
292 static inline int
293 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
294 {
295 	int i;
296 
297 	assert(sge_count > 0);
298 	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
299 	for (i = (int)sge_count - 1; i >= 0; i--) {
300 		if (sge[i].length > step) {
301 			sge[i].length -= step;
302 			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
303 			return (int)i + 1;
304 		}
305 		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
306 		step -= sge[i].length;
307 	}
308 
309 	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
310 	assert(step == 0);
311 
312 	return 0;
313 }
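
/*
 * Illustrative example (assumed values): for an sge list with lengths {4096, 4096, 2048} and
 * step == 3072, the last entry is dropped (2048 <= 3072), the second entry is trimmed to 3072
 * and the function returns 2, i.e. the list now describes 4096 + 3072 bytes.
 */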
314 
315 static inline void
316 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
317 {
318 	struct accel_mlx5_dev *dev = task->qp->dev;
319 
320 	assert(task->num_ops);
321 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
322 	spdk_accel_task_complete(&task->base, 0);
323 }
324 
325 static inline void
326 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
327 {
328 	struct accel_mlx5_dev *dev = task->qp->dev;
329 	struct spdk_accel_task *next;
330 	struct spdk_accel_sequence *seq;
331 	bool driver_seq;
332 
333 	assert(task->num_reqs == task->num_completed_reqs);
334 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
335 
336 	if (task->num_ops) {
337 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
338 			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
339 		}
340 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
341 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
342 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
343 		}
344 	}
345 	next = spdk_accel_sequence_next_task(&task->base);
346 	seq = task->base.seq;
347 	driver_seq = task->driver_seq;
348 
349 	assert(task->num_reqs == task->num_completed_reqs);
350 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->mlx5_opcode, rc);
351 	spdk_accel_task_complete(&task->base, rc);
352 
353 	if (driver_seq) {
354 		struct spdk_io_channel *ch = task->qp->dev->ch;
355 
356 		assert(seq);
357 		if (next) {
358 			accel_mlx5_execute_sequence(ch, seq);
359 		} else {
360 			spdk_accel_sequence_continue(seq);
361 		}
362 	}
363 }
364 
365 static int
366 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
367 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
368 {
369 	struct spdk_rdma_utils_memory_translation map_translation;
370 	struct spdk_memory_domain_translation_result domain_translation;
371 	struct spdk_memory_domain_translation_ctx local_ctx;
372 	int rc;
373 
374 	if (domain) {
375 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
376 		local_ctx.size = sizeof(local_ctx);
377 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
378 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
379 						       &local_ctx, addr, size, &domain_translation);
380 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
381 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
382 				    domain_translation.iov_count);
383 			if (rc == 0) {
384 				rc = -EINVAL;
385 			}
386 
387 			return rc;
388 		}
389 		sge->lkey = domain_translation.rdma.lkey;
390 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
391 		sge->length = domain_translation.iov.iov_len;
392 	} else {
393 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
394 						     &map_translation);
395 		if (spdk_unlikely(rc)) {
396 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
397 			return rc;
398 		}
399 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
400 		sge->addr = (uint64_t)addr;
401 		sge->length = size;
402 	}
403 
404 	return 0;
405 }
406 
407 static inline int
408 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
409 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
410 			  struct spdk_memory_domain *domain, void *domain_ctx)
411 {
412 	void *addr;
413 	uint32_t remaining = len;
414 	uint32_t size;
415 	int i = 0;
416 	int rc;
417 
418 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
419 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
420 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
421 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
422 		if (spdk_unlikely(rc)) {
423 			return rc;
424 		}
425 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
426 			      sge[i].length, sge[i].addr);
427 		accel_mlx5_iov_sgl_advance(iovs, size);
428 		i++;
429 		assert(remaining >= size);
430 		remaining -= size;
431 	}
432 	*_remaining = remaining;
433 
434 	return i;
435 }
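
/*
 * Illustrative example (assumed values): with an iov sgl of three 4 KiB buffers and len == 10240,
 * the function fills sge[0..2] with lengths 4096, 4096 and 2048, advances the sgl by 10240 bytes,
 * sets *_remaining to 0 and returns 3. If the payload needed more than ACCEL_MLX5_MAX_SGE entries,
 * the loop would stop early and *_remaining would hold the bytes that did not fit.
 */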
436 
437 static inline bool
438 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
439 {
440 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
441 }
442 
443 static inline uint16_t
444 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
445 {
446 	assert(qp->wrs_max >= qp->wrs_submitted);
447 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
448 
449 	/* We produce only 1 CQE per task submission, so 1 free CQ slot is enough */
450 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
451 		return 0;
452 	}
453 
454 	return qp->wrs_max - qp->wrs_submitted;
455 }
456 
457 static inline uint32_t
458 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
459 {
460 	uint32_t num_ops;
461 	int rc;
462 
463 	assert(task->num_reqs > task->num_completed_reqs);
464 	num_ops = task->num_reqs - task->num_completed_reqs;
465 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
466 	if (!num_ops) {
467 		return 0;
468 	}
469 	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
470 	if (spdk_unlikely(rc)) {
471 		return 0;
472 	}
473 	assert(num_ops <= UINT16_MAX);
474 	task->num_ops = num_ops;
475 
476 	return num_ops;
477 }
478 
479 static inline uint8_t
480 bs_to_bs_selector(uint32_t bs)
481 {
482 	switch (bs) {
483 	case 512:
484 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
485 	case 520:
486 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
487 	case 4096:
488 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
489 	case 4160:
490 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
491 	default:
492 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
493 	}
494 }
495 
496 static inline int
497 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
498 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
499 				uint64_t wr_id, uint32_t flags)
500 {
501 	struct spdk_mlx5_umr_crypto_attr cattr;
502 	struct spdk_mlx5_umr_attr umr_attr;
503 	struct accel_mlx5_qp *qp = mlx5_task->qp;
504 	struct accel_mlx5_dev *dev = qp->dev;
505 	struct spdk_accel_task *task = &mlx5_task->base;
506 	uint32_t length, remaining = 0, block_size = task->block_size;
507 	int rc;
508 
509 	length = num_blocks * block_size;
510 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
511 		      num_blocks);
512 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
513 				       task->src_domain, task->src_domain_ctx);
514 	if (spdk_unlikely(rc <= 0)) {
515 		if (rc == 0) {
516 			rc = -EINVAL;
517 		}
518 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
519 		return rc;
520 	}
521 	sge->src_sge_count = rc;
522 	if (spdk_unlikely(remaining)) {
523 		uint32_t new_len = length - remaining;
524 		uint32_t aligned_len, updated_num_blocks;
525 
526 		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
527 		if (new_len < block_size) {
528 			/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
529 			 * anything */
530 			return -ERANGE;
531 		}
532 
533 		/* Regular integer division; we need to round down to the previous block boundary */
534 		updated_num_blocks = new_len / block_size;
535 		assert(updated_num_blocks);
536 		assert(updated_num_blocks < num_blocks);
537 		aligned_len = updated_num_blocks * block_size;
538 
539 		if (aligned_len < new_len) {
540 			uint32_t dt = new_len - aligned_len;
541 
542 			/* We can't process a partial block; we need to unwind the src iov_sgl and sge to the
543 			 * previous block boundary */
544 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
545 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
546 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
547 			if (!sge->src_sge_count) {
548 				return -ERANGE;
549 			}
550 		}
551 		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
552 		length = aligned_len;
553 		num_blocks = updated_num_blocks;
554 	}
555 
556 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
557 	cattr.keytag = 0;
558 	cattr.dek_obj_id = dek_data->dek_obj_id;
559 	cattr.tweak_mode = dek_data->tweak_mode;
560 	cattr.enc_order = mlx5_task->enc_order;
561 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
562 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
563 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
564 		return -EINVAL;
565 	}
566 	umr_attr.mkey = mkey;
567 	umr_attr.sge = sge->src_sge;
568 
569 	if (!mlx5_task->inplace) {
570 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
571 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
572 					       task->dst_domain, task->dst_domain_ctx);
573 		if (spdk_unlikely(rc <= 0)) {
574 			if (rc == 0) {
575 				rc = -EINVAL;
576 			}
577 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
578 			return rc;
579 		}
580 		sge->dst_sge_count = rc;
581 		if (spdk_unlikely(remaining)) {
582 			uint32_t new_len = length - remaining;
583 			uint32_t aligned_len, updated_num_blocks, dt;
584 
585 			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
586 			if (new_len < block_size) {
587 				/* We need to process at least 1 block. If the buffer is too fragmented, we can't do
588 				 * anything */
589 				return -ERANGE;
590 			}
591 
592 			/* Regular integer division; we need to round down to the previous block boundary */
593 			updated_num_blocks = new_len / block_size;
594 			assert(updated_num_blocks);
595 			assert(updated_num_blocks < num_blocks);
596 			aligned_len = updated_num_blocks * block_size;
597 
598 			if (aligned_len < new_len) {
599 				dt = new_len - aligned_len;
600 				assert(dt > 0 && dt < length);
601 				/* We can't process a partial block; we need to unwind the src and dst iov_sgl and
602 				 * sge to the previous block boundary */
603 				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
604 				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
605 				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
606 				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
607 				if (!sge->dst_sge_count) {
608 					return -ERANGE;
609 				}
610 			}
611 			assert(length > aligned_len);
612 			dt = length - aligned_len;
613 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
614 			/* The same applies to the src iov_sgl and sge. In the worst case we unwind SRC twice */
615 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
616 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
617 			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
618 			if (!sge->src_sge_count) {
619 				return -ERANGE;
620 			}
621 			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
622 			length = aligned_len;
623 			num_blocks = updated_num_blocks;
624 		}
625 	}
626 
627 	SPDK_DEBUGLOG(accel_mlx5,
628 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
629 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
630 		      num_blocks);
631 
632 	umr_attr.sge_count = sge->src_sge_count;
633 	umr_attr.umr_len = length;
634 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
635 	mlx5_task->num_processed_blocks += num_blocks;
636 
637 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);
638 
639 	return rc;
640 }
641 
642 static inline int
643 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
644 {
645 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
646 	struct spdk_mlx5_crypto_dek_data dek_data;
647 	struct accel_mlx5_qp *qp = mlx5_task->qp;
648 	struct accel_mlx5_dev *dev = qp->dev;
649 	/* First RDMA after UMR must have a SMALL_FENCE */
650 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
651 	uint16_t num_blocks;
652 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
653 				    mlx5_task->num_ops);
654 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
655 	uint16_t i;
656 	int rc;
657 
658 	assert(qp_slot > 1);
659 	num_ops = spdk_min(num_ops, qp_slot >> 1);
660 	if (spdk_unlikely(!num_ops)) {
661 		return -EINVAL;
662 	}
663 
664 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
665 	if (spdk_unlikely(rc)) {
666 		return rc;
667 	}
668 
669 	mlx5_task->num_wrs = 0;
670 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
671 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
672 	for (i = 0; i < num_ops; i++) {
673 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
674 			/* The last request may consume fewer blocks than calculated if crypto_multi_block is true */
675 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
676 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
677 		} else {
678 			num_blocks = mlx5_task->blocks_per_req;
679 		}
680 
681 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
682 						     &dek_data, 0, 0);
683 		if (spdk_unlikely(rc)) {
684 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
685 			return rc;
686 		}
687 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
688 		dev->stats.crypto_umrs++;
689 	}
690 
691 	/* Handle the first `num_ops - 1` operations in a loop to simplify flags handling */
692 	for (i = 0; i < num_ops - 1; i++) {
693 		/* The UMR is the remote side of the RDMA_READ - data flows from the UMR to the sge */
694 		if (mlx5_task->inplace) {
695 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
696 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
697 		} else {
698 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
699 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
700 		}
701 		if (spdk_unlikely(rc)) {
702 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
703 			return rc;
704 		}
705 
706 		first_rdma_fence = 0;
707 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
708 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
709 		mlx5_task->num_submitted_reqs++;
710 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
711 		dev->stats.rdma_reads++;
712 	}
713 
714 	if (mlx5_task->inplace) {
715 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
716 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
717 	} else {
718 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
719 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
720 	}
721 	if (spdk_unlikely(rc)) {
722 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
723 		return rc;
724 	}
725 
726 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
727 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
728 	mlx5_task->num_submitted_reqs++;
729 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
730 	dev->stats.rdma_reads++;
731 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
732 
733 	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
734 			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
735 		/* We hit the "out of sge entries" case with a highly fragmented payload.
736 		 * In that case accel_mlx5_configure_crypto_umr() handled fewer data
737 		 * blocks than expected, which means we need at least 1 more request to
738 		 * complete this task. That request will be executed once all submitted
739 		 * ones are completed */
740 		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
741 			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
742 		mlx5_task->num_reqs++;
743 	}
744 
745 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
746 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
747 
748 	return 0;
749 }
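
/*
 * Illustrative WQE layout for one call of accel_mlx5_crypto_task_process() with num_ops == 2
 * (assumed value): UMR#0, UMR#1, RDMA_READ#0 (small fence, unsignaled), RDMA_READ#1 (signaled,
 * CQ update). Only the last RDMA_READ carries the task pointer as wr_id, so a single CQE
 * completes the whole batch.
 */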
750 
751 static inline int
752 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
753 {
754 	struct accel_mlx5_qp *qp = task->qp;
755 	struct accel_mlx5_dev *dev = qp->dev;
756 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
757 
758 	assert(task->num_reqs > task->num_completed_reqs);
759 	if (task->num_ops == 0) {
760 		/* No mkeys allocated, try to allocate now */
761 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
762 			/* Pool is empty, queue this task */
763 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
764 			dev->stats.nomem_mkey++;
765 			return -ENOMEM;
766 		}
767 	}
768 	/* We need to post at least 1 UMR and 1 RDMA operation */
769 	if (spdk_unlikely(qp_slot < 2)) {
770 		/* QP is full, queue this task */
771 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
772 		task->qp->dev->stats.nomem_qdepth++;
773 		return -ENOMEM;
774 	}
775 
776 	return accel_mlx5_crypto_task_process(task);
777 }
778 
779 static inline int
780 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
781 {
782 	struct spdk_accel_task *task = &mlx5_task->base;
783 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
784 	uint64_t src_nbytes = task->nbytes;
785 #ifdef DEBUG
786 	uint64_t dst_nbytes;
787 	uint32_t i;
788 #endif
789 	bool crypto_key_ok;
790 
791 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
792 			 task->crypto_key->priv);
793 	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
794 		if (crypto_key_ok) {
795 			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
796 				    mlx5_task->base.block_size);
797 		} else {
798 			SPDK_ERRLOG("Wrong crypto key provided\n");
799 		}
800 		return -EINVAL;
801 	}
802 
803 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
804 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
805 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
806 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
807 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
808 		mlx5_task->inplace = 1;
809 	} else {
810 #ifdef DEBUG
811 		dst_nbytes = 0;
812 		for (i = 0; i < task->d.iovcnt; i++) {
813 			dst_nbytes += task->d.iovs[i].iov_len;
814 		}
815 
816 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
817 			return -EINVAL;
818 		}
819 #endif
820 		mlx5_task->inplace = 0;
821 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
822 	}
823 
824 	if (dev->crypto_multi_block) {
825 		if (dev->crypto_split_blocks) {
826 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
827 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
828 			/* The last req may consume fewer blocks */
829 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
830 		} else {
831 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
832 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
833 
834 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
835 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
836 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
837 			} else {
838 				mlx5_task->num_reqs = 1;
839 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
840 			}
841 		}
842 	} else {
843 		mlx5_task->num_reqs = mlx5_task->num_blocks;
844 		mlx5_task->blocks_per_req = 1;
845 	}
846 
847 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) {
848 		/* Pool is empty, queue this task */
849 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
850 		dev->stats.nomem_mkey++;
851 		return -ENOMEM;
852 	}
853 	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
854 		/* Queue is full, queue this task */
855 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
856 			      mlx5_task->qp);
857 		dev->stats.nomem_qdepth++;
858 		return -ENOMEM;
859 	}
860 
861 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
862 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
863 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
864 
865 	return 0;
866 }
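
/*
 * Worked example (assumed values): for a 512 KiB task with block_size == 512, num_blocks == 1024.
 * With crypto_multi_block enabled and crypto_split_blocks == 8 this becomes num_reqs == 128 and
 * blocks_per_req == 8; without crypto_multi_block every block is a separate request, i.e.
 * num_reqs == 1024 and blocks_per_req == 1.
 */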
867 
868 static inline void
869 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
870 {
871 	spdk_accel_task_complete(&mlx5_task->base, 0);
872 }
873 
874 static inline int
875 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
876 				 uint64_t wrid, uint32_t fence)
877 {
878 	struct spdk_accel_task *task = &mlx5_task->base;
879 	struct accel_mlx5_sge sge;
880 	uint32_t remaining = 0;
881 	uint32_t dst_len;
882 	int rc;
883 
884 	/* Limit one RDMA_WRITE to the length of the dst buffer. Not all src buffers may fit into one dst
885 	 * buffer due to the ACCEL_MLX5_MAX_SGE limitation. In that case remaining is not zero */
886 	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
887 	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
888 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
889 				       task->src_domain, task->src_domain_ctx);
890 	if (spdk_unlikely(rc <= 0)) {
891 		if (rc == 0) {
892 			rc = -EINVAL;
893 		}
894 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
895 		return rc;
896 	}
897 	sge.src_sge_count = rc;
898 	assert(dst_len > remaining);
899 	dst_len -= remaining;
900 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
901 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len,  &remaining,
902 				       task->dst_domain, task->dst_domain_ctx);
903 	if (spdk_unlikely(rc != 1)) {
904 		/* We use a single dst entry; any result other than 1 is an error */
905 		if (rc == 0) {
906 			rc = -EINVAL;
907 		}
908 		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
909 		return rc;
910 	}
911 	if (spdk_unlikely(remaining)) {
912 		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
913 		assert(0);
914 		return -EINVAL;
915 	}
916 
917 	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
918 				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
919 	if (spdk_unlikely(rc)) {
920 		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
921 		return rc;
922 	}
923 	qp->dev->stats.rdma_writes++;
924 
925 	return 0;
926 }
927 
928 static inline int
929 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
930 {
931 
932 	struct accel_mlx5_qp *qp = mlx5_task->qp;
933 	struct accel_mlx5_dev *dev = qp->dev;
934 	uint16_t i;
935 	int rc;
936 
937 	mlx5_task->num_wrs = 0;
938 	assert(mlx5_task->num_reqs > 0);
939 	assert(mlx5_task->num_ops > 0);
940 
941 	/* Handle n-1 reqs in order to simplify wrid and fence handling */
942 	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
943 		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
944 		if (spdk_unlikely(rc)) {
945 			return rc;
946 		}
947 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
948 		mlx5_task->num_submitted_reqs++;
949 	}
950 
951 	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
952 					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
953 	if (spdk_unlikely(rc)) {
954 		return rc;
955 	}
956 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
957 	mlx5_task->num_submitted_reqs++;
958 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
959 
960 	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);
961 
962 	return 0;
963 }
964 
965 static inline int
966 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
967 {
968 	struct accel_mlx5_qp *qp = task->qp;
969 	struct accel_mlx5_dev *dev = qp->dev;
970 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
971 
972 	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
973 	if (spdk_unlikely(task->num_ops == 0)) {
974 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
975 		dev->stats.nomem_qdepth++;
976 		return -ENOMEM;
977 	}
978 	return accel_mlx5_copy_task_process(task);
979 }
980 
981 static inline uint32_t
982 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
983 			       struct iovec *dst_iov, uint32_t dst_iovcnt)
984 {
985 	uint32_t src = 0;
986 	uint32_t dst = 0;
987 	uint64_t src_offset = 0;
988 	uint64_t dst_offset = 0;
989 	uint32_t num_ops = 0;
990 	uint32_t src_sge_count = 0;
991 
992 	while (src < src_iovcnt && dst < dst_iovcnt) {
993 		uint64_t src_len = src_iov[src].iov_len - src_offset;
994 		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;
995 
996 		if (dst_len < src_len) {
997 			dst_offset = 0;
998 			src_offset += dst_len;
999 			dst++;
1000 			num_ops++;
1001 			src_sge_count = 0;
1002 		} else if (src_len < dst_len) {
1003 			dst_offset += src_len;
1004 			src_offset = 0;
1005 			src++;
1006 			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
1007 				num_ops++;
1008 				src_sge_count = 0;
1009 			}
1010 		} else {
1011 			dst_offset = 0;
1012 			src_offset = 0;
1013 			dst++;
1014 			src++;
1015 			num_ops++;
1016 			src_sge_count = 0;
1017 		}
1018 	}
1019 
1020 	assert(src == src_iovcnt);
1021 	assert(dst == dst_iovcnt);
1022 	assert(src_offset == 0);
1023 	assert(dst_offset == 0);
1024 	return num_ops;
1025 }
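
/*
 * Illustrative example (assumed values): src iovs of {4 KiB, 4 KiB} copied into dst iovs of
 * {2 KiB, 6 KiB} require 2 operations, because each RDMA_WRITE targets exactly one contiguous
 * dst entry: the first write covers 2 KiB of the first src buffer, the second write covers the
 * remaining 2 KiB plus the whole second src buffer.
 */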
1026 
1027 static inline int
1028 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
1029 {
1030 	struct spdk_accel_task *task = &mlx5_task->base;
1031 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1032 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1033 
1034 	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
1035 		mlx5_task->num_reqs = task->d.iovcnt;
1036 	} else if (task->d.iovcnt == 1) {
1037 		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
1038 	} else {
1039 		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
1040 				      task->d.iovs, task->d.iovcnt);
1041 	}
1042 	mlx5_task->inplace = 0;
1043 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1044 	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1045 	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
1046 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1047 		qp->dev->stats.nomem_qdepth++;
1048 		return -ENOMEM;
1049 	}
1050 	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
1051 		      mlx5_task->num_ops);
1052 
1053 	return 0;
1054 }
1055 
1056 static inline uint32_t
1057 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
1058 {
1059 	uint32_t i;
1060 	size_t iov_len;
1061 
1062 	for (i = 0; *len != 0 && i < iovcnt; i++) {
1063 		iov_len = iov[i].iov_len - *iov_offset;
1064 
1065 		if (iov_len < *len) {
1066 			*iov_offset = 0;
1067 			*len -= iov_len;
1068 			continue;
1069 		}
1070 		if (iov_len == *len) {
1071 			*iov_offset = 0;
1072 			i++;
1073 		} else { /* iov_len > *len */
1074 			*iov_offset += *len;
1075 		}
1076 		*len = 0;
1077 		break;
1078 	}
1079 
1080 	return i;
1081 }
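
/*
 * Illustrative example (assumed values): for iovs of {1 KiB, 2 KiB}, *iov_offset == 0 and
 * *len == 1536, the helper consumes the first entry and 512 bytes of the second one; it
 * returns 1 (entries fully consumed), leaves *iov_offset == 512 and sets *len to 0.
 */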
1082 
1083 static inline void
1084 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
1085 {
1086 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1087 
1088 	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
1089 	/* Normal task completion without allocated mkeys is not possible */
1090 	assert(mlx5_task->num_ops);
1091 	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
1092 	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
1093 	spdk_accel_task_complete(&mlx5_task->base, 0);
1094 }
1095 
1096 static inline int
1097 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
1098 				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
1099 				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
1100 				  bool sig_init, bool sig_check_gen)
1101 {
1102 	struct spdk_mlx5_umr_sig_attr sattr = {
1103 		.seed = mlx5_task->base.seed ^ UINT32_MAX,
1104 		.psv_index = mlx5_task->psv->psv_index,
1105 		.domain = sig_domain,
1106 		.sigerr_count = mkey->sig.sigerr_count,
1107 		.raw_data_size = umr_len,
1108 		.init = sig_init,
1109 		.check_gen = sig_check_gen,
1110 	};
1111 	struct spdk_mlx5_umr_attr umr_attr = {
1112 		.mkey = mkey->mkey,
1113 		.umr_len = umr_len,
1114 		.sge_count = sge_count,
1115 		.sge = sge,
1116 	};
1117 
1118 	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
1119 }
1120 
1121 static inline int
1122 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
1123 {
1124 	struct spdk_accel_task *task = &mlx5_task->base;
1125 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1126 	struct accel_mlx5_dev *dev = qp->dev;
1127 	uint32_t remaining;
1128 	int rc;
1129 
1130 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
1131 				       task->src_domain, task->src_domain_ctx);
1132 	if (spdk_unlikely(rc <= 0)) {
1133 		if (rc == 0) {
1134 			rc = -EINVAL;
1135 		}
1136 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
1137 		return rc;
1138 	}
1139 	assert(remaining == 0);
1140 	sge->src_sge_count = rc;
1141 
1142 	if (!mlx5_task->inplace) {
1143 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
1144 					       task->dst_domain, task->dst_domain_ctx);
1145 		if (spdk_unlikely(rc <= 0)) {
1146 			if (rc == 0) {
1147 				rc = -EINVAL;
1148 			}
1149 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
1150 			return rc;
1151 		}
1152 		assert(remaining == 0);
1153 		sge->dst_sge_count = rc;
1154 	}
1155 
1156 	return 0;
1157 }
1158 
1159 static inline int
1160 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
1161 {
1162 	struct accel_mlx5_sge sges;
1163 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1164 	struct accel_mlx5_dev *dev = qp->dev;
1165 	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1166 				    mlx5_task->num_ops);
1167 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1168 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1169 	struct ibv_sge *sge;
1170 	int rc;
1171 	uint16_t sge_count;
1172 
1173 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1174 	if (spdk_unlikely(!num_ops)) {
1175 		return -EINVAL;
1176 	}
1177 
1178 	mlx5_task->num_wrs = 0;
1179 	/* At this moment we have as many requests as can be submitted to a qp */
1180 	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
1181 	if (spdk_unlikely(rc)) {
1182 		return rc;
1183 	}
1184 	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
1185 					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true);
1186 	if (spdk_unlikely(rc)) {
1187 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1188 		return rc;
1189 	}
1190 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1191 	dev->stats.sig_umrs++;
1192 
1193 	if (mlx5_task->inplace) {
1194 		sge = sges.src_sge;
1195 		sge_count = sges.src_sge_count;
1196 	} else {
1197 		sge = sges.dst_sge;
1198 		sge_count = sges.dst_sge_count;
1199 	}
1200 
1201 	/*
1202 	 * Add the crc destination to the end of sges. A free entry must be available for CRC
1203 	 * because the task init function reserved it.
1204 	 */
1205 	assert(sge_count < ACCEL_MLX5_MAX_SGE);
1206 	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
1207 	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1208 	sge[sge_count++].length = sizeof(uint32_t);
1209 
1210 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1211 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1212 		if (spdk_unlikely(rc)) {
1213 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1214 			return rc;
1215 		}
1216 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1217 	}
1218 
1219 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
1220 				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1221 	if (spdk_unlikely(rc)) {
1222 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1223 		return rc;
1224 	}
1225 	mlx5_task->num_submitted_reqs++;
1226 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1227 	dev->stats.rdma_reads++;
1228 
1229 	return 0;
1230 }
1231 
1232 static inline int
1233 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
1234 				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
1235 				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
1236 {
1237 	int umr_idx = 0;
1238 	int rdma_idx = 0;
1239 	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1240 	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1241 	size_t umr_iov_offset;
1242 	size_t rdma_iov_offset;
1243 	size_t umr_len = 0;
1244 	void *sge_addr;
1245 	size_t sge_len;
1246 	size_t umr_sge_len;
1247 	size_t rdma_sge_len;
1248 	int rc;
1249 
1250 	umr_iov_offset = umr_iovs->iov_offset;
1251 	rdma_iov_offset = rdma_iovs->iov_offset;
1252 
1253 	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
1254 		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
1255 		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
1256 		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;
1257 
1258 		if (umr_sge_len == rdma_sge_len) {
1259 			rdma_idx++;
1260 			umr_iov_offset = 0;
1261 			rdma_iov_offset = 0;
1262 			sge_len = umr_sge_len;
1263 		} else if (umr_sge_len < rdma_sge_len) {
1264 			umr_iov_offset = 0;
1265 			rdma_iov_offset += umr_sge_len;
1266 			sge_len = umr_sge_len;
1267 		} else {
1268 			size_t remaining;
1269 
1270 			remaining = umr_sge_len - rdma_sge_len;
1271 			while (remaining) {
1272 				rdma_idx++;
1273 				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
1274 					break;
1275 				}
1276 				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
1277 				if (remaining == rdma_sge_len) {
1278 					rdma_idx++;
1279 					rdma_iov_offset = 0;
1280 					umr_iov_offset = 0;
1281 					remaining = 0;
1282 					break;
1283 				}
1284 				if (remaining < rdma_sge_len) {
1285 					rdma_iov_offset = remaining;
1286 					umr_iov_offset = 0;
1287 					remaining = 0;
1288 					break;
1289 				}
1290 				remaining -= rdma_sge_len;
1291 			}
1292 			sge_len = umr_sge_len - remaining;
1293 		}
1294 		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
1295 		if (spdk_unlikely(rc)) {
1296 			return -EINVAL;
1297 		}
1298 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
1299 			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
1300 		umr_len += sge_len;
1301 		umr_idx++;
1302 	}
1303 	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
1304 	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
1305 	*len = umr_len;
1306 
1307 	return umr_idx;
1308 }
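
/*
 * Illustrative example (assumed values): with a single 8 KiB UMR iov and two 4 KiB RDMA iovs,
 * the function emits one translated 8 KiB sge, advances both sgls by 8 KiB, stores 8192 in *len
 * and returns 1. Splitting at RDMA iov boundaries like this keeps the resulting UMR length
 * coverable by at most ACCEL_MLX5_MAX_SGE entries on the RDMA side of the same request.
 */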
1309 
1310 static inline int
1311 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
1312 {
1313 	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
1314 	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
1315 	struct spdk_accel_task *task = &mlx5_task->base;
1316 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1317 	struct accel_mlx5_dev *dev = qp->dev;
1318 	struct accel_mlx5_iov_sgl umr_sgl;
1319 	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
1320 	struct accel_mlx5_iov_sgl rdma_sgl;
1321 	uint64_t umr_offset;
1322 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
1323 	int sge_count;
1324 	uint32_t remaining;
1325 	int rc;
1326 	uint16_t i;
1327 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1328 				    mlx5_task->num_ops);
1329 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1330 	bool sig_init, sig_check_gen = false;
1331 
1332 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1333 	if (spdk_unlikely(!num_ops)) {
1334 		return -EINVAL;
1335 	}
1336 	/* Init signature on the first UMR */
1337 	sig_init = !mlx5_task->num_submitted_reqs;
1338 
1339 	/*
1340 	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
1341 	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
1342 	 *
1343 	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
1344 	 * the source accel_mlx5_iov_sgl.
1345 	 *
1346 	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
1347 	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
1348 	 */
1349 	if (mlx5_task->inplace) {
1350 		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1351 		umr_sgl_ptr = &umr_sgl;
1352 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1353 	} else {
1354 		umr_sgl_ptr = &mlx5_task->src;
1355 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1356 	}
1357 	mlx5_task->num_wrs = 0;
1358 	for (i = 0; i < num_ops; i++) {
1359 		/*
1360 		 * The last request may have only CRC. Skip UMR in this case because the MKey from
1361 		 * the previous request is used.
1362 		 */
1363 		if (umr_sgl_ptr->iovcnt == 0) {
1364 			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
1365 			break;
1366 		}
1367 		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
1368 				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
1369 		if (spdk_unlikely(sge_count <= 0)) {
1370 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1371 			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
1372 			return rc;
1373 		}
1374 		if (umr_sgl_ptr->iovcnt == 0) {
1375 			/*
1376 			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
1377 			 * the last UMR in this case. Since the last request can be postponed to the next
1378 			 * call of this function, we must save the MKey to the task structure.
1379 			 */
1380 			mlx5_task->last_umr_len = umr_len[i];
1381 			mlx5_task->last_mkey_idx = i;
1382 			sig_check_gen = true;
1383 		}
1384 		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
1385 						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
1386 						       sig_check_gen);
1387 		if (spdk_unlikely(rc)) {
1388 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1389 			return rc;
1390 		}
1391 		sig_init = false;
1392 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1393 		dev->stats.sig_umrs++;
1394 	}
1395 
1396 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1397 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1398 		if (spdk_unlikely(rc)) {
1399 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1400 			return rc;
1401 		}
1402 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1403 	}
1404 
1405 	for (i = 0; i < num_ops - 1; i++) {
1406 		if (mlx5_task->inplace) {
1407 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1408 							      task->src_domain, task->src_domain_ctx);
1409 		} else {
1410 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1411 							      task->dst_domain, task->dst_domain_ctx);
1412 		}
1413 		if (spdk_unlikely(sge_count <= 0)) {
1414 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1415 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1416 			return rc;
1417 		}
1418 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
1419 					    0, rdma_fence);
1420 		if (spdk_unlikely(rc)) {
1421 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1422 			return rc;
1423 		}
1424 		mlx5_task->num_submitted_reqs++;
1425 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1426 		dev->stats.rdma_reads++;
1427 		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1428 	}
1429 	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace &&
1430 			mlx5_task->dst.iovcnt == 0)) {
1431 		/*
1432 		 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey.
1433 		 * The CRC is handled in the previous MKey in this case.
1434 		 */
1435 		sge_count = 0;
1436 		umr_offset = mlx5_task->last_umr_len;
1437 	} else {
1438 		umr_offset = 0;
1439 		mlx5_task->last_mkey_idx = i;
1440 		if (mlx5_task->inplace) {
1441 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1442 							      task->src_domain, task->src_domain_ctx);
1443 		} else {
1444 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1445 							      task->dst_domain, task->dst_domain_ctx);
1446 		}
1447 		if (spdk_unlikely(sge_count <= 0)) {
1448 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1449 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1450 			return rc;
1451 		}
1452 		assert(remaining == 0);
1453 	}
1454 	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
1455 		/* Ensure that there is a free sge for the CRC destination. */
1456 		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
1457 		/* Add the crc destination to the end of sges. */
1458 		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
1459 		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1460 		sges[sge_count++].length = sizeof(uint32_t);
1461 	}
1462 	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
1463 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
1464 				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
1465 				    (uint64_t)mlx5_task, rdma_fence);
1466 	if (spdk_unlikely(rc)) {
1467 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1468 		return rc;
1469 	}
1470 	mlx5_task->num_submitted_reqs++;
1471 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1472 	dev->stats.rdma_reads++;
1473 
1474 	return 0;
1475 }
1476 
1477 static inline int
1478 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
1479 {
1480 	int rc;
1481 
1482 	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);
1483 
1484 	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1485 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
1486 
1487 	if (mlx5_task->num_reqs == 1) {
1488 		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
1489 	} else {
1490 		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
1491 	}
1492 
1493 	if (rc == 0) {
1494 		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
1495 		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1496 			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
1497 			      mlx5_task->num_completed_reqs);
1498 	}
1499 
1500 	return rc;
1501 }
1502 
1503 static inline int
1504 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
1505 {
1506 	struct accel_mlx5_qp *qp = task->qp;
1507 	struct accel_mlx5_dev *dev = qp->dev;
1508 
1509 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
1510 		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
1511 			      dev->dev_ctx->context->device->name);
1512 		dev->stats.nomem_mkey++;
1513 		return -ENOMEM;
1514 	}
1515 	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
1516 	if (spdk_unlikely(!task->psv)) {
1517 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
1518 		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1519 		task->num_ops = 0;
1520 		dev->stats.nomem_mkey++;
1521 		return -ENOMEM;
1522 	}
1523 	/* One extra slot is needed for the SET_PSV WQE to reset the error state in the PSV. */
1524 	if (spdk_unlikely(task->psv->bits.error)) {
1525 		uint32_t n_slots = task->num_ops * 2 + 1;
1526 
1527 		if (qp_slot < n_slots) {
1528 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
1529 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1530 			dev->stats.nomem_qdepth++;
1531 			task->num_ops = 0;
1532 			return -ENOMEM;
1533 		}
1534 	}
1535 
1536 	return 0;
1537 }
1538 
1539 static inline int
1540 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
1541 {
1542 	struct accel_mlx5_qp *qp = task->qp;
1543 	struct accel_mlx5_dev *dev = qp->dev;
1544 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1545 	int rc;
1546 
1547 	assert(task->num_reqs > task->num_completed_reqs);
1548 	if (task->num_ops == 0) {
1549 		/* No mkeys allocated, try to allocate now. */
1550 		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
1551 		if (spdk_unlikely(rc)) {
1552 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1553 			return -ENOMEM;
1554 		}
1555 	}
1556 	/* We need to post at least 1 UMR and 1 RDMA operation */
1557 	if (spdk_unlikely(qp_slot < 2)) {
1558 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1559 		dev->stats.nomem_qdepth++;
1560 		return -ENOMEM;
1561 	}
1562 
1563 	return accel_mlx5_crc_task_process(task);
1564 }
1565 
1566 static inline uint32_t
1567 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
1568 			      uint32_t dst_iovcnt)
1569 {
1570 	uint32_t src_idx = 0;
1571 	uint32_t dst_idx = 0;
1572 	uint32_t num_ops = 1;
1573 	uint32_t num_src_sge = 1;
1574 	uint32_t num_dst_sge = 1;
1575 	size_t src_offset = 0;
1576 	size_t dst_offset = 0;
1577 	uint32_t num_sge;
1578 	size_t src_len;
1579 	size_t dst_len;
1580 
1581 	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
1582 	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
1583 		return 1;
1584 	}
1585 
1586 	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
1587 		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1588 			num_ops++;
1589 			num_src_sge = 1;
1590 			num_dst_sge = 1;
1591 		}
1592 		src_len = src_iov[src_idx].iov_len - src_offset;
1593 		dst_len = dst_iov[dst_idx].iov_len - dst_offset;
1594 
1595 		if (src_len == dst_len) {
1596 			num_src_sge++;
1597 			num_dst_sge++;
1598 			src_offset = 0;
1599 			dst_offset = 0;
1600 			src_idx++;
1601 			dst_idx++;
1602 			continue;
1603 		}
1604 		if (src_len < dst_len) {
1605 			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
1606 			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
1607 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
1608 									   src_iovcnt - src_idx),
1609 							   &src_offset, &dst_len);
1610 			src_idx += num_sge;
1611 			num_src_sge += num_sge;
1612 			if (dst_len != 0) {
1613 				/*
1614 				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
1615 				 * are left on the current dst_iov.
1616 				 */
1617 				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
1618 			} else {
1619 				/* The src_iov advance is completed, shift to the next dst_iov. */
1620 				dst_idx++;
1621 				num_dst_sge++;
1622 				dst_offset = 0;
1623 			}
1624 		} else { /* src_len > dst_len */
1625 			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
1626 			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
1627 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
1628 									   dst_iovcnt - dst_idx),
1629 							   &dst_offset, &src_len);
1630 			dst_idx += num_sge;
1631 			num_dst_sge += num_sge;
1632 			if (src_len != 0) {
1633 				/*
1634 				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
1635 				 * are left on the current src_iov.
1636 				 */
1637 				src_offset = src_iov[src_idx].iov_len - src_len;
1638 			} else {
1639 				/* The dst_iov advance is completed, shift to the next src_iov. */
1640 				src_idx++;
1641 				num_src_sge++;
1642 				src_offset = 0;
1643 			}
1644 		}
1645 	}
1646 	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
1647 	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1648 		num_ops++;
1649 	}
1650 
1651 	/* The above loop must reach the end of both iovs simultaneously because they describe the same total number of bytes. */
1652 	assert(src_idx == src_iovcnt);
1653 	assert(dst_idx == dst_iovcnt);
1654 	assert(src_offset == 0);
1655 	assert(dst_offset == 0);
1656 
1657 	return num_ops;
1658 }
1659 
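/*
 * Initialize a CRC32C/COPY_CRC32C task: set up the iovec iterators, calculate the number
 * of requests and allocate mkeys and a PSV. At least 2 QP slots (1 UMR + 1 RDMA operation)
 * must be available, otherwise -ENOMEM is returned and the caller queues the task.
 */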
1660 static inline int
1661 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
1662 {
1663 	struct spdk_accel_task *task = &mlx5_task->base;
1664 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1665 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1666 	int rc;
1667 
1668 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1669 	if (mlx5_task->inplace) {
1670 		/* One entry is reserved for CRC */
1671 		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
1672 	} else {
1673 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1674 		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
1675 				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1676 	}
1677 
1678 	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
1679 	if (spdk_unlikely(rc)) {
1680 		return rc;
1681 	}
1682 
1683 	if (spdk_unlikely(qp_slot < 2)) {
1684 		/* Queue is full, the caller will queue this task */
1685 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
1686 			      mlx5_task->qp);
1687 		qp->dev->stats.nomem_qdepth++;
1688 		return -ENOMEM;
1689 	}
1690 	return 0;
1691 }
1692 
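/*
 * A CRYPTO_MKEY task registers a single crypto UMR whose mkey is later handed over to the
 * destination memory domain. Such a task cannot be split, so the source buffer must fit
 * into one set of SGEs, the number of blocks must not exceed the configured split
 * threshold and the task must be the only one in its accel sequence.
 */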
1693 static inline int
1694 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
1695 {
1696 	struct spdk_accel_task *task = &mlx5_task->base;
1697 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1698 	struct accel_mlx5_dev *dev = qp->dev;
1699 	uint32_t num_blocks;
1700 	int rc;
1701 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1702 	bool crypto_key_ok;
1703 
1704 	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
1705 		/* With `external mkey` we can't split the task or register several UMRs */
1706 		SPDK_ERRLOG("src buffer is too fragmented\n");
1707 		return -EINVAL;
1708 	}
1709 	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
1710 		SPDK_ERRLOG("accel domain is not supported\n");
1711 		return -ENOTSUP;
1712 	}
1713 	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
1714 		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
1715 		return -ENOTSUP;
1716 	}
1717 
1718 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
1719 			 task->crypto_key->priv);
1720 	if (spdk_unlikely(!crypto_key_ok)) {
1721 		SPDK_ERRLOG("Wrong crypto key provided\n");
1722 		return -EINVAL;
1723 	}
1724 	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
1725 		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
1726 			    mlx5_task->base.block_size);
1727 		return -EINVAL;
1728 	}
1729 
1730 	num_blocks = task->nbytes / mlx5_task->base.block_size;
1731 	if (dev->crypto_multi_block) {
1732 		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
1733 				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
1734 			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
1735 				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
1736 			return -E2BIG;
1737 		}
1738 	} else if (num_blocks != 1) {
1739 		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
1740 		return -E2BIG;
1741 	}
1742 
1743 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1744 	mlx5_task->num_blocks = num_blocks;
1745 	mlx5_task->num_processed_blocks = 0;
1746 	mlx5_task->num_reqs = 1;
1747 	mlx5_task->blocks_per_req = num_blocks;
1748 
1749 	if (spdk_unlikely(qp_slot == 0)) {
1750 		mlx5_task->num_ops = 0;
1751 		dev->stats.nomem_qdepth++;
1752 		return -ENOMEM;
1753 	}
1754 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1755 	if (spdk_unlikely(rc)) {
1756 		mlx5_task->num_ops = 0;
1757 		dev->stats.nomem_mkey++;
1758 		return -ENOMEM;
1759 	}
1760 	mlx5_task->num_ops = 1;
1761 
1762 	SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks,
1763 		      task->nbytes);
1764 
1765 	return 0;
1766 }
1767 
1768 static inline int
1769 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task)
1770 {
1771 	struct accel_mlx5_sge sge;
1772 	struct spdk_accel_task *task = &mlx5_task->base;
1773 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1774 	struct accel_mlx5_dev *dev = qp->dev;
1775 	struct spdk_mlx5_crypto_dek_data dek_data;
1776 	int rc;
1777 
1778 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1779 		return -EINVAL;
1780 	}
1781 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1782 
1783 	mlx5_task->num_wrs = 0;
1784 	rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data);
1785 	if (spdk_unlikely(rc)) {
1786 		return rc;
1787 	}
1788 
1789 	rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey,
1790 					     mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1791 	if (spdk_unlikely(rc)) {
1792 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1793 		return rc;
1794 	}
1795 	dev->stats.crypto_umrs++;
1796 	mlx5_task->num_submitted_reqs++;
1797 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1798 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
1799 
1800 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1801 
1802 	return 0;
1803 }
1804 
1805 static inline int
1806 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task)
1807 {
1808 	struct accel_mlx5_qp *qp = task->qp;
1809 	struct accel_mlx5_dev *dev = qp->dev;
1810 	int rc;
1811 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1812 
1813 	if (task->num_ops == 0) {
1814 		rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1);
1815 		if (spdk_unlikely(rc)) {
1816 			dev->stats.nomem_mkey++;
1817 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1818 			return -ENOMEM;
1819 		}
1820 		task->num_ops = 1;
1821 	}
1822 	if (spdk_unlikely(qp_slot == 0)) {
1823 		dev->stats.nomem_qdepth++;
1824 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1825 		return -ENOMEM;
1826 	}
1827 	return accel_mlx5_crypto_mkey_task_process(task);
1828 }
1829 
1830 static inline void
1831 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
1832 {
1833 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1834 
1835 	assert(mlx5_task->num_ops);
1836 	assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks);
1837 	assert(mlx5_task->base.seq);
1838 
1839 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1840 	spdk_accel_task_complete(&mlx5_task->base, 0);
1841 }
1842 
1843 static int
1844 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
1845 {
1846 	SPDK_ERRLOG("wrong function called\n");
1847 	SPDK_UNREACHABLE();
1848 }
1849 
1850 static void
1851 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
1852 {
1853 	SPDK_ERRLOG("wrong function called\n");
1854 	SPDK_UNREACHABLE();
1855 }
1856 
1857 static int
1858 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
1859 {
1860 	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);
1861 
1862 	return -ENOTSUP;
1863 }
1864 
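/*
 * Per-opcode task operations: init validates the task and allocates resources, process
 * posts the WQEs, cont resumes a task that ran out of resources or QP slots, complete
 * releases the resources and completes the base accel task.
 */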
1865 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
1866 	[ACCEL_MLX5_OPC_COPY] = {
1867 		.init = accel_mlx5_copy_task_init,
1868 		.process = accel_mlx5_copy_task_process,
1869 		.cont = accel_mlx5_copy_task_continue,
1870 		.complete = accel_mlx5_copy_task_complete,
1871 	},
1872 	[ACCEL_MLX5_OPC_CRYPTO] = {
1873 		.init = accel_mlx5_crypto_task_init,
1874 		.process = accel_mlx5_crypto_task_process,
1875 		.cont = accel_mlx5_crypto_task_continue,
1876 		.complete = accel_mlx5_crypto_task_complete,
1877 	},
1878 	[ACCEL_MLX5_OPC_CRC32C] = {
1879 		.init = accel_mlx5_crc_task_init,
1880 		.process = accel_mlx5_crc_task_process,
1881 		.cont = accel_mlx5_crc_task_continue,
1882 		.complete = accel_mlx5_crc_task_complete,
1883 	},
1884 	[ACCEL_MLX5_OPC_CRYPTO_MKEY] = {
1885 		.init = accel_mlx5_crypto_mkey_task_init,
1886 		.process = accel_mlx5_crypto_mkey_task_process,
1887 		.cont = accel_mlx5_crypto_mkey_task_continue,
1888 		.complete = accel_mlx5_crypto_mkey_task_complete,
1889 	},
1890 	[ACCEL_MLX5_OPC_LAST] = {
1891 		.init = accel_mlx5_task_op_not_supported,
1892 		.process = accel_mlx5_task_op_not_implemented,
1893 		.cont = accel_mlx5_task_op_not_implemented,
1894 		.complete = accel_mlx5_task_op_not_implemented_v
1895 	},
1896 };
1897 
1898 static void
1899 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc)
1900 {
1901 	struct accel_mlx5_task *task = ctx;
1902 
1903 	assert(task->needs_data_transfer);
1904 	task->needs_data_transfer = 0;
1905 
1906 	if (spdk_likely(!rc)) {
1907 		SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task);
1908 		accel_mlx5_task_complete(task);
1909 	} else {
1910 		SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc);
1911 		accel_mlx5_task_fail(task, rc);
1912 	}
1913 }
1914 
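/*
 * Hand the registered crypto mkey over to the destination memory domain. The data is
 * described by a single iov covering the whole mkey address space starting at offset 0;
 * the task is completed (or failed) asynchronously in the transfer callback.
 */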
1915 static inline void
1916 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
1917 {
1918 	struct spdk_memory_domain_translation_result translation;
1919 	struct spdk_accel_task *base = &task->base;
1920 	struct accel_mlx5_dev *dev = task->qp->dev;
1921 	int rc;
1922 
1923 	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY);
1924 	/* UMR is an offset in the addess space, so the start address is 0 */
1925 	/* UMR is an offset in the address space, so the start address is 0 */
1926 	translation.iov.iov_len = base->nbytes;
1927 	translation.iov_count = 1;
1928 	translation.size = sizeof(translation);
1929 	translation.rdma.rkey = task->mkeys[0]->mkey;
1930 	translation.rdma.lkey = task->mkeys[0]->mkey;
1931 
1932 	SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task,
1933 		      task->base.dst_domain_ctx, task->mkeys[0]->mkey);
1934 	rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1,
1935 					      dev->dev_ctx->domain, task, &translation.iov, 1, &translation,
1936 					      accel_mlx5_memory_domain_transfer_cpl, task);
1937 	if (spdk_unlikely(rc)) {
1938 		SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc);
1939 		accel_mlx5_task_fail(task, rc);
1940 	}
1941 }
1942 
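/*
 * Complete a task: start a memory domain data transfer first if the mkey must be passed
 * to another domain, then call the per-opcode completion handler. For driver-managed
 * sequences, either execute the next task in the sequence or return control to the accel
 * framework.
 */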
1943 static inline void
1944 accel_mlx5_task_complete(struct accel_mlx5_task *task)
1945 {
1946 	struct spdk_accel_sequence *seq = task->base.seq;
1947 	struct spdk_accel_task *next;
1948 	bool driver_seq;
1949 
1950 	if (task->needs_data_transfer) {
1951 		accel_mlx5_memory_domain_transfer(task);
1952 		return;
1953 	}
1954 
1955 	next = spdk_accel_sequence_next_task(&task->base);
1956 	driver_seq = task->driver_seq;
1957 
1958 	assert(task->num_reqs == task->num_completed_reqs);
1959 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->mlx5_opcode);
1960 
1961 	g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task);
1962 
1963 	if (driver_seq) {
1964 		struct spdk_io_channel *ch = task->qp->dev->ch;
1965 
1966 		assert(seq);
1967 		if (next) {
1968 			accel_mlx5_execute_sequence(ch, seq);
1969 		} else {
1970 			spdk_accel_sequence_continue(seq);
1971 		}
1972 	}
1973 }
1974 
1975 static inline int
1976 accel_mlx5_task_continue(struct accel_mlx5_task *task)
1977 {
1978 	struct accel_mlx5_qp *qp = task->qp;
1979 	struct accel_mlx5_dev *dev = qp->dev;
1980 
1981 	if (spdk_unlikely(qp->recovering)) {
1982 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1983 		return 0;
1984 	}
1985 
1986 	return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task);
1987 }

1988 static inline void
1989 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task)
1990 {
1991 	uint8_t base_opcode = mlx5_task->base.op_code;
1992 
1993 	switch (base_opcode) {
1994 	case SPDK_ACCEL_OPC_COPY:
1995 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY;
1996 		break;
1997 	case SPDK_ACCEL_OPC_ENCRYPT:
1998 		assert(g_accel_mlx5.crypto_supported);
1999 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
2000 		mlx5_task->mlx5_opcode =  ACCEL_MLX5_OPC_CRYPTO;
2001 		break;
2002 	case SPDK_ACCEL_OPC_DECRYPT:
2003 		assert(g_accel_mlx5.crypto_supported);
2004 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
2005 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2006 		break;
2007 	case SPDK_ACCEL_OPC_CRC32C:
2008 		mlx5_task->inplace = 1;
2009 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2010 		break;
2011 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2012 		mlx5_task->inplace = 0;
2013 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2014 		break;
2015 	default:
2016 		SPDK_ERRLOG("wrong opcode %d\n", base_opcode);
2017 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST;
2018 	}
2019 }
2020 
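/*
 * Post-poller handler: ring the doorbell of every QP that has pending WRs so that all WRs
 * submitted during one poller iteration are flushed with a single doorbell per QP.
 */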
2021 static void
2022 accel_mlx5_post_poller_handler(void *fn_arg)
2023 {
2024 	struct accel_mlx5_io_channel *ch = fn_arg;
2025 	struct accel_mlx5_dev *dev;
2026 	uint32_t i;
2027 
2028 	for (i = 0; i < ch->num_devs; i++) {
2029 		dev = &ch->devs[i];
2030 
2031 		if (dev->qp.ring_db) {
2032 			spdk_mlx5_qp_complete_send(dev->qp.qp);
2033 			dev->qp.ring_db = false;
2034 		}
2035 	}
2036 
2037 	ch->poller_handler_registered = false;
2038 }
2039 
2040 static inline int
2041 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task)
2042 {
2043 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2044 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
2045 	int rc;
2046 
2047 	/* We should not receive any tasks if the module was not enabled */
2048 	assert(g_accel_mlx5.enabled);
2049 
2050 	dev->stats.opcodes[mlx5_task->mlx5_opcode]++;
2051 	rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task);
2052 	if (spdk_unlikely(rc)) {
2053 		if (rc == -ENOMEM) {
2054 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
2055 				      mlx5_task->num_reqs);
2056 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2057 			return 0;
2058 		}
2059 		SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc);
2060 		return rc;
2061 	}
2062 
2063 	if (spdk_unlikely(mlx5_task->qp->recovering)) {
2064 		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2065 		return 0;
2066 	}
2067 
2068 	if (!accel_ch->poller_handler_registered) {
2069 		spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch);
2070 		/* The function above may fail to register our handler; in that case we ring doorbells on the next
2071 		 * polling cycle. That is less efficient but still works. */
2072 		accel_ch->poller_handler_registered = true;
2073 	}
2074 
2075 	return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task);
2076 }
2077 
2078 static inline void
2079 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch)
2080 {
2081 	struct accel_mlx5_dev *dev;
2082 
2083 	dev = &accel_ch->devs[accel_ch->dev_idx];
2084 	accel_ch->dev_idx++;
2085 	if (accel_ch->dev_idx == accel_ch->num_devs) {
2086 		accel_ch->dev_idx = 0;
2087 	}
2088 
2089 	mlx5_task->qp = &dev->qp;
2090 }
2091 
2092 static inline void
2093 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task)
2094 {
2095 	mlx5_task->num_completed_reqs = 0;
2096 	mlx5_task->num_submitted_reqs = 0;
2097 	mlx5_task->num_ops = 0;
2098 	mlx5_task->num_processed_blocks = 0;
2099 	mlx5_task->raw = 0;
2100 }
2101 
2102 static int
2103 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task)
2104 {
2105 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2106 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
2107 
2108 	accel_mlx5_task_assign_qp(mlx5_task, accel_ch);
2109 	accel_mlx5_task_reset(mlx5_task);
2110 	accel_mlx5_task_init_opcode(mlx5_task);
2111 
2112 	return _accel_mlx5_submit_tasks(accel_ch, task);
2113 }
2114 
2115 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);
2116 
2117 static int
2118 accel_mlx5_recover_qp_poller(void *arg)
2119 {
2120 	struct accel_mlx5_qp *qp = arg;
2121 
2122 	spdk_poller_unregister(&qp->recover_poller);
2123 	accel_mlx5_recover_qp(qp);
2124 	return SPDK_POLLER_BUSY;
2125 }
2126 
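/*
 * Recover a QP that entered the error state: destroy it and create a new one with the
 * same attributes. If creation fails, retry periodically from a poller.
 */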
2127 static void
2128 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
2129 {
2130 	struct accel_mlx5_dev *dev = qp->dev;
2131 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2132 	int rc;
2133 
2134 	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
2135 	if (qp->qp) {
2136 		spdk_mlx5_qp_destroy(qp->qp);
2137 		qp->qp = NULL;
2138 	}
2139 
2140 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2141 	mlx5_qp_attr.cap.max_recv_wr = 0;
2142 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2143 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2144 
2145 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2146 	if (rc) {
2147 		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
2148 			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2149 		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
2150 				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2151 		return;
2152 	}
2153 
2154 	qp->recovering = false;
2155 }
2156 
2157 static inline void
2158 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
2159 {
2160 	struct accel_mlx5_qp *qp = task->qp;
2161 
2162 	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
2163 		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
2164 			     qp, task, wc->status, spdk_env_get_current_core());
2165 	} else {
2166 		SPDK_DEBUGLOG(accel_mlx5,
2167 			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
2168 			      qp, task, wc->status, spdk_env_get_current_core());
2169 	}
2170 
2171 	qp->recovering = true;
2172 	assert(task->num_completed_reqs <= task->num_submitted_reqs);
2173 	if (task->num_completed_reqs == task->num_submitted_reqs) {
2174 		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2175 		accel_mlx5_task_fail(task, -EIO);
2176 	}
2177 }
2178 
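/*
 * Poll the device CQ. A signaled completion acknowledges all requests submitted so far
 * for the task at the head of the in_hw queue: finished tasks are completed, partially
 * processed tasks are continued, and error completions trigger QP recovery once all
 * outstanding WRs have been flushed.
 */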
2179 static inline int64_t
2180 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
2181 {
2182 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
2183 	struct accel_mlx5_task *task;
2184 	struct accel_mlx5_qp *qp;
2185 	int reaped, i, rc;
2186 	uint16_t completed;
2187 
2188 	dev->stats.polls++;
2189 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
2190 	if (spdk_unlikely(reaped < 0)) {
2191 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
2192 		return reaped;
2193 	} else if (reaped == 0) {
2194 		dev->stats.idle_polls++;
2195 		return 0;
2196 	}
2197 	dev->stats.completions += reaped;
2198 
2199 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
2200 		      dev->dev_ctx->context->device->name);
2201 
2202 	for (i = 0; i < reaped; i++) {
2203 		if (spdk_unlikely(!wc[i].wr_id)) {
2204 			/* Unsignaled completion with error, ignore */
2205 			continue;
2206 		}
2207 		task = (struct accel_mlx5_task *)wc[i].wr_id;
2208 		qp = task->qp;
2209 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
2210 		assert(task->num_submitted_reqs > task->num_completed_reqs);
2211 		completed = task->num_submitted_reqs - task->num_completed_reqs;
2212 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
2213 		task->num_completed_reqs += completed;
2214 		assert(qp->wrs_submitted >= task->num_wrs);
2215 		qp->wrs_submitted -= task->num_wrs;
2216 		assert(dev->wrs_in_cq > 0);
2217 		dev->wrs_in_cq--;
2218 
2219 		if (spdk_unlikely(wc[i].status)) {
2220 			accel_mlx5_process_error_cpl(&wc[i], task);
2221 			if (qp->wrs_submitted == 0) {
2222 				assert(STAILQ_EMPTY(&qp->in_hw));
2223 				accel_mlx5_recover_qp(qp);
2224 			}
2225 			continue;
2226 		}
2227 
2228 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
2229 			      task->num_reqs - task->num_completed_reqs);
2230 		if (task->num_completed_reqs == task->num_reqs) {
2231 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2232 			accel_mlx5_task_complete(task);
2233 		} else {
2234 			assert(task->num_submitted_reqs < task->num_reqs);
2235 			assert(task->num_completed_reqs == task->num_submitted_reqs);
2236 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2237 			rc = accel_mlx5_task_continue(task);
2238 			if (spdk_unlikely(rc)) {
2239 				if (rc != -ENOMEM) {
2240 					accel_mlx5_task_fail(task, rc);
2241 				}
2242 			}
2243 		}
2244 	}
2245 
2246 	return reaped;
2247 }
2248 
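/*
 * Retry tasks that previously got -ENOMEM. The list is iterated at most once per call
 * because a recovering QP puts tasks back on the same list.
 */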
2249 static inline void
2250 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
2251 {
2252 	struct accel_mlx5_task *task, *tmp, *last;
2253 	int rc;
2254 
2255 	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
2256 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
2257 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
2258 		rc = accel_mlx5_task_continue(task);
2259 		if (spdk_unlikely(rc)) {
2260 			if (rc != -ENOMEM) {
2261 				accel_mlx5_task_fail(task, rc);
2262 			}
2263 			break;
2264 		}
2265 		/* If the qpair is recovering, the task is added back to the nomem list and 0 is returned. In that
2266 		 * case we need a special condition to iterate the list only once and stop this FOREACH loop. */
2267 		if (task == last) {
2268 			break;
2269 		}
2270 	}
2271 }
2272 
2273 static int
2274 accel_mlx5_poller(void *ctx)
2275 {
2276 	struct accel_mlx5_io_channel *ch = ctx;
2277 	struct accel_mlx5_dev *dev;
2278 
2279 	int64_t completions = 0, rc;
2280 	uint32_t i;
2281 
2282 	/* Reaped completions may register a post poller handler, which makes no sense in the scope of our own poller */
2283 	ch->poller_handler_registered = true;
2284 	for (i = 0; i < ch->num_devs; i++) {
2285 		dev = &ch->devs[i];
2286 		if (dev->wrs_in_cq) {
2287 			rc = accel_mlx5_poll_cq(dev);
2288 			if (spdk_unlikely(rc < 0)) {
2289 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
2290 			}
2291 			completions += rc;
2292 			if (dev->qp.ring_db) {
2293 				spdk_mlx5_qp_complete_send(dev->qp.qp);
2294 				dev->qp.ring_db = false;
2295 			}
2296 		}
2297 		if (!STAILQ_EMPTY(&dev->nomem)) {
2298 			accel_mlx5_resubmit_nomem_tasks(dev);
2299 		}
2300 	}
2301 	ch->poller_handler_registered = false;
2302 
2303 	return !!completions;
2304 }
2305 
2306 static bool
2307 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
2308 {
2309 	assert(g_accel_mlx5.enabled);
2310 
2311 	switch (opc) {
2312 	case SPDK_ACCEL_OPC_COPY:
2313 		return true;
2314 	case SPDK_ACCEL_OPC_ENCRYPT:
2315 	case SPDK_ACCEL_OPC_DECRYPT:
2316 		return g_accel_mlx5.crypto_supported;
2317 	case SPDK_ACCEL_OPC_CRC32C:
2318 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2319 		return g_accel_mlx5.crc32c_supported;
2320 	default:
2321 		return false;
2322 	}
2323 }
2324 
2325 static struct spdk_io_channel *
2326 accel_mlx5_get_io_channel(void)
2327 {
2328 	assert(g_accel_mlx5.enabled);
2329 	return spdk_get_io_channel(&g_accel_mlx5);
2330 }
2331 
2332 static int
2333 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
2334 {
2335 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2336 	int rc;
2337 
2338 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2339 	mlx5_qp_attr.cap.max_recv_wr = 0;
2340 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2341 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2342 
2343 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2344 	if (rc) {
2345 		return rc;
2346 	}
2347 
2348 	STAILQ_INIT(&qp->in_hw);
2349 	qp->dev = dev;
2350 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
2351 	assert(qp->verbs_qp);
2352 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
2353 
2354 	return 0;
2355 }
2356 
2357 static void
2358 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add)
2359 {
2360 	int i;
2361 
2362 	stats->crypto_umrs += to_add->crypto_umrs;
2363 	stats->sig_umrs += to_add->sig_umrs;
2364 	stats->rdma_reads += to_add->rdma_reads;
2365 	stats->rdma_writes += to_add->rdma_writes;
2366 	stats->polls += to_add->polls;
2367 	stats->idle_polls += to_add->idle_polls;
2368 	stats->completions += to_add->completions;
2369 	stats->nomem_qdepth += to_add->nomem_qdepth;
2370 	stats->nomem_mkey += to_add->nomem_mkey;
2371 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
2372 		stats->opcodes[i] += to_add->opcodes[i];
2373 	}
2374 }
2375 
2376 static void
2377 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
2378 {
2379 	struct accel_mlx5_io_channel *ch = ctx_buf;
2380 	struct accel_mlx5_dev *dev;
2381 	uint32_t i;
2382 
2383 	spdk_poller_unregister(&ch->poller);
2384 	for (i = 0; i < ch->num_devs; i++) {
2385 		dev = &ch->devs[i];
2386 		spdk_mlx5_qp_destroy(dev->qp.qp);
2387 		if (dev->cq) {
2388 			spdk_mlx5_cq_destroy(dev->cq);
2389 		}
2390 		spdk_poller_unregister(&dev->qp.recover_poller);
2391 		if (dev->crypto_mkeys) {
2392 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
2393 		}
2394 		if (dev->sig_mkeys) {
2395 			spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys);
2396 		}
2397 		spdk_rdma_utils_free_mem_map(&dev->mmap);
2398 		spdk_spin_lock(&g_accel_mlx5.lock);
2399 		accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats);
2400 		spdk_spin_unlock(&g_accel_mlx5.lock);
2401 	}
2402 	free(ch->devs);
2403 }
2404 
2405 static int
2406 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
2407 {
2408 	struct spdk_mlx5_cq_attr cq_attr = {};
2409 	struct accel_mlx5_io_channel *ch = ctx_buf;
2410 	struct accel_mlx5_dev_ctx *dev_ctx;
2411 	struct accel_mlx5_dev *dev;
2412 	uint32_t i;
2413 	int rc;
2414 
2415 	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
2416 	if (!ch->devs) {
2417 		SPDK_ERRLOG("Memory allocation failed\n");
2418 		return -ENOMEM;
2419 	}
2420 
2421 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2422 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2423 		dev = &ch->devs[i];
2424 		dev->dev_ctx = dev_ctx;
2425 
2426 		if (dev_ctx->crypto_mkeys) {
2427 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2428 			if (!dev->crypto_mkeys) {
2429 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2430 				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
2431 				 * We should not be here if pool creation failed */
2432 				assert(0);
2433 				goto err_out;
2434 			}
2435 		}
2436 		if (dev_ctx->sig_mkeys) {
2437 			dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2438 			if (!dev->sig_mkeys) {
2439 				SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2440 				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
2441 				 * We should not be here if pool creation failed */
2442 				assert(0);
2443 				goto err_out;
2444 			}
2445 		}
2446 
2447 		memset(&cq_attr, 0, sizeof(cq_attr));
2448 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
2449 		cq_attr.cqe_size = 64;
2450 		cq_attr.cq_context = dev;
2451 
2452 		ch->num_devs++;
2453 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
2454 		if (rc) {
2455 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
2456 			goto err_out;
2457 		}
2458 
2459 		rc = accel_mlx5_create_qp(dev, &dev->qp);
2460 		if (rc) {
2461 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
2462 			goto err_out;
2463 		}
2464 
2465 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2466 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2467 		if (!dev->mmap) {
2468 			SPDK_ERRLOG("Failed to create memory map\n");
2469 			rc = -ENOMEM;
2470 			goto err_out;
2471 		}
2472 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
2473 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
2474 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
2475 		dev->ch = spdk_io_channel_from_ctx(ctx_buf);
2476 		STAILQ_INIT(&dev->nomem);
2477 	}
2478 
2479 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
2480 
2481 	return 0;
2482 
2483 err_out:
2484 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
2485 	return rc;
2486 }
2487 
2488 void
2489 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
2490 {
2491 	assert(attr);
2492 
2493 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
2494 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
2495 	attr->allowed_devs = NULL;
2496 	attr->crypto_split_blocks = 0;
2497 	attr->enable_driver = false;
2498 }
2499 
2500 static void
2501 accel_mlx5_allowed_devs_free(void)
2502 {
2503 	size_t i;
2504 
2505 	if (!g_accel_mlx5.allowed_devs) {
2506 		return;
2507 	}
2508 
2509 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
2510 		free(g_accel_mlx5.allowed_devs[i]);
2511 	}
2512 	free(g_accel_mlx5.attr.allowed_devs);
2513 	free(g_accel_mlx5.allowed_devs);
2514 	g_accel_mlx5.attr.allowed_devs = NULL;
2515 	g_accel_mlx5.allowed_devs = NULL;
2516 	g_accel_mlx5.allowed_devs_count = 0;
2517 }
2518 
2519 static int
2520 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
2521 {
2522 	char *str, *tmp, *tok, *sp = NULL;
2523 	size_t devs_count = 0;
2524 
2525 	str = strdup(allowed_devs);
2526 	if (!str) {
2527 		return -ENOMEM;
2528 	}
2529 
2530 	accel_mlx5_allowed_devs_free();
2531 
2532 	tmp = str;
2533 	while ((tmp = strchr(tmp, ',')) != NULL) {
2534 		tmp++;
2535 		devs_count++;
2536 	}
2537 	devs_count++;
2538 
2539 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
2540 	if (!g_accel_mlx5.allowed_devs) {
2541 		free(str);
2542 		return -ENOMEM;
2543 	}
2544 
2545 	devs_count = 0;
2546 	tok = strtok_r(str, ",", &sp);
2547 	while (tok) {
2548 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
2549 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
2550 			free(str);
2551 			accel_mlx5_allowed_devs_free();
2552 			return -ENOMEM;
2553 		}
2554 		tok = strtok_r(NULL, ",", &sp);
2555 		devs_count++;
2556 		g_accel_mlx5.allowed_devs_count++;
2557 	}
2558 
2559 	free(str);
2560 
2561 	return 0;
2562 }
2563 
2564 int
2565 accel_mlx5_enable(struct accel_mlx5_attr *attr)
2566 {
2567 	int rc;
2568 
2569 	if (g_accel_mlx5.enabled) {
2570 		return -EEXIST;
2571 	}
2572 	if (attr) {
2573 		if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) {
2574 			SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n",
2575 				    ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count());
2576 			return -EINVAL;
2577 		}
2578 		if (attr->qp_size < 8) {
2579 			SPDK_ERRLOG("qp_size must be at least 8\n");
2580 			return -EINVAL;
2581 		}
2582 		g_accel_mlx5.attr = *attr;
2583 		g_accel_mlx5.attr.allowed_devs = NULL;
2584 
2585 		if (attr->allowed_devs) {
2586 			/* Contains a copy of user's string */
2587 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
2588 			if (!g_accel_mlx5.attr.allowed_devs) {
2589 				return -ENOMEM;
2590 			}
2591 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
2592 			if (rc) {
2593 				return rc;
2594 			}
2595 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
2596 							 g_accel_mlx5.allowed_devs_count);
2597 			if (rc) {
2598 				accel_mlx5_allowed_devs_free();
2599 				return rc;
2600 			}
2601 		}
2602 	} else {
2603 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
2604 	}
2605 
2606 	g_accel_mlx5.enabled = true;
2607 	spdk_accel_module_list_add(&g_accel_mlx5.module);
2608 
2609 	return 0;
2610 }
2611 
2612 static void
2613 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx)
2614 {
2615 	uint32_t i, num_psvs, num_psvs_in_pool;
2616 
2617 	if (!dev_ctx->psvs) {
2618 		return;
2619 	}
2620 
2621 	num_psvs = g_accel_mlx5.attr.num_requests;
2622 
2623 	for (i = 0; i < num_psvs; i++) {
2624 		if (dev_ctx->psvs[i]) {
2625 			spdk_mlx5_destroy_psv(dev_ctx->psvs[i]);
2626 			dev_ctx->psvs[i] = NULL;
2627 		}
2628 	}
2629 	free(dev_ctx->psvs);
2630 
2631 	if (!dev_ctx->psv_pool) {
2632 		return;
2633 	}
2634 	num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool);
2635 	if (num_psvs_in_pool != num_psvs) {
2636 		SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool);
2637 	}
2638 	spdk_mempool_free(dev_ctx->psv_pool);
2639 }
2640 
2641 static void
2642 accel_mlx5_free_resources(void)
2643 {
2644 	struct accel_mlx5_dev_ctx *dev_ctx;
2645 	uint32_t i;
2646 
2647 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2648 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2649 		accel_mlx5_psvs_release(dev_ctx);
2650 		if (dev_ctx->pd) {
2651 			if (dev_ctx->crypto_mkeys) {
2652 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
2653 			}
2654 			if (dev_ctx->sig_mkeys) {
2655 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd);
2656 			}
2657 			spdk_rdma_utils_put_pd(dev_ctx->pd);
2658 		}
2659 		if (dev_ctx->domain) {
2660 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
2661 		}
2662 	}
2663 
2664 	free(g_accel_mlx5.dev_ctxs);
2665 	g_accel_mlx5.dev_ctxs = NULL;
2666 	g_accel_mlx5.initialized = false;
2667 }
2668 
2669 static void
2670 accel_mlx5_deinit_cb(void *ctx)
2671 {
2672 	accel_mlx5_free_resources();
2673 	spdk_spin_destroy(&g_accel_mlx5.lock);
2674 	spdk_mlx5_umr_implementer_register(false);
2675 	spdk_accel_module_finish();
2676 }
2677 
2678 static void
2679 accel_mlx5_deinit(void *ctx)
2680 {
2681 	if (g_accel_mlx5.allowed_devs) {
2682 		accel_mlx5_allowed_devs_free();
2683 	}
2684 	spdk_mlx5_crypto_devs_allow(NULL, 0);
2685 	if (g_accel_mlx5.initialized) {
2686 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
2687 	} else {
2688 		spdk_accel_module_finish();
2689 	}
2690 }
2691 
2692 static int
2693 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
2694 {
2695 	struct spdk_mlx5_mkey_pool_param pool_param = {};
2696 
2697 	pool_param.mkey_count = num_mkeys;
2698 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
2699 	pool_param.flags = flags;
2700 
2701 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
2702 }
2703 
2704 static void
2705 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx)
2706 {
2707 	struct spdk_rdma_utils_memory_translation translation = {};
2708 	struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg;
2709 	struct accel_mlx5_psv_wrapper *wrapper = _psv;
2710 	struct accel_mlx5_dev_ctx *dev_ctx = args->dev;
2711 	int rc;
2712 
2713 	if (args->rc) {
2714 		return;
2715 	}
2716 	assert(obj_idx < g_accel_mlx5.attr.num_requests);
2717 	assert(dev_ctx->psvs[obj_idx] != NULL);
2718 	memset(wrapper, 0, sizeof(*wrapper));
2719 	wrapper->psv_index = dev_ctx->psvs[obj_idx]->index;
2720 
2721 	rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation);
2722 	if (rc) {
2723 		SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t));
2724 		args->rc = -EINVAL;
2725 	} else {
2726 		wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation);
2727 	}
2728 }
2729 
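/*
 * Create one PSV per request and wrap them in a mempool. The pool constructor stores the
 * PSV index and the lkey needed to write the resulting CRC value into each wrapper.
 */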
2730 static int
2731 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx)
2732 {
2733 	struct accel_mlx5_psv_pool_iter_cb_args args = {
2734 		.dev = dev_ctx
2735 	};
2736 	char pool_name[32];
2737 	uint32_t i;
2738 	uint32_t num_psvs = g_accel_mlx5.attr.num_requests;
2739 	uint32_t cache_size;
2740 	int rc;
2741 
2742 	dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *)));
2743 	if (!dev_ctx->psvs) {
2744 		SPDK_ERRLOG("Failed to alloc PSVs array\n");
2745 		return -ENOMEM;
2746 	}
2747 	for (i = 0; i < num_psvs; i++) {
2748 		dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd);
2749 		if (!dev_ctx->psvs[i]) {
2750 			SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name);
2751 			return -EINVAL;
2752 		}
2753 	}
2754 
2755 	rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name);
2756 	if (rc < 0) {
2757 		assert(0);
2758 		return -EINVAL;
2759 	}
2760 	cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count();
2761 	args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2762 			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2763 	if (!args.map) {
2764 		return -ENOMEM;
2765 	}
2766 	dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs,
2767 			    sizeof(struct accel_mlx5_psv_wrapper),
2768 			    cache_size, SPDK_ENV_SOCKET_ID_ANY,
2769 			    accel_mlx5_set_psv_in_pool, &args);
2770 	spdk_rdma_utils_free_mem_map(&args.map);
2771 	if (!dev_ctx->psv_pool) {
2772 		SPDK_ERRLOG("Failed to create PSV memory pool\n");
2773 		return -ENOMEM;
2774 	}
2775 	if (args.rc) {
2776 		SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc);
2777 		return args.rc;
2778 	}
2779 
2780 	return 0;
2781 }
2782 
2783 
2784 static int
2785 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
2786 			struct spdk_mlx5_device_caps *caps)
2787 {
2788 	struct ibv_pd *pd;
2789 	int rc;
2790 
2791 	pd = spdk_rdma_utils_get_pd(dev);
2792 	if (!pd) {
2793 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
2794 		return -EINVAL;
2795 	}
2796 	dev_ctx->context = dev;
2797 	dev_ctx->pd = pd;
2798 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
2799 	if (!dev_ctx->domain) {
2800 		return -ENOMEM;
2801 	}
2802 
2803 	if (g_accel_mlx5.crypto_supported) {
2804 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
2805 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
2806 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
2807 				     dev->device->name);
2808 		}
2809 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2810 		if (rc) {
2811 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2812 			return rc;
2813 		}
2814 		dev_ctx->crypto_mkeys = true;
2815 	}
2816 	if (g_accel_mlx5.crc32c_supported) {
2817 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests,
2818 					     SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2819 		if (rc) {
2820 			SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2821 			return rc;
2822 		}
2823 		dev_ctx->sig_mkeys = true;
2824 		rc = accel_mlx5_psvs_create(dev_ctx);
2825 		if (rc) {
2826 			SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name);
2827 			return rc;
2828 		}
2829 	}
2830 
2831 	return 0;
2832 }
2833 
2834 static struct ibv_context **
2835 accel_mlx5_get_devices(int *_num_devs)
2836 {
2837 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
2838 	struct ibv_device_attr dev_attr;
2839 	size_t j;
2840 	int num_devs = 0, i, rc;
2841 	int num_devs_out = 0;
2842 	bool dev_allowed;
2843 
2844 	rdma_devs = rdma_get_devices(&num_devs);
2845 	if (!rdma_devs || !num_devs) {
2846 		*_num_devs = 0;
2847 		return NULL;
2848 	}
2849 
2850 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
2851 	if (!rdma_devs_out) {
2852 		SPDK_ERRLOG("Memory allocation failed\n");
2853 		rdma_free_devices(rdma_devs);
2854 		*_num_devs = 0;
2855 		return NULL;
2856 	}
2857 
2858 	for (i = 0; i < num_devs; i++) {
2859 		dev = rdma_devs[i];
2860 		rc = ibv_query_device(dev, &dev_attr);
2861 		if (rc) {
2862 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
2863 			continue;
2864 		}
2865 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
2866 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
2867 			continue;
2868 		}
2869 
2870 		if (g_accel_mlx5.allowed_devs_count) {
2871 			dev_allowed = false;
2872 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
2873 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
2874 					dev_allowed = true;
2875 					break;
2876 				}
2877 			}
2878 			if (!dev_allowed) {
2879 				continue;
2880 			}
2881 		}
2882 
2883 		rdma_devs_out[num_devs_out] = dev;
2884 		num_devs_out++;
2885 	}
2886 
2887 	rdma_free_devices(rdma_devs);
2888 	*_num_devs = num_devs_out;
2889 
2890 	return rdma_devs_out;
2891 }
2892 
2893 static inline bool
2894 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
2895 {
2896 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
2897 	       (caps->crypto.single_block_le_tweak ||
2898 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
2899 }
2900 
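/*
 * Module init: query capabilities of all allowed mlx5 devices. If the user did not
 * restrict the device list, a single device supporting the most offloads is selected;
 * otherwise an offload is enabled only if every allowed device supports it.
 */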
2901 static int
2902 accel_mlx5_init(void)
2903 {
2904 	struct spdk_mlx5_device_caps *caps;
2905 	struct ibv_context **rdma_devs, *dev;
2906 	int num_devs = 0,  rc = 0, i;
2907 	int best_dev = -1, first_dev = 0;
2908 	int best_dev_stat = 0, dev_stat;
2909 	bool supports_crypto;
2910 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
2911 
2912 	if (!g_accel_mlx5.enabled) {
2913 		return -EINVAL;
2914 	}
2915 
2916 	spdk_spin_init(&g_accel_mlx5.lock);
2917 	rdma_devs = accel_mlx5_get_devices(&num_devs);
2918 	if (!rdma_devs || !num_devs) {
2919 		return -ENODEV;
2920 	}
2921 	caps = calloc(num_devs, sizeof(*caps));
2922 	if (!caps) {
2923 		rc = -ENOMEM;
2924 		goto cleanup;
2925 	}
2926 
2927 	g_accel_mlx5.crypto_supported = true;
2928 	g_accel_mlx5.crc32c_supported = true;
2929 	g_accel_mlx5.num_ctxs = 0;
2930 
2931 	/* Iterate devices. We support an offload if all devices support it */
2932 	for (i = 0; i < num_devs; i++) {
2933 		dev = rdma_devs[i];
2934 
2935 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
2936 		if (rc) {
2937 			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
2938 			goto cleanup;
2939 		}
2940 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
2941 		if (!supports_crypto) {
2942 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
2943 				      rdma_devs[i]->device->name);
2944 			g_accel_mlx5.crypto_supported = false;
2945 		}
2946 		if (!caps[i].crc32c_supported) {
2947 			SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n",
2948 				      rdma_devs[i]->device->name);
2949 			g_accel_mlx5.crc32c_supported = false;
2950 		}
2951 		if (find_best_dev) {
2952 			/* Find device which supports max number of offloads */
2953 			dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported;
2954 			if (dev_stat > best_dev_stat) {
2955 				best_dev_stat = dev_stat;
2956 				best_dev = i;
2957 			}
2958 		}
2959 	}
2960 
2961 	/* User didn't specify devices to use, try to select the best one */
2962 	if (find_best_dev) {
2963 		if (best_dev == -1) {
2964 			best_dev = 0;
2965 		}
2966 		g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
2967 		g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported;
2968 		SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name,
2969 			       g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported);
2970 		first_dev = best_dev;
2971 		num_devs = 1;
2972 		if (g_accel_mlx5.crypto_supported) {
2973 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
2974 			/* Let mlx5 library know which device to use */
2975 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
2976 		}
2977 	} else {
2978 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
2979 	}
2980 
2981 	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
2982 	if (!g_accel_mlx5.dev_ctxs) {
2983 		SPDK_ERRLOG("Memory allocation failed\n");
2984 		rc = -ENOMEM;
2985 		goto cleanup;
2986 	}
2987 
2988 	for (i = first_dev; i < first_dev + num_devs; i++) {
2989 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
2990 					     rdma_devs[i], &caps[i]);
2991 		if (rc) {
2992 			goto cleanup;
2993 		}
2994 	}
2995 
2996 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
2997 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
2998 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
2999 	g_accel_mlx5.initialized = true;
3000 	free(rdma_devs);
3001 	free(caps);
3002 
3003 	if (g_accel_mlx5.attr.enable_driver) {
3004 		SPDK_NOTICELOG("Enabling mlx5 platform driver\n");
3005 		spdk_accel_driver_register(&g_accel_mlx5_driver);
3006 		spdk_accel_set_driver(g_accel_mlx5_driver.name);
3007 		spdk_mlx5_umr_implementer_register(true);
3008 	}
3009 
3010 	return 0;
3011 
3012 cleanup:
3013 	free(rdma_devs);
3014 	free(caps);
3015 	accel_mlx5_free_resources();
3016 	spdk_spin_destroy(&g_accel_mlx5.lock);
3017 
3018 	return rc;
3019 }
3020 
3021 static void
3022 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
3023 {
3024 	if (g_accel_mlx5.enabled) {
3025 		spdk_json_write_object_begin(w);
3026 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
3027 		spdk_json_write_named_object_begin(w, "params");
3028 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
3029 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
3030 		if (g_accel_mlx5.attr.allowed_devs) {
3031 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
3032 		}
3033 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
3034 		spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver);
3035 		spdk_json_write_object_end(w);
3036 		spdk_json_write_object_end(w);
3037 	}
3038 }
3039 
3040 static size_t
3041 accel_mlx5_get_ctx_size(void)
3042 {
3043 	return sizeof(struct accel_mlx5_task);
3044 }
3045 
3046 static int
3047 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
3048 {
3049 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
3050 	struct spdk_mlx5_crypto_keytag *keytag;
3051 	int rc;
3052 
3053 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
3054 		return -EINVAL;
3055 	}
3056 
3057 	attr.dek = calloc(1, key->key_size + key->key2_size);
3058 	if (!attr.dek) {
3059 		return -ENOMEM;
3060 	}
3061 
3062 	memcpy(attr.dek, key->key, key->key_size);
3063 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
3064 	attr.dek_len = key->key_size + key->key2_size;
3065 
3066 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
3067 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
3068 	free(attr.dek);
3069 	if (rc) {
3070 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
3071 		return rc;
3072 	}
3073 
3074 	key->priv = keytag;
3075 
3076 	return 0;
3077 }
3078 
3079 static void
3080 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
3081 {
3082 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
3083 		return;
3084 	}
3085 
3086 	spdk_mlx5_crypto_keytag_destroy(key->priv);
3087 }
3088 
3089 static void
3090 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
3091 			   const struct accel_mlx5_stats *stats)
3092 {
3093 	double idle_polls_percentage = 0;
3094 	double cpls_per_poll = 0;
3095 	uint64_t total_tasks = 0;
3096 	int i;
3097 
3098 	if (stats->polls) {
3099 		idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls;
3100 	}
3101 	if (stats->polls > stats->idle_polls) {
3102 		cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls);
3103 	}
3104 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
3105 		total_tasks += stats->opcodes[i];
3106 	}
3107 
3108 	spdk_json_write_named_object_begin(w, header);
3109 
3110 	spdk_json_write_named_object_begin(w, "umrs");
3111 	spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs);
3112 	spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs);
3113 	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs);
3114 	spdk_json_write_object_end(w);
3115 
3116 	spdk_json_write_named_object_begin(w, "rdma");
3117 	spdk_json_write_named_uint64(w, "read", stats->rdma_reads);
3118 	spdk_json_write_named_uint64(w, "write", stats->rdma_writes);
3119 	spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes);
3120 	spdk_json_write_object_end(w);
3121 
3122 	spdk_json_write_named_object_begin(w, "polling");
3123 	spdk_json_write_named_uint64(w, "polls", stats->polls);
3124 	spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls);
3125 	spdk_json_write_named_uint64(w, "completions", stats->completions);
3126 	spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage);
3127 	spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll);
3128 	spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth);
3129 	spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey);
3130 	spdk_json_write_object_end(w);
3131 
3132 	spdk_json_write_named_object_begin(w, "tasks");
3133 	spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]);
3134 	spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]);
3135 	spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]);
3136 	spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]);
3137 	spdk_json_write_named_uint64(w, "total", total_tasks);
3138 	spdk_json_write_object_end(w);
3139 
3140 	spdk_json_write_object_end(w);
3141 }
3142 
3143 static void
3144 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i)
3145 {
3146 	struct accel_mlx5_stats ch_stat = {};
3147 	struct accel_mlx5_dump_stats_ctx *ctx;
3148 	struct spdk_io_channel *_ch;
3149 	struct accel_mlx5_io_channel *ch;
3150 	struct accel_mlx5_dev *dev;
3151 	uint32_t j;
3152 
3153 	ctx = spdk_io_channel_iter_get_ctx(i);
3154 	_ch = spdk_io_channel_iter_get_channel(i);
3155 	ch = spdk_io_channel_get_ctx(_ch);
3156 
3157 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3158 		spdk_json_write_object_begin(ctx->w);
3159 		spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread()));
3160 	}
3161 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3162 		spdk_json_write_named_array_begin(ctx->w, "devices");
3163 	}
3164 
3165 	for (j = 0; j < ch->num_devs; j++) {
3166 		dev = &ch->devs[j];
3167 		/* Save grand total and channel stats */
3168 		accel_mlx5_add_stats(&ctx->total, &dev->stats);
3169 		accel_mlx5_add_stats(&ch_stat, &dev->stats);
3170 		if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3171 			spdk_json_write_object_begin(ctx->w);
3172 			accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats);
3173 			spdk_json_write_object_end(ctx->w);
3174 		}
3175 	}
3176 
3177 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3178 		spdk_json_write_array_end(ctx->w);
3179 	}
3180 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3181 		accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat);
3182 		spdk_json_write_object_end(ctx->w);
3183 		spdk_json_write_object_end(ctx->w);
3184 	}
3185 
3186 	spdk_for_each_channel_continue(i, 0);
3187 }
3188 
3189 static void
3190 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status)
3191 {
3192 	struct accel_mlx5_dump_stats_ctx *ctx;
3193 
3194 	ctx = spdk_io_channel_iter_get_ctx(i);
3195 
3196 	spdk_spin_lock(&g_accel_mlx5.lock);
3197 	/* Add statistics from destroyed channels */
3198 	accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats);
3199 	spdk_spin_unlock(&g_accel_mlx5.lock);
3200 
3201 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3202 		/* channels[] */
3203 		spdk_json_write_array_end(ctx->w);
3204 	}
3205 
3206 	accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total);
3207 
3208 	/* Ends the whole response which was begun in accel_mlx5_dump_stats */
3209 	spdk_json_write_object_end(ctx->w);
3210 
3211 	ctx->cb(ctx->ctx, 0);
3212 	free(ctx);
3213 }
3214 
3215 int
3216 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level,
3217 		      accel_mlx5_dump_stat_done_cb cb, void *ctx)
3218 {
3219 	struct accel_mlx5_dump_stats_ctx *stat_ctx;
3220 
3221 	if (!w || !cb) {
3222 		return -EINVAL;
3223 	}
3224 	if (!g_accel_mlx5.initialized) {
3225 		return -ENODEV;
3226 	}
3227 
3228 	stat_ctx = calloc(1, sizeof(*stat_ctx));
3229 	if (!stat_ctx) {
3230 		return -ENOMEM;
3231 	}
3232 	stat_ctx->cb = cb;
3233 	stat_ctx->ctx = ctx;
3234 	stat_ctx->level = level;
3235 	stat_ctx->w = w;
3236 
3237 	spdk_json_write_object_begin(w);
3238 
3239 	if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3240 		spdk_json_write_named_array_begin(w, "channels");
3241 	}
3242 
3243 	spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
3244 			      accel_mlx5_dump_channel_stat_done);
3245 
3246 	return 0;
3247 }
3248 
3249 static bool
3250 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
3251 {
3252 	switch (cipher) {
3253 	case SPDK_ACCEL_CIPHER_AES_XTS:
3254 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
3255 	default:
3256 		return false;
3257 	}
3258 }
3259 
3260 static int
3261 accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
3262 {
3263 	int i, size;
3264 
3265 	if (!domains || !array_size) {
3266 		return (int)g_accel_mlx5.num_ctxs;
3267 	}
3268 
3269 	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);
3270 
3271 	for (i = 0; i < size; i++) {
3272 		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
3273 	}
3274 
3275 	return (int)g_accel_mlx5.num_ctxs;
3276 }
3277 
3278 static inline struct accel_mlx5_dev *
3279 accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
3280 {
3281 	uint32_t i;
3282 
3283 	for (i = 0; i < accel_ch->num_devs; i++) {
3284 		if (accel_ch->devs[i].dev_ctx->pd == pd) {
3285 			return &accel_ch->devs[i];
3286 		}
3287 	}
3288 
3289 	return NULL;
3290 }
3291 
3292 static inline int
3293 accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
3294 				       struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
3295 {
3296 	struct spdk_memory_domain_rdma_ctx *domain_ctx;
3297 	struct accel_mlx5_dev *dev;
3298 	struct ibv_pd *domain_pd;
3299 	size_t ctx_size;
3300 
3301 	domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
3302 	if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
3303 		SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
3304 		return -ENOTSUP;
3305 	}
3306 	domain_pd = domain_ctx->ibv_pd;
3307 	if (spdk_unlikely(!domain_pd)) {
3308 		SPDK_ERRLOG("no destination domain PD, task %p\n", task);
3309 		return -ENOTSUP;
3310 	}
3311 	dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
3312 	if (spdk_unlikely(!dev)) {
3313 		SPDK_ERRLOG("No dev found for PD %p, device %s\n", domain_pd, domain_pd->context->device->name);
3314 		return -ENODEV;
3315 	}
3316 
3320 	task->qp = &dev->qp;
3321 
3322 	return 0;
3323 }
3324 
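/*
 * Look at the first two tasks of the sequence and, when possible, collapse
 * them into a single ACCEL_MLX5_OPC_CRYPTO_MKEY task:
 *   - COPY to an RDMA memory domain followed by a final DECRYPT, or
 *   - ENCRYPT followed by a final COPY to an RDMA memory domain.
 * In both cases the copy task is completed right away and its destination
 * domain is inherited by the crypto task. If nothing can be merged, the tasks
 * are executed one by one.
 */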
3325 static inline int
3326 accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
3327 				   struct accel_mlx5_io_channel *accel_ch)
3328 {
3329 	struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
3330 	struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
3331 	struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
3332 	struct accel_mlx5_task *next;
3333 	int rc;
3334 
3335 	accel_mlx5_task_reset(first);
3336 	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
3337 		      next_base, next_base ? next_base->op_code : -1);
3338 	if (next_base) {
3339 		switch (first_base->op_code) {
3340 		case SPDK_ACCEL_OPC_COPY:
3341 			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
3342 			    first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
3343 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3344 				next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
3345 				rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
3346 				if (spdk_unlikely(rc)) {
3347 					return rc;
3348 				}
3349 				/* Update decrypt task memory domain, complete copy task */
3350 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt (%p)\n", first, next);
3351 				next_base->dst_domain = first_base->dst_domain;
3352 				next_base->dst_domain_ctx = first_base->dst_domain_ctx;
3353 				accel_mlx5_task_reset(next);
3354 				next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3355 				next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3356 				next->needs_data_transfer = 1;
3357 				next->inplace = 1;
3358 				spdk_accel_task_complete(first_base, 0);
3359 				return 0;
3360 			}
3361 			break;
3362 		case SPDK_ACCEL_OPC_ENCRYPT:
3363 			if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
3364 			    next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
3365 			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
3366 				rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
3367 				if (spdk_unlikely(rc)) {
3368 					return rc;
3369 				}
3370 
3371 				/* Update encrypt task memory domain, complete copy task */
3372 				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt (%p)\n",
3373 					      SPDK_CONTAINEROF(next_base,
3374 							       struct accel_mlx5_task, base), first);
3375 				first_base->dst_domain = next_base->dst_domain;
3376 				first_base->dst_domain_ctx = next_base->dst_domain_ctx;
3377 				first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
3378 				first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
3379 				first->needs_data_transfer = 1;
3380 				first->inplace = 1;
3381 				spdk_accel_task_complete(next_base, 0);
3382 				return 0;
3383 			}
3384 			break;
3385 
3386 		default:
3387 			break;
3388 		}
3389 	}
3390 
3391 	SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p: nothing to merge\n", seq, first_base);
3392 	/* Nothing to merge, execute tasks one by one */
3393 	accel_mlx5_task_assign_qp(first, accel_ch);
3394 	accel_mlx5_task_init_opcode(first);
3395 
3396 	return 0;
3397 }
3398 
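/*
 * Driver entry point: examine the sequence for merge opportunities, mark the
 * resulting first task as part of a driver-executed sequence (driver_seq = 1)
 * and submit it on this channel.
 */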
3399 static inline int
3400 accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
3401 {
3402 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
3403 	struct spdk_accel_task *task;
3404 	struct accel_mlx5_task *mlx5_task;
3405 	int rc;
3406 
3407 	rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
3408 	if (spdk_unlikely(rc)) {
3409 		return rc;
3410 	}
3411 	task = spdk_accel_sequence_first_task(seq);
3412 	assert(task);
3413 	mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
3414 	mlx5_task->driver_seq = 1;
3415 
3416 	SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);
3417 
3418 	return _accel_mlx5_submit_tasks(accel_ch, task);
3419 }
3420 
3421 static struct accel_mlx5_module g_accel_mlx5 = {
3422 	.module = {
3423 		.module_init		= accel_mlx5_init,
3424 		.module_fini		= accel_mlx5_deinit,
3425 		.write_config_json	= accel_mlx5_write_config_json,
3426 		.get_ctx_size		= accel_mlx5_get_ctx_size,
3427 		.name			= "mlx5",
3428 		.supports_opcode	= accel_mlx5_supports_opcode,
3429 		.get_io_channel		= accel_mlx5_get_io_channel,
3430 		.submit_tasks		= accel_mlx5_submit_tasks,
3431 		.crypto_key_init	= accel_mlx5_crypto_key_init,
3432 		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
3433 		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
3434 		.get_memory_domains	= accel_mlx5_get_memory_domains,
3435 	}
3436 };
3437 
3438 static struct spdk_accel_driver g_accel_mlx5_driver = {
3439 	.name			= "mlx5",
3440 	.execute_sequence	= accel_mlx5_execute_sequence,
3441 	.get_io_channel		= accel_mlx5_get_io_channel
3442 };
3443 
3444 SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)
3445