xref: /spdk/module/accel/mlx5/accel_mlx5.c (revision a2f5e1c2d535934bced849d8b079523bc74c98f1)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/env.h"
6 #include "spdk/thread.h"
7 #include "spdk/queue.h"
8 #include "spdk/log.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 #include "spdk/dma.h"
12 #include "spdk/json.h"
13 #include "spdk/util.h"
14 
15 #include "spdk_internal/mlx5.h"
16 #include "spdk_internal/rdma_utils.h"
17 #include "spdk/accel_module.h"
18 #include "spdk_internal/assert.h"
19 #include "spdk_internal/sgl.h"
20 #include "accel_mlx5.h"
21 
22 #include <infiniband/mlx5dv.h>
23 #include <rdma/rdma_cma.h>
24 
25 #define ACCEL_MLX5_QP_SIZE (256u)
26 #define ACCEL_MLX5_NUM_REQUESTS (2048u - 1)
27 #define ACCEL_MLX5_RECOVER_POLLER_PERIOD_US (10000)
28 #define ACCEL_MLX5_MAX_SGE (16u)
29 #define ACCEL_MLX5_MAX_WC (64u)
30 #define ACCEL_MLX5_MAX_MKEYS_IN_TASK (16u)
31 
32 /* Assume we have up to 16 devices */
33 #define ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN ((SPDK_MLX5_DEV_MAX_NAME_LEN + 1) * 16)
34 
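/* Bookkeeping helpers for posted WRs: bump the per-qp submitted counter and the
 * task's outstanding WR count, and mark the qp doorbell to be rung on flush.
 * The _SIGNALED variant additionally reserves a CQ slot, since only signaled
 * WRs produce a completion. */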
35 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, task)	\
36 do {							\
37 	assert((qp)->wrs_submitted < (qp)->wrs_max);	\
38 	(qp)->wrs_submitted++;				\
39 	(qp)->ring_db = true;				\
40 	assert((task)->num_wrs < UINT16_MAX);		\
41 	(task)->num_wrs++;				\
42 } while (0)
43 
44 #define ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, task)	\
45 do {									\
46 	assert((dev)->wrs_in_cq < (dev)->wrs_in_cq_max);		\
47 	(dev)->wrs_in_cq++;						\
48 	assert((qp)->wrs_submitted < (qp)->wrs_max);			\
49 	(qp)->wrs_submitted++;						\
50 	(qp)->ring_db = true;						\
51 	assert((task)->num_wrs < UINT16_MAX);				\
52 	(task)->num_wrs++;						\
53 } while (0)
54 
55 struct accel_mlx5_io_channel;
56 struct accel_mlx5_task;
57 
58 struct accel_mlx5_dev_ctx {
59 	struct ibv_context *context;
60 	struct ibv_pd *pd;
61 	struct spdk_memory_domain *domain;
62 	struct spdk_mempool *psv_pool;
63 	TAILQ_ENTRY(accel_mlx5_dev_ctx) link;
64 	struct spdk_mlx5_psv **psvs;
65 	bool mkeys;
66 	bool crypto_mkeys;
67 	bool sig_mkeys;
68 	bool crypto_multi_block;
69 };
70 
71 enum accel_mlx5_opcode {
72 	ACCEL_MLX5_OPC_COPY,
73 	ACCEL_MLX5_OPC_CRYPTO,
74 	ACCEL_MLX5_OPC_CRC32C,
75 	ACCEL_MLX5_OPC_CRYPTO_MKEY,
76 	ACCEL_MLX5_OPC_LAST
77 };
78 
79 SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
80 		   "accel opcode exceeds 4 bits, update accel_mlx5 struct");
81 
82 struct accel_mlx5_stats {
83 	uint64_t crypto_umrs;
84 	uint64_t sig_umrs;
85 	uint64_t rdma_reads;
86 	uint64_t rdma_writes;
87 	uint64_t polls;
88 	uint64_t idle_polls;
89 	uint64_t completions;
90 	uint64_t nomem_qdepth;
91 	uint64_t nomem_mkey;
92 	uint64_t opcodes[ACCEL_MLX5_OPC_LAST];
93 };
94 
95 struct accel_mlx5_module {
96 	struct spdk_accel_module_if module;
97 	struct accel_mlx5_stats stats;
98 	struct spdk_spinlock lock;
99 	struct accel_mlx5_dev_ctx *dev_ctxs;
100 	uint32_t num_ctxs;
101 	struct accel_mlx5_attr attr;
102 	char **allowed_devs;
103 	size_t allowed_devs_count;
104 	bool initialized;
105 	bool enabled;
106 	bool crypto_supported;
107 	bool crc32c_supported;
108 };
109 
110 struct accel_mlx5_sge {
111 	uint32_t src_sge_count;
112 	uint32_t dst_sge_count;
113 	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
114 	struct ibv_sge dst_sge[ACCEL_MLX5_MAX_SGE];
115 };
116 
117 struct accel_mlx5_iov_sgl {
118 	struct iovec	*iov;
119 	uint32_t	iovcnt;
120 	uint32_t	iov_offset;
121 };
122 
123 struct accel_mlx5_psv_wrapper {
124 	uint32_t psv_index;
125 	struct {
126 		uint32_t error : 1;
127 		uint32_t reserved : 31;
128 	} bits;
129 	/* The mlx5 engine requires DMAable memory; use this member to copy the user's crc value
130 	 * since we don't know which memory it is in */
131 	uint32_t crc;
132 	uint32_t crc_lkey;
133 };
134 
135 struct accel_mlx5_task {
136 	struct spdk_accel_task base;
137 	struct accel_mlx5_iov_sgl src;
138 	struct accel_mlx5_iov_sgl dst;
139 	struct accel_mlx5_qp *qp;
140 	STAILQ_ENTRY(accel_mlx5_task) link;
141 	uint16_t num_reqs;
142 	uint16_t num_completed_reqs;
143 	uint16_t num_submitted_reqs;
144 	uint16_t num_ops; /* number of allocated mkeys or number of operations */
145 	uint16_t num_wrs; /* Number of outstanding operations which consume qp slot */
146 	union {
147 		struct {
148 			uint16_t blocks_per_req;
149 			uint16_t num_processed_blocks;
150 			uint16_t num_blocks;
151 		};
152 		struct {
153 			struct accel_mlx5_psv_wrapper *psv;
154 			uint32_t last_umr_len;
155 			uint8_t last_mkey_idx;
156 		};
157 	};
158 	union {
159 		uint16_t raw;
160 		struct {
161 			uint16_t inplace : 1;
162 			uint16_t driver_seq : 1;
163 			uint16_t needs_data_transfer : 1;
164 			uint16_t enc_order : 2;
165 			uint16_t mlx5_opcode: 4;
166 		};
167 	};
168 	/* Keep this array last since not all elements might be accessed; this
169 	 * reduces the amount of data to be cached */
170 	struct spdk_mlx5_mkey_pool_obj *mkeys[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
171 };
172 
173 SPDK_STATIC_ASSERT(ACCEL_MLX5_MAX_MKEYS_IN_TASK <= UINT8_MAX, "uint8_t is used to iterate mkeys");
174 
175 struct accel_mlx5_qp {
176 	struct spdk_mlx5_qp *qp;
177 	struct ibv_qp *verbs_qp;
178 	struct accel_mlx5_dev *dev;
179 	/* Tasks submitted to HW. We can't complete a task, even in the error case,
180 	 * until we reap completions for all submitted requests */
181 	STAILQ_HEAD(, accel_mlx5_task) in_hw;
182 	uint16_t wrs_submitted;
183 	uint16_t wrs_max;
184 	bool ring_db;
185 	bool recovering;
186 	struct spdk_poller *recover_poller;
187 };
188 
189 struct accel_mlx5_dev {
190 	struct accel_mlx5_qp qp;
191 	struct spdk_mlx5_cq *cq;
192 	struct spdk_mlx5_mkey_pool *mkeys;
193 	struct spdk_mlx5_mkey_pool *crypto_mkeys;
194 	struct spdk_mlx5_mkey_pool *sig_mkeys;
195 	struct spdk_rdma_utils_mem_map *mmap;
196 	struct accel_mlx5_dev_ctx *dev_ctx;
197 	struct spdk_io_channel *ch;
198 	uint16_t wrs_in_cq;
199 	uint16_t wrs_in_cq_max;
200 	uint16_t crypto_split_blocks;
201 	bool crypto_multi_block;
202 	/* Pending tasks waiting for request resources */
203 	STAILQ_HEAD(, accel_mlx5_task) nomem;
204 	TAILQ_ENTRY(accel_mlx5_dev) link;
205 	struct accel_mlx5_stats stats;
206 };
207 
208 struct accel_mlx5_io_channel {
209 	struct accel_mlx5_dev *devs;
210 	struct spdk_poller *poller;
211 	uint16_t num_devs;
212 	/* Index in \b devs to be used for operations in round-robin way */
213 	uint16_t dev_idx;
214 	bool poller_handler_registered;
215 };
216 
217 struct accel_mlx5_task_operations {
218 	int (*init)(struct accel_mlx5_task *task);
219 	int (*process)(struct accel_mlx5_task *task);
220 	int (*cont)(struct accel_mlx5_task *task);
221 	void (*complete)(struct accel_mlx5_task *task);
222 };
223 
224 struct accel_mlx5_psv_pool_iter_cb_args {
225 	struct accel_mlx5_dev_ctx *dev;
226 	struct spdk_rdma_utils_mem_map *map;
227 	int rc;
228 };
229 
230 struct accel_mlx5_dump_stats_ctx {
231 	struct accel_mlx5_stats total;
232 	struct spdk_json_write_ctx *w;
233 	enum accel_mlx5_dump_state_level level;
234 	accel_mlx5_dump_stat_done_cb cb;
235 	void *ctx;
236 };
237 
238 static struct accel_mlx5_module g_accel_mlx5;
239 static struct spdk_accel_driver g_accel_mlx5_driver;
240 
241 static inline int accel_mlx5_execute_sequence(struct spdk_io_channel *ch,
242 		struct spdk_accel_sequence *seq);
243 static inline void accel_mlx5_task_complete(struct accel_mlx5_task *mlx5_task);
244 
245 static inline void
246 accel_mlx5_iov_sgl_init(struct accel_mlx5_iov_sgl *s, struct iovec *iov, uint32_t iovcnt)
247 {
248 	s->iov = iov;
249 	s->iovcnt = iovcnt;
250 	s->iov_offset = 0;
251 }
252 
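/* Move the sgl forward by `step` bytes, consuming whole iov entries as the
 * offset crosses their boundaries. Example: with iovs {8K, 4K} and iov_offset
 * 0, advancing by 10K leaves iovcnt 1 and iov_offset 2K into the second iov. */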
253 static inline void
254 accel_mlx5_iov_sgl_advance(struct accel_mlx5_iov_sgl *s, uint32_t step)
255 {
256 	s->iov_offset += step;
257 	while (s->iovcnt > 0) {
258 		assert(s->iov != NULL);
259 		if (s->iov_offset < s->iov->iov_len) {
260 			break;
261 		}
262 
263 		s->iov_offset -= s->iov->iov_len;
264 		s->iov++;
265 		s->iovcnt--;
266 	}
267 }
268 
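/* Step the sgl back by `step` bytes after a partial submission, walking the
 * iovs in reverse. `max_iovs` bounds the walk; exhausting it before `step` is
 * consumed is a logic error (asserted below). */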
269 static inline void
270 accel_mlx5_iov_sgl_unwind(struct accel_mlx5_iov_sgl *s, uint32_t max_iovs, uint32_t step)
271 {
272 	SPDK_DEBUGLOG(accel_mlx5, "iov %p, iovcnt %u, max %u, offset %u, step %u\n", s->iov, s->iovcnt,
273 		      max_iovs, s->iov_offset, step);
274 	while (s->iovcnt <= max_iovs) {
275 		assert(s->iov != NULL);
276 		if (s->iov_offset >= step) {
277 			s->iov_offset -= step;
278 			SPDK_DEBUGLOG(accel_mlx5, "\tEND, iov %p, iovcnt %u, offset %u\n", s->iov, s->iovcnt,
279 				      s->iov_offset);
280 			return;
281 		}
282 		step -= s->iov_offset;
283 		s->iov--;
284 		s->iovcnt++;
285 		s->iov_offset = s->iov->iov_len;
286 		SPDK_DEBUGLOG(accel_mlx5, "\tiov %p, iovcnt %u, offset %u, step %u\n", s->iov, s->iovcnt,
287 			      s->iov_offset, step);
288 	}
289 
290 	SPDK_ERRLOG("Can't unwind iovs, remaining %u\n", step);
291 	assert(0);
292 }
293 
294 static inline int
295 accel_mlx5_sge_unwind(struct ibv_sge *sge, uint32_t sge_count, uint32_t step)
296 {
297 	int i;
298 
299 	assert(sge_count > 0);
300 	SPDK_DEBUGLOG(accel_mlx5, "sge %p, count %u, step %u\n", sge, sge_count, step);
301 	for (i = (int)sge_count - 1; i >= 0; i--) {
302 		if (sge[i].length > step) {
303 			sge[i].length -= step;
304 			SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
305 			return (int)i + 1;
306 		}
307 		SPDK_DEBUGLOG(accel_mlx5, "\tsge[%u] len %u, step %u\n", i, sge[i].length, step);
308 		step -= sge[i].length;
309 	}
310 
311 	SPDK_ERRLOG("Can't unwind sge, remaining %u\n", step);
312 	assert(step == 0);
313 
314 	return 0;
315 }
316 
317 static inline void
318 accel_mlx5_crypto_task_complete(struct accel_mlx5_task *task)
319 {
320 	struct accel_mlx5_dev *dev = task->qp->dev;
321 
322 	assert(task->num_ops);
323 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
324 	spdk_accel_task_complete(&task->base, 0);
325 }
326 
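/* Release per-task resources (mkeys, PSV) and complete the task with an error;
 * if the task was part of a driver-executed sequence, resume that sequence. */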
327 static inline void
328 accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
329 {
330 	struct accel_mlx5_dev *dev = task->qp->dev;
331 	struct spdk_accel_task *next;
332 	struct spdk_accel_sequence *seq;
333 	bool driver_seq;
334 
335 	assert(task->num_reqs == task->num_completed_reqs);
336 	SPDK_DEBUGLOG(accel_mlx5, "Fail task %p, opc %d, rc %d\n", task, task->base.op_code, rc);
337 
338 	if (task->num_ops) {
339 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO || task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY) {
340 			spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, task->mkeys, task->num_ops);
341 		}
342 		if (task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C) {
343 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
344 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
345 		}
346 	}
347 	next = spdk_accel_sequence_next_task(&task->base);
348 	seq = task->base.seq;
349 	driver_seq = task->driver_seq;
350 
353 	spdk_accel_task_complete(&task->base, rc);
354 
355 	if (driver_seq) {
356 		struct spdk_io_channel *ch = task->qp->dev->ch;
357 
358 		assert(seq);
359 		if (next) {
360 			accel_mlx5_execute_sequence(ch, seq);
361 		} else {
362 			spdk_accel_sequence_continue(seq);
363 		}
364 	}
365 }
366 
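/* Resolve a buffer into an lkey/addr/length sge entry, either through the
 * caller-provided memory domain or through the device's local memory map. */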
367 static int
368 accel_mlx5_translate_addr(void *addr, size_t size, struct spdk_memory_domain *domain,
369 			  void *domain_ctx, struct accel_mlx5_dev *dev, struct ibv_sge *sge)
370 {
371 	struct spdk_rdma_utils_memory_translation map_translation;
372 	struct spdk_memory_domain_translation_result domain_translation;
373 	struct spdk_memory_domain_translation_ctx local_ctx;
374 	int rc;
375 
376 	if (domain) {
377 		domain_translation.size = sizeof(struct spdk_memory_domain_translation_result);
378 		local_ctx.size = sizeof(local_ctx);
379 		local_ctx.rdma.ibv_qp = dev->qp.verbs_qp;
380 		rc = spdk_memory_domain_translate_data(domain, domain_ctx, dev->dev_ctx->domain,
381 						       &local_ctx, addr, size, &domain_translation);
382 		if (spdk_unlikely(rc || domain_translation.iov_count != 1)) {
383 			SPDK_ERRLOG("Memory domain translation failed, addr %p, length %zu, iovcnt %u\n", addr, size,
384 				    domain_translation.iov_count);
385 			if (rc == 0) {
386 				rc = -EINVAL;
387 			}
388 
389 			return rc;
390 		}
391 		sge->lkey = domain_translation.rdma.lkey;
392 		sge->addr = (uint64_t) domain_translation.iov.iov_base;
393 		sge->length = domain_translation.iov.iov_len;
394 	} else {
395 		rc = spdk_rdma_utils_get_translation(dev->mmap, addr, size,
396 						     &map_translation);
397 		if (spdk_unlikely(rc)) {
398 			SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", addr, size);
399 			return rc;
400 		}
401 		sge->lkey = spdk_rdma_utils_memory_translation_get_lkey(&map_translation);
402 		sge->addr = (uint64_t)addr;
403 		sge->length = size;
404 	}
405 
406 	return 0;
407 }
408 
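/* Fill up to ACCEL_MLX5_MAX_SGE entries from the sgl, translating each chunk.
 * Returns the number of sges filled (or a negative errno); bytes that did not
 * fit are reported via _remaining so the caller can split the operation. */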
409 static inline int
410 accel_mlx5_fill_block_sge(struct accel_mlx5_dev *dev, struct ibv_sge *sge,
411 			  struct accel_mlx5_iov_sgl *iovs, uint32_t len, uint32_t *_remaining,
412 			  struct spdk_memory_domain *domain, void *domain_ctx)
413 {
414 	void *addr;
415 	uint32_t remaining = len;
416 	uint32_t size;
417 	int i = 0;
418 	int rc;
419 
420 	while (remaining && i < (int)ACCEL_MLX5_MAX_SGE) {
421 		size = spdk_min(remaining, iovs->iov->iov_len - iovs->iov_offset);
422 		addr = (void *)iovs->iov->iov_base + iovs->iov_offset;
423 		rc = accel_mlx5_translate_addr(addr, size, domain, domain_ctx, dev, &sge[i]);
424 		if (spdk_unlikely(rc)) {
425 			return rc;
426 		}
427 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d]: lkey %u, len %u, addr %"PRIx64"\n", i, sge[i].lkey,
428 			      sge[i].length, sge[i].addr);
429 		accel_mlx5_iov_sgl_advance(iovs, size);
430 		i++;
431 		assert(remaining >= size);
432 		remaining -= size;
433 	}
434 	*_remaining = remaining;
435 
436 	return i;
437 }
438 
439 static inline bool
440 accel_mlx5_compare_iovs(struct iovec *v1, struct iovec *v2, uint32_t iovcnt)
441 {
442 	return memcmp(v1, v2, sizeof(*v1) * iovcnt) == 0;
443 }
444 
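/* A WR may be posted only if there is both a free qp slot and a free CQ slot
 * for the single signaled completion of the current batch. */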
445 static inline uint16_t
446 accel_mlx5_dev_get_available_slots(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
447 {
448 	assert(qp->wrs_max >= qp->wrs_submitted);
449 	assert(dev->wrs_in_cq_max >= dev->wrs_in_cq);
450 
451 	/* Each submission batch produces only 1 CQE, so we need 1 CQ slot */
452 	if (spdk_unlikely(dev->wrs_in_cq == dev->wrs_in_cq_max)) {
453 		return 0;
454 	}
455 
456 	return qp->wrs_max - qp->wrs_submitted;
457 }
458 
459 static inline uint32_t
460 accel_mlx5_task_alloc_mkeys(struct accel_mlx5_task *task, struct spdk_mlx5_mkey_pool *pool)
461 {
462 	uint32_t num_ops;
463 	int rc;
464 
465 	assert(task->num_reqs > task->num_completed_reqs);
466 	num_ops = task->num_reqs - task->num_completed_reqs;
467 	num_ops = spdk_min(num_ops, ACCEL_MLX5_MAX_MKEYS_IN_TASK);
468 	if (!num_ops) {
469 		return 0;
470 	}
471 	rc = spdk_mlx5_mkey_pool_get_bulk(pool, task->mkeys, num_ops);
472 	if (spdk_unlikely(rc)) {
473 		return 0;
474 	}
475 	assert(num_ops <= UINT16_MAX);
476 	task->num_ops = num_ops;
477 
478 	return num_ops;
479 }
480 
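/* Map a crypto block size to the device block-size selector; the reserved
 * value signals an unsupported size to the caller. */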
481 static inline uint8_t
482 bs_to_bs_selector(uint32_t bs)
483 {
484 	switch (bs) {
485 	case 512:
486 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_512;
487 	case 520:
488 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_520;
489 	case 4096:
490 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096;
491 	case 4160:
492 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160;
493 	default:
494 		return SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED;
495 	}
496 }
497 
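/* Build the src (and, for out-of-place tasks, dst) sges and post a crypto UMR
 * covering `num_blocks` blocks. If the sges cannot cover the full length, the
 * sgls are unwound to the previous block boundary and the UMR shrinks
 * accordingly; the caller detects this via num_processed_blocks. */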
498 static inline int
499 accel_mlx5_configure_crypto_umr(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge,
500 				uint32_t mkey, uint32_t num_blocks, struct spdk_mlx5_crypto_dek_data *dek_data,
501 				uint64_t wr_id, uint32_t flags)
502 {
503 	struct spdk_mlx5_umr_crypto_attr cattr;
504 	struct spdk_mlx5_umr_attr umr_attr;
505 	struct accel_mlx5_qp *qp = mlx5_task->qp;
506 	struct accel_mlx5_dev *dev = qp->dev;
507 	struct spdk_accel_task *task = &mlx5_task->base;
508 	uint32_t length, remaining = 0, block_size = task->block_size;
509 	int rc;
510 
511 	length = num_blocks * block_size;
512 	SPDK_DEBUGLOG(accel_mlx5, "task %p, domain %p, len %u, blocks %u\n", task, task->src_domain, length,
513 		      num_blocks);
514 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, length, &remaining,
515 				       task->src_domain, task->src_domain_ctx);
516 	if (spdk_unlikely(rc <= 0)) {
517 		if (rc == 0) {
518 			rc = -EINVAL;
519 		}
520 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
521 		return rc;
522 	}
523 	sge->src_sge_count = rc;
524 	if (spdk_unlikely(remaining)) {
525 		uint32_t new_len = length - remaining;
526 		uint32_t aligned_len, updated_num_blocks;
527 
528 		SPDK_DEBUGLOG(accel_mlx5, "Incorrect src iovs, handled %u out of %u bytes\n", new_len, length);
529 		if (new_len < block_size) {
530 			/* We need to process at least 1 block. If buffer is too fragmented, we can't do
531 			 * anything */
532 			return -ERANGE;
533 		}
534 
535 		/* Regular integer division, we need to round down to prev block size */
536 		updated_num_blocks = new_len / block_size;
537 		assert(updated_num_blocks);
538 		assert(updated_num_blocks < num_blocks);
539 		aligned_len = updated_num_blocks * block_size;
540 
541 		if (aligned_len < new_len) {
542 			uint32_t dt = new_len - aligned_len;
543 
544 			/* We can't process part of block, need to unwind src iov_sgl and sge to the
545 			 * prev block boundary */
546 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
547 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
548 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
549 			if (!sge->src_sge_count) {
550 				return -ERANGE;
551 			}
552 		}
553 		SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
554 		length = aligned_len;
555 		num_blocks = updated_num_blocks;
556 	}
557 
558 	cattr.xts_iv = task->iv + mlx5_task->num_processed_blocks;
559 	cattr.keytag = 0;
560 	cattr.dek_obj_id = dek_data->dek_obj_id;
561 	cattr.tweak_mode = dek_data->tweak_mode;
562 	cattr.enc_order = mlx5_task->enc_order;
563 	cattr.bs_selector = bs_to_bs_selector(mlx5_task->base.block_size);
564 	if (spdk_unlikely(cattr.bs_selector == SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED)) {
565 		SPDK_ERRLOG("unsupported block size %u\n", mlx5_task->base.block_size);
566 		return -EINVAL;
567 	}
568 	umr_attr.mkey = mkey;
569 	umr_attr.sge = sge->src_sge;
570 
571 	if (!mlx5_task->inplace) {
572 		SPDK_DEBUGLOG(accel_mlx5, "task %p, dst sge, domain %p, len %u\n", task, task->dst_domain, length);
573 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, length, &remaining,
574 					       task->dst_domain, task->dst_domain_ctx);
575 		if (spdk_unlikely(rc <= 0)) {
576 			if (rc == 0) {
577 				rc = -EINVAL;
578 			}
579 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
580 			return rc;
581 		}
582 		sge->dst_sge_count = rc;
583 		if (spdk_unlikely(remaining)) {
584 			uint32_t new_len = length - remaining;
585 			uint32_t aligned_len, updated_num_blocks, dt;
586 
587 			SPDK_DEBUGLOG(accel_mlx5, "Incorrect dst iovs, handled %u out of %u bytes\n", new_len, length);
588 			if (new_len < block_size) {
589 				/* We need to process at least 1 block. If buffer is too fragmented, we can't do
590 				 * anything */
591 				return -ERANGE;
592 			}
593 
594 			/* Regular integer division, we need to round down to prev block size */
595 			updated_num_blocks = new_len / block_size;
596 			assert(updated_num_blocks);
597 			assert(updated_num_blocks < num_blocks);
598 			aligned_len = updated_num_blocks * block_size;
599 
600 			if (aligned_len < new_len) {
601 				dt = new_len - aligned_len;
602 				assert(dt > 0 && dt < length);
603 				/* We can't process part of block, need to unwind src and dst iov_sgl and sge to the
604 				 * prev block boundary */
605 				SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind dst sge for %u bytes\n", task, dt);
606 				accel_mlx5_iov_sgl_unwind(&mlx5_task->dst, task->d.iovcnt, dt);
607 				sge->dst_sge_count = accel_mlx5_sge_unwind(sge->dst_sge, sge->dst_sge_count, dt);
608 				assert(sge->dst_sge_count > 0 && sge->dst_sge_count <= ACCEL_MLX5_MAX_SGE);
609 				if (!sge->dst_sge_count) {
610 					return -ERANGE;
611 				}
612 			}
613 			assert(length > aligned_len);
614 			dt = length - aligned_len;
615 			SPDK_DEBUGLOG(accel_mlx5, "task %p, unwind src sge for %u bytes\n", task, dt);
616 			/* The same for src iov_sgl and sge. In worst case we can unwind SRC 2 times */
617 			accel_mlx5_iov_sgl_unwind(&mlx5_task->src, task->s.iovcnt, dt);
618 			sge->src_sge_count = accel_mlx5_sge_unwind(sge->src_sge, sge->src_sge_count, dt);
619 			assert(sge->src_sge_count > 0 && sge->src_sge_count <= ACCEL_MLX5_MAX_SGE);
620 			if (!sge->src_sge_count) {
621 				return -ERANGE;
622 			}
623 			SPDK_DEBUGLOG(accel_mlx5, "task %p, UMR len %u -> %u\n", task, length, aligned_len);
624 			length = aligned_len;
625 			num_blocks = updated_num_blocks;
626 		}
627 	}
628 
629 	SPDK_DEBUGLOG(accel_mlx5,
630 		      "task %p: bs %u, iv %"PRIu64", enc_on_tx %d, tweak_mode %d, len %u, mkey %x, blocks %u\n",
631 		      mlx5_task, task->block_size, cattr.xts_iv, mlx5_task->enc_order, cattr.tweak_mode, length, mkey,
632 		      num_blocks);
633 
634 	umr_attr.sge_count = sge->src_sge_count;
635 	umr_attr.umr_len = length;
636 	assert((uint32_t)mlx5_task->num_processed_blocks + num_blocks <= UINT16_MAX);
637 	mlx5_task->num_processed_blocks += num_blocks;
638 
639 	rc = spdk_mlx5_umr_configure_crypto(qp->qp, &umr_attr, &cattr, wr_id, flags);
640 
641 	return rc;
642 }
643 
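/* Each crypto request consumes two qp slots: a UMR describing the data and an
 * RDMA_READ through the crypto mkey, hence the qp_slot >> 1 bound below. Only
 * the last RDMA_READ is signaled. */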
644 static inline int
645 accel_mlx5_crypto_task_process(struct accel_mlx5_task *mlx5_task)
646 {
647 	struct accel_mlx5_sge sges[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
648 	struct spdk_mlx5_crypto_dek_data dek_data;
649 	struct accel_mlx5_qp *qp = mlx5_task->qp;
650 	struct accel_mlx5_dev *dev = qp->dev;
651 	/* First RDMA after UMR must have a SMALL_FENCE */
652 	uint32_t first_rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
653 	uint16_t num_blocks;
654 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
655 				    mlx5_task->num_ops);
656 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
657 	uint16_t i;
658 	int rc;
659 
660 	assert(qp_slot > 1);
661 	num_ops = spdk_min(num_ops, qp_slot >> 1);
662 	if (spdk_unlikely(!num_ops)) {
663 		return -EINVAL;
664 	}
665 
666 	rc = spdk_mlx5_crypto_get_dek_data(mlx5_task->base.crypto_key->priv, dev->dev_ctx->pd, &dek_data);
667 	if (spdk_unlikely(rc)) {
668 		return rc;
669 	}
670 
671 	mlx5_task->num_wrs = 0;
672 	SPDK_DEBUGLOG(accel_mlx5, "begin, task, %p, reqs: total %u, submitted %u, completed %u\n",
673 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
674 	for (i = 0; i < num_ops; i++) {
675 		if (mlx5_task->num_submitted_reqs + i + 1 == mlx5_task->num_reqs) {
676 			/* Last request may consume less than calculated if crypto_multi_block is true */
677 			assert(mlx5_task->num_blocks > mlx5_task->num_submitted_reqs);
678 			num_blocks = mlx5_task->num_blocks - mlx5_task->num_processed_blocks;
679 		} else {
680 			num_blocks = mlx5_task->blocks_per_req;
681 		}
682 
683 		rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sges[i], mlx5_task->mkeys[i]->mkey, num_blocks,
684 						     &dek_data, 0, 0);
685 		if (spdk_unlikely(rc)) {
686 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
687 			return rc;
688 		}
689 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
690 		dev->stats.crypto_umrs++;
691 	}
692 
693 	/* Loop `num_ops - 1` for easy flags handling */
694 	for (i = 0; i < num_ops - 1; i++) {
695 		/* UMR is used as a destination for RDMA_READ - from UMR to sge */
696 		if (mlx5_task->inplace) {
697 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
698 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
699 		} else {
700 			rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
701 						    mlx5_task->mkeys[i]->mkey, 0, first_rdma_fence);
702 		}
703 		if (spdk_unlikely(rc)) {
704 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
705 			return rc;
706 		}
707 
708 		first_rdma_fence = 0;
709 		assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
710 		assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
711 		mlx5_task->num_submitted_reqs++;
712 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
713 		dev->stats.rdma_reads++;
714 	}
715 
716 	if (mlx5_task->inplace) {
717 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].src_sge, sges[i].src_sge_count, 0,
718 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
719 	} else {
720 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges[i].dst_sge, sges[i].dst_sge_count, 0,
721 					    mlx5_task->mkeys[i]->mkey, (uint64_t)mlx5_task, first_rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
722 	}
723 	if (spdk_unlikely(rc)) {
724 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
725 		return rc;
726 	}
727 
728 	assert(mlx5_task->num_submitted_reqs < mlx5_task->num_reqs);
729 	assert(mlx5_task->num_submitted_reqs < UINT16_MAX);
730 	mlx5_task->num_submitted_reqs++;
731 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
732 	dev->stats.rdma_reads++;
733 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
734 
735 	if (spdk_unlikely(mlx5_task->num_submitted_reqs == mlx5_task->num_reqs &&
736 			  mlx5_task->num_blocks > mlx5_task->num_processed_blocks)) {
737 		/* We hit the "out of sge entries" case with a highly fragmented
738 		 * payload. In that case accel_mlx5_configure_crypto_umr handled
739 		 * fewer data blocks than expected. That means we need at least 1
740 		 * more request to complete this task; this request will be
741 		 * executed once all submitted ones are completed */
742 		SPDK_DEBUGLOG(accel_mlx5, "task %p, processed %u/%u blocks, add extra req\n", mlx5_task,
743 			      mlx5_task->num_processed_blocks, mlx5_task->num_blocks);
744 		mlx5_task->num_reqs++;
745 	}
746 
747 	SPDK_DEBUGLOG(accel_mlx5, "end, task, %p, reqs: total %u, submitted %u, completed %u\n", mlx5_task,
748 		      mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
749 
750 	return 0;
751 }
752 
753 static inline int
754 accel_mlx5_crypto_task_continue(struct accel_mlx5_task *task)
755 {
756 	struct accel_mlx5_qp *qp = task->qp;
757 	struct accel_mlx5_dev *dev = qp->dev;
758 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
759 
760 	assert(task->num_reqs > task->num_completed_reqs);
761 	if (task->num_ops == 0) {
762 		/* No mkeys allocated, try to allocate now */
763 		if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->crypto_mkeys))) {
764 			/* Pool is empty, queue this task */
765 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
766 			dev->stats.nomem_mkey++;
767 			return -ENOMEM;
768 		}
769 	}
770 	/* We need to post at least 1 UMR and 1 RDMA operation */
771 	if (spdk_unlikely(qp_slot < 2)) {
772 		/* QP is full, queue this task */
773 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
774 		dev->stats.nomem_qdepth++;
775 		return -ENOMEM;
776 	}
777 
778 	return accel_mlx5_crypto_task_process(task);
779 }
780 
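/* Split the task into requests: one block per request by default, or
 * crypto_split_blocks blocks per request when the device supports multi-block
 * operation, bounded by what ACCEL_MLX5_MAX_SGE entries can address per UMR. */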
781 static inline int
782 accel_mlx5_crypto_task_init(struct accel_mlx5_task *mlx5_task)
783 {
784 	struct spdk_accel_task *task = &mlx5_task->base;
785 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
786 	uint64_t src_nbytes = task->nbytes;
787 #ifdef DEBUG
788 	uint64_t dst_nbytes;
789 	uint32_t i;
790 #endif
791 	bool crypto_key_ok;
792 
793 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
794 			 task->crypto_key->priv);
795 	if (spdk_unlikely((task->nbytes % mlx5_task->base.block_size != 0) || !crypto_key_ok)) {
796 		if (crypto_key_ok) {
797 			SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
798 				    mlx5_task->base.block_size);
799 		} else {
800 			SPDK_ERRLOG("Wrong crypto key provided\n");
801 		}
802 		return -EINVAL;
803 	}
804 
805 	assert(src_nbytes / mlx5_task->base.block_size <= UINT16_MAX);
806 	mlx5_task->num_blocks = src_nbytes / mlx5_task->base.block_size;
807 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
808 	if (task->d.iovcnt == 0 || (task->d.iovcnt == task->s.iovcnt &&
809 				    accel_mlx5_compare_iovs(task->d.iovs, task->s.iovs, task->s.iovcnt))) {
810 		mlx5_task->inplace = 1;
811 	} else {
812 #ifdef DEBUG
813 		dst_nbytes = 0;
814 		for (i = 0; i < task->d.iovcnt; i++) {
815 			dst_nbytes += task->d.iovs[i].iov_len;
816 		}
817 
818 		if (spdk_unlikely(src_nbytes != dst_nbytes)) {
819 			return -EINVAL;
820 		}
821 #endif
822 		mlx5_task->inplace = 0;
823 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
824 	}
825 
826 	if (dev->crypto_multi_block) {
827 		if (dev->crypto_split_blocks) {
828 			assert(SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks) <= UINT16_MAX);
829 			mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->num_blocks, dev->crypto_split_blocks);
830 			/* Last req may consume less blocks */
831 			mlx5_task->blocks_per_req = spdk_min(mlx5_task->num_blocks, dev->crypto_split_blocks);
832 		} else {
833 			if (task->s.iovcnt > ACCEL_MLX5_MAX_SGE || task->d.iovcnt > ACCEL_MLX5_MAX_SGE) {
834 				uint32_t max_sge_count = spdk_max(task->s.iovcnt, task->d.iovcnt);
835 
836 				assert(SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE) <= UINT16_MAX);
837 				mlx5_task->num_reqs = SPDK_CEIL_DIV(max_sge_count, ACCEL_MLX5_MAX_SGE);
838 				mlx5_task->blocks_per_req = SPDK_CEIL_DIV(mlx5_task->num_blocks, mlx5_task->num_reqs);
839 			} else {
840 				mlx5_task->num_reqs = 1;
841 				mlx5_task->blocks_per_req = mlx5_task->num_blocks;
842 			}
843 		}
844 	} else {
845 		mlx5_task->num_reqs = mlx5_task->num_blocks;
846 		mlx5_task->blocks_per_req = 1;
847 	}
848 
849 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(mlx5_task, dev->crypto_mkeys))) {
850 		/* Pool is empty, queue this task */
851 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in pool, dev %s\n", dev->dev_ctx->context->device->name);
852 		dev->stats.nomem_mkey++;
853 		return -ENOMEM;
854 	}
855 	if (spdk_unlikely(accel_mlx5_dev_get_available_slots(dev, &dev->qp) < 2)) {
856 		/* Queue is full, queue this task */
857 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", dev->dev_ctx->context->device->name,
858 			      mlx5_task->qp);
859 		dev->stats.nomem_qdepth++;
860 		return -ENOMEM;
861 	}
862 
863 	SPDK_DEBUGLOG(accel_mlx5, "task %p, src_iovs %u, dst_iovs %u, num_reqs %u, "
864 		      "blocks/req %u, blocks %u, inplace %d\n", task, task->s.iovcnt, task->d.iovcnt,
865 		      mlx5_task->num_reqs, mlx5_task->blocks_per_req, mlx5_task->num_blocks, mlx5_task->inplace);
866 
867 	return 0;
868 }
869 
870 static inline void
871 accel_mlx5_copy_task_complete(struct accel_mlx5_task *mlx5_task)
872 {
873 	spdk_accel_task_complete(&mlx5_task->base, 0);
874 }
875 
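/* Issue one RDMA_WRITE: gather up to ACCEL_MLX5_MAX_SGE src entries, bounded
 * by the remaining size of the current dst entry, into a single dst sge. */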
876 static inline int
877 accel_mlx5_copy_task_process_one(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_qp *qp,
878 				 uint64_t wrid, uint32_t fence)
879 {
880 	struct spdk_accel_task *task = &mlx5_task->base;
881 	struct accel_mlx5_sge sge;
882 	uint32_t remaining = 0;
883 	uint32_t dst_len;
884 	int rc;
885 
886 	/* Limit one RDMA_WRITE by the length of the dst buffer. Not all src buffers may fit into one dst buffer
887 	 * due to the ACCEL_MLX5_MAX_SGE limit. If this is the case, then remaining is not zero */
888 	assert(mlx5_task->dst.iov->iov_len > mlx5_task->dst.iov_offset);
889 	dst_len = mlx5_task->dst.iov->iov_len - mlx5_task->dst.iov_offset;
890 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.src_sge, &mlx5_task->src, dst_len, &remaining,
891 				       task->src_domain, task->src_domain_ctx);
892 	if (spdk_unlikely(rc <= 0)) {
893 		if (rc == 0) {
894 			rc = -EINVAL;
895 		}
896 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
897 		return rc;
898 	}
899 	sge.src_sge_count = rc;
900 	assert(dst_len > remaining);
901 	dst_len -= remaining;
902 
903 	rc = accel_mlx5_fill_block_sge(qp->dev, sge.dst_sge, &mlx5_task->dst, dst_len, &remaining,
904 				       task->dst_domain, task->dst_domain_ctx);
905 	if (spdk_unlikely(rc != 1)) {
906 		/* We use single dst entry, any result other than 1 is an error */
907 		if (rc == 0) {
908 			rc = -EINVAL;
909 		}
910 		SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
911 		return rc;
912 	}
913 	if (spdk_unlikely(remaining)) {
914 		SPDK_ERRLOG("Incorrect dst length, remaining %u\n", remaining);
915 		assert(0);
916 		return -EINVAL;
917 	}
918 
919 	rc = spdk_mlx5_qp_rdma_write(mlx5_task->qp->qp, sge.src_sge, sge.src_sge_count,
920 				     sge.dst_sge[0].addr, sge.dst_sge[0].lkey, wrid, fence);
921 	if (spdk_unlikely(rc)) {
922 		SPDK_ERRLOG("new RDMA WRITE failed with %d\n", rc);
923 		return rc;
924 	}
925 	qp->dev->stats.rdma_writes++;
926 
927 	return 0;
928 }
929 
930 static inline int
931 accel_mlx5_copy_task_process(struct accel_mlx5_task *mlx5_task)
932 {
933 
934 	struct accel_mlx5_qp *qp = mlx5_task->qp;
935 	struct accel_mlx5_dev *dev = qp->dev;
936 	uint16_t i;
937 	int rc;
938 
939 	mlx5_task->num_wrs = 0;
940 	assert(mlx5_task->num_reqs > 0);
941 	assert(mlx5_task->num_ops > 0);
942 
943 	/* Handle n-1 reqs in order to simplify wrid and fence handling */
944 	for (i = 0; i < mlx5_task->num_ops - 1; i++) {
945 		rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, 0, 0);
946 		if (spdk_unlikely(rc)) {
947 			return rc;
948 		}
949 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
950 		mlx5_task->num_submitted_reqs++;
951 	}
952 
953 	rc = accel_mlx5_copy_task_process_one(mlx5_task, qp, (uint64_t)mlx5_task,
954 					      SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
955 	if (spdk_unlikely(rc)) {
956 		return rc;
957 	}
958 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
959 	mlx5_task->num_submitted_reqs++;
960 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
961 
962 	SPDK_DEBUGLOG(accel_mlx5, "end, copy task, %p\n", mlx5_task);
963 
964 	return 0;
965 }
966 
967 static inline int
968 accel_mlx5_copy_task_continue(struct accel_mlx5_task *task)
969 {
970 	struct accel_mlx5_qp *qp = task->qp;
971 	struct accel_mlx5_dev *dev = qp->dev;
972 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
973 
974 	task->num_ops = spdk_min(qp_slot, task->num_reqs - task->num_completed_reqs);
975 	if (spdk_unlikely(task->num_ops == 0)) {
976 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
977 		dev->stats.nomem_qdepth++;
978 		return -ENOMEM;
979 	}
980 	return accel_mlx5_copy_task_process(task);
981 }
982 
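/* Walk both iov lists and count how many RDMA_WRITEs the copy needs: a new
 * operation starts whenever a dst entry is filled or the src entries for the
 * current dst entry exceed ACCEL_MLX5_MAX_SGE. */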
983 static inline uint32_t
984 accel_mlx5_get_copy_task_count(struct iovec *src_iov, uint32_t src_iovcnt,
985 			       struct iovec *dst_iov, uint32_t dst_iovcnt)
986 {
987 	uint32_t src = 0;
988 	uint32_t dst = 0;
989 	uint64_t src_offset = 0;
990 	uint64_t dst_offset = 0;
991 	uint32_t num_ops = 0;
992 	uint32_t src_sge_count = 0;
993 
994 	while (src < src_iovcnt && dst < dst_iovcnt) {
995 		uint64_t src_len = src_iov[src].iov_len - src_offset;
996 		uint64_t dst_len = dst_iov[dst].iov_len - dst_offset;
997 
998 		if (dst_len < src_len) {
999 			dst_offset = 0;
1000 			src_offset += dst_len;
1001 			dst++;
1002 			num_ops++;
1003 			src_sge_count = 0;
1004 		} else if (src_len < dst_len) {
1005 			dst_offset += src_len;
1006 			src_offset = 0;
1007 			src++;
1008 			if (++src_sge_count >= ACCEL_MLX5_MAX_SGE) {
1009 				num_ops++;
1010 				src_sge_count = 0;
1011 			}
1012 		} else {
1013 			dst_offset = 0;
1014 			src_offset = 0;
1015 			dst++;
1016 			src++;
1017 			num_ops++;
1018 			src_sge_count = 0;
1019 		}
1020 	}
1021 
1022 	assert(src == src_iovcnt);
1023 	assert(dst == dst_iovcnt);
1024 	assert(src_offset == 0);
1025 	assert(dst_offset == 0);
1026 	return num_ops;
1027 }
1028 
1029 static inline int
1030 accel_mlx5_copy_task_init(struct accel_mlx5_task *mlx5_task)
1031 {
1032 	struct spdk_accel_task *task = &mlx5_task->base;
1033 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1034 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1035 
1036 	if (spdk_likely(task->s.iovcnt <= ACCEL_MLX5_MAX_SGE)) {
1037 		mlx5_task->num_reqs = task->d.iovcnt;
1038 	} else if (task->d.iovcnt == 1) {
1039 		mlx5_task->num_reqs = SPDK_CEIL_DIV(task->s.iovcnt, ACCEL_MLX5_MAX_SGE);
1040 	} else {
1041 		mlx5_task->num_reqs = accel_mlx5_get_copy_task_count(task->s.iovs, task->s.iovcnt,
1042 				      task->d.iovs, task->d.iovcnt);
1043 	}
1044 	mlx5_task->inplace = 0;
1045 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1046 	accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1047 	mlx5_task->num_ops = spdk_min(qp_slot, mlx5_task->num_reqs);
1048 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1049 		qp->dev->stats.nomem_qdepth++;
1050 		return -ENOMEM;
1051 	}
1052 	SPDK_DEBUGLOG(accel_mlx5, "copy task num_reqs %u, num_ops %u\n", mlx5_task->num_reqs,
1053 		      mlx5_task->num_ops);
1054 
1055 	return 0;
1056 }
1057 
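/* Consume up to `*len` bytes from the iov array starting at `*iov_offset`.
 * Returns the number of fully consumed entries; a partially consumed entry is
 * accounted for by the updated `*iov_offset`, and `*len` holds what is left. */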
1058 static inline uint32_t
1059 accel_mlx5_advance_iovec(struct iovec *iov, uint32_t iovcnt, size_t *iov_offset, size_t *len)
1060 {
1061 	uint32_t i;
1062 	size_t iov_len;
1063 
1064 	for (i = 0; *len != 0 && i < iovcnt; i++) {
1065 		iov_len = iov[i].iov_len - *iov_offset;
1066 
1067 		if (iov_len < *len) {
1068 			*iov_offset = 0;
1069 			*len -= iov_len;
1070 			continue;
1071 		}
1072 		if (iov_len == *len) {
1073 			*iov_offset = 0;
1074 			i++;
1075 		} else { /* iov_len > *len */
1076 			*iov_offset += *len;
1077 		}
1078 		*len = 0;
1079 		break;
1080 	}
1081 
1082 	return i;
1083 }
1084 
1085 static inline void
1086 accel_mlx5_crc_task_complete(struct accel_mlx5_task *mlx5_task)
1087 {
1088 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1089 
1090 	*mlx5_task->base.crc_dst = mlx5_task->psv->crc ^ UINT32_MAX;
1091 	/* Normal task completion without allocated mkeys is not possible */
1092 	assert(mlx5_task->num_ops);
1093 	spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, mlx5_task->mkeys, mlx5_task->num_ops);
1094 	spdk_mempool_put(dev->dev_ctx->psv_pool, mlx5_task->psv);
1095 	spdk_accel_task_complete(&mlx5_task->base, 0);
1096 }
1097 
1098 static inline int
1099 accel_mlx5_crc_task_configure_umr(struct accel_mlx5_task *mlx5_task, struct ibv_sge *sge,
1100 				  uint32_t sge_count, struct spdk_mlx5_mkey_pool_obj *mkey,
1101 				  enum spdk_mlx5_umr_sig_domain sig_domain, uint32_t umr_len,
1102 				  bool sig_init, bool sig_check_gen)
1103 {
1104 	struct spdk_mlx5_umr_sig_attr sattr = {
1105 		.seed = mlx5_task->base.seed ^ UINT32_MAX,
1106 		.psv_index = mlx5_task->psv->psv_index,
1107 		.domain = sig_domain,
1108 		.sigerr_count = mkey->sig.sigerr_count,
1109 		.raw_data_size = umr_len,
1110 		.init = sig_init,
1111 		.check_gen = sig_check_gen,
1112 	};
1113 	struct spdk_mlx5_umr_attr umr_attr = {
1114 		.mkey = mkey->mkey,
1115 		.umr_len = umr_len,
1116 		.sge_count = sge_count,
1117 		.sge = sge,
1118 	};
1119 
1120 	return spdk_mlx5_umr_configure_sig(mlx5_task->qp->qp, &umr_attr, &sattr, 0, 0);
1121 }
1122 
1123 static inline int
1124 accel_mlx5_crc_task_fill_sge(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_sge *sge)
1125 {
1126 	struct spdk_accel_task *task = &mlx5_task->base;
1127 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1128 	struct accel_mlx5_dev *dev = qp->dev;
1129 	uint32_t remaining;
1130 	int rc;
1131 
1132 	rc = accel_mlx5_fill_block_sge(dev, sge->src_sge, &mlx5_task->src, task->nbytes, &remaining,
1133 				       task->src_domain, task->src_domain_ctx);
1134 	if (spdk_unlikely(rc <= 0)) {
1135 		if (rc == 0) {
1136 			rc = -EINVAL;
1137 		}
1138 		SPDK_ERRLOG("failed set src sge, rc %d\n", rc);
1139 		return rc;
1140 	}
1141 	assert(remaining == 0);
1142 	sge->src_sge_count = rc;
1143 
1144 	if (!mlx5_task->inplace) {
1145 		rc = accel_mlx5_fill_block_sge(dev, sge->dst_sge, &mlx5_task->dst, task->nbytes, &remaining,
1146 					       task->dst_domain, task->dst_domain_ctx);
1147 		if (spdk_unlikely(rc <= 0)) {
1148 			if (rc == 0) {
1149 				rc = -EINVAL;
1150 			}
1151 			SPDK_ERRLOG("failed set dst sge, rc %d\n", rc);
1152 			return rc;
1153 		}
1154 		assert(remaining == 0);
1155 		sge->dst_sge_count = rc;
1156 	}
1157 
1158 	return 0;
1159 }
1160 
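/* Single-request CRC path: one signature UMR over the whole payload followed
 * by one signaled RDMA_READ whose trailing sge receives the CRC from the PSV. */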
1161 static inline int
1162 accel_mlx5_crc_task_process_one_req(struct accel_mlx5_task *mlx5_task)
1163 {
1164 	struct accel_mlx5_sge sges;
1165 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1166 	struct accel_mlx5_dev *dev = qp->dev;
1167 	uint32_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1168 				    mlx5_task->num_ops);
1169 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1170 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1171 	struct ibv_sge *sge;
1172 	int rc;
1173 	uint16_t sge_count;
1174 
1175 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1176 	if (spdk_unlikely(!num_ops)) {
1177 		return -EINVAL;
1178 	}
1179 
1180 	mlx5_task->num_wrs = 0;
1181 	/* At this moment we have as many requests as can be submitted to a qp */
1182 	rc = accel_mlx5_crc_task_fill_sge(mlx5_task, &sges);
1183 	if (spdk_unlikely(rc)) {
1184 		return rc;
1185 	}
1186 	rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges.src_sge, sges.src_sge_count,
1187 					       mlx5_task->mkeys[0], SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, mlx5_task->base.nbytes, true, true);
1188 	if (spdk_unlikely(rc)) {
1189 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1190 		return rc;
1191 	}
1192 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1193 	dev->stats.sig_umrs++;
1194 
1195 	if (mlx5_task->inplace) {
1196 		sge = sges.src_sge;
1197 		sge_count = sges.src_sge_count;
1198 	} else {
1199 		sge = sges.dst_sge;
1200 		sge_count = sges.dst_sge_count;
1201 	}
1202 
1203 	/*
1204 	 * Add the crc destination to the end of sges. A free entry must be available for CRC
1205 	 * because the task init function reserved it.
1206 	 */
1207 	assert(sge_count < ACCEL_MLX5_MAX_SGE);
1208 	sge[sge_count].lkey = mlx5_task->psv->crc_lkey;
1209 	sge[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1210 	sge[sge_count++].length = sizeof(uint32_t);
1211 
1212 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1213 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1214 		if (spdk_unlikely(rc)) {
1215 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1216 			return rc;
1217 		}
1218 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1219 	}
1220 
1221 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sge, sge_count, 0, mlx5_task->mkeys[0]->mkey,
1222 				    (uint64_t)mlx5_task, rdma_fence | SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1223 	if (spdk_unlikely(rc)) {
1224 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1225 		return rc;
1226 	}
1227 	mlx5_task->num_submitted_reqs++;
1228 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1229 	dev->stats.rdma_reads++;
1230 
1231 	return 0;
1232 }
1233 
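/* Build the sge list for one CRC UMR. The UMR span is clipped so that the
 * paired RDMA transfer also fits into ACCEL_MLX5_MAX_SGE entries; both sgls
 * are advanced by the produced length, which is returned via `len`. */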
1234 static inline int
1235 accel_mlx5_crc_task_fill_umr_sge(struct accel_mlx5_qp *qp, struct ibv_sge *sge,
1236 				 struct accel_mlx5_iov_sgl *umr_iovs, struct spdk_memory_domain *domain,
1237 				 void *domain_ctx, struct accel_mlx5_iov_sgl *rdma_iovs, size_t *len)
1238 {
1239 	int umr_idx = 0;
1240 	int rdma_idx = 0;
1241 	int umr_iovcnt = spdk_min(umr_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1242 	int rdma_iovcnt = spdk_min(rdma_iovs->iovcnt, (int)ACCEL_MLX5_MAX_SGE);
1243 	size_t umr_iov_offset;
1244 	size_t rdma_iov_offset;
1245 	size_t umr_len = 0;
1246 	void *sge_addr;
1247 	size_t sge_len;
1248 	size_t umr_sge_len;
1249 	size_t rdma_sge_len;
1250 	int rc;
1251 
1252 	umr_iov_offset = umr_iovs->iov_offset;
1253 	rdma_iov_offset = rdma_iovs->iov_offset;
1254 
1255 	while (umr_idx < umr_iovcnt && rdma_idx < rdma_iovcnt) {
1256 		umr_sge_len = umr_iovs->iov[umr_idx].iov_len - umr_iov_offset;
1257 		rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len - rdma_iov_offset;
1258 		sge_addr = umr_iovs->iov[umr_idx].iov_base + umr_iov_offset;
1259 
1260 		if (umr_sge_len == rdma_sge_len) {
1261 			rdma_idx++;
1262 			umr_iov_offset = 0;
1263 			rdma_iov_offset = 0;
1264 			sge_len = umr_sge_len;
1265 		} else if (umr_sge_len < rdma_sge_len) {
1266 			umr_iov_offset = 0;
1267 			rdma_iov_offset += umr_sge_len;
1268 			sge_len = umr_sge_len;
1269 		} else {
1270 			size_t remaining;
1271 
1272 			remaining = umr_sge_len - rdma_sge_len;
1273 			while (remaining) {
1274 				rdma_idx++;
1275 				if (rdma_idx == (int)ACCEL_MLX5_MAX_SGE) {
1276 					break;
1277 				}
1278 				rdma_sge_len = rdma_iovs->iov[rdma_idx].iov_len;
1279 				if (remaining == rdma_sge_len) {
1280 					rdma_idx++;
1281 					rdma_iov_offset = 0;
1282 					umr_iov_offset = 0;
1283 					remaining = 0;
1284 					break;
1285 				}
1286 				if (remaining < rdma_sge_len) {
1287 					rdma_iov_offset = remaining;
1288 					umr_iov_offset = 0;
1289 					remaining = 0;
1290 					break;
1291 				}
1292 				remaining -= rdma_sge_len;
1293 			}
1294 			sge_len = umr_sge_len - remaining;
1295 		}
1296 		rc = accel_mlx5_translate_addr(sge_addr, sge_len, domain, domain_ctx, qp->dev, &sge[umr_idx]);
1297 		if (spdk_unlikely(rc)) {
1298 			return -EINVAL;
1299 		}
1300 		SPDK_DEBUGLOG(accel_mlx5, "\t sge[%d] lkey %u, addr %p, len %u\n", umr_idx, sge[umr_idx].lkey,
1301 			      (void *)sge[umr_idx].addr, sge[umr_idx].length);
1302 		umr_len += sge_len;
1303 		umr_idx++;
1304 	}
1305 	accel_mlx5_iov_sgl_advance(umr_iovs, umr_len);
1306 	accel_mlx5_iov_sgl_advance(rdma_iovs, umr_len);
1307 	*len = umr_len;
1308 
1309 	return umr_idx;
1310 }
1311 
1312 static inline int
1313 accel_mlx5_crc_task_process_multi_req(struct accel_mlx5_task *mlx5_task)
1314 {
1315 	size_t umr_len[ACCEL_MLX5_MAX_MKEYS_IN_TASK];
1316 	struct ibv_sge sges[ACCEL_MLX5_MAX_SGE];
1317 	struct spdk_accel_task *task = &mlx5_task->base;
1318 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1319 	struct accel_mlx5_dev *dev = qp->dev;
1320 	struct accel_mlx5_iov_sgl umr_sgl;
1321 	struct accel_mlx5_iov_sgl *umr_sgl_ptr;
1322 	struct accel_mlx5_iov_sgl rdma_sgl;
1323 	uint64_t umr_offset;
1324 	uint32_t rdma_fence = SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
1325 	int sge_count;
1326 	uint32_t remaining;
1327 	int rc;
1328 	uint16_t i;
1329 	uint16_t num_ops = spdk_min(mlx5_task->num_reqs - mlx5_task->num_completed_reqs,
1330 				    mlx5_task->num_ops);
1331 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1332 	bool sig_init, sig_check_gen = false;
1333 
1334 	num_ops = spdk_min(num_ops, qp_slot >> 1);
1335 	if (spdk_unlikely(!num_ops)) {
1336 		return -EINVAL;
1337 	}
1338 	/* Init signature on the first UMR */
1339 	sig_init = !mlx5_task->num_submitted_reqs;
1340 
1341 	/*
1342 	 * accel_mlx5_crc_task_fill_umr_sge() and accel_mlx5_fill_block_sge() advance an IOV during iteration
1343 	 * on it. We must copy accel_mlx5_iov_sgl to iterate twice or more on the same IOV.
1344 	 *
1345 	 * In the in-place case, we iterate on the source IOV three times. That's why we need two copies of
1346 	 * the source accel_mlx5_iov_sgl.
1347 	 *
1348 	 * In the out-of-place case, we iterate on the source IOV once and on the destination IOV two times.
1349 	 * So, we need one copy of the destination accel_mlx5_iov_sgl.
1350 	 */
1351 	if (mlx5_task->inplace) {
1352 		accel_mlx5_iov_sgl_init(&umr_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1353 		umr_sgl_ptr = &umr_sgl;
1354 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->src.iov, mlx5_task->src.iovcnt);
1355 	} else {
1356 		umr_sgl_ptr = &mlx5_task->src;
1357 		accel_mlx5_iov_sgl_init(&rdma_sgl, mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1358 	}
1359 	mlx5_task->num_wrs = 0;
1360 	for (i = 0; i < num_ops; i++) {
1361 		/*
1362 		 * The last request may have only CRC. Skip UMR in this case because the MKey from
1363 		 * the previous request is used.
1364 		 */
1365 		if (umr_sgl_ptr->iovcnt == 0) {
1366 			assert((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs);
1367 			break;
1368 		}
1369 		sge_count = accel_mlx5_crc_task_fill_umr_sge(qp, sges, umr_sgl_ptr, task->src_domain,
1370 				task->src_domain_ctx, &rdma_sgl, &umr_len[i]);
1371 		if (spdk_unlikely(sge_count <= 0)) {
1372 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1373 			SPDK_ERRLOG("failed set UMR sge, rc %d\n", rc);
1374 			return rc;
1375 		}
1376 		if (umr_sgl_ptr->iovcnt == 0) {
1377 			/*
1378 			 * We post RDMA without UMR if the last request has only CRC. We use an MKey from
1379 			 * the last UMR in this case. Since the last request can be postponed to the next
1380 			 * call of this function, we must save the MKey to the task structure.
1381 			 */
1382 			mlx5_task->last_umr_len = umr_len[i];
1383 			mlx5_task->last_mkey_idx = i;
1384 			sig_check_gen = true;
1385 		}
1386 		rc = accel_mlx5_crc_task_configure_umr(mlx5_task, sges, sge_count, mlx5_task->mkeys[i],
1387 						       SPDK_MLX5_UMR_SIG_DOMAIN_WIRE, umr_len[i], sig_init,
1388 						       sig_check_gen);
1389 		if (spdk_unlikely(rc)) {
1390 			SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1391 			return rc;
1392 		}
1393 		sig_init = false;
1394 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1395 		dev->stats.sig_umrs++;
1396 	}
1397 
1398 	if (spdk_unlikely(mlx5_task->psv->bits.error)) {
1399 		rc = spdk_mlx5_qp_set_psv(qp->qp, mlx5_task->psv->psv_index, *mlx5_task->base.crc_dst, 0, 0);
1400 		if (spdk_unlikely(rc)) {
1401 			SPDK_ERRLOG("SET_PSV failed with %d\n", rc);
1402 			return rc;
1403 		}
1404 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1405 	}
1406 
1407 	for (i = 0; i < num_ops - 1; i++) {
1408 		if (mlx5_task->inplace) {
1409 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1410 							      task->src_domain, task->src_domain_ctx);
1411 		} else {
1412 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1413 							      task->dst_domain, task->dst_domain_ctx);
1414 		}
1415 		if (spdk_unlikely(sge_count <= 0)) {
1416 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1417 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1418 			return rc;
1419 		}
1420 		rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, 0, mlx5_task->mkeys[i]->mkey,
1421 					    0, rdma_fence);
1422 		if (spdk_unlikely(rc)) {
1423 			SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1424 			return rc;
1425 		}
1426 		mlx5_task->num_submitted_reqs++;
1427 		ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED(qp, mlx5_task);
1428 		dev->stats.rdma_reads++;
1429 		rdma_fence = SPDK_MLX5_WQE_CTRL_STRONG_ORDERING;
1430 	}
1431 	if ((mlx5_task->inplace && mlx5_task->src.iovcnt == 0) || (!mlx5_task->inplace &&
1432 			mlx5_task->dst.iovcnt == 0)) {
1433 		/*
1434 		 * The last RDMA does not have any data, only CRC. It also does not have a paired Mkey.
1435 		 * The CRC is handled in the previous MKey in this case.
1436 		 */
1437 		sge_count = 0;
1438 		umr_offset = mlx5_task->last_umr_len;
1439 	} else {
1440 		umr_offset = 0;
1441 		mlx5_task->last_mkey_idx = i;
1442 		if (mlx5_task->inplace) {
1443 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->src, umr_len[i], &remaining,
1444 							      task->src_domain, task->src_domain_ctx);
1445 		} else {
1446 			sge_count = accel_mlx5_fill_block_sge(dev, sges, &mlx5_task->dst, umr_len[i], &remaining,
1447 							      task->dst_domain, task->dst_domain_ctx);
1448 		}
1449 		if (spdk_unlikely(sge_count <= 0)) {
1450 			rc = (sge_count == 0) ? -EINVAL : sge_count;
1451 			SPDK_ERRLOG("failed set RDMA sge, rc %d\n", rc);
1452 			return rc;
1453 		}
1454 		assert(remaining == 0);
1455 	}
1456 	if ((mlx5_task->num_completed_reqs + i + 1) == mlx5_task->num_reqs) {
1457 		/* Ensure that there is a free sge for the CRC destination. */
1458 		assert(sge_count < (int)ACCEL_MLX5_MAX_SGE);
1459 		/* Add the crc destination to the end of sges. */
1460 		sges[sge_count].lkey = mlx5_task->psv->crc_lkey;
1461 		sges[sge_count].addr = (uintptr_t)&mlx5_task->psv->crc;
1462 		sges[sge_count++].length = sizeof(uint32_t);
1463 	}
1464 	rdma_fence |= SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE;
1465 	rc = spdk_mlx5_qp_rdma_read(qp->qp, sges, sge_count, umr_offset,
1466 				    mlx5_task->mkeys[mlx5_task->last_mkey_idx]->mkey,
1467 				    (uint64_t)mlx5_task, rdma_fence);
1468 	if (spdk_unlikely(rc)) {
1469 		SPDK_ERRLOG("RDMA READ/WRITE failed with %d\n", rc);
1470 		return rc;
1471 	}
1472 	mlx5_task->num_submitted_reqs++;
1473 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1474 	dev->stats.rdma_reads++;
1475 
1476 	return 0;
1477 }
1478 
1479 static inline int
1480 accel_mlx5_crc_task_process(struct accel_mlx5_task *mlx5_task)
1481 {
1482 	int rc;
1483 
1484 	assert(mlx5_task->mlx5_opcode == ACCEL_MLX5_OPC_CRC32C);
1485 
1486 	SPDK_DEBUGLOG(accel_mlx5, "begin, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1487 		      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs, mlx5_task->num_completed_reqs);
1488 
1489 	if (mlx5_task->num_reqs == 1) {
1490 		rc = accel_mlx5_crc_task_process_one_req(mlx5_task);
1491 	} else {
1492 		rc = accel_mlx5_crc_task_process_multi_req(mlx5_task);
1493 	}
1494 
1495 	if (rc == 0) {
1496 		STAILQ_INSERT_TAIL(&mlx5_task->qp->in_hw, mlx5_task, link);
1497 		SPDK_DEBUGLOG(accel_mlx5, "end, crc task, %p, reqs: total %u, submitted %u, completed %u\n",
1498 			      mlx5_task, mlx5_task->num_reqs, mlx5_task->num_submitted_reqs,
1499 			      mlx5_task->num_completed_reqs);
1500 	}
1501 
1502 	return rc;
1503 }
1504 
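/* Reserve a bulk of signature mkeys and a PSV for the task. If the PSV is in
 * the error state, an extra SET_PSV WQE is needed to reset it, so require one
 * additional qp slot up front. */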
1505 static inline int
1506 accel_mlx5_task_alloc_crc_ctx(struct accel_mlx5_task *task, uint32_t qp_slot)
1507 {
1508 	struct accel_mlx5_qp *qp = task->qp;
1509 	struct accel_mlx5_dev *dev = qp->dev;
1510 
1511 	if (spdk_unlikely(!accel_mlx5_task_alloc_mkeys(task, dev->sig_mkeys))) {
1512 		SPDK_DEBUGLOG(accel_mlx5, "no mkeys in signature mkey pool, dev %s\n",
1513 			      dev->dev_ctx->context->device->name);
1514 		dev->stats.nomem_mkey++;
1515 		return -ENOMEM;
1516 	}
1517 	task->psv = spdk_mempool_get(dev->dev_ctx->psv_pool);
1518 	if (spdk_unlikely(!task->psv)) {
1519 		SPDK_DEBUGLOG(accel_mlx5, "no reqs in psv pool, dev %s\n", dev->dev_ctx->context->device->name);
1520 		spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1521 		task->num_ops = 0;
1522 		dev->stats.nomem_mkey++;
1523 		return -ENOMEM;
1524 	}
1525 	/* One extra slot is needed for SET_PSV WQE to reset the error state in PSV. */
1526 	if (spdk_unlikely(task->psv->bits.error)) {
1527 		uint32_t n_slots = task->num_ops * 2 + 1;
1528 
1529 		if (qp_slot < n_slots) {
1530 			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
1531 			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
1532 			dev->stats.nomem_qdepth++;
1533 			task->num_ops = 0;
1534 			return -ENOMEM;
1535 		}
1536 	}
1537 
1538 	return 0;
1539 }
1540 
1541 static inline int
1542 accel_mlx5_crc_task_continue(struct accel_mlx5_task *task)
1543 {
1544 	struct accel_mlx5_qp *qp = task->qp;
1545 	struct accel_mlx5_dev *dev = qp->dev;
1546 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1547 	int rc;
1548 
1549 	assert(task->num_reqs > task->num_completed_reqs);
1550 	if (task->num_ops == 0) {
1551 		/* No mkeys allocated, try to allocate now. */
1552 		rc = accel_mlx5_task_alloc_crc_ctx(task, qp_slot);
1553 		if (spdk_unlikely(rc)) {
1554 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1555 			return -ENOMEM;
1556 		}
1557 	}
1558 	/* We need to post at least 1 UMR and 1 RDMA operation */
1559 	if (spdk_unlikely(qp_slot < 2)) {
1560 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1561 		dev->stats.nomem_qdepth++;
1562 		return -ENOMEM;
1563 	}
1564 
1565 	return accel_mlx5_crc_task_process(task);
1566 }
1567 
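/* Count the requests needed for an out-of-place CRC task: src and dst iovs
 * are walked in lockstep, and a new request starts whenever either side would
 * exceed ACCEL_MLX5_MAX_SGE (one dst entry is reserved for the CRC itself). */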
1568 static inline uint32_t
1569 accel_mlx5_get_crc_task_count(struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov,
1570 			      uint32_t dst_iovcnt)
1571 {
1572 	uint32_t src_idx = 0;
1573 	uint32_t dst_idx = 0;
1574 	uint32_t num_ops = 1;
1575 	uint32_t num_src_sge = 1;
1576 	uint32_t num_dst_sge = 1;
1577 	size_t src_offset = 0;
1578 	size_t dst_offset = 0;
1579 	uint32_t num_sge;
1580 	size_t src_len;
1581 	size_t dst_len;
1582 
1583 	/* One operation is enough if both iovs fit into ACCEL_MLX5_MAX_SGE. One SGE is reserved for CRC on dst_iov. */
1584 	if (src_iovcnt <= ACCEL_MLX5_MAX_SGE && (dst_iovcnt + 1) <= ACCEL_MLX5_MAX_SGE) {
1585 		return 1;
1586 	}
1587 
1588 	while (src_idx < src_iovcnt && dst_idx < dst_iovcnt) {
1589 		if (num_src_sge > ACCEL_MLX5_MAX_SGE || num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1590 			num_ops++;
1591 			num_src_sge = 1;
1592 			num_dst_sge = 1;
1593 		}
1594 		src_len = src_iov[src_idx].iov_len - src_offset;
1595 		dst_len = dst_iov[dst_idx].iov_len - dst_offset;
1596 
1597 		if (src_len == dst_len) {
1598 			num_src_sge++;
1599 			num_dst_sge++;
1600 			src_offset = 0;
1601 			dst_offset = 0;
1602 			src_idx++;
1603 			dst_idx++;
1604 			continue;
1605 		}
1606 		if (src_len < dst_len) {
1607 			/* Advance src_iov to reach the point that corresponds to the end of the current dst_iov. */
1608 			num_sge = accel_mlx5_advance_iovec(&src_iov[src_idx],
1609 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_src_sge,
1610 									   src_iovcnt - src_idx),
1611 							   &src_offset, &dst_len);
1612 			src_idx += num_sge;
1613 			num_src_sge += num_sge;
1614 			if (dst_len != 0) {
1615 				/*
1616 				 * ACCEL_MLX5_MAX_SGE is reached on src_iov, and dst_len bytes
1617 				 * are left on the current dst_iov.
1618 				 */
1619 				dst_offset = dst_iov[dst_idx].iov_len - dst_len;
1620 			} else {
1621 				/* The src_iov advance is completed, shift to the next dst_iov. */
1622 				dst_idx++;
1623 				num_dst_sge++;
1624 				dst_offset = 0;
1625 			}
1626 		} else { /* src_len > dst_len */
1627 			/* Advance dst_iov to reach the point that corresponds to the end of the current src_iov. */
1628 			num_sge = accel_mlx5_advance_iovec(&dst_iov[dst_idx],
1629 							   spdk_min(ACCEL_MLX5_MAX_SGE + 1 - num_dst_sge,
1630 									   dst_iovcnt - dst_idx),
1631 							   &dst_offset, &src_len);
1632 			dst_idx += num_sge;
1633 			num_dst_sge += num_sge;
1634 			if (src_len != 0) {
1635 				/*
1636 				 * ACCEL_MLX5_MAX_SGE is reached on dst_iov, and src_len bytes
1637 				 * are left on the current src_iov.
1638 				 */
1639 				src_offset = src_iov[src_idx].iov_len - src_len;
1640 			} else {
1641 				/* The dst_iov advance is completed, shift to the next src_iov. */
1642 				src_idx++;
1643 				num_src_sge++;
1644 				src_offset = 0;
1645 			}
1646 		}
1647 	}
1648 	/* An extra operation is needed if no space is left on dst_iov because CRC takes one SGE. */
1649 	if (num_dst_sge > ACCEL_MLX5_MAX_SGE) {
1650 		num_ops++;
1651 	}
1652 
1653 	/* The above loop must reach the end of both iovs simultaneously because their total lengths are equal. */
1654 	assert(src_idx == src_iovcnt);
1655 	assert(dst_idx == dst_iovcnt);
1656 	assert(src_offset == 0);
1657 	assert(dst_offset == 0);
1658 
1659 	return num_ops;
1660 }
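
/*
 * An illustrative walk-through with hypothetical iov layouts. With
 * ACCEL_MLX5_MAX_SGE == 16 and a task that has 20 src and 20 dst iovs of
 * pairwise equal lengths, both SGE counters advance in lock step:
 *
 *     pairs 1..16  -> first operation
 *     pairs 17..20 -> second operation (counter overflow starts a new op)
 *
 * The trailing CRC SGE still fits on the dst side of the last operation, so
 * the function returns 2. When lengths differ, accel_mlx5_advance_iovec()
 * splits the longer side at the boundary of the shorter one, which can add
 * partial entries and therefore extra operations.
 */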
1661 
1662 static inline int
1663 accel_mlx5_crc_task_init(struct accel_mlx5_task *mlx5_task)
1664 {
1665 	struct spdk_accel_task *task = &mlx5_task->base;
1666 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1667 	uint32_t qp_slot = accel_mlx5_dev_get_available_slots(qp->dev, qp);
1668 	int rc;
1669 
1670 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1671 	if (mlx5_task->inplace) {
1672 		/* One entry is reserved for CRC */
1673 		mlx5_task->num_reqs = SPDK_CEIL_DIV(mlx5_task->src.iovcnt + 1, ACCEL_MLX5_MAX_SGE);
1674 	} else {
1675 		accel_mlx5_iov_sgl_init(&mlx5_task->dst, task->d.iovs, task->d.iovcnt);
1676 		mlx5_task->num_reqs = accel_mlx5_get_crc_task_count(mlx5_task->src.iov, mlx5_task->src.iovcnt,
1677 				      mlx5_task->dst.iov, mlx5_task->dst.iovcnt);
1678 	}
1679 
1680 	rc = accel_mlx5_task_alloc_crc_ctx(mlx5_task, qp_slot);
1681 	if (spdk_unlikely(rc)) {
1682 		return rc;
1683 	}
1684 
1685 	if (spdk_unlikely(qp_slot < 2)) {
1686 		/* Queue is full, queue this task */
1687 		SPDK_DEBUGLOG(accel_mlx5, "dev %s qp %p is full\n", qp->dev->dev_ctx->context->device->name,
1688 			      mlx5_task->qp);
1689 		qp->dev->stats.nomem_qdepth++;
1690 		return -ENOMEM;
1691 	}
1692 	return 0;
1693 }
1694 
1695 static inline int
1696 accel_mlx5_crypto_mkey_task_init(struct accel_mlx5_task *mlx5_task)
1697 {
1698 	struct spdk_accel_task *task = &mlx5_task->base;
1699 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1700 	struct accel_mlx5_dev *dev = qp->dev;
1701 	uint32_t num_blocks;
1702 	int rc;
1703 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1704 	bool crypto_key_ok;
1705 
1706 	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
1707 		/* With an external mkey we can't split the task or register several UMRs */
1708 		SPDK_ERRLOG("src buffer is too fragmented\n");
1709 		return -EINVAL;
1710 	}
1711 	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
1712 		SPDK_ERRLOG("accel domain is not supported\n");
1713 		return -ENOTSUP;
1714 	}
1715 	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
1716 		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
1717 		return -ENOTSUP;
1718 	}
1719 
1720 	crypto_key_ok = (task->crypto_key && task->crypto_key->module_if == &g_accel_mlx5.module &&
1721 			 task->crypto_key->priv);
1722 	if (spdk_unlikely(!crypto_key_ok)) {
1723 		SPDK_ERRLOG("Wrong crypto key provided\n");
1724 		return -EINVAL;
1725 	}
1726 	if (spdk_unlikely(task->nbytes % mlx5_task->base.block_size != 0)) {
1727 		SPDK_ERRLOG("src length %"PRIu64" is not a multiple of the block size %u\n", task->nbytes,
1728 			    mlx5_task->base.block_size);
1729 		return -EINVAL;
1730 	}
1731 
1732 	num_blocks = task->nbytes / mlx5_task->base.block_size;
1733 	if (dev->crypto_multi_block) {
1734 		if (spdk_unlikely(g_accel_mlx5.attr.crypto_split_blocks &&
1735 				  num_blocks > g_accel_mlx5.attr.crypto_split_blocks)) {
1736 			SPDK_ERRLOG("Number of blocks in task %u exceeds split threshold %u, can't handle\n",
1737 				    num_blocks, g_accel_mlx5.attr.crypto_split_blocks);
1738 			return -E2BIG;
1739 		}
1740 	} else if (num_blocks != 1) {
1741 		SPDK_ERRLOG("Task contains more than 1 block, can't handle\n");
1742 		return -E2BIG;
1743 	}
1744 
1745 	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
1746 	mlx5_task->num_blocks = num_blocks;
1747 	mlx5_task->num_processed_blocks = 0;
1748 	mlx5_task->num_reqs = 1;
1749 	mlx5_task->blocks_per_req = num_blocks;
1750 
1751 	if (spdk_unlikely(qp_slot == 0)) {
1752 		mlx5_task->num_ops = 0;
1753 		dev->stats.nomem_qdepth++;
1754 		return -ENOMEM;
1755 	}
1756 	rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1757 	if (spdk_unlikely(rc)) {
1758 		mlx5_task->num_ops = 0;
1759 		dev->stats.nomem_mkey++;
1760 		return -ENOMEM;
1761 	}
1762 	mlx5_task->num_ops = 1;
1763 
1764 	SPDK_DEBUGLOG(accel_mlx5, "crypto_mkey task num_blocks %u, src_len %zu\n", mlx5_task->num_blocks,
1765 		      task->nbytes);
1766 
1767 	return 0;
1768 }
1769 
1770 static inline int
1771 accel_mlx5_crypto_mkey_task_process(struct accel_mlx5_task *mlx5_task)
1772 {
1773 	struct accel_mlx5_sge sge;
1774 	struct spdk_accel_task *task = &mlx5_task->base;
1775 	struct accel_mlx5_qp *qp = mlx5_task->qp;
1776 	struct accel_mlx5_dev *dev = qp->dev;
1777 	struct spdk_mlx5_crypto_dek_data dek_data;
1778 	int rc;
1779 
1780 	if (spdk_unlikely(!mlx5_task->num_ops)) {
1781 		return -EINVAL;
1782 	}
1783 	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1784 
1785 	mlx5_task->num_wrs = 0;
1786 	rc = spdk_mlx5_crypto_get_dek_data(task->crypto_key->priv, dev->dev_ctx->pd, &dek_data);
1787 	if (spdk_unlikely(rc)) {
1788 		return rc;
1789 	}
1790 
1791 	rc = accel_mlx5_configure_crypto_umr(mlx5_task, &sge, mlx5_task->mkeys[0]->mkey,
1792 					     mlx5_task->num_blocks, &dek_data, (uint64_t)mlx5_task, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
1793 	if (spdk_unlikely(rc)) {
1794 		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
1795 		return rc;
1796 	}
1797 	dev->stats.crypto_umrs++;
1798 	mlx5_task->num_submitted_reqs++;
1799 	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
1800 	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);
1801 
1802 	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);
1803 
1804 	return 0;
1805 }
1806 
1807 static inline int
1808 accel_mlx5_crypto_mkey_task_continue(struct accel_mlx5_task *task)
1809 {
1810 	struct accel_mlx5_qp *qp = task->qp;
1811 	struct accel_mlx5_dev *dev = qp->dev;
1812 	int rc;
1813 	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);
1814 
1815 	if (task->num_ops == 0) {
1816 		rc = spdk_mlx5_mkey_pool_get_bulk(dev->crypto_mkeys, task->mkeys, 1);
1817 		if (spdk_unlikely(rc)) {
1818 			dev->stats.nomem_mkey++;
1819 			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1820 			return -ENOMEM;
1821 		}
1822 		task->num_ops = 1;
1823 	}
1824 	if (spdk_unlikely(qp_slot == 0)) {
1825 		dev->stats.nomem_qdepth++;
1826 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1827 		return -ENOMEM;
1828 	}
1829 	return accel_mlx5_crypto_mkey_task_process(task);
1830 }
1831 
1832 static inline void
1833 accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
1834 {
1835 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
1836 
1837 	assert(mlx5_task->num_ops);
1838 	assert(mlx5_task->num_processed_blocks == mlx5_task->num_blocks);
1839 	assert(mlx5_task->base.seq);
1840 
1841 	spdk_mlx5_mkey_pool_put_bulk(dev->crypto_mkeys, mlx5_task->mkeys, 1);
1842 	spdk_accel_task_complete(&mlx5_task->base, 0);
1843 }
1844 
1845 static int
1846 accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
1847 {
1848 	SPDK_ERRLOG("wrong function called\n");
1849 	SPDK_UNREACHABLE();
1850 }
1851 
1852 static void
1853 accel_mlx5_task_op_not_implemented_v(struct accel_mlx5_task *mlx5_task)
1854 {
1855 	SPDK_ERRLOG("wrong function called\n");
1856 	SPDK_UNREACHABLE();
1857 }
1858 
1859 static int
1860 accel_mlx5_task_op_not_supported(struct accel_mlx5_task *mlx5_task)
1861 {
1862 	SPDK_ERRLOG("Unsupported opcode %d\n", mlx5_task->base.op_code);
1863 
1864 	return -ENOTSUP;
1865 }
1866 
1867 static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
1868 	[ACCEL_MLX5_OPC_COPY] = {
1869 		.init = accel_mlx5_copy_task_init,
1870 		.process = accel_mlx5_copy_task_process,
1871 		.cont = accel_mlx5_copy_task_continue,
1872 		.complete = accel_mlx5_copy_task_complete,
1873 	},
1874 	[ACCEL_MLX5_OPC_CRYPTO] = {
1875 		.init = accel_mlx5_crypto_task_init,
1876 		.process = accel_mlx5_crypto_task_process,
1877 		.cont = accel_mlx5_crypto_task_continue,
1878 		.complete = accel_mlx5_crypto_task_complete,
1879 	},
1880 	[ACCEL_MLX5_OPC_CRC32C] = {
1881 		.init = accel_mlx5_crc_task_init,
1882 		.process = accel_mlx5_crc_task_process,
1883 		.cont = accel_mlx5_crc_task_continue,
1884 		.complete = accel_mlx5_crc_task_complete,
1885 	},
1886 	[ACCEL_MLX5_OPC_CRYPTO_MKEY] = {
1887 		.init = accel_mlx5_crypto_mkey_task_init,
1888 		.process = accel_mlx5_crypto_mkey_task_process,
1889 		.cont = accel_mlx5_crypto_mkey_task_continue,
1890 		.complete = accel_mlx5_crypto_mkey_task_complete,
1891 	},
1892 	[ACCEL_MLX5_OPC_LAST] = {
1893 		.init = accel_mlx5_task_op_not_supported,
1894 		.process = accel_mlx5_task_op_not_implemented,
1895 		.cont = accel_mlx5_task_op_not_implemented,
1896 		.complete = accel_mlx5_task_op_not_implemented_v
1897 	},
1898 };
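
/*
 * A minimal sketch of how this table is consumed (see
 * _accel_mlx5_submit_tasks() and accel_mlx5_task_continue() below). Every
 * task runs the same state machine keyed by its mlx5_opcode:
 *
 *     ops = &g_accel_mlx5_tasks_ops[task->mlx5_opcode];
 *     rc = ops->init(task);            // split into requests, grab mkeys
 *     if (rc == 0) {
 *             rc = ops->process(task); // post WQEs to the QP
 *     }
 *     // on -ENOMEM the task sits on dev->nomem and ops->cont() retries;
 *     // once all requests complete, ops->complete() finishes the task
 *
 * The ACCEL_MLX5_OPC_LAST entry acts as a guard for unsupported opcodes.
 */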
1899 
1900 static void
1901 accel_mlx5_memory_domain_transfer_cpl(void *ctx, int rc)
1902 {
1903 	struct accel_mlx5_task *task = ctx;
1904 
1905 	assert(task->needs_data_transfer);
1906 	task->needs_data_transfer = 0;
1907 
1908 	if (spdk_likely(!rc)) {
1909 		SPDK_DEBUGLOG(accel_mlx5, "task %p, data transfer done\n", task);
1910 		accel_mlx5_task_complete(task);
1911 	} else {
1912 		SPDK_ERRLOG("Task %p, data transfer failed, rc %d\n", task, rc);
1913 		accel_mlx5_task_fail(task, rc);
1914 	}
1915 }
1916 
1917 static inline void
1918 accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
1919 {
1920 	struct spdk_memory_domain_translation_result translation;
1921 	struct spdk_accel_task *base = &task->base;
1922 	struct accel_mlx5_dev *dev = task->qp->dev;
1923 	int rc;
1924 
1925 	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY);
1926 	/* UMR is an offset in the address space, so the start address is 0 */
1927 	translation.iov.iov_base = NULL;
1928 	translation.iov.iov_len = base->nbytes;
1929 	translation.iov_count = 1;
1930 	translation.size = sizeof(translation);
1931 	translation.rdma.rkey = task->mkeys[0]->mkey;
1932 	translation.rdma.lkey = task->mkeys[0]->mkey;
1933 
1934 	SPDK_DEBUGLOG(accel_mlx5, "start transfer, task %p, dst_domain_ctx %p, mkey %u\n", task,
1935 		      task->base.dst_domain_ctx, task->mkeys[0]->mkey);
1936 	rc = spdk_memory_domain_transfer_data(base->dst_domain, base->dst_domain_ctx, &translation.iov, 1,
1937 					      dev->dev_ctx->domain, task, &translation.iov, 1, &translation,
1938 					      accel_mlx5_memory_domain_transfer_cpl, task);
1939 	if (spdk_unlikely(rc)) {
1940 		SPDK_ERRLOG("Failed to start data transfer, task %p rc %d\n", task, rc);
1941 		accel_mlx5_task_fail(task, rc);
1942 	}
1943 }
1944 
1945 static inline void
1946 accel_mlx5_task_complete(struct accel_mlx5_task *task)
1947 {
1948 	struct spdk_accel_sequence *seq = task->base.seq;
1949 	struct spdk_accel_task *next;
1950 	bool driver_seq;
1951 
1952 	if (task->needs_data_transfer) {
1953 		accel_mlx5_memory_domain_transfer(task);
1954 		return;
1955 	}
1956 
1957 	next = spdk_accel_sequence_next_task(&task->base);
1958 	driver_seq = task->driver_seq;
1959 
1960 	assert(task->num_reqs == task->num_completed_reqs);
1961 	SPDK_DEBUGLOG(accel_mlx5, "Complete task %p, opc %d\n", task, task->mlx5_opcode);
1962 
1963 	g_accel_mlx5_tasks_ops[task->mlx5_opcode].complete(task);
1964 
1965 	if (driver_seq) {
1966 		struct spdk_io_channel *ch = task->qp->dev->ch;
1967 
1968 		assert(seq);
1969 		if (next) {
1970 			accel_mlx5_execute_sequence(ch, seq);
1971 		} else {
1972 			spdk_accel_sequence_continue(seq);
1973 		}
1974 	}
1975 }
1976 
1977 static inline int
1978 accel_mlx5_task_continue(struct accel_mlx5_task *task)
1979 {
1980 	struct accel_mlx5_qp *qp = task->qp;
1981 	struct accel_mlx5_dev *dev = qp->dev;
1982 
1983 	if (spdk_unlikely(qp->recovering)) {
1984 		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
1985 		return 0;
1986 	}
1987 
1988 	return g_accel_mlx5_tasks_ops[task->mlx5_opcode].cont(task);
1989 }

1990 static inline void
1991 accel_mlx5_task_init_opcode(struct accel_mlx5_task *mlx5_task)
1992 {
1993 	uint8_t base_opcode = mlx5_task->base.op_code;
1994 
1995 	switch (base_opcode) {
1996 	case SPDK_ACCEL_OPC_COPY:
1997 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_COPY;
1998 		break;
1999 	case SPDK_ACCEL_OPC_ENCRYPT:
2000 		assert(g_accel_mlx5.crypto_supported);
2001 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
2002 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2003 		break;
2004 	case SPDK_ACCEL_OPC_DECRYPT:
2005 		assert(g_accel_mlx5.crypto_supported);
2006 		mlx5_task->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
2007 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO;
2008 		break;
2009 	case SPDK_ACCEL_OPC_CRC32C:
2010 		mlx5_task->inplace = 1;
2011 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2012 		break;
2013 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2014 		mlx5_task->inplace = 0;
2015 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_CRC32C;
2016 		break;
2017 	default:
2018 		SPDK_ERRLOG("wrong opcode %d\n", base_opcode);
2019 		mlx5_task->mlx5_opcode = ACCEL_MLX5_OPC_LAST;
2020 	}
2021 }
2022 
2023 static void
2024 accel_mlx5_post_poller_handler(void *fn_arg)
2025 {
2026 	struct accel_mlx5_io_channel *ch = fn_arg;
2027 	struct accel_mlx5_dev *dev;
2028 	uint32_t i;
2029 
2030 	for (i = 0; i < ch->num_devs; i++) {
2031 		dev = &ch->devs[i];
2032 
2033 		if (dev->qp.ring_db) {
2034 			spdk_mlx5_qp_complete_send(dev->qp.qp);
2035 			dev->qp.ring_db = false;
2036 		}
2037 	}
2038 
2039 	ch->poller_handler_registered = false;
2040 }
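
/*
 * qp->ring_db batches doorbell writes. Submission paths only set the flag;
 * the doorbell (an MMIO write) is rung at most once per poll cycle, either
 * here or in accel_mlx5_poller(). A sketch of the protocol:
 *
 *     // submit path, possibly many times per cycle
 *     qp->ring_db = true;
 *
 *     // once per cycle
 *     if (qp->ring_db) {
 *             spdk_mlx5_qp_complete_send(qp->qp);  // ring the doorbell
 *             qp->ring_db = false;
 *     }
 */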
2041 
2042 static inline int
2043 _accel_mlx5_submit_tasks(struct accel_mlx5_io_channel *accel_ch, struct spdk_accel_task *task)
2044 {
2045 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2046 	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;
2047 	int rc;
2048 
2049 	/* We should not receive any tasks if the module was not enabled */
2050 	assert(g_accel_mlx5.enabled);
2051 
2052 	dev->stats.opcodes[mlx5_task->mlx5_opcode]++;
2053 	rc = g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].init(mlx5_task);
2054 	if (spdk_unlikely(rc)) {
2055 		if (rc == -ENOMEM) {
2056 			SPDK_DEBUGLOG(accel_mlx5, "no reqs to handle new task %p (required %u), put to queue\n", mlx5_task,
2057 				      mlx5_task->num_reqs);
2058 			STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2059 			return 0;
2060 		}
2061 		SPDK_ERRLOG("Task opc %d init failed, rc %d\n", task->op_code, rc);
2062 		return rc;
2063 	}
2064 
2065 	if (spdk_unlikely(mlx5_task->qp->recovering)) {
2066 		STAILQ_INSERT_TAIL(&dev->nomem, mlx5_task, link);
2067 		return 0;
2068 	}
2069 
2070 	if (!accel_ch->poller_handler_registered) {
2071 		spdk_thread_register_post_poller_handler(accel_mlx5_post_poller_handler, accel_ch);
2072 		/* The function above may fail to register our handler; in that case we ring doorbells on the next
2073 		 * polling cycle. That is less efficient but still works */
2074 		accel_ch->poller_handler_registered = true;
2075 	}
2076 
2077 	return g_accel_mlx5_tasks_ops[mlx5_task->mlx5_opcode].process(mlx5_task);
2078 }
2079 
2080 static inline void
2081 accel_mlx5_task_assign_qp(struct accel_mlx5_task *mlx5_task, struct accel_mlx5_io_channel *accel_ch)
2082 {
2083 	struct accel_mlx5_dev *dev;
2084 
2085 	dev = &accel_ch->devs[accel_ch->dev_idx];
2086 	accel_ch->dev_idx++;
2087 	if (accel_ch->dev_idx == accel_ch->num_devs) {
2088 		accel_ch->dev_idx = 0;
2089 	}
2090 
2091 	mlx5_task->qp = &dev->qp;
2092 }
2093 
2094 static inline void
2095 accel_mlx5_task_reset(struct accel_mlx5_task *mlx5_task)
2096 {
2097 	mlx5_task->num_completed_reqs = 0;
2098 	mlx5_task->num_submitted_reqs = 0;
2099 	mlx5_task->num_ops = 0;
2100 	mlx5_task->num_processed_blocks = 0;
2101 	mlx5_task->raw = 0;
2102 }
2103 
2104 static int
2105 accel_mlx5_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task)
2106 {
2107 	struct accel_mlx5_task *mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
2108 	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
2109 
2110 	accel_mlx5_task_assign_qp(mlx5_task, accel_ch);
2111 	accel_mlx5_task_reset(mlx5_task);
2112 	accel_mlx5_task_init_opcode(mlx5_task);
2113 
2114 	return _accel_mlx5_submit_tasks(accel_ch, task);
2115 }
2116 
2117 static void accel_mlx5_recover_qp(struct accel_mlx5_qp *qp);
2118 
2119 static int
2120 accel_mlx5_recover_qp_poller(void *arg)
2121 {
2122 	struct accel_mlx5_qp *qp = arg;
2123 
2124 	spdk_poller_unregister(&qp->recover_poller);
2125 	accel_mlx5_recover_qp(qp);
2126 	return SPDK_POLLER_BUSY;
2127 }
2128 
2129 static void
2130 accel_mlx5_recover_qp(struct accel_mlx5_qp *qp)
2131 {
2132 	struct accel_mlx5_dev *dev = qp->dev;
2133 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2134 	int rc;
2135 
2136 	SPDK_NOTICELOG("Recovering qp %p, core %u\n", qp, spdk_env_get_current_core());
2137 	if (qp->qp) {
2138 		spdk_mlx5_qp_destroy(qp->qp);
2139 		qp->qp = NULL;
2140 	}
2141 
2142 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2143 	mlx5_qp_attr.cap.max_recv_wr = 0;
2144 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2145 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2146 
2147 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2148 	if (rc) {
2149 		SPDK_ERRLOG("Failed to create mlx5 dma QP, rc %d. Retry in %d usec\n",
2150 			    rc, ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2151 		qp->recover_poller = SPDK_POLLER_REGISTER(accel_mlx5_recover_qp_poller, qp,
2152 				     ACCEL_MLX5_RECOVER_POLLER_PERIOD_US);
2153 		return;
2154 	}
2155 
2156 	qp->recovering = false;
2157 }
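
/*
 * Recovery is retried asynchronously. A sketch of the full cycle as
 * implemented in this file:
 *
 *     error completion      -> qp->recovering = true
 *     last WR drained       -> accel_mlx5_recover_qp(): destroy + recreate
 *     re-creation failed    -> one-shot recover_poller retries in 10 ms
 *     re-creation succeeded -> qp->recovering = false
 *
 * While recovering, new and resubmitted tasks are parked on dev->nomem
 * instead of being posted to the broken QP.
 */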
2158 
2159 static inline void
2160 accel_mlx5_process_error_cpl(struct spdk_mlx5_cq_completion *wc, struct accel_mlx5_task *task)
2161 {
2162 	struct accel_mlx5_qp *qp = task->qp;
2163 
2164 	if (wc->status != IBV_WC_WR_FLUSH_ERR) {
2165 		SPDK_WARNLOG("RDMA: qp %p, task %p, WC status %d, core %u\n",
2166 			     qp, task, wc->status, spdk_env_get_current_core());
2167 	} else {
2168 		SPDK_DEBUGLOG(accel_mlx5,
2169 			      "RDMA: qp %p, task %p, WC status %d, core %u\n",
2170 			      qp, task, wc->status, spdk_env_get_current_core());
2171 	}
2172 
2173 	qp->recovering = true;
2174 	assert(task->num_completed_reqs <= task->num_submitted_reqs);
2175 	if (task->num_completed_reqs == task->num_submitted_reqs) {
2176 		STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2177 		accel_mlx5_task_fail(task, -EIO);
2178 	}
2179 }
2180 
2181 static inline int64_t
2182 accel_mlx5_poll_cq(struct accel_mlx5_dev *dev)
2183 {
2184 	struct spdk_mlx5_cq_completion wc[ACCEL_MLX5_MAX_WC];
2185 	struct accel_mlx5_task *task;
2186 	struct accel_mlx5_qp *qp;
2187 	int reaped, i, rc;
2188 	uint16_t completed;
2189 
2190 	dev->stats.polls++;
2191 	reaped = spdk_mlx5_cq_poll_completions(dev->cq, wc, ACCEL_MLX5_MAX_WC);
2192 	if (spdk_unlikely(reaped < 0)) {
2193 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n", errno, spdk_strerror(errno));
2194 		return reaped;
2195 	} else if (reaped == 0) {
2196 		dev->stats.idle_polls++;
2197 		return 0;
2198 	}
2199 	dev->stats.completions += reaped;
2200 
2201 	SPDK_DEBUGLOG(accel_mlx5, "Reaped %d cpls on dev %s\n", reaped,
2202 		      dev->dev_ctx->context->device->name);
2203 
2204 	for (i = 0; i < reaped; i++) {
2205 		if (spdk_unlikely(!wc[i].wr_id)) {
2206 			/* Unsignaled completion with error, ignore */
2207 			continue;
2208 		}
2209 		task = (struct accel_mlx5_task *)wc[i].wr_id;
2210 		qp = task->qp;
2211 		assert(task == STAILQ_FIRST(&qp->in_hw) && "submission mismatch");
2212 		assert(task->num_submitted_reqs > task->num_completed_reqs);
2213 		completed = task->num_submitted_reqs - task->num_completed_reqs;
2214 		assert((uint32_t)task->num_completed_reqs + completed <= UINT16_MAX);
2215 		task->num_completed_reqs += completed;
2216 		assert(qp->wrs_submitted >= task->num_wrs);
2217 		qp->wrs_submitted -= task->num_wrs;
2218 		assert(dev->wrs_in_cq > 0);
2219 		dev->wrs_in_cq--;
2220 
2221 		if (spdk_unlikely(wc[i].status)) {
2222 			accel_mlx5_process_error_cpl(&wc[i], task);
2223 			if (qp->wrs_submitted == 0) {
2224 				assert(STAILQ_EMPTY(&qp->in_hw));
2225 				accel_mlx5_recover_qp(qp);
2226 			}
2227 			continue;
2228 		}
2229 
2230 		SPDK_DEBUGLOG(accel_mlx5, "task %p, remaining %u\n", task,
2231 			      task->num_reqs - task->num_completed_reqs);
2232 		if (task->num_completed_reqs == task->num_reqs) {
2233 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2234 			accel_mlx5_task_complete(task);
2235 		} else {
2236 			assert(task->num_submitted_reqs < task->num_reqs);
2237 			assert(task->num_completed_reqs == task->num_submitted_reqs);
2238 			STAILQ_REMOVE_HEAD(&qp->in_hw, link);
2239 			rc = accel_mlx5_task_continue(task);
2240 			if (spdk_unlikely(rc)) {
2241 				if (rc != -ENOMEM) {
2242 					accel_mlx5_task_fail(task, rc);
2243 				}
2244 			}
2245 		}
2246 	}
2247 
2248 	return reaped;
2249 }
2250 
2251 static inline void
2252 accel_mlx5_resubmit_nomem_tasks(struct accel_mlx5_dev *dev)
2253 {
2254 	struct accel_mlx5_task *task, *tmp, *last;
2255 	int rc;
2256 
2257 	last = STAILQ_LAST(&dev->nomem, accel_mlx5_task, link);
2258 	STAILQ_FOREACH_SAFE(task, &dev->nomem, link, tmp) {
2259 		STAILQ_REMOVE_HEAD(&dev->nomem, link);
2260 		rc = accel_mlx5_task_continue(task);
2261 		if (spdk_unlikely(rc)) {
2262 			if (rc != -ENOMEM) {
2263 				accel_mlx5_task_fail(task, rc);
2264 			}
2265 			break;
2266 		}
2267 		/* If the qpair is recovering, the task is added back to the nomem list and 0 is returned. In that
2268 		 * case we need a special condition to iterate the list only once and stop this FOREACH loop */
2269 		if (task == last) {
2270 			break;
2271 		}
2272 	}
2273 }
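
/*
 * The 'last' sentinel above bounds the walk to the entries that were on the
 * list when the loop started. A sketch of the failure mode it prevents,
 * assuming one queued task T and qp->recovering set:
 *
 *     STAILQ_REMOVE_HEAD() takes T; accel_mlx5_task_continue() re-appends
 *     T to dev->nomem and returns 0. Without the sentinel check,
 *     STAILQ_FOREACH_SAFE would pick T up again and spin forever.
 */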
2274 
2275 static int
2276 accel_mlx5_poller(void *ctx)
2277 {
2278 	struct accel_mlx5_io_channel *ch = ctx;
2279 	struct accel_mlx5_dev *dev;
2280 
2281 	int64_t completions = 0, rc;
2282 	uint32_t i;
2283 
2284 	/* Reaped completions may register a post-poller handler, which makes no sense in the scope of our own poller */
2285 	ch->poller_handler_registered = true;
2286 	for (i = 0; i < ch->num_devs; i++) {
2287 		dev = &ch->devs[i];
2288 		if (dev->wrs_in_cq) {
2289 			rc = accel_mlx5_poll_cq(dev);
2290 			if (spdk_unlikely(rc < 0)) {
2291 				SPDK_ERRLOG("Error %"PRId64" on CQ, dev %s\n", rc, dev->dev_ctx->context->device->name);
2292 			}
2293 			completions += rc;
2294 			if (dev->qp.ring_db) {
2295 				spdk_mlx5_qp_complete_send(dev->qp.qp);
2296 				dev->qp.ring_db = false;
2297 			}
2298 		}
2299 		if (!STAILQ_EMPTY(&dev->nomem)) {
2300 			accel_mlx5_resubmit_nomem_tasks(dev);
2301 		}
2302 	}
2303 	ch->poller_handler_registered = false;
2304 
2305 	return !!completions;
2306 }
2307 
2308 static bool
2309 accel_mlx5_supports_opcode(enum spdk_accel_opcode opc)
2310 {
2311 	assert(g_accel_mlx5.enabled);
2312 
2313 	switch (opc) {
2314 	case SPDK_ACCEL_OPC_COPY:
2315 		return true;
2316 	case SPDK_ACCEL_OPC_ENCRYPT:
2317 	case SPDK_ACCEL_OPC_DECRYPT:
2318 		return g_accel_mlx5.crypto_supported;
2319 	case SPDK_ACCEL_OPC_CRC32C:
2320 	case SPDK_ACCEL_OPC_COPY_CRC32C:
2321 		return g_accel_mlx5.crc32c_supported;
2322 	default:
2323 		return false;
2324 	}
2325 }
2326 
2327 static struct spdk_io_channel *
2328 accel_mlx5_get_io_channel(void)
2329 {
2330 	assert(g_accel_mlx5.enabled);
2331 	return spdk_get_io_channel(&g_accel_mlx5);
2332 }
2333 
2334 static int
2335 accel_mlx5_create_qp(struct accel_mlx5_dev *dev, struct accel_mlx5_qp *qp)
2336 {
2337 	struct spdk_mlx5_qp_attr mlx5_qp_attr = {};
2338 	int rc;
2339 
2340 	mlx5_qp_attr.cap.max_send_wr = g_accel_mlx5.attr.qp_size;
2341 	mlx5_qp_attr.cap.max_recv_wr = 0;
2342 	mlx5_qp_attr.cap.max_send_sge = ACCEL_MLX5_MAX_SGE;
2343 	mlx5_qp_attr.cap.max_inline_data = sizeof(struct ibv_sge) * ACCEL_MLX5_MAX_SGE;
2344 
2345 	rc = spdk_mlx5_qp_create(dev->dev_ctx->pd, dev->cq, &mlx5_qp_attr, &qp->qp);
2346 	if (rc) {
2347 		return rc;
2348 	}
2349 
2350 	STAILQ_INIT(&qp->in_hw);
2351 	qp->dev = dev;
2352 	qp->verbs_qp = spdk_mlx5_qp_get_verbs_qp(qp->qp);
2353 	assert(qp->verbs_qp);
2354 	qp->wrs_max = g_accel_mlx5.attr.qp_size;
2355 
2356 	return 0;
2357 }
2358 
2359 static void
2360 accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_stats *to_add)
2361 {
2362 	int i;
2363 
2364 	stats->crypto_umrs += to_add->crypto_umrs;
2365 	stats->sig_umrs += to_add->sig_umrs;
2366 	stats->rdma_reads += to_add->rdma_reads;
2367 	stats->rdma_writes += to_add->rdma_writes;
2368 	stats->polls += to_add->polls;
2369 	stats->idle_polls += to_add->idle_polls;
2370 	stats->completions += to_add->completions;
2371 	stats->nomem_qdepth += to_add->nomem_qdepth;
2372 	stats->nomem_mkey += to_add->nomem_mkey;
2373 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
2374 		stats->opcodes[i] += to_add->opcodes[i];
2375 	}
2376 }
2377 
2378 static void
2379 accel_mlx5_destroy_cb(void *io_device, void *ctx_buf)
2380 {
2381 	struct accel_mlx5_io_channel *ch = ctx_buf;
2382 	struct accel_mlx5_dev *dev;
2383 	uint32_t i;
2384 
2385 	spdk_poller_unregister(&ch->poller);
2386 	for (i = 0; i < ch->num_devs; i++) {
2387 		dev = &ch->devs[i];
2388 		spdk_mlx5_qp_destroy(dev->qp.qp);
2389 		if (dev->cq) {
2390 			spdk_mlx5_cq_destroy(dev->cq);
2391 		}
2392 		spdk_poller_unregister(&dev->qp.recover_poller);
2393 		if (dev->mkeys) {
2394 			spdk_mlx5_mkey_pool_put_ref(dev->mkeys);
2395 		}
2396 		if (dev->crypto_mkeys) {
2397 			spdk_mlx5_mkey_pool_put_ref(dev->crypto_mkeys);
2398 		}
2399 		if (dev->sig_mkeys) {
2400 			spdk_mlx5_mkey_pool_put_ref(dev->sig_mkeys);
2401 		}
2402 		spdk_rdma_utils_free_mem_map(&dev->mmap);
2403 		spdk_spin_lock(&g_accel_mlx5.lock);
2404 		accel_mlx5_add_stats(&g_accel_mlx5.stats, &dev->stats);
2405 		spdk_spin_unlock(&g_accel_mlx5.lock);
2406 	}
2407 	free(ch->devs);
2408 }
2409 
2410 static int
2411 accel_mlx5_create_cb(void *io_device, void *ctx_buf)
2412 {
2413 	struct spdk_mlx5_cq_attr cq_attr = {};
2414 	struct accel_mlx5_io_channel *ch = ctx_buf;
2415 	struct accel_mlx5_dev_ctx *dev_ctx;
2416 	struct accel_mlx5_dev *dev;
2417 	uint32_t i;
2418 	int rc;
2419 
2420 	ch->devs = calloc(g_accel_mlx5.num_ctxs, sizeof(*ch->devs));
2421 	if (!ch->devs) {
2422 		SPDK_ERRLOG("Memory allocation failed\n");
2423 		return -ENOMEM;
2424 	}
2425 
2426 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2427 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2428 		dev = &ch->devs[i];
2429 		dev->dev_ctx = dev_ctx;
2430 
2431 		assert(dev_ctx->mkeys);
2432 		dev->mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, 0);
2433 		if (!dev->mkeys) {
2434 			SPDK_ERRLOG("Failed to get mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2435 			/* Should not happen since mkey pool is created on accel_mlx5 initialization.
2436 			 * We should not be here if pool creation failed */
2437 			assert(0);
2438 			goto err_out;
2439 		}
2440 
2441 		if (dev_ctx->crypto_mkeys) {
2442 			dev->crypto_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2443 			if (!dev->crypto_mkeys) {
2444 				SPDK_ERRLOG("Failed to get crypto mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2445 				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
2446 				 * We should not be here if pool creation failed */
2447 				assert(0);
2448 				goto err_out;
2449 			}
2450 		}
2451 		if (dev_ctx->sig_mkeys) {
2452 			dev->sig_mkeys = spdk_mlx5_mkey_pool_get_ref(dev_ctx->pd, SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2453 			if (!dev->sig_mkeys) {
2454 				SPDK_ERRLOG("Failed to get sig mkey pool channel, dev %s\n", dev_ctx->context->device->name);
2455 				/* Should not happen since mkey pool is created on accel_mlx5 initialization.
2456 				 * We should not be here if pool creation failed */
2457 				assert(0);
2458 				goto err_out;
2459 			}
2460 		}
2461 
2462 		memset(&cq_attr, 0, sizeof(cq_attr));
2463 		cq_attr.cqe_cnt = g_accel_mlx5.attr.qp_size;
2464 		cq_attr.cqe_size = 64;
2465 		cq_attr.cq_context = dev;
2466 
2467 		ch->num_devs++;
2468 		rc = spdk_mlx5_cq_create(dev_ctx->pd, &cq_attr, &dev->cq);
2469 		if (rc) {
2470 			SPDK_ERRLOG("Failed to create mlx5 CQ, rc %d\n", rc);
2471 			goto err_out;
2472 		}
2473 
2474 		rc = accel_mlx5_create_qp(dev, &dev->qp);
2475 		if (rc) {
2476 			SPDK_ERRLOG("Failed to create mlx5 QP, rc %d\n", rc);
2477 			goto err_out;
2478 		}
2479 
2480 		dev->mmap = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2481 				IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2482 		if (!dev->mmap) {
2483 			SPDK_ERRLOG("Failed to create memory map\n");
2484 			rc = -ENOMEM;
2485 			goto err_out;
2486 		}
2487 		dev->crypto_multi_block = dev_ctx->crypto_multi_block;
2488 		dev->crypto_split_blocks = dev_ctx->crypto_multi_block ? g_accel_mlx5.attr.crypto_split_blocks : 0;
2489 		dev->wrs_in_cq_max = g_accel_mlx5.attr.qp_size;
2490 		dev->ch = spdk_io_channel_from_ctx(ctx_buf);
2491 		STAILQ_INIT(&dev->nomem);
2492 	}
2493 
2494 	ch->poller = SPDK_POLLER_REGISTER(accel_mlx5_poller, ch, 0);
2495 
2496 	return 0;
2497 
2498 err_out:
2499 	accel_mlx5_destroy_cb(&g_accel_mlx5, ctx_buf);
2500 	return rc;
2501 }
2502 
2503 void
2504 accel_mlx5_get_default_attr(struct accel_mlx5_attr *attr)
2505 {
2506 	assert(attr);
2507 
2508 	attr->qp_size = ACCEL_MLX5_QP_SIZE;
2509 	attr->num_requests = ACCEL_MLX5_NUM_REQUESTS;
2510 	attr->allowed_devs = NULL;
2511 	attr->crypto_split_blocks = 0;
2512 	attr->enable_driver = false;
2513 }
2514 
2515 static void
2516 accel_mlx5_allowed_devs_free(void)
2517 {
2518 	size_t i;
2519 
2520 	if (!g_accel_mlx5.allowed_devs) {
2521 		return;
2522 	}
2523 
2524 	for (i = 0; i < g_accel_mlx5.allowed_devs_count; i++) {
2525 		free(g_accel_mlx5.allowed_devs[i]);
2526 	}
2527 	free(g_accel_mlx5.attr.allowed_devs);
2528 	free(g_accel_mlx5.allowed_devs);
2529 	g_accel_mlx5.attr.allowed_devs = NULL;
2530 	g_accel_mlx5.allowed_devs = NULL;
2531 	g_accel_mlx5.allowed_devs_count = 0;
2532 }
2533 
2534 static int
2535 accel_mlx5_allowed_devs_parse(const char *allowed_devs)
2536 {
2537 	char *str, *tmp, *tok, *sp = NULL;
2538 	size_t devs_count = 0;
2539 
2540 	str = strdup(allowed_devs);
2541 	if (!str) {
2542 		return -ENOMEM;
2543 	}
2544 
2545 	accel_mlx5_allowed_devs_free();
2546 
2547 	tmp = str;
2548 	while ((tmp = strchr(tmp, ',')) != NULL) {
2549 		tmp++;
2550 		devs_count++;
2551 	}
2552 	devs_count++;
2553 
2554 	g_accel_mlx5.allowed_devs = calloc(devs_count, sizeof(char *));
2555 	if (!g_accel_mlx5.allowed_devs) {
2556 		free(str);
2557 		return -ENOMEM;
2558 	}
2559 
2560 	devs_count = 0;
2561 	tok = strtok_r(str, ",", &sp);
2562 	while (tok) {
2563 		g_accel_mlx5.allowed_devs[devs_count] = strdup(tok);
2564 		if (!g_accel_mlx5.allowed_devs[devs_count]) {
2565 			free(str);
2566 			accel_mlx5_allowed_devs_free();
2567 			return -ENOMEM;
2568 		}
2569 		tok = strtok_r(NULL, ",", &sp);
2570 		devs_count++;
2571 		g_accel_mlx5.allowed_devs_count++;
2572 	}
2573 
2574 	free(str);
2575 
2576 	return 0;
2577 }
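
/*
 * An illustrative input for the parser above. The string is first scanned
 * for ',' to size the array, then split with strtok_r():
 *
 *     accel_mlx5_allowed_devs_parse("mlx5_0,mlx5_1");
 *     // g_accel_mlx5.allowed_devs       == { "mlx5_0", "mlx5_1" }
 *     // g_accel_mlx5.allowed_devs_count == 2
 *
 * Consecutive commas ("mlx5_0,,mlx5_1") produce no empty tokens because
 * strtok_r() collapses adjacent delimiters, so the array may hold fewer
 * entries than were allocated.
 */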
2578 
2579 int
2580 accel_mlx5_enable(struct accel_mlx5_attr *attr)
2581 {
2582 	int rc;
2583 
2584 	if (g_accel_mlx5.enabled) {
2585 		return -EEXIST;
2586 	}
2587 	if (attr) {
2588 		if (attr->num_requests / spdk_env_get_core_count() < ACCEL_MLX5_MAX_MKEYS_IN_TASK) {
2589 			SPDK_ERRLOG("num requests per core must not be less than %u, current value %u\n",
2590 				    ACCEL_MLX5_MAX_MKEYS_IN_TASK, attr->num_requests / spdk_env_get_core_count());
2591 			return -EINVAL;
2592 		}
2593 		if (attr->qp_size < 8) {
2594 			SPDK_ERRLOG("qp_size must be at least 8\n");
2595 			return -EINVAL;
2596 		}
2597 		g_accel_mlx5.attr = *attr;
2598 		g_accel_mlx5.attr.allowed_devs = NULL;
2599 
2600 		if (attr->allowed_devs) {
2601 			/* Keep a copy of the user's string */
2602 			g_accel_mlx5.attr.allowed_devs = strndup(attr->allowed_devs, ACCEL_MLX5_ALLOWED_DEVS_MAX_LEN);
2603 			if (!g_accel_mlx5.attr.allowed_devs) {
2604 				return -ENOMEM;
2605 			}
2606 			rc = accel_mlx5_allowed_devs_parse(g_accel_mlx5.attr.allowed_devs);
2607 			if (rc) {
2608 				return rc;
2609 			}
2610 			rc = spdk_mlx5_crypto_devs_allow((const char *const *)g_accel_mlx5.allowed_devs,
2611 							 g_accel_mlx5.allowed_devs_count);
2612 			if (rc) {
2613 				accel_mlx5_allowed_devs_free();
2614 				return rc;
2615 			}
2616 		}
2617 	} else {
2618 		accel_mlx5_get_default_attr(&g_accel_mlx5.attr);
2619 	}
2620 
2621 	g_accel_mlx5.enabled = true;
2622 	spdk_accel_module_list_add(&g_accel_mlx5.module);
2623 
2624 	return 0;
2625 }
2626 
2627 static void
2628 accel_mlx5_psvs_release(struct accel_mlx5_dev_ctx *dev_ctx)
2629 {
2630 	uint32_t i, num_psvs, num_psvs_in_pool;
2631 
2632 	if (!dev_ctx->psvs) {
2633 		return;
2634 	}
2635 
2636 	num_psvs = g_accel_mlx5.attr.num_requests;
2637 
2638 	for (i = 0; i < num_psvs; i++) {
2639 		if (dev_ctx->psvs[i]) {
2640 			spdk_mlx5_destroy_psv(dev_ctx->psvs[i]);
2641 			dev_ctx->psvs[i] = NULL;
2642 		}
2643 	}
2644 	free(dev_ctx->psvs);
2645 
2646 	if (!dev_ctx->psv_pool) {
2647 		return;
2648 	}
2649 	num_psvs_in_pool = spdk_mempool_count(dev_ctx->psv_pool);
2650 	if (num_psvs_in_pool != num_psvs) {
2651 		SPDK_ERRLOG("Expected %u reqs in the pool, but got only %u\n", num_psvs, num_psvs_in_pool);
2652 	}
2653 	spdk_mempool_free(dev_ctx->psv_pool);
2654 }
2655 
2656 static void
2657 accel_mlx5_free_resources(void)
2658 {
2659 	struct accel_mlx5_dev_ctx *dev_ctx;
2660 	uint32_t i;
2661 
2662 	for (i = 0; i < g_accel_mlx5.num_ctxs; i++) {
2663 		dev_ctx = &g_accel_mlx5.dev_ctxs[i];
2664 		accel_mlx5_psvs_release(dev_ctx);
2665 		if (dev_ctx->pd) {
2666 			if (dev_ctx->mkeys) {
2667 				spdk_mlx5_mkey_pool_destroy(0, dev_ctx->pd);
2668 			}
2669 			if (dev_ctx->crypto_mkeys) {
2670 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO, dev_ctx->pd);
2671 			}
2672 			if (dev_ctx->sig_mkeys) {
2673 				spdk_mlx5_mkey_pool_destroy(SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE, dev_ctx->pd);
2674 			}
2675 			spdk_rdma_utils_put_pd(dev_ctx->pd);
2676 		}
2677 		if (dev_ctx->domain) {
2678 			spdk_rdma_utils_put_memory_domain(dev_ctx->domain);
2679 		}
2680 	}
2681 
2682 	free(g_accel_mlx5.dev_ctxs);
2683 	g_accel_mlx5.dev_ctxs = NULL;
2684 	g_accel_mlx5.initialized = false;
2685 }
2686 
2687 static void
2688 accel_mlx5_deinit_cb(void *ctx)
2689 {
2690 	accel_mlx5_free_resources();
2691 	spdk_spin_destroy(&g_accel_mlx5.lock);
2692 	spdk_mlx5_umr_implementer_register(false);
2693 	spdk_accel_module_finish();
2694 }
2695 
2696 static void
2697 accel_mlx5_deinit(void *ctx)
2698 {
2699 	if (g_accel_mlx5.allowed_devs) {
2700 		accel_mlx5_allowed_devs_free();
2701 	}
2702 	spdk_mlx5_crypto_devs_allow(NULL, 0);
2703 	if (g_accel_mlx5.initialized) {
2704 		spdk_io_device_unregister(&g_accel_mlx5, accel_mlx5_deinit_cb);
2705 	} else {
2706 		spdk_accel_module_finish();
2707 	}
2708 }
2709 
2710 static int
2711 accel_mlx5_mkeys_create(struct ibv_pd *pd, uint32_t num_mkeys, uint32_t flags)
2712 {
2713 	struct spdk_mlx5_mkey_pool_param pool_param = {};
2714 
2715 	pool_param.mkey_count = num_mkeys;
2716 	pool_param.cache_per_thread = num_mkeys * 3 / 4 / spdk_env_get_core_count();
2717 	pool_param.flags = flags;
2718 
2719 	return spdk_mlx5_mkey_pool_init(&pool_param, pd);
2720 }
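
/*
 * Per-thread cache sizing, with the default attributes as a worked example.
 * Three quarters of the mkeys are spread across per-core caches and the
 * remainder stays in the shared pool:
 *
 *     num_mkeys        = 2047;              // default num_requests
 *     cores            = 4;                 // hypothetical
 *     cache_per_thread = 2047 * 3 / 4 / 4;  // = 383 mkeys per core
 */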
2721 
2722 static void
2723 accel_mlx5_set_psv_in_pool(struct spdk_mempool *mp, void *cb_arg, void *_psv, unsigned obj_idx)
2724 {
2725 	struct spdk_rdma_utils_memory_translation translation = {};
2726 	struct accel_mlx5_psv_pool_iter_cb_args *args = cb_arg;
2727 	struct accel_mlx5_psv_wrapper *wrapper = _psv;
2728 	struct accel_mlx5_dev_ctx *dev_ctx = args->dev;
2729 	int rc;
2730 
2731 	if (args->rc) {
2732 		return;
2733 	}
2734 	assert(obj_idx < g_accel_mlx5.attr.num_requests);
2735 	assert(dev_ctx->psvs[obj_idx] != NULL);
2736 	memset(wrapper, 0, sizeof(*wrapper));
2737 	wrapper->psv_index = dev_ctx->psvs[obj_idx]->index;
2738 
2739 	rc = spdk_rdma_utils_get_translation(args->map, &wrapper->crc, sizeof(uint32_t), &translation);
2740 	if (rc) {
2741 		SPDK_ERRLOG("Memory translation failed, addr %p, length %zu\n", &wrapper->crc, sizeof(uint32_t));
2742 		args->rc = -EINVAL;
2743 	} else {
2744 		wrapper->crc_lkey = spdk_rdma_utils_memory_translation_get_lkey(&translation);
2745 	}
2746 }
2747 
2748 static int
2749 accel_mlx5_psvs_create(struct accel_mlx5_dev_ctx *dev_ctx)
2750 {
2751 	struct accel_mlx5_psv_pool_iter_cb_args args = {
2752 		.dev = dev_ctx
2753 	};
2754 	char pool_name[32];
2755 	uint32_t i;
2756 	uint32_t num_psvs = g_accel_mlx5.attr.num_requests;
2757 	uint32_t cache_size;
2758 	int rc;
2759 
2760 	dev_ctx->psvs = calloc(num_psvs, (sizeof(struct spdk_mlx5_psv *)));
2761 	if (!dev_ctx->psvs) {
2762 		SPDK_ERRLOG("Failed to alloc PSVs array\n");
2763 		return -ENOMEM;
2764 	}
2765 	for (i = 0; i < num_psvs; i++) {
2766 		dev_ctx->psvs[i] = spdk_mlx5_create_psv(dev_ctx->pd);
2767 		if (!dev_ctx->psvs[i]) {
2768 			SPDK_ERRLOG("Failed to create PSV on dev %s\n", dev_ctx->context->device->name);
2769 			return -EINVAL;
2770 		}
2771 	}
2772 
2773 	rc = snprintf(pool_name, sizeof(pool_name), "accel_psv_%s", dev_ctx->context->device->name);
2774 	if (rc < 0) {
2775 		assert(0);
2776 		return -EINVAL;
2777 	}
2778 	cache_size = num_psvs * 3 / 4 / spdk_env_get_core_count();
2779 	args.map = spdk_rdma_utils_create_mem_map(dev_ctx->pd, NULL,
2780 			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
2781 	if (!args.map) {
2782 		return -ENOMEM;
2783 	}
2784 	dev_ctx->psv_pool = spdk_mempool_create_ctor(pool_name, num_psvs,
2785 			    sizeof(struct accel_mlx5_psv_wrapper),
2786 			    cache_size, SPDK_ENV_SOCKET_ID_ANY,
2787 			    accel_mlx5_set_psv_in_pool, &args);
2788 	spdk_rdma_utils_free_mem_map(&args.map);
2789 	if (!dev_ctx->psv_pool) {
2790 		SPDK_ERRLOG("Failed to create PSV memory pool\n");
2791 		return -ENOMEM;
2792 	}
2793 	if (args.rc) {
2794 		SPDK_ERRLOG("Failed to init PSV memory pool objects, rc %d\n", args.rc);
2795 		return args.rc;
2796 	}
2797 
2798 	return 0;
2799 }
2800 
2802 static int
2803 accel_mlx5_dev_ctx_init(struct accel_mlx5_dev_ctx *dev_ctx, struct ibv_context *dev,
2804 			struct spdk_mlx5_device_caps *caps)
2805 {
2806 	struct ibv_pd *pd;
2807 	int rc;
2808 
2809 	pd = spdk_rdma_utils_get_pd(dev);
2810 	if (!pd) {
2811 		SPDK_ERRLOG("Failed to get PD for context %p, dev %s\n", dev, dev->device->name);
2812 		return -EINVAL;
2813 	}
2814 	dev_ctx->context = dev;
2815 	dev_ctx->pd = pd;
2816 	dev_ctx->domain = spdk_rdma_utils_get_memory_domain(pd);
2817 	if (!dev_ctx->domain) {
2818 		return -ENOMEM;
2819 	}
2820 
2821 	rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, 0);
2822 	if (rc) {
2823 		SPDK_ERRLOG("Failed to create mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2824 		return rc;
2825 	}
2826 	dev_ctx->mkeys = true;
2827 
2828 	if (g_accel_mlx5.crypto_supported) {
2829 		dev_ctx->crypto_multi_block = caps->crypto.multi_block_be_tweak;
2830 		if (!dev_ctx->crypto_multi_block && g_accel_mlx5.attr.crypto_split_blocks) {
2831 			SPDK_WARNLOG("\"crypto_split_blocks\" is set but dev %s doesn't support multi block crypto\n",
2832 				     dev->device->name);
2833 		}
2834 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests, SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO);
2835 		if (rc) {
2836 			SPDK_ERRLOG("Failed to create crypto mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2837 			return rc;
2838 		}
2839 		dev_ctx->crypto_mkeys = true;
2840 	}
2841 	if (g_accel_mlx5.crc32c_supported) {
2842 		rc = accel_mlx5_mkeys_create(pd, g_accel_mlx5.attr.num_requests,
2843 					     SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE);
2844 		if (rc) {
2845 			SPDK_ERRLOG("Failed to create signature mkeys pool, rc %d, dev %s\n", rc, dev->device->name);
2846 			return rc;
2847 		}
2848 		dev_ctx->sig_mkeys = true;
2849 		rc = accel_mlx5_psvs_create(dev_ctx);
2850 		if (rc) {
2851 			SPDK_ERRLOG("Failed to create PSVs pool, rc %d, dev %s\n", rc, dev->device->name);
2852 			return rc;
2853 		}
2854 	}
2855 
2856 	return 0;
2857 }
2858 
2859 static struct ibv_context **
2860 accel_mlx5_get_devices(int *_num_devs)
2861 {
2862 	struct ibv_context **rdma_devs, **rdma_devs_out = NULL, *dev;
2863 	struct ibv_device_attr dev_attr;
2864 	size_t j;
2865 	int num_devs = 0, i, rc;
2866 	int num_devs_out = 0;
2867 	bool dev_allowed;
2868 
2869 	rdma_devs = rdma_get_devices(&num_devs);
2870 	if (!rdma_devs || !num_devs) {
2871 		*_num_devs = 0;
2872 		return NULL;
2873 	}
2874 
2875 	rdma_devs_out = calloc(num_devs + 1, sizeof(struct ibv_context *));
2876 	if (!rdma_devs_out) {
2877 		SPDK_ERRLOG("Memory allocation failed\n");
2878 		rdma_free_devices(rdma_devs);
2879 		*_num_devs = 0;
2880 		return NULL;
2881 	}
2882 
2883 	for (i = 0; i < num_devs; i++) {
2884 		dev = rdma_devs[i];
2885 		rc = ibv_query_device(dev, &dev_attr);
2886 		if (rc) {
2887 			SPDK_ERRLOG("Failed to query dev %s, skipping\n", dev->device->name);
2888 			continue;
2889 		}
2890 		if (dev_attr.vendor_id != SPDK_MLX5_VENDOR_ID_MELLANOX) {
2891 			SPDK_DEBUGLOG(accel_mlx5, "dev %s is not Mellanox device, skipping\n", dev->device->name);
2892 			continue;
2893 		}
2894 
2895 		if (g_accel_mlx5.allowed_devs_count) {
2896 			dev_allowed = false;
2897 			for (j = 0; j < g_accel_mlx5.allowed_devs_count; j++) {
2898 				if (strcmp(g_accel_mlx5.allowed_devs[j], dev->device->name) == 0) {
2899 					dev_allowed = true;
2900 					break;
2901 				}
2902 			}
2903 			if (!dev_allowed) {
2904 				continue;
2905 			}
2906 		}
2907 
2908 		rdma_devs_out[num_devs_out] = dev;
2909 		num_devs_out++;
2910 	}
2911 
2912 	rdma_free_devices(rdma_devs);
2913 	*_num_devs = num_devs_out;
2914 
2915 	return rdma_devs_out;
2916 }
2917 
2918 static inline bool
2919 accel_mlx5_dev_supports_crypto(struct spdk_mlx5_device_caps *caps)
2920 {
2921 	return caps->crypto_supported && !caps->crypto.wrapped_import_method_aes_xts &&
2922 	       (caps->crypto.single_block_le_tweak ||
2923 		caps->crypto.multi_block_le_tweak || caps->crypto.multi_block_be_tweak);
2924 }
2925 
2926 static int
2927 accel_mlx5_init(void)
2928 {
2929 	struct spdk_mlx5_device_caps *caps;
2930 	struct ibv_context **rdma_devs, *dev;
2931 	int num_devs = 0, rc = 0, i;
2932 	int best_dev = -1, first_dev = 0;
2933 	int best_dev_stat = 0, dev_stat;
2934 	bool supports_crypto;
2935 	bool find_best_dev = g_accel_mlx5.allowed_devs_count == 0;
2936 
2937 	if (!g_accel_mlx5.enabled) {
2938 		return -EINVAL;
2939 	}
2940 
2941 	spdk_spin_init(&g_accel_mlx5.lock);
2942 	rdma_devs = accel_mlx5_get_devices(&num_devs);
2943 	if (!rdma_devs || !num_devs) {
2944 		return -ENODEV;
2945 	}
2946 	caps = calloc(num_devs, sizeof(*caps));
2947 	if (!caps) {
2948 		rc = -ENOMEM;
2949 		goto cleanup;
2950 	}
2951 
2952 	g_accel_mlx5.crypto_supported = true;
2953 	g_accel_mlx5.crc32c_supported = true;
2954 	g_accel_mlx5.num_ctxs = 0;
2955 
2956 	/* Iterate devices. We support an offload if all devices support it */
2957 	for (i = 0; i < num_devs; i++) {
2958 		dev = rdma_devs[i];
2959 
2960 		rc = spdk_mlx5_device_query_caps(dev, &caps[i]);
2961 		if (rc) {
2962 			SPDK_ERRLOG("Failed to get crypto caps, dev %s\n", dev->device->name);
2963 			goto cleanup;
2964 		}
2965 		supports_crypto = accel_mlx5_dev_supports_crypto(&caps[i]);
2966 		if (!supports_crypto) {
2967 			SPDK_DEBUGLOG(accel_mlx5, "Disable crypto support because dev %s doesn't support it\n",
2968 				      rdma_devs[i]->device->name);
2969 			g_accel_mlx5.crypto_supported = false;
2970 		}
2971 		if (!caps[i].crc32c_supported) {
2972 			SPDK_DEBUGLOG(accel_mlx5, "Disable crc32c support because dev %s doesn't support it\n",
2973 				      rdma_devs[i]->device->name);
2974 			g_accel_mlx5.crc32c_supported = false;
2975 		}
2976 		if (find_best_dev) {
2977 			/* Find device which supports max number of offloads */
2978 			dev_stat = (int)supports_crypto + (int)caps[i].crc32c_supported;
2979 			if (dev_stat > best_dev_stat) {
2980 				best_dev_stat = dev_stat;
2981 				best_dev = i;
2982 			}
2983 		}
2984 	}
2985 
2986 	/* User didn't specify devices to use, try to select the best one */
2987 	if (find_best_dev) {
2988 		if (best_dev == -1) {
2989 			best_dev = 0;
2990 		}
2991 		g_accel_mlx5.crypto_supported = accel_mlx5_dev_supports_crypto(&caps[best_dev]);
2992 		g_accel_mlx5.crc32c_supported = caps[best_dev].crc32c_supported;
2993 		SPDK_NOTICELOG("Select dev %s, crypto %d, crc32c %d\n", rdma_devs[best_dev]->device->name,
2994 			       g_accel_mlx5.crypto_supported, g_accel_mlx5.crc32c_supported);
2995 		first_dev = best_dev;
2996 		num_devs = 1;
2997 		if (g_accel_mlx5.crypto_supported) {
2998 			const char *const dev_name[] = { rdma_devs[best_dev]->device->name };
2999 			/* Let mlx5 library know which device to use */
3000 			spdk_mlx5_crypto_devs_allow(dev_name, 1);
3001 		}
3002 	} else {
3003 		SPDK_NOTICELOG("Found %d devices, crypto %d\n", num_devs, g_accel_mlx5.crypto_supported);
3004 	}
3005 
3006 	g_accel_mlx5.dev_ctxs = calloc(num_devs, sizeof(*g_accel_mlx5.dev_ctxs));
3007 	if (!g_accel_mlx5.dev_ctxs) {
3008 		SPDK_ERRLOG("Memory allocation failed\n");
3009 		rc = -ENOMEM;
3010 		goto cleanup;
3011 	}
3012 
3013 	for (i = first_dev; i < first_dev + num_devs; i++) {
3014 		rc = accel_mlx5_dev_ctx_init(&g_accel_mlx5.dev_ctxs[g_accel_mlx5.num_ctxs++],
3015 					     rdma_devs[i], &caps[i]);
3016 		if (rc) {
3017 			goto cleanup;
3018 		}
3019 	}
3020 
3021 	SPDK_NOTICELOG("Accel framework mlx5 initialized, found %d devices.\n", num_devs);
3022 	spdk_io_device_register(&g_accel_mlx5, accel_mlx5_create_cb, accel_mlx5_destroy_cb,
3023 				sizeof(struct accel_mlx5_io_channel), "accel_mlx5");
3024 	g_accel_mlx5.initialized = true;
3025 	free(rdma_devs);
3026 	free(caps);
3027 
3028 	if (g_accel_mlx5.attr.enable_driver) {
3029 		SPDK_NOTICELOG("Enabling mlx5 platform driver\n");
3030 		spdk_accel_driver_register(&g_accel_mlx5_driver);
3031 		spdk_accel_set_driver(g_accel_mlx5_driver.name);
3032 		spdk_mlx5_umr_implementer_register(true);
3033 	}
3034 
3035 	return 0;
3036 
3037 cleanup:
3038 	free(rdma_devs);
3039 	free(caps);
3040 	accel_mlx5_free_resources();
3041 	spdk_spin_destroy(&g_accel_mlx5.lock);
3042 
3043 	return rc;
3044 }
3045 
3046 static void
3047 accel_mlx5_write_config_json(struct spdk_json_write_ctx *w)
3048 {
3049 	if (g_accel_mlx5.enabled) {
3050 		spdk_json_write_object_begin(w);
3051 		spdk_json_write_named_string(w, "method", "mlx5_scan_accel_module");
3052 		spdk_json_write_named_object_begin(w, "params");
3053 		spdk_json_write_named_uint16(w, "qp_size", g_accel_mlx5.attr.qp_size);
3054 		spdk_json_write_named_uint32(w, "num_requests", g_accel_mlx5.attr.num_requests);
3055 		if (g_accel_mlx5.attr.allowed_devs) {
3056 			spdk_json_write_named_string(w, "allowed_devs", g_accel_mlx5.attr.allowed_devs);
3057 		}
3058 		spdk_json_write_named_uint16(w, "crypto_split_blocks", g_accel_mlx5.attr.crypto_split_blocks);
3059 		spdk_json_write_named_bool(w, "enable_driver", g_accel_mlx5.attr.enable_driver);
3060 		spdk_json_write_object_end(w);
3061 		spdk_json_write_object_end(w);
3062 	}
3063 }
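
/*
 * With the default attributes the function above emits one object into the
 * config JSON, roughly (illustrative output):
 *
 *     {
 *       "method": "mlx5_scan_accel_module",
 *       "params": {
 *         "qp_size": 256,
 *         "num_requests": 2047,
 *         "crypto_split_blocks": 0,
 *         "enable_driver": false
 *       }
 *     }
 *
 * "allowed_devs" is written only when the user restricted the device list.
 */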
3064 
3065 static size_t
3066 accel_mlx5_get_ctx_size(void)
3067 {
3068 	return sizeof(struct accel_mlx5_task);
3069 }
3070 
3071 static int
3072 accel_mlx5_crypto_key_init(struct spdk_accel_crypto_key *key)
3073 {
3074 	struct spdk_mlx5_crypto_dek_create_attr attr = {};
3075 	struct spdk_mlx5_crypto_keytag *keytag;
3076 	int rc;
3077 
3078 	if (!key || !key->key || !key->key2 || !key->key_size || !key->key2_size) {
3079 		return -EINVAL;
3080 	}
3081 
3082 	attr.dek = calloc(1, key->key_size + key->key2_size);
3083 	if (!attr.dek) {
3084 		return -ENOMEM;
3085 	}
3086 
3087 	memcpy(attr.dek, key->key, key->key_size);
3088 	memcpy(attr.dek + key->key_size, key->key2, key->key2_size);
3089 	attr.dek_len = key->key_size + key->key2_size;
3090 
3091 	rc = spdk_mlx5_crypto_keytag_create(&attr, &keytag);
3092 	spdk_memset_s(attr.dek, attr.dek_len, 0, attr.dek_len);
3093 	free(attr.dek);
3094 	if (rc) {
3095 		SPDK_ERRLOG("Failed to create a keytag, rc %d\n", rc);
3096 		return rc;
3097 	}
3098 
3099 	key->priv = keytag;
3100 
3101 	return 0;
3102 }
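
/*
 * AES-XTS uses two keys and the mlx5 DEK is their plain concatenation. A
 * sketch for XTS-256, assuming 32-byte key halves:
 *
 *     key->key_size == 32 and key->key2_size == 32, so attr.dek_len == 64:
 *     bytes [0..31]  = key->key   (data key)
 *     bytes [32..63] = key->key2  (tweak key)
 *
 * The temporary DEK buffer is wiped with spdk_memset_s() before free() so
 * key material does not linger on the heap.
 */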
3103 
3104 static void
3105 accel_mlx5_crypto_key_deinit(struct spdk_accel_crypto_key *key)
3106 {
3107 	if (!key || key->module_if != &g_accel_mlx5.module || !key->priv) {
3108 		return;
3109 	}
3110 
3111 	spdk_mlx5_crypto_keytag_destroy(key->priv);
3112 }
3113 
3114 static void
3115 accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
3116 			   const struct accel_mlx5_stats *stats)
3117 {
3118 	double idle_polls_percentage = 0;
3119 	double cpls_per_poll = 0;
3120 	uint64_t total_tasks = 0;
3121 	int i;
3122 
3123 	if (stats->polls) {
3124 		idle_polls_percentage = (double) stats->idle_polls * 100 / stats->polls;
3125 	}
3126 	if (stats->polls > stats->idle_polls) {
3127 		cpls_per_poll = (double) stats->completions / (stats->polls - stats->idle_polls);
3128 	}
3129 	for (i = 0; i < ACCEL_MLX5_OPC_LAST; i++) {
3130 		total_tasks += stats->opcodes[i];
3131 	}
3132 
3133 	spdk_json_write_named_object_begin(w, header);
3134 
3135 	spdk_json_write_named_object_begin(w, "umrs");
3136 	spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs);
3137 	spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs);
3138 	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs);
3139 	spdk_json_write_object_end(w);
3140 
3141 	spdk_json_write_named_object_begin(w, "rdma");
3142 	spdk_json_write_named_uint64(w, "read", stats->rdma_reads);
3143 	spdk_json_write_named_uint64(w, "write", stats->rdma_writes);
3144 	spdk_json_write_named_uint64(w, "total", stats->rdma_reads + stats->rdma_writes);
3145 	spdk_json_write_object_end(w);
3146 
3147 	spdk_json_write_named_object_begin(w, "polling");
3148 	spdk_json_write_named_uint64(w, "polls", stats->polls);
3149 	spdk_json_write_named_uint64(w, "idle_polls", stats->idle_polls);
3150 	spdk_json_write_named_uint64(w, "completions", stats->completions);
3151 	spdk_json_write_named_double(w, "idle_polls_percentage", idle_polls_percentage);
3152 	spdk_json_write_named_double(w, "cpls_per_poll", cpls_per_poll);
3153 	spdk_json_write_named_uint64(w, "nomem_qdepth", stats->nomem_qdepth);
3154 	spdk_json_write_named_uint64(w, "nomem_mkey", stats->nomem_mkey);
3155 	spdk_json_write_object_end(w);
3156 
3157 	spdk_json_write_named_object_begin(w, "tasks");
3158 	spdk_json_write_named_uint64(w, "copy", stats->opcodes[ACCEL_MLX5_OPC_COPY]);
3159 	spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]);
3160 	spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]);
3161 	spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]);
3162 	spdk_json_write_named_uint64(w, "total", total_tasks);
3163 	spdk_json_write_object_end(w);
3164 
3165 	spdk_json_write_object_end(w);
3166 }
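
/*
 * An abridged example of the object written above for one header (values
 * are illustrative):
 *
 *     "total": {
 *       "umrs":    { "crypto_umrs": 10, "sig_umrs": 2, "total": 12 },
 *       "rdma":    { "read": 5, "write": 7, "total": 12 },
 *       "polling": { "polls": 100, "idle_polls": 60, "completions": 24,
 *                    "idle_polls_percentage": 60.0, "cpls_per_poll": 0.6,
 *                    "nomem_qdepth": 0, "nomem_mkey": 0 },
 *       "tasks":   { "copy": 1, "crypto": 10, "crypto_mkey": 0,
 *                    "crc32c": 1, "total": 12 }
 *     }
 */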
3167 
3168 static void
3169 accel_mlx5_dump_channel_stat(struct spdk_io_channel_iter *i)
3170 {
3171 	struct accel_mlx5_stats ch_stat = {};
3172 	struct accel_mlx5_dump_stats_ctx *ctx;
3173 	struct spdk_io_channel *_ch;
3174 	struct accel_mlx5_io_channel *ch;
3175 	struct accel_mlx5_dev *dev;
3176 	uint32_t j;
3177 
3178 	ctx = spdk_io_channel_iter_get_ctx(i);
3179 	_ch = spdk_io_channel_iter_get_channel(i);
3180 	ch = spdk_io_channel_get_ctx(_ch);
3181 
3182 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3183 		spdk_json_write_object_begin(ctx->w);
3184 		spdk_json_write_named_object_begin(ctx->w, spdk_thread_get_name(spdk_get_thread()));
3185 	}
3186 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3187 		spdk_json_write_named_array_begin(ctx->w, "devices");
3188 	}
3189 
3190 	for (j = 0; j < ch->num_devs; j++) {
3191 		dev = &ch->devs[j];
3192 		/* Save grand total and channel stats */
3193 		accel_mlx5_add_stats(&ctx->total, &dev->stats);
3194 		accel_mlx5_add_stats(&ch_stat, &dev->stats);
3195 		if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3196 			spdk_json_write_object_begin(ctx->w);
3197 			accel_mlx5_dump_stats_json(ctx->w, dev->dev_ctx->context->device->name, &dev->stats);
3198 			spdk_json_write_object_end(ctx->w);
3199 		}
3200 	}
3201 
3202 	if (ctx->level == ACCEL_MLX5_DUMP_STAT_LEVEL_DEV) {
3203 		spdk_json_write_array_end(ctx->w);
3204 	}
3205 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3206 		accel_mlx5_dump_stats_json(ctx->w, "channel_total", &ch_stat);
3207 		spdk_json_write_object_end(ctx->w);
3208 		spdk_json_write_object_end(ctx->w);
3209 	}
3210 
3211 	spdk_for_each_channel_continue(i, 0);
3212 }
3213 
3214 static void
3215 accel_mlx5_dump_channel_stat_done(struct spdk_io_channel_iter *i, int status)
3216 {
3217 	struct accel_mlx5_dump_stats_ctx *ctx;
3218 
3219 	ctx = spdk_io_channel_iter_get_ctx(i);
3220 
3221 	spdk_spin_lock(&g_accel_mlx5.lock);
3222 	/* Add statistics from destroyed channels */
3223 	accel_mlx5_add_stats(&ctx->total, &g_accel_mlx5.stats);
3224 	spdk_spin_unlock(&g_accel_mlx5.lock);
3225 
3226 	if (ctx->level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3227 		/* channels[] */
3228 		spdk_json_write_array_end(ctx->w);
3229 	}
3230 
3231 	accel_mlx5_dump_stats_json(ctx->w, "total", &ctx->total);
3232 
3233 	/* Ends the whole response which was begun in accel_mlx5_dump_stats */
3234 	spdk_json_write_object_end(ctx->w);
3235 
3236 	ctx->cb(ctx->ctx, 0);
3237 	free(ctx);
3238 }
3239 
3240 int
3241 accel_mlx5_dump_stats(struct spdk_json_write_ctx *w, enum accel_mlx5_dump_state_level level,
3242 		      accel_mlx5_dump_stat_done_cb cb, void *ctx)
3243 {
3244 	struct accel_mlx5_dump_stats_ctx *stat_ctx;
3245 
3246 	if (!w || !cb) {
3247 		return -EINVAL;
3248 	}
3249 	if (!g_accel_mlx5.initialized) {
3250 		return -ENODEV;
3251 	}
3252 
3253 	stat_ctx = calloc(1, sizeof(*stat_ctx));
3254 	if (!stat_ctx) {
3255 		return -ENOMEM;
3256 	}
3257 	stat_ctx->cb = cb;
3258 	stat_ctx->ctx = ctx;
3259 	stat_ctx->level = level;
3260 	stat_ctx->w = w;
3261 
3262 	spdk_json_write_object_begin(w);
3263 
3264 	if (level != ACCEL_MLX5_DUMP_STAT_LEVEL_TOTAL) {
3265 		spdk_json_write_named_array_begin(w, "channels");
3266 	}
3267 
3268 	spdk_for_each_channel(&g_accel_mlx5, accel_mlx5_dump_channel_stat, stat_ctx,
3269 			      accel_mlx5_dump_channel_stat_done);
3270 
3271 	return 0;
3272 }
3273 
3274 static bool
3275 accel_mlx5_crypto_supports_cipher(enum spdk_accel_cipher cipher, size_t key_size)
3276 {
3277 	switch (cipher) {
3278 	case SPDK_ACCEL_CIPHER_AES_XTS:
3279 		return key_size == SPDK_ACCEL_AES_XTS_128_KEY_SIZE || key_size == SPDK_ACCEL_AES_XTS_256_KEY_SIZE;
3280 	default:
3281 		return false;
3282 	}
3283 }

static int
accel_mlx5_get_memory_domains(struct spdk_memory_domain **domains, int array_size)
{
	int i, size;

	if (!domains || !array_size) {
		return (int)g_accel_mlx5.num_ctxs;
	}

	size = spdk_min(array_size, (int)g_accel_mlx5.num_ctxs);

	for (i = 0; i < size; i++) {
		domains[i] = g_accel_mlx5.dev_ctxs[i].domain;
	}

	return (int)g_accel_mlx5.num_ctxs;
}
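
/*
 * The callback above follows the usual SPDK memory-domain enumeration
 * contract: it always returns the total number of domains and fills at most
 * array_size entries, so a caller can probe first and then fetch, e.g.
 * (illustrative sketch):
 *
 *	int num = accel_mlx5_get_memory_domains(NULL, 0);
 *	struct spdk_memory_domain **domains = calloc(num, sizeof(*domains));
 *
 *	if (domains) {
 *		accel_mlx5_get_memory_domains(domains, num);
 *	}
 */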

static inline struct accel_mlx5_dev *
accel_mlx5_ch_get_dev_by_pd(struct accel_mlx5_io_channel *accel_ch, struct ibv_pd *pd)
{
	uint32_t i;

	for (i = 0; i < accel_ch->num_devs; i++) {
		if (accel_ch->devs[i].dev_ctx->pd == pd) {
			return &accel_ch->devs[i];
		}
	}

	return NULL;
}

static inline int
accel_mlx5_task_assign_qp_by_domain_pd(struct accel_mlx5_task *task,
				       struct accel_mlx5_io_channel *accel_ch, struct spdk_memory_domain *domain)
{
	struct spdk_memory_domain_rdma_ctx *domain_ctx;
	struct accel_mlx5_dev *dev;
	struct ibv_pd *domain_pd;
	size_t ctx_size;

	domain_ctx = spdk_memory_domain_get_user_context(domain, &ctx_size);
	if (spdk_unlikely(!domain_ctx || domain_ctx->size != ctx_size)) {
		SPDK_ERRLOG("no domain context or wrong size, ctx ptr %p, size %zu\n", domain_ctx, ctx_size);
		return -ENOTSUP;
	}
	domain_pd = domain_ctx->ibv_pd;
	if (spdk_unlikely(!domain_pd)) {
		SPDK_ERRLOG("no destination domain PD, task %p\n", task);
		return -ENOTSUP;
	}
	dev = accel_mlx5_ch_get_dev_by_pd(accel_ch, domain_pd);
	if (spdk_unlikely(!dev)) {
		SPDK_ERRLOG("No dev for PD %p dev %s\n", domain_pd, domain_pd->context->device->name);
		return -ENODEV;
	}
	task->qp = &dev->qp;

	return 0;
}
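
/*
 * The lookup above relies on the RDMA memory-domain contract: the domain's
 * user context is a struct spdk_memory_domain_rdma_ctx (see spdk/dma.h)
 * whose ibv_pd must match the PD of one of this channel's devices. A domain
 * created for a different HCA has no matching PD and the task is rejected
 * with -ENODEV rather than silently using the wrong protection domain.
 */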

static inline int
accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
				   struct accel_mlx5_io_channel *accel_ch)
{
	struct spdk_accel_task *first_base = spdk_accel_sequence_first_task(seq);
	struct accel_mlx5_task *first = SPDK_CONTAINEROF(first_base, struct accel_mlx5_task, base);
	struct spdk_accel_task *next_base = TAILQ_NEXT(first_base, seq_link);
	struct accel_mlx5_task *next;
	int rc;

	accel_mlx5_task_reset(first);
	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
		      next_base, next_base ? next_base->op_code : -1);
	if (next_base) {
		switch (first_base->op_code) {
		case SPDK_ACCEL_OPC_COPY:
			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&
			    first_base->dst_domain && spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
				next = SPDK_CONTAINEROF(next_base, struct accel_mlx5_task, base);
				rc = accel_mlx5_task_assign_qp_by_domain_pd(next, accel_ch, first_base->dst_domain);
				if (spdk_unlikely(rc)) {
					return rc;
				}
				/* Update decrypt task memory domain, complete copy task */
				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and decrypt task (%p)\n", first, next);
				next_base->dst_domain = first_base->dst_domain;
				next_base->dst_domain_ctx = first_base->dst_domain_ctx;
				accel_mlx5_task_reset(next);
				next->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
				next->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
				next->needs_data_transfer = 1;
				next->inplace = 1;
				spdk_accel_task_complete(first_base, 0);
				return 0;
			}
			break;
		case SPDK_ACCEL_OPC_ENCRYPT:
			if (next_base->op_code == SPDK_ACCEL_OPC_COPY &&
			    next_base->dst_domain && spdk_memory_domain_get_dma_device_type(next_base->dst_domain) ==
			    SPDK_DMA_DEVICE_TYPE_RDMA && TAILQ_NEXT(next_base, seq_link) == NULL) {
				rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, next_base->dst_domain);
				if (spdk_unlikely(rc)) {
					return rc;
				}

				/* Update encrypt task memory domain, complete copy task */
				SPDK_DEBUGLOG(accel_mlx5, "Merge copy task (%p) and encrypt task (%p)\n",
					      SPDK_CONTAINEROF(next_base,
							       struct accel_mlx5_task, base), first);
				first_base->dst_domain = next_base->dst_domain;
				first_base->dst_domain_ctx = next_base->dst_domain_ctx;
				first->mlx5_opcode = ACCEL_MLX5_OPC_CRYPTO_MKEY;
				first->enc_order = SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
				first->needs_data_transfer = 1;
				first->inplace = 1;
				spdk_accel_task_complete(next_base, 0);
				return 0;
			}
			break;
		default:
			break;
		}
	}

	SPDK_DEBUGLOG(accel_mlx5, "seq %p, task %p nothing to merge\n", seq, first_base);
	/* Nothing to merge, execute tasks one by one */
	accel_mlx5_task_assign_qp(first, accel_ch);
	accel_mlx5_task_init_opcode(first);

	return 0;
}
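
/*
 * Merge patterns recognized by accel_mlx5_driver_examine_sequence() (sketch):
 *
 *	COPY (dst in an RDMA memory domain) -> DECRYPT
 *	ENCRYPT -> COPY (dst in an RDMA memory domain)
 *
 * In both cases the pair must terminate the sequence. The two tasks collapse
 * into a single ACCEL_MLX5_OPC_CRYPTO_MKEY task that inherits the copy's
 * destination memory domain, and the absorbed copy task is completed
 * immediately with status 0. Any other shape falls back to the generic path
 * that executes tasks one by one.
 */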

static inline int
accel_mlx5_execute_sequence(struct spdk_io_channel *ch, struct spdk_accel_sequence *seq)
{
	struct accel_mlx5_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
	struct spdk_accel_task *task;
	struct accel_mlx5_task *mlx5_task;
	int rc;

	rc = accel_mlx5_driver_examine_sequence(seq, accel_ch);
	if (spdk_unlikely(rc)) {
		return rc;
	}
	task = spdk_accel_sequence_first_task(seq);
	assert(task);
	mlx5_task = SPDK_CONTAINEROF(task, struct accel_mlx5_task, base);
	mlx5_task->driver_seq = 1;

	SPDK_DEBUGLOG(accel_mlx5, "driver starts seq %p, ch %p, task %p\n", seq, accel_ch, task);

	return _accel_mlx5_submit_tasks(accel_ch, task);
}

static struct accel_mlx5_module g_accel_mlx5 = {
	.module = {
		.module_init		= accel_mlx5_init,
		.module_fini		= accel_mlx5_deinit,
		.write_config_json	= accel_mlx5_write_config_json,
		.get_ctx_size		= accel_mlx5_get_ctx_size,
		.name			= "mlx5",
		.supports_opcode	= accel_mlx5_supports_opcode,
		.get_io_channel		= accel_mlx5_get_io_channel,
		.submit_tasks		= accel_mlx5_submit_tasks,
		.crypto_key_init	= accel_mlx5_crypto_key_init,
		.crypto_key_deinit	= accel_mlx5_crypto_key_deinit,
		.crypto_supports_cipher	= accel_mlx5_crypto_supports_cipher,
		.get_memory_domains	= accel_mlx5_get_memory_domains,
	}
};
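
/*
 * Note: this module is opt-in. g_accel_mlx5.module is not registered with
 * the accel framework unconditionally at load time; registration is expected
 * to happen only when the module is explicitly enabled (tracked by the
 * module's "enabled" flag), e.g. via the accel_mlx5 RPCs.
 */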

static struct spdk_accel_driver g_accel_mlx5_driver = {
	.name			= "mlx5",
	.execute_sequence	= accel_mlx5_execute_sequence,
	.get_io_channel		= accel_mlx5_get_io_channel
};
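
/*
 * Usage sketch: the platform driver is opt-in as well. An application that
 * wants whole sequences executed by this driver selects it by name, e.g.
 *
 *	spdk_accel_set_driver("mlx5");
 *
 * after which the accel framework hands sequences to
 * g_accel_mlx5_driver.execute_sequence instead of running each task through
 * the per-opcode modules.
 */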

SPDK_LOG_REGISTER_COMPONENT(accel_mlx5)