xref: /dpdk/drivers/crypto/mlx5/mlx5_crypto_xts.c (revision 3cddeba0ca38b00c7dc646277484d08a4cb2d862)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023 NVIDIA Corporation & Affiliates
 */

#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <bus_pci_driver.h>
#include <rte_memory.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common_os.h>

#include "mlx5_crypto_utils.h"
#include "mlx5_crypto.h"

const struct rte_cryptodev_capabilities mlx5_crypto_caps[] = {
	{		/* AES XTS */
		.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
		{.sym = {
			.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
			{.cipher = {
				.algo = RTE_CRYPTO_CIPHER_AES_XTS,
				.block_size = 16,
				.key_size = {
					.min = 32,
					.max = 64,
					.increment = 32
				},
				.iv_size = {
					.min = 16,
					.max = 16,
					.increment = 0
				},
				.dataunit_set =
				RTE_CRYPTO_CIPHER_DATA_UNIT_LEN_512_BYTES |
				RTE_CRYPTO_CIPHER_DATA_UNIT_LEN_4096_BYTES |
				RTE_CRYPTO_CIPHER_DATA_UNIT_LEN_1_MEGABYTES,
			}, }
		}, }
	},
};

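/*
 * Fill the DEVX DEK attributes for an AES-XTS key.
 * In wrapped mode the wrapped key blob is used as-is; in plaintext mode the
 * 8-byte keytag is appended after the key material.
 */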
int
mlx5_crypto_dek_fill_xts_attr(struct mlx5_crypto_dek *dek,
			      struct mlx5_devx_dek_attr *dek_attr,
			      void *cb_ctx)
{
	struct mlx5_crypto_dek_ctx *ctx = cb_ctx;
	struct rte_crypto_cipher_xform *cipher_ctx = &ctx->xform->cipher;
	bool is_wrapped = ctx->priv->is_wrapped_mode;

	if (cipher_ctx->algo != RTE_CRYPTO_CIPHER_AES_XTS) {
		DRV_LOG(ERR, "Only AES-XTS algo supported.");
		return -EINVAL;
	}
	dek_attr->key_purpose = MLX5_CRYPTO_KEY_PURPOSE_AES_XTS;
	dek_attr->has_keytag = 1;
	if (is_wrapped) {
		switch (cipher_ctx->key.length) {
		case 48:
			dek->size = 48;
			dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_128b;
			break;
		case 80:
			dek->size = 80;
			dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_256b;
			break;
		default:
			DRV_LOG(ERR, "Wrapped key size not supported.");
			return -EINVAL;
		}
	} else {
		switch (cipher_ctx->key.length) {
		case 32:
			dek->size = 40;
			dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_128b;
			break;
		case 64:
			dek->size = 72;
			dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_256b;
			break;
		default:
			DRV_LOG(ERR, "Key size not supported.");
			return -EINVAL;
		}
		memcpy(&dek_attr->key[cipher_ctx->key.length],
						&ctx->priv->keytag, 8);
	}
	memcpy(&dek_attr->key, cipher_ctx->key.data, cipher_ctx->key.length);
	memcpy(&dek->data, cipher_ctx->key.data, cipher_ctx->key.length);
	return 0;
}

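/*
 * Create the AES-XTS session private data: prepare the DEK, precompute the
 * crypto BSF fields (encryption order, standard, block size) and store the
 * IV offset and DEK object ID used by the data path.
 */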
static int
mlx5_crypto_xts_sym_session_configure(struct rte_cryptodev *dev,
				      struct rte_crypto_sym_xform *xform,
				      struct rte_cryptodev_sym_session *session)
{
	struct mlx5_crypto_priv *priv = dev->data->dev_private;
	struct mlx5_crypto_session *sess_private_data =
		CRYPTODEV_GET_SYM_SESS_PRIV(session);
	struct rte_crypto_cipher_xform *cipher;
	uint8_t encryption_order;

	if (unlikely(xform->next != NULL)) {
		DRV_LOG(ERR, "Xform next is not supported.");
		return -ENOTSUP;
	}
	if (unlikely((xform->type != RTE_CRYPTO_SYM_XFORM_CIPHER) ||
		     (xform->cipher.algo != RTE_CRYPTO_CIPHER_AES_XTS))) {
		DRV_LOG(ERR, "Only AES-XTS algorithm is supported.");
		return -ENOTSUP;
	}
	cipher = &xform->cipher;
	sess_private_data->dek = mlx5_crypto_dek_prepare(priv, xform);
	if (sess_private_data->dek == NULL) {
		DRV_LOG(ERR, "Failed to prepare dek.");
		return -ENOMEM;
	}
	if (cipher->op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
		encryption_order = MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY;
	else
		encryption_order = MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE;
	sess_private_data->bs_bpt_eo_es = rte_cpu_to_be_32
			(MLX5_BSF_SIZE_64B << MLX5_BSF_SIZE_OFFSET |
			 MLX5_BSF_P_TYPE_CRYPTO << MLX5_BSF_P_TYPE_OFFSET |
			 encryption_order << MLX5_ENCRYPTION_ORDER_OFFSET |
			 MLX5_ENCRYPTION_STANDARD_AES_XTS);
	switch (xform->cipher.dataunit_len) {
	case 0:
		sess_private_data->bsp_res = 0;
		break;
	case 512:
		sess_private_data->bsp_res = rte_cpu_to_be_32
					     ((uint32_t)MLX5_BLOCK_SIZE_512B <<
					     MLX5_BLOCK_SIZE_OFFSET);
		break;
	case 4096:
		sess_private_data->bsp_res = rte_cpu_to_be_32
					     ((uint32_t)MLX5_BLOCK_SIZE_4096B <<
					     MLX5_BLOCK_SIZE_OFFSET);
		break;
	case 1048576:
		sess_private_data->bsp_res = rte_cpu_to_be_32
					     ((uint32_t)MLX5_BLOCK_SIZE_1MB <<
					     MLX5_BLOCK_SIZE_OFFSET);
		break;
	default:
		DRV_LOG(ERR, "Cipher data unit length is not supported.");
		return -ENOTSUP;
	}
	sess_private_data->iv_offset = cipher->iv.offset;
	sess_private_data->dek_id =
			rte_cpu_to_be_32(sess_private_data->dek->obj->id &
					 0xffffff);
	DRV_LOG(DEBUG, "Session %p was configured.", sess_private_data);
	return 0;
}

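/*
 * Release the DevX QP and CQ objects, free the MR B-tree cache and the
 * queue pair memory. Safe to call with a NULL queue pair.
 */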
static void
mlx5_crypto_xts_qp_release(struct mlx5_crypto_qp *qp)
{
	if (qp == NULL)
		return;
	mlx5_devx_qp_destroy(&qp->qp_obj);
	mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
	mlx5_devx_cq_destroy(&qp->cq_obj);
	rte_free(qp);
}

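/* Cryptodev callback: release the indirect mkeys and the queue pair. */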
static int
mlx5_crypto_xts_queue_pair_release(struct rte_cryptodev *dev, uint16_t qp_id)
{
	struct mlx5_crypto_qp *qp = dev->data->queue_pairs[qp_id];

	mlx5_crypto_indirect_mkeys_release(qp, qp->entries_n);
	mlx5_crypto_xts_qp_release(qp);
	dev->data->queue_pairs[qp_id] = NULL;
	return 0;
}

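/*
 * Map the per-op cipher data length to the BSF block size field, used for
 * sessions configured without a fixed data-unit length.
 * Returns UINT32_MAX for unsupported lengths.
 */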
static __rte_noinline uint32_t
mlx5_crypto_xts_get_block_size(struct rte_crypto_op *op)
{
	uint32_t bl = op->sym->cipher.data.length;

	switch (bl) {
	case (1 << 20):
		return RTE_BE32(MLX5_BLOCK_SIZE_1MB << MLX5_BLOCK_SIZE_OFFSET);
	case (1 << 12):
		return RTE_BE32(MLX5_BLOCK_SIZE_4096B <<
				MLX5_BLOCK_SIZE_OFFSET);
	case (1 << 9):
		return RTE_BE32(MLX5_BLOCK_SIZE_512B << MLX5_BLOCK_SIZE_OFFSET);
	default:
		DRV_LOG(ERR, "Unknown block size: %u.", bl);
		return UINT32_MAX;
	}
}

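/*
 * Fill one KLM data segment from an mbuf segment, starting at the given
 * offset and limited by the remaining cipher data length.
 * Returns the segment lkey, or UINT32_MAX on MR lookup failure.
 */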
static __rte_always_inline uint32_t
mlx5_crypto_xts_klm_set(struct mlx5_crypto_qp *qp, struct rte_mbuf *mbuf,
			struct mlx5_wqe_dseg *klm, uint32_t offset,
			uint32_t *remain)
{
	uint32_t data_len = (rte_pktmbuf_data_len(mbuf) - offset);
	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);

	if (data_len > *remain)
		data_len = *remain;
	*remain -= data_len;
	klm->bcount = rte_cpu_to_be_32(data_len);
	klm->pbuf = rte_cpu_to_be_64(addr);
	klm->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf);
	return klm->lkey;
}

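/*
 * Build the KLM list covering the cipher data of a (possibly multi-segment)
 * mbuf chain. Returns the number of KLM entries, or 0 on error with the op
 * status set accordingly.
 */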
static __rte_always_inline uint32_t
mlx5_crypto_xts_klms_set(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op,
			 struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm)
{
	uint32_t remain_len = op->sym->cipher.data.length;
	uint32_t nb_segs = mbuf->nb_segs;
	uint32_t klm_n = 1u;

	/* The first mbuf must account for the cipher data offset. */
	if (unlikely(mlx5_crypto_xts_klm_set(qp, mbuf, klm,
		     op->sym->cipher.data.offset, &remain_len) == UINT32_MAX)) {
		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
		return 0;
	}
	while (remain_len) {
		nb_segs--;
		mbuf = mbuf->next;
		if (unlikely(mbuf == NULL || nb_segs == 0)) {
			op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
			return 0;
		}
		if (unlikely(mlx5_crypto_xts_klm_set(qp, mbuf, ++klm, 0,
						&remain_len) == UINT32_MAX)) {
			op->status = RTE_CRYPTO_OP_STATUS_ERROR;
			return 0;
		}
		klm_n++;
	}
	return klm_n;
}

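/*
 * Build one WQE set for a crypto op: a UMR WQE carrying the KLM list and
 * the crypto BSF, followed by an RDMA_WRITE WQE, and a NOP WQE when padding
 * to the fixed WQE-set stride is needed.
 * Returns 1 on success, 0 on failure with the op status set.
 */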
static __rte_always_inline int
mlx5_crypto_xts_wqe_set(struct mlx5_crypto_priv *priv,
			 struct mlx5_crypto_qp *qp,
			 struct rte_crypto_op *op,
			 struct mlx5_umr_wqe *umr)
{
	struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
	struct mlx5_wqe_cseg *cseg = &umr->ctr;
	struct mlx5_wqe_mkey_cseg *mkc = &umr->mkc;
	struct mlx5_wqe_dseg *klms = &umr->kseg[0];
	struct mlx5_wqe_umr_bsf_seg *bsf = ((struct mlx5_wqe_umr_bsf_seg *)
				      RTE_PTR_ADD(umr, priv->umr_wqe_size)) - 1;
	uint32_t ds;
	bool ipl = op->sym->m_dst == NULL || op->sym->m_dst == op->sym->m_src;
	/* Set UMR WQE. */
	uint32_t klm_n = mlx5_crypto_xts_klms_set(qp, op,
				   ipl ? op->sym->m_src : op->sym->m_dst, klms);

	if (unlikely(klm_n == 0))
		return 0;
	bsf->bs_bpt_eo_es = sess->bs_bpt_eo_es;
	if (unlikely(!sess->bsp_res)) {
		bsf->bsp_res = mlx5_crypto_xts_get_block_size(op);
		if (unlikely(bsf->bsp_res == UINT32_MAX)) {
			op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
			return 0;
		}
	} else {
		bsf->bsp_res = sess->bsp_res;
	}
	bsf->raw_data_size = rte_cpu_to_be_32(op->sym->cipher.data.length);
	memcpy(bsf->xts_initial_tweak,
	       rte_crypto_op_ctod_offset(op, uint8_t *, sess->iv_offset), 16);
	bsf->res_dp = sess->dek_id;
	mkc->len = rte_cpu_to_be_64(op->sym->cipher.data.length);
	cseg->opcode = rte_cpu_to_be_32((qp->db_pi << 8) | MLX5_OPCODE_UMR);
	qp->db_pi += priv->umr_wqe_stride;
	/* Set RDMA_WRITE WQE. */
	cseg = RTE_PTR_ADD(cseg, priv->umr_wqe_size);
	klms = RTE_PTR_ADD(cseg, sizeof(struct mlx5_rdma_write_wqe));
	if (!ipl) {
		klm_n = mlx5_crypto_xts_klms_set(qp, op, op->sym->m_src, klms);
		if (unlikely(klm_n == 0))
			return 0;
	} else {
		memcpy(klms, &umr->kseg[0], sizeof(*klms) * klm_n);
	}
	ds = 2 + klm_n;
	cseg->sq_ds = rte_cpu_to_be_32((qp->qp_obj.qp->id << 8) | ds);
	cseg->opcode = rte_cpu_to_be_32((qp->db_pi << 8) |
							MLX5_OPCODE_RDMA_WRITE);
	ds = RTE_ALIGN(ds, 4);
	qp->db_pi += ds >> 2;
	/* Set NOP WQE if needed. */
	if (priv->max_rdmar_ds > ds) {
		cseg += ds;
		ds = priv->max_rdmar_ds - ds;
		cseg->sq_ds = rte_cpu_to_be_32((qp->qp_obj.qp->id << 8) | ds);
		cseg->opcode = rte_cpu_to_be_32((qp->db_pi << 8) |
							       MLX5_OPCODE_NOP);
		qp->db_pi += ds >> 2; /* DS is guaranteed to be 4-aligned here. */
	}
	qp->wqe = (uint8_t *)cseg;
	return 1;
}

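/*
 * Enqueue burst: build a WQE set in the SQ buffer for each op, then ring
 * the doorbell once for the whole burst.
 */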
static uint16_t
mlx5_crypto_xts_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
			      uint16_t nb_ops)
{
	struct mlx5_crypto_qp *qp = queue_pair;
	struct mlx5_crypto_priv *priv = qp->priv;
	struct mlx5_umr_wqe *umr;
	struct rte_crypto_op *op;
	uint16_t mask = qp->entries_n - 1;
	uint16_t remain = qp->entries_n - (qp->pi - qp->ci);
	uint32_t idx;

	if (remain < nb_ops)
		nb_ops = remain;
	else
		remain = nb_ops;
	if (unlikely(remain == 0))
		return 0;
	do {
		idx = qp->pi & mask;
		op = *ops++;
		umr = RTE_PTR_ADD(qp->qp_obj.umem_buf,
			priv->wqe_set_size * idx);
		if (unlikely(mlx5_crypto_xts_wqe_set(priv, qp, op, umr) == 0)) {
			qp->stats.enqueue_err_count++;
			if (remain != nb_ops) {
				qp->stats.enqueued_count -= remain;
				break;
			}
			return 0;
		}
		qp->ops[idx] = op;
		qp->pi++;
	} while (--remain);
	qp->stats.enqueued_count += nb_ops;
	mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
			   qp->db_pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
			   !priv->uar.dbnc);
	return nb_ops;
}

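/* Report a completion error: log the CQE syndrome and mark the op failed. */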
static __rte_noinline void
mlx5_crypto_xts_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
{
	const uint32_t idx = qp->ci & (qp->entries_n - 1);
	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
							&qp->cq_obj.cqes[idx];

	op->status = RTE_CRYPTO_OP_STATUS_ERROR;
	qp->stats.dequeue_err_count++;
	DRV_LOG(ERR, "CQE ERR:%x.", rte_be_to_cpu_32(cqe->syndrome));
}

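/*
 * Dequeue burst: poll the CQ for completed ops, update the CQ doorbell
 * record and return the completed ops in submission order.
 */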
static uint16_t
mlx5_crypto_xts_dequeue_burst(void *queue_pair, struct rte_crypto_op **ops,
			  uint16_t nb_ops)
{
	struct mlx5_crypto_qp *qp = queue_pair;
	volatile struct mlx5_cqe *restrict cqe;
	struct rte_crypto_op *restrict op;
	const unsigned int cq_size = qp->entries_n;
	const unsigned int mask = cq_size - 1;
	uint32_t idx;
	uint32_t next_idx = qp->ci & mask;
	const uint16_t max = RTE_MIN((uint16_t)(qp->pi - qp->ci), nb_ops);
	uint16_t i = 0;
	int ret;

	if (unlikely(max == 0))
		return 0;
	do {
		idx = next_idx;
		next_idx = (qp->ci + 1) & mask;
		op = qp->ops[idx];
		cqe = &qp->cq_obj.cqes[idx];
		ret = check_cqe(cqe, cq_size, qp->ci);
		rte_io_rmb();
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
				mlx5_crypto_xts_cqe_err_handle(qp, op);
			break;
		}
		op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
		ops[i++] = op;
		qp->ci++;
	} while (i < max);
	if (likely(i != 0)) {
		rte_io_wmb();
		qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->ci);
		qp->stats.dequeued_count += i;
	}
	return i;
}

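/*
 * Pre-build the static parts of every WQE set in the SQ buffer: the UMR
 * control/mkey segments and the RDMA_WRITE control/remote-address segments
 * that never change per operation.
 */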
static void
mlx5_crypto_xts_qp_init(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp)
{
	uint32_t i;

	for (i = 0; i < qp->entries_n; i++) {
		struct mlx5_wqe_cseg *cseg = RTE_PTR_ADD(qp->qp_obj.umem_buf,
			i * priv->wqe_set_size);
		struct mlx5_wqe_umr_cseg *ucseg = (struct mlx5_wqe_umr_cseg *)
								     (cseg + 1);
		struct mlx5_wqe_umr_bsf_seg *bsf =
			(struct mlx5_wqe_umr_bsf_seg *)(RTE_PTR_ADD(cseg,
						       priv->umr_wqe_size)) - 1;
		struct mlx5_wqe_rseg *rseg;

		/* Init UMR WQE. */
		cseg->sq_ds = rte_cpu_to_be_32((qp->qp_obj.qp->id << 8) |
					 (priv->umr_wqe_size / MLX5_WSEG_SIZE));
		cseg->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
				       MLX5_COMP_MODE_OFFSET);
		cseg->misc = rte_cpu_to_be_32(qp->mkey[i]->id);
		ucseg->if_cf_toe_cq_res = RTE_BE32(1u << MLX5_UMRC_IF_OFFSET);
		ucseg->mkey_mask = RTE_BE64(1u << 0); /* Mkey length bit. */
		ucseg->ko_to_bs = rte_cpu_to_be_32
			((MLX5_CRYPTO_KLM_SEGS_NUM(priv->umr_wqe_size) <<
			 MLX5_UMRC_KO_OFFSET) | (4 << MLX5_UMRC_TO_BS_OFFSET));
		bsf->keytag = priv->keytag;
		/* Init RDMA WRITE WQE. */
		cseg = RTE_PTR_ADD(cseg, priv->umr_wqe_size);
		cseg->flags = RTE_BE32((MLX5_COMP_ALWAYS <<
				      MLX5_COMP_MODE_OFFSET) |
				      MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE);
		rseg = (struct mlx5_wqe_rseg *)(cseg + 1);
		rseg->rkey = rte_cpu_to_be_32(qp->mkey[i]->id);
	}
}

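/*
 * Indirect mkey preparation callback: return the base address of the WQE
 * set at the given index.
 */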
static void *
mlx5_crypto_gcm_mkey_klm_update(struct mlx5_crypto_priv *priv,
				struct mlx5_crypto_qp *qp,
				uint32_t idx)
{
	return RTE_PTR_ADD(qp->qp_obj.umem_buf, priv->wqe_set_size * idx);
}

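/*
 * Cryptodev callback: create the CQ and QP DevX objects, the MR cache and
 * the per-descriptor indirect mkeys, move the QP to RTS in self-loopback
 * mode and pre-initialize the static WQE fields.
 */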
static int
mlx5_crypto_xts_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
				 const struct rte_cryptodev_qp_conf *qp_conf,
				 int socket_id)
{
	struct mlx5_crypto_priv *priv = dev->data->dev_private;
	struct mlx5_devx_qp_attr attr = {0};
	struct mlx5_crypto_qp *qp;
	uint16_t log_nb_desc = rte_log2_u32(qp_conf->nb_descriptors);
	uint32_t ret;
	uint32_t alloc_size = sizeof(*qp);
	uint32_t log_wqbb_n;
	struct mlx5_devx_cq_attr cq_attr = {
		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
	};
	struct mlx5_devx_mkey_attr mkey_attr = {
		.pd = priv->cdev->pdn,
		.umr_en = 1,
		.crypto_en = 1,
		.set_remote_rw = 1,
		.klm_num = MLX5_CRYPTO_KLM_SEGS_NUM(priv->umr_wqe_size),
	};

	if (dev->data->queue_pairs[qp_id] != NULL)
		mlx5_crypto_xts_queue_pair_release(dev, qp_id);
	alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
	alloc_size += (sizeof(struct rte_crypto_op *) +
		       sizeof(struct mlx5_devx_obj *)) *
		       RTE_BIT32(log_nb_desc);
	qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
				socket_id);
	if (qp == NULL) {
		DRV_LOG(ERR, "Failed to allocate QP memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	if (mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq_obj, log_nb_desc,
				&cq_attr, socket_id) != 0) {
		DRV_LOG(ERR, "Failed to create CQ.");
		goto error;
	}
	log_wqbb_n = rte_log2_u32(RTE_BIT32(log_nb_desc) *
				(priv->wqe_set_size / MLX5_SEND_WQE_BB));
	attr.pd = priv->cdev->pdn;
	attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
	attr.cqn = qp->cq_obj.cq->id;
	attr.num_of_receive_wqes = 0;
	attr.num_of_send_wqbbs = RTE_BIT32(log_wqbb_n);
	attr.ts_format =
		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj,
					attr.num_of_send_wqbbs * MLX5_WQE_SIZE,
					&attr, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to create QP.");
		goto error;
	}
	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
			      priv->dev_config.socket_id) != 0) {
		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
			(uint32_t)qp_id);
		rte_errno = ENOMEM;
		goto error;
	}
	/*
	 * To configure self-loopback, the remote QP ID passed to the devx
	 * qp2rts call is the ID of this same QP.
	 */
	if (mlx5_devx_qp2rts(&qp->qp_obj, qp->qp_obj.qp->id))
		goto error;
	qp->mkey = (struct mlx5_devx_obj **)RTE_ALIGN((uintptr_t)(qp + 1),
							   RTE_CACHE_LINE_SIZE);
	qp->ops = (struct rte_crypto_op **)(qp->mkey + RTE_BIT32(log_nb_desc));
	qp->entries_n = 1 << log_nb_desc;
	if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
					       mlx5_crypto_gcm_mkey_klm_update)) {
		DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
		rte_errno = ENOMEM;
		goto error;
	}
	mlx5_crypto_xts_qp_init(priv, qp);
	qp->priv = priv;
	dev->data->queue_pairs[qp_id] = qp;
	return 0;
error:
	mlx5_crypto_xts_qp_release(qp);
	return -1;
}

/*
 * Calculate the UMR WQE size and the RDMA_WRITE WQE size with the
 * following limitations:
 *	- Each WQE size is a multiple of 64 bytes.
 *	- The sum of the UMR WQE size and the RDMA_WRITE WQE size is a power of 2.
 *	- The number of entries in the UMR WQE's KLM list is a multiple of 4.
 */
static void
mlx5_crypto_xts_get_wqe_sizes(uint32_t segs_num, uint32_t *umr_size,
			      uint32_t *rdmaw_size)
{
	uint32_t diff, wqe_set_size;

	*umr_size = MLX5_CRYPTO_UMR_WQE_STATIC_SIZE +
			RTE_ALIGN(segs_num, 4) *
			sizeof(struct mlx5_wqe_dseg);
	/* Make sure the UMR WQE size is a multiple of the WQBB size. */
	*umr_size = RTE_ALIGN(*umr_size, MLX5_SEND_WQE_BB);
	*rdmaw_size = sizeof(struct mlx5_rdma_write_wqe) +
			sizeof(struct mlx5_wqe_dseg) *
			(segs_num <= 2 ? 2 : 2 +
			RTE_ALIGN(segs_num - 2, 4));
	/* Make sure the RDMA_WRITE WQE size is a multiple of the WQBB size. */
	*rdmaw_size = RTE_ALIGN(*rdmaw_size, MLX5_SEND_WQE_BB);
	wqe_set_size = *rdmaw_size + *umr_size;
	diff = rte_align32pow2(wqe_set_size) - wqe_set_size;
	/* Make sure the WQE-set size is a power of 2. */
	if (diff)
		*umr_size += diff;
}

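/*
 * Compute the largest number of data segments whose UMR and RDMA_WRITE
 * WQEs both fit within the device maximum WQE size.
 */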
static uint8_t
mlx5_crypto_xts_max_segs_num(uint16_t max_wqe_size)
{
	int klms_sizes = max_wqe_size - MLX5_CRYPTO_UMR_WQE_STATIC_SIZE;
	uint32_t max_segs_cap = RTE_ALIGN_FLOOR(klms_sizes, MLX5_SEND_WQE_BB) /
			sizeof(struct mlx5_wqe_dseg);

	MLX5_ASSERT(klms_sizes >= MLX5_SEND_WQE_BB);
	while (max_segs_cap) {
		uint32_t umr_wqe_size, rdmw_wqe_size;

		mlx5_crypto_xts_get_wqe_sizes(max_segs_cap, &umr_wqe_size,
						&rdmw_wqe_size);
		if (umr_wqe_size <= max_wqe_size &&
				rdmw_wqe_size <= max_wqe_size)
			break;
		max_segs_cap -= 4;
	}
	return max_segs_cap;
}

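/*
 * Derive the UMR and RDMA_WRITE WQE sizes for the requested maximum number
 * of segments and validate them against the device maximum WQE size.
 */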
static int
mlx5_crypto_xts_configure_wqe_size(struct mlx5_crypto_priv *priv,
				   uint16_t max_wqe_size, uint32_t max_segs_num)
{
	uint32_t rdmw_wqe_size, umr_wqe_size;

	mlx5_crypto_xts_get_wqe_sizes(max_segs_num, &umr_wqe_size,
			&rdmw_wqe_size);
	priv->wqe_set_size = rdmw_wqe_size + umr_wqe_size;
	if (umr_wqe_size > max_wqe_size ||
				rdmw_wqe_size > max_wqe_size) {
		DRV_LOG(ERR, "Invalid max_segs_num: %u. Should be %u or lower.",
			max_segs_num,
			mlx5_crypto_xts_max_segs_num(max_wqe_size));
		rte_errno = EINVAL;
		return -EINVAL;
	}
	priv->umr_wqe_size = (uint16_t)umr_wqe_size;
	priv->umr_wqe_stride = priv->umr_wqe_size / MLX5_SEND_WQE_BB;
	priv->max_rdmar_ds = rdmw_wqe_size / sizeof(struct mlx5_wqe_dseg);
	return 0;
}

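/*
 * AES-XTS engine initialization: configure the WQE sizes and plug the
 * XTS-specific session, queue pair and enqueue/dequeue callbacks into the
 * crypto device.
 */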
int
mlx5_crypto_xts_init(struct mlx5_crypto_priv *priv)
{
	struct mlx5_common_device *cdev = priv->cdev;
	struct rte_cryptodev *crypto_dev = priv->crypto_dev;
	struct rte_cryptodev_ops *dev_ops = crypto_dev->dev_ops;
	int ret;

	ret = mlx5_crypto_xts_configure_wqe_size(priv,
		cdev->config.hca_attr.max_wqe_sz_sq, priv->max_segs_num);
	if (ret)
		return -EINVAL;
	/* Override the ops with the AES-XTS specific ones. */
	dev_ops->sym_session_configure = mlx5_crypto_xts_sym_session_configure;
	dev_ops->queue_pair_setup = mlx5_crypto_xts_queue_pair_setup;
	dev_ops->queue_pair_release = mlx5_crypto_xts_queue_pair_release;
	crypto_dev->dequeue_burst = mlx5_crypto_xts_dequeue_burst;
	crypto_dev->enqueue_burst = mlx5_crypto_xts_enqueue_burst;
	priv->caps = mlx5_crypto_caps;
	return 0;
}
645