xref: /dpdk/drivers/crypto/mlx5/mlx5_crypto_gcm.c (revision e77506397fc8005c5129e22e9e2d15d5876790fd)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2023 NVIDIA Corporation & Affiliates
3  */
4 
5 #include <rte_malloc.h>
6 #include <rte_mempool.h>
7 #include <rte_eal_paging.h>
8 #include <rte_errno.h>
9 #include <rte_log.h>
10 #include <bus_pci_driver.h>
11 #include <rte_memory.h>
12 #include <rte_io.h>
13 
14 #include <mlx5_glue.h>
15 #include <mlx5_common.h>
16 #include <mlx5_devx_cmds.h>
17 #include <mlx5_common_os.h>
18 
19 #include "mlx5_crypto_utils.h"
20 #include "mlx5_crypto.h"
21 
22 /*
23  * AES-GCM uses indirect KLM mode. The UMR WQE comprises a WQE control
24  * segment, a UMR control segment, an mkey context segment and the indirect
25  * KLM. The WQE size is aligned up to 3 WQEBBs.
26  */
27 #define MLX5_UMR_GCM_WQE_SIZE \
28 	(RTE_ALIGN(sizeof(struct mlx5_umr_wqe) + sizeof(struct mlx5_wqe_dseg), \
29 			MLX5_SEND_WQE_BB))
30 
31 #define MLX5_UMR_GCM_WQE_SET_SIZE \
32 	(MLX5_UMR_GCM_WQE_SIZE + \
33 	 RTE_ALIGN(sizeof(struct mlx5_wqe_send_en_wqe), \
34 	 MLX5_SEND_WQE_BB))
35 
36 #define MLX5_UMR_GCM_WQE_STRIDE \
37 	(MLX5_UMR_GCM_WQE_SIZE / MLX5_SEND_WQE_BB)
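
/*
 * Sizing sketch (assuming the usual 64-byte MLX5_SEND_WQE_BB): the UMR WQE
 * rounds up to 3 WQEBBs (192 bytes) and the SEND_EN WQE rounds up to a
 * single WQEBB, so one UMR + SEND_EN WQE set occupies 4 WQEBBs and the UMR
 * WQE stride is 3 WQEBBs.
 */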
38 
39 #define MLX5_MMO_CRYPTO_OPC (MLX5_OPCODE_MMO | \
40 	(MLX5_OPC_MOD_MMO_CRYPTO << WQE_CSEG_OPC_MOD_OFFSET))
41 
42 /*
43  * The default op status value is RTE_CRYPTO_OP_STATUS_SUCCESS.
44  * Ops that need a tag copy must carry a different status value.
45  */
46 #define MLX5_CRYPTO_OP_STATUS_GCM_TAG_COPY (RTE_CRYPTO_OP_STATUS_SUCCESS + 1)
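
/*
 * Tag-copy lifecycle: the enqueue path sets this status on encryption ops
 * whose hardware-written tag lands right after the payload rather than in
 * the application digest buffer; the dequeue path then copies the tag into
 * op->sym->aead.digest.data and restores RTE_CRYPTO_OP_STATUS_SUCCESS.
 */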
47 
48 struct mlx5_crypto_gcm_op_info {
49 	bool need_umr;
50 	bool is_oop;
51 	bool is_enc;
52 	void *digest;
53 	void *src_addr;
54 };
55 
56 struct mlx5_crypto_gcm_data {
57 	void *src_addr;
58 	uint32_t src_bytes;
59 	void *dst_addr;
60 	uint32_t dst_bytes;
61 	uint32_t src_mkey;
62 	uint32_t dst_mkey;
63 };
64 
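/*
 * While a tag copy is pending, the descriptor below is overlaid on
 * op->sym->aead.digest.data, reusing the application digest buffer as
 * scratch space; the 12-byte minimum digest length is large enough to hold
 * it on 64-bit platforms.
 */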
65 struct __rte_packed_begin mlx5_crypto_gcm_tag_cpy_info {
66 	void *digest;
67 	uint8_t tag_len;
68 } __rte_packed_end;
69 
70 static struct rte_cryptodev_capabilities mlx5_crypto_gcm_caps[] = {
71 	{
72 		.op = RTE_CRYPTO_OP_TYPE_UNDEFINED,
73 	},
74 	{
75 		.op = RTE_CRYPTO_OP_TYPE_UNDEFINED,
76 	}
77 };
78 
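/*
 * Fill the DEK attributes for an AES-GCM key. The AES key is copied as-is
 * into the DEK key field: a 128-bit key is placed in the second half of the
 * 256-bit field (hence the offset of 16), while a 256-bit key fills it
 * entirely.
 */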
79 int
80 mlx5_crypto_dek_fill_gcm_attr(struct mlx5_crypto_dek *dek,
81 			      struct mlx5_devx_dek_attr *dek_attr,
82 			      void *cb_ctx)
83 {
84 	uint32_t offset = 0;
85 	struct mlx5_crypto_dek_ctx *ctx = cb_ctx;
86 	struct rte_crypto_aead_xform *aead_ctx = &ctx->xform->aead;
87 
88 	if (aead_ctx->algo != RTE_CRYPTO_AEAD_AES_GCM) {
89 		DRV_LOG(ERR, "Only AES-GCM algo supported.");
90 		return -EINVAL;
91 	}
92 	dek_attr->key_purpose = MLX5_CRYPTO_KEY_PURPOSE_GCM;
93 	switch (aead_ctx->key.length) {
94 	case 16:
95 		offset = 16;
96 		dek->size = 16;
97 		dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_128b;
98 		break;
99 	case 32:
100 		dek->size = 32;
101 		dek_attr->key_size = MLX5_CRYPTO_KEY_SIZE_256b;
102 		break;
103 	default:
104 		DRV_LOG(ERR, "Key size not supported.");
105 		return -EINVAL;
106 	}
107 	memcpy(&dek_attr->key[offset], aead_ctx->key.data, aead_ctx->key.length);
108 	memcpy(&dek->data, aead_ctx->key.data, aead_ctx->key.length);
109 	return 0;
110 }
111 
112 static int
113 mlx5_crypto_generate_gcm_cap(struct mlx5_hca_crypto_mmo_attr *mmo_attr,
114 			     struct rte_cryptodev_capabilities *cap)
115 {
116 	/* Init key size. */
117 	if (mmo_attr->gcm_128_encrypt && mmo_attr->gcm_128_decrypt &&
118 		mmo_attr->gcm_256_encrypt && mmo_attr->gcm_256_decrypt) {
119 		cap->sym.aead.key_size.min = 16;
120 		cap->sym.aead.key_size.max = 32;
121 		cap->sym.aead.key_size.increment = 16;
122 	} else if (mmo_attr->gcm_256_encrypt && mmo_attr->gcm_256_decrypt) {
123 		cap->sym.aead.key_size.min = 32;
124 		cap->sym.aead.key_size.max = 32;
125 		cap->sym.aead.key_size.increment = 0;
126 	} else if (mmo_attr->gcm_128_encrypt && mmo_attr->gcm_128_decrypt) {
127 		cap->sym.aead.key_size.min = 16;
128 		cap->sym.aead.key_size.max = 16;
129 		cap->sym.aead.key_size.increment = 0;
130 	} else {
131 		DRV_LOG(ERR, "AES-GCM encryption/decryption is not supported.");
132 		return -1;
133 	}
134 	/* Init tag size. */
135 	if (mmo_attr->gcm_auth_tag_128 && mmo_attr->gcm_auth_tag_96) {
136 		cap->sym.aead.digest_size.min = 12;
137 		cap->sym.aead.digest_size.max = 16;
138 		cap->sym.aead.digest_size.increment = 4;
139 	} else if (mmo_attr->gcm_auth_tag_96) {
140 		cap->sym.aead.digest_size.min = 12;
141 		cap->sym.aead.digest_size.max = 12;
142 		cap->sym.aead.digest_size.increment = 0;
143 	} else if (mmo_attr->gcm_auth_tag_128) {
144 		cap->sym.aead.digest_size.min = 16;
145 		cap->sym.aead.digest_size.max = 16;
146 		cap->sym.aead.digest_size.increment = 0;
147 	} else {
148 		DRV_LOG(ERR, "No supported AES-GCM tag size available.");
149 		return -1;
150 	}
151 	/* Init AAD size. */
152 	cap->sym.aead.aad_size.min = 0;
153 	cap->sym.aead.aad_size.max = UINT16_MAX;
154 	cap->sym.aead.aad_size.increment = 1;
155 	/* Init IV size. */
156 	cap->sym.aead.iv_size.min = 12;
157 	cap->sym.aead.iv_size.max = 12;
158 	cap->sym.aead.iv_size.increment = 0;
159 	/* Init the remaining fields. */
160 	cap->op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
161 	cap->sym.xform_type = RTE_CRYPTO_SYM_XFORM_AEAD;
162 	cap->sym.aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
163 	return 0;
164 }
165 
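/*
 * Only a single AEAD AES-GCM xform is accepted by the session configure
 * routine below. A minimal sketch of a matching application-side xform
 * (the key pointer, IV offset, lengths and the 16-byte sizes are
 * illustrative placeholders, not requirements):
 *
 *	struct rte_crypto_sym_xform xform = {
 *		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
 *		.aead = {
 *			.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
 *			.algo = RTE_CRYPTO_AEAD_AES_GCM,
 *			.key = { .data = key_data, .length = 16 },
 *			.iv = { .offset = iv_offset, .length = 12 },
 *			.digest_length = 16,
 *			.aad_length = aad_length,
 *		},
 *	};
 */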
166 static int
167 mlx5_crypto_sym_gcm_session_configure(struct rte_cryptodev *dev,
168 				  struct rte_crypto_sym_xform *xform,
169 				  struct rte_cryptodev_sym_session *session)
170 {
171 	struct mlx5_crypto_priv *priv = dev->data->dev_private;
172 	struct mlx5_crypto_session *sess_private_data = CRYPTODEV_GET_SYM_SESS_PRIV(session);
173 	struct rte_crypto_aead_xform *aead = &xform->aead;
174 	uint32_t op_type;
175 
176 	if (unlikely(xform->next != NULL)) {
177 		DRV_LOG(ERR, "Xform next is not supported.");
178 		return -ENOTSUP;
179 	}
180 	if (aead->algo != RTE_CRYPTO_AEAD_AES_GCM) {
181 		DRV_LOG(ERR, "Only AES-GCM algorithm is supported.");
182 		return -ENOTSUP;
183 	}
184 
185 	if (aead->op == RTE_CRYPTO_AEAD_OP_ENCRYPT)
186 		op_type = MLX5_CRYPTO_OP_TYPE_ENCRYPTION;
187 	else
188 		op_type = MLX5_CRYPTO_OP_TYPE_DECRYPTION;
189 	sess_private_data->op_type = op_type;
190 	sess_private_data->mmo_ctrl = rte_cpu_to_be_32
191 			(op_type << MLX5_CRYPTO_MMO_OP_OFFSET |
192 			 MLX5_ENCRYPTION_TYPE_AES_GCM << MLX5_CRYPTO_MMO_TYPE_OFFSET);
193 	sess_private_data->wqe_aad_len = rte_cpu_to_be_32((uint32_t)aead->aad_length);
194 	sess_private_data->wqe_tag_len = rte_cpu_to_be_32((uint32_t)aead->digest_length);
195 	sess_private_data->aad_len = aead->aad_length;
196 	sess_private_data->tag_len = aead->digest_length;
197 	sess_private_data->iv_offset = aead->iv.offset;
198 	sess_private_data->iv_len = aead->iv.length;
199 	sess_private_data->dek = mlx5_crypto_dek_prepare(priv, xform);
200 	if (sess_private_data->dek == NULL) {
201 		DRV_LOG(ERR, "Failed to prepare dek.");
202 		return -ENOMEM;
203 	}
204 	sess_private_data->dek_id =
205 			rte_cpu_to_be_32(sess_private_data->dek->obj->id &
206 					 0xffffff);
207 	DRV_LOG(DEBUG, "Session %p was configured.", sess_private_data);
208 	return 0;
209 }
210 
211 static void *
212 mlx5_crypto_gcm_mkey_klm_update(struct mlx5_crypto_priv *priv,
213 				struct mlx5_crypto_qp *qp,
214 				uint32_t idx)
215 {
216 	return &qp->klm_array[idx * priv->max_klm_num];
217 }
218 
219 static int
220 mlx5_crypto_gcm_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
221 {
222 	struct mlx5_crypto_priv *priv = dev->data->dev_private;
223 	struct mlx5_crypto_qp *qp = dev->data->queue_pairs[qp_id];
224 
225 	if (qp->umr_qp_obj.qp != NULL)
226 		mlx5_devx_qp_destroy(&qp->umr_qp_obj);
227 	if (qp->qp_obj.qp != NULL)
228 		mlx5_devx_qp_destroy(&qp->qp_obj);
229 	if (qp->cq_obj.cq != NULL)
230 		mlx5_devx_cq_destroy(&qp->cq_obj);
231 	if (qp->mr.obj != NULL) {
232 		void *opaq = qp->mr.addr;
233 
234 		priv->dereg_mr_cb(&qp->mr);
235 		rte_free(opaq);
236 	}
237 	mlx5_crypto_indirect_mkeys_release(qp, qp->entries_n);
238 	mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
239 	rte_free(qp->ipsec_mem);
240 	rte_free(qp);
241 	dev->data->queue_pairs[qp_id] = NULL;
242 	return 0;
243 }
244 
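/*
 * Pre-initialize the constant part of every GGA crypto WQE: a DS count of 4
 * (one 64-byte WQE), the completion flags, and the pointer to the per-WQE
 * opaque buffer that later receives the op's IV, AAD size and tag size.
 */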
245 static void
246 mlx5_crypto_gcm_init_qp(struct mlx5_crypto_qp *qp)
247 {
248 	volatile struct mlx5_gga_wqe *restrict wqe =
249 				    (volatile struct mlx5_gga_wqe *)qp->qp_obj.wqes;
250 	volatile union mlx5_gga_crypto_opaque *opaq = qp->opaque_addr;
251 	const uint32_t sq_ds = rte_cpu_to_be_32((qp->qp_obj.qp->id << 8) | 4u);
252 	const uint32_t flags = RTE_BE32(MLX5_COMP_ALWAYS <<
253 					MLX5_COMP_MODE_OFFSET);
254 	const uint32_t opaq_lkey = rte_cpu_to_be_32(qp->mr.lkey);
255 	int i;
256 
257 	/* The values of the following fields stay constant for all WQEs. */
258 	for (i = 0; i < qp->entries_n; ++i, ++wqe) {
259 		wqe->sq_ds = sq_ds;
260 		wqe->flags = flags;
261 		wqe->opaque_lkey = opaq_lkey;
262 		wqe->opaque_vaddr = rte_cpu_to_be_64((uint64_t)(uintptr_t)&opaq[i]);
263 	}
264 }
265 
266 static inline int
267 mlx5_crypto_gcm_umr_qp_setup(struct rte_cryptodev *dev, struct mlx5_crypto_qp *qp,
268 			     int socket_id)
269 {
270 	struct mlx5_crypto_priv *priv = dev->data->dev_private;
271 	struct mlx5_devx_qp_attr attr = {0};
272 	uint32_t ret;
273 	uint32_t log_wqbb_n;
274 
275 	/* Provision one UMR + SEND_EN WQE set per crypto WQE at maximum. */
276 	log_wqbb_n = rte_log2_u32(qp->entries_n *
277 			(MLX5_UMR_GCM_WQE_SET_SIZE / MLX5_SEND_WQE_BB));
278 	attr.pd = priv->cdev->pdn;
279 	attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
280 	attr.cqn = qp->cq_obj.cq->id;
281 	attr.num_of_receive_wqes = 0;
282 	attr.num_of_send_wqbbs = RTE_BIT32(log_wqbb_n);
283 	attr.ts_format =
284 		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
285 	attr.cd_master = 1;
286 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->umr_qp_obj,
287 				  attr.num_of_send_wqbbs * MLX5_SEND_WQE_BB,
288 				  &attr, socket_id);
289 	if (ret) {
290 		DRV_LOG(ERR, "Failed to create UMR QP.");
291 		return -1;
292 	}
293 	if (mlx5_devx_qp2rts(&qp->umr_qp_obj, qp->umr_qp_obj.qp->id)) {
294 		DRV_LOG(ERR, "Failed to change UMR QP state to RTS.");
295 		return -1;
296 	}
297 	/* Save the UMR WQEBBS for checking the WQE boundary. */
298 	qp->umr_wqbbs = attr.num_of_send_wqbbs;
299 	return 0;
300 }
301 
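/*
 * Queue pair memory layout created below: the qp structure is followed by
 * the ops[] ring and, in non-IPsec mode, by the per-entry indirect mkey
 * pointers. A separately registered buffer holds the opaque array followed
 * by the per-entry KLM arrays. The CQ holds one entry per descriptor in
 * IPsec-optimized mode and three times that otherwise, as the UMR QP
 * (UMR + SEND_EN WQEs) shares it.
 */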
302 static int
303 mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
304 			 const struct rte_cryptodev_qp_conf *qp_conf,
305 			 int socket_id)
306 {
307 	struct mlx5_crypto_priv *priv = dev->data->dev_private;
308 	struct mlx5_hca_attr *attr = &priv->cdev->config.hca_attr;
309 	struct mlx5_crypto_qp *qp;
310 	struct mlx5_devx_cq_attr cq_attr = {
311 		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
312 	};
313 	struct mlx5_devx_qp_attr qp_attr = {
314 		.pd = priv->cdev->pdn,
315 		.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
316 		.user_index = qp_id,
317 	};
318 	struct mlx5_devx_mkey_attr mkey_attr = {
319 		.pd = priv->cdev->pdn,
320 		.umr_en = 1,
321 		.klm_num = priv->max_klm_num,
322 	};
323 	uint32_t log_ops_n = rte_log2_u32(qp_conf->nb_descriptors);
324 	uint32_t entries = RTE_BIT32(log_ops_n);
325 	uint32_t alloc_size = sizeof(*qp);
326 	uint32_t extra_obj_size = 0;
327 	size_t mr_size, opaq_size;
328 	void *mr_buf;
329 	int ret;
330 
331 	if (!mlx5_crypto_is_ipsec_opt(priv))
332 		extra_obj_size = sizeof(struct mlx5_devx_obj *);
333 	alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
334 	alloc_size += (sizeof(struct rte_crypto_op *) +
335 		       extra_obj_size) * entries;
336 	qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
337 				socket_id);
338 	if (qp == NULL) {
339 		DRV_LOG(ERR, "Failed to allocate qp memory.");
340 		rte_errno = ENOMEM;
341 		return -rte_errno;
342 	}
343 	qp->priv = priv;
344 	qp->entries_n = entries;
345 	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
346 				  priv->dev_config.socket_id)) {
347 		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
348 			(uint32_t)qp_id);
349 		rte_errno = ENOMEM;
350 		goto err;
351 	}
352 	/*
353 	 * The KLM array that follows must be aligned to
354 	 * MLX5_UMR_KLM_PTR_ALIGN. Align opaq_size here so that
355 	 * the KLM pointer at this offset remains aligned.
356 	 */
357 	opaq_size = RTE_ALIGN(sizeof(union mlx5_gga_crypto_opaque) * entries,
358 			      MLX5_UMR_KLM_PTR_ALIGN);
359 	mr_size = (priv->max_klm_num * sizeof(struct mlx5_klm) * entries) + opaq_size;
360 	mr_buf = rte_calloc(__func__, (size_t)1, mr_size, MLX5_UMR_KLM_PTR_ALIGN);
361 	if (mr_buf == NULL) {
362 		DRV_LOG(ERR, "Failed to allocate mr memory.");
363 		rte_errno = ENOMEM;
364 		goto err;
365 	}
366 	if (priv->reg_mr_cb(priv->cdev->pd, mr_buf, mr_size, &qp->mr) != 0) {
367 		rte_free(mr_buf);
368 		DRV_LOG(ERR, "Failed to register opaque MR.");
369 		rte_errno = ENOMEM;
370 		goto err;
371 	}
372 	qp->opaque_addr = qp->mr.addr;
373 	qp->klm_array = RTE_PTR_ADD(qp->opaque_addr, opaq_size);
374 	/*
375 	 * Triple the CQ size, as the UMR QP, which posts UMR and SEND_EN
376 	 * WQEs, shares this CQ.
377 	 */
378 	qp->cq_entries_n = rte_align32pow2(entries * (mlx5_crypto_is_ipsec_opt(priv) ? 1 : 3));
379 	ret = mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq_obj,
380 				  rte_log2_u32(qp->cq_entries_n),
381 				  &cq_attr, socket_id);
382 	if (ret != 0) {
383 		DRV_LOG(ERR, "Failed to create CQ.");
384 		goto err;
385 	}
386 	qp_attr.cqn = qp->cq_obj.cq->id;
387 	qp_attr.ts_format = mlx5_ts_format_conv(attr->qp_ts_format);
388 	qp_attr.num_of_receive_wqes = 0;
389 	qp_attr.num_of_send_wqbbs = entries;
390 	qp_attr.mmo = attr->crypto_mmo.crypto_mmo_qp;
391 	/* Set MMO QP as follower as the input data may depend on UMR. */
392 	qp_attr.cd_slave_send = !mlx5_crypto_is_ipsec_opt(priv);
393 	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj,
394 				  qp_attr.num_of_send_wqbbs * MLX5_WQE_SIZE,
395 				  &qp_attr, socket_id);
396 	if (ret != 0) {
397 		DRV_LOG(ERR, "Failed to create QP.");
398 		goto err;
399 	}
400 	mlx5_crypto_gcm_init_qp(qp);
401 	ret = mlx5_devx_qp2rts(&qp->qp_obj, 0);
402 	if (ret)
403 		goto err;
404 	qp->ops = (struct rte_crypto_op **)(qp + 1);
405 	if (!mlx5_crypto_is_ipsec_opt(priv)) {
406 		qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
407 		if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, socket_id)) {
408 			DRV_LOG(ERR, "Failed to setup UMR QP.");
409 			goto err;
410 		}
411 		DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
412 			(uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
413 		if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
414 						       mlx5_crypto_gcm_mkey_klm_update)) {
415 			DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
416 			rte_errno = ENOMEM;
417 			goto err;
418 		}
419 	} else {
420 		extra_obj_size = sizeof(struct mlx5_crypto_ipsec_mem) * entries;
421 		qp->ipsec_mem = rte_calloc(__func__, (size_t)1, extra_obj_size,
422 					   RTE_CACHE_LINE_SIZE);
423 		if (!qp->ipsec_mem) {
424 			DRV_LOG(ERR, "Failed to allocate ipsec_mem.");
425 			goto err;
426 		}
427 	}
428 	dev->data->queue_pairs[qp_id] = qp;
429 	return 0;
430 err:
431 	mlx5_crypto_gcm_qp_release(dev, qp_id);
432 	return -1;
433 }
434 
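/*
 * Classify an op for the data path. UMR can be skipped only when the source
 * (and the destination for out-of-place) mbuf is single-segment and the AAD
 * and digest can be made contiguous with the payload: the AAD either
 * already sits right before the payload or is small enough to be copied
 * into the headroom, and the digest either already follows the payload or
 * fits in the tailroom. For encryption the tag copy is deferred by
 * recording the in-mbuf location in op_info->digest.
 */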
435 static __rte_always_inline void
436 mlx5_crypto_gcm_get_op_info(struct mlx5_crypto_qp *qp,
437 			    struct rte_crypto_op *op,
438 			    struct mlx5_crypto_gcm_op_info *op_info)
439 {
440 	struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
441 	struct rte_mbuf *m_src = op->sym->m_src;
442 	void *aad_addr = op->sym->aead.aad.data;
443 	void *tag_addr = op->sym->aead.digest.data;
444 	void *src_addr = rte_pktmbuf_mtod_offset(m_src, void *, op->sym->aead.data.offset);
445 	struct rte_mbuf *m_dst = m_src;
446 	void *dst_addr = src_addr;
447 	void *expected_aad = NULL;
448 	void *expected_tag = NULL;
449 	bool is_enc = sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION;
450 	bool cp_aad = false;
451 	bool cp_tag = false;
452 
453 	op_info->is_oop = false;
454 	op_info->need_umr = false;
455 	op_info->is_enc = is_enc;
456 	op_info->digest = NULL;
457 	op_info->src_addr = aad_addr;
458 	if (op->sym->m_dst && op->sym->m_dst != m_src) {
459 		/* Add 2 for AAD and digest. */
460 		MLX5_ASSERT((uint32_t)(m_dst->nb_segs + m_src->nb_segs + 2) <
461 			    qp->priv->max_klm_num);
462 		op_info->is_oop = true;
463 		m_dst = op->sym->m_dst;
464 		dst_addr = rte_pktmbuf_mtod_offset(m_dst, void *, op->sym->aead.data.offset);
465 		if (m_dst->nb_segs > 1) {
466 			op_info->need_umr = true;
467 			return;
468 		}
469 		/*
470 		 * If the op's mbuf has extra data offset, don't copy AAD to
471 		 * this area.
472 		 */
473 		if (rte_pktmbuf_headroom(m_dst) < sess->aad_len ||
474 		    op->sym->aead.data.offset) {
475 			op_info->need_umr = true;
476 			return;
477 		}
478 	} else {
479 		/* Add 2 for AAD and digest. */
480 		MLX5_ASSERT((uint32_t)(m_src->nb_segs) + 2 < qp->priv->max_klm_num);
481 	}
482 	if (m_src->nb_segs > 1) {
483 		op_info->need_umr = true;
484 		return;
485 	}
486 	expected_aad = RTE_PTR_SUB(src_addr, sess->aad_len);
487 	if (expected_aad != aad_addr) {
488 		/*
489 		 * If the op's mbuf has extra data offset, don't copy AAD to
490 		 * this area.
491 		 */
492 		if (sess->aad_len > MLX5_CRYPTO_GCM_MAX_AAD ||
493 		    sess->aad_len > rte_pktmbuf_headroom(m_src) ||
494 		    op->sym->aead.data.offset) {
495 			op_info->need_umr = true;
496 			return;
497 		}
498 		cp_aad = true;
499 		op_info->src_addr = expected_aad;
500 	}
501 	expected_tag = RTE_PTR_ADD(is_enc ? dst_addr : src_addr, op->sym->aead.data.length);
502 	if (expected_tag != tag_addr) {
503 		struct rte_mbuf *mbuf = is_enc ? m_dst : m_src;
504 
505 		/*
506 		 * If the op's mbuf is not used entirely as payload, don't copy
507 		 * the digest into the remaining area.
508 		 */
509 		if (rte_pktmbuf_tailroom(mbuf) < sess->tag_len ||
510 		    rte_pktmbuf_data_len(mbuf) != op->sym->aead.data.length) {
511 			op_info->need_umr = true;
512 			return;
513 		}
514 		if (is_enc) {
515 			op_info->digest = expected_tag;
516 			qp->cpy_tag_op++;
517 		} else {
518 			cp_tag = true;
519 		}
520 	}
521 	if (cp_aad)
522 		memcpy(expected_aad, aad_addr, sess->aad_len);
523 	if (cp_tag)
524 		memcpy(expected_tag, tag_addr, sess->tag_len);
525 }
526 
527 static __rte_always_inline uint32_t
528 _mlx5_crypto_gcm_umr_build_mbuf_klm(struct mlx5_crypto_qp *qp,
529 				    struct rte_mbuf *mbuf,
530 				    struct mlx5_klm *klm,
531 				    uint32_t offset,
532 				    uint32_t *remain)
533 {
534 	uint32_t data_len = (rte_pktmbuf_data_len(mbuf) - offset);
535 	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
536 
537 	if (data_len > *remain)
538 		data_len = *remain;
539 	*remain -= data_len;
540 	klm->byte_count = rte_cpu_to_be_32(data_len);
541 	klm->address = rte_cpu_to_be_64(addr);
542 	klm->mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf);
543 	return klm->mkey;
544 }
545 
546 static __rte_always_inline int
547 mlx5_crypto_gcm_build_mbuf_chain_klms(struct mlx5_crypto_qp *qp,
548 				      struct rte_crypto_op *op,
549 				      struct rte_mbuf *mbuf,
550 				      struct mlx5_klm *klm)
551 {
552 	uint32_t remain_len = op->sym->aead.data.length;
553 	__rte_unused uint32_t nb_segs = mbuf->nb_segs;
554 	uint32_t klm_n = 0;
555 
556 	/* The mbuf segment count must not exceed max_segs_num. */
557 	MLX5_ASSERT(nb_segs <= qp->priv->max_segs_num);
558 	/* First mbuf needs to take the data offset. */
559 	if (unlikely(_mlx5_crypto_gcm_umr_build_mbuf_klm(qp, mbuf, klm,
560 		     op->sym->aead.data.offset, &remain_len) == UINT32_MAX)) {
561 		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
562 		return 0;
563 	}
564 	klm++;
565 	klm_n++;
566 	while (remain_len) {
567 		nb_segs--;
568 		mbuf = mbuf->next;
569 		MLX5_ASSERT(mbuf && nb_segs);
570 		if (unlikely(_mlx5_crypto_gcm_umr_build_mbuf_klm(qp, mbuf, klm,
571 						0, &remain_len) == UINT32_MAX)) {
572 			op->status = RTE_CRYPTO_OP_STATUS_ERROR;
573 			return 0;
574 		}
575 		klm++;
576 		klm_n++;
577 	}
578 	return klm_n;
579 }
580 
581 static __rte_always_inline int
582 mlx5_crypto_gcm_build_klm_by_addr(struct mlx5_crypto_qp *qp,
583 				  struct mlx5_klm *klm,
584 				  void *addr,
585 				  uint32_t len)
586 {
587 	klm->byte_count = rte_cpu_to_be_32(len);
588 	klm->address = rte_cpu_to_be_64((uintptr_t)addr);
589 	klm->mkey = mlx5_mr_addr2mr_bh(&qp->mr_ctrl, (uintptr_t)addr);
590 	if (klm->mkey == UINT32_MAX)
591 		return 0;
592 	return 1;
593 }
594 
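/*
 * Build the complete KLM list for one op. The layout is: AAD, source
 * segments, then (for in-place or decryption ops) the digest; for
 * out-of-place ops the AAD and destination segments are appended so that
 * the destination view starts right after the source view, with the digest
 * KLM placed last when it was not reserved earlier. *len returns the total
 * mapped length.
 */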
595 static __rte_always_inline int
596 mlx5_crypto_gcm_build_op_klm(struct mlx5_crypto_qp *qp,
597 			     struct rte_crypto_op *op,
598 			     struct mlx5_crypto_gcm_op_info *op_info,
599 			     struct mlx5_klm *klm,
600 			     uint32_t *len)
601 {
602 	struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
603 	struct mlx5_klm *digest = NULL, *aad = NULL;
604 	uint32_t total_len = op->sym->aead.data.length + sess->aad_len + sess->tag_len;
605 	uint32_t klm_n = 0, klm_src = 0, klm_dst = 0;
606 
607 	/* Build AAD KLM. */
608 	aad = klm;
609 	if (!mlx5_crypto_gcm_build_klm_by_addr(qp, aad, op->sym->aead.aad.data, sess->aad_len))
610 		return 0;
611 	klm_n++;
612 	/* Build src mbuf KLM. */
613 	klm_src = mlx5_crypto_gcm_build_mbuf_chain_klms(qp, op, op->sym->m_src, &klm[klm_n]);
614 	if (!klm_src)
615 		return 0;
616 	klm_n += klm_src;
617 	/* Reserve digest KLM if needed. */
618 	if (!op_info->is_oop ||
619 	    sess->op_type == MLX5_CRYPTO_OP_TYPE_DECRYPTION) {
620 		digest = &klm[klm_n];
621 		klm_n++;
622 	}
623 	/* Build dst mbuf KLM. */
624 	if (op_info->is_oop) {
625 		klm[klm_n] = *aad;
626 		klm_n++;
627 		klm_dst = mlx5_crypto_gcm_build_mbuf_chain_klms(qp, op, op->sym->m_dst,
628 								&klm[klm_n]);
629 		if (!klm_dst)
630 			return 0;
631 		klm_n += klm_dst;
632 		total_len += (op->sym->aead.data.length + sess->aad_len);
633 	}
634 	/* Place the digest KLM at the end if it was not reserved above. */
635 	if (!digest) {
636 		digest = &klm[klm_n];
637 		klm_n++;
638 	}
639 	/* Build digest KLM. */
640 	if (!mlx5_crypto_gcm_build_klm_by_addr(qp, digest, op->sym->aead.digest.data,
641 					       sess->tag_len))
642 		return 0;
643 	*len = total_len;
644 	return klm_n;
645 }
646 
647 static __rte_always_inline struct mlx5_wqe_cseg *
648 mlx5_crypto_gcm_get_umr_wqe(struct mlx5_crypto_qp *qp)
649 {
650 	uint32_t wqe_offset = qp->umr_pi & (qp->umr_wqbbs - 1);
651 	uint32_t left_wqbbs = qp->umr_wqbbs - wqe_offset;
652 	struct mlx5_wqe_cseg *wqe;
653 
654 	/* If UMR WQE is near the boundary. */
655 	if (left_wqbbs < MLX5_UMR_GCM_WQE_STRIDE) {
656 		/* Append a NOP WQE as the remaining WQEBBs cannot hold the UMR. */
657 		wqe = RTE_PTR_ADD(qp->umr_qp_obj.umem_buf, wqe_offset * MLX5_SEND_WQE_BB);
658 		wqe->opcode = rte_cpu_to_be_32(MLX5_OPCODE_NOP | ((uint32_t)qp->umr_pi << 8));
659 		wqe->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | (left_wqbbs << 2));
660 		wqe->flags = RTE_BE32(0);
661 		wqe->misc = RTE_BE32(0);
662 		qp->umr_pi += left_wqbbs;
663 		wqe_offset = qp->umr_pi & (qp->umr_wqbbs - 1);
664 	}
665 	wqe_offset *= MLX5_SEND_WQE_BB;
666 	return RTE_PTR_ADD(qp->umr_qp_obj.umem_buf, wqe_offset);
667 }
668 
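/*
 * Post a UMR WQE binding the op's KLM list to the per-entry indirect mkey.
 * The GGA WQE can then address the whole scattered op through that single
 * mkey with zero-based offsets: the source view starts at address 0 and,
 * for out-of-place ops, the destination view starts at src_bytes.
 */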
669 static __rte_always_inline int
670 mlx5_crypto_gcm_build_umr(struct mlx5_crypto_qp *qp,
671 			  struct rte_crypto_op *op,
672 			  uint32_t idx,
673 			  struct mlx5_crypto_gcm_op_info *op_info,
674 			  struct mlx5_crypto_gcm_data *data)
675 {
676 	struct mlx5_crypto_priv *priv = qp->priv;
677 	struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
678 	struct mlx5_wqe_cseg *wqe;
679 	struct mlx5_wqe_umr_cseg *ucseg;
680 	struct mlx5_wqe_mkey_cseg *mkc;
681 	struct mlx5_klm *iklm;
682 	struct mlx5_klm *klm = &qp->klm_array[idx * priv->max_klm_num];
683 	uint16_t klm_size, klm_align;
684 	uint32_t total_len;
685 
686 	/* Build the KLM based on the op. */
687 	klm_size = mlx5_crypto_gcm_build_op_klm(qp, op, op_info, klm, &total_len);
688 	if (!klm_size)
689 		return -EINVAL;
690 	klm_align = RTE_ALIGN(klm_size, 4);
691 	/* Get UMR WQE memory. */
692 	wqe = mlx5_crypto_gcm_get_umr_wqe(qp);
693 	memset(wqe, 0, MLX5_UMR_GCM_WQE_SIZE);
694 	/* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */
695 	wqe->opcode = rte_cpu_to_be_32(MLX5_OPCODE_UMR | ((uint32_t)qp->umr_pi << 8));
696 	wqe->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | 9);
697 	wqe->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET);
698 	wqe->misc = rte_cpu_to_be_32(qp->mkey[idx]->id);
699 	/* Set UMR WQE control seg. */
700 	ucseg = (struct mlx5_wqe_umr_cseg *)(wqe + 1);
701 	ucseg->mkey_mask |= RTE_BE64(1u << 0);
702 	ucseg->ko_to_bs = rte_cpu_to_be_32(klm_align << MLX5_UMRC_KO_OFFSET);
703 	/* Set mkey context seg. */
704 	mkc = (struct mlx5_wqe_mkey_cseg *)(ucseg + 1);
705 	mkc->len = rte_cpu_to_be_64(total_len);
706 	mkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 | (qp->mkey[idx]->id & 0xff));
707 	/* Set UMR pointer to data seg. */
708 	iklm = (struct mlx5_klm *)(mkc + 1);
709 	iklm->address = rte_cpu_to_be_64((uintptr_t)((char *)klm));
710 	iklm->mkey = rte_cpu_to_be_32(qp->mr.lkey);
711 	data->src_mkey = rte_cpu_to_be_32(qp->mkey[idx]->id);
712 	data->dst_mkey = data->src_mkey;
713 	data->src_addr = 0;
714 	data->src_bytes = sess->aad_len + op->sym->aead.data.length;
715 	data->dst_bytes = data->src_bytes;
716 	if (op_info->is_enc)
717 		data->dst_bytes += sess->tag_len;
718 	else
719 		data->src_bytes += sess->tag_len;
720 	if (op_info->is_oop)
721 		data->dst_addr = (void *)(uintptr_t)(data->src_bytes);
722 	else
723 		data->dst_addr = 0;
724 	/* Clear the padding memory. */
725 	memset(&klm[klm_size], 0, sizeof(struct mlx5_klm) * (klm_align - klm_size));
726 	/* Update PI and WQE */
727 	qp->umr_pi += MLX5_UMR_GCM_WQE_STRIDE;
728 	qp->umr_wqe = (uint8_t *)wqe;
729 	return 0;
730 }
731 
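/*
 * The UMR QP is created as cd_master while the crypto QP is cd_slave_send,
 * so posted GGA WQEs do not execute until this SEND_EN WQE, fenced behind
 * the preceding UMRs, raises the slave's max_index up to qp->pi.
 */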
732 static __rte_always_inline void
733 mlx5_crypto_gcm_build_send_en(struct mlx5_crypto_qp *qp)
734 {
735 	uint32_t wqe_offset = (qp->umr_pi & (qp->umr_wqbbs - 1)) * MLX5_SEND_WQE_BB;
736 	struct mlx5_wqe_cseg *cs = RTE_PTR_ADD(qp->umr_qp_obj.wqes, wqe_offset);
737 	struct mlx5_wqe_qseg *qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
738 
739 	cs->opcode = rte_cpu_to_be_32(MLX5_OPCODE_SEND_EN | ((uint32_t)qp->umr_pi << 8));
740 	cs->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | 2);
741 	/*
742 	 * No need to generate a SEND_EN CQE, as normally we want only GGA
743 	 * CQEs in the CQ. Comparing qp->last_gga_pi with the completed WQE
744 	 * counter tells whether all SEND_EN WQEs have been consumed.
745 	 */
746 	cs->flags = RTE_BE32((MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET) |
747 			MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE);
748 	cs->misc = RTE_BE32(0);
749 	qs->max_index = rte_cpu_to_be_32(qp->pi);
750 	qs->qpn_cqn = rte_cpu_to_be_32(qp->qp_obj.qp->id);
751 	qp->umr_wqe = (uint8_t *)cs;
752 	qp->umr_pi += 1;
753 }
754 
755 static __rte_always_inline void
756 mlx5_crypto_gcm_wqe_set(struct mlx5_crypto_qp *qp,
757 			struct rte_crypto_op *op,
758 			uint32_t idx,
759 			struct mlx5_crypto_gcm_data *data)
760 {
761 	struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
762 	struct mlx5_gga_wqe *wqe = &((struct mlx5_gga_wqe *)qp->qp_obj.wqes)[idx];
763 	union mlx5_gga_crypto_opaque *opaq = qp->opaque_addr;
764 
765 	memcpy(opaq[idx].cp.iv,
766 		rte_crypto_op_ctod_offset(op, uint8_t *, sess->iv_offset), sess->iv_len);
767 	opaq[idx].cp.tag_size = sess->wqe_tag_len;
768 	opaq[idx].cp.aad_size = sess->wqe_aad_len;
769 	/* Update control seg. */
770 	wqe->opcode = rte_cpu_to_be_32(MLX5_MMO_CRYPTO_OPC + (qp->pi << 8));
771 	wqe->gga_ctrl1 = sess->mmo_ctrl;
772 	wqe->gga_ctrl2 = sess->dek_id;
773 	wqe->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET);
774 	/* Update op_info seg. */
775 	wqe->gather.bcount = rte_cpu_to_be_32(data->src_bytes);
776 	wqe->gather.lkey = data->src_mkey;
777 	wqe->gather.pbuf = rte_cpu_to_be_64((uintptr_t)data->src_addr);
778 	/* Update output seg. */
779 	wqe->scatter.bcount = rte_cpu_to_be_32(data->dst_bytes);
780 	wqe->scatter.lkey = data->dst_mkey;
781 	wqe->scatter.pbuf = rte_cpu_to_be_64((uintptr_t)data->dst_addr);
782 	qp->wqe = (uint8_t *)wqe;
783 }
784 
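/*
 * Enqueue burst for the generic (non-IPsec) data path. Only the last GGA
 * WQE of the burst requests a completion. If no op in the burst needed a
 * UMR and none is pending, the crypto QP doorbell is rung directly;
 * otherwise a SEND_EN WQE is appended and the UMR QP doorbell is rung
 * instead, releasing the GGA WQEs once their UMRs have completed.
 */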
785 static uint16_t
786 mlx5_crypto_gcm_enqueue_burst(void *queue_pair,
787 			      struct rte_crypto_op **ops,
788 			      uint16_t nb_ops)
789 {
790 	struct mlx5_crypto_qp *qp = queue_pair;
791 	struct mlx5_crypto_session *sess;
792 	struct mlx5_crypto_priv *priv = qp->priv;
793 	struct mlx5_crypto_gcm_tag_cpy_info *tag;
794 	struct mlx5_crypto_gcm_data gcm_data;
795 	struct rte_crypto_op *op;
796 	struct mlx5_crypto_gcm_op_info op_info;
797 	uint16_t mask = qp->entries_n - 1;
798 	uint16_t remain = qp->entries_n - (qp->pi - qp->qp_ci);
799 	uint32_t idx;
800 	uint16_t umr_cnt = 0;
801 
802 	if (remain < nb_ops)
803 		nb_ops = remain;
804 	else
805 		remain = nb_ops;
806 	if (unlikely(remain == 0))
807 		return 0;
808 	do {
809 		op = *ops++;
810 		sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
811 		idx = qp->pi & mask;
812 		mlx5_crypto_gcm_get_op_info(qp, op, &op_info);
813 		if (!op_info.need_umr) {
814 			gcm_data.src_addr = op_info.src_addr;
815 			gcm_data.src_bytes = op->sym->aead.data.length + sess->aad_len;
816 			gcm_data.src_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_src);
817 			if (op_info.is_oop) {
818 				gcm_data.dst_addr = RTE_PTR_SUB
819 					(rte_pktmbuf_mtod_offset(op->sym->m_dst,
820 					 void *, op->sym->aead.data.offset), sess->aad_len);
821 				gcm_data.dst_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_dst);
822 			} else {
823 				gcm_data.dst_addr = gcm_data.src_addr;
824 				gcm_data.dst_mkey = gcm_data.src_mkey;
825 			}
826 			gcm_data.dst_bytes = gcm_data.src_bytes;
827 			if (op_info.is_enc)
828 				gcm_data.dst_bytes += sess->tag_len;
829 			else
830 				gcm_data.src_bytes += sess->tag_len;
831 		} else {
832 			if (unlikely(mlx5_crypto_gcm_build_umr(qp, op, idx,
833 							&op_info, &gcm_data))) {
834 				qp->stats.enqueue_err_count++;
835 				if (remain != nb_ops) {
836 					qp->stats.enqueued_count -= remain;
837 					break;
838 				}
839 				return 0;
840 			}
841 			umr_cnt++;
842 		}
843 		mlx5_crypto_gcm_wqe_set(qp, op, idx, &gcm_data);
844 		if (op_info.digest) {
845 			tag = (struct mlx5_crypto_gcm_tag_cpy_info *)op->sym->aead.digest.data;
846 			tag->digest = op_info.digest;
847 			tag->tag_len = sess->tag_len;
848 			op->status = MLX5_CRYPTO_OP_STATUS_GCM_TAG_COPY;
849 		} else {
850 			op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
851 		}
852 		qp->ops[idx] = op;
853 		qp->pi++;
854 	} while (--remain);
855 	qp->stats.enqueued_count += nb_ops;
856 	/* Update the last GGA cseg with COMP. */
857 	((struct mlx5_wqe_cseg *)qp->wqe)->flags =
858 		RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
859 	/* Ring the crypto QP doorbell directly only when no SEND_EN WQEs are pending. */
860 	if (!umr_cnt && !qp->has_umr) {
861 		mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
862 				   qp->pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
863 				   !priv->uar.dbnc);
864 	} else {
865 		mlx5_crypto_gcm_build_send_en(qp);
866 		mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->umr_wqe,
867 				   qp->umr_pi, &qp->umr_qp_obj.db_rec[MLX5_SND_DBR],
868 				   !priv->uar.dbnc);
869 		qp->last_gga_pi = qp->pi;
870 		qp->has_umr = true;
871 	}
872 	return nb_ops;
873 }
874 
875 static __rte_noinline void
876 mlx5_crypto_gcm_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
877 {
878 	uint8_t op_code;
879 	const uint32_t idx = qp->cq_ci & (qp->entries_n - 1);
880 	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
881 							&qp->cq_obj.cqes[idx];
882 
883 	op_code = rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) >> MLX5_CQ_INDEX_WIDTH;
884 	DRV_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x, QPN:0x%x, WQE_CNT:0x%x",
885 		cqe->syndrome, cqe->vendor_err_synd, op_code,
886 		(rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) & 0xffffff),
887 		rte_be_to_cpu_16(cqe->wqe_counter));
888 	if (op && op_code == MLX5_OPCODE_MMO) {
889 		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
890 		qp->stats.dequeue_err_count++;
891 	}
892 }
893 
894 static __rte_always_inline void
895 mlx5_crypto_gcm_fill_op(struct mlx5_crypto_qp *qp,
896 			struct rte_crypto_op **ops,
897 			uint16_t orci,
898 			uint16_t rci,
899 			uint16_t op_mask)
900 {
901 	uint16_t n;
902 
903 	orci &= op_mask;
904 	rci &= op_mask;
905 	if (unlikely(orci > rci)) {
906 		n = op_mask - orci + 1;
907 		memcpy(ops, &qp->ops[orci], n * sizeof(*ops));
908 		orci = 0;
909 	} else {
910 		n = 0;
911 	}
912 	/* rci can be 0 here, memcpy will skip that. */
913 	memcpy(&ops[n], &qp->ops[orci], (rci - orci) * sizeof(*ops));
914 }
915 
916 static __rte_always_inline void
917 mlx5_crypto_gcm_cpy_tag(struct mlx5_crypto_qp *qp,
918 			uint16_t orci,
919 			uint16_t rci,
920 			uint16_t op_mask)
921 {
922 	struct rte_crypto_op *op;
923 	struct mlx5_crypto_gcm_tag_cpy_info *tag;
924 
925 	while (qp->cpy_tag_op && orci != rci) {
926 		op = qp->ops[orci & op_mask];
927 		if (op->status == MLX5_CRYPTO_OP_STATUS_GCM_TAG_COPY) {
928 			tag = (struct mlx5_crypto_gcm_tag_cpy_info *)op->sym->aead.digest.data;
929 			memcpy(op->sym->aead.digest.data, tag->digest, tag->tag_len);
930 			op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
931 			qp->cpy_tag_op--;
932 		}
933 		orci++;
934 	}
935 }
936 
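/*
 * Dequeue burst for the generic data path. The CQE wqe_counter reports the
 * last completed GGA WQE, so qp_ci advances to wqe_counter + 1, deferred
 * tag copies are resolved, and the ops between reported_ci and qp_ci are
 * returned in order. has_umr is cleared once the last GGA WQE that depended
 * on a UMR completes.
 */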
937 static uint16_t
938 mlx5_crypto_gcm_dequeue_burst(void *queue_pair,
939 			      struct rte_crypto_op **ops,
940 			      uint16_t nb_ops)
941 {
942 	struct mlx5_crypto_qp *qp = queue_pair;
943 	volatile struct mlx5_cqe *restrict cqe;
944 	const unsigned int cq_size = qp->cq_entries_n;
945 	const unsigned int mask = cq_size - 1;
946 	const unsigned int op_mask = qp->entries_n - 1;
947 	uint32_t idx;
948 	uint32_t next_idx = qp->cq_ci & mask;
949 	uint16_t reported_ci = qp->reported_ci;
950 	uint16_t qp_ci = qp->qp_ci;
951 	const uint16_t max = RTE_MIN((uint16_t)(qp->pi - reported_ci), nb_ops);
952 	uint16_t op_num = 0;
953 	int ret;
954 
955 	if (unlikely(max == 0))
956 		return 0;
957 	while (qp_ci - reported_ci < max) {
958 		idx = next_idx;
959 		next_idx = (qp->cq_ci + 1) & mask;
960 		cqe = &qp->cq_obj.cqes[idx];
961 		ret = check_cqe(cqe, cq_size, qp->cq_ci);
962 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
963 			if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
964 				mlx5_crypto_gcm_cqe_err_handle(qp,
965 						qp->ops[reported_ci & op_mask]);
966 			break;
967 		}
968 		qp_ci = rte_be_to_cpu_16(cqe->wqe_counter) + 1;
969 		if (qp->has_umr &&
970 		    (qp->last_gga_pi + 1) == qp_ci)
971 			qp->has_umr = false;
972 		qp->cq_ci++;
973 	}
974 	/* If wqe_counter changed, CQEs were handled. */
975 	if (likely(qp->qp_ci != qp_ci)) {
976 		qp->qp_ci = qp_ci;
977 		rte_io_wmb();
978 		qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->cq_ci);
979 	}
980 	/* If reported_ci differs from qp_ci, completed ops can be returned. */
981 	if (qp_ci != reported_ci) {
982 		op_num = RTE_MIN((uint16_t)(qp_ci - reported_ci), max);
983 		reported_ci += op_num;
984 		mlx5_crypto_gcm_cpy_tag(qp, qp->reported_ci, reported_ci, op_mask);
985 		mlx5_crypto_gcm_fill_op(qp, ops, qp->reported_ci, reported_ci, op_mask);
986 		qp->stats.dequeued_count += op_num;
987 		qp->reported_ci = reported_ci;
988 	}
989 	return op_num;
990 }
991 
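/*
 * Enqueue burst for the IPsec-optimized data path. AAD, IV and payload are
 * expected to be contiguous in a single-segment mbuf, with at most
 * MLX5_CRYPTO_GCM_IPSEC_IV_SIZE bytes of IV between AAD and payload. Since
 * the device consumes the AAD immediately before the payload, the bytes
 * just before the payload are backed up into qp->ipsec_mem and the AAD is
 * moved up against the payload; mlx5_crypto_gcm_restore_ipsec_mem() undoes
 * this at dequeue time.
 */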
992 static uint16_t
993 mlx5_crypto_gcm_ipsec_enqueue_burst(void *queue_pair,
994 				    struct rte_crypto_op **ops,
995 				    uint16_t nb_ops)
996 {
997 	struct mlx5_crypto_qp *qp = queue_pair;
998 	struct mlx5_crypto_session *sess;
999 	struct mlx5_crypto_priv *priv = qp->priv;
1000 	struct mlx5_crypto_gcm_data gcm_data;
1001 	struct rte_crypto_op *op;
1002 	struct rte_mbuf *m_src;
1003 	struct rte_mbuf *m_dst;
1004 	uint16_t mask = qp->entries_n - 1;
1005 	uint16_t remain = qp->entries_n - (qp->pi - qp->qp_ci);
1006 	uint32_t idx;
1007 	uint32_t pkt_iv_len;
1008 	uint8_t *payload;
1009 
1010 	if (remain < nb_ops)
1011 		nb_ops = remain;
1012 	else
1013 		remain = nb_ops;
1014 	if (unlikely(remain == 0))
1015 		return 0;
1016 	do {
1017 		op = *ops++;
1018 		sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
1019 		idx = qp->pi & mask;
1020 		m_src = op->sym->m_src;
1021 		MLX5_ASSERT(m_src->nb_segs == 1);
1022 		payload = rte_pktmbuf_mtod_offset(m_src, void *, op->sym->aead.data.offset);
1023 		gcm_data.src_addr = RTE_PTR_SUB(payload, sess->aad_len);
1024 		/*
1025 		 * The IPsec IV between the AAD and the payload should be less
1026 		 * than or equal to MLX5_CRYPTO_GCM_IPSEC_IV_SIZE.
1027 		 */
1028 		pkt_iv_len = RTE_PTR_DIFF(payload,
1029 				RTE_PTR_ADD(op->sym->aead.aad.data, sess->aad_len));
1030 		MLX5_ASSERT(pkt_iv_len <= MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
1031 		gcm_data.src_bytes = op->sym->aead.data.length + sess->aad_len;
1032 		gcm_data.src_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_src);
1033 		m_dst = op->sym->m_dst;
1034 		if (m_dst && m_dst != m_src) {
1035 			MLX5_ASSERT(m_dst->nb_segs == 1 &&
1036 				    (rte_pktmbuf_headroom(m_dst) + op->sym->aead.data.offset)
1037 				    >= sess->aad_len + pkt_iv_len);
1038 			gcm_data.dst_addr = RTE_PTR_SUB
1039 				(rte_pktmbuf_mtod_offset(m_dst,
1040 				 void *, op->sym->aead.data.offset), sess->aad_len);
1041 			gcm_data.dst_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, m_dst);
1042 		} else {
1043 			gcm_data.dst_addr = gcm_data.src_addr;
1044 			gcm_data.dst_mkey = gcm_data.src_mkey;
1045 		}
1046 		gcm_data.dst_bytes = gcm_data.src_bytes;
1047 		/* Digest should follow payload. */
1048 		if (sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION) {
1049 			MLX5_ASSERT(RTE_PTR_ADD(gcm_data.dst_addr,
1050 				    sess->aad_len + op->sym->aead.data.length) ==
1051 				    op->sym->aead.digest.data);
1052 			gcm_data.dst_bytes += sess->tag_len;
1053 		} else {
1054 			MLX5_ASSERT(RTE_PTR_ADD(gcm_data.src_addr,
1055 				    sess->aad_len + op->sym->aead.data.length) ==
1056 				    op->sym->aead.digest.data);
1057 			gcm_data.src_bytes += sess->tag_len;
1058 		}
1059 		mlx5_crypto_gcm_wqe_set(qp, op, idx, &gcm_data);
1060 		/*
1061 		 * All the data, such as the IV, has been copied above, so now
1062 		 * move the AAD right before the payload. First back up the
1063 		 * memory to be overwritten, then do the move.
1064 		 */
1065 		rte_memcpy(&qp->ipsec_mem[idx],
1066 			   RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
1067 			   MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
1068 		/* If the regions do not overlap, copy directly; otherwise use memmove. */
1069 		if (likely(pkt_iv_len >= sess->aad_len))
1070 			rte_memcpy(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
1071 		else
1072 			memmove(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
1073 		op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
1074 		qp->ops[idx] = op;
1075 		qp->pi++;
1076 	} while (--remain);
1077 	qp->stats.enqueued_count += nb_ops;
1078 	/* Update the last GGA cseg with COMP. */
1079 	((struct mlx5_wqe_cseg *)qp->wqe)->flags =
1080 		RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
1081 	mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
1082 			   qp->pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
1083 			   !priv->uar.dbnc);
1084 	return nb_ops;
1085 }
1086 
1087 static __rte_always_inline void
1088 mlx5_crypto_gcm_restore_ipsec_mem(struct mlx5_crypto_qp *qp,
1089 				  uint16_t orci,
1090 				  uint16_t rci,
1091 				  uint16_t op_mask)
1092 {
1093 	uint32_t idx;
1094 	struct mlx5_crypto_session *sess;
1095 	struct rte_crypto_op *op;
1096 	struct rte_mbuf *m_src;
1097 	struct rte_mbuf *m_dst;
1098 	uint8_t *payload;
1099 
1100 	while (orci != rci) {
1101 		idx = orci & op_mask;
1102 		op = qp->ops[idx];
1103 		sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
1104 		m_src = op->sym->m_src;
1105 		payload = rte_pktmbuf_mtod_offset(m_src, void *,
1106 						  op->sym->aead.data.offset);
1107 		/* Restore the IPsec memory. */
1108 		if (unlikely(sess->aad_len > MLX5_CRYPTO_GCM_IPSEC_IV_SIZE))
1109 			memmove(op->sym->aead.aad.data,
1110 				RTE_PTR_SUB(payload, sess->aad_len), sess->aad_len);
1111 		rte_memcpy(RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
1112 			   &qp->ipsec_mem[idx], MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
1113 		m_dst = op->sym->m_dst;
1114 		if (m_dst && m_dst != m_src) {
1115 			uint32_t bytes_to_copy;
1116 
1117 			bytes_to_copy = RTE_PTR_DIFF(payload, op->sym->aead.aad.data);
1118 			rte_memcpy(RTE_PTR_SUB(rte_pktmbuf_mtod_offset(m_dst, void *,
1119 				   op->sym->aead.data.offset), bytes_to_copy),
1120 				   op->sym->aead.aad.data,
1121 				   bytes_to_copy);
1122 		}
1123 		orci++;
1124 	}
1125 }
1126 
1127 static uint16_t
1128 mlx5_crypto_gcm_ipsec_dequeue_burst(void *queue_pair,
1129 				    struct rte_crypto_op **ops,
1130 				    uint16_t nb_ops)
1131 {
1132 	struct mlx5_crypto_qp *qp = queue_pair;
1133 	volatile struct mlx5_cqe *restrict cqe;
1134 	const unsigned int cq_size = qp->cq_entries_n;
1135 	const unsigned int mask = cq_size - 1;
1136 	const unsigned int op_mask = qp->entries_n - 1;
1137 	uint32_t idx;
1138 	uint32_t next_idx = qp->cq_ci & mask;
1139 	uint16_t reported_ci = qp->reported_ci;
1140 	uint16_t qp_ci = qp->qp_ci;
1141 	const uint16_t max = RTE_MIN((uint16_t)(qp->pi - reported_ci), nb_ops);
1142 	uint16_t op_num = 0;
1143 	int ret;
1144 
1145 	if (unlikely(max == 0))
1146 		return 0;
1147 	while (qp_ci - reported_ci < max) {
1148 		idx = next_idx;
1149 		next_idx = (qp->cq_ci + 1) & mask;
1150 		cqe = &qp->cq_obj.cqes[idx];
1151 		ret = check_cqe(cqe, cq_size, qp->cq_ci);
1152 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1153 			if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
1154 				mlx5_crypto_gcm_cqe_err_handle(qp,
1155 						qp->ops[reported_ci & op_mask]);
1156 			break;
1157 		}
1158 		qp_ci = rte_be_to_cpu_16(cqe->wqe_counter) + 1;
1159 		qp->cq_ci++;
1160 	}
1161 	/* If wqe_counter changed, CQEs were handled. */
1162 	if (likely(qp->qp_ci != qp_ci)) {
1163 		qp->qp_ci = qp_ci;
1164 		rte_io_wmb();
1165 		qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->cq_ci);
1166 	}
1167 	/* If reported_ci differs from qp_ci, completed ops can be returned. */
1168 	if (qp_ci != reported_ci) {
1169 		op_num = RTE_MIN((uint16_t)(qp_ci - reported_ci), max);
1170 		reported_ci += op_num;
1171 		mlx5_crypto_gcm_restore_ipsec_mem(qp, qp->reported_ci, reported_ci, op_mask);
1172 		mlx5_crypto_gcm_fill_op(qp, ops, qp->reported_ci, reported_ci, op_mask);
1173 		qp->stats.dequeued_count += op_num;
1174 		qp->reported_ci = reported_ci;
1175 	}
1176 	return op_num;
1177 }
1178 
1179 int
1180 mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
1181 {
1182 	struct mlx5_common_device *cdev = priv->cdev;
1183 	struct rte_cryptodev *crypto_dev = priv->crypto_dev;
1184 	struct rte_cryptodev_ops *dev_ops = crypto_dev->dev_ops;
1185 	int ret;
1186 
1187 	/* Override the AES-GCM specific ops. */
1188 	dev_ops->sym_session_configure = mlx5_crypto_sym_gcm_session_configure;
1189 	mlx5_os_set_reg_mr_cb(&priv->reg_mr_cb, &priv->dereg_mr_cb);
1190 	dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup;
1191 	dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release;
1192 	if (mlx5_crypto_is_ipsec_opt(priv)) {
1193 		crypto_dev->dequeue_burst = mlx5_crypto_gcm_ipsec_dequeue_burst;
1194 		crypto_dev->enqueue_burst = mlx5_crypto_gcm_ipsec_enqueue_burst;
1195 		priv->max_klm_num = 0;
1196 	} else {
1197 		crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
1198 		crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
1199 		priv->max_klm_num = RTE_ALIGN((priv->max_segs_num + 1) * 2 + 1,
1200 					MLX5_UMR_KLM_NUM_ALIGN);
1201 	}
1202 	/* Generate GCM capability. */
1203 	ret = mlx5_crypto_generate_gcm_cap(&cdev->config.hca_attr.crypto_mmo,
1204 					   mlx5_crypto_gcm_caps);
1205 	if (ret) {
1206 		DRV_LOG(ERR, "Not enough AES-GCM capability.");
1207 		return -1;
1208 	}
1209 	priv->caps = mlx5_crypto_gcm_caps;
1210 	return 0;
1211 }
1212