/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <strings.h>
#include <stdint.h>
#include <sys/mman.h>

#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_bus_pci.h>
#include <rte_pci.h>
#include <rte_regexdev_driver.h>
#include <rte_mbuf.h>

#include <infiniband/mlx5dv.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_prm.h>

#include "mlx5_regex_utils.h"
#include "mlx5_rxp.h"
#include "mlx5_regex.h"

#define MLX5_REGEX_MAX_WQE_INDEX 0xffff
#define MLX5_REGEX_METADATA_SIZE ((size_t)64)
#define MLX5_REGEX_MAX_OUTPUT (((size_t)1) << 11)
#define MLX5_REGEX_WQE_CTRL_OFFSET 12
#define MLX5_REGEX_WQE_METADATA_OFFSET 16
#define MLX5_REGEX_WQE_GATHER_OFFSET 32
#define MLX5_REGEX_WQE_SCATTER_OFFSET 48
#define MLX5_REGEX_METADATA_OFF 32
#define MLX5_REGEX_UMR_WQE_SIZE 192
/* The maximum number of KLMs that can be added to one UMR indirect mkey. */
#define MLX5_REGEX_MAX_KLM_NUM 128
/* The KLM array size for one job. */
#define MLX5_REGEX_KLMS_SIZE \
	((MLX5_REGEX_MAX_KLM_NUM) * sizeof(struct mlx5_klm))
/*
 * In WQE set mode, the PI advances per WQE set, so it wraps at a quarter
 * of MLX5_REGEX_MAX_WQE_INDEX.
 */
#define MLX5_REGEX_UMR_QP_PI_IDX(pi, ops) \
	(((pi) + (ops)) & (MLX5_REGEX_MAX_WQE_INDEX >> 2))
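
/*
 * A sketch of the WQE ring layout, inferred from the constants above and
 * their use below: with UMR (priv->has_umr), every job occupies a WQE set
 * of four WQEBBs (4 * MLX5_SEND_WQE_BB = 256 bytes), i.e. one
 * MLX5_REGEX_UMR_WQE_SIZE (192-byte) UMR or NOP WQE followed by one
 * 64-byte RegEx WQE. Without UMR, every job is a single 64-byte RegEx
 * WQE. Hence WQE strides are shifted left by 2 and WQE indexes are
 * scaled by 4 whenever UMR is enabled.
 */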

static inline uint32_t
qp_size_get(struct mlx5_regex_hw_qp *qp)
{
	return (1U << qp->log_nb_desc);
}

static inline uint32_t
cq_size_get(struct mlx5_regex_cq *cq)
{
	return (1U << cq->log_nb_desc);
}

struct mlx5_regex_job {
	uint64_t user_id;
	volatile uint8_t *output;
	volatile uint8_t *metadata;
	struct mlx5_klm *imkey_array; /* Indirect mkey's KLM array. */
	struct mlx5_devx_obj *imkey; /* UMR WQE's indirect mkey. */
} __rte_cache_aligned;

static inline void
set_data_seg(struct mlx5_wqe_data_seg *seg,
	     uint32_t length, uint32_t lkey,
	     uintptr_t address)
{
	seg->byte_count = rte_cpu_to_be_32(length);
	seg->lkey = rte_cpu_to_be_32(lkey);
	seg->addr = rte_cpu_to_be_64(address);
}

static inline void
set_metadata_seg(struct mlx5_wqe_metadata_seg *seg,
		 uint32_t mmo_control_31_0, uint32_t lkey,
		 uintptr_t address)
{
	seg->mmo_control_31_0 = rte_cpu_to_be_32(mmo_control_31_0);
	seg->lkey = rte_cpu_to_be_32(lkey);
	seg->addr = rte_cpu_to_be_64(address);
}

static inline void
set_regex_ctrl_seg(void *seg, uint8_t le, uint16_t subset_id0,
		   uint16_t subset_id1, uint16_t subset_id2,
		   uint16_t subset_id3, uint8_t ctrl)
{
	MLX5_SET(regexp_mmo_control, seg, le, le);
	MLX5_SET(regexp_mmo_control, seg, ctrl, ctrl);
	MLX5_SET(regexp_mmo_control, seg, subset_id_0, subset_id0);
	MLX5_SET(regexp_mmo_control, seg, subset_id_1, subset_id1);
	MLX5_SET(regexp_mmo_control, seg, subset_id_2, subset_id2);
	MLX5_SET(regexp_mmo_control, seg, subset_id_3, subset_id3);
}

static inline void
set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
		 uint8_t opmod, uint32_t qp_num, uint8_t fm_ce_se, uint8_t ds,
		 uint8_t signature, uint32_t imm)
{
	seg->opmod_idx_opcode = rte_cpu_to_be_32(((uint32_t)opmod << 24) |
						 ((uint32_t)pi << 8) |
						 opcode);
	seg->qpn_ds = rte_cpu_to_be_32((qp_num << 8) | ds);
	seg->fm_ce_se = fm_ce_se;
	seg->signature = signature;
	seg->imm = imm;
}

/**
 * Query LKey from a packet buffer for QP. If not found, register the
 * mbuf's mempool.
 *
 * @param priv
 *   Pointer to the priv object.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mbuf
 *   Pointer to source mbuf, to search in.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static inline uint32_t
mlx5_regex_mb2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
		 struct rte_mbuf *mbuf)
{
	return mlx5_mr_mb2mr(priv->cdev, 0, mr_ctrl, mbuf);
}

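/*
 * A sketch of the 64-byte RegEx WQE layout implied by the offsets defined
 * above: the control segment starts at byte 0, the RegEx MMO control
 * overlays its immediate field at byte 12 (MLX5_REGEX_WQE_CTRL_OFFSET),
 * the metadata segment sits at byte 16, the gather (input) data segment
 * at byte 32 and the scatter (output) data segment at byte 48.
 */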
static inline void
__prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj,
	   struct rte_regex_ops *op, struct mlx5_regex_job *job,
	   size_t pi, struct mlx5_klm *klm)
{
	size_t wqe_offset = (pi & (qp_size_get(qp_obj) - 1)) *
			    (MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
			    (priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
	uint16_t group0 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F ?
				op->group_id0 : 0;
	uint16_t group1 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F ?
				op->group_id1 : 0;
	uint16_t group2 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F ?
				op->group_id2 : 0;
	uint16_t group3 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F ?
				op->group_id3 : 0;
	uint8_t control = op->req_flags &
				RTE_REGEX_OPS_REQ_MATCH_HIGH_PRIORITY_F ? 1 : 0;

	/* For backward compatibility. */
	if (!(op->req_flags & (RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F)))
		group0 = op->group_id0;
	uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes + wqe_offset;
	int ds = 4; /* ctrl + meta + input + output */

	set_wqe_ctrl_seg((struct mlx5_wqe_ctrl_seg *)wqe,
			 (priv->has_umr ? (pi * 4 + 3) : pi),
			 MLX5_OPCODE_MMO, MLX5_OPC_MOD_MMO_REGEX,
			 qp_obj->qp_obj.qp->id, 0, ds, 0, 0);
	set_regex_ctrl_seg(wqe + MLX5_REGEX_WQE_CTRL_OFFSET, 0, group0,
			   group1, group2, group3, control);
	struct mlx5_wqe_data_seg *input_seg =
		(struct mlx5_wqe_data_seg *)(wqe +
					     MLX5_REGEX_WQE_GATHER_OFFSET);
	input_seg->byte_count = rte_cpu_to_be_32(klm->byte_count);
	input_seg->addr = rte_cpu_to_be_64(klm->address);
	input_seg->lkey = klm->mkey;
	job->user_id = op->user_id;
}

static inline void
prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
	 struct mlx5_regex_hw_qp *qp_obj, struct rte_regex_ops *op,
	 struct mlx5_regex_job *job)
{
	struct mlx5_klm klm;

	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
	klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, op->mbuf);
	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
	__prep_one(priv, qp_obj, op, job, qp_obj->pi, &klm);
	qp_obj->db_pi = qp_obj->pi;
	qp_obj->pi = (qp_obj->pi + 1) & MLX5_REGEX_MAX_WQE_INDEX;
}

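/*
 * Ring the doorbell for the last prepared WQE. The sequence below follows
 * the usual mlx5 convention: request a CQE on the last WQE, publish the
 * new producer index in the doorbell record, then write the first 8 bytes
 * of the WQE control segment to the UAR doorbell register
 * (base_addr + 0x800), with memory barriers ordering each step against
 * the previous ones.
 */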
static inline void
send_doorbell(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj)
{
	struct mlx5dv_devx_uar *uar = priv->uar;
	size_t wqe_offset = (qp_obj->db_pi & (qp_size_get(qp_obj) - 1)) *
		(MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
		(priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
	uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes + wqe_offset;
	/* OR into fm_ce_se rather than assign, so a set fence bit is kept. */
	((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
	uint64_t *doorbell_addr =
		(uint64_t *)((uint8_t *)uar->base_addr + 0x800);
	rte_io_wmb();
	qp_obj->qp_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32((priv->has_umr ?
					(qp_obj->db_pi * 4 + 3) : qp_obj->db_pi)
					& MLX5_REGEX_MAX_WQE_INDEX);
	rte_wmb();
	*doorbell_addr = *(volatile uint64_t *)wqe;
	rte_wmb();
}

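/*
 * The number of free descriptors is the ring size minus the distance
 * between PI and CI; PI and CI wrap at MLX5_REGEX_MAX_WQE_INDEX, or at a
 * quarter of it in UMR mode, where they count WQE sets.
 */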
static inline int
get_free(struct mlx5_regex_hw_qp *qp, uint8_t has_umr)
{
	return (qp_size_get(qp) - ((qp->pi - qp->ci) &
			(has_umr ? (MLX5_REGEX_MAX_WQE_INDEX >> 2) :
			MLX5_REGEX_MAX_WQE_INDEX)));
}

static inline uint32_t
job_id_get(uint32_t qid, size_t qp_size, size_t index)
{
	return qid * qp_size + (index & (qp_size - 1));
}

#ifdef HAVE_MLX5_UMR_IMKEY
static inline int
mkey_klm_available(struct mlx5_klm *klm, uint32_t pos, uint32_t new)
{
	return (klm && ((pos + new) <= MLX5_REGEX_MAX_KLM_NUM));
}

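/*
 * Turn the NOP WQE at the head of a WQE set into a UMR WQE. A non-inline
 * KLM UMR WQE is built here as four consecutive segments: a control
 * segment, a UMR control segment, an mkey context segment and one KLM
 * pointing at the out-of-line KLM array.
 */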
static inline void
complete_umr_wqe(struct mlx5_regex_qp *qp, struct mlx5_regex_hw_qp *qp_obj,
		 struct mlx5_regex_job *mkey_job,
		 size_t umr_index, uint32_t klm_size, uint32_t total_len)
{
	size_t wqe_offset = (umr_index & (qp_size_get(qp_obj) - 1)) *
		(MLX5_SEND_WQE_BB * 4);
	struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
				   (uintptr_t)qp_obj->qp_obj.wqes + wqe_offset);
	struct mlx5_wqe_umr_ctrl_seg *ucseg =
				(struct mlx5_wqe_umr_ctrl_seg *)(wqe + 1);
	struct mlx5_wqe_mkey_context_seg *mkc =
				(struct mlx5_wqe_mkey_context_seg *)(ucseg + 1);
	struct mlx5_klm *iklm = (struct mlx5_klm *)(mkc + 1);
	uint16_t klm_align = RTE_ALIGN(klm_size, 4);

	memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
	/* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */
	set_wqe_ctrl_seg(wqe, (umr_index * 4), MLX5_OPCODE_UMR,
			 0, qp_obj->qp_obj.qp->id, 0, 9, 0,
			 rte_cpu_to_be_32(mkey_job->imkey->id));
	/* Set UMR WQE control seg. */
	ucseg->mkey_mask |= rte_cpu_to_be_64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN |
				MLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET |
				MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE);
	ucseg->klm_octowords = rte_cpu_to_be_16(klm_align);
	/* Set mkey context seg. */
	mkc->len = rte_cpu_to_be_64(total_len);
	mkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 |
					(mkey_job->imkey->id & 0xff));
	/* Set UMR pointer to data seg. */
	iklm->address = rte_cpu_to_be_64((uintptr_t)mkey_job->imkey_array);
	iklm->mkey = rte_cpu_to_be_32(qp->imkey_addr->lkey);
	iklm->byte_count = rte_cpu_to_be_32(klm_align);
	/* Clear the padding memory. */
	memset((uint8_t *)&mkey_job->imkey_array[klm_size], 0,
	       sizeof(struct mlx5_klm) * (klm_align - klm_size));

	/* Add the following RegEx WQE with fence. */
	wqe = (struct mlx5_wqe_ctrl_seg *)
				(((uint8_t *)wqe) + MLX5_REGEX_UMR_WQE_SIZE);
	wqe->fm_ce_se |= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
}

static inline void
prep_nop_regex_wqe_set(struct mlx5_regex_priv *priv,
		struct mlx5_regex_hw_qp *qp, struct rte_regex_ops *op,
		struct mlx5_regex_job *job, size_t pi, struct mlx5_klm *klm)
{
	size_t wqe_offset = (pi & (qp_size_get(qp) - 1)) *
			    (MLX5_SEND_WQE_BB << 2);
	struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
				   (uintptr_t)qp->qp_obj.wqes + wqe_offset);

	/* Clear the WQE memory used as UMR WQE previously. */
	if ((rte_be_to_cpu_32(wqe->opmod_idx_opcode) & 0xff) != MLX5_OPCODE_NOP)
		memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
	/* The UMR WQE is 9 DS; pad the NOP WQE to 3 WQEBBs (12 DS). */
	set_wqe_ctrl_seg(wqe, pi * 4, MLX5_OPCODE_NOP, 0, qp->qp_obj.qp->id,
			 0, 12, 0, 0);
	__prep_one(priv, qp, op, job, pi, klm);
}

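/*
 * Prepare one UMR-mode WQE set per op. Single-segment mbufs reference
 * their data directly; multi-segment mbufs get their segments collected
 * into an indirect mkey's KLM array, and the NOP WQE heading the set that
 * owns the mkey is later rewritten into a UMR WQE by complete_umr_wqe().
 */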
static inline void
prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
		struct mlx5_regex_hw_qp *qp_obj, struct rte_regex_ops **op,
		size_t nb_ops)
{
	struct mlx5_regex_job *job = NULL;
	size_t hw_qpid = qp_obj->qpn, mkey_job_id = 0;
	size_t left_ops = nb_ops;
	uint32_t klm_num = 0;
	uint32_t len = 0;
	struct mlx5_klm *mkey_klm = NULL;
	struct mlx5_klm klm;
	uintptr_t addr;

	while (left_ops--)
		rte_prefetch0(op[left_ops]);
	left_ops = nb_ops;
	/*
	 * Build the WQE sets in reverse order. A burst may consume
	 * multiple mkeys, and building in order would make it hard to
	 * address the last mkey index, since the last RegEx WQE's index
	 * is only known once the build finishes.
	 */
	while (left_ops--) {
		struct rte_mbuf *mbuf = op[left_ops]->mbuf;
		size_t pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, left_ops);

		if (mbuf->nb_segs > 1) {
			size_t scatter_size = 0;

			if (!mkey_klm_available(mkey_klm, klm_num,
						mbuf->nb_segs)) {
				/*
				 * The mkey's KLM is full, create the UMR
				 * WQE in the next WQE set.
				 */
				if (mkey_klm)
					complete_umr_wqe(qp, qp_obj,
						&qp->jobs[mkey_job_id],
						MLX5_REGEX_UMR_QP_PI_IDX(pi, 1),
						klm_num, len);
				/*
				 * Get the indirect mkey and KLM array index
				 * from the last WQE set.
				 */
				mkey_job_id = job_id_get(hw_qpid,
						qp_size_get(qp_obj), pi);
				mkey_klm = qp->jobs[mkey_job_id].imkey_array;
				klm_num = 0;
				len = 0;
			}
			/* Build RegEx WQE's data segment KLM. */
			klm.address = len;
			klm.mkey = rte_cpu_to_be_32
					(qp->jobs[mkey_job_id].imkey->id);
			while (mbuf) {
				addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
				/* Build indirect mkey seg's KLM. */
				mkey_klm->mkey = mlx5_regex_mb2mr(priv,
								  &qp->mr_ctrl,
								  mbuf);
				mkey_klm->address = rte_cpu_to_be_64(addr);
				mkey_klm->byte_count = rte_cpu_to_be_32
						(rte_pktmbuf_data_len(mbuf));
				/*
				 * Save the mbuf's total size for RegEx data
				 * segment.
				 */
				scatter_size += rte_pktmbuf_data_len(mbuf);
				mkey_klm++;
				klm_num++;
				mbuf = mbuf->next;
			}
			len += scatter_size;
			klm.byte_count = scatter_size;
		} else {
			/* The single mbuf case. Build the KLM directly. */
			klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, mbuf);
			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
			klm.byte_count = rte_pktmbuf_data_len(mbuf);
		}
		job = &qp->jobs[job_id_get(hw_qpid, qp_size_get(qp_obj), pi)];
		/*
		 * Build the nop + RegEx WQE set by default. The first nop
		 * WQE will be updated later as a UMR WQE if scattered mbufs
		 * exist.
		 */
		prep_nop_regex_wqe_set(priv, qp_obj, op[left_ops], job, pi,
					&klm);
	}
	/*
	 * Scattered mbufs have been added to the KLM array. Complete the
	 * UMR WQE build, updating the first nop WQE into a UMR WQE.
	 */
	if (mkey_klm)
		complete_umr_wqe(qp, qp_obj, &qp->jobs[mkey_job_id], qp_obj->pi,
				 klm_num, len);
	qp_obj->db_pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops - 1);
	qp_obj->pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops);
}

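/*
 * Enqueue a burst of ops using UMR WQE sets; this is the enqueue path
 * used when the device supports UMR and scattered mbufs may be submitted.
 */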
uint16_t
mlx5_regexdev_enqueue_gga(struct rte_regexdev *dev, uint16_t qp_id,
			  struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_hw_qp *qp_obj;
	size_t hw_qpid, nb_left = nb_ops, nb_desc;

	while ((hw_qpid = ffs(queue->free_qps))) {
		hw_qpid--; /* ffs returns 1 for bit 0 */
		qp_obj = &queue->qps[hw_qpid];
		nb_desc = get_free(qp_obj, priv->has_umr);
		if (nb_desc) {
			/* The number of ops handled cannot exceed nb_left. */
			if (nb_desc > nb_left)
				nb_desc = nb_left;
			else
				queue->free_qps &= ~(1 << hw_qpid);
			prep_regex_umr_wqe_set(priv, queue, qp_obj, ops,
				nb_desc);
			send_doorbell(priv, qp_obj);
			nb_left -= nb_desc;
		}
		if (!nb_left)
			break;
		ops += nb_desc;
	}
	nb_ops -= nb_left;
	queue->pi += nb_ops;
	return nb_ops;
}
#endif

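/*
 * Default (non-UMR) enqueue path: one RegEx WQE per op, spread across
 * the hardware QPs that still have free descriptors.
 */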
uint16_t
mlx5_regexdev_enqueue(struct rte_regexdev *dev, uint16_t qp_id,
		      struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_hw_qp *qp_obj;
	size_t hw_qpid, job_id, i = 0;

	while ((hw_qpid = ffs(queue->free_qps))) {
		hw_qpid--; /* ffs returns 1 for bit 0 */
		qp_obj = &queue->qps[hw_qpid];
		while (get_free(qp_obj, priv->has_umr)) {
			job_id = job_id_get(hw_qpid, qp_size_get(qp_obj),
				qp_obj->pi);
			prep_one(priv, queue, qp_obj, ops[i],
				&queue->jobs[job_id]);
			i++;
			if (unlikely(i == nb_ops)) {
				send_doorbell(priv, qp_obj);
				goto out;
			}
		}
		queue->free_qps &= ~(1 << hw_qpid);
		send_doorbell(priv, qp_obj);
	}

out:
	queue->pi += i;
	return i;
}

#define MLX5_REGEX_RESP_SZ 8

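/*
 * Copy one job's completion into the user's op: match count and match
 * tuples from the output buffer, plus response flags decoded from the
 * RXP status word in the metadata.
 */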
static inline void
extract_result(struct rte_regex_ops *op, struct mlx5_regex_job *job)
{
	size_t j;
	size_t offset;
	uint16_t status;

	op->user_id = job->user_id;
	op->nb_matches = MLX5_GET_VOLATILE(regexp_metadata, job->metadata +
					   MLX5_REGEX_METADATA_OFF,
					   match_count);
	op->nb_actual_matches = MLX5_GET_VOLATILE(regexp_metadata,
						  job->metadata +
						  MLX5_REGEX_METADATA_OFF,
						  detected_match_count);
	for (j = 0; j < op->nb_matches; j++) {
		offset = MLX5_REGEX_RESP_SZ * j;
		op->matches[j].rule_id =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), rule_id);
		op->matches[j].start_offset =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), start_ptr);
		op->matches[j].len =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), length);
	}
	status = MLX5_GET_VOLATILE(regexp_metadata, job->metadata +
				   MLX5_REGEX_METADATA_OFF,
				   status);
	op->rsp_flags = 0;
	if (status & MLX5_RXP_RESP_STATUS_PMI_SOJ)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_PMI_SOJ_F;
	if (status & MLX5_RXP_RESP_STATUS_PMI_EOJ)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_PMI_EOJ_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_LATENCY)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_SCAN_TIMEOUT_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_MATCH)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_MATCH_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_PREFIX)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_PREFIX_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_PRI_THREADS)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_RESOURCE_LIMIT_REACHED_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_SEC_THREADS)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_RESOURCE_LIMIT_REACHED_F;
}

static inline volatile struct mlx5_cqe *
poll_one(struct mlx5_regex_cq *cq)
{
	volatile struct mlx5_cqe *cqe;
	size_t next_cqe_offset;

	next_cqe_offset = (cq->ci & (cq_size_get(cq) - 1));
	cqe = (volatile struct mlx5_cqe *)(cq->cq_obj.cqes + next_cqe_offset);
	rte_io_wmb();

	int ret = check_cqe(cqe, cq_size_get(cq), cq->ci);

	if (unlikely(ret == MLX5_CQE_STATUS_ERR)) {
		DRV_LOG(ERR, "Completion with error on qp 0x%x", 0);
		return NULL;
	}

	if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN))
		return NULL;

	return cqe;
}

/**
 * DPDK callback for dequeue.
 *
 * @param dev
 *   Pointer to the regex dev structure.
 * @param qp_id
 *   The queue to dequeue the traffic from.
 * @param ops
 *   List of regex ops to dequeue.
 * @param nb_ops
 *   Number of ops in ops parameter.
 *
 * @return
 *   Number of operations successfully dequeued (<= nb_ops).
 */
uint16_t
mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,
		      struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_cq *cq = &queue->cq;
	volatile struct mlx5_cqe *cqe;
	size_t i = 0;

	while ((cqe = poll_one(cq))) {
		uint16_t wq_counter
			= (rte_be_to_cpu_16(cqe->wqe_counter) + 1) &
			  MLX5_REGEX_MAX_WQE_INDEX;
		size_t hw_qpid = cqe->rsvd3[2];
		struct mlx5_regex_hw_qp *qp_obj = &queue->qps[hw_qpid];

		/* In UMR mode the WQE counter advances per WQE set (4 WQEBBs). */
		if (priv->has_umr)
			wq_counter >>= 2;
		while (qp_obj->ci != wq_counter) {
			if (unlikely(i == nb_ops)) {
				/* Return without updating cq->ci. */
				goto out;
			}
			uint32_t job_id = job_id_get(hw_qpid,
					qp_size_get(qp_obj), qp_obj->ci);
			extract_result(ops[i], &queue->jobs[job_id]);
			qp_obj->ci = (qp_obj->ci + 1) & (priv->has_umr ?
				 (MLX5_REGEX_MAX_WQE_INDEX >> 2) :
				  MLX5_REGEX_MAX_WQE_INDEX);
			i++;
		}
		cq->ci = (cq->ci + 1) & 0xffffff;
		rte_wmb();
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->ci);
		queue->free_qps |= (1 << hw_qpid);
	}

out:
	queue->ci += i;
	return i;
}

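/*
 * Pre-fill each hardware QP's WQE ring with the parts that never change
 * per job: a NOP placeholder for the UMR slot (when UMR is enabled) and
 * the metadata and output (scatter) segments of every RegEx WQE, leaving
 * only the control and gather segments to be written per op in the fast
 * path.
 */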
static void
setup_qps(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *queue)
{
	size_t hw_qpid, entry;
	uint32_t job_id;

	for (hw_qpid = 0; hw_qpid < queue->nb_obj; hw_qpid++) {
		struct mlx5_regex_hw_qp *qp_obj = &queue->qps[hw_qpid];
		uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes;

		for (entry = 0; entry < qp_size_get(qp_obj); entry++) {
			job_id = hw_qpid * qp_size_get(qp_obj) + entry;
			struct mlx5_regex_job *job = &queue->jobs[job_id];

			/* Fill the UMR WQE slot with a NOP in advance. */
			if (priv->has_umr) {
				set_wqe_ctrl_seg
					((struct mlx5_wqe_ctrl_seg *)wqe,
					 entry * 2, MLX5_OPCODE_NOP, 0,
					 qp_obj->qp_obj.qp->id, 0, 12, 0, 0);
				wqe += MLX5_REGEX_UMR_WQE_SIZE;
			}
			set_metadata_seg((struct mlx5_wqe_metadata_seg *)
					 (wqe + MLX5_REGEX_WQE_METADATA_OFFSET),
					 0, queue->metadata->lkey,
					 (uintptr_t)job->metadata);
			set_data_seg((struct mlx5_wqe_data_seg *)
				     (wqe + MLX5_REGEX_WQE_SCATTER_OFFSET),
				     MLX5_REGEX_MAX_OUTPUT,
				     queue->outputs->lkey,
				     (uintptr_t)job->output);
			wqe += 64; /* Next RegEx WQE (one WQEBB). */
		}
		queue->free_qps |= 1 << hw_qpid;
	}
}

static int
setup_buffers(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp)
{
	struct ibv_pd *pd = priv->cdev->pd;
	uint32_t i;
	int err;

	void *ptr = rte_calloc(__func__, qp->nb_desc,
			       MLX5_REGEX_METADATA_SIZE,
			       MLX5_REGEX_METADATA_SIZE);
	if (!ptr)
		return -ENOMEM;

	qp->metadata = mlx5_glue->reg_mr(pd, ptr,
					 MLX5_REGEX_METADATA_SIZE * qp->nb_desc,
					 IBV_ACCESS_LOCAL_WRITE);
	if (!qp->metadata) {
		DRV_LOG(ERR, "Failed to register metadata");
		rte_free(ptr);
		return -EINVAL;
	}

	ptr = rte_calloc(__func__, qp->nb_desc,
			 MLX5_REGEX_MAX_OUTPUT,
			 MLX5_REGEX_MAX_OUTPUT);
	if (!ptr) {
		err = -ENOMEM;
		goto err_output;
	}
	qp->outputs = mlx5_glue->reg_mr(pd, ptr,
					MLX5_REGEX_MAX_OUTPUT * qp->nb_desc,
					IBV_ACCESS_LOCAL_WRITE);
	if (!qp->outputs) {
		rte_free(ptr);
		DRV_LOG(ERR, "Failed to register output");
		err = -EINVAL;
		goto err_output;
	}

	if (priv->has_umr) {
		ptr = rte_calloc(__func__, qp->nb_desc, MLX5_REGEX_KLMS_SIZE,
				 MLX5_REGEX_KLMS_SIZE);
		if (!ptr) {
			err = -ENOMEM;
			goto err_imkey;
		}
		qp->imkey_addr = mlx5_glue->reg_mr(pd, ptr,
					MLX5_REGEX_KLMS_SIZE * qp->nb_desc,
					IBV_ACCESS_LOCAL_WRITE);
		if (!qp->imkey_addr) {
			rte_free(ptr);
			DRV_LOG(ERR, "Failed to register KLM array");
			err = -EINVAL;
			goto err_imkey;
		}
	}

	/* Distribute buffers to jobs. */
	for (i = 0; i < qp->nb_desc; i++) {
		qp->jobs[i].output =
			(uint8_t *)qp->outputs->addr +
			(i % qp->nb_desc) * MLX5_REGEX_MAX_OUTPUT;
		qp->jobs[i].metadata =
			(uint8_t *)qp->metadata->addr +
			(i % qp->nb_desc) * MLX5_REGEX_METADATA_SIZE;
		if (qp->imkey_addr)
			qp->jobs[i].imkey_array = (struct mlx5_klm *)
				qp->imkey_addr->addr +
				(i % qp->nb_desc) * MLX5_REGEX_MAX_KLM_NUM;
	}

	return 0;

err_imkey:
	ptr = qp->outputs->addr;
	rte_free(ptr);
	mlx5_glue->dereg_mr(qp->outputs);
err_output:
	ptr = qp->metadata->addr;
	rte_free(ptr);
	mlx5_glue->dereg_mr(qp->metadata);
	return err;
}

int
mlx5_regexdev_setup_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)
{
	struct mlx5_regex_qp *qp = &priv->qps[qp_id];
	struct mlx5_klm klm = { 0 };
	struct mlx5_devx_mkey_attr attr = {
		.klm_array = &klm,
		.klm_num = 1,
		.umr_en = 1,
	};
	uint32_t i;
	int err = 0;

	qp->jobs = rte_calloc(__func__, qp->nb_desc, sizeof(*qp->jobs), 64);
	if (!qp->jobs)
		return -ENOMEM;
	err = setup_buffers(priv, qp);
	if (err) {
		rte_free(qp->jobs);
		return err;
	}

	setup_qps(priv, qp);

	if (priv->has_umr) {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		attr.pd = priv->cdev->pdn;
#endif
		for (i = 0; i < qp->nb_desc; i++) {
			attr.klm_num = MLX5_REGEX_MAX_KLM_NUM;
			attr.klm_array = qp->jobs[i].imkey_array;
			qp->jobs[i].imkey = mlx5_devx_cmd_mkey_create
						       (priv->cdev->ctx, &attr);
			if (!qp->jobs[i].imkey) {
				err = -rte_errno;
				DRV_LOG(ERR, "Failed to allocate imkey.");
				mlx5_regexdev_teardown_fastpath(priv, qp_id);
				/* The teardown freed qp->jobs; stop here. */
				break;
			}
		}
	}
	return err;
}

static void
free_buffers(struct mlx5_regex_qp *qp)
{
	void *ptr;

	if (qp->imkey_addr) {
		/* Save the buffer address before the MR object is freed. */
		ptr = qp->imkey_addr->addr;
		mlx5_glue->dereg_mr(qp->imkey_addr);
		rte_free(ptr);
	}
	if (qp->metadata) {
		ptr = qp->metadata->addr;
		mlx5_glue->dereg_mr(qp->metadata);
		rte_free(ptr);
	}
	if (qp->outputs) {
		ptr = qp->outputs->addr;
		mlx5_glue->dereg_mr(qp->outputs);
		rte_free(ptr);
	}
}

void
mlx5_regexdev_teardown_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)
{
	struct mlx5_regex_qp *qp = &priv->qps[qp_id];
	uint32_t i;

	if (qp) {
		for (i = 0; i < qp->nb_desc; i++) {
			if (qp->jobs[i].imkey)
				claim_zero(mlx5_devx_cmd_destroy
							(qp->jobs[i].imkey));
		}
		free_buffers(qp);
		rte_free(qp->jobs);
	}
}