xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision 2bf48044dca1892e571fd4964eecaacf6cb0c1c2)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t skip_cnt;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers we have to use the buf_addr
				 * field instead of rte_mbuf_buf_addr(). This
				 * touches the mbuf itself and may impact
				 * performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}
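
/*
 * Illustration (hypothetical numbers, not part of the driver logic): with
 * elts_n = 8 the queue holds q_n = 256 descriptors. If rq_ci = 300 and
 * rq_pi = 100, 200 descriptors are still in flight and n = 256 - 200 = 56
 * slots can be replenished, provided n >= rq_repl_thresh (n is further
 * reduced by MLX5_VPMD_DESCS_PER_LOOP and clamped at the queue end before
 * the bulk allocation). After posting, the next MLX5_VPMD_DESCS_PER_LOOP
 * SW ring entries are pointed at fake_mbuf so that the vectorized burst
 * never dereferences a consumed mbuf.
 */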

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}
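
/*
 * Note: unlike mlx5_rx_replenish_bulk_mbuf(), the MPRQ variant above only
 * refills the SW ring mbufs (tracked by elts_ci) that are later consumed by
 * rxq_copy_mprq_mbuf_v(); it does not ring the RQ doorbell. For MPRQ, the
 * hardware WQEs are replaced and *rq_db is updated in rxq_copy_mprq_mbuf_v()
 * once rq_ci advances to the next WQE.
 */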

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}
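
/*
 * Worked example (hypothetical values): with log_strd_sz = 11 the stride size
 * is strd_sz = 2048 bytes, so a packet of pkt_len = 3000 bytes consumes
 * strd_cnt = 3000 / 2048 + (3000 % 2048 ? 1 : 0) = 2 strides. In other words,
 * the per-packet stride count computed above is ceil(pkt_len / strd_sz),
 * which is what advances consumed_strd.
 */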

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
		ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
		if (ret != MLX5_CQE_STATUS_SW_OWN ||
		    MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
			rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
				   sizeof(struct rte_mbuf));
	}
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}
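
/*
 * Usage sketch (application side, not part of this file): mlx5_rx_burst_vec()
 * is installed as the device rx_pkt_burst callback when the vectorized path
 * is selected, so applications reach it through the generic rte_eth_rx_burst()
 * API, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *	for (uint16_t k = 0; k < nb; ++k)
 *		rte_pktmbuf_free(pkts[k]);
 *
 * port_id and queue_id are assumed to refer to a started mlx5 port and queue.
 */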

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
		ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
		if (ret != MLX5_CQE_STATUS_SW_OWN ||
		    MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
			rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
				   sizeof(struct rte_mbuf));
	}
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}
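
/*
 * Note: these checks are consulted when the PMD picks its Rx burst routine
 * (e.g. by mlx5_select_rx_function()); a negative return value is expected
 * to make the PMD fall back to a non-vectorized Rx burst path.
 */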
615