/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif
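/*
 * The architecture-specific header included above provides the SIMD helpers
 * used below: rxq_copy_mbuf_v(), rxq_cq_decompress_v() and rxq_cq_process_v().
 */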

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

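		/*
		 * Completions that ended in error were flagged by the burst
		 * routine with packet_type == RTE_PTYPE_ALL_MASK; free those
		 * mbufs and compact the array around the good packets.
		 */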
		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
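		/*
		 * Look up the lkey per mbuf only when more than one MR is
		 * cached for this queue; otherwise the lkey already stored in
		 * the WQE data segment is reused as-is.
		 */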
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers, the buf_addr field must be
				 * read instead of using rte_mbuf_buf_addr().
				 * This touches the mbuf itself and may impact
				 * performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
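		/*
		 * Make sure the WQE address updates are committed to memory
		 * before the RQ doorbell record is updated.
		 */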
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

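	/*
	 * Replenish only when enough descriptors are free and the number of
	 * mbufs allocated ahead of the application stays bounded by the
	 * replenish threshold plus one maximum burst.
	 */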
	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

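		/*
		 * A zero packet length means no packet was received for this
		 * element; release the mbuf and skip the remaining strides of
		 * the current WQE.
		 */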
		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
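		/* Number of strides occupied by the packet (pkt_len rounded up). */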
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received, including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

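	/*
	 * Repeat until the requested number of packets is received or no new
	 * CQE is available.
	 */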
	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received, including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
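	/*
	 * rxq_copy_mprq_mbuf_v() may skip empty or dropped entries, so the
	 * number of packets actually returned (cp_pkt) can be less than
	 * rcvd_pkt.
	 */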
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

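	/*
	 * Vectorized RX must be enabled by configuration and is not supported
	 * for multi-segment (sges_n != 0) or LRO queues.
	 */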
	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

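	/* The vector Rx routines require at least 128-bit SIMD support at runtime. */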
	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}