/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif
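
/*
 * Note: the architecture-specific header included above is expected to
 * provide the SIMD helpers used below (rxq_copy_mbuf_v(), rxq_cq_process_v()
 * and rxq_cq_decompress_v()); only the architecture-independent control
 * logic lives in this file.
 */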

/**
 * Skip errored packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array storing the received packets; compacted in place.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of non-errored packets kept in the array (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

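	/*
	 * n is the number of free RQ slots, i.e. the gap between the
	 * descriptors posted to HW (rq_ci) and the mbufs already handed
	 * back to the application (rq_pi). Replenish only once it reaches
	 * the configured threshold so that mempool gets are done in bulk.
	 */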
	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Do not cross the queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * In order to support mbufs with externally
				 * attached data buffers, the buf_addr pointer
				 * is used instead of rte_mbuf_buf_addr().
				 * This touches the mbuf itself and may impact
				 * performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
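		/*
		 * Make sure the WQE address updates above are globally
		 * visible before the RQ doorbell record is written, so the
		 * device never sees the new producer index ahead of the data.
		 */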
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

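	/*
	 * Replenish only when enough elements were consumed (n above the
	 * threshold) and, at the same time, the number of mbufs already
	 * allocated but not yet delivered (elts_ci - rq_pi) has dropped to
	 * the threshold or below; this appears intended to bound the number
	 * of outstanding mbufs held by the MPRQ SW ring.
	 */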
	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Do not cross the queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit the replenish number to the threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
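		/*
		 * Number of strides occupied by this packet in the MPRQ
		 * buffer, i.e. pkt_len divided by the stride size rounded up.
		 */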
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
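	/*
	 * Release the processed completions: advance the SW indexes and
	 * update the CQ doorbell; ring the RQ doorbell only if at least one
	 * whole MPRQ WQE has been consumed and replaced in the loop above.
	 */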
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Such marked mbufs must be
 * freed before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one errored packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
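	/*
	 * A single pass returns at most MLX5_VPMD_RX_MAX_BURST packets;
	 * the mlx5_rx_burst_vec() wrapper keeps calling until the caller's
	 * budget is exhausted or the CQ runs dry.
	 */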
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
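	/*
	 * The SIMD loop in rxq_cq_process_v() handles
	 * MLX5_VPMD_DESCS_PER_LOOP CQEs per iteration, so round the budget
	 * down to a multiple of it and keep it within both the element and
	 * the completion rings.
	 */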
	/* Do not overflow the pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Do not cross the queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE is seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
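	/*
	 * comp_idx < MLX5_VPMD_DESCS_PER_LOOP means the processing loop
	 * stopped on a compressed CQE. rxq_cq_decompress_v() expands it into
	 * mini-CQEs; packets that do not fit into this burst stay accounted
	 * in rxq->decompressed and are returned on the next call.
	 */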
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

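	/*
	 * Keep invoking the burst routine until the caller's array is full
	 * or no new CQE is available; errored completions flagged by the
	 * SIMD path are filtered out in place before being counted.
	 */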
	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Such marked mbufs must be
 * freed before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one errored packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

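	/*
	 * Unlike the single-packet RQ path, the packet data still sits in
	 * the shared MPRQ buffers at this point; rxq_copy_mprq_mbuf_v()
	 * later copies it into the mbufs or attaches it as an external
	 * buffer before the packets are handed to the caller.
	 */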
	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Do not overflow the pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Do not cross the queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	/* Do not move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE is seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

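	/*
	 * Vectorized RX is only taken for non-scattered (sges_n == 0),
	 * non-LRO queues, and it can be turned off globally through the
	 * rx_vec_en configuration option checked below.
	 */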
	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_check_vec_support(rxq) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}