/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t skip_cnt;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

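	/*
	 * The vectorized burst routines do not drop errored completions;
	 * they mark the corresponding mbufs with RTE_PTYPE_ALL_MASK instead.
	 * Compact the array here: free the marked mbufs and keep the rest.
	 */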
	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
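	/* Number of ring slots that can be refilled with fresh mbufs. */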
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers, use the buf_addr pointer
				 * instead of rte_mbuf_buf_addr(). It touches
				 * the mbuf itself and may impact performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
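		/*
		 * The vector Rx routines work on groups of
		 * MLX5_VPMD_DESCS_PER_LOOP descriptors and may load mbuf
		 * pointers beyond the last completed one; point the next
		 * entries at the dummy mbuf so no stale pointer is used.
		 */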
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
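		/*
		 * Make the refilled WQEs visible to the HW before
		 * the doorbell record update.
		 */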
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

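	/*
	 * Replenish only when enough slots are free and the number of mbufs
	 * allocated ahead of the application (elts_ci - rq_pi) stays bounded.
	 */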
	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

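		/*
		 * A zero packet length carries no data; consume the remaining
		 * strides of the current WQE and return the mbuf to the pool.
		 */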
		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
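		/* Number of strides occupied by the packet, rounded up. */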
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
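	/*
	 * Advance the consumer indexes by the number of processed entries,
	 * including dropped and empty ones.
	 */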
	rxq->rq_pi += i;
	rxq->cq_ci += i;
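	/* If whole WQEs were recycled, publish the new index to the HW. */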
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
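	/* Prefetch the CQEs that are about to be parsed. */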
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE is seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
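		/*
		 * The loop stopped at a compressed CQE; expand it and return
		 * as many of the decompressed packets as fit in this burst.
		 */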
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

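	/* Poll until the burst is filled or the CQ has no new completions. */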
	do {
		err = 0;
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
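		/*
		 * Order CQE consumption before the CQ doorbell record update
		 * that releases the CQEs to the HW.
		 */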
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE is seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
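	/*
	 * Unlike the single-stride path, packets must be copied from or
	 * attached to the MPRQ buffers before they are returned.
	 */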
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

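	/* Neither scattered (multi-segment) RX nor LRO is supported. */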
	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

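	/* At least 128-bit SIMD must be enabled at runtime. */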
	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}
595