/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Drop errored packets and compact the remaining ones in the array.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array of received packets; errored entries are freed and removed.
 * @param pkts_n
 *   Number of packets in the array.
 *
 * @return
 *   Number of packets kept after dropping errored ones (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t skip_cnt;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

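	/*
	 * rq_ci counts descriptors posted to the RQ and rq_pi counts packets
	 * already returned to the application, so "n" above is the number of
	 * free ring slots. Refill only in large chunks to amortize the cost
	 * of the bulk mempool allocation and the doorbell update.
	 */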
	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers, read the buf_addr pointer
				 * instead of using rte_mbuf_buf_addr(). This
				 * touches the mbuf itself and may impact
				 * performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
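		/*
		 * The vectorized burst routine may read mbuf pointers a few
		 * entries past the last valid descriptor, so the next
		 * MLX5_VPMD_DESCS_PER_LOOP slots are pointed at the static
		 * fake mbuf below to keep such speculative reads harmless.
		 */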
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
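		/* Make the refilled WQEs visible to HW before the doorbell record update. */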
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

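	/*
	 * Replenish only when enough free entries have accumulated and the
	 * number of mbufs already taken from the pool but not yet returned
	 * to the application (elts_ci - rq_pi) is still bounded.
	 */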
	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
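		/* Number of strides consumed by this packet, rounded up. */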
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
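	/* Ring the RQ doorbell only if complete WQEs have been consumed and replaced. */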
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs must be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one errored packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, RTE_ALIGN_FLOOR(rxq->rq_ci - rxq->rq_pi,
						MLX5_VPMD_DESCS_PER_LOOP));
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Copy title packet for future compressed sessions. */
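	/*
	 * With the enhanced CQE compression layout, the next compressed
	 * session is described relative to the last uncompressed completion,
	 * so keep a copy of its mbuf (the "title" packet) for the
	 * decompression routine.
	 */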
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], true);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
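		/* Release the processed CQEs to HW by updating the CQ doorbell record. */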
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs must be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one errored packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
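		/*
		 * The copy routine below may drop some of the decompressed
		 * packets, so the count handed back to the caller (cp_pkt)
		 * can be smaller than the count consumed here (rcvd_pkt).
		 */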
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], false);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
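		/* Release the processed CQEs to HW by updating the CQ doorbell record. */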
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

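	/* Vector Rx requires rx_vec_en and supports neither scattered Rx (sges_n != 0) nor LRO. */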
	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

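	/* Vectorized Rx requires a runtime SIMD bitwidth of at least 128 bits. */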
	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues must support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}