xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision 89b5642d0d45c22c0ceab57efe3fab3b49ff4324)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t skip_cnt;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
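		/*
		 * The vectorized burst routines mark errored completions by
		 * setting packet_type to RTE_PTYPE_ALL_MASK; such mbufs are
		 * freed here instead of being returned to the application.
		 */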
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
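		/*
		 * Enough consumed descriptors have accumulated: allocate
		 * mbufs from the mempool in one bulk and post them to the
		 * device.
		 */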
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers, use the buf_addr pointer
				 * instead of rte_mbuf_buf_addr(). This touches
				 * the mbuf itself and may impact performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
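		/*
		 * Make the WQE address updates visible before the doorbell
		 * record is written.
		 */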
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
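		/*
		 * Refill only when enough free slots have accumulated and
		 * the number of mbufs already handed out (elts_ci - rq_pi)
		 * stays bounded, so the SW ring does not run too far ahead
		 * of the application.
		 */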
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			     MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
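			/*
			 * Nothing to attach for this element: mark all the
			 * strides of the current WQE as consumed, release
			 * the mbuf and fix up the soft packet counter.
			 */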
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
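		/* Number of strides occupied by this packet (rounded up). */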
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning them to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
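	/* Prefetch the CQEs that are about to be scanned. */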
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
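	/*
	 * rxq_cq_process_v() returns the number of regular (non-compressed)
	 * completions handled; comp_idx is left at MLX5_VPMD_DESCS_PER_LOOP
	 * when no compressed CQE has been met.
	 */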
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], true);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
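		/*
		 * Repeat the vector burst until the pkts array is full or no
		 * more completions are available; the CQ doorbell is updated
		 * after each iteration that saw completions.
		 */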
		err = 0;
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes a mbuf, but
 * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning them to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
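	/*
	 * Only as many packets can be returned as there are pre-allocated
	 * mbufs between rq_pi and elts_ci; rxq_copy_mprq_mbuf_v() later
	 * copies or attaches the MPRQ data into them.
	 */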
	/* See if there are unreturned mbufs from a compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], false);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}