/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

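	/*
	 * Error mbufs were flagged by the burst routine with packet_type
	 * RTE_PTYPE_ALL_MASK; free them and compact the good packets in
	 * place.
	 */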
	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

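	/*
	 * n is the number of free ring slots: queue size minus descriptors
	 * posted (rq_ci) but not yet returned to the application (rq_pi).
	 * Replenish only once enough free slots have accumulated.
	 */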
	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
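		/*
		 * Leave MLX5_VPMD_DESCS_PER_LOOP slots unfilled: the vector
		 * burst processes descriptors in groups of that size, and
		 * the fake_mbuf entries written below back the slots it may
		 * read past the last replenished one.
		 */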
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		for (i = 0; i < n; ++i) {
			void *buf_addr;

			/*
			 * To support mbufs with externally attached data
			 * buffers, the buf_addr pointer has to be read
			 * instead of using rte_mbuf_buf_addr(). This touches
			 * the mbuf itself and may impact performance.
			 */
			buf_addr = elts[i]->buf_addr;
			wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
						      RTE_PKTMBUF_HEADROOM);
			/* If there's a single MR, no need to replace LKey. */
			if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
				     > 1))
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
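		/*
		 * Ensure the WQE updates are visible to the device before
		 * the doorbell record is updated.
		 */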
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = rxq->elts_ci - rxq->rq_pi;
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];

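	/*
	 * For MPRQ the device receives into separate stride buffers that are
	 * replaced in rxq_copy_mprq_mbuf_v(); this routine only refills the
	 * SW ring of mbufs that packets get copied or attached to, tracked
	 * by elts_ci (allocated) and rq_pi (consumed).
	 */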
	if (n <= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n + MLX5_VPMD_RX_MAX_BURST >=
			    MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n + MLX5_VPMD_RX_MAX_BURST, elts_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

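		/*
		 * A zero packet length marks an entry carrying no packet:
		 * consume all remaining strides of the current WQE, recycle
		 * the mbuf and undo the packet counter.
		 */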
		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
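		/* Strides consumed by this packet: ceil(pkt_len / strd_sz). */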
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			rte_pktmbuf_free_seg(elts[i]);
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive a burst of packets. An errored completion also consumes an mbuf,
 * but the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be
 * freed before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
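	/*
	 * A compressed CQE session may expand to more packets than one burst
	 * can return; rxq->decompressed tracks the leftovers, which are
	 * drained here before any new CQEs are processed.
	 */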
	/* See if there are unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
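	/* Order all queue updates before the CQ doorbell record write. */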
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

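	/*
	 * Keep calling the burst routine until the pkts[] array is full or
	 * the CQ is drained; errored mbufs, if any, are purged after each
	 * iteration.
	 */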
	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive a burst of packets. An errored completion also consumes an mbuf,
 * but the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be
 * freed before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
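	/*
	 * Unlike the non-MPRQ path, rxq_copy_mprq_mbuf_v() advances rq_pi
	 * and cq_ci and rings the doorbells itself, so no explicit index
	 * update is needed here.
	 */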
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check that an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check that a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

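	/* The vector RX path requires at least 128-bit SIMD at runtime. */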
	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues must support the vector path. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_check_vec_support(rxq) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}