xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision 5fc2e5c27d69bcebb352d17603a1d3ca2628f17b)
18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2f0d2114fSYongseok Koh  * Copyright 2017 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2017 Mellanox Technologies, Ltd
4f0d2114fSYongseok Koh  */
5f0d2114fSYongseok Koh 
6f0d2114fSYongseok Koh #include <stdint.h>
7f0d2114fSYongseok Koh #include <string.h>
8f0d2114fSYongseok Koh #include <stdlib.h>
9f0d2114fSYongseok Koh 
10f0d2114fSYongseok Koh #include <rte_mbuf.h>
11f0d2114fSYongseok Koh #include <rte_mempool.h>
12f0d2114fSYongseok Koh #include <rte_prefetch.h>
132c5e0dd2SCiara Power #include <rte_vect.h>
14f0d2114fSYongseok Koh 
159d60f545SOphir Munk #include <mlx5_glue.h>
167b4f1e6bSMatan Azrad #include <mlx5_prm.h>
177b4f1e6bSMatan Azrad 
187b4f1e6bSMatan Azrad #include "mlx5_defs.h"
19f0d2114fSYongseok Koh #include "mlx5.h"
20f0d2114fSYongseok Koh #include "mlx5_utils.h"
21f0d2114fSYongseok Koh #include "mlx5_rxtx.h"
225bfc9fc1SYongseok Koh #include "mlx5_rxtx_vec.h"
23f0d2114fSYongseok Koh #include "mlx5_autoconf.h"
24f0d2114fSYongseok Koh 
25570acdb1SYongseok Koh #if defined RTE_ARCH_X86_64
263c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec_sse.h"
27570acdb1SYongseok Koh #elif defined RTE_ARCH_ARM64
28570acdb1SYongseok Koh #include "mlx5_rxtx_vec_neon.h"
292e542da7SDavid Christensen #elif defined RTE_ARCH_PPC_64
302e542da7SDavid Christensen #include "mlx5_rxtx_vec_altivec.h"
313c2ddbd4SYongseok Koh #else
323c2ddbd4SYongseok Koh #error "This should not be compiled if SIMD instructions are not supported."
33f0d2114fSYongseok Koh #endif
34f0d2114fSYongseok Koh 
35f0d2114fSYongseok Koh /**
36f0d2114fSYongseok Koh  * Skip error packets.
37f0d2114fSYongseok Koh  *
38f0d2114fSYongseok Koh  * @param rxq
39f0d2114fSYongseok Koh  *   Pointer to RX queue structure.
40f0d2114fSYongseok Koh  * @param[out] pkts
41f0d2114fSYongseok Koh  *   Array to store received packets.
42f0d2114fSYongseok Koh  * @param pkts_n
43f0d2114fSYongseok Koh  *   Maximum number of packets in array.
44f0d2114fSYongseok Koh  *
45f0d2114fSYongseok Koh  * @return
46f0d2114fSYongseok Koh  *   Number of packets successfully received (<= pkts_n).
47f0d2114fSYongseok Koh  */
48f0d2114fSYongseok Koh static uint16_t
49f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
50f0d2114fSYongseok Koh 			 uint16_t pkts_n)
51f0d2114fSYongseok Koh {
52f0d2114fSYongseok Koh 	uint16_t n = 0;
53f0d2114fSYongseok Koh 	unsigned int i;
54f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
55f0d2114fSYongseok Koh 	uint32_t err_bytes = 0;
56f0d2114fSYongseok Koh #endif
57f0d2114fSYongseok Koh 
58f0d2114fSYongseok Koh 	for (i = 0; i < pkts_n; ++i) {
59f0d2114fSYongseok Koh 		struct rte_mbuf *pkt = pkts[i];
60f0d2114fSYongseok Koh 
6188c07335SMatan Azrad 		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
62f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
63f0d2114fSYongseok Koh 			err_bytes += PKT_LEN(pkt);
64f0d2114fSYongseok Koh #endif
65f0d2114fSYongseok Koh 			rte_pktmbuf_free_seg(pkt);
66f0d2114fSYongseok Koh 		} else {
67f0d2114fSYongseok Koh 			pkts[n++] = pkt;
68f0d2114fSYongseok Koh 		}
69f0d2114fSYongseok Koh 	}
70f0d2114fSYongseok Koh 	rxq->stats.idropped += (pkts_n - n);
71f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
72f0d2114fSYongseok Koh 	/* Correct counters of errored completions. */
73f0d2114fSYongseok Koh 	rxq->stats.ipackets -= (pkts_n - n);
74f0d2114fSYongseok Koh 	rxq->stats.ibytes -= err_bytes;
75f0d2114fSYongseok Koh #endif
7688c07335SMatan Azrad 	mlx5_rx_err_handle(rxq, 1);
77f0d2114fSYongseok Koh 	return n;
78f0d2114fSYongseok Koh }
79f0d2114fSYongseok Koh 
80f0d2114fSYongseok Koh /**
810f20acbfSAlexander Kozyrev  * Replenish buffers for RX in bulk.
820f20acbfSAlexander Kozyrev  *
830f20acbfSAlexander Kozyrev  * @param rxq
840f20acbfSAlexander Kozyrev  *   Pointer to RX queue structure.
850f20acbfSAlexander Kozyrev  */
860f20acbfSAlexander Kozyrev static inline void
870f20acbfSAlexander Kozyrev mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
880f20acbfSAlexander Kozyrev {
890f20acbfSAlexander Kozyrev 	const uint16_t q_n = 1 << rxq->elts_n;
900f20acbfSAlexander Kozyrev 	const uint16_t q_mask = q_n - 1;
910f20acbfSAlexander Kozyrev 	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
920f20acbfSAlexander Kozyrev 	uint16_t elts_idx = rxq->rq_ci & q_mask;
930f20acbfSAlexander Kozyrev 	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
940f20acbfSAlexander Kozyrev 	volatile struct mlx5_wqe_data_seg *wq =
950f20acbfSAlexander Kozyrev 		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
960f20acbfSAlexander Kozyrev 	unsigned int i;
970f20acbfSAlexander Kozyrev 
980f20acbfSAlexander Kozyrev 	if (n >= rxq->rq_repl_thresh) {
990f20acbfSAlexander Kozyrev 		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
1000f20acbfSAlexander Kozyrev 		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
1010f20acbfSAlexander Kozyrev 			    MLX5_VPMD_DESCS_PER_LOOP);
1020f20acbfSAlexander Kozyrev 		/* Not to cross queue end. */
1030f20acbfSAlexander Kozyrev 		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
1040f20acbfSAlexander Kozyrev 		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
1050f20acbfSAlexander Kozyrev 			rxq->stats.rx_nombuf += n;
1060f20acbfSAlexander Kozyrev 			return;
1070f20acbfSAlexander Kozyrev 		}
1080f20acbfSAlexander Kozyrev 		for (i = 0; i < n; ++i) {
1090f20acbfSAlexander Kozyrev 			void *buf_addr;
1100f20acbfSAlexander Kozyrev 
1110f20acbfSAlexander Kozyrev 			/*
1120f20acbfSAlexander Kozyrev 			 * In order to support the mbufs with external attached
1130f20acbfSAlexander Kozyrev 			 * data buffer we should use the buf_addr pointer
1140f20acbfSAlexander Kozyrev 			 * instead of rte_mbuf_buf_addr(). It touches the mbuf
1150f20acbfSAlexander Kozyrev 			 * itself and may impact the performance.
1160f20acbfSAlexander Kozyrev 			 */
1170f20acbfSAlexander Kozyrev 			buf_addr = elts[i]->buf_addr;
1180f20acbfSAlexander Kozyrev 			wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
1190f20acbfSAlexander Kozyrev 						      RTE_PKTMBUF_HEADROOM);
1200f20acbfSAlexander Kozyrev 			/* If there's a single MR, no need to replace LKey. */
1210f20acbfSAlexander Kozyrev 			if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
1220f20acbfSAlexander Kozyrev 				     > 1))
1230f20acbfSAlexander Kozyrev 				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
1240f20acbfSAlexander Kozyrev 		}
1250f20acbfSAlexander Kozyrev 		rxq->rq_ci += n;
1260f20acbfSAlexander Kozyrev 		/* Prevent overflowing into consumed mbufs. */
1270f20acbfSAlexander Kozyrev 		elts_idx = rxq->rq_ci & q_mask;
1280f20acbfSAlexander Kozyrev 		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
1290f20acbfSAlexander Kozyrev 			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
1300f20acbfSAlexander Kozyrev 		rte_io_wmb();
1310f20acbfSAlexander Kozyrev 		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
1320f20acbfSAlexander Kozyrev 	}
1330f20acbfSAlexander Kozyrev }
1340f20acbfSAlexander Kozyrev 
1350f20acbfSAlexander Kozyrev /**
1360f20acbfSAlexander Kozyrev  * Replenish buffers for MPRQ RX in bulk.
1370f20acbfSAlexander Kozyrev  *
1380f20acbfSAlexander Kozyrev  * @param rxq
1390f20acbfSAlexander Kozyrev  *   Pointer to RX queue structure.
1400f20acbfSAlexander Kozyrev  */
1410f20acbfSAlexander Kozyrev static inline void
1420f20acbfSAlexander Kozyrev mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
1430f20acbfSAlexander Kozyrev {
1440f20acbfSAlexander Kozyrev 	const uint16_t wqe_n = 1 << rxq->elts_n;
1450f20acbfSAlexander Kozyrev 	const uint32_t strd_n = 1 << rxq->strd_num_n;
1460f20acbfSAlexander Kozyrev 	const uint32_t elts_n = wqe_n * strd_n;
1470f20acbfSAlexander Kozyrev 	const uint32_t wqe_mask = elts_n - 1;
148*5fc2e5c2SAlexander Kozyrev 	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
1490f20acbfSAlexander Kozyrev 	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
1500f20acbfSAlexander Kozyrev 	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
151*5fc2e5c2SAlexander Kozyrev 	unsigned int i;
1520f20acbfSAlexander Kozyrev 
153*5fc2e5c2SAlexander Kozyrev 	if (n >= rxq->rq_repl_thresh &&
154*5fc2e5c2SAlexander Kozyrev 	    rxq->elts_ci - rxq->rq_pi <= rxq->rq_repl_thresh) {
155*5fc2e5c2SAlexander Kozyrev 		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
1560f20acbfSAlexander Kozyrev 		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
1570f20acbfSAlexander Kozyrev 			     MLX5_VPMD_DESCS_PER_LOOP);
1585c687643SAlexander Kozyrev 		/* Not to cross queue end. */
159*5fc2e5c2SAlexander Kozyrev 		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
160*5fc2e5c2SAlexander Kozyrev 		/* Limit replenish number to threshold value. */
161*5fc2e5c2SAlexander Kozyrev 		n = RTE_MIN(n, rxq->rq_repl_thresh);
1620f20acbfSAlexander Kozyrev 		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
1630f20acbfSAlexander Kozyrev 			rxq->stats.rx_nombuf += n;
1640f20acbfSAlexander Kozyrev 			return;
1650f20acbfSAlexander Kozyrev 		}
1660f20acbfSAlexander Kozyrev 		rxq->elts_ci += n;
167*5fc2e5c2SAlexander Kozyrev 		/* Prevent overflowing into consumed mbufs. */
168*5fc2e5c2SAlexander Kozyrev 		elts_idx = rxq->elts_ci & wqe_mask;
169*5fc2e5c2SAlexander Kozyrev 		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
170*5fc2e5c2SAlexander Kozyrev 			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
1710f20acbfSAlexander Kozyrev 	}
1720f20acbfSAlexander Kozyrev }
1730f20acbfSAlexander Kozyrev 
1740f20acbfSAlexander Kozyrev /**
1750f20acbfSAlexander Kozyrev  * Copy or attach MPRQ buffers to RX SW ring.
1760f20acbfSAlexander Kozyrev  *
1770f20acbfSAlexander Kozyrev  * @param rxq
1780f20acbfSAlexander Kozyrev  *   Pointer to RX queue structure.
1790f20acbfSAlexander Kozyrev  * @param pkts
1800f20acbfSAlexander Kozyrev  *   Pointer to array of packets to be stored.
1810f20acbfSAlexander Kozyrev  * @param pkts_n
1820f20acbfSAlexander Kozyrev  *   Number of packets to be stored.
1830f20acbfSAlexander Kozyrev  *
1840f20acbfSAlexander Kozyrev  * @return
1850f20acbfSAlexander Kozyrev  *   Number of packets successfully copied/attached (<= pkts_n).
1860f20acbfSAlexander Kozyrev  */
1870f20acbfSAlexander Kozyrev static inline uint16_t
1880f20acbfSAlexander Kozyrev rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
1890f20acbfSAlexander Kozyrev 		     struct rte_mbuf **pkts, uint16_t pkts_n)
1900f20acbfSAlexander Kozyrev {
1910f20acbfSAlexander Kozyrev 	const uint16_t wqe_n = 1 << rxq->elts_n;
1920f20acbfSAlexander Kozyrev 	const uint16_t wqe_mask = wqe_n - 1;
1930f20acbfSAlexander Kozyrev 	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
1940f20acbfSAlexander Kozyrev 	const uint32_t strd_n = 1 << rxq->strd_num_n;
1950f20acbfSAlexander Kozyrev 	const uint32_t elts_n = wqe_n * strd_n;
1960f20acbfSAlexander Kozyrev 	const uint32_t elts_mask = elts_n - 1;
1970f20acbfSAlexander Kozyrev 	uint32_t elts_idx = rxq->rq_pi & elts_mask;
1980f20acbfSAlexander Kozyrev 	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
1990f20acbfSAlexander Kozyrev 	uint32_t rq_ci = rxq->rq_ci;
2000f20acbfSAlexander Kozyrev 	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
2010f20acbfSAlexander Kozyrev 	uint16_t copied = 0;
2020f20acbfSAlexander Kozyrev 	uint16_t i = 0;
2030f20acbfSAlexander Kozyrev 
2040f20acbfSAlexander Kozyrev 	for (i = 0; i < pkts_n; ++i) {
2050f20acbfSAlexander Kozyrev 		uint16_t strd_cnt;
2060f20acbfSAlexander Kozyrev 		enum mlx5_rqx_code rxq_code;
2070f20acbfSAlexander Kozyrev 
2080f20acbfSAlexander Kozyrev 		if (rxq->consumed_strd == strd_n) {
2090f20acbfSAlexander Kozyrev 			/* Replace WQE if the buffer is still in use. */
2100f20acbfSAlexander Kozyrev 			mprq_buf_replace(rxq, rq_ci & wqe_mask);
2110f20acbfSAlexander Kozyrev 			/* Advance to the next WQE. */
2120f20acbfSAlexander Kozyrev 			rxq->consumed_strd = 0;
2130f20acbfSAlexander Kozyrev 			rq_ci++;
2140f20acbfSAlexander Kozyrev 			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
2150f20acbfSAlexander Kozyrev 		}
2160f20acbfSAlexander Kozyrev 
2170f20acbfSAlexander Kozyrev 		if (!elts[i]->pkt_len) {
2180f20acbfSAlexander Kozyrev 			rxq->consumed_strd = strd_n;
2190f20acbfSAlexander Kozyrev 			rte_pktmbuf_free_seg(elts[i]);
2200f20acbfSAlexander Kozyrev #ifdef MLX5_PMD_SOFT_COUNTERS
2210f20acbfSAlexander Kozyrev 			rxq->stats.ipackets -= 1;
2220f20acbfSAlexander Kozyrev #endif
2230f20acbfSAlexander Kozyrev 			continue;
2240f20acbfSAlexander Kozyrev 		}
2250f20acbfSAlexander Kozyrev 		strd_cnt = (elts[i]->pkt_len / strd_sz) +
2260f20acbfSAlexander Kozyrev 			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
2270f20acbfSAlexander Kozyrev 		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
2280f20acbfSAlexander Kozyrev 					   buf, rxq->consumed_strd, strd_cnt);
2290f20acbfSAlexander Kozyrev 		rxq->consumed_strd += strd_cnt;
2300f20acbfSAlexander Kozyrev 		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
2310f20acbfSAlexander Kozyrev 			rte_pktmbuf_free_seg(elts[i]);
2320f20acbfSAlexander Kozyrev #ifdef MLX5_PMD_SOFT_COUNTERS
2330f20acbfSAlexander Kozyrev 			rxq->stats.ipackets -= 1;
2340f20acbfSAlexander Kozyrev 			rxq->stats.ibytes -= elts[i]->pkt_len;
2350f20acbfSAlexander Kozyrev #endif
2360f20acbfSAlexander Kozyrev 			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
2370f20acbfSAlexander Kozyrev 				++rxq->stats.rx_nombuf;
2380f20acbfSAlexander Kozyrev 				break;
2390f20acbfSAlexander Kozyrev 			}
2400f20acbfSAlexander Kozyrev 			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
2410f20acbfSAlexander Kozyrev 				++rxq->stats.idropped;
2420f20acbfSAlexander Kozyrev 				continue;
2430f20acbfSAlexander Kozyrev 			}
2440f20acbfSAlexander Kozyrev 		}
2450f20acbfSAlexander Kozyrev 		pkts[copied++] = elts[i];
2460f20acbfSAlexander Kozyrev 	}
2470f20acbfSAlexander Kozyrev 	rxq->rq_pi += i;
2480f20acbfSAlexander Kozyrev 	rxq->cq_ci += i;
2490f20acbfSAlexander Kozyrev 	rte_io_wmb();
2500f20acbfSAlexander Kozyrev 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
2510f20acbfSAlexander Kozyrev 	if (rq_ci != rxq->rq_ci) {
2520f20acbfSAlexander Kozyrev 		rxq->rq_ci = rq_ci;
2530f20acbfSAlexander Kozyrev 		rte_io_wmb();
2540f20acbfSAlexander Kozyrev 		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
2550f20acbfSAlexander Kozyrev 	}
2560f20acbfSAlexander Kozyrev 	return copied;
2570f20acbfSAlexander Kozyrev }
2580f20acbfSAlexander Kozyrev 
2590f20acbfSAlexander Kozyrev /**
2601ded2623SAlexander Kozyrev  * Receive burst of packets. An errored completion also consumes a mbuf, but the
2611ded2623SAlexander Kozyrev  * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
2621ded2623SAlexander Kozyrev  * before returning to application.
2631ded2623SAlexander Kozyrev  *
2641ded2623SAlexander Kozyrev  * @param rxq
2651ded2623SAlexander Kozyrev  *   Pointer to RX queue structure.
2661ded2623SAlexander Kozyrev  * @param[out] pkts
2671ded2623SAlexander Kozyrev  *   Array to store received packets.
2681ded2623SAlexander Kozyrev  * @param pkts_n
2691ded2623SAlexander Kozyrev  *   Maximum number of packets in array.
2701ded2623SAlexander Kozyrev  * @param[out] err
2711ded2623SAlexander Kozyrev  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
2721ded2623SAlexander Kozyrev  *   packet to handle.
2731ded2623SAlexander Kozyrev  * @param[out] no_cq
2741ded2623SAlexander Kozyrev  *   Pointer to a boolean. Set true if no new CQE seen.
2751ded2623SAlexander Kozyrev  *
2761ded2623SAlexander Kozyrev  * @return
2771ded2623SAlexander Kozyrev  *   Number of packets received including errors (<= pkts_n).
2781ded2623SAlexander Kozyrev  */
2791ded2623SAlexander Kozyrev static inline uint16_t
2801ded2623SAlexander Kozyrev rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
2811ded2623SAlexander Kozyrev 	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
2821ded2623SAlexander Kozyrev {
2831ded2623SAlexander Kozyrev 	const uint16_t q_n = 1 << rxq->cqe_n;
2841ded2623SAlexander Kozyrev 	const uint16_t q_mask = q_n - 1;
2851ded2623SAlexander Kozyrev 	const uint16_t e_n = 1 << rxq->elts_n;
2861ded2623SAlexander Kozyrev 	const uint16_t e_mask = e_n - 1;
2871ded2623SAlexander Kozyrev 	volatile struct mlx5_cqe *cq;
2881ded2623SAlexander Kozyrev 	struct rte_mbuf **elts;
2891ded2623SAlexander Kozyrev 	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
2901ded2623SAlexander Kozyrev 	uint16_t nocmp_n = 0;
2911ded2623SAlexander Kozyrev 	uint16_t rcvd_pkt = 0;
2921ded2623SAlexander Kozyrev 	unsigned int cq_idx = rxq->cq_ci & q_mask;
2931ded2623SAlexander Kozyrev 	unsigned int elts_idx;
2941ded2623SAlexander Kozyrev 
2951ded2623SAlexander Kozyrev 	MLX5_ASSERT(rxq->sges_n == 0);
2961ded2623SAlexander Kozyrev 	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
2971ded2623SAlexander Kozyrev 	cq = &(*rxq->cqes)[cq_idx];
2981ded2623SAlexander Kozyrev 	rte_prefetch0(cq);
2991ded2623SAlexander Kozyrev 	rte_prefetch0(cq + 1);
3001ded2623SAlexander Kozyrev 	rte_prefetch0(cq + 2);
3011ded2623SAlexander Kozyrev 	rte_prefetch0(cq + 3);
3021ded2623SAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
3031ded2623SAlexander Kozyrev 	mlx5_rx_replenish_bulk_mbuf(rxq);
3041ded2623SAlexander Kozyrev 	/* See if there're unreturned mbufs from compressed CQE. */
3051ded2623SAlexander Kozyrev 	rcvd_pkt = rxq->decompressed;
3061ded2623SAlexander Kozyrev 	if (rcvd_pkt > 0) {
3071ded2623SAlexander Kozyrev 		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
3081ded2623SAlexander Kozyrev 		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
3091ded2623SAlexander Kozyrev 				pkts, rcvd_pkt);
3101ded2623SAlexander Kozyrev 		rxq->rq_pi += rcvd_pkt;
3111ded2623SAlexander Kozyrev 		rxq->decompressed -= rcvd_pkt;
3121ded2623SAlexander Kozyrev 		pkts += rcvd_pkt;
3131ded2623SAlexander Kozyrev 	}
3141ded2623SAlexander Kozyrev 	elts_idx = rxq->rq_pi & e_mask;
3151ded2623SAlexander Kozyrev 	elts = &(*rxq->elts)[elts_idx];
3161ded2623SAlexander Kozyrev 	/* Not to overflow pkts array. */
3171ded2623SAlexander Kozyrev 	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
3181ded2623SAlexander Kozyrev 	/* Not to cross queue end. */
3191ded2623SAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
3201ded2623SAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
3211ded2623SAlexander Kozyrev 	if (!pkts_n) {
3221ded2623SAlexander Kozyrev 		*no_cq = !rcvd_pkt;
3231ded2623SAlexander Kozyrev 		return rcvd_pkt;
3241ded2623SAlexander Kozyrev 	}
3251ded2623SAlexander Kozyrev 	/* At this point, there shouldn't be any remaining packets. */
3261ded2623SAlexander Kozyrev 	MLX5_ASSERT(rxq->decompressed == 0);
3271ded2623SAlexander Kozyrev 	/* Process all the CQEs */
3281ded2623SAlexander Kozyrev 	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
3291ded2623SAlexander Kozyrev 	/* If no new CQE seen, return without updating cq_db. */
3301ded2623SAlexander Kozyrev 	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
3311ded2623SAlexander Kozyrev 		*no_cq = true;
3321ded2623SAlexander Kozyrev 		return rcvd_pkt;
3331ded2623SAlexander Kozyrev 	}
3341ded2623SAlexander Kozyrev 	/* Update the consumer indexes for non-compressed CQEs. */
3351ded2623SAlexander Kozyrev 	MLX5_ASSERT(nocmp_n <= pkts_n);
3361ded2623SAlexander Kozyrev 	rxq->cq_ci += nocmp_n;
3371ded2623SAlexander Kozyrev 	rxq->rq_pi += nocmp_n;
3381ded2623SAlexander Kozyrev 	rcvd_pkt += nocmp_n;
3391ded2623SAlexander Kozyrev 	/* Decompress the last CQE if compressed. */
3401ded2623SAlexander Kozyrev 	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
3411ded2623SAlexander Kozyrev 		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
3421ded2623SAlexander Kozyrev 		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
3431ded2623SAlexander Kozyrev 							&elts[nocmp_n]);
3441ded2623SAlexander Kozyrev 		rxq->cq_ci += rxq->decompressed;
3451ded2623SAlexander Kozyrev 		/* Return more packets if needed. */
3461ded2623SAlexander Kozyrev 		if (nocmp_n < pkts_n) {
3471ded2623SAlexander Kozyrev 			uint16_t n = rxq->decompressed;
3481ded2623SAlexander Kozyrev 
3491ded2623SAlexander Kozyrev 			n = RTE_MIN(n, pkts_n - nocmp_n);
3501ded2623SAlexander Kozyrev 			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
3511ded2623SAlexander Kozyrev 					&pkts[nocmp_n], n);
3521ded2623SAlexander Kozyrev 			rxq->rq_pi += n;
3531ded2623SAlexander Kozyrev 			rcvd_pkt += n;
3541ded2623SAlexander Kozyrev 			rxq->decompressed -= n;
3551ded2623SAlexander Kozyrev 		}
3561ded2623SAlexander Kozyrev 	}
3571ded2623SAlexander Kozyrev 	rte_io_wmb();
3581ded2623SAlexander Kozyrev 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
3591ded2623SAlexander Kozyrev 	*no_cq = !rcvd_pkt;
3601ded2623SAlexander Kozyrev 	return rcvd_pkt;
3611ded2623SAlexander Kozyrev }
3621ded2623SAlexander Kozyrev 
3631ded2623SAlexander Kozyrev /**
364f0d2114fSYongseok Koh  * DPDK callback for vectorized RX.
365f0d2114fSYongseok Koh  *
366f0d2114fSYongseok Koh  * @param dpdk_rxq
367f0d2114fSYongseok Koh  *   Generic pointer to RX queue structure.
368f0d2114fSYongseok Koh  * @param[out] pkts
369f0d2114fSYongseok Koh  *   Array to store received packets.
370f0d2114fSYongseok Koh  * @param pkts_n
371f0d2114fSYongseok Koh  *   Maximum number of packets in array.
372f0d2114fSYongseok Koh  *
373f0d2114fSYongseok Koh  * @return
374f0d2114fSYongseok Koh  *   Number of packets successfully received (<= pkts_n).
375f0d2114fSYongseok Koh  */
376f0d2114fSYongseok Koh uint16_t
377f0d2114fSYongseok Koh mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
378f0d2114fSYongseok Koh {
379f0d2114fSYongseok Koh 	struct mlx5_rxq_data *rxq = dpdk_rxq;
380c9cc554bSAlexander Kozyrev 	uint16_t nb_rx = 0;
381c9cc554bSAlexander Kozyrev 	uint16_t tn = 0;
382d27fb0deSYongseok Koh 	uint64_t err = 0;
383c9cc554bSAlexander Kozyrev 	bool no_cq = false;
384f0d2114fSYongseok Koh 
385c9cc554bSAlexander Kozyrev 	do {
3860f20acbfSAlexander Kozyrev 		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
3870f20acbfSAlexander Kozyrev 				    &err, &no_cq);
3880f20acbfSAlexander Kozyrev 		if (unlikely(err | rxq->err_state))
3890f20acbfSAlexander Kozyrev 			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
3900f20acbfSAlexander Kozyrev 		tn += nb_rx;
3910f20acbfSAlexander Kozyrev 		if (unlikely(no_cq))
3920f20acbfSAlexander Kozyrev 			break;
3930f20acbfSAlexander Kozyrev 	} while (tn != pkts_n);
3940f20acbfSAlexander Kozyrev 	return tn;
3950f20acbfSAlexander Kozyrev }
3960f20acbfSAlexander Kozyrev 
3970f20acbfSAlexander Kozyrev /**
3980f20acbfSAlexander Kozyrev  * Receive burst of packets. An errored completion also consumes a mbuf, but the
3990f20acbfSAlexander Kozyrev  * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
4000f20acbfSAlexander Kozyrev  * before returning to application.
4010f20acbfSAlexander Kozyrev  *
4020f20acbfSAlexander Kozyrev  * @param rxq
4030f20acbfSAlexander Kozyrev  *   Pointer to RX queue structure.
4040f20acbfSAlexander Kozyrev  * @param[out] pkts
4050f20acbfSAlexander Kozyrev  *   Array to store received packets.
4060f20acbfSAlexander Kozyrev  * @param pkts_n
4070f20acbfSAlexander Kozyrev  *   Maximum number of packets in array.
4080f20acbfSAlexander Kozyrev  * @param[out] err
4090f20acbfSAlexander Kozyrev  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
4100f20acbfSAlexander Kozyrev  *   packet to handle.
4110f20acbfSAlexander Kozyrev  * @param[out] no_cq
4120f20acbfSAlexander Kozyrev  *   Pointer to a boolean. Set true if no new CQE seen.
4130f20acbfSAlexander Kozyrev  *
4140f20acbfSAlexander Kozyrev  * @return
4150f20acbfSAlexander Kozyrev  *   Number of packets received including errors (<= pkts_n).
4160f20acbfSAlexander Kozyrev  */
4170f20acbfSAlexander Kozyrev static inline uint16_t
4180f20acbfSAlexander Kozyrev rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
4190f20acbfSAlexander Kozyrev 		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
4200f20acbfSAlexander Kozyrev {
4210f20acbfSAlexander Kozyrev 	const uint16_t q_n = 1 << rxq->cqe_n;
4220f20acbfSAlexander Kozyrev 	const uint16_t q_mask = q_n - 1;
4230f20acbfSAlexander Kozyrev 	const uint16_t wqe_n = 1 << rxq->elts_n;
4240f20acbfSAlexander Kozyrev 	const uint32_t strd_n = 1 << rxq->strd_num_n;
4250f20acbfSAlexander Kozyrev 	const uint32_t elts_n = wqe_n * strd_n;
4260f20acbfSAlexander Kozyrev 	const uint32_t elts_mask = elts_n - 1;
4270f20acbfSAlexander Kozyrev 	volatile struct mlx5_cqe *cq;
4280f20acbfSAlexander Kozyrev 	struct rte_mbuf **elts;
4290f20acbfSAlexander Kozyrev 	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
4300f20acbfSAlexander Kozyrev 	uint16_t nocmp_n = 0;
4310f20acbfSAlexander Kozyrev 	uint16_t rcvd_pkt = 0;
4320f20acbfSAlexander Kozyrev 	uint16_t cp_pkt = 0;
4330f20acbfSAlexander Kozyrev 	unsigned int cq_idx = rxq->cq_ci & q_mask;
4340f20acbfSAlexander Kozyrev 	unsigned int elts_idx;
4350f20acbfSAlexander Kozyrev 
4360f20acbfSAlexander Kozyrev 	MLX5_ASSERT(rxq->sges_n == 0);
4370f20acbfSAlexander Kozyrev 	cq = &(*rxq->cqes)[cq_idx];
4380f20acbfSAlexander Kozyrev 	rte_prefetch0(cq);
4390f20acbfSAlexander Kozyrev 	rte_prefetch0(cq + 1);
4400f20acbfSAlexander Kozyrev 	rte_prefetch0(cq + 2);
4410f20acbfSAlexander Kozyrev 	rte_prefetch0(cq + 3);
4420f20acbfSAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
4430f20acbfSAlexander Kozyrev 	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
4440f20acbfSAlexander Kozyrev 	/* See if there're unreturned mbufs from compressed CQE. */
4450f20acbfSAlexander Kozyrev 	rcvd_pkt = rxq->decompressed;
4460f20acbfSAlexander Kozyrev 	if (rcvd_pkt > 0) {
4470f20acbfSAlexander Kozyrev 		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
4480f20acbfSAlexander Kozyrev 		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
4490f20acbfSAlexander Kozyrev 		rxq->decompressed -= rcvd_pkt;
4500f20acbfSAlexander Kozyrev 		pkts += cp_pkt;
4510f20acbfSAlexander Kozyrev 	}
4520f20acbfSAlexander Kozyrev 	elts_idx = rxq->rq_pi & elts_mask;
4530f20acbfSAlexander Kozyrev 	elts = &(*rxq->elts)[elts_idx];
4540f20acbfSAlexander Kozyrev 	/* Not to overflow pkts array. */
4550f20acbfSAlexander Kozyrev 	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
4560f20acbfSAlexander Kozyrev 	/* Not to cross queue end. */
4570f20acbfSAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
4580f20acbfSAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
4590f20acbfSAlexander Kozyrev 	/* Not to move past the allocated mbufs. */
4600f20acbfSAlexander Kozyrev 	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
4610f20acbfSAlexander Kozyrev 	if (!pkts_n) {
4620f20acbfSAlexander Kozyrev 		*no_cq = !cp_pkt;
4630f20acbfSAlexander Kozyrev 		return cp_pkt;
4640f20acbfSAlexander Kozyrev 	}
4650f20acbfSAlexander Kozyrev 	/* At this point, there shouldn't be any remaining packets. */
4660f20acbfSAlexander Kozyrev 	MLX5_ASSERT(rxq->decompressed == 0);
4670f20acbfSAlexander Kozyrev 	/* Process all the CQEs */
4680f20acbfSAlexander Kozyrev 	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
4690f20acbfSAlexander Kozyrev 	/* If no new CQE seen, return without updating cq_db. */
4700f20acbfSAlexander Kozyrev 	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
4710f20acbfSAlexander Kozyrev 		*no_cq = true;
4720f20acbfSAlexander Kozyrev 		return cp_pkt;
4730f20acbfSAlexander Kozyrev 	}
4740f20acbfSAlexander Kozyrev 	/* Update the consumer indexes for non-compressed CQEs. */
4750f20acbfSAlexander Kozyrev 	MLX5_ASSERT(nocmp_n <= pkts_n);
4760f20acbfSAlexander Kozyrev 	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
4770f20acbfSAlexander Kozyrev 	rcvd_pkt += cp_pkt;
4780f20acbfSAlexander Kozyrev 	/* Decompress the last CQE if compressed. */
4790f20acbfSAlexander Kozyrev 	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
4800f20acbfSAlexander Kozyrev 		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
4810f20acbfSAlexander Kozyrev 		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
4820f20acbfSAlexander Kozyrev 							&elts[nocmp_n]);
4830f20acbfSAlexander Kozyrev 		/* Return more packets if needed. */
4840f20acbfSAlexander Kozyrev 		if (nocmp_n < pkts_n) {
4850f20acbfSAlexander Kozyrev 			uint16_t n = rxq->decompressed;
4860f20acbfSAlexander Kozyrev 
4870f20acbfSAlexander Kozyrev 			n = RTE_MIN(n, pkts_n - nocmp_n);
4880f20acbfSAlexander Kozyrev 			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
4890f20acbfSAlexander Kozyrev 			rcvd_pkt += cp_pkt;
4900f20acbfSAlexander Kozyrev 			rxq->decompressed -= n;
4910f20acbfSAlexander Kozyrev 		}
4920f20acbfSAlexander Kozyrev 	}
4930f20acbfSAlexander Kozyrev 	*no_cq = !rcvd_pkt;
4940f20acbfSAlexander Kozyrev 	return rcvd_pkt;
4950f20acbfSAlexander Kozyrev }
4960f20acbfSAlexander Kozyrev 
4970f20acbfSAlexander Kozyrev /**
4980f20acbfSAlexander Kozyrev  * DPDK callback for vectorized MPRQ RX.
4990f20acbfSAlexander Kozyrev  *
5000f20acbfSAlexander Kozyrev  * @param dpdk_rxq
5010f20acbfSAlexander Kozyrev  *   Generic pointer to RX queue structure.
5020f20acbfSAlexander Kozyrev  * @param[out] pkts
5030f20acbfSAlexander Kozyrev  *   Array to store received packets.
5040f20acbfSAlexander Kozyrev  * @param pkts_n
5050f20acbfSAlexander Kozyrev  *   Maximum number of packets in array.
5060f20acbfSAlexander Kozyrev  *
5070f20acbfSAlexander Kozyrev  * @return
5080f20acbfSAlexander Kozyrev  *   Number of packets successfully received (<= pkts_n).
5090f20acbfSAlexander Kozyrev  */
5100f20acbfSAlexander Kozyrev uint16_t
5110f20acbfSAlexander Kozyrev mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
5120f20acbfSAlexander Kozyrev {
5130f20acbfSAlexander Kozyrev 	struct mlx5_rxq_data *rxq = dpdk_rxq;
5140f20acbfSAlexander Kozyrev 	uint16_t nb_rx = 0;
5150f20acbfSAlexander Kozyrev 	uint16_t tn = 0;
5160f20acbfSAlexander Kozyrev 	uint64_t err = 0;
5170f20acbfSAlexander Kozyrev 	bool no_cq = false;
5180f20acbfSAlexander Kozyrev 
5190f20acbfSAlexander Kozyrev 	do {
5200f20acbfSAlexander Kozyrev 		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
5210f20acbfSAlexander Kozyrev 					 &err, &no_cq);
52288c07335SMatan Azrad 		if (unlikely(err | rxq->err_state))
523c9cc554bSAlexander Kozyrev 			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
524c9cc554bSAlexander Kozyrev 		tn += nb_rx;
525c9cc554bSAlexander Kozyrev 		if (unlikely(no_cq))
526c9cc554bSAlexander Kozyrev 			break;
527c9cc554bSAlexander Kozyrev 	} while (tn != pkts_n);
528c9cc554bSAlexander Kozyrev 	return tn;
529f0d2114fSYongseok Koh }
530f0d2114fSYongseok Koh 
531f0d2114fSYongseok Koh /**
532f0d2114fSYongseok Koh  * Check a RX queue can support vectorized RX.
533f0d2114fSYongseok Koh  *
534f0d2114fSYongseok Koh  * @param rxq
535f0d2114fSYongseok Koh  *   Pointer to RX queue.
536f0d2114fSYongseok Koh  *
537f0d2114fSYongseok Koh  * @return
538f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
539f0d2114fSYongseok Koh  */
540ce6427ddSThomas Monjalon int __rte_cold
541af4f09f2SNélio Laranjeiro mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
542f0d2114fSYongseok Koh {
543f0d2114fSYongseok Koh 	struct mlx5_rxq_ctrl *ctrl =
544f0d2114fSYongseok Koh 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
545f0d2114fSYongseok Koh 
5467fe24446SShahaf Shuler 	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
547f0d2114fSYongseok Koh 		return -ENOTSUP;
54817ed314cSMatan Azrad 	if (rxq->lro)
54917ed314cSMatan Azrad 		return -ENOTSUP;
550f0d2114fSYongseok Koh 	return 1;
551f0d2114fSYongseok Koh }
552f0d2114fSYongseok Koh 
553f0d2114fSYongseok Koh /**
554f0d2114fSYongseok Koh  * Check a device can support vectorized RX.
555f0d2114fSYongseok Koh  *
556af4f09f2SNélio Laranjeiro  * @param dev
557af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
558f0d2114fSYongseok Koh  *
559f0d2114fSYongseok Koh  * @return
560f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
561f0d2114fSYongseok Koh  */
562ce6427ddSThomas Monjalon int __rte_cold
563af4f09f2SNélio Laranjeiro mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
564f0d2114fSYongseok Koh {
565dbeba4cfSThomas Monjalon 	struct mlx5_priv *priv = dev->data->dev_private;
5660f006468SMichael Baum 	uint32_t i;
567f0d2114fSYongseok Koh 
5682c5e0dd2SCiara Power 	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
5692c5e0dd2SCiara Power 		return -ENOTSUP;
5707fe24446SShahaf Shuler 	if (!priv->config.rx_vec_en)
571f0d2114fSYongseok Koh 		return -ENOTSUP;
572f0d2114fSYongseok Koh 	/* All the configured queues should support. */
573f0d2114fSYongseok Koh 	for (i = 0; i < priv->rxqs_n; ++i) {
574f0d2114fSYongseok Koh 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
575f0d2114fSYongseok Koh 
576f0d2114fSYongseok Koh 		if (!rxq)
577f0d2114fSYongseok Koh 			continue;
578af4f09f2SNélio Laranjeiro 		if (mlx5_rxq_check_vec_support(rxq) < 0)
579f0d2114fSYongseok Koh 			break;
580f0d2114fSYongseok Koh 	}
581f0d2114fSYongseok Koh 	if (i != priv->rxqs_n)
582f0d2114fSYongseok Koh 		return -ENOTSUP;
583f0d2114fSYongseok Koh 	return 1;
584f0d2114fSYongseok Koh }
585