xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision dbeba4cf18a5e1d9f7aaa284457bf15c351eb965)
18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2f0d2114fSYongseok Koh  * Copyright 2017 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2017 Mellanox Technologies, Ltd
4f0d2114fSYongseok Koh  */
5f0d2114fSYongseok Koh 
6f0d2114fSYongseok Koh #include <assert.h>
7f0d2114fSYongseok Koh #include <stdint.h>
8f0d2114fSYongseok Koh #include <string.h>
9f0d2114fSYongseok Koh #include <stdlib.h>
10f0d2114fSYongseok Koh 
11f0d2114fSYongseok Koh /* Verbs header. */
12f0d2114fSYongseok Koh /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13f0d2114fSYongseok Koh #ifdef PEDANTIC
14f0d2114fSYongseok Koh #pragma GCC diagnostic ignored "-Wpedantic"
15f0d2114fSYongseok Koh #endif
16f0d2114fSYongseok Koh #include <infiniband/verbs.h>
17f0d2114fSYongseok Koh #include <infiniband/mlx5dv.h>
18f0d2114fSYongseok Koh #ifdef PEDANTIC
19f0d2114fSYongseok Koh #pragma GCC diagnostic error "-Wpedantic"
20f0d2114fSYongseok Koh #endif
21f0d2114fSYongseok Koh 
22f0d2114fSYongseok Koh #include <rte_mbuf.h>
23f0d2114fSYongseok Koh #include <rte_mempool.h>
24f0d2114fSYongseok Koh #include <rte_prefetch.h>
25f0d2114fSYongseok Koh 
26f0d2114fSYongseok Koh #include "mlx5.h"
27f0d2114fSYongseok Koh #include "mlx5_utils.h"
28f0d2114fSYongseok Koh #include "mlx5_rxtx.h"
295bfc9fc1SYongseok Koh #include "mlx5_rxtx_vec.h"
30f0d2114fSYongseok Koh #include "mlx5_autoconf.h"
31f0d2114fSYongseok Koh #include "mlx5_defs.h"
32f0d2114fSYongseok Koh #include "mlx5_prm.h"
33f0d2114fSYongseok Koh 
34570acdb1SYongseok Koh #if defined RTE_ARCH_X86_64
353c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec_sse.h"
36570acdb1SYongseok Koh #elif defined RTE_ARCH_ARM64
37570acdb1SYongseok Koh #include "mlx5_rxtx_vec_neon.h"
383c2ddbd4SYongseok Koh #else
393c2ddbd4SYongseok Koh #error "This should not be compiled if SIMD instructions are not supported."
40f0d2114fSYongseok Koh #endif
41f0d2114fSYongseok Koh 
42f0d2114fSYongseok Koh /**
436bd7fbd0SDekel Peled  * Count the number of packets having same ol_flags and same metadata (if
446bd7fbd0SDekel Peled  * PKT_TX_METADATA is set in ol_flags), and calculate cs_flags.
45f0d2114fSYongseok Koh  *
46f0d2114fSYongseok Koh  * @param pkts
47f0d2114fSYongseok Koh  *   Pointer to array of packets.
48f0d2114fSYongseok Koh  * @param pkts_n
49f0d2114fSYongseok Koh  *   Number of packets.
50f0d2114fSYongseok Koh  * @param cs_flags
51f0d2114fSYongseok Koh  *   Pointer of flags to be returned.
526bd7fbd0SDekel Peled  * @param metadata
536bd7fbd0SDekel Peled  *   Pointer of metadata to be returned.
546bd7fbd0SDekel Peled  * @param txq_offloads
556bd7fbd0SDekel Peled  *   Offloads enabled on Tx queue
56f0d2114fSYongseok Koh  *
57f0d2114fSYongseok Koh  * @return
586bd7fbd0SDekel Peled  *   Number of packets having same ol_flags and metadata, if relevant.
59f0d2114fSYongseok Koh  */
60f0d2114fSYongseok Koh static inline unsigned int
616bd7fbd0SDekel Peled txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
626bd7fbd0SDekel Peled 		 rte_be32_t *metadata, const uint64_t txq_offloads)
63f0d2114fSYongseok Koh {
64f0d2114fSYongseok Koh 	unsigned int pos;
656bd7fbd0SDekel Peled 	const uint64_t cksum_ol_mask =
66f0d2114fSYongseok Koh 		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
67f0d2114fSYongseok Koh 		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
68f0d2114fSYongseok Koh 		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
696bd7fbd0SDekel Peled 	rte_be32_t p0_metadata, pn_metadata;
70f0d2114fSYongseok Koh 
71f0d2114fSYongseok Koh 	if (!pkts_n)
72f0d2114fSYongseok Koh 		return 0;
736bd7fbd0SDekel Peled 	p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
746bd7fbd0SDekel Peled 			pkts[0]->tx_metadata : 0;
756bd7fbd0SDekel Peled 	/* Count the number of packets having same offload parameters. */
766bd7fbd0SDekel Peled 	for (pos = 1; pos < pkts_n; ++pos) {
776bd7fbd0SDekel Peled 		/* Check if packet has same checksum flags. */
786bd7fbd0SDekel Peled 		if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
796bd7fbd0SDekel Peled 		    ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))
80f0d2114fSYongseok Koh 			break;
816bd7fbd0SDekel Peled 		/* Check if packet has same metadata. */
826bd7fbd0SDekel Peled 		if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
836bd7fbd0SDekel Peled 			pn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ?
846bd7fbd0SDekel Peled 					pkts[pos]->tx_metadata : 0;
856bd7fbd0SDekel Peled 			if (pn_metadata != p0_metadata)
866bd7fbd0SDekel Peled 				break;
876bd7fbd0SDekel Peled 		}
886bd7fbd0SDekel Peled 	}
895f8ba81cSXueming Li 	*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
906bd7fbd0SDekel Peled 	*metadata = p0_metadata;
91f0d2114fSYongseok Koh 	return pos;
92f0d2114fSYongseok Koh }
93f0d2114fSYongseok Koh 
94f0d2114fSYongseok Koh /**
95f0d2114fSYongseok Koh  * DPDK callback for vectorized TX.
96f0d2114fSYongseok Koh  *
97f0d2114fSYongseok Koh  * @param dpdk_txq
98f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
99f0d2114fSYongseok Koh  * @param[in] pkts
100f0d2114fSYongseok Koh  *   Packets to transmit.
101f0d2114fSYongseok Koh  * @param pkts_n
102f0d2114fSYongseok Koh  *   Number of packets in array.
103f0d2114fSYongseok Koh  *
104f0d2114fSYongseok Koh  * @return
105f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
106f0d2114fSYongseok Koh  */
107f0d2114fSYongseok Koh uint16_t
108f0d2114fSYongseok Koh mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
109f0d2114fSYongseok Koh 		      uint16_t pkts_n)
110f0d2114fSYongseok Koh {
111f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
112f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
113f0d2114fSYongseok Koh 
114f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
115f0d2114fSYongseok Koh 		uint16_t n;
116f0d2114fSYongseok Koh 		uint16_t ret;
117f0d2114fSYongseok Koh 
118f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
1196bd7fbd0SDekel Peled 		ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
120f0d2114fSYongseok Koh 		nb_tx += ret;
121f0d2114fSYongseok Koh 		if (!ret)
122f0d2114fSYongseok Koh 			break;
123f0d2114fSYongseok Koh 	}
124f0d2114fSYongseok Koh 	return nb_tx;
125f0d2114fSYongseok Koh }
126f0d2114fSYongseok Koh 
127f0d2114fSYongseok Koh /**
128f0d2114fSYongseok Koh  * DPDK callback for vectorized TX with multi-seg packets and offload.
129f0d2114fSYongseok Koh  *
130f0d2114fSYongseok Koh  * @param dpdk_txq
131f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
132f0d2114fSYongseok Koh  * @param[in] pkts
133f0d2114fSYongseok Koh  *   Packets to transmit.
134f0d2114fSYongseok Koh  * @param pkts_n
135f0d2114fSYongseok Koh  *   Number of packets in array.
136f0d2114fSYongseok Koh  *
137f0d2114fSYongseok Koh  * @return
138f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
139f0d2114fSYongseok Koh  */
140f0d2114fSYongseok Koh uint16_t
141f0d2114fSYongseok Koh mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
142f0d2114fSYongseok Koh {
143f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
144f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
145f0d2114fSYongseok Koh 
146f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
147f0d2114fSYongseok Koh 		uint8_t cs_flags = 0;
148f0d2114fSYongseok Koh 		uint16_t n;
149f0d2114fSYongseok Koh 		uint16_t ret;
1506bd7fbd0SDekel Peled 		rte_be32_t metadata = 0;
151f0d2114fSYongseok Koh 
152f0d2114fSYongseok Koh 		/* Transmit multi-seg packets in the head of pkts list. */
153dbccb4cdSShahaf Shuler 		if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
154f0d2114fSYongseok Koh 		    NB_SEGS(pkts[nb_tx]) > 1)
155f0d2114fSYongseok Koh 			nb_tx += txq_scatter_v(txq,
156f0d2114fSYongseok Koh 					       &pkts[nb_tx],
157f0d2114fSYongseok Koh 					       pkts_n - nb_tx);
158f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
159dbccb4cdSShahaf Shuler 		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
1604b0d7b7fSYongseok Koh 			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
1616bd7fbd0SDekel Peled 		if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
1626bd7fbd0SDekel Peled 				     DEV_TX_OFFLOAD_MATCH_METADATA))
1636bd7fbd0SDekel Peled 			n = txq_calc_offload(&pkts[nb_tx], n,
1646bd7fbd0SDekel Peled 					     &cs_flags, &metadata,
1656bd7fbd0SDekel Peled 					     txq->offloads);
1666bd7fbd0SDekel Peled 		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
167f0d2114fSYongseok Koh 		nb_tx += ret;
168f0d2114fSYongseok Koh 		if (!ret)
169f0d2114fSYongseok Koh 			break;
170f0d2114fSYongseok Koh 	}
171f0d2114fSYongseok Koh 	return nb_tx;
172f0d2114fSYongseok Koh }
173f0d2114fSYongseok Koh 
174f0d2114fSYongseok Koh /**
175f0d2114fSYongseok Koh  * Skip error packets.
176f0d2114fSYongseok Koh  *
177f0d2114fSYongseok Koh  * @param rxq
178f0d2114fSYongseok Koh  *   Pointer to RX queue structure.
179f0d2114fSYongseok Koh  * @param[out] pkts
180f0d2114fSYongseok Koh  *   Array to store received packets.
181f0d2114fSYongseok Koh  * @param pkts_n
182f0d2114fSYongseok Koh  *   Maximum number of packets in array.
183f0d2114fSYongseok Koh  *
184f0d2114fSYongseok Koh  * @return
185f0d2114fSYongseok Koh  *   Number of packets successfully received (<= pkts_n).
186f0d2114fSYongseok Koh  */
187f0d2114fSYongseok Koh static uint16_t
188f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
189f0d2114fSYongseok Koh 			 uint16_t pkts_n)
190f0d2114fSYongseok Koh {
191f0d2114fSYongseok Koh 	uint16_t n = 0;
192f0d2114fSYongseok Koh 	unsigned int i;
193f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
194f0d2114fSYongseok Koh 	uint32_t err_bytes = 0;
195f0d2114fSYongseok Koh #endif
196f0d2114fSYongseok Koh 
197f0d2114fSYongseok Koh 	for (i = 0; i < pkts_n; ++i) {
198f0d2114fSYongseok Koh 		struct rte_mbuf *pkt = pkts[i];
199f0d2114fSYongseok Koh 
200f0d2114fSYongseok Koh 		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
201f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
202f0d2114fSYongseok Koh 			err_bytes += PKT_LEN(pkt);
203f0d2114fSYongseok Koh #endif
204f0d2114fSYongseok Koh 			rte_pktmbuf_free_seg(pkt);
205f0d2114fSYongseok Koh 		} else {
206f0d2114fSYongseok Koh 			pkts[n++] = pkt;
207f0d2114fSYongseok Koh 		}
208f0d2114fSYongseok Koh 	}
209f0d2114fSYongseok Koh 	rxq->stats.idropped += (pkts_n - n);
210f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
211f0d2114fSYongseok Koh 	/* Correct counters of errored completions. */
212f0d2114fSYongseok Koh 	rxq->stats.ipackets -= (pkts_n - n);
213f0d2114fSYongseok Koh 	rxq->stats.ibytes -= err_bytes;
214f0d2114fSYongseok Koh #endif
215f0d2114fSYongseok Koh 	return n;
216f0d2114fSYongseok Koh }
217f0d2114fSYongseok Koh 
/**
 * DPDK callback for vectorized RX.
 *
 * Receives a burst through rxq_burst_v(). If that call reports a non-zero
 * error value, the received packets are filtered through
 * rxq_handle_pending_error() before being returned.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint64_t err = 0;
	uint16_t received = rxq_burst_v(rxq, pkts, pkts_n, &err);

	if (unlikely(err))
		return rxq_handle_pending_error(rxq, pkts, received);
	return received;
}
243f0d2114fSYongseok Koh 
244f0d2114fSYongseok Koh /**
245f0d2114fSYongseok Koh  * Check Tx queue flags are set for raw vectorized Tx.
246f0d2114fSYongseok Koh  *
247dbccb4cdSShahaf Shuler  * @param dev
248af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
249f0d2114fSYongseok Koh  *
250f0d2114fSYongseok Koh  * @return
251f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
252f0d2114fSYongseok Koh  */
253f0d2114fSYongseok Koh int __attribute__((cold))
254af4f09f2SNélio Laranjeiro mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
255f0d2114fSYongseok Koh {
256dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
257f0d2114fSYongseok Koh 
258dbccb4cdSShahaf Shuler 	/* Doesn't support any offload. */
259dbccb4cdSShahaf Shuler 	if (offloads)
260f0d2114fSYongseok Koh 		return -ENOTSUP;
261f0d2114fSYongseok Koh 	return 1;
262f0d2114fSYongseok Koh }
263f0d2114fSYongseok Koh 
264f0d2114fSYongseok Koh /**
265f0d2114fSYongseok Koh  * Check a device can support vectorized TX.
266f0d2114fSYongseok Koh  *
267dbccb4cdSShahaf Shuler  * @param dev
268af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
269f0d2114fSYongseok Koh  *
270f0d2114fSYongseok Koh  * @return
271f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
272f0d2114fSYongseok Koh  */
273f0d2114fSYongseok Koh int __attribute__((cold))
274af4f09f2SNélio Laranjeiro mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
275f0d2114fSYongseok Koh {
276*dbeba4cfSThomas Monjalon 	struct mlx5_priv *priv = dev->data->dev_private;
277dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
278dbccb4cdSShahaf Shuler 
2797fe24446SShahaf Shuler 	if (!priv->config.tx_vec_en ||
28009d8b416SYongseok Koh 	    priv->txqs_n > (unsigned int)priv->config.txqs_vec ||
2817fe24446SShahaf Shuler 	    priv->config.mps != MLX5_MPW_ENHANCED ||
282dbccb4cdSShahaf Shuler 	    offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
283f0d2114fSYongseok Koh 		return -ENOTSUP;
284f0d2114fSYongseok Koh 	return 1;
285f0d2114fSYongseok Koh }
286f0d2114fSYongseok Koh 
287f0d2114fSYongseok Koh /**
288f0d2114fSYongseok Koh  * Check a RX queue can support vectorized RX.
289f0d2114fSYongseok Koh  *
290f0d2114fSYongseok Koh  * @param rxq
291f0d2114fSYongseok Koh  *   Pointer to RX queue.
292f0d2114fSYongseok Koh  *
293f0d2114fSYongseok Koh  * @return
294f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
295f0d2114fSYongseok Koh  */
296f0d2114fSYongseok Koh int __attribute__((cold))
297af4f09f2SNélio Laranjeiro mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
298f0d2114fSYongseok Koh {
299f0d2114fSYongseok Koh 	struct mlx5_rxq_ctrl *ctrl =
300f0d2114fSYongseok Koh 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
301f0d2114fSYongseok Koh 
3027d6bf6b8SYongseok Koh 	if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv)))
3037d6bf6b8SYongseok Koh 		return -ENOTSUP;
3047fe24446SShahaf Shuler 	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
305f0d2114fSYongseok Koh 		return -ENOTSUP;
306f0d2114fSYongseok Koh 	return 1;
307f0d2114fSYongseok Koh }
308f0d2114fSYongseok Koh 
309f0d2114fSYongseok Koh /**
310f0d2114fSYongseok Koh  * Check a device can support vectorized RX.
311f0d2114fSYongseok Koh  *
312af4f09f2SNélio Laranjeiro  * @param dev
313af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
314f0d2114fSYongseok Koh  *
315f0d2114fSYongseok Koh  * @return
316f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
317f0d2114fSYongseok Koh  */
318f0d2114fSYongseok Koh int __attribute__((cold))
319af4f09f2SNélio Laranjeiro mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
320f0d2114fSYongseok Koh {
321*dbeba4cfSThomas Monjalon 	struct mlx5_priv *priv = dev->data->dev_private;
322f0d2114fSYongseok Koh 	uint16_t i;
323f0d2114fSYongseok Koh 
3247fe24446SShahaf Shuler 	if (!priv->config.rx_vec_en)
325f0d2114fSYongseok Koh 		return -ENOTSUP;
3267d6bf6b8SYongseok Koh 	if (mlx5_mprq_enabled(dev))
3277d6bf6b8SYongseok Koh 		return -ENOTSUP;
328f0d2114fSYongseok Koh 	/* All the configured queues should support. */
329f0d2114fSYongseok Koh 	for (i = 0; i < priv->rxqs_n; ++i) {
330f0d2114fSYongseok Koh 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
331f0d2114fSYongseok Koh 
332f0d2114fSYongseok Koh 		if (!rxq)
333f0d2114fSYongseok Koh 			continue;
334af4f09f2SNélio Laranjeiro 		if (mlx5_rxq_check_vec_support(rxq) < 0)
335f0d2114fSYongseok Koh 			break;
336f0d2114fSYongseok Koh 	}
337f0d2114fSYongseok Koh 	if (i != priv->rxqs_n)
338f0d2114fSYongseok Koh 		return -ENOTSUP;
339f0d2114fSYongseok Koh 	return 1;
340f0d2114fSYongseok Koh }
341