xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision 7d6bf6b866b8c25ec06539b3eeed1db4f785577c)
18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2f0d2114fSYongseok Koh  * Copyright 2017 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2017 Mellanox Technologies, Ltd
4f0d2114fSYongseok Koh  */
5f0d2114fSYongseok Koh 
6f0d2114fSYongseok Koh #include <assert.h>
7f0d2114fSYongseok Koh #include <stdint.h>
8f0d2114fSYongseok Koh #include <string.h>
9f0d2114fSYongseok Koh #include <stdlib.h>
10f0d2114fSYongseok Koh 
11f0d2114fSYongseok Koh /* Verbs header. */
12f0d2114fSYongseok Koh /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13f0d2114fSYongseok Koh #ifdef PEDANTIC
14f0d2114fSYongseok Koh #pragma GCC diagnostic ignored "-Wpedantic"
15f0d2114fSYongseok Koh #endif
16f0d2114fSYongseok Koh #include <infiniband/verbs.h>
17f0d2114fSYongseok Koh #include <infiniband/mlx5dv.h>
18f0d2114fSYongseok Koh #ifdef PEDANTIC
19f0d2114fSYongseok Koh #pragma GCC diagnostic error "-Wpedantic"
20f0d2114fSYongseok Koh #endif
21f0d2114fSYongseok Koh 
22f0d2114fSYongseok Koh #include <rte_mbuf.h>
23f0d2114fSYongseok Koh #include <rte_mempool.h>
24f0d2114fSYongseok Koh #include <rte_prefetch.h>
25f0d2114fSYongseok Koh 
26f0d2114fSYongseok Koh #include "mlx5.h"
27f0d2114fSYongseok Koh #include "mlx5_utils.h"
28f0d2114fSYongseok Koh #include "mlx5_rxtx.h"
295bfc9fc1SYongseok Koh #include "mlx5_rxtx_vec.h"
30f0d2114fSYongseok Koh #include "mlx5_autoconf.h"
31f0d2114fSYongseok Koh #include "mlx5_defs.h"
32f0d2114fSYongseok Koh #include "mlx5_prm.h"
33f0d2114fSYongseok Koh 
34570acdb1SYongseok Koh #if defined RTE_ARCH_X86_64
353c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec_sse.h"
36570acdb1SYongseok Koh #elif defined RTE_ARCH_ARM64
37570acdb1SYongseok Koh #include "mlx5_rxtx_vec_neon.h"
383c2ddbd4SYongseok Koh #else
393c2ddbd4SYongseok Koh #error "This should not be compiled if SIMD instructions are not supported."
40f0d2114fSYongseok Koh #endif
41f0d2114fSYongseok Koh 
42f0d2114fSYongseok Koh /**
43f0d2114fSYongseok Koh  * Count the number of packets having same ol_flags and calculate cs_flags.
44f0d2114fSYongseok Koh  *
45f0d2114fSYongseok Koh  * @param pkts
46f0d2114fSYongseok Koh  *   Pointer to array of packets.
47f0d2114fSYongseok Koh  * @param pkts_n
48f0d2114fSYongseok Koh  *   Number of packets.
49f0d2114fSYongseok Koh  * @param cs_flags
50f0d2114fSYongseok Koh  *   Pointer of flags to be returned.
51f0d2114fSYongseok Koh  *
52f0d2114fSYongseok Koh  * @return
53f0d2114fSYongseok Koh  *   Number of packets having same ol_flags.
54f0d2114fSYongseok Koh  */
55f0d2114fSYongseok Koh static inline unsigned int
565f8ba81cSXueming Li txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
57f0d2114fSYongseok Koh {
58f0d2114fSYongseok Koh 	unsigned int pos;
59f0d2114fSYongseok Koh 	const uint64_t ol_mask =
60f0d2114fSYongseok Koh 		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
61f0d2114fSYongseok Koh 		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
62f0d2114fSYongseok Koh 		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
63f0d2114fSYongseok Koh 
64f0d2114fSYongseok Koh 	if (!pkts_n)
65f0d2114fSYongseok Koh 		return 0;
66f0d2114fSYongseok Koh 	/* Count the number of packets having same ol_flags. */
67f0d2114fSYongseok Koh 	for (pos = 1; pos < pkts_n; ++pos)
68f0d2114fSYongseok Koh 		if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
69f0d2114fSYongseok Koh 			break;
705f8ba81cSXueming Li 	*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
71f0d2114fSYongseok Koh 	return pos;
72f0d2114fSYongseok Koh }
73f0d2114fSYongseok Koh 
74f0d2114fSYongseok Koh /**
75f0d2114fSYongseok Koh  * DPDK callback for vectorized TX.
76f0d2114fSYongseok Koh  *
77f0d2114fSYongseok Koh  * @param dpdk_txq
78f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
79f0d2114fSYongseok Koh  * @param[in] pkts
80f0d2114fSYongseok Koh  *   Packets to transmit.
81f0d2114fSYongseok Koh  * @param pkts_n
82f0d2114fSYongseok Koh  *   Number of packets in array.
83f0d2114fSYongseok Koh  *
84f0d2114fSYongseok Koh  * @return
85f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
86f0d2114fSYongseok Koh  */
87f0d2114fSYongseok Koh uint16_t
88f0d2114fSYongseok Koh mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
89f0d2114fSYongseok Koh 		      uint16_t pkts_n)
90f0d2114fSYongseok Koh {
91f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
92f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
93f0d2114fSYongseok Koh 
94f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
95f0d2114fSYongseok Koh 		uint16_t n;
96f0d2114fSYongseok Koh 		uint16_t ret;
97f0d2114fSYongseok Koh 
98f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
99f0d2114fSYongseok Koh 		ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
100f0d2114fSYongseok Koh 		nb_tx += ret;
101f0d2114fSYongseok Koh 		if (!ret)
102f0d2114fSYongseok Koh 			break;
103f0d2114fSYongseok Koh 	}
104f0d2114fSYongseok Koh 	return nb_tx;
105f0d2114fSYongseok Koh }
106f0d2114fSYongseok Koh 
107f0d2114fSYongseok Koh /**
108f0d2114fSYongseok Koh  * DPDK callback for vectorized TX with multi-seg packets and offload.
109f0d2114fSYongseok Koh  *
110f0d2114fSYongseok Koh  * @param dpdk_txq
111f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
112f0d2114fSYongseok Koh  * @param[in] pkts
113f0d2114fSYongseok Koh  *   Packets to transmit.
114f0d2114fSYongseok Koh  * @param pkts_n
115f0d2114fSYongseok Koh  *   Number of packets in array.
116f0d2114fSYongseok Koh  *
117f0d2114fSYongseok Koh  * @return
118f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
119f0d2114fSYongseok Koh  */
120f0d2114fSYongseok Koh uint16_t
121f0d2114fSYongseok Koh mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
122f0d2114fSYongseok Koh {
123f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
124f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
125f0d2114fSYongseok Koh 
126f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
127f0d2114fSYongseok Koh 		uint8_t cs_flags = 0;
128f0d2114fSYongseok Koh 		uint16_t n;
129f0d2114fSYongseok Koh 		uint16_t ret;
130f0d2114fSYongseok Koh 
131f0d2114fSYongseok Koh 		/* Transmit multi-seg packets in the head of pkts list. */
132dbccb4cdSShahaf Shuler 		if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
133f0d2114fSYongseok Koh 		    NB_SEGS(pkts[nb_tx]) > 1)
134f0d2114fSYongseok Koh 			nb_tx += txq_scatter_v(txq,
135f0d2114fSYongseok Koh 					       &pkts[nb_tx],
136f0d2114fSYongseok Koh 					       pkts_n - nb_tx);
137f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
138dbccb4cdSShahaf Shuler 		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
1394b0d7b7fSYongseok Koh 			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
140dbccb4cdSShahaf Shuler 		if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
1415f8ba81cSXueming Li 			n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
142f0d2114fSYongseok Koh 		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
143f0d2114fSYongseok Koh 		nb_tx += ret;
144f0d2114fSYongseok Koh 		if (!ret)
145f0d2114fSYongseok Koh 			break;
146f0d2114fSYongseok Koh 	}
147f0d2114fSYongseok Koh 	return nb_tx;
148f0d2114fSYongseok Koh }
149f0d2114fSYongseok Koh 
150f0d2114fSYongseok Koh /**
151f0d2114fSYongseok Koh  * Skip error packets.
152f0d2114fSYongseok Koh  *
153f0d2114fSYongseok Koh  * @param rxq
154f0d2114fSYongseok Koh  *   Pointer to RX queue structure.
155f0d2114fSYongseok Koh  * @param[out] pkts
156f0d2114fSYongseok Koh  *   Array to store received packets.
157f0d2114fSYongseok Koh  * @param pkts_n
158f0d2114fSYongseok Koh  *   Maximum number of packets in array.
159f0d2114fSYongseok Koh  *
160f0d2114fSYongseok Koh  * @return
161f0d2114fSYongseok Koh  *   Number of packets successfully received (<= pkts_n).
162f0d2114fSYongseok Koh  */
163f0d2114fSYongseok Koh static uint16_t
164f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
165f0d2114fSYongseok Koh 			 uint16_t pkts_n)
166f0d2114fSYongseok Koh {
167f0d2114fSYongseok Koh 	uint16_t n = 0;
168f0d2114fSYongseok Koh 	unsigned int i;
169f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
170f0d2114fSYongseok Koh 	uint32_t err_bytes = 0;
171f0d2114fSYongseok Koh #endif
172f0d2114fSYongseok Koh 
173f0d2114fSYongseok Koh 	for (i = 0; i < pkts_n; ++i) {
174f0d2114fSYongseok Koh 		struct rte_mbuf *pkt = pkts[i];
175f0d2114fSYongseok Koh 
176f0d2114fSYongseok Koh 		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
177f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
178f0d2114fSYongseok Koh 			err_bytes += PKT_LEN(pkt);
179f0d2114fSYongseok Koh #endif
180f0d2114fSYongseok Koh 			rte_pktmbuf_free_seg(pkt);
181f0d2114fSYongseok Koh 		} else {
182f0d2114fSYongseok Koh 			pkts[n++] = pkt;
183f0d2114fSYongseok Koh 		}
184f0d2114fSYongseok Koh 	}
185f0d2114fSYongseok Koh 	rxq->stats.idropped += (pkts_n - n);
186f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
187f0d2114fSYongseok Koh 	/* Correct counters of errored completions. */
188f0d2114fSYongseok Koh 	rxq->stats.ipackets -= (pkts_n - n);
189f0d2114fSYongseok Koh 	rxq->stats.ibytes -= err_bytes;
190f0d2114fSYongseok Koh #endif
191f0d2114fSYongseok Koh 	return n;
192f0d2114fSYongseok Koh }
193f0d2114fSYongseok Koh 
/**
 * DPDK callback for vectorized RX.
 *
 * Receives a burst through rxq_burst_v(). When that call reports a non-zero
 * error indication, the burst is passed through rxq_handle_pending_error()
 * before being returned.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint64_t err = 0;
	const uint16_t rcvd = rxq_burst_v(rxq, pkts, pkts_n, &err);

	/* Rare path: filter out the errored packets before returning. */
	if (unlikely(err))
		return rxq_handle_pending_error(rxq, pkts, rcvd);
	return rcvd;
}
219f0d2114fSYongseok Koh 
220f0d2114fSYongseok Koh /**
221f0d2114fSYongseok Koh  * Check Tx queue flags are set for raw vectorized Tx.
222f0d2114fSYongseok Koh  *
223dbccb4cdSShahaf Shuler  * @param dev
224af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
225f0d2114fSYongseok Koh  *
226f0d2114fSYongseok Koh  * @return
227f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
228f0d2114fSYongseok Koh  */
229f0d2114fSYongseok Koh int __attribute__((cold))
230af4f09f2SNélio Laranjeiro mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
231f0d2114fSYongseok Koh {
232dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
233f0d2114fSYongseok Koh 
234dbccb4cdSShahaf Shuler 	/* Doesn't support any offload. */
235dbccb4cdSShahaf Shuler 	if (offloads)
236f0d2114fSYongseok Koh 		return -ENOTSUP;
237f0d2114fSYongseok Koh 	return 1;
238f0d2114fSYongseok Koh }
239f0d2114fSYongseok Koh 
240f0d2114fSYongseok Koh /**
241f0d2114fSYongseok Koh  * Check a device can support vectorized TX.
242f0d2114fSYongseok Koh  *
243dbccb4cdSShahaf Shuler  * @param dev
244af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
245f0d2114fSYongseok Koh  *
246f0d2114fSYongseok Koh  * @return
247f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
248f0d2114fSYongseok Koh  */
249f0d2114fSYongseok Koh int __attribute__((cold))
250af4f09f2SNélio Laranjeiro mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
251f0d2114fSYongseok Koh {
252af4f09f2SNélio Laranjeiro 	struct priv *priv = dev->data->dev_private;
253dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
254dbccb4cdSShahaf Shuler 
2557fe24446SShahaf Shuler 	if (!priv->config.tx_vec_en ||
256f0d2114fSYongseok Koh 	    priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
2577fe24446SShahaf Shuler 	    priv->config.mps != MLX5_MPW_ENHANCED ||
258dbccb4cdSShahaf Shuler 	    offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
259f0d2114fSYongseok Koh 		return -ENOTSUP;
260f0d2114fSYongseok Koh 	return 1;
261f0d2114fSYongseok Koh }
262f0d2114fSYongseok Koh 
263f0d2114fSYongseok Koh /**
264f0d2114fSYongseok Koh  * Check a RX queue can support vectorized RX.
265f0d2114fSYongseok Koh  *
266f0d2114fSYongseok Koh  * @param rxq
267f0d2114fSYongseok Koh  *   Pointer to RX queue.
268f0d2114fSYongseok Koh  *
269f0d2114fSYongseok Koh  * @return
270f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
271f0d2114fSYongseok Koh  */
272f0d2114fSYongseok Koh int __attribute__((cold))
273af4f09f2SNélio Laranjeiro mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
274f0d2114fSYongseok Koh {
275f0d2114fSYongseok Koh 	struct mlx5_rxq_ctrl *ctrl =
276f0d2114fSYongseok Koh 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
277f0d2114fSYongseok Koh 
278*7d6bf6b8SYongseok Koh 	if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv)))
279*7d6bf6b8SYongseok Koh 		return -ENOTSUP;
2807fe24446SShahaf Shuler 	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
281f0d2114fSYongseok Koh 		return -ENOTSUP;
282f0d2114fSYongseok Koh 	return 1;
283f0d2114fSYongseok Koh }
284f0d2114fSYongseok Koh 
285f0d2114fSYongseok Koh /**
286f0d2114fSYongseok Koh  * Check a device can support vectorized RX.
287f0d2114fSYongseok Koh  *
288af4f09f2SNélio Laranjeiro  * @param dev
289af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
290f0d2114fSYongseok Koh  *
291f0d2114fSYongseok Koh  * @return
292f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
293f0d2114fSYongseok Koh  */
294f0d2114fSYongseok Koh int __attribute__((cold))
295af4f09f2SNélio Laranjeiro mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
296f0d2114fSYongseok Koh {
297af4f09f2SNélio Laranjeiro 	struct priv *priv = dev->data->dev_private;
298f0d2114fSYongseok Koh 	uint16_t i;
299f0d2114fSYongseok Koh 
3007fe24446SShahaf Shuler 	if (!priv->config.rx_vec_en)
301f0d2114fSYongseok Koh 		return -ENOTSUP;
302*7d6bf6b8SYongseok Koh 	if (mlx5_mprq_enabled(dev))
303*7d6bf6b8SYongseok Koh 		return -ENOTSUP;
304f0d2114fSYongseok Koh 	/* All the configured queues should support. */
305f0d2114fSYongseok Koh 	for (i = 0; i < priv->rxqs_n; ++i) {
306f0d2114fSYongseok Koh 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
307f0d2114fSYongseok Koh 
308f0d2114fSYongseok Koh 		if (!rxq)
309f0d2114fSYongseok Koh 			continue;
310af4f09f2SNélio Laranjeiro 		if (mlx5_rxq_check_vec_support(rxq) < 0)
311f0d2114fSYongseok Koh 			break;
312f0d2114fSYongseok Koh 	}
313f0d2114fSYongseok Koh 	if (i != priv->rxqs_n)
314f0d2114fSYongseok Koh 		return -ENOTSUP;
315f0d2114fSYongseok Koh 	return 1;
316f0d2114fSYongseok Koh }
317