xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c (revision af4f09f28294fac762ff413fbf14b48c42c128fd)
18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2f0d2114fSYongseok Koh  * Copyright 2017 6WIND S.A.
3f0d2114fSYongseok Koh  * Copyright 2017 Mellanox.
4f0d2114fSYongseok Koh  */
5f0d2114fSYongseok Koh 
6f0d2114fSYongseok Koh #include <assert.h>
7f0d2114fSYongseok Koh #include <stdint.h>
8f0d2114fSYongseok Koh #include <string.h>
9f0d2114fSYongseok Koh #include <stdlib.h>
10f0d2114fSYongseok Koh 
11f0d2114fSYongseok Koh /* Verbs header. */
12f0d2114fSYongseok Koh /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13f0d2114fSYongseok Koh #ifdef PEDANTIC
14f0d2114fSYongseok Koh #pragma GCC diagnostic ignored "-Wpedantic"
15f0d2114fSYongseok Koh #endif
16f0d2114fSYongseok Koh #include <infiniband/verbs.h>
17f0d2114fSYongseok Koh #include <infiniband/mlx5dv.h>
18f0d2114fSYongseok Koh #ifdef PEDANTIC
19f0d2114fSYongseok Koh #pragma GCC diagnostic error "-Wpedantic"
20f0d2114fSYongseok Koh #endif
21f0d2114fSYongseok Koh 
22f0d2114fSYongseok Koh #include <rte_mbuf.h>
23f0d2114fSYongseok Koh #include <rte_mempool.h>
24f0d2114fSYongseok Koh #include <rte_prefetch.h>
25f0d2114fSYongseok Koh 
26f0d2114fSYongseok Koh #include "mlx5.h"
27f0d2114fSYongseok Koh #include "mlx5_utils.h"
28f0d2114fSYongseok Koh #include "mlx5_rxtx.h"
295bfc9fc1SYongseok Koh #include "mlx5_rxtx_vec.h"
30f0d2114fSYongseok Koh #include "mlx5_autoconf.h"
31f0d2114fSYongseok Koh #include "mlx5_defs.h"
32f0d2114fSYongseok Koh #include "mlx5_prm.h"
33f0d2114fSYongseok Koh 
34570acdb1SYongseok Koh #if defined RTE_ARCH_X86_64
353c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec_sse.h"
36570acdb1SYongseok Koh #elif defined RTE_ARCH_ARM64
37570acdb1SYongseok Koh #include "mlx5_rxtx_vec_neon.h"
383c2ddbd4SYongseok Koh #else
393c2ddbd4SYongseok Koh #error "This should not be compiled if SIMD instructions are not supported."
40f0d2114fSYongseok Koh #endif
41f0d2114fSYongseok Koh 
42f0d2114fSYongseok Koh /**
43f0d2114fSYongseok Koh  * Count the number of packets having same ol_flags and calculate cs_flags.
44f0d2114fSYongseok Koh  *
45f0d2114fSYongseok Koh  * @param txq
46f0d2114fSYongseok Koh  *   Pointer to TX queue structure.
47f0d2114fSYongseok Koh  * @param pkts
48f0d2114fSYongseok Koh  *   Pointer to array of packets.
49f0d2114fSYongseok Koh  * @param pkts_n
50f0d2114fSYongseok Koh  *   Number of packets.
51f0d2114fSYongseok Koh  * @param cs_flags
52f0d2114fSYongseok Koh  *   Pointer of flags to be returned.
53f0d2114fSYongseok Koh  *
54f0d2114fSYongseok Koh  * @return
55f0d2114fSYongseok Koh  *   Number of packets having same ol_flags.
56f0d2114fSYongseok Koh  */
57f0d2114fSYongseok Koh static inline unsigned int
58f0d2114fSYongseok Koh txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
59f0d2114fSYongseok Koh 		 uint16_t pkts_n, uint8_t *cs_flags)
60f0d2114fSYongseok Koh {
61f0d2114fSYongseok Koh 	unsigned int pos;
62f0d2114fSYongseok Koh 	const uint64_t ol_mask =
63f0d2114fSYongseok Koh 		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
64f0d2114fSYongseok Koh 		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
65f0d2114fSYongseok Koh 		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
66f0d2114fSYongseok Koh 
67f0d2114fSYongseok Koh 	if (!pkts_n)
68f0d2114fSYongseok Koh 		return 0;
69f0d2114fSYongseok Koh 	/* Count the number of packets having same ol_flags. */
70f0d2114fSYongseok Koh 	for (pos = 1; pos < pkts_n; ++pos)
71f0d2114fSYongseok Koh 		if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
72f0d2114fSYongseok Koh 			break;
734aa15eb1SNélio Laranjeiro 	*cs_flags = txq_ol_cksum_to_cs(txq, pkts[0]);
74f0d2114fSYongseok Koh 	return pos;
75f0d2114fSYongseok Koh }
76f0d2114fSYongseok Koh 
77f0d2114fSYongseok Koh /**
78f0d2114fSYongseok Koh  * DPDK callback for vectorized TX.
79f0d2114fSYongseok Koh  *
80f0d2114fSYongseok Koh  * @param dpdk_txq
81f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
82f0d2114fSYongseok Koh  * @param[in] pkts
83f0d2114fSYongseok Koh  *   Packets to transmit.
84f0d2114fSYongseok Koh  * @param pkts_n
85f0d2114fSYongseok Koh  *   Number of packets in array.
86f0d2114fSYongseok Koh  *
87f0d2114fSYongseok Koh  * @return
88f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
89f0d2114fSYongseok Koh  */
90f0d2114fSYongseok Koh uint16_t
91f0d2114fSYongseok Koh mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
92f0d2114fSYongseok Koh 		      uint16_t pkts_n)
93f0d2114fSYongseok Koh {
94f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
95f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
96f0d2114fSYongseok Koh 
97f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
98f0d2114fSYongseok Koh 		uint16_t n;
99f0d2114fSYongseok Koh 		uint16_t ret;
100f0d2114fSYongseok Koh 
101f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
102f0d2114fSYongseok Koh 		ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
103f0d2114fSYongseok Koh 		nb_tx += ret;
104f0d2114fSYongseok Koh 		if (!ret)
105f0d2114fSYongseok Koh 			break;
106f0d2114fSYongseok Koh 	}
107f0d2114fSYongseok Koh 	return nb_tx;
108f0d2114fSYongseok Koh }
109f0d2114fSYongseok Koh 
110f0d2114fSYongseok Koh /**
111f0d2114fSYongseok Koh  * DPDK callback for vectorized TX with multi-seg packets and offload.
112f0d2114fSYongseok Koh  *
113f0d2114fSYongseok Koh  * @param dpdk_txq
114f0d2114fSYongseok Koh  *   Generic pointer to TX queue structure.
115f0d2114fSYongseok Koh  * @param[in] pkts
116f0d2114fSYongseok Koh  *   Packets to transmit.
117f0d2114fSYongseok Koh  * @param pkts_n
118f0d2114fSYongseok Koh  *   Number of packets in array.
119f0d2114fSYongseok Koh  *
120f0d2114fSYongseok Koh  * @return
121f0d2114fSYongseok Koh  *   Number of packets successfully transmitted (<= pkts_n).
122f0d2114fSYongseok Koh  */
123f0d2114fSYongseok Koh uint16_t
124f0d2114fSYongseok Koh mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
125f0d2114fSYongseok Koh {
126f0d2114fSYongseok Koh 	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
127f0d2114fSYongseok Koh 	uint16_t nb_tx = 0;
128f0d2114fSYongseok Koh 
129f0d2114fSYongseok Koh 	while (pkts_n > nb_tx) {
130f0d2114fSYongseok Koh 		uint8_t cs_flags = 0;
131f0d2114fSYongseok Koh 		uint16_t n;
132f0d2114fSYongseok Koh 		uint16_t ret;
133f0d2114fSYongseok Koh 
134f0d2114fSYongseok Koh 		/* Transmit multi-seg packets in the head of pkts list. */
135dbccb4cdSShahaf Shuler 		if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
136f0d2114fSYongseok Koh 		    NB_SEGS(pkts[nb_tx]) > 1)
137f0d2114fSYongseok Koh 			nb_tx += txq_scatter_v(txq,
138f0d2114fSYongseok Koh 					       &pkts[nb_tx],
139f0d2114fSYongseok Koh 					       pkts_n - nb_tx);
140f0d2114fSYongseok Koh 		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
141dbccb4cdSShahaf Shuler 		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
1424b0d7b7fSYongseok Koh 			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
143dbccb4cdSShahaf Shuler 		if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
144f0d2114fSYongseok Koh 			n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
145f0d2114fSYongseok Koh 		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
146f0d2114fSYongseok Koh 		nb_tx += ret;
147f0d2114fSYongseok Koh 		if (!ret)
148f0d2114fSYongseok Koh 			break;
149f0d2114fSYongseok Koh 	}
150f0d2114fSYongseok Koh 	return nb_tx;
151f0d2114fSYongseok Koh }
152f0d2114fSYongseok Koh 
153f0d2114fSYongseok Koh /**
154f0d2114fSYongseok Koh  * Skip error packets.
155f0d2114fSYongseok Koh  *
156f0d2114fSYongseok Koh  * @param rxq
157f0d2114fSYongseok Koh  *   Pointer to RX queue structure.
158f0d2114fSYongseok Koh  * @param[out] pkts
159f0d2114fSYongseok Koh  *   Array to store received packets.
160f0d2114fSYongseok Koh  * @param pkts_n
161f0d2114fSYongseok Koh  *   Maximum number of packets in array.
162f0d2114fSYongseok Koh  *
163f0d2114fSYongseok Koh  * @return
164f0d2114fSYongseok Koh  *   Number of packets successfully received (<= pkts_n).
165f0d2114fSYongseok Koh  */
166f0d2114fSYongseok Koh static uint16_t
167f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
168f0d2114fSYongseok Koh 			 uint16_t pkts_n)
169f0d2114fSYongseok Koh {
170f0d2114fSYongseok Koh 	uint16_t n = 0;
171f0d2114fSYongseok Koh 	unsigned int i;
172f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
173f0d2114fSYongseok Koh 	uint32_t err_bytes = 0;
174f0d2114fSYongseok Koh #endif
175f0d2114fSYongseok Koh 
176f0d2114fSYongseok Koh 	for (i = 0; i < pkts_n; ++i) {
177f0d2114fSYongseok Koh 		struct rte_mbuf *pkt = pkts[i];
178f0d2114fSYongseok Koh 
179f0d2114fSYongseok Koh 		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
180f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
181f0d2114fSYongseok Koh 			err_bytes += PKT_LEN(pkt);
182f0d2114fSYongseok Koh #endif
183f0d2114fSYongseok Koh 			rte_pktmbuf_free_seg(pkt);
184f0d2114fSYongseok Koh 		} else {
185f0d2114fSYongseok Koh 			pkts[n++] = pkt;
186f0d2114fSYongseok Koh 		}
187f0d2114fSYongseok Koh 	}
188f0d2114fSYongseok Koh 	rxq->stats.idropped += (pkts_n - n);
189f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS
190f0d2114fSYongseok Koh 	/* Correct counters of errored completions. */
191f0d2114fSYongseok Koh 	rxq->stats.ipackets -= (pkts_n - n);
192f0d2114fSYongseok Koh 	rxq->stats.ibytes -= err_bytes;
193f0d2114fSYongseok Koh #endif
194f0d2114fSYongseok Koh 	return n;
195f0d2114fSYongseok Koh }
196f0d2114fSYongseok Koh 
/**
 * DPDK callback for vectorized RX.
 *
 * Receives a burst through rxq_burst_v(). If that call reports a non-zero
 * error value, the burst is filtered by rxq_handle_pending_error() before
 * being returned.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint64_t err = 0;
	uint16_t received = rxq_burst_v(rxq, pkts, pkts_n, &err);

	if (unlikely(err))
		return rxq_handle_pending_error(rxq, pkts, received);
	return received;
}
222f0d2114fSYongseok Koh 
223f0d2114fSYongseok Koh /**
224f0d2114fSYongseok Koh  * Check Tx queue flags are set for raw vectorized Tx.
225f0d2114fSYongseok Koh  *
226dbccb4cdSShahaf Shuler  * @param dev
227*af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
228f0d2114fSYongseok Koh  *
229f0d2114fSYongseok Koh  * @return
230f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
231f0d2114fSYongseok Koh  */
232f0d2114fSYongseok Koh int __attribute__((cold))
233*af4f09f2SNélio Laranjeiro mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
234f0d2114fSYongseok Koh {
235dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
236f0d2114fSYongseok Koh 
237dbccb4cdSShahaf Shuler 	/* Doesn't support any offload. */
238dbccb4cdSShahaf Shuler 	if (offloads)
239f0d2114fSYongseok Koh 		return -ENOTSUP;
240f0d2114fSYongseok Koh 	return 1;
241f0d2114fSYongseok Koh }
242f0d2114fSYongseok Koh 
243f0d2114fSYongseok Koh /**
244f0d2114fSYongseok Koh  * Check a device can support vectorized TX.
245f0d2114fSYongseok Koh  *
246dbccb4cdSShahaf Shuler  * @param dev
247*af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
248f0d2114fSYongseok Koh  *
249f0d2114fSYongseok Koh  * @return
250f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
251f0d2114fSYongseok Koh  */
252f0d2114fSYongseok Koh int __attribute__((cold))
253*af4f09f2SNélio Laranjeiro mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
254f0d2114fSYongseok Koh {
255*af4f09f2SNélio Laranjeiro 	struct priv *priv = dev->data->dev_private;
256dbccb4cdSShahaf Shuler 	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
257dbccb4cdSShahaf Shuler 
2587fe24446SShahaf Shuler 	if (!priv->config.tx_vec_en ||
259f0d2114fSYongseok Koh 	    priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
2607fe24446SShahaf Shuler 	    priv->config.mps != MLX5_MPW_ENHANCED ||
261dbccb4cdSShahaf Shuler 	    offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
262f0d2114fSYongseok Koh 		return -ENOTSUP;
263f0d2114fSYongseok Koh 	return 1;
264f0d2114fSYongseok Koh }
265f0d2114fSYongseok Koh 
266f0d2114fSYongseok Koh /**
267f0d2114fSYongseok Koh  * Check a RX queue can support vectorized RX.
268f0d2114fSYongseok Koh  *
269f0d2114fSYongseok Koh  * @param rxq
270f0d2114fSYongseok Koh  *   Pointer to RX queue.
271f0d2114fSYongseok Koh  *
272f0d2114fSYongseok Koh  * @return
273f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
274f0d2114fSYongseok Koh  */
275f0d2114fSYongseok Koh int __attribute__((cold))
276*af4f09f2SNélio Laranjeiro mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
277f0d2114fSYongseok Koh {
278f0d2114fSYongseok Koh 	struct mlx5_rxq_ctrl *ctrl =
279f0d2114fSYongseok Koh 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
280f0d2114fSYongseok Koh 
2817fe24446SShahaf Shuler 	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
282f0d2114fSYongseok Koh 		return -ENOTSUP;
283f0d2114fSYongseok Koh 	return 1;
284f0d2114fSYongseok Koh }
285f0d2114fSYongseok Koh 
286f0d2114fSYongseok Koh /**
287f0d2114fSYongseok Koh  * Check a device can support vectorized RX.
288f0d2114fSYongseok Koh  *
289*af4f09f2SNélio Laranjeiro  * @param dev
290*af4f09f2SNélio Laranjeiro  *   Pointer to Ethernet device.
291f0d2114fSYongseok Koh  *
292f0d2114fSYongseok Koh  * @return
293f0d2114fSYongseok Koh  *   1 if supported, negative errno value if not.
294f0d2114fSYongseok Koh  */
295f0d2114fSYongseok Koh int __attribute__((cold))
296*af4f09f2SNélio Laranjeiro mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
297f0d2114fSYongseok Koh {
298*af4f09f2SNélio Laranjeiro 	struct priv *priv = dev->data->dev_private;
299f0d2114fSYongseok Koh 	uint16_t i;
300f0d2114fSYongseok Koh 
3017fe24446SShahaf Shuler 	if (!priv->config.rx_vec_en)
302f0d2114fSYongseok Koh 		return -ENOTSUP;
303f0d2114fSYongseok Koh 	/* All the configured queues should support. */
304f0d2114fSYongseok Koh 	for (i = 0; i < priv->rxqs_n; ++i) {
305f0d2114fSYongseok Koh 		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
306f0d2114fSYongseok Koh 
307f0d2114fSYongseok Koh 		if (!rxq)
308f0d2114fSYongseok Koh 			continue;
309*af4f09f2SNélio Laranjeiro 		if (mlx5_rxq_check_vec_support(rxq) < 0)
310f0d2114fSYongseok Koh 			break;
311f0d2114fSYongseok Koh 	}
312f0d2114fSYongseok Koh 	if (i != priv->rxqs_n)
313f0d2114fSYongseok Koh 		return -ENOTSUP;
314f0d2114fSYongseok Koh 	return 1;
315f0d2114fSYongseok Koh }
316