/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Count the number of packets having the same ol_flags and calculate cs_flags.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param pkts
 *   Pointer to an array of packets.
 * @param pkts_n
 *   Number of packets.
 * @param cs_flags
 *   Pointer to the returned checksum flags.
 *
 * @return
 *   Number of packets having the same ol_flags.
 */
static inline unsigned int
txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint8_t *cs_flags)
{
	unsigned int pos;
	const uint64_t ol_mask =
		PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
		PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
		PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;

	if (!pkts_n)
		return 0;
	/* Count the number of packets having the same ol_flags. */
	for (pos = 1; pos < pkts_n; ++pos)
		if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
			break;
	*cs_flags = txq_ol_cksum_to_cs(txq, pkts[0]);
	return pos;
}
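
/*
 * Illustrative example: if the first three packets of a burst request
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM while the fourth one carries different
 * checksum/tunnel flags, txq_calc_offload() returns 3 and *cs_flags is
 * derived from the first packet via txq_ol_cksum_to_cs().
 */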

/**
 * DPDK callback for vectorized TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
		      uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t nb_tx = 0;

	while (pkts_n > nb_tx) {
		uint16_t n;
		uint16_t ret;

		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
		ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
		nb_tx += ret;
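		/* txq_burst_v() returns 0 when the SQ has no more room. */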
		if (!ret)
			break;
	}
	return nb_tx;
}

/**
 * DPDK callback for vectorized TX with multi-seg packets and offload.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t nb_tx = 0;

	while (pkts_n > nb_tx) {
		uint8_t cs_flags = 0;
		uint16_t n;
		uint16_t ret;

		/* Transmit multi-seg packets at the head of the pkts list. */
		if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
		    NB_SEGS(pkts[nb_tx]) > 1)
			nb_tx += txq_scatter_v(txq,
					       &pkts[nb_tx],
					       pkts_n - nb_tx);
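		/*
		 * When the corresponding offloads are enabled, restrict the
		 * next burst to contiguous single-segment packets sharing
		 * the same checksum flags.
		 */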
		n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
			n = txq_count_contig_single_seg(&pkts[nb_tx], n);
		if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
			n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
		ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
		nb_tx += ret;
		if (!ret)
			break;
	}
	return nb_tx;
}

/**
 * Drop errored packets and compact the array of received packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array of packets returned by the Rx burst.
 * @param pkts_n
 *   Number of packets in the array.
 *
 * @return
 *   Number of packets kept in the array (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

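		/*
		 * The vectorized Rx burst flags packets from errored CQEs
		 * by setting their packet_type to RTE_PTYPE_ALL_MASK.
		 */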
		if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	return n;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx;
	uint64_t err = 0;

	nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err);
	if (unlikely(err))
		nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
	return nb_rx;
}
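
/*
 * Note: applications do not call this function directly; it is installed as
 * the Rx burst callback when the vectorized path is selected and is reached
 * through rte_eth_rx_burst(). Minimal usage sketch (port and queue ids are
 * hypothetical):
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t nb = rte_eth_rx_burst(port_id, 0, bufs, 32);
 */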

/**
 * Check whether the configured Tx offloads allow the raw vectorized Tx path.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __attribute__((cold))
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
{
	uint64_t offloads = dev->data->dev_conf.txmode.offloads;

	/* The raw vectorized Tx path does not support any offload. */
	if (offloads)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support the vectorized TX path.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __attribute__((cold))
mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	uint64_t offloads = dev->data->dev_conf.txmode.offloads;

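	/*
	 * Vectorized Tx requires tx_vec_en, no more than MLX5_VPMD_MIN_TXQS
	 * Tx queues, Enhanced MPW and only offloads covered by
	 * MLX5_VEC_TX_OFFLOAD_CAP.
	 */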
	if (!priv->config.tx_vec_en ||
	    priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
	    priv->config.mps != MLX5_MPW_ENHANCED ||
	    offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether an RX queue can support the vectorized RX path.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __attribute__((cold))
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

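	/* Scattered Rx (sges_n != 0) is not supported by this path. */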
	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support the vectorized RX path.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __attribute__((cold))
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t i;

	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured Rx queues must support the vectorized path. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_check_vec_support(rxq) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}