xref: /dpdk/drivers/net/nfp/nfd3/nfp_nfd3_dp.c (revision b6de43530dfa30cbf6b70857e3835099701063d4)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023 Corigine, Inc.
 * All rights reserved.
 */

#include "nfp_nfd3.h"

#include <bus_pci_driver.h>
#include <rte_malloc.h>

#include "../flower/nfp_flower.h"
#include "../nfp_logs.h"
#include "../nfp_net_meta.h"

/* Flags in the host TX descriptor */
#define NFD3_DESC_TX_CSUM               RTE_BIT32(7)
#define NFD3_DESC_TX_IP4_CSUM           RTE_BIT32(6)
#define NFD3_DESC_TX_TCP_CSUM           RTE_BIT32(5)
#define NFD3_DESC_TX_UDP_CSUM           RTE_BIT32(4)
#define NFD3_DESC_TX_VLAN               RTE_BIT32(3)
#define NFD3_DESC_TX_LSO                RTE_BIT32(2)
#define NFD3_DESC_TX_ENCAP              RTE_BIT32(1)
#define NFD3_DESC_TX_O_IP4_CSUM         RTE_BIT32(0)
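
/*
 * Note: NFD3_DESC_TX_CSUM acts as the aggregate checksum-enable bit;
 * nfp_net_nfd3_tx_cksum() sets it whenever any L3/L4 checksum offload is
 * requested, while the per-protocol bits above select the individual offloads.
 */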

/* Set NFD3 TX descriptor for TSO */
static void
nfp_net_nfd3_tx_tso(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	uint64_t ol_flags;
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_LSO_ANY) == 0)
		goto clean_txd;

	ol_flags = mb->ol_flags;
	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
			(ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
		goto clean_txd;

	txd->l3_offset = mb->l2_len;
	txd->l4_offset = mb->l2_len + mb->l3_len;
	txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len;

	if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) {
		txd->l3_offset += mb->outer_l2_len + mb->outer_l3_len;
		txd->l4_offset += mb->outer_l2_len + mb->outer_l3_len;
		txd->lso_hdrlen += mb->outer_l2_len + mb->outer_l3_len;
	}
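	/*
	 * For example, assuming an untagged 14-byte outer Ethernet header and
	 * a 20-byte outer IPv4 header, each inner offset above is shifted by
	 * 14 + 20 = 34 bytes so that it is counted from the very start of the
	 * outer packet.
	 */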

	txd->mss = rte_cpu_to_le_16(mb->tso_segsz);
	txd->flags = NFD3_DESC_TX_LSO;

	return;

clean_txd:
	txd->flags = 0;
	txd->l3_offset = 0;
	txd->l4_offset = 0;
	txd->lso_hdrlen = 0;
	txd->mss = 0;
}

/* Set TX CSUM offload flags in NFD3 TX descriptor */
static void
nfp_net_nfd3_tx_cksum(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	uint64_t ol_flags;
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_TXCSUM) == 0)
		return;

	ol_flags = mb->ol_flags;

	/* Set TCP csum offload if TSO enabled. */
	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
		txd->flags |= NFD3_DESC_TX_TCP_CSUM;

	/* Set UDP csum offload if UFO enabled. */
	if ((ol_flags & RTE_MBUF_F_TX_UDP_SEG) != 0)
		txd->flags |= NFD3_DESC_TX_UDP_CSUM;

	/* IPv6 does not need checksum */
	if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0)
		txd->flags |= NFD3_DESC_TX_IP4_CSUM;

	if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0)
		txd->flags |= NFD3_DESC_TX_ENCAP;

	switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
	case RTE_MBUF_F_TX_UDP_CKSUM:
		txd->flags |= NFD3_DESC_TX_UDP_CSUM;
		break;
	case RTE_MBUF_F_TX_TCP_CKSUM:
		txd->flags |= NFD3_DESC_TX_TCP_CSUM;
		break;
	}

	if ((ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK)) != 0)
		txd->flags |= NFD3_DESC_TX_CSUM;
}

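/*
 * Prepend the flower metadata in front of the packet data: an
 * NFP_NET_META_PORTID metadata header word followed by the 32-bit port id,
 * both in big-endian byte order. Returns the number of bytes prepended
 * (FLOWER_PKT_DATA_OFFSET).
 */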
uint32_t
nfp_flower_nfd3_pkt_add_metadata(struct rte_mbuf *mbuf,
		uint32_t port_id)
{
	char *meta_offset;

	meta_offset = rte_pktmbuf_prepend(mbuf, FLOWER_PKT_DATA_OFFSET);
	*(rte_be32_t *)meta_offset = rte_cpu_to_be_32(NFP_NET_META_PORTID);
	meta_offset += NFP_NET_META_HEADER_SIZE;
	*(rte_be32_t *)meta_offset = rte_cpu_to_be_32(port_id);

	return FLOWER_PKT_DATA_OFFSET;
}

/*
 * Set the VLAN info in the NFD3 TX descriptor.
 *
 * If NFP_NET_CFG_CTRL_TXVLAN_V2 is enabled, the VLAN info is stored in the
 * metadata and handled by nfp_net_meta_set_vlan(); otherwise, if
 * NFP_NET_CFG_CTRL_TXVLAN is enabled, the VLAN info is stored in the TX
 * descriptor and handled here in nfp_net_nfd3_tx_vlan().
 */
static inline void
nfp_net_nfd3_tx_vlan(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0 ||
			(hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN) == 0)
		return;

	if ((mb->ol_flags & RTE_MBUF_F_TX_VLAN) != 0) {
		txd->flags |= NFD3_DESC_TX_VLAN;
		txd->vlan = mb->vlan_tci;
	}
}

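/*
 * Build the TX metadata requested for this packet (VLAN and/or IPsec),
 * prepend it to the mbuf and fill in one data word per metadata field.
 * Returns 0 on success, or a negative errno when an unsupported metadata
 * layout is requested.
 */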
static inline int
nfp_net_nfd3_set_meta_data(struct nfp_net_meta_raw *meta_data,
		struct nfp_net_txq *txq,
		struct rte_mbuf *pkt)
{
	char *meta;
	uint8_t layer = 0;
	uint32_t meta_info;
	struct nfp_net_hw *hw;
	uint8_t vlan_layer = 0;
	uint8_t ipsec_layer = 0;

	hw = txq->hw;

	if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) != 0 &&
			(hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0) {
		if (meta_data->length == 0)
			meta_data->length = NFP_NET_META_HEADER_SIZE;
		meta_data->length += NFP_NET_META_FIELD_SIZE;
		meta_data->header |= NFP_NET_META_VLAN;
	}

	if ((pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) != 0 &&
			(hw->super.ctrl_ext & NFP_NET_CFG_CTRL_IPSEC) != 0) {
		uint32_t ipsec_type = NFP_NET_META_IPSEC |
				NFP_NET_META_IPSEC << NFP_NET_META_FIELD_SIZE |
				NFP_NET_META_IPSEC << (2 * NFP_NET_META_FIELD_SIZE);
		if (meta_data->length == 0)
			meta_data->length = NFP_NET_META_FIELD_SIZE;
		uint8_t ipsec_offset = meta_data->length - NFP_NET_META_FIELD_SIZE;
		meta_data->header |= (ipsec_type << ipsec_offset);
		meta_data->length += 3 * NFP_NET_META_FIELD_SIZE;
	}

	if (meta_data->length == 0)
		return 0;

	meta_info = meta_data->header;
	meta = rte_pktmbuf_prepend(pkt, meta_data->length);
	*(rte_be32_t *)meta = rte_cpu_to_be_32(meta_data->header);
	meta += NFP_NET_META_HEADER_SIZE;

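	/*
	 * Walk the per-field type values packed in the metadata header,
	 * least significant field first; each field gets one 32-bit
	 * big-endian data word appended after the header.
	 */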
	for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, layer++,
			meta += NFP_NET_META_FIELD_SIZE) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_VLAN:
			if (vlan_layer > 0) {
				PMD_DRV_LOG(ERR, "At most 1 layer of VLAN is supported.");
				return -EINVAL;
			}
			nfp_net_meta_set_vlan(meta_data, pkt, layer);
			vlan_layer++;
			break;
		case NFP_NET_META_IPSEC:
			if (ipsec_layer > 2) {
				PMD_DRV_LOG(ERR, "At most 3 layers of IPsec are supported for now.");
				return -EINVAL;
			}

			nfp_net_meta_set_ipsec(meta_data, txq, pkt, layer, ipsec_layer);
			ipsec_layer++;
			break;
		default:
			PMD_DRV_LOG(ERR, "The metadata type is not supported.");
			return -ENOTSUP;
		}

		*(rte_be32_t *)meta = rte_cpu_to_be_32(meta_data->data[layer]);
	}

	return 0;
}

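/*
 * Ethdev Tx burst entry point for the regular (non-representor) data path;
 * it simply calls the common transmit routine with repr_flag set to false.
 */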
uint16_t
nfp_net_nfd3_xmit_pkts(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	return nfp_net_nfd3_xmit_pkts_common(tx_queue, tx_pkts, nb_pkts, false);
}

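/*
 * Common Tx burst routine shared by the regular and flower representor data
 * paths. When repr_flag is true the flower metadata is assumed to have been
 * prepended already (see nfp_flower_nfd3_pkt_add_metadata()), so only the
 * fixed FLOWER_PKT_DATA_OFFSET is written into the descriptor offset field;
 * otherwise the metadata is built here per packet.
 */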
uint16_t
nfp_net_nfd3_xmit_pkts_common(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts,
		bool repr_flag)
{
	int ret;
	uint16_t i;
	uint8_t offset;
	uint32_t pkt_size;
	uint16_t dma_size;
	uint64_t dma_addr;
	uint16_t free_descs;
	struct rte_mbuf *pkt;
	uint16_t issued_descs;
	struct nfp_net_hw *hw;
	struct rte_mbuf **lmbuf;
	struct nfp_net_txq *txq;
	struct nfp_net_nfd3_tx_desc txd;
	struct nfp_net_nfd3_tx_desc *txds;

	txq = tx_queue;
	hw = txq->hw;
	txds = &txq->txds[txq->wr_p];

	PMD_TX_LOG(DEBUG, "Working for queue %hu at pos %d and %hu packets.",
			txq->qidx, txq->wr_p, nb_pkts);

	if (nfp_net_nfd3_free_tx_desc(txq) < NFD3_TX_DESC_PER_PKT * nb_pkts ||
			nfp_net_nfd3_txq_full(txq))
		nfp_net_tx_free_bufs(txq);

	free_descs = nfp_net_nfd3_free_tx_desc(txq);
	if (unlikely(free_descs == 0))
		return 0;

	pkt = *tx_pkts;

	issued_descs = 0;
	PMD_TX_LOG(DEBUG, "Queue: %hu. Sending %hu packets.", txq->qidx, nb_pkts);

	/* Sending packets */
	for (i = 0; i < nb_pkts && free_descs > 0; i++) {
		/* Grabbing the mbuf linked to the current descriptor */
		lmbuf = &txq->txbufs[txq->wr_p].mbuf;
		/* Warming the cache for releasing the mbuf later on */
		RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);

		pkt = *(tx_pkts + i);

		if (!repr_flag) {
			struct nfp_net_meta_raw meta_data;
			memset(&meta_data, 0, sizeof(meta_data));
			ret = nfp_net_nfd3_set_meta_data(&meta_data, txq, pkt);
			if (unlikely(ret != 0))
				goto xmit_end;

			offset = meta_data.length;
		} else {
			offset = FLOWER_PKT_DATA_OFFSET;
		}

		if (unlikely(pkt->nb_segs > 1 &&
				(hw->super.ctrl & NFP_NET_CFG_CTRL_GATHER) == 0)) {
			PMD_TX_LOG(ERR, "Multisegment packet not supported.");
			goto xmit_end;
		}

		/* Checking if we have enough descriptors */
		if (unlikely(pkt->nb_segs > free_descs))
			goto xmit_end;

		/*
		 * Checksum and VLAN flags are needed just in the first descriptor
		 * of a multisegment packet, but TSO info needs to be in all of them.
		 */
		txd.data_len = pkt->pkt_len;
		nfp_net_nfd3_tx_tso(txq, &txd, pkt);
		nfp_net_nfd3_tx_cksum(txq, &txd, pkt);
		nfp_net_nfd3_tx_vlan(txq, &txd, pkt);

		/*
		 * Mbuf data_len is the amount of data in one segment and pkt_len
		 * is the amount of data in the whole packet. When the packet has
		 * just one segment, data_len equals pkt_len.
		 */
		pkt_size = pkt->pkt_len;

		while (pkt != NULL && free_descs > 0) {
			/* Copying TSO, VLAN and cksum info */
			*txds = txd;

			/* Releasing mbuf used by this descriptor previously */
			if (*lmbuf != NULL)
				rte_pktmbuf_free_seg(*lmbuf);

			/*
			 * Linking mbuf with descriptor for being released
			 * next time descriptor is used.
			 */
			*lmbuf = pkt;

			dma_size = pkt->data_len;
			dma_addr = rte_mbuf_data_iova(pkt);

			/* Filling descriptors fields */
			txds->dma_len = dma_size;
			txds->data_len = txd.data_len;
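			/*
			 * The descriptor carries a 40-bit DMA address, split into
			 * 8 high bits and 32 low bits.
			 */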
			txds->dma_addr_hi = (dma_addr >> 32) & 0xff;
			txds->dma_addr_lo = (dma_addr & 0xffffffff);
			free_descs--;

			txq->wr_p++;
			if (unlikely(txq->wr_p == txq->tx_count)) /* Wrapping */
				txq->wr_p = 0;

			pkt_size -= dma_size;

			/*
			 * Mark the end of packet (EOP) on the last segment;
			 * single-segment packets are the likely case.
			 */
			if (likely(pkt_size == 0))
				txds->offset_eop = NFD3_DESC_TX_EOP;
			else
				txds->offset_eop = 0;

			/* Set the meta_len */
			txds->offset_eop |= offset;

			pkt = pkt->next;
			/* Referencing next free TX descriptor */
			txds = &txq->txds[txq->wr_p];
			lmbuf = &txq->txbufs[txq->wr_p].mbuf;
			issued_descs++;
		}
	}

xmit_end:
	/* Increment write pointers. Force memory write before we let HW know. */
	rte_wmb();
	nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs);

	return i;
}

int
nfp_net_nfd3_tx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_txconf *tx_conf)
{
	size_t size;
	uint32_t tx_desc_sz;
	uint16_t min_tx_desc;
	uint16_t max_tx_desc;
	struct nfp_net_hw *hw;
	struct nfp_net_txq *txq;
	uint16_t tx_free_thresh;
	const struct rte_memzone *tz;
	struct nfp_net_hw_priv *hw_priv;

	hw = nfp_net_get_hw(dev);
	hw_priv = dev->process_private;

	nfp_net_tx_desc_limits(hw_priv, &min_tx_desc, &max_tx_desc);

	/* Validating number of descriptors */
	tx_desc_sz = nb_desc * sizeof(struct nfp_net_nfd3_tx_desc);
	if ((NFD3_TX_DESC_PER_PKT * tx_desc_sz) % NFP_ALIGN_RING_DESC != 0 ||
			nb_desc > max_tx_desc || nb_desc < min_tx_desc) {
		PMD_DRV_LOG(ERR, "Wrong nb_desc value.");
		return -EINVAL;
	}

	tx_free_thresh = (tx_conf->tx_free_thresh != 0) ?
			tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
	if (tx_free_thresh > nb_desc) {
		PMD_DRV_LOG(ERR, "The tx_free_thresh must be less than the number of TX "
				"descriptors. (tx_free_thresh=%u port=%d queue=%d)",
				tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	/*
	 * Free memory prior to re-allocation if needed. This is the case after
	 * calling nfp_net_stop().
	 */
	if (dev->data->tx_queues[queue_idx] != NULL) {
		PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d.",
				queue_idx);
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
	}

	/* Allocating tx queue data structure */
	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating tx queue structure.");
		return -ENOMEM;
	}

	dev->data->tx_queues[queue_idx] = txq;

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(struct nfp_net_nfd3_tx_desc) * NFD3_TX_DESC_PER_PKT * max_tx_desc;
	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
			NFP_MEMZONE_ALIGN, socket_id);
	if (tz == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating tx dma.");
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

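	/*
	 * Each packet may consume up to NFD3_TX_DESC_PER_PKT hardware
	 * descriptors, so the ring is sized accordingly.
	 */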
	txq->tx_count = nb_desc * NFD3_TX_DESC_PER_PKT;
	txq->tx_free_thresh = tx_free_thresh;

	/* Queue mapping based on firmware configuration */
	txq->qidx = queue_idx;
	txq->tx_qcidx = queue_idx * hw->stride_tx;
	txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
	txq->port_id = dev->data->port_id;

	/* Saving physical and virtual addresses for the TX ring */
	txq->dma = tz->iova;
	txq->txds = tz->addr;

	/* Mbuf pointers array for referencing mbufs linked to TX descriptors */
	txq->txbufs = rte_zmalloc_socket("txq->txbufs",
			sizeof(*txq->txbufs) * txq->tx_count,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->txbufs == NULL) {
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	nfp_net_reset_tx_queue(txq);

	txq->hw = hw;
	txq->hw_priv = dev->process_private;

	/*
	 * Telling the HW about the physical address of the TX ring and number
	 * of descriptors in log2 format.
	 */
	nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma);
	nn_cfg_writeb(&hw->super, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(txq->tx_count));

	return 0;
}
485