xref: /dpdk/drivers/net/nfp/nfp_rxtx.c (revision 4f0ece5e5a38df8aac666e1f7ea2642b449be848)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2014-2021 Netronome Systems, Inc.
3  * All rights reserved.
4  *
5  * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
6  */
7 
8 #include "nfp_rxtx.h"
9 
10 #include <ethdev_pci.h>
11 #include <rte_security.h>
12 
13 #include "nfd3/nfp_nfd3.h"
14 #include "nfdk/nfp_nfdk.h"
15 #include "nfdk/nfp_nfdk_vec.h"
16 #include "flower/nfp_flower.h"
17 
18 #include "nfp_ipsec.h"
19 #include "nfp_logs.h"
20 #include "nfp_net_meta.h"
21 #include "nfp_rxtx_vec.h"
22 
23 /*
24  * The bit format and map of the nfp packet type in rxd.offload_info of the Rx descriptor.
25  *
26  * The bit format of the nfp packet type is as follows:
27  * ---------------------------------
28  *            1                   0
29  *  5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
30  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
31  * |       |ol3|tunnel |  l3 |  l4 |
32  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
33  *
34  * The bit map of the nfp packet type is as follows:
35  *
36  * L4: bit 0~2, used for layer 4 or inner layer 4.
37  * 000: NFP_NET_PTYPE_L4_NONE
38  * 001: NFP_NET_PTYPE_L4_TCP
39  * 010: NFP_NET_PTYPE_L4_UDP
40  * 011: NFP_NET_PTYPE_L4_FRAG
41  * 100: NFP_NET_PTYPE_L4_NONFRAG
42  * 101: NFP_NET_PTYPE_L4_ICMP
43  * 110: NFP_NET_PTYPE_L4_SCTP
44  * 111: reserved
45  *
46  * L3: bit 3~5, used for layer 3 or inner layer 3.
47  * 000: NFP_NET_PTYPE_L3_NONE
48  * 001: NFP_NET_PTYPE_L3_IPV6
49  * 010: NFP_NET_PTYPE_L3_IPV4
50  * 011: NFP_NET_PTYPE_L3_IPV4_EXT
51  * 100: NFP_NET_PTYPE_L3_IPV6_EXT
52  * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN
53  * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN
54  * 111: reserved
55  *
56  * Tunnel: bit 6~9, used for tunnel.
57  * 0000: NFP_NET_PTYPE_TUNNEL_NONE
58  * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN
59  * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE
60  * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE
61  * 0010, 0011, 0110~1111: reserved
62  *
63  * Outer L3: bit 10~11, used for outer layer 3.
64  * 00: NFP_NET_PTYPE_OUTER_L3_NONE
65  * 01: NFP_NET_PTYPE_OUTER_L3_IPV6
66  * 10: NFP_NET_PTYPE_OUTER_L3_IPV4
67  * 11: reserved
68  *
69  * Reserved: bit 12~15, used for extension.
70  */
71 
72 /* Mask and offset about nfp packet type based on the bit map above. */
73 #define NFP_NET_PTYPE_L4_MASK                  0x0007
74 #define NFP_NET_PTYPE_L3_MASK                  0x0038
75 #define NFP_NET_PTYPE_TUNNEL_MASK              0x03c0
76 #define NFP_NET_PTYPE_OUTER_L3_MASK            0x0c00
77 
78 #define NFP_NET_PTYPE_L4_OFFSET                0
79 #define NFP_NET_PTYPE_L3_OFFSET                3
80 #define NFP_NET_PTYPE_TUNNEL_OFFSET            6
81 #define NFP_NET_PTYPE_OUTER_L3_OFFSET          10
82 
83 /* Case about nfp packet type based on the bit map above. */
84 #define NFP_NET_PTYPE_L4_NONE                  0
85 #define NFP_NET_PTYPE_L4_TCP                   1
86 #define NFP_NET_PTYPE_L4_UDP                   2
87 #define NFP_NET_PTYPE_L4_FRAG                  3
88 #define NFP_NET_PTYPE_L4_NONFRAG               4
89 #define NFP_NET_PTYPE_L4_ICMP                  5
90 #define NFP_NET_PTYPE_L4_SCTP                  6
91 
92 #define NFP_NET_PTYPE_L3_NONE                  0
93 #define NFP_NET_PTYPE_L3_IPV6                  1
94 #define NFP_NET_PTYPE_L3_IPV4                  2
95 #define NFP_NET_PTYPE_L3_IPV4_EXT              3
96 #define NFP_NET_PTYPE_L3_IPV6_EXT              4
97 #define NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN      5
98 #define NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN      6
99 
100 #define NFP_NET_PTYPE_TUNNEL_NONE              0
101 #define NFP_NET_PTYPE_TUNNEL_VXLAN             1
102 #define NFP_NET_PTYPE_TUNNEL_NVGRE             4
103 #define NFP_NET_PTYPE_TUNNEL_GENEVE            5
104 
105 #define NFP_NET_PTYPE_OUTER_L3_NONE            0
106 #define NFP_NET_PTYPE_OUTER_L3_IPV6            1
107 #define NFP_NET_PTYPE_OUTER_L3_IPV4            2
108 
109 #define NFP_PTYPE2RTE(tunnel, type) ((tunnel) ? RTE_PTYPE_INNER_##type : RTE_PTYPE_##type)
110 
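/*
 * Worked example (illustrative only, using the masks and offsets above): a raw
 * rxd.offload_info value of 0x0451 decodes as
 *
 *   L4       = (0x0451 & NFP_NET_PTYPE_L4_MASK)       >> 0  = 1 -> NFP_NET_PTYPE_L4_TCP
 *   L3       = (0x0451 & NFP_NET_PTYPE_L3_MASK)       >> 3  = 2 -> NFP_NET_PTYPE_L3_IPV4
 *   Tunnel   = (0x0451 & NFP_NET_PTYPE_TUNNEL_MASK)   >> 6  = 1 -> NFP_NET_PTYPE_TUNNEL_VXLAN
 *   Outer L3 = (0x0451 & NFP_NET_PTYPE_OUTER_L3_MASK) >> 10 = 1 -> NFP_NET_PTYPE_OUTER_L3_IPV6
 */
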
111 /* Record NFP packet type parsed from rxd.offload_info. */
112 struct nfp_ptype_parsed {
113 	uint8_t l4_ptype;       /**< Packet type of layer 4, or inner layer 4. */
114 	uint8_t l3_ptype;       /**< Packet type of layer 3, or inner layer 3. */
115 	uint8_t tunnel_ptype;   /**< Packet type of tunnel. */
116 	uint8_t outer_l3_ptype; /**< Packet type of outer layer 3. */
117 };
118 
119 /* Set mbuf checksum flags based on RX descriptor flags */
120 void
121 nfp_net_rx_cksum(struct nfp_net_rxq *rxq,
122 		struct nfp_net_rx_desc *rxd,
123 		struct rte_mbuf *mb)
124 {
125 	struct nfp_net_hw *hw = rxq->hw;
126 
127 	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RXCSUM) == 0)
128 		return;
129 
130 	/* If IPv4 and IP checksum error, fail */
131 	if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) != 0 &&
132 			(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK) == 0))
133 		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
134 	else
135 		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
136 
137 	/* If neither UDP nor TCP, return */
138 	if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) == 0 &&
139 			(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) == 0)
140 		return;
141 
142 	if (likely((rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK) != 0))
143 		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
144 	else
145 		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
146 }
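
/*
 * A minimal consumer-side sketch (illustrative, not part of the driver): an
 * application receiving mbufs from this PMD could react to the checksum flags
 * set above as follows. The helper name is hypothetical.
 */
static inline int
nfp_example_rx_cksum_is_good(const struct rte_mbuf *mb)
{
	/* Reject packets whose IP or L4 checksum the hardware flagged as bad. */
	if ((mb->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_BAD) != 0)
		return 0;

	if ((mb->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD) != 0)
		return 0;

	return 1;
}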
147 
148 static int
149 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
150 {
151 	uint16_t i;
152 	uint64_t dma_addr;
153 	struct nfp_net_dp_buf *rxe = rxq->rxbufs;
154 
155 	PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %hu descriptors.",
156 			rxq->rx_count);
157 
158 	for (i = 0; i < rxq->rx_count; i++) {
159 		struct nfp_net_rx_desc *rxd;
160 		struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);
161 
162 		if (mbuf == NULL) {
163 			PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu.",
164 				rxq->qidx);
165 			return -ENOMEM;
166 		}
167 
168 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
169 
170 		rxd = &rxq->rxds[i];
171 		rxd->fld.dd = 0;
172 		rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
173 		rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
174 
175 		rxe[i].mbuf = mbuf;
176 	}
177 
178 	/* Make sure all writes are flushed before telling the hardware */
179 	rte_wmb();
180 
181 	/* Not advertising the whole ring as the firmware gets confused if so */
182 	PMD_RX_LOG(DEBUG, "Increment FL write pointer by %hu.", rxq->rx_count - 1);
183 
184 	nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);
185 
186 	return 0;
187 }
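
/*
 * Illustrative note on the descriptor address split above: for a DMA address
 * of 0x0000001234567890, dma_addr_hi = 0x0012 and dma_addr_lo = 0x34567890.
 */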
188 
189 int
190 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
191 {
192 	uint16_t i;
193 
194 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
195 		if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) != 0)
196 			return -1;
197 	}
198 
199 	return 0;
200 }
201 
202 uint32_t
203 nfp_net_rx_queue_count(void *rx_queue)
204 {
205 	uint32_t idx;
206 	uint32_t count = 0;
207 	struct nfp_net_rxq *rxq;
208 	struct nfp_net_rx_desc *rxds;
209 
210 	rxq = rx_queue;
211 	idx = rxq->rd_p;
212 
213 	/*
214 	 * Other PMDs just check the DD bit in intervals of 4 descriptors
215 	 * and count all four if the first one has the DD bit set. This is
216 	 * not accurate but can be good for performance. Ideally, though,
217 	 * it should be done on chunks of descriptors belonging to the
218 	 * same cache line.
219 	 */
220 	while (count < rxq->rx_count) {
221 		rxds = &rxq->rxds[idx];
222 		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
223 			break;
224 
225 		count++;
226 		idx++;
227 
228 		/* Wrapping */
229 		if ((idx) == rxq->rx_count)
230 			idx = 0;
231 	}
232 
233 	return count;
234 }
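
/*
 * A sketch of the coarser scheme mentioned in the comment above (illustrative
 * only, not the behaviour of this PMD): probe every 4th descriptor and count
 * the whole group when its DD bit is set. The helper name is hypothetical.
 */
static inline uint32_t
nfp_example_rx_queue_count_coarse(struct nfp_net_rxq *rxq)
{
	uint32_t count = 0;
	uint32_t idx = rxq->rd_p;

	while (count < rxq->rx_count) {
		if ((rxq->rxds[idx].rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
			break;

		/* Assume the rest of the group of 4 descriptors is also done. */
		count += 4;
		idx = (idx + 4) % rxq->rx_count;
	}

	return RTE_MIN(count, (uint32_t)rxq->rx_count);
}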
235 
236 /**
237  * Set packet type to mbuf based on parsed structure.
238  *
239  * @param nfp_ptype
240  *   Packet type structure parsed from the Rx descriptor.
241  * @param mb
242  *   Mbuf to set the packet type.
243  */
244 static void
245 nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype,
246 		struct rte_mbuf *mb)
247 {
248 	uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER;
249 	uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype;
250 
251 	if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE)
252 		mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER;
253 
254 	switch (nfp_ptype->outer_l3_ptype) {
255 	case NFP_NET_PTYPE_OUTER_L3_NONE:
256 		break;
257 	case NFP_NET_PTYPE_OUTER_L3_IPV4:
258 		mbuf_ptype |= RTE_PTYPE_L3_IPV4;
259 		break;
260 	case NFP_NET_PTYPE_OUTER_L3_IPV6:
261 		mbuf_ptype |= RTE_PTYPE_L3_IPV6;
262 		break;
263 	default:
264 		PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u.",
265 				nfp_ptype->outer_l3_ptype);
266 		break;
267 	}
268 
269 	switch (nfp_tunnel_ptype) {
270 	case NFP_NET_PTYPE_TUNNEL_NONE:
271 		break;
272 	case NFP_NET_PTYPE_TUNNEL_VXLAN:
273 		mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
274 		break;
275 	case NFP_NET_PTYPE_TUNNEL_NVGRE:
276 		mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE;
277 		break;
278 	case NFP_NET_PTYPE_TUNNEL_GENEVE:
279 		mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
280 		break;
281 	default:
282 		PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u.",
283 				nfp_tunnel_ptype);
284 		break;
285 	}
286 
287 	switch (nfp_ptype->l4_ptype) {
288 	case NFP_NET_PTYPE_L4_NONE:
289 		break;
290 	case NFP_NET_PTYPE_L4_TCP:
291 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP);
292 		break;
293 	case NFP_NET_PTYPE_L4_UDP:
294 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP);
295 		break;
296 	case NFP_NET_PTYPE_L4_FRAG:
297 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG);
298 		break;
299 	case NFP_NET_PTYPE_L4_NONFRAG:
300 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG);
301 		break;
302 	case NFP_NET_PTYPE_L4_ICMP:
303 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP);
304 		break;
305 	case NFP_NET_PTYPE_L4_SCTP:
306 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP);
307 		break;
308 	default:
309 		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u.",
310 				nfp_ptype->l4_ptype);
311 		break;
312 	}
313 
314 	switch (nfp_ptype->l3_ptype) {
315 	case NFP_NET_PTYPE_L3_NONE:
316 		break;
317 	case NFP_NET_PTYPE_L3_IPV4:
318 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4);
319 		break;
320 	case NFP_NET_PTYPE_L3_IPV6:
321 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6);
322 		break;
323 	case NFP_NET_PTYPE_L3_IPV4_EXT:
324 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT);
325 		break;
326 	case NFP_NET_PTYPE_L3_IPV6_EXT:
327 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT);
328 		break;
329 	case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN:
330 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN);
331 		break;
332 	case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN:
333 		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN);
334 		break;
335 	default:
336 		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u.",
337 				nfp_ptype->l3_ptype);
338 		break;
339 	}
340 
341 	mb->packet_type = mbuf_ptype;
342 }
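
/*
 * Continuing the mapping above with an illustrative example: a VXLAN packet
 * carrying inner IPv4/TCP over an outer IPv6 header ends up with
 * mb->packet_type set to RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP | RTE_PTYPE_INNER_L2_ETHER |
 * RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP.
 */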
343 
344 /**
345  * Parse the packet type from Rx descriptor and set to mbuf.
346  *
347  * @param rxq
348  *   Rx queue
349  * @param rxds
350  *   Rx descriptor including the offloading info of packet type.
351  * @param mb
352  *   Mbuf to set the packet type.
353  */
354 void
355 nfp_net_parse_ptype(struct nfp_net_rxq *rxq,
356 		struct nfp_net_rx_desc *rxds,
357 		struct rte_mbuf *mb)
358 {
359 	struct nfp_net_hw *hw = rxq->hw;
360 	struct nfp_ptype_parsed nfp_ptype;
361 	uint16_t rxd_ptype = rxds->rxd.offload_info;
362 
363 	if ((hw->super.ctrl_ext & NFP_NET_CFG_CTRL_PKT_TYPE) == 0)
364 		return;
365 
366 	if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0)
367 		return;
368 
369 	nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >>
370 			NFP_NET_PTYPE_L4_OFFSET;
371 	nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >>
372 			NFP_NET_PTYPE_L3_OFFSET;
373 	nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >>
374 			NFP_NET_PTYPE_TUNNEL_OFFSET;
375 	nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >>
376 			NFP_NET_PTYPE_OUTER_L3_OFFSET;
377 
378 	nfp_net_set_ptype(&nfp_ptype, mb);
379 }
380 
381 /*
382  * RX path design:
383  *
384  * There are some decisions to take:
385  * 1) How to check the DD bit of the RX descriptors
386  * 2) How and when to allocate new mbufs
387  *
388  * The current implementation checks just one DD bit per loop iteration. As each
389  * descriptor is 8 bytes, it is likely a good idea to check descriptors in a
390  * single cache line instead. Tests with this change have not shown any
391  * performance improvement, but it requires further investigation. For example,
392  * depending on which descriptor comes next, fewer than 8 descriptors may share
393  * its cache line, so checking only those in the same cache line implies extra
394  * work that could be counterproductive by itself. Indeed, the latest firmware
395  * changes do exactly this: they write several descriptors with the DD bit set
396  * at once in order to save PCIe bandwidth and DMA operations from the NFP.
397  *
398  * Mbuf allocation is done when a new packet is received. The descriptor is
399  * then automatically linked with the new mbuf and the old one is given to
400  * the user. The main drawback of this design is that mbuf allocation is
401  * heavier than the bulk allocation allowed by DPDK with
402  * rte_mempool_get_bulk. From the cache point of view, allocating the mbuf
403  * early, as we do now, does not seem to have any benefit at all. Again,
404  * tests with this change have not shown any improvement. Also,
405  * rte_mempool_get_bulk returns all or nothing, so the implications of this
406  * type of allocation should be studied more deeply.
407  */
408 uint16_t
409 nfp_net_recv_pkts(void *rx_queue,
410 		struct rte_mbuf **rx_pkts,
411 		uint16_t nb_pkts)
412 {
413 	uint64_t dma_addr;
414 	uint16_t avail = 0;
415 	struct rte_mbuf *mb;
416 	uint16_t nb_hold = 0;
417 	struct nfp_net_hw *hw;
418 	struct rte_mbuf *new_mb;
419 	struct nfp_net_rxq *rxq;
420 	struct nfp_pf_dev *pf_dev;
421 	struct nfp_net_dp_buf *rxb;
422 	struct nfp_net_rx_desc *rxds;
423 	uint16_t avail_multiplexed = 0;
424 
425 	rxq = rx_queue;
426 	if (unlikely(rxq == NULL)) {
427 		/*
428 		 * DPDK just checks that the queue index is lower than the number
429 		 * of enabled queues, but the queue still needs to be configured.
430 		 */
431 		PMD_RX_LOG(ERR, "RX Bad queue.");
432 		return 0;
433 	}
434 
435 	hw = rxq->hw;
436 	pf_dev = rxq->hw_priv->pf_dev;
437 
438 	while (avail + avail_multiplexed < nb_pkts) {
439 		rxb = &rxq->rxbufs[rxq->rd_p];
440 		if (unlikely(rxb == NULL)) {
441 			PMD_RX_LOG(ERR, "The rxb does not exist!");
442 			break;
443 		}
444 
445 		rxds = &rxq->rxds[rxq->rd_p];
446 		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
447 			break;
448 
449 		/*
450 		 * Memory barrier to ensure that we won't do other
451 		 * reads before the DD bit.
452 		 */
453 		rte_rmb();
454 
455 		/*
456 		 * We got a packet. Let's alloc a new mbuf for refilling the
457 		 * free descriptor ring as soon as possible.
458 		 */
459 		new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
460 		if (unlikely(new_mb == NULL)) {
461 			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%hu.",
462 					rxq->port_id, rxq->qidx);
463 			nfp_net_mbuf_alloc_failed(rxq);
464 			break;
465 		}
466 
467 		/*
468 		 * Grab the mbuf and refill the descriptor with the
469 		 * previously allocated mbuf.
470 		 */
471 		mb = rxb->mbuf;
472 		rxb->mbuf = new_mb;
473 
474 		PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u.",
475 				rxds->rxd.data_len, rxq->mbuf_size);
476 
477 		/* Size of this segment */
478 		mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
479 		/* Size of the whole packet. We just support 1 segment */
480 		mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
481 
482 		if (unlikely((mb->data_len + hw->rx_offset) > rxq->mbuf_size)) {
483 			/*
484 			 * This should not happen and the user has the
485 			 * responsibility of avoiding it. But we have
486 			 * to give some info about the error.
487 			 */
488 			PMD_RX_LOG(ERR, "The mbuf overflow likely due to the RX offset.");
489 			rte_pktmbuf_free(mb);
490 			break;
491 		}
492 
493 		/* Filling the received mbuf with packet info */
494 		if (hw->rx_offset != 0)
495 			mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
496 		else
497 			mb->data_off = RTE_PKTMBUF_HEADROOM + NFP_DESC_META_LEN(rxds);
498 
499 		/* No scatter mode supported */
500 		mb->nb_segs = 1;
501 		mb->next = NULL;
502 		mb->port = rxq->port_id;
503 
504 		struct nfp_net_meta_parsed meta;
505 		nfp_net_meta_parse(rxds, rxq, hw, mb, &meta);
506 
507 		nfp_net_parse_ptype(rxq, rxds, mb);
508 
509 		/* Checking the checksum flag */
510 		nfp_net_rx_cksum(rxq, rxds, mb);
511 
512 		/* Now resetting and updating the descriptor */
513 		rxds->vals[0] = 0;
514 		rxds->vals[1] = 0;
515 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(new_mb));
516 		rxds->fld.dd = 0;
517 		rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
518 		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
519 		nb_hold++;
520 
521 		rxq->rd_p++;
522 		if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping */
523 			rxq->rd_p = 0;
524 
525 		if (pf_dev->recv_pkt_meta_check_t(&meta)) {
526 			rx_pkts[avail++] = mb;
527 		} else {
528 			if (nfp_flower_pf_dispatch_pkts(rxq, mb, meta.port_id)) {
529 				avail_multiplexed++;
530 			} else {
531 				rte_pktmbuf_free(mb);
532 				break;
533 			}
534 		}
535 	}
536 
537 	if (nb_hold == 0)
538 		return nb_hold;
539 
540 	PMD_RX_LOG(DEBUG, "RX  port_id=%hu queue_id=%hu, %hu packets received.",
541 			rxq->port_id, rxq->qidx, avail);
542 
543 	nb_hold += rxq->nb_rx_hold;
544 
545 	/*
546 	 * FL descriptors need to be written before incrementing the
547 	 * FL queue WR pointer.
548 	 */
549 	rte_wmb();
550 	if (nb_hold > rxq->rx_free_thresh) {
551 		PMD_RX_LOG(DEBUG, "The port=%hu queue=%hu nb_hold=%hu avail=%hu.",
552 				rxq->port_id, rxq->qidx, nb_hold, avail);
553 		nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
554 		nb_hold = 0;
555 	}
556 	rxq->nb_rx_hold = nb_hold;
557 
558 	return avail;
559 }
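
/*
 * A sketch (illustrative only, not used by this PMD) of the bulk-allocation
 * alternative discussed in the RX path design comment above: refill a run of
 * free list descriptors from rte_pktmbuf_alloc_bulk(), which is all or
 * nothing. The helper name and the fixed burst limit are assumptions.
 */
static inline int
nfp_example_bulk_refill(struct nfp_net_rxq *rxq,
		uint16_t start,
		uint16_t num)
{
	uint16_t i;
	uint64_t dma_addr;
	struct rte_mbuf *mbufs[64];

	if (num > RTE_DIM(mbufs))
		return -EINVAL;

	/* All-or-nothing allocation from the queue's mempool. */
	if (rte_pktmbuf_alloc_bulk(rxq->mem_pool, mbufs, num) != 0)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		uint16_t idx = (start + i) % rxq->rx_count;
		struct nfp_net_rx_desc *rxds = &rxq->rxds[idx];

		rxq->rxbufs[idx].mbuf = mbufs[i];
		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbufs[i]));
		rxds->fld.dd = 0;
		rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
	}

	return 0;
}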
560 
561 static void
562 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
563 {
564 	uint16_t i;
565 
566 	if (rxq->rxbufs == NULL)
567 		return;
568 
569 	for (i = 0; i < rxq->rx_count; i++) {
570 		if (rxq->rxbufs[i].mbuf != NULL) {
571 			rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
572 			rxq->rxbufs[i].mbuf = NULL;
573 		}
574 	}
575 }
576 
577 void
578 nfp_net_rx_queue_release(struct rte_eth_dev *dev,
579 		uint16_t queue_idx)
580 {
581 	struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx];
582 
583 	if (rxq != NULL) {
584 		nfp_net_rx_queue_release_mbufs(rxq);
585 		rte_eth_dma_zone_free(dev, "rx_ring", queue_idx);
586 		rte_free(rxq->rxbufs);
587 		rte_free(rxq);
588 	}
589 }
590 
591 void
592 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
593 {
594 	nfp_net_rx_queue_release_mbufs(rxq);
595 	rxq->rd_p = 0;
596 	rxq->nb_rx_hold = 0;
597 }
598 
599 static void
600 nfp_rx_queue_setup_flbufsz(struct nfp_net_hw *hw,
601 		struct nfp_net_rxq *rxq)
602 {
603 	if (!hw->flbufsz_set_flag) {
604 		hw->flbufsz_set_flag = true;
605 		hw->flbufsz = rxq->mbuf_size;
606 		return;
607 	}
608 
609 	if (hw->flbufsz < rxq->mbuf_size)
610 		hw->flbufsz = rxq->mbuf_size;
611 }
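
/*
 * Illustrative example of the logic above: with rxq->mbuf_size = 2048 for
 * queue 0 and rxq->mbuf_size = 4096 for queue 1, hw->flbufsz ends up as 4096,
 * i.e. the largest free list buffer size among the configured queues.
 */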
612 
613 int
614 nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
615 		uint16_t queue_idx,
616 		uint16_t nb_desc,
617 		unsigned int socket_id,
618 		const struct rte_eth_rxconf *rx_conf,
619 		struct rte_mempool *mp)
620 {
621 	uint32_t rx_desc_sz;
622 	uint16_t min_rx_desc;
623 	uint16_t max_rx_desc;
624 	struct nfp_net_hw *hw;
625 	struct nfp_net_rxq *rxq;
626 	const struct rte_memzone *tz;
627 	struct nfp_net_hw_priv *hw_priv;
628 
629 	hw = nfp_net_get_hw(dev);
630 	hw_priv = dev->process_private;
631 
632 	nfp_net_rx_desc_limits(hw_priv, &min_rx_desc, &max_rx_desc);
633 
634 	/* Validating number of descriptors */
635 	rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc);
636 	if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
637 			nb_desc > max_rx_desc || nb_desc < min_rx_desc) {
638 		PMD_DRV_LOG(ERR, "Wrong nb_desc value.");
639 		return -EINVAL;
640 	}
641 
642 	/*
643 	 * Free memory prior to re-allocation if needed. This is the case after
644 	 * calling @nfp_net_stop().
645 	 */
646 	if (dev->data->rx_queues[queue_idx] != NULL) {
647 		nfp_net_rx_queue_release(dev, queue_idx);
648 		dev->data->rx_queues[queue_idx] = NULL;
649 	}
650 
651 	/* Allocating rx queue data structure */
652 	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
653 			RTE_CACHE_LINE_SIZE, socket_id);
654 	if (rxq == NULL)
655 		return -ENOMEM;
656 
657 	dev->data->rx_queues[queue_idx] = rxq;
658 
659 	/* Hw queues mapping based on firmware configuration */
660 	rxq->qidx = queue_idx;
661 	rxq->fl_qcidx = queue_idx * hw->stride_rx;
662 	rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
663 
664 	/*
665 	 * Tracking mbuf size for detecting a potential mbuf overflow due to
666 	 * RX offset.
667 	 */
668 	rxq->mem_pool = mp;
669 	rxq->mbuf_size = rxq->mem_pool->elt_size;
670 	rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
671 	nfp_rx_queue_setup_flbufsz(hw, rxq);
672 
673 	rxq->rx_count = nb_desc;
674 	rxq->port_id = dev->data->port_id;
675 	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
676 
677 	/*
678 	 * Allocate RX ring hardware descriptors. A memzone large enough to
679 	 * handle the maximum ring size is allocated in order to allow for
680 	 * resizing in later calls to the queue setup function.
681 	 */
682 	tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
683 			sizeof(struct nfp_net_rx_desc) * max_rx_desc,
684 			NFP_MEMZONE_ALIGN, socket_id);
685 	if (tz == NULL) {
686 		PMD_DRV_LOG(ERR, "Error allocating rx dma.");
687 		nfp_net_rx_queue_release(dev, queue_idx);
688 		dev->data->rx_queues[queue_idx] = NULL;
689 		return -ENOMEM;
690 	}
691 
692 	/* Saving physical and virtual addresses for the RX ring */
693 	rxq->dma = (uint64_t)tz->iova;
694 	rxq->rxds = tz->addr;
695 
696 	/* Mbuf pointers array for referencing mbufs linked to RX descriptors */
697 	rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
698 			sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
699 			socket_id);
700 	if (rxq->rxbufs == NULL) {
701 		nfp_net_rx_queue_release(dev, queue_idx);
702 		dev->data->rx_queues[queue_idx] = NULL;
703 		return -ENOMEM;
704 	}
705 
706 	nfp_net_reset_rx_queue(rxq);
707 
708 	rxq->hw = hw;
709 	rxq->hw_priv = dev->process_private;
710 
711 	/*
712 	 * Telling the HW about the physical address of the RX ring and number
713 	 * of descriptors in log2 format.
714 	 */
715 	nn_cfg_writeq(&hw->super, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
716 	nn_cfg_writeb(&hw->super, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));
717 
718 	return 0;
719 }
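
/*
 * A minimal application-side usage sketch (assumptions: port 0, queue 0,
 * 512 descriptors, socket 0 and a pre-created mempool): the ethdev layer
 * dispatches rte_eth_rx_queue_setup() to the nfp_net_rx_queue_setup()
 * callback above.
 */
static inline int
nfp_example_app_rxq_setup(struct rte_mempool *mbuf_pool)
{
	/* A NULL rx_conf selects the default RX configuration. */
	return rte_eth_rx_queue_setup(0, 0, 512, 0, NULL, mbuf_pool);
}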
720 
721 static inline uint32_t
722 nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq)
723 {
724 	/*
725 	 * If TX ring pointer write back is not supported, do a PCIe read.
726 	 * Otherwise read qcp value from write back dma address.
727 	 */
728 	if (txq->txrwb == NULL)
729 		return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
730 
731 	/*
732 	 * In most cases the TX count is a power of two and the costly modulus
733 	 * operation can be substituted with a subtraction and an AND operation.
734 	 */
735 	if (rte_is_power_of_2(txq->tx_count))
736 		return (*txq->txrwb) & (txq->tx_count - 1);
737 	else
738 		return (*txq->txrwb) % txq->tx_count;
739 }
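
/*
 * Illustrative example of the substitution above: with tx_count = 1024, a
 * power of two, (*txq->txrwb) % 1024 is equal to (*txq->txrwb) & 1023, which
 * avoids the costly modulus operation.
 */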
740 
741 /**
742  * Check for descriptors with a complete status
743  *
744  * @param txq
745  *   TX queue to work with
746  *
747  * @return
748  *   Number of descriptors freed
749  */
750 uint32_t
751 nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
752 {
753 	uint32_t todo;
754 	uint32_t qcp_rd_p;
755 
756 	PMD_TX_LOG(DEBUG, "Queue %hu. Check for descriptor with a complete"
757 			" status.", txq->qidx);
758 
759 	/* Work out how many packets have been sent */
760 	qcp_rd_p = nfp_net_read_tx_free_qcp(txq);
761 
762 	if (qcp_rd_p == txq->rd_p) {
763 		PMD_TX_LOG(DEBUG, "Queue %hu: It seems harrier is not sending "
764 				"packets (%u, %u).", txq->qidx,
765 				qcp_rd_p, txq->rd_p);
766 		return 0;
767 	}
768 
769 	if (qcp_rd_p > txq->rd_p)
770 		todo = qcp_rd_p - txq->rd_p;
771 	else
772 		todo = qcp_rd_p + txq->tx_count - txq->rd_p;
773 
774 	PMD_TX_LOG(DEBUG, "The qcp_rd_p: %u, txq->rd_p: %u, todo: %u.",
775 			qcp_rd_p, txq->rd_p, todo);
776 
777 	if (todo == 0)
778 		return todo;
779 
780 	txq->rd_p += todo;
781 	if (unlikely(txq->rd_p >= txq->tx_count))
782 		txq->rd_p -= txq->tx_count;
783 
784 	return todo;
785 }
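
/*
 * Illustrative example of the wrap-around handling above: with tx_count = 256,
 * txq->rd_p = 250 and qcp_rd_p = 10, the hardware read pointer has wrapped, so
 * todo = 10 + 256 - 250 = 16 descriptors can be freed.
 */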
786 
787 static void
788 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
789 {
790 	uint32_t i;
791 
792 	if (txq->txbufs == NULL)
793 		return;
794 
795 	for (i = 0; i < txq->tx_count; i++) {
796 		if (txq->txbufs[i].mbuf != NULL) {
797 			rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
798 			txq->txbufs[i].mbuf = NULL;
799 		}
800 	}
801 }
802 
803 void
804 nfp_net_tx_queue_release(struct rte_eth_dev *dev,
805 		uint16_t queue_idx)
806 {
807 	struct nfp_net_hw *net_hw;
808 	struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];
809 
810 	if (txq != NULL) {
811 		net_hw = nfp_net_get_hw(dev);
812 		if (net_hw->txrwb_mz != NULL)
813 			nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0);
814 		nfp_net_tx_queue_release_mbufs(txq);
815 		rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
816 		rte_free(txq->txbufs);
817 		rte_free(txq);
818 	}
819 }
820 
821 void
822 nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
823 {
824 	nfp_net_tx_queue_release_mbufs(txq);
825 	txq->wr_p = 0;
826 	txq->rd_p = 0;
827 	if (txq->txrwb != NULL)
828 		*txq->txrwb = 0;
829 }
830 
831 int
832 nfp_net_tx_queue_setup(struct rte_eth_dev *dev,
833 		uint16_t queue_idx,
834 		uint16_t nb_desc,
835 		unsigned int socket_id,
836 		const struct rte_eth_txconf *tx_conf)
837 {
838 	struct nfp_net_hw_priv *hw_priv;
839 
840 	hw_priv = dev->process_private;
841 
842 	if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
843 		return nfp_net_nfd3_tx_queue_setup(dev, queue_idx,
844 				nb_desc, socket_id, tx_conf);
845 	else
846 		return nfp_net_nfdk_tx_queue_setup(dev, queue_idx,
847 				nb_desc, socket_id, tx_conf);
848 }
849 
850 void
851 nfp_net_rx_queue_info_get(struct rte_eth_dev *dev,
852 		uint16_t queue_id,
853 		struct rte_eth_rxq_info *info)
854 {
855 	struct rte_eth_dev_info dev_info;
856 	struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_id];
857 
858 	info->mp = rxq->mem_pool;
859 	info->nb_desc = rxq->rx_count;
860 
861 	info->conf.rx_free_thresh = rxq->rx_free_thresh;
862 
863 	nfp_net_infos_get(dev, &dev_info);
864 	info->conf.offloads = dev_info.rx_offload_capa &
865 			dev->data->dev_conf.rxmode.offloads;
866 	info->conf.rx_thresh = dev_info.default_rxconf.rx_thresh;
867 }
868 
869 void
870 nfp_net_tx_queue_info_get(struct rte_eth_dev *dev,
871 		uint16_t queue_id,
872 		struct rte_eth_txq_info *info)
873 {
874 	struct rte_eth_dev_info dev_info;
875 	struct nfp_net_hw_priv *hw_priv = dev->process_private;
876 	struct nfp_net_txq *txq = dev->data->tx_queues[queue_id];
877 
878 	if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
879 		info->nb_desc = txq->tx_count / NFD3_TX_DESC_PER_PKT;
880 	else
881 		info->nb_desc = txq->tx_count / NFDK_TX_DESC_PER_SIMPLE_PKT;
882 
883 	info->conf.tx_free_thresh = txq->tx_free_thresh;
884 
885 	nfp_net_infos_get(dev, &dev_info);
886 	info->conf.offloads = dev_info.tx_offload_capa &
887 			dev->data->dev_conf.txmode.offloads;
888 	info->conf.tx_thresh = dev_info.default_txconf.tx_thresh;
889 }
890 
891 void
892 nfp_net_recv_pkts_set(struct rte_eth_dev *eth_dev)
893 {
894 	if (nfp_net_get_avx2_supported())
895 		eth_dev->rx_pkt_burst = nfp_net_vec_avx2_recv_pkts;
896 	else
897 		eth_dev->rx_pkt_burst = nfp_net_recv_pkts;
898 }
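
/*
 * Application-side usage sketch (assumptions: port 0, queue 0, bursts of up to
 * 32 packets): rte_eth_rx_burst() invokes whichever rx_pkt_burst handler was
 * selected above.
 */
static inline uint16_t
nfp_example_rx_poll(struct rte_mbuf **pkts)
{
	return rte_eth_rx_burst(0, 0, pkts, 32);
}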
899 
900 int
901 nfp_net_rx_burst_mode_get(struct rte_eth_dev *eth_dev,
902 		uint16_t queue_id __rte_unused,
903 		struct rte_eth_burst_mode *mode)
904 {
905 	eth_rx_burst_t pkt_burst;
906 
907 	pkt_burst = eth_dev->rx_pkt_burst;
908 	if (pkt_burst == nfp_net_recv_pkts) {
909 		strlcpy(mode->info, "Scalar",
910 				RTE_ETH_BURST_MODE_INFO_SIZE);
911 	} else if (pkt_burst == nfp_net_vec_avx2_recv_pkts) {
912 		strlcpy(mode->info, "Vector AVX2",
913 				RTE_ETH_BURST_MODE_INFO_SIZE);
914 	} else {
915 		return -EINVAL;
916 	}
917 
918 	return 0;
919 }
920 
921 int
922 nfp_net_tx_burst_mode_get(struct rte_eth_dev *eth_dev,
923 		uint16_t queue_id __rte_unused,
924 		struct rte_eth_burst_mode *mode)
925 {
926 	eth_tx_burst_t pkt_burst;
927 
928 	pkt_burst = eth_dev->tx_pkt_burst;
929 	if (pkt_burst == nfp_net_nfd3_xmit_pkts) {
930 		strlcpy(mode->info, "NFD3 Scalar",
931 				RTE_ETH_BURST_MODE_INFO_SIZE);
932 	} else if (pkt_burst == nfp_net_nfdk_xmit_pkts) {
933 		strlcpy(mode->info, "NFDk Scalar",
934 				RTE_ETH_BURST_MODE_INFO_SIZE);
935 	} else if (pkt_burst == nfp_net_nfdk_vec_avx2_xmit_pkts) {
936 		strlcpy(mode->info, "NFDk Vector AVX2",
937 				RTE_ETH_BURST_MODE_INFO_SIZE);
938 	} else {
939 		return -EINVAL;
940 	}
941 
942 	return 0;
943 }
944