/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2020 NXP
 */

#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

#include "rte_ethdev.h"
#include "rte_malloc.h"
#include "rte_memzone.h"

#include "base/enetc_hw.h"
#include "enetc.h"
#include "enetc_logs.h"

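/* ENETC_CACHE_LINE_RXBDS below is the number of Rx BDs that fit in one cache
 * line and is used to size the Rx prefetch distance; with a 64-byte cache
 * line and a 16-byte union enetc_rx_bd that would be 4 BDs, though the exact
 * value depends on the build's RTE_CACHE_LINE_SIZE and the BD layout in
 * base/enetc_hw.h.
 */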
#define ENETC_CACHE_LINE_RXBDS	(RTE_CACHE_LINE_SIZE / \
				 sizeof(union enetc_rx_bd))
#define ENETC_RXBD_BUNDLE 16 /* Number of buffers to allocate at once */

static int
enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
{
	int tx_frm_cnt = 0;
	struct enetc_swbd *tx_swbd, *tx_swbd_base;
	int i, hwci, bd_count;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];

	/* we don't need barriers here, we just want a relatively current value
	 * from HW.
	 */
	hwci = (int)(rte_read32_relaxed(tx_ring->tcisr) &
		     ENETC_TBCISR_IDX_MASK);

	tx_swbd_base = tx_ring->q_swbd;
	bd_count = tx_ring->bd_count;
	i = tx_ring->next_to_clean;
	tx_swbd = &tx_swbd_base[i];

	/* we're only reading the CI index once here, which means HW may update
	 * it while we're doing clean-up.  We could read the register in a loop
	 * but for now I assume it's OK to leave a few Tx frames for next call.
	 * The issue with reading the register in a loop is that we're stalling
	 * here trying to catch up with HW which keeps sending traffic as long
	 * as it has traffic to send, so in effect we could be waiting here for
	 * the Tx ring to be drained by HW, instead of us doing Rx in that
	 * meantime.
	 */
	while (i != hwci) {
		/* Calling rte_pktmbuf_free() per mbuf wastes a lot of cycles;
		 * collect the mbufs in a list and free them in bulk instead.
		 */
		if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
			rte_pktmbuf_free_bulk(m, tx_frm_cnt);
			tx_frm_cnt = 0;
		}

		m[tx_frm_cnt] = tx_swbd->buffer_addr;
		tx_swbd->buffer_addr = NULL;

		i++;
		tx_swbd++;
		if (unlikely(i == bd_count)) {
			i = 0;
			tx_swbd = tx_swbd_base;
		}

		tx_frm_cnt++;
	}

	if (tx_frm_cnt)
		rte_pktmbuf_free_bulk(m, tx_frm_cnt);

	tx_ring->next_to_clean = i;

	return 0;
}

uint16_t
enetc_xmit_pkts(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_swbd *tx_swbd;
	int i, start, bds_to_use;
	struct enetc_tx_bd *txbd;
	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;

	i = tx_ring->next_to_use;

	bds_to_use = enetc_bd_unused(tx_ring);
	if (bds_to_use < nb_pkts)
		nb_pkts = bds_to_use;

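	/* One BD is consumed per packet: this path appears to handle only
	 * single-segment mbufs, and nb_pkts was clipped above to the free BD
	 * count from enetc_bd_unused(), so packets that do not fit are simply
	 * left for the caller to retry (they are not counted in the return
	 * value).  Each BD is flagged with ENETC_TXBD_FLAGS_F, presumably
	 * marking it as the final BD of its frame.
	 */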
	start = 0;
	while (nb_pkts--) {
		tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
		txbd = ENETC_TXBD(*tx_ring, i);
		tx_swbd = &tx_ring->q_swbd[i];
		txbd->frm_len = tx_pkts[start]->pkt_len;
		txbd->buf_len = txbd->frm_len;
		txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
		txbd->addr = (uint64_t)(uintptr_t)
		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
				 tx_swbd->buffer_addr->data_off);
		i++;
		start++;
		if (unlikely(i == tx_ring->bd_count))
			i = 0;
	}

	/* we're only cleaning up the Tx ring here, on the assumption that
	 * software is slower than hardware and hardware completed sending
	 * older frames out by now.
	 * We're also cleaning up the ring before kicking off Tx for the new
	 * batch to minimize chances of contention on the Tx ring.
	 */
	enetc_clean_tx_ring(tx_ring);

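	/* Publish the new producer index: next_to_use tracks it on the software
	 * side, while the write to tcir (presumably the Tx BD ring producer
	 * index register) is what hands the freshly filled BDs to hardware.
	 */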
	tx_ring->next_to_use = i;
	enetc_wr_reg(tx_ring->tcir, i);
	return start;
}

int
enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
{
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;
	int i, j, k = ENETC_RXBD_BUNDLE;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
	struct rte_mempool *mb_pool;

	i = rx_ring->next_to_use;
	mb_pool = rx_ring->mb_pool;
	rx_swbd = &rx_ring->q_swbd[i];
	rxbd = ENETC_RXBD(*rx_ring, i);
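	/* Walk buff_cnt BDs starting at next_to_use, attaching a fresh mbuf to
	 * each software BD and writing its bus address into the matching
	 * hardware BD; mbufs are pulled from the pool in bundles of
	 * ENETC_RXBD_BUNDLE to amortize the mempool access cost.
	 */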
	for (j = 0; j < buff_cnt; j++) {
		/* bulk alloc for the next up to ENETC_RXBD_BUNDLE BDs */
		if (k == ENETC_RXBD_BUNDLE) {
			k = 0;
			int m_cnt = RTE_MIN(buff_cnt - j, ENETC_RXBD_BUNDLE);

			if (rte_pktmbuf_alloc_bulk(mb_pool, m, m_cnt))
				return -1;
		}

		rx_swbd->buffer_addr = m[k];
		rxbd->w.addr = (uint64_t)(uintptr_t)
			       rx_swbd->buffer_addr->buf_iova +
			       rx_swbd->buffer_addr->data_off;
		/* clear 'R' as well */
		rxbd->r.lstatus = 0;
		rx_swbd++;
		rxbd++;
		i++;
		k++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rxbd = ENETC_RXBD(*rx_ring, 0);
			rx_swbd = &rx_ring->q_swbd[i];
		}
	}

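	/* Only publish the new index if at least one buffer was placed; the
	 * write to rcir (presumably the Rx BD ring index register) tells
	 * hardware it may use the newly refilled buffers.
	 */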
	if (likely(j)) {
		rx_ring->next_to_alloc = i;
		rx_ring->next_to_use = i;
		enetc_wr_reg(rx_ring->rcir, i);
	}

	return j;
}

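/* Fallback for parse results that enetc_dev_rx_parse() below does not handle
 * directly, i.e. results carrying ENETC_PARSE_ERROR: the packet type is still
 * reported, but the corresponding checksum flag is marked bad (or unknown for
 * unrecognized results).
 */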
static inline void enetc_slow_parsing(struct rte_mbuf *m,
				     uint64_t parse_results)
{
	m->ol_flags &= ~(RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD);

	switch (parse_results) {
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			       RTE_MBUF_F_RX_L4_CKSUM_BAD;
		return;
	/* More switch cases can be added */
	default:
		m->packet_type = RTE_PTYPE_UNKNOWN;
		m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN |
			       RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
	}
}


static inline void __rte_hot
enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
{
	ENETC_PMD_DP_DEBUG("parse summary = 0x%x   ", parse_results);
	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD;

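	/* Known-good parse results map directly to a packet type and keep the
	 * CKSUM_GOOD flags set above; anything else (including results that
	 * carry ENETC_PARSE_ERROR) is handed to enetc_slow_parsing().
	 */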
	switch (parse_results) {
	case ENETC_PKT_TYPE_ETHER:
		m->packet_type = RTE_PTYPE_L2_ETHER;
		return;
	case ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4;
		return;
	case ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6;
		return;
	case ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		return;
	case ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER |
				 RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		return;
	/* More switch cases can be added */
	default:
		enetc_slow_parsing(m, parse_results);
	}

}

static int
enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
		    struct rte_mbuf **rx_pkts,
		    int work_limit)
{
	int rx_frm_cnt = 0;
	int cleaned_cnt, i, bd_count;
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;

	/* next descriptor to process */
	i = rx_ring->next_to_clean;
	rxbd = ENETC_RXBD(*rx_ring, i);
	rte_prefetch0(rxbd);
	bd_count = rx_ring->bd_count;
	/* LS1028A does not have platform cache so any software access following
	 * a hardware write will go directly to DDR.  Latency of such a read is
	 * in excess of 100 core cycles, so try to prefetch more in advance to
	 * mitigate this.
	 * How much is worth prefetching really depends on traffic conditions.
	 * With congested Rx this could go up to 4 cache lines or so.  But if
	 * software keeps up with hardware and follows behind Rx PI by a cache
	 * line or less then caching more is actually harmful to performance:
	 * we would only be prefetching BDs that have yet to be written by
	 * ENETC and that will have to be evicted again anyway.
	 */
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS) % bd_count));
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count));

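	/* cleaned_cnt is the number of BDs available for refill: it starts at
	 * whatever is already unused in the ring and grows by one for every
	 * Rx BD consumed below, then feeds enetc_refill_rx_ring() at the end.
	 */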
	cleaned_cnt = enetc_bd_unused(rx_ring);
	rx_swbd = &rx_ring->q_swbd[i];
	while (likely(rx_frm_cnt < work_limit)) {
		uint32_t bd_status;

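		/* A zero lstatus means hardware has not written this BD back
		 * yet (ready/status bits still clear), so there is nothing
		 * more to receive in this burst.
		 */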
		bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
		if (!bd_status)
			break;

		rx_swbd->buffer_addr->pkt_len = rxbd->r.buf_len -
						rx_ring->crc_len;
		rx_swbd->buffer_addr->data_len = rxbd->r.buf_len -
						 rx_ring->crc_len;
		rx_swbd->buffer_addr->hash.rss = rxbd->r.rss_hash;
		rx_swbd->buffer_addr->ol_flags = 0;
		enetc_dev_rx_parse(rx_swbd->buffer_addr,
				   rxbd->r.parse_summary);
		rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
		cleaned_cnt++;
		rx_swbd++;
		i++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rx_swbd = &rx_ring->q_swbd[i];
		}
		rxbd = ENETC_RXBD(*rx_ring, i);
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS) %
					  bd_count));
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS * 2) %
					 bd_count));

		rx_frm_cnt++;
	}

	rx_ring->next_to_clean = i;
	enetc_refill_rx_ring(rx_ring, cleaned_cnt);

	return rx_frm_cnt;
}

uint16_t
enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;

	return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
}