1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018-2020 NXP
3 */
4
5 #include <stdbool.h>
6 #include <stdint.h>
7 #include <unistd.h>
8
9 #include "rte_ethdev.h"
10 #include "rte_malloc.h"
11 #include "rte_memzone.h"
12
13 #include "base/enetc_hw.h"
14 #include "enetc.h"
15 #include "enetc_logs.h"
16
/* Number of Rx buffer descriptors that fit in a single cache line. */
#define ENETC_CACHE_LINE_RXBDS \
	(RTE_CACHE_LINE_SIZE / sizeof(union enetc_rx_bd))

/*
 * Number of mbufs handled per bulk operation: the bulk-allocation size used
 * when refilling an Rx ring, and also the batch size used when bulk-freeing
 * transmitted mbufs.
 */
#define ENETC_RXBD_BUNDLE 16
20
21 static int
enetc_clean_tx_ring(struct enetc_bdr * tx_ring)22 enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
23 {
24 int tx_frm_cnt = 0;
25 struct enetc_swbd *tx_swbd, *tx_swbd_base;
26 int i, hwci, bd_count;
27 struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
28
29 /* we don't need barriers here, we just want a relatively current value
30 * from HW.
31 */
32 hwci = (int)(rte_read32_relaxed(tx_ring->tcisr) &
33 ENETC_TBCISR_IDX_MASK);
34
35 tx_swbd_base = tx_ring->q_swbd;
36 bd_count = tx_ring->bd_count;
37 i = tx_ring->next_to_clean;
38 tx_swbd = &tx_swbd_base[i];
39
40 /* we're only reading the CI index once here, which means HW may update
41 * it while we're doing clean-up. We could read the register in a loop
42 * but for now I assume it's OK to leave a few Tx frames for next call.
43 * The issue with reading the register in a loop is that we're stalling
44 * here trying to catch up with HW which keeps sending traffic as long
45 * as it has traffic to send, so in effect we could be waiting here for
46 * the Tx ring to be drained by HW, instead of us doing Rx in that
47 * meantime.
48 */
49 while (i != hwci) {
50 /* It seems calling rte_pktmbuf_free is wasting a lot of cycles,
51 * make a list and call _free when it's done.
52 */
53 if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
54 rte_pktmbuf_free_bulk(m, tx_frm_cnt);
55 tx_frm_cnt = 0;
56 }
57
58 m[tx_frm_cnt] = tx_swbd->buffer_addr;
59 tx_swbd->buffer_addr = NULL;
60
61 i++;
62 tx_swbd++;
63 if (unlikely(i == bd_count)) {
64 i = 0;
65 tx_swbd = tx_swbd_base;
66 }
67
68 tx_frm_cnt++;
69 }
70
71 if (tx_frm_cnt)
72 rte_pktmbuf_free_bulk(m, tx_frm_cnt);
73
74 tx_ring->next_to_clean = i;
75
76 return 0;
77 }
78
79 uint16_t
enetc_xmit_pkts(void * tx_queue,struct rte_mbuf ** tx_pkts,uint16_t nb_pkts)80 enetc_xmit_pkts(void *tx_queue,
81 struct rte_mbuf **tx_pkts,
82 uint16_t nb_pkts)
83 {
84 struct enetc_swbd *tx_swbd;
85 int i, start, bds_to_use;
86 struct enetc_tx_bd *txbd;
87 struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
88
89 i = tx_ring->next_to_use;
90
91 bds_to_use = enetc_bd_unused(tx_ring);
92 if (bds_to_use < nb_pkts)
93 nb_pkts = bds_to_use;
94
95 start = 0;
96 while (nb_pkts--) {
97 tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
98 txbd = ENETC_TXBD(*tx_ring, i);
99 tx_swbd = &tx_ring->q_swbd[i];
100 txbd->frm_len = tx_pkts[start]->pkt_len;
101 txbd->buf_len = txbd->frm_len;
102 txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
103 txbd->addr = (uint64_t)(uintptr_t)
104 rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
105 tx_swbd->buffer_addr->data_off);
106 i++;
107 start++;
108 if (unlikely(i == tx_ring->bd_count))
109 i = 0;
110 }
111
112 /* we're only cleaning up the Tx ring here, on the assumption that
113 * software is slower than hardware and hardware completed sending
114 * older frames out by now.
115 * We're also cleaning up the ring before kicking off Tx for the new
116 * batch to minimize chances of contention on the Tx ring
117 */
118 enetc_clean_tx_ring(tx_ring);
119
120 tx_ring->next_to_use = i;
121 enetc_wr_reg(tx_ring->tcir, i);
122 return start;
123 }
124
125 int
enetc_refill_rx_ring(struct enetc_bdr * rx_ring,const int buff_cnt)126 enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
127 {
128 struct enetc_swbd *rx_swbd;
129 union enetc_rx_bd *rxbd;
130 int i, j, k = ENETC_RXBD_BUNDLE;
131 struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
132 struct rte_mempool *mb_pool;
133
134 i = rx_ring->next_to_use;
135 mb_pool = rx_ring->mb_pool;
136 rx_swbd = &rx_ring->q_swbd[i];
137 rxbd = ENETC_RXBD(*rx_ring, i);
138 for (j = 0; j < buff_cnt; j++) {
139 /* bulk alloc for the next up to 8 BDs */
140 if (k == ENETC_RXBD_BUNDLE) {
141 k = 0;
142 int m_cnt = RTE_MIN(buff_cnt - j, ENETC_RXBD_BUNDLE);
143
144 if (rte_pktmbuf_alloc_bulk(mb_pool, m, m_cnt))
145 return -1;
146 }
147
148 rx_swbd->buffer_addr = m[k];
149 rxbd->w.addr = (uint64_t)(uintptr_t)
150 rx_swbd->buffer_addr->buf_iova +
151 rx_swbd->buffer_addr->data_off;
152 /* clear 'R" as well */
153 rxbd->r.lstatus = 0;
154 rx_swbd++;
155 rxbd++;
156 i++;
157 k++;
158 if (unlikely(i == rx_ring->bd_count)) {
159 i = 0;
160 rxbd = ENETC_RXBD(*rx_ring, 0);
161 rx_swbd = &rx_ring->q_swbd[i];
162 }
163 }
164
165 if (likely(j)) {
166 rx_ring->next_to_alloc = i;
167 rx_ring->next_to_use = i;
168 enetc_wr_reg(rx_ring->rcir, i);
169 }
170
171 return j;
172 }
173
enetc_slow_parsing(struct rte_mbuf * m,uint64_t parse_results)174 static inline void enetc_slow_parsing(struct rte_mbuf *m,
175 uint64_t parse_results)
176 {
177 m->ol_flags &= ~(RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD);
178
179 switch (parse_results) {
180 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4:
181 m->packet_type = RTE_PTYPE_L2_ETHER |
182 RTE_PTYPE_L3_IPV4;
183 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
184 return;
185 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6:
186 m->packet_type = RTE_PTYPE_L2_ETHER |
187 RTE_PTYPE_L3_IPV6;
188 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
189 return;
190 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_TCP:
191 m->packet_type = RTE_PTYPE_L2_ETHER |
192 RTE_PTYPE_L3_IPV4 |
193 RTE_PTYPE_L4_TCP;
194 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
195 RTE_MBUF_F_RX_L4_CKSUM_BAD;
196 return;
197 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_TCP:
198 m->packet_type = RTE_PTYPE_L2_ETHER |
199 RTE_PTYPE_L3_IPV6 |
200 RTE_PTYPE_L4_TCP;
201 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
202 RTE_MBUF_F_RX_L4_CKSUM_BAD;
203 return;
204 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_UDP:
205 m->packet_type = RTE_PTYPE_L2_ETHER |
206 RTE_PTYPE_L3_IPV4 |
207 RTE_PTYPE_L4_UDP;
208 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
209 RTE_MBUF_F_RX_L4_CKSUM_BAD;
210 return;
211 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_UDP:
212 m->packet_type = RTE_PTYPE_L2_ETHER |
213 RTE_PTYPE_L3_IPV6 |
214 RTE_PTYPE_L4_UDP;
215 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
216 RTE_MBUF_F_RX_L4_CKSUM_BAD;
217 return;
218 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_SCTP:
219 m->packet_type = RTE_PTYPE_L2_ETHER |
220 RTE_PTYPE_L3_IPV4 |
221 RTE_PTYPE_L4_SCTP;
222 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
223 RTE_MBUF_F_RX_L4_CKSUM_BAD;
224 return;
225 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_SCTP:
226 m->packet_type = RTE_PTYPE_L2_ETHER |
227 RTE_PTYPE_L3_IPV6 |
228 RTE_PTYPE_L4_SCTP;
229 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
230 RTE_MBUF_F_RX_L4_CKSUM_BAD;
231 return;
232 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_ICMP:
233 m->packet_type = RTE_PTYPE_L2_ETHER |
234 RTE_PTYPE_L3_IPV4 |
235 RTE_PTYPE_L4_ICMP;
236 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
237 RTE_MBUF_F_RX_L4_CKSUM_BAD;
238 return;
239 case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_ICMP:
240 m->packet_type = RTE_PTYPE_L2_ETHER |
241 RTE_PTYPE_L3_IPV6 |
242 RTE_PTYPE_L4_ICMP;
243 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD |
244 RTE_MBUF_F_RX_L4_CKSUM_BAD;
245 return;
246 /* More switch cases can be added */
247 default:
248 m->packet_type = RTE_PTYPE_UNKNOWN;
249 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN |
250 RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
251 }
252 }
253
254
255 static inline void __rte_hot
enetc_dev_rx_parse(struct rte_mbuf * m,uint16_t parse_results)256 enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
257 {
258 ENETC_PMD_DP_DEBUG("parse summary = 0x%x ", parse_results);
259 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD;
260
261 switch (parse_results) {
262 case ENETC_PKT_TYPE_ETHER:
263 m->packet_type = RTE_PTYPE_L2_ETHER;
264 return;
265 case ENETC_PKT_TYPE_IPV4:
266 m->packet_type = RTE_PTYPE_L2_ETHER |
267 RTE_PTYPE_L3_IPV4;
268 return;
269 case ENETC_PKT_TYPE_IPV6:
270 m->packet_type = RTE_PTYPE_L2_ETHER |
271 RTE_PTYPE_L3_IPV6;
272 return;
273 case ENETC_PKT_TYPE_IPV4_TCP:
274 m->packet_type = RTE_PTYPE_L2_ETHER |
275 RTE_PTYPE_L3_IPV4 |
276 RTE_PTYPE_L4_TCP;
277 return;
278 case ENETC_PKT_TYPE_IPV6_TCP:
279 m->packet_type = RTE_PTYPE_L2_ETHER |
280 RTE_PTYPE_L3_IPV6 |
281 RTE_PTYPE_L4_TCP;
282 return;
283 case ENETC_PKT_TYPE_IPV4_UDP:
284 m->packet_type = RTE_PTYPE_L2_ETHER |
285 RTE_PTYPE_L3_IPV4 |
286 RTE_PTYPE_L4_UDP;
287 return;
288 case ENETC_PKT_TYPE_IPV6_UDP:
289 m->packet_type = RTE_PTYPE_L2_ETHER |
290 RTE_PTYPE_L3_IPV6 |
291 RTE_PTYPE_L4_UDP;
292 return;
293 case ENETC_PKT_TYPE_IPV4_SCTP:
294 m->packet_type = RTE_PTYPE_L2_ETHER |
295 RTE_PTYPE_L3_IPV4 |
296 RTE_PTYPE_L4_SCTP;
297 return;
298 case ENETC_PKT_TYPE_IPV6_SCTP:
299 m->packet_type = RTE_PTYPE_L2_ETHER |
300 RTE_PTYPE_L3_IPV6 |
301 RTE_PTYPE_L4_SCTP;
302 return;
303 case ENETC_PKT_TYPE_IPV4_ICMP:
304 m->packet_type = RTE_PTYPE_L2_ETHER |
305 RTE_PTYPE_L3_IPV4 |
306 RTE_PTYPE_L4_ICMP;
307 return;
308 case ENETC_PKT_TYPE_IPV6_ICMP:
309 m->packet_type = RTE_PTYPE_L2_ETHER |
310 RTE_PTYPE_L3_IPV6 |
311 RTE_PTYPE_L4_ICMP;
312 return;
313 /* More switch cases can be added */
314 default:
315 enetc_slow_parsing(m, parse_results);
316 }
317
318 }
319
320 static int
enetc_clean_rx_ring(struct enetc_bdr * rx_ring,struct rte_mbuf ** rx_pkts,int work_limit)321 enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
322 struct rte_mbuf **rx_pkts,
323 int work_limit)
324 {
325 int rx_frm_cnt = 0;
326 int cleaned_cnt, i, bd_count;
327 struct enetc_swbd *rx_swbd;
328 union enetc_rx_bd *rxbd;
329
330 /* next descriptor to process */
331 i = rx_ring->next_to_clean;
332 /* next descriptor to process */
333 rxbd = ENETC_RXBD(*rx_ring, i);
334 rte_prefetch0(rxbd);
335 bd_count = rx_ring->bd_count;
336 /* LS1028A does not have platform cache so any software access following
337 * a hardware write will go directly to DDR. Latency of such a read is
338 * in excess of 100 core cycles, so try to prefetch more in advance to
339 * mitigate this.
340 * How much is worth prefetching really depends on traffic conditions.
341 * With congested Rx this could go up to 4 cache lines or so. But if
342 * software keeps up with hardware and follows behind Rx PI by a cache
343 * line or less then it's harmful in terms of performance to cache more.
344 * We would only prefetch BDs that have yet to be written by ENETC,
345 * which will have to be evicted again anyway.
346 */
347 rte_prefetch0(ENETC_RXBD(*rx_ring,
348 (i + ENETC_CACHE_LINE_RXBDS) % bd_count));
349 rte_prefetch0(ENETC_RXBD(*rx_ring,
350 (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count));
351
352 cleaned_cnt = enetc_bd_unused(rx_ring);
353 rx_swbd = &rx_ring->q_swbd[i];
354 while (likely(rx_frm_cnt < work_limit)) {
355 uint32_t bd_status;
356
357 bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
358 if (!bd_status)
359 break;
360
361 rx_swbd->buffer_addr->pkt_len = rxbd->r.buf_len -
362 rx_ring->crc_len;
363 rx_swbd->buffer_addr->data_len = rxbd->r.buf_len -
364 rx_ring->crc_len;
365 rx_swbd->buffer_addr->hash.rss = rxbd->r.rss_hash;
366 rx_swbd->buffer_addr->ol_flags = 0;
367 enetc_dev_rx_parse(rx_swbd->buffer_addr,
368 rxbd->r.parse_summary);
369 rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
370 cleaned_cnt++;
371 rx_swbd++;
372 i++;
373 if (unlikely(i == rx_ring->bd_count)) {
374 i = 0;
375 rx_swbd = &rx_ring->q_swbd[i];
376 }
377 rxbd = ENETC_RXBD(*rx_ring, i);
378 rte_prefetch0(ENETC_RXBD(*rx_ring,
379 (i + ENETC_CACHE_LINE_RXBDS) %
380 bd_count));
381 rte_prefetch0(ENETC_RXBD(*rx_ring,
382 (i + ENETC_CACHE_LINE_RXBDS * 2) %
383 bd_count));
384
385 rx_frm_cnt++;
386 }
387
388 rx_ring->next_to_clean = i;
389 enetc_refill_rx_ring(rx_ring, cleaned_cnt);
390
391 return rx_frm_cnt;
392 }
393
/*
 * Rx burst routine: treats rxq as a struct enetc_bdr Rx ring and collects up
 * to nb_pkts received frames from it into rx_pkts.
 *
 * @return Number of mbufs stored in rx_pkts.
 */
uint16_t
enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return enetc_clean_rx_ring((struct enetc_bdr *)rxq, rx_pkts, nb_pkts);
}
402