1 /* $NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $");
68
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71
72 #include "ixgbe.h"
73
74 #ifdef RSC
75 /*
76 * HW RSC control:
77 * this feature only works with
78 * IPv4, and only on 82599 and later.
79 * Also this will cause IP forwarding to
80 * fail and that can't be controlled by
81 * the stack as LRO can. For all these
82 * reasons I've deemed it best to leave
83  * this off and not bother with a tunable
84  * interface; it must be enabled at
85  * compile time.
86 */
87 static bool ixgbe_rsc_enable = FALSE;
88 #endif
89
90 #ifdef IXGBE_FDIR
91 /*
92 * For Flow Director: this is the
93 * number of TX packets we sample
94  * for the filter pool; this means
95 * every 20th packet will be probed.
96 *
97 * This feature can be disabled by
98 * setting this to 0.
99 */
100 static int atr_sample_rate = 20;
101 #endif
102
103 #define IXGBE_M_ADJ(sc, rxr, mp) \
104 if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN)) \
105 m_adj(mp, ETHER_ALIGN)
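/*
 * Example of the adjustment above: with a 2048-byte (MCLBYTES) cluster
 * and ETHER_ALIGN of 2, a standard 1518-byte maximum frame still fits
 * in 2048 - 2 bytes, so m_adj(mp, 2) shifts the start of the frame by
 * two bytes and the 14-byte Ethernet header leaves the IP header
 * 4-byte aligned.  If max_frame_size exceeds mbuf_sz - ETHER_ALIGN,
 * no adjustment is made and the full cluster remains available.
 */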
106
107 /************************************************************************
108 * Local Function prototypes
109 ************************************************************************/
110 static void ixgbe_setup_transmit_ring(struct tx_ring *);
111 static void ixgbe_free_transmit_buffers(struct tx_ring *);
112 static int ixgbe_setup_receive_ring(struct rx_ring *);
113 static void ixgbe_free_receive_buffers(struct rx_ring *);
114 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
115 struct ixgbe_hw_stats *);
116 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
117 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
118 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
119 static int ixgbe_tx_ctx_setup(struct tx_ring *,
120 struct mbuf *, u32 *, u32 *);
121 static int ixgbe_tso_setup(struct tx_ring *,
122 struct mbuf *, u32 *, u32 *);
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126 static int ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
127 struct ixgbe_dma_alloc *, int);
128 static void ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
129 #ifdef RSC
130 static void ixgbe_setup_hw_rsc(struct rx_ring *);
131 #endif
132
133 /************************************************************************
134 * ixgbe_legacy_start_locked - Transmit entry point
135 *
136 * Called by the stack to initiate a transmit.
137 * The driver will remain in this routine as long as there are
138 * packets to transmit and transmit resources are available.
139 * In case resources are not available, the stack is notified
140 * and the packet is requeued.
141 ************************************************************************/
142 int
143 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
144 {
145 int rc;
146 struct mbuf *m_head;
147 struct ixgbe_softc *sc = txr->sc;
148
149 IXGBE_TX_LOCK_ASSERT(txr);
150
151 if (sc->link_active != LINK_STATE_UP) {
152 /*
153 * discard all packets buffered in IFQ to avoid
154 * sending stale packets when the link next comes up.
155 */
156 ixgbe_drain(ifp, txr);
157 return (ENETDOWN);
158 }
159 if ((ifp->if_flags & IFF_RUNNING) == 0)
160 return (ENETDOWN);
161 if (txr->txr_no_space)
162 return (ENETDOWN);
163
164 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
165 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
166 break;
167
168 IFQ_POLL(&ifp->if_snd, m_head);
169 if (m_head == NULL)
170 break;
171
172 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
173 break;
174 }
175 IFQ_DEQUEUE(&ifp->if_snd, m_head);
176 if (rc != 0) {
177 m_freem(m_head);
178 continue;
179 }
180
181 /* Send a copy of the frame to the BPF listener */
182 bpf_mtap(ifp, m_head, BPF_D_OUT);
183 }
184
185 return IXGBE_SUCCESS;
186 } /* ixgbe_legacy_start_locked */
187
188 /************************************************************************
189 * ixgbe_legacy_start
190 *
191 * Called by the stack, this always uses the first tx ring,
192 * and should not be used with multiqueue tx enabled.
193 ************************************************************************/
194 void
195 ixgbe_legacy_start(struct ifnet *ifp)
196 {
197 struct ixgbe_softc *sc = ifp->if_softc;
198 struct tx_ring *txr = sc->tx_rings;
199
200 if (ifp->if_flags & IFF_RUNNING) {
201 IXGBE_TX_LOCK(txr);
202 ixgbe_legacy_start_locked(ifp, txr);
203 IXGBE_TX_UNLOCK(txr);
204 }
205 } /* ixgbe_legacy_start */
206
207 /************************************************************************
208 * ixgbe_mq_start - Multiqueue Transmit Entry Point
209 *
210 * (if_transmit function)
211 ************************************************************************/
212 int
213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
214 {
215 struct ixgbe_softc *sc = ifp->if_softc;
216 struct tx_ring *txr;
217 int i;
218 #ifdef RSS
219 uint32_t bucket_id;
220 #endif
221
222 /*
223 * When doing RSS, map it to the same outbound queue
224 * as the incoming flow would be mapped to.
225 *
226 * If everything is set up correctly, it should be the
227 * same bucket as the one the current CPU belongs to.
228 */
229 #ifdef RSS
230 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
231 if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
232 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
233 &bucket_id) == 0)) {
234 i = bucket_id % sc->num_queues;
235 #ifdef IXGBE_DEBUG
236 if (bucket_id > sc->num_queues)
237 if_printf(ifp,
238 "bucket_id (%d) > num_queues (%d)\n",
239 bucket_id, sc->num_queues);
240 #endif
241 } else
242 i = m->m_pkthdr.flowid % sc->num_queues;
243 } else
244 #endif /* RSS */
245 i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
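/*
 * For example, on a 16-CPU machine with 8 queues, a packet sent from
 * CPU 11 lands on TX queue 11 % 8 = 3, so traffic originating on the
 * same CPU keeps using the same ring unless that queue is hung.
 */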
246
247 /* Check for a hung queue and pick alternative */
248 if (((1ULL << i) & sc->active_queues) == 0)
249 i = ffs64(sc->active_queues);
250
251 txr = &sc->tx_rings[i];
252
253 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
254 m_freem(m);
255 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
256 return ENOBUFS;
257 }
258 #ifdef IXGBE_ALWAYS_TXDEFER
259 kpreempt_disable();
260 softint_schedule(txr->txr_si);
261 kpreempt_enable();
262 #else
263 if (IXGBE_TX_TRYLOCK(txr)) {
264 ixgbe_mq_start_locked(ifp, txr);
265 IXGBE_TX_UNLOCK(txr);
266 } else {
267 if (sc->txrx_use_workqueue) {
268 u_int *enqueued;
269
270 /*
271 * This function itself is not called in interrupt
272 * context, but it can be called in fast softint
273 * context right after receiving forwarded packets.
274 * The workqueue therefore has to be protected against
275 * double enqueueing when the machine handles both
276 * locally originated and forwarded packets.
277 */
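/*
 * For example, an RX softint that is forwarding a packet can call
 * ixgbe_mq_start() on a CPU that is already sending its own traffic;
 * the per-CPU "enqueued" flag below guarantees workqueue_enqueue() is
 * issued at most once until ixgbe_deferred_mq_start_work() clears the
 * flag again.
 */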
278 enqueued = percpu_getref(sc->txr_wq_enqueued);
279 if (*enqueued == 0) {
280 *enqueued = 1;
281 percpu_putref(sc->txr_wq_enqueued);
282 workqueue_enqueue(sc->txr_wq,
283 &txr->wq_cookie, curcpu());
284 } else
285 percpu_putref(sc->txr_wq_enqueued);
286 } else {
287 kpreempt_disable();
288 softint_schedule(txr->txr_si);
289 kpreempt_enable();
290 }
291 }
292 #endif
293
294 return (0);
295 } /* ixgbe_mq_start */
296
297 /************************************************************************
298 * ixgbe_mq_start_locked
299 ************************************************************************/
300 int
301 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
302 {
303 struct mbuf *next;
304 int enqueued = 0, err = 0;
305
306 if (txr->sc->link_active != LINK_STATE_UP) {
307 /*
308 * discard all packets buffered in txr_interq to avoid
309 * sending stale packets when the link next comes up.
310 */
311 ixgbe_drain(ifp, txr);
312 return (ENETDOWN);
313 }
314 if ((ifp->if_flags & IFF_RUNNING) == 0)
315 return (ENETDOWN);
316 if (txr->txr_no_space)
317 return (ENETDOWN);
318
319 /* Process the queue */
320 while ((next = pcq_get(txr->txr_interq)) != NULL) {
321 if ((err = ixgbe_xmit(txr, next)) != 0) {
322 m_freem(next);
323 /* All errors are counted in ixgbe_xmit() */
324 break;
325 }
326 enqueued++;
327 #if __FreeBSD_version >= 1100036
328 /*
329 * Since we're looking at the tx ring, we can check
330 * to see if we're a VF by examining our tail register
331 * address.
332 */
333 if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
334 (next->m_flags & M_MCAST))
335 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
336 #endif
337 /* Send a copy of the frame to the BPF listener */
338 bpf_mtap(ifp, next, BPF_D_OUT);
339 if ((ifp->if_flags & IFF_RUNNING) == 0)
340 break;
341 }
342
343 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
344 ixgbe_txeof(txr);
345
346 return (err);
347 } /* ixgbe_mq_start_locked */
348
349 /************************************************************************
350 * ixgbe_deferred_mq_start
351 *
352 * Called from a softint and workqueue (indirectly) to drain queued
353 * transmit packets.
354 ************************************************************************/
355 void
356 ixgbe_deferred_mq_start(void *arg)
357 {
358 struct tx_ring *txr = arg;
359 struct ixgbe_softc *sc = txr->sc;
360 struct ifnet *ifp = sc->ifp;
361
362 IXGBE_TX_LOCK(txr);
363 if (pcq_peek(txr->txr_interq) != NULL)
364 ixgbe_mq_start_locked(ifp, txr);
365 IXGBE_TX_UNLOCK(txr);
366 } /* ixgbe_deferred_mq_start */
367
368 /************************************************************************
369 * ixgbe_deferred_mq_start_work
370 *
371 * Called from a workqueue to drain queued transmit packets.
372 ************************************************************************/
373 void
374 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
375 {
376 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
377 struct ixgbe_softc *sc = txr->sc;
378 u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
379 *enqueued = 0;
380 percpu_putref(sc->txr_wq_enqueued);
381
382 ixgbe_deferred_mq_start(txr);
383 } /* ixgbe_deferred_mq_start_work */
384
385 /************************************************************************
386 * ixgbe_drain_all
387 ************************************************************************/
388 void
389 ixgbe_drain_all(struct ixgbe_softc *sc)
390 {
391 struct ifnet *ifp = sc->ifp;
392 struct ix_queue *que = sc->queues;
393
394 for (int i = 0; i < sc->num_queues; i++, que++) {
395 struct tx_ring *txr = que->txr;
396
397 IXGBE_TX_LOCK(txr);
398 ixgbe_drain(ifp, txr);
399 IXGBE_TX_UNLOCK(txr);
400 }
401 }
402
403 /************************************************************************
404 * ixgbe_xmit
405 *
406 * Maps the mbufs to tx descriptors, allowing the
407 * TX engine to transmit the packets.
408 *
409 * Return 0 on success, positive on failure
410 ************************************************************************/
411 static int
412 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
413 {
414 struct ixgbe_softc *sc = txr->sc;
415 struct ixgbe_tx_buf *txbuf;
416 union ixgbe_adv_tx_desc *txd = NULL;
417 struct ifnet *ifp = sc->ifp;
418 int i, j, error;
419 int first;
420 u32 olinfo_status = 0, cmd_type_len;
421 bool remap = TRUE;
422 bus_dmamap_t map;
423
424 /* Basic descriptor defines */
425 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
426 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
427
428 if (vlan_has_tag(m_head))
429 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
430
431 /*
432 * Important to capture the first descriptor
433 * used because it will contain the index of
434 * the one we tell the hardware to report back
435 */
436 first = txr->next_avail_desc;
437 txbuf = &txr->tx_buffers[first];
438 map = txbuf->map;
439
440 /*
441 * Map the packet for DMA.
442 */
443 retry:
444 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
445 BUS_DMA_NOWAIT);
446
447 if (__predict_false(error)) {
448 struct mbuf *m;
449
450 switch (error) {
451 case EAGAIN:
452 txr->q_eagain_tx_dma_setup++;
453 return EAGAIN;
454 case ENOMEM:
455 txr->q_enomem_tx_dma_setup++;
456 return EAGAIN;
457 case EFBIG:
458 /* Try it again? - one try */
459 if (remap == TRUE) {
460 remap = FALSE;
461 /*
462 * XXX: m_defrag will choke on
463 * non-MCLBYTES-sized clusters
464 */
465 txr->q_efbig_tx_dma_setup++;
466 m = m_defrag(m_head, M_NOWAIT);
467 if (m == NULL) {
468 txr->q_mbuf_defrag_failed++;
469 return ENOBUFS;
470 }
471 m_head = m;
472 goto retry;
473 } else {
474 txr->q_efbig2_tx_dma_setup++;
475 return error;
476 }
477 case EINVAL:
478 txr->q_einval_tx_dma_setup++;
479 return error;
480 default:
481 txr->q_other_tx_dma_setup++;
482 return error;
483 }
484 }
485
486 /* Make certain there are enough descriptors */
487 if (txr->tx_avail < (map->dm_nsegs + 2)) {
488 txr->txr_no_space = true;
489 IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
490 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
491 return EAGAIN;
492 }
493
494 /*
495 * Set up the appropriate offload context if requested,
496 * this may consume one TX descriptor.
497 */
498 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
499 if (__predict_false(error)) {
500 return (error);
501 }
502
503 #ifdef IXGBE_FDIR
504 /* Do the flow director magic */
505 if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
506 (txr->atr_sample) && (!sc->fdir_reinit)) {
507 ++txr->atr_count;
508 if (txr->atr_count >= atr_sample_rate) {
509 ixgbe_atr(txr, m_head);
510 txr->atr_count = 0;
511 }
512 }
513 #endif
514
515 olinfo_status |= IXGBE_ADVTXD_CC;
516 i = txr->next_avail_desc;
517 for (j = 0; j < map->dm_nsegs; j++) {
518 bus_size_t seglen;
519 uint64_t segaddr;
520
521 txbuf = &txr->tx_buffers[i];
522 txd = &txr->tx_base[i];
523 seglen = map->dm_segs[j].ds_len;
524 segaddr = htole64(map->dm_segs[j].ds_addr);
525
526 txd->read.buffer_addr = segaddr;
527 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
528 txd->read.olinfo_status = htole32(olinfo_status);
529
530 if (++i == txr->num_desc)
531 i = 0;
532 }
533
534 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
535 txr->tx_avail -= map->dm_nsegs;
536 txr->next_avail_desc = i;
537
538 txbuf->m_head = m_head;
539 /*
540 * Here we swap the maps so the last descriptor,
541 * which gets the completion interrupt, has the
542 * real map, and the first descriptor gets the
543 * unused map from this descriptor.
544 */
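/*
 * Concrete illustration (indices arbitrary): a three-segment frame
 * starting at descriptor 10 uses descriptors 10-12.  After the swap,
 * tx_buffers[12] holds the mbuf chain plus the DMA map that was
 * actually loaded, while tx_buffers[10] keeps the spare map and the
 * EOP pointer set just below, so ixgbe_txeof() finds the completion
 * status at the start of the packet and unloads the real map when it
 * reaches the final segment.
 */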
545 txr->tx_buffers[first].map = txbuf->map;
546 txbuf->map = map;
547 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
548 BUS_DMASYNC_PREWRITE);
549
550 /* Set the EOP descriptor that will be marked done */
551 txbuf = &txr->tx_buffers[first];
552 txbuf->eop = txd;
553
554 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
555 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
556 /*
557 * Advance the Transmit Descriptor Tail (TDT); this tells the
558 * hardware that this frame is available to transmit.
559 */
560 IXGBE_EVC_ADD(&txr->total_packets, 1);
561 IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
562
563 net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
564 if_statadd_ref(ifp, nsr, if_obytes, m_head->m_pkthdr.len);
565 if (m_head->m_flags & M_MCAST)
566 if_statinc_ref(ifp, nsr, if_omcasts);
567 IF_STAT_PUTREF(ifp);
568
569 /* Mark queue as having work */
570 if (txr->busy == 0)
571 txr->busy = 1;
572
573 return (0);
574 } /* ixgbe_xmit */
575
576 /************************************************************************
577 * ixgbe_drain
578 ************************************************************************/
579 static void
580 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
581 {
582 struct mbuf *m;
583
584 IXGBE_TX_LOCK_ASSERT(txr);
585
586 if (txr->me == 0) {
587 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
588 IFQ_DEQUEUE(&ifp->if_snd, m);
589 m_freem(m);
590 IF_DROP(&ifp->if_snd);
591 }
592 }
593
594 while ((m = pcq_get(txr->txr_interq)) != NULL) {
595 m_freem(m);
596 IXGBE_EVC_ADD(&txr->pcq_drops, 1);
597 }
598 }
599
600 /************************************************************************
601 * ixgbe_allocate_transmit_buffers
602 *
603 * Allocate memory for tx_buffer structures. The tx_buffer stores all
604 * the information needed to transmit a packet on the wire. This is
605 * called only once at attach; setup is done on every reset.
606 ************************************************************************/
607 static int
608 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
609 {
610 struct ixgbe_softc *sc = txr->sc;
611 device_t dev = sc->dev;
612 struct ixgbe_tx_buf *txbuf;
613 int error, i;
614
615 /*
616 * Setup DMA descriptor areas.
617 */
618 error = ixgbe_dma_tag_create(
619 /* parent */ sc->osdep.dmat,
620 /* alignment */ 1,
621 /* bounds */ 0,
622 /* maxsize */ IXGBE_TSO_SIZE,
623 /* nsegments */ sc->num_segs,
624 /* maxsegsize */ PAGE_SIZE,
625 /* flags */ 0,
626 &txr->txtag);
627 if (error != 0) {
628 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
629 goto fail;
630 }
631
632 txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
633 sc->num_tx_desc, KM_SLEEP);
634
635 /* Create the descriptor buffer dma maps */
636 txbuf = txr->tx_buffers;
637 for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
638 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
639 if (error != 0) {
640 aprint_error_dev(dev,
641 "Unable to create TX DMA map (%d)\n", error);
642 goto fail;
643 }
644 }
645
646 return 0;
647 fail:
648 /* Free everything; this handles the case where we failed partway through */
649 #if 0 /* XXX was FreeBSD */
650 ixgbe_free_transmit_structures(sc);
651 #else
652 ixgbe_free_transmit_buffers(txr);
653 #endif
654 return (error);
655 } /* ixgbe_allocate_transmit_buffers */
656
657 /************************************************************************
658 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
659 ************************************************************************/
660 static void
661 ixgbe_setup_transmit_ring(struct tx_ring *txr)
662 {
663 struct ixgbe_softc *sc = txr->sc;
664 struct ixgbe_tx_buf *txbuf;
665 #ifdef DEV_NETMAP
666 struct netmap_adapter *na = NA(sc->ifp);
667 struct netmap_slot *slot;
668 #endif /* DEV_NETMAP */
669
670 /* Clear the old ring contents */
671 IXGBE_TX_LOCK(txr);
672
673 #ifdef DEV_NETMAP
674 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
675 /*
676 * (under lock): if in netmap mode, do some consistency
677 * checks and set slot to entry 0 of the netmap ring.
678 */
679 slot = netmap_reset(na, NR_TX, txr->me, 0);
680 }
681 #endif /* DEV_NETMAP */
682
683 bzero((void *)txr->tx_base,
684 (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
685 /* Reset indices */
686 txr->next_avail_desc = 0;
687 txr->next_to_clean = 0;
688
689 /* Free any existing tx buffers. */
690 txbuf = txr->tx_buffers;
691 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
692 if (txbuf->m_head != NULL) {
693 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
694 0, txbuf->m_head->m_pkthdr.len,
695 BUS_DMASYNC_POSTWRITE);
696 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
697 m_freem(txbuf->m_head);
698 txbuf->m_head = NULL;
699 }
700
701 #ifdef DEV_NETMAP
702 /*
703 * In netmap mode, set the map for the packet buffer.
704 * NOTE: Some drivers (not this one) also need to set
705 * the physical buffer address in the NIC ring.
706 * Slots in the netmap ring (indexed by "si") are
707 * kring->nkr_hwofs positions "ahead" wrt the
708 * corresponding slot in the NIC ring. In some drivers
709 * (not here) nkr_hwofs can be negative. Function
710 * netmap_idx_n2k() handles wraparounds properly.
711 */
712 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
713 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
714 netmap_load_map(na, txr->txtag,
715 txbuf->map, NMB(na, slot + si));
716 }
717 #endif /* DEV_NETMAP */
718
719 /* Clear the EOP descriptor pointer */
720 txbuf->eop = NULL;
721 }
722
723 #ifdef IXGBE_FDIR
724 /* Set the rate at which we sample packets */
725 if (sc->feat_en & IXGBE_FEATURE_FDIR)
726 txr->atr_sample = atr_sample_rate;
727 #endif
728
729 /* Set number of descriptors available */
730 txr->tx_avail = sc->num_tx_desc;
731
732 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
733 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
734 IXGBE_TX_UNLOCK(txr);
735 } /* ixgbe_setup_transmit_ring */
736
737 /************************************************************************
738 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
739 ************************************************************************/
740 int
741 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
742 {
743 struct tx_ring *txr = sc->tx_rings;
744
745 for (int i = 0; i < sc->num_queues; i++, txr++)
746 ixgbe_setup_transmit_ring(txr);
747
748 return (0);
749 } /* ixgbe_setup_transmit_structures */
750
751 /************************************************************************
752 * ixgbe_free_transmit_structures - Free all transmit rings.
753 ************************************************************************/
754 void
755 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
756 {
757 struct tx_ring *txr = sc->tx_rings;
758
759 for (int i = 0; i < sc->num_queues; i++, txr++) {
760 ixgbe_free_transmit_buffers(txr);
761 ixgbe_dma_free(sc, &txr->txdma);
762 IXGBE_TX_LOCK_DESTROY(txr);
763 }
764 kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
765 } /* ixgbe_free_transmit_structures */
766
767 /************************************************************************
768 * ixgbe_free_transmit_buffers
769 *
770 * Free transmit ring related data structures.
771 ************************************************************************/
772 static void
773 ixgbe_free_transmit_buffers(struct tx_ring *txr)
774 {
775 struct ixgbe_softc *sc = txr->sc;
776 struct ixgbe_tx_buf *tx_buffer;
777 int i;
778
779 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
780
781 if (txr->tx_buffers == NULL)
782 return;
783
784 tx_buffer = txr->tx_buffers;
785 for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
786 if (tx_buffer->m_head != NULL) {
787 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
788 0, tx_buffer->m_head->m_pkthdr.len,
789 BUS_DMASYNC_POSTWRITE);
790 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
791 m_freem(tx_buffer->m_head);
792 tx_buffer->m_head = NULL;
793 if (tx_buffer->map != NULL) {
794 ixgbe_dmamap_destroy(txr->txtag,
795 tx_buffer->map);
796 tx_buffer->map = NULL;
797 }
798 } else if (tx_buffer->map != NULL) {
799 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
800 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
801 tx_buffer->map = NULL;
802 }
803 }
804 if (txr->txr_interq != NULL) {
805 struct mbuf *m;
806
807 while ((m = pcq_get(txr->txr_interq)) != NULL)
808 m_freem(m);
809 pcq_destroy(txr->txr_interq);
810 }
811 if (txr->tx_buffers != NULL) {
812 kmem_free(txr->tx_buffers,
813 sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
814 txr->tx_buffers = NULL;
815 }
816 if (txr->txtag != NULL) {
817 ixgbe_dma_tag_destroy(txr->txtag);
818 txr->txtag = NULL;
819 }
820 } /* ixgbe_free_transmit_buffers */
821
822 /************************************************************************
823 * ixgbe_tx_ctx_setup
824 *
825 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
826 ************************************************************************/
827 static int
828 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
829 u32 *cmd_type_len, u32 *olinfo_status)
830 {
831 struct ixgbe_softc *sc = txr->sc;
832 struct ixgbe_adv_tx_context_desc *TXD;
833 struct ether_vlan_header *eh;
834 #ifdef INET
835 struct ip *ip;
836 #endif
837 #ifdef INET6
838 struct ip6_hdr *ip6;
839 #endif
840 int ehdrlen, ip_hlen = 0;
841 int offload = TRUE;
842 int ctxd = txr->next_avail_desc;
843 u32 vlan_macip_lens = 0;
844 u32 type_tucmd_mlhl = 0;
845 u16 vtag = 0;
846 u16 etype;
847 u8 ipproto = 0;
848 char *l3d;
849
850 /* First check if TSO is to be used */
851 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
852 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
853
854 if (rv != 0)
855 IXGBE_EVC_ADD(&sc->tso_err, 1);
856 return rv;
857 }
858
859 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
860 offload = FALSE;
861
862 /* Indicate the whole packet as payload when not doing TSO */
863 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
864
865 /*
866 * In advanced descriptors the vlan tag must
867 * be placed into the context descriptor. Hence
868 * we need to make one even if not doing offloads.
869 */
870 if (vlan_has_tag(mp)) {
871 vtag = htole16(vlan_get_tag(mp));
872 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
873 } else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
874 (offload == FALSE))
875 return (0);
876
877 /*
878 * Determine where frame payload starts.
879 * Jump over vlan headers if already present,
880 * helpful for QinQ too.
881 */
882 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
883 eh = mtod(mp, struct ether_vlan_header *);
884 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
886 etype = ntohs(eh->evl_proto);
887 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888 } else {
889 etype = ntohs(eh->evl_encap_proto);
890 ehdrlen = ETHER_HDR_LEN;
891 }
892
893 /* Set the ether header length */
894 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
895
896 if (offload == FALSE)
897 goto no_offloads;
898
899 /*
900 * If the first mbuf only includes the ethernet header,
901 * jump to the next one
902 * XXX: This assumes the stack splits mbufs containing headers
903 * on header boundaries
904 * XXX: And assumes the entire IP header is contained in one mbuf
905 */
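/*
 * For example, a chain whose first mbuf holds exactly the 14-byte
 * Ethernet header falls into the mp->m_len == ehdrlen case and the IP
 * header is read from the start of the second mbuf; otherwise the IP
 * header is assumed to start at offset ehdrlen within the first mbuf.
 */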
906 if (mp->m_len == ehdrlen && mp->m_next)
907 l3d = mtod(mp->m_next, char *);
908 else
909 l3d = mtod(mp, char *) + ehdrlen;
910
911 switch (etype) {
912 #ifdef INET
913 case ETHERTYPE_IP:
914 ip = (struct ip *)(l3d);
915 ip_hlen = ip->ip_hl << 2;
916 ipproto = ip->ip_p;
917 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
918 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
919 ip->ip_sum == 0);
920 break;
921 #endif
922 #ifdef INET6
923 case ETHERTYPE_IPV6:
924 ip6 = (struct ip6_hdr *)(l3d);
925 ip_hlen = sizeof(struct ip6_hdr);
926 ipproto = ip6->ip6_nxt;
927 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
928 break;
929 #endif
930 default:
931 offload = false;
932 break;
933 }
934
935 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
936 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937
938 vlan_macip_lens |= ip_hlen;
939
940 /* No support for offloads for non-L4 next headers */
941 switch (ipproto) {
942 case IPPROTO_TCP:
943 if (mp->m_pkthdr.csum_flags &
944 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
945 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
946 else
947 offload = false;
948 break;
949 case IPPROTO_UDP:
950 if (mp->m_pkthdr.csum_flags &
951 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
952 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
953 else
954 offload = false;
955 break;
956 default:
957 offload = false;
958 break;
959 }
960
961 if (offload) /* Insert L4 checksum into data descriptors */
962 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
963
964 no_offloads:
965 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966
967 /* Now ready a context descriptor */
968 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
969
970 /* Now copy bits into descriptor */
971 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
972 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
973 TXD->seqnum_seed = htole32(0);
974 TXD->mss_l4len_idx = htole32(0);
975
976 /* We've consumed the first desc, adjust counters */
977 if (++ctxd == txr->num_desc)
978 ctxd = 0;
979 txr->next_avail_desc = ctxd;
980 --txr->tx_avail;
981
982 return (0);
983 } /* ixgbe_tx_ctx_setup */
984
985 /************************************************************************
986 * ixgbe_tso_setup
987 *
988 * Setup work for hardware segmentation offload (TSO) on
989 * adapters using advanced tx descriptors
990 ************************************************************************/
991 static int
992 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
993 u32 *olinfo_status)
994 {
995 struct ixgbe_adv_tx_context_desc *TXD;
996 struct ether_vlan_header *eh;
997 #ifdef INET6
998 struct ip6_hdr *ip6;
999 #endif
1000 #ifdef INET
1001 struct ip *ip;
1002 #endif
1003 struct tcphdr *th;
1004 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
1005 u32 vlan_macip_lens = 0;
1006 u32 type_tucmd_mlhl = 0;
1007 u32 mss_l4len_idx = 0, paylen;
1008 u16 vtag = 0, eh_type;
1009
1010 /*
1011 * Determine where frame payload starts.
1012 * Jump over vlan headers if already present
1013 */
1014 eh = mtod(mp, struct ether_vlan_header *);
1015 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1016 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1017 eh_type = eh->evl_proto;
1018 } else {
1019 ehdrlen = ETHER_HDR_LEN;
1020 eh_type = eh->evl_encap_proto;
1021 }
1022
1023 switch (ntohs(eh_type)) {
1024 #ifdef INET
1025 case ETHERTYPE_IP:
1026 ip = (struct ip *)(mp->m_data + ehdrlen);
1027 if (ip->ip_p != IPPROTO_TCP)
1028 return (ENXIO);
1029 ip->ip_sum = 0;
1030 ip_hlen = ip->ip_hl << 2;
1031 th = (struct tcphdr *)((char *)ip + ip_hlen);
1032 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1033 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1034 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1035 /* Tell transmit desc to also do IPv4 checksum. */
1036 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1037 break;
1038 #endif
1039 #ifdef INET6
1040 case ETHERTYPE_IPV6:
1041 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1042 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1043 if (ip6->ip6_nxt != IPPROTO_TCP)
1044 return (ENXIO);
1045 ip_hlen = sizeof(struct ip6_hdr);
1046 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1047 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1048 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1049 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1050 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1051 break;
1052 #endif
1053 default:
1054 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1055 __func__, ntohs(eh_type));
1056 break;
1057 }
1058
1059 ctxd = txr->next_avail_desc;
1060 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1061
1062 tcp_hlen = th->th_off << 2;
1063
1064 /* This is used in the transmit desc in encap */
1065 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
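/*
 * For example, a 32254-byte TSO packet with a 14-byte Ethernet header,
 * a 20-byte IPv4 header and a 20-byte TCP header gives
 * paylen = 32254 - 14 - 20 - 20 = 32200, the TCP payload the hardware
 * will slice into MSS-sized segments.
 */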
1066
1067 /* VLAN MACLEN IPLEN */
1068 if (vlan_has_tag(mp)) {
1069 vtag = htole16(vlan_get_tag(mp));
1070 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1071 }
1072
1073 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1074 vlan_macip_lens |= ip_hlen;
1075 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1076
1077 /* ADV DTYPE TUCMD */
1078 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1079 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1080 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1081
1082 /* MSS L4LEN IDX */
1083 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1084 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1085 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1086
1087 TXD->seqnum_seed = htole32(0);
1088
1089 if (++ctxd == txr->num_desc)
1090 ctxd = 0;
1091
1092 txr->tx_avail--;
1093 txr->next_avail_desc = ctxd;
1094 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1095 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1096 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1097 IXGBE_EVC_ADD(&txr->tso_tx, 1);
1098
1099 return (0);
1100 } /* ixgbe_tso_setup */
1101
1102
1103 /************************************************************************
1104 * ixgbe_txeof
1105 *
1106 * Examine each tx_buffer in the used queue. If the hardware is done
1107 * processing the packet then free associated resources. The
1108 * tx_buffer is put back on the free queue.
1109 ************************************************************************/
1110 bool
1111 ixgbe_txeof(struct tx_ring *txr)
1112 {
1113 struct ixgbe_softc *sc = txr->sc;
1114 struct ifnet *ifp = sc->ifp;
1115 struct ixgbe_tx_buf *buf;
1116 union ixgbe_adv_tx_desc *txd;
1117 u32 work, processed = 0;
1118 u32 limit = sc->tx_process_limit;
1119 u16 avail;
1120
1121 KASSERT(mutex_owned(&txr->tx_mtx));
1122
1123 #ifdef DEV_NETMAP
1124 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1125 (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1126 struct netmap_adapter *na = NA(sc->ifp);
1127 struct netmap_kring *kring = na->tx_rings[txr->me];
1128 txd = txr->tx_base;
1129 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1130 BUS_DMASYNC_POSTREAD);
1131 /*
1132 * In netmap mode, all the work is done in the context
1133 * of the client thread. Interrupt handlers only wake up
1134 * clients, which may be sleeping on individual rings
1135 * or on a global resource for all rings.
1136 * To implement tx interrupt mitigation, we wake up the client
1137 * thread roughly every half ring, even if the NIC interrupts
1138 * more frequently. This is implemented as follows:
1139 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1140 * the slot that should wake up the thread (nkr_num_slots
1141 * means the user thread should not be woken up);
1142 * - the driver ignores tx interrupts unless netmap_mitigate=0
1143 * or the slot has the DD bit set.
1144 */
1145 if (kring->nr_kflags < kring->nkr_num_slots &&
1146 le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1147 netmap_tx_irq(ifp, txr->me);
1148 }
1149 return false;
1150 }
1151 #endif /* DEV_NETMAP */
1152
1153 if (txr->tx_avail == txr->num_desc) {
1154 txr->busy = 0;
1155 return false;
1156 }
1157
1158 /* Get work starting point */
1159 work = txr->next_to_clean;
1160 buf = &txr->tx_buffers[work];
1161 txd = &txr->tx_base[work];
1162 work -= txr->num_desc; /* The distance to ring end */
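/*
 * Keeping "work" biased by -num_desc (in modular u32 arithmetic) turns
 * the wrap test below into a simple "has it reached zero" check: with,
 * say, num_desc = 1024 and next_to_clean = 1000, work counts up from
 * (u32)-24 and hits zero exactly when the scan steps past the last
 * descriptor, at which point buf and txd are reset to the ring start.
 */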
1163 avail = txr->tx_avail;
1164 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1165 BUS_DMASYNC_POSTREAD);
1166
1167 do {
1168 union ixgbe_adv_tx_desc *eop = buf->eop;
1169 if (eop == NULL) /* No work */
1170 break;
1171
1172 if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1173 break; /* I/O not complete */
1174
1175 if (buf->m_head) {
1176 txr->bytes += buf->m_head->m_pkthdr.len;
1177 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1178 0, buf->m_head->m_pkthdr.len,
1179 BUS_DMASYNC_POSTWRITE);
1180 ixgbe_dmamap_unload(txr->txtag, buf->map);
1181 m_freem(buf->m_head);
1182 buf->m_head = NULL;
1183 }
1184 buf->eop = NULL;
1185 ++avail;
1186
1187 /* We clean the range if multi segment */
1188 while (txd != eop) {
1189 ++txd;
1190 ++buf;
1191 ++work;
1192 /* wrap the ring? */
1193 if (__predict_false(!work)) {
1194 work -= txr->num_desc;
1195 buf = txr->tx_buffers;
1196 txd = txr->tx_base;
1197 }
1198 if (buf->m_head) {
1199 txr->bytes +=
1200 buf->m_head->m_pkthdr.len;
1201 bus_dmamap_sync(txr->txtag->dt_dmat,
1202 buf->map,
1203 0, buf->m_head->m_pkthdr.len,
1204 BUS_DMASYNC_POSTWRITE);
1205 ixgbe_dmamap_unload(txr->txtag,
1206 buf->map);
1207 m_freem(buf->m_head);
1208 buf->m_head = NULL;
1209 }
1210 ++avail;
1211 buf->eop = NULL;
1212
1213 }
1214 ++processed;
1215
1216 /* Try the next packet */
1217 ++txd;
1218 ++buf;
1219 ++work;
1220 /* reset with a wrap */
1221 if (__predict_false(!work)) {
1222 work -= txr->num_desc;
1223 buf = txr->tx_buffers;
1224 txd = txr->tx_base;
1225 }
1226 prefetch(txd);
1227 } while (__predict_true(--limit));
1228
1229 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1230 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1231
1232 work += txr->num_desc;
1233 txr->next_to_clean = work;
1234 if (processed) {
1235 txr->tx_avail = avail;
1236 txr->txr_no_space = false;
1237 txr->packets += processed;
1238 if_statadd(ifp, if_opackets, processed);
1239 }
1240
1241 /*
1242 * Queue hang detection: we know there is work
1243 * outstanding or the early return above would have
1244 * been taken, so increment busy if nothing was
1245 * cleaned; local_timer will then check the counter
1246 * and mark the queue HUNG if it exceeds the maximum
1247 * number of attempts.
1248 */
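/*
 * For example, four consecutive calls that find work outstanding but
 * clean nothing take busy from 1 up to 5; a single call that cleans
 * even one packet drops it back to 1, and a fully drained ring resets
 * it to 0 below.
 */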
1249 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1250 ++txr->busy;
1251 /*
1252 * If anything was cleaned we reset the state to 1;
1253 * note this will clear HUNG if it is set.
1254 */
1255 if (processed)
1256 txr->busy = 1;
1257
1258 if (txr->tx_avail == txr->num_desc)
1259 txr->busy = 0;
1260
1261 return ((limit > 0) ? false : true);
1262 } /* ixgbe_txeof */
1263
1264 #ifdef RSC
1265 /************************************************************************
1266 * ixgbe_rsc_count
1267 *
1268 * Used to detect a descriptor that has been merged by Hardware RSC.
1269 ************************************************************************/
1270 static inline u32
1271 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1272 {
1273 return (le32toh(rx->wb.lower.lo_dword.data) &
1274 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1275 } /* ixgbe_rsc_count */
1276
1277 /************************************************************************
1278 * ixgbe_setup_hw_rsc
1279 *
1280 * Initialize Hardware RSC (LRO) feature on 82599
1281 * for an RX ring, this is toggled by the LRO capability
1282 * even though it is transparent to the stack.
1283 *
1284 * NOTE: Since this HW feature only works with IPv4 and
1285 * testing has shown soft LRO to be as effective,
1286 * this feature will be disabled by default.
1287 ************************************************************************/
1288 static void
1289 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1290 {
1291 struct ixgbe_softc *sc = rxr->sc;
1292 struct ixgbe_hw *hw = &sc->hw;
1293 u32 rscctrl, rdrxctl;
1294
1295 /* If turning LRO/RSC off we need to disable it */
1296 if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1297 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1298 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1299 return;
1300 }
1301
1302 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1303 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1304 #ifdef DEV_NETMAP
1305 /* Always strip CRC unless Netmap disabled it */
1306 if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1307 !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1308 ix_crcstrip)
1309 #endif /* DEV_NETMAP */
1310 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1311 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1312 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1313
1314 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1315 rscctrl |= IXGBE_RSCCTL_RSCEN;
1316 /*
1317 * Limit the total number of descriptors that
1318 * can be combined, so it does not exceed 64K
1319 */
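/*
 * For example, with 2 KB (MCLBYTES) clusters the MAXDESC_16 setting
 * caps a coalesced receive at 16 * 2 KB = 32 KB, while 16 KB clusters
 * allow only a single descriptor to be merged, keeping every
 * combination below the 64 KB limit.
 */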
1320 if (rxr->mbuf_sz == MCLBYTES)
1321 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1322 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1323 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1324 else if (rxr->mbuf_sz == MJUM9BYTES)
1325 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1326 else /* Using 16K cluster */
1327 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1328
1329 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1330
1331 /* Enable TCP header recognition */
1332 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1333 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1334
1335 /* Disable RSC for ACK packets */
1336 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1337 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1338
1339 rxr->hw_rsc = TRUE;
1340 } /* ixgbe_setup_hw_rsc */
1341 #endif
1342
1343 /************************************************************************
1344 * ixgbe_refresh_mbufs
1345 *
1346 * Refresh mbuf buffers for RX descriptor rings
1347 * - now keeps its own state so discards due to resource
1348 * exhaustion are unnecessary; if an mbuf cannot be obtained
1349 * it just returns, keeping its placeholder, so it can simply
1350 * be called again to retry.
1351 ************************************************************************/
1352 static void
1353 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1354 {
1355 struct ixgbe_softc *sc = rxr->sc;
1356 struct ixgbe_rx_buf *rxbuf;
1357 struct mbuf *mp;
1358 int i, error;
1359 bool refreshed = false;
1360
1361 i = rxr->next_to_refresh;
1362 /* next_to_refresh points to the previous one */
1363 if (++i == rxr->num_desc)
1364 i = 0;
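/*
 * For example, with next_to_refresh == 511 on a 512-descriptor ring
 * the scan below starts at index 0 and keeps replacing buffers until
 * it reaches "limit" (roughly where the RX cleanup loop stopped),
 * leaving next_to_refresh at the last descriptor given back to
 * hardware when the tail register is updated.
 */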
1365
1366 while (i != limit) {
1367 rxbuf = &rxr->rx_buffers[i];
1368 if (__predict_false(rxbuf->buf == NULL)) {
1369 mp = ixgbe_getcl();
1370 if (mp == NULL) {
1371 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1372 goto update;
1373 }
1374 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1375 IXGBE_M_ADJ(sc, rxr, mp);
1376 } else
1377 mp = rxbuf->buf;
1378
1379 /* If we're dealing with an mbuf that was copied rather
1380 * than replaced, there's no need to go through busdma.
1381 */
1382 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1383 /* Get the memory mapping */
1384 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1385 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1386 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1387 if (__predict_false(error != 0)) {
1388 device_printf(sc->dev, "Refresh mbufs: "
1389 "payload dmamap load failure - %d\n",
1390 error);
1391 m_free(mp);
1392 rxbuf->buf = NULL;
1393 goto update;
1394 }
1395 rxbuf->buf = mp;
1396 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1397 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1398 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1399 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1400 } else {
1401 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1402 rxbuf->flags &= ~IXGBE_RX_COPY;
1403 }
1404
1405 refreshed = true;
1406 /* next_to_refresh points to the previous one */
1407 rxr->next_to_refresh = i;
1408 if (++i == rxr->num_desc)
1409 i = 0;
1410 }
1411
1412 update:
1413 if (refreshed) /* Update hardware tail index */
1414 IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1415
1416 return;
1417 } /* ixgbe_refresh_mbufs */
1418
1419 /************************************************************************
1420 * ixgbe_allocate_receive_buffers
1421 *
1422 * Allocate memory for rx_buffer structures. Since we use one
1423 * rx_buffer per received packet, the maximum number of rx_buffers
1424 * that we'll need is equal to the number of receive descriptors
1425 * that we've allocated.
1426 ************************************************************************/
1427 static int
1428 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1429 {
1430 struct ixgbe_softc *sc = rxr->sc;
1431 device_t dev = sc->dev;
1432 struct ixgbe_rx_buf *rxbuf;
1433 int bsize, error;
1434
1435 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1436 rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
1437
1438 error = ixgbe_dma_tag_create(
1439 /* parent */ sc->osdep.dmat,
1440 /* alignment */ 1,
1441 /* bounds */ 0,
1442 /* maxsize */ MJUM16BYTES,
1443 /* nsegments */ 1,
1444 /* maxsegsize */ MJUM16BYTES,
1445 /* flags */ 0,
1446 &rxr->ptag);
1447 if (error != 0) {
1448 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1449 goto fail;
1450 }
1451
1452 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1453 rxbuf = &rxr->rx_buffers[i];
1454 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1455 if (error) {
1456 aprint_error_dev(dev, "Unable to create RX dma map\n");
1457 goto fail;
1458 }
1459 }
1460
1461 return (0);
1462
1463 fail:
1464 /* Frees all, but can handle partial completion */
1465 ixgbe_free_receive_structures(sc);
1466
1467 return (error);
1468 } /* ixgbe_allocate_receive_buffers */
1469
1470 /************************************************************************
1471 * ixgbe_free_receive_ring
1472 ************************************************************************/
1473 static void
1474 ixgbe_free_receive_ring(struct rx_ring *rxr)
1475 {
1476 for (int i = 0; i < rxr->num_desc; i++) {
1477 ixgbe_rx_discard(rxr, i);
1478 }
1479 } /* ixgbe_free_receive_ring */
1480
1481 /************************************************************************
1482 * ixgbe_setup_receive_ring
1483 *
1484 * Initialize a receive ring and its buffers.
1485 ************************************************************************/
1486 static int
1487 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1488 {
1489 struct ixgbe_softc *sc;
1490 struct ixgbe_rx_buf *rxbuf;
1491 #ifdef LRO
1492 struct ifnet *ifp;
1493 struct lro_ctrl *lro = &rxr->lro;
1494 #endif /* LRO */
1495 #ifdef DEV_NETMAP
1496 struct netmap_adapter *na = NA(rxr->sc->ifp);
1497 struct netmap_slot *slot;
1498 #endif /* DEV_NETMAP */
1499 int rsize, error = 0;
1500
1501 sc = rxr->sc;
1502 #ifdef LRO
1503 ifp = sc->ifp;
1504 #endif /* LRO */
1505
1506 /* Clear the ring contents */
1507 IXGBE_RX_LOCK(rxr);
1508
1509 #ifdef DEV_NETMAP
1510 if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1511 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1512 #endif /* DEV_NETMAP */
1513
1514 rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
1515 KASSERT((rsize % DBA_ALIGN) == 0);
1516 bzero((void *)rxr->rx_base, rsize);
1517 /* Cache the size */
1518 rxr->mbuf_sz = sc->rx_mbuf_sz;
1519
1520 /* Free current RX buffer structs and their mbufs */
1521 ixgbe_free_receive_ring(rxr);
1522
1523 /* Now replenish the mbufs */
1524 for (int i = 0; i < rxr->num_desc; i++) {
1525 struct mbuf *mp;
1526
1527 rxbuf = &rxr->rx_buffers[i];
1528
1529 #ifdef DEV_NETMAP
1530 /*
1531 * In netmap mode, fill the map and set the buffer
1532 * address in the NIC ring, considering the offset
1533 * between the netmap and NIC rings (see comment in
1534 * ixgbe_setup_transmit_ring() ). No need to allocate
1535 * an mbuf, so end the block with a continue;
1536 */
1537 if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1538 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1539 uint64_t paddr;
1540 void *addr;
1541
1542 addr = PNMB(na, slot + sj, &paddr);
1543 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1544 /* Update descriptor and the cached value */
1545 rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1546 rxbuf->addr = htole64(paddr);
1547 continue;
1548 }
1549 #endif /* DEV_NETMAP */
1550
1551 rxbuf->flags = 0;
1552 rxbuf->buf = ixgbe_getcl();
1553 if (rxbuf->buf == NULL) {
1554 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1555 error = ENOBUFS;
1556 goto fail;
1557 }
1558 mp = rxbuf->buf;
1559 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1560 IXGBE_M_ADJ(sc, rxr, mp);
1561 /* Get the memory mapping */
1562 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1563 mp, BUS_DMA_NOWAIT);
1564 if (error != 0) {
1565 /*
1566 * Clear this entry for later cleanup in
1567 * ixgbe_discard() which is called via
1568 * ixgbe_free_receive_ring().
1569 */
1570 m_freem(mp);
1571 rxbuf->buf = NULL;
1572 goto fail;
1573 }
1574 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1575 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1576 /* Update the descriptor and the cached value */
1577 rxr->rx_base[i].read.pkt_addr =
1578 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1579 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1580 }
1581
1582 /* Setup our descriptor indices */
1583 rxr->next_to_check = 0;
1584 rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
1585 #ifdef LRO
1586 rxr->lro_enabled = FALSE;
1587 #endif
1588 rxr->discard_multidesc = false;
1589 IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1590 #if 0 /* NetBSD */
1591 IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1592 #if 1 /* Fix inconsistency */
1593 IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1594 #endif
1595 #endif
1596 rxr->vtag_strip = FALSE;
1597
1598 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1599 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1600
1601 /*
1602 * Now set up the LRO interface
1603 */
1604 #ifdef RSC
1605 if (ixgbe_rsc_enable)
1606 ixgbe_setup_hw_rsc(rxr);
1607 #endif
1608 #ifdef LRO
1609 #ifdef RSC
1610 else
1611 #endif
1612 if (ifp->if_capenable & IFCAP_LRO) {
1613 device_t dev = sc->dev;
1614 int err = tcp_lro_init(lro);
1615 if (err) {
1616 device_printf(dev, "LRO Initialization failed!\n");
1617 goto fail;
1618 }
1619 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1620 rxr->lro_enabled = TRUE;
1621 lro->ifp = sc->ifp;
1622 }
1623 #endif /* LRO */
1624
1625 IXGBE_RX_UNLOCK(rxr);
1626
1627 return (0);
1628
1629 fail:
1630 ixgbe_free_receive_ring(rxr);
1631 IXGBE_RX_UNLOCK(rxr);
1632
1633 return (error);
1634 } /* ixgbe_setup_receive_ring */
1635
1636 /************************************************************************
1637 * ixgbe_setup_receive_structures - Initialize all receive rings.
1638 ************************************************************************/
1639 int
1640 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1641 {
1642 struct rx_ring *rxr = sc->rx_rings;
1643 int j;
1644
1645 INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1646 for (j = 0; j < sc->num_queues; j++, rxr++)
1647 if (ixgbe_setup_receive_ring(rxr))
1648 goto fail;
1649
1650 return (0);
1651 fail:
1652 /*
1653 * Free the RX buffers allocated so far; we only handle
1654 * the rings that completed, since the failing ring will
1655 * have cleaned up after itself. 'j' failed, so it's the terminus.
1656 */
1657 for (int i = 0; i < j; ++i) {
1658 rxr = &sc->rx_rings[i];
1659 IXGBE_RX_LOCK(rxr);
1660 ixgbe_free_receive_ring(rxr);
1661 IXGBE_RX_UNLOCK(rxr);
1662 }
1663
1664 return (ENOBUFS);
1665 } /* ixgbe_setup_receive_structures */
1666
1667
1668 /************************************************************************
1669 * ixgbe_free_receive_structures - Free all receive rings.
1670 ************************************************************************/
1671 void
1672 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1673 {
1674 struct rx_ring *rxr = sc->rx_rings;
1675
1676 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1677
1678 for (int i = 0; i < sc->num_queues; i++, rxr++) {
1679 ixgbe_free_receive_buffers(rxr);
1680 #ifdef LRO
1681 /* Free LRO memory */
1682 tcp_lro_free(&rxr->lro);
1683 #endif /* LRO */
1684 /* Free the ring memory as well */
1685 ixgbe_dma_free(sc, &rxr->rxdma);
1686 IXGBE_RX_LOCK_DESTROY(rxr);
1687 }
1688
1689 kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
1690 } /* ixgbe_free_receive_structures */
1691
1692
1693 /************************************************************************
1694 * ixgbe_free_receive_buffers - Free receive ring data structures
1695 ************************************************************************/
1696 static void
1697 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1698 {
1699 struct ixgbe_softc *sc = rxr->sc;
1700 struct ixgbe_rx_buf *rxbuf;
1701
1702 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1703
1704 /* Cleanup any existing buffers */
1705 if (rxr->rx_buffers != NULL) {
1706 for (int i = 0; i < sc->num_rx_desc; i++) {
1707 rxbuf = &rxr->rx_buffers[i];
1708 ixgbe_rx_discard(rxr, i);
1709 if (rxbuf->pmap != NULL) {
1710 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1711 rxbuf->pmap = NULL;
1712 }
1713 }
1714
1715 if (rxr->rx_buffers != NULL) {
1716 kmem_free(rxr->rx_buffers,
1717 sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
1718 rxr->rx_buffers = NULL;
1719 }
1720 }
1721
1722 if (rxr->ptag != NULL) {
1723 ixgbe_dma_tag_destroy(rxr->ptag);
1724 rxr->ptag = NULL;
1725 }
1726
1727 return;
1728 } /* ixgbe_free_receive_buffers */
1729
1730 /************************************************************************
1731 * ixgbe_rx_input
1732 ************************************************************************/
1733 static __inline void
1734 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1735 u32 ptype)
1736 {
1737 struct ixgbe_softc *sc = ifp->if_softc;
1738
1739 #ifdef LRO
1740 struct ethercom *ec = &sc->osdep.ec;
1741
1742 /*
1743 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1744 * been computed by hardware, and the Ethernet header must not carry a
1745 * VLAN tag. In the case of IPv6 we do not yet support ext. hdrs.
1746 */
1747 if (rxr->lro_enabled &&
1748 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1749 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1750 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1751 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1752 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1753 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1754 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1755 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1756 /*
1757 * Send to the stack if:
1758 * - LRO not enabled, or
1759 * - no LRO resources, or
1760 * - lro enqueue fails
1761 */
1762 if (rxr->lro.lro_cnt != 0)
1763 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1764 return;
1765 }
1766 #endif /* LRO */
1767
1768 if_percpuq_enqueue(sc->ipq, m);
1769 } /* ixgbe_rx_input */
1770
1771 /************************************************************************
1772 * ixgbe_rx_discard
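 *
 * Release the mbuf(s) still attached to receive slot 'i' (including a
 * partially assembled chain), unloading the DMA map so the slot can be
 * repopulated by the normal refresh path.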
1773 ************************************************************************/
1774 static __inline void
1775 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1776 {
1777 struct ixgbe_rx_buf *rbuf;
1778
1779 rbuf = &rxr->rx_buffers[i];
1780
1781 /*
1782 * With advanced descriptors the writeback clobbers the buffer addrs,
1783 	 * so it's easier to just free the existing mbufs and take the normal
1784 * refresh path to get new buffers and mapping.
1785 */
1786
1787 	if (rbuf->fmp != NULL) {	/* Partial chain? */
1788 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1789 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1790 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1791 m_freem(rbuf->fmp);
1792 rbuf->fmp = NULL;
1793 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1794 } else if (rbuf->buf) {
1795 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1796 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1797 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1798 m_free(rbuf->buf);
1799 rbuf->buf = NULL;
1800 }
1801
1802 rbuf->flags = 0;
1803
1804 return;
1805 } /* ixgbe_rx_discard */
1806
1807
1808 /************************************************************************
1809 * ixgbe_rxeof
1810 *
1811  * Executes in interrupt context. Replenishes the
1812  * mbufs in the descriptor ring and passes data which has
1813  * been DMA'd into host memory up to the upper layer.
1814 *
1815 * Return TRUE for more work, FALSE for all clean.
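 *
 * The RX ring lock is taken and released internally, and at most
 * rx_process_limit descriptors are examined per call (plus whatever is
 * needed to finish discarding a broken multi-descriptor packet).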
1816 ************************************************************************/
1817 bool
1818 ixgbe_rxeof(struct ix_queue *que)
1819 {
1820 struct ixgbe_softc *sc = que->sc;
1821 struct rx_ring *rxr = que->rxr;
1822 struct ifnet *ifp = sc->ifp;
1823 #ifdef LRO
1824 struct lro_ctrl *lro = &rxr->lro;
1825 #endif /* LRO */
1826 union ixgbe_adv_rx_desc *cur;
1827 struct ixgbe_rx_buf *rbuf, *nbuf;
1828 int i, nextp, processed = 0;
1829 u32 staterr = 0;
1830 u32 loopcount = 0, numdesc;
1831 u32 limit = sc->rx_process_limit;
1832 u32 rx_copy_len = sc->rx_copy_len;
1833 bool discard_multidesc = rxr->discard_multidesc;
1834 bool wraparound = false;
1835 unsigned int syncremain;
1836 #ifdef RSS
1837 u16 pkt_info;
1838 #endif
1839
1840 IXGBE_RX_LOCK(rxr);
1841
1842 #ifdef DEV_NETMAP
1843 if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1844 /* Same as the txeof routine: wakeup clients on intr. */
1845 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1846 IXGBE_RX_UNLOCK(rxr);
1847 return (FALSE);
1848 }
1849 }
1850 #endif /* DEV_NETMAP */
1851
1852 	/* Sync rx_process_limit descriptors, or only up to the ring end on wrap */
1853 if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1854 /* Non-wraparound */
1855 numdesc = limit;
1856 syncremain = 0;
1857 } else {
1858 		/* Wraparound. Sync only up to the end of the ring for now. */
1859 numdesc = rxr->num_desc - rxr->next_to_check;
1860
1861 		/* Remember how many descriptors remain at the ring start */
1862 syncremain = limit - numdesc;
1863 }
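	/*
	 * Illustrative example (hypothetical values): with num_desc = 1024,
	 * next_to_check = 1000 and limit = 256, the sync below covers
	 * descriptors 1000-1023 (numdesc = 24), and the remaining
	 * syncremain = 232 descriptors are synced from index 0 once the
	 * scan wraps around.
	 */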
1864 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1865 rxr->rxdma.dma_map,
1866 sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1867 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1868 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1869
1870 /*
1871 	 * The maximum number of loop iterations is rx_process_limit. If
1872 	 * discard_multidesc is true, keep processing so that a broken packet
1873 	 * is not passed to the upper layer.
1874 */
1875 for (i = rxr->next_to_check;
1876 (loopcount < limit) || (discard_multidesc == true);) {
1877
1878 struct mbuf *sendmp, *mp;
1879 struct mbuf *newmp;
1880 #ifdef RSC
1881 u32 rsc;
1882 #endif
1883 u32 ptype;
1884 u16 len;
1885 u16 vtag = 0;
1886 bool eop;
1887 bool discard = false;
1888
1889 if (wraparound) {
1890 			/* Sync the remaining descriptors at the ring start. */
1891 KASSERT(syncremain != 0);
1892 numdesc = syncremain;
1893 wraparound = false;
1894 } else if (__predict_false(loopcount >= limit)) {
1895 KASSERT(discard_multidesc == true);
1896 numdesc = 1;
1897 } else
1898 numdesc = 0;
1899
1900 if (numdesc != 0)
1901 bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1902 rxr->rxdma.dma_map, 0,
1903 sizeof(union ixgbe_adv_rx_desc) * numdesc,
1904 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1905
1906 cur = &rxr->rx_base[i];
1907 staterr = le32toh(cur->wb.upper.status_error);
1908 #ifdef RSS
1909 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1910 #endif
1911
1912 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1913 break;
1914
1915 loopcount++;
1916 sendmp = newmp = NULL;
1917 nbuf = NULL;
1918 #ifdef RSC
1919 rsc = 0;
1920 #endif
1921 cur->wb.upper.status_error = 0;
1922 rbuf = &rxr->rx_buffers[i];
1923 mp = rbuf->buf;
1924
1925 len = le16toh(cur->wb.upper.length);
1926 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1927 IXGBE_RXDADV_PKTTYPE_MASK;
1928 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1929
1930 /* Make sure bad packets are discarded */
1931 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1932 #if __FreeBSD_version >= 1100036
1933 if (sc->feat_en & IXGBE_FEATURE_VF)
1934 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1935 #endif
1936 IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1937 ixgbe_rx_discard(rxr, i);
1938 discard_multidesc = false;
1939 goto next_desc;
1940 }
1941
1942 if (__predict_false(discard_multidesc))
1943 discard = true;
1944 else {
1945 /* Pre-alloc new mbuf. */
1946
1947 if ((rbuf->fmp == NULL) &&
1948 eop && (len <= rx_copy_len)) {
1949 /* For short packet. See below. */
1950 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1951 if (__predict_false(sendmp == NULL)) {
1952 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1953 discard = true;
1954 }
1955 } else {
1956 /* For long packet. */
1957 newmp = ixgbe_getcl();
1958 if (__predict_false(newmp == NULL)) {
1959 IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1960 discard = true;
1961 }
1962 }
1963 }
1964
1965 if (__predict_false(discard)) {
1966 /*
1967 * Descriptor initialization is already done by the
1968 * above code (cur->wb.upper.status_error = 0).
1969 * So, we can reuse current rbuf->buf for new packet.
1970 *
1971 * Rewrite the buffer addr, see comment in
1972 * ixgbe_rx_discard().
1973 */
1974 cur->read.pkt_addr = rbuf->addr;
1975 m_freem(rbuf->fmp);
1976 rbuf->fmp = NULL;
1977 if (!eop) {
1978 /* Discard the entire packet. */
1979 discard_multidesc = true;
1980 } else
1981 discard_multidesc = false;
1982 goto next_desc;
1983 }
1984 discard_multidesc = false;
1985
1986 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1987 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1988
1989 /*
1990 		 * On the 82599, which supports a hardware
1991 		 * LRO (called HW RSC), packets need not be
1992 		 * fragmented across sequential descriptors;
1993 		 * rather, the next descriptor is indicated
1994 		 * in bits of the current descriptor.
1995 		 * This also means that we might process
1996 		 * more than one packet at a time, something
1997 		 * that has never been true before; it
1998 		 * required eliminating the global chain
1999 		 * pointers in favor of what we are doing here. -jfv
2000 */
2001 if (!eop) {
2002 /*
2003 * Figure out the next descriptor
2004 * of this frame.
2005 */
2006 #ifdef RSC
2007 if (rxr->hw_rsc == TRUE) {
2008 rsc = ixgbe_rsc_count(cur);
2009 rxr->rsc_num += (rsc - 1);
2010 }
2011 if (rsc) { /* Get hardware index */
2012 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2013 IXGBE_RXDADV_NEXTP_SHIFT);
2014 } else
2015 #endif
2016 { /* Just sequential */
2017 nextp = i + 1;
2018 if (nextp == sc->num_rx_desc)
2019 nextp = 0;
2020 }
2021 nbuf = &rxr->rx_buffers[nextp];
2022 prefetch(nbuf);
2023 }
2024 /*
2025 * Rather than using the fmp/lmp global pointers
2026 * we now keep the head of a packet chain in the
2027 * buffer struct and pass this along from one
2028 * descriptor to the next, until we get EOP.
2029 */
2030 /*
2031 		 * See if there is a stored head for this chain;
2032 		 * it determines how this descriptor is handled.
2033 */
2034 if (rbuf->fmp != NULL) {
2035 /* Secondary frag */
2036 sendmp = rbuf->fmp;
2037
2038 /* Update new (used in future) mbuf */
2039 newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2040 IXGBE_M_ADJ(sc, rxr, newmp);
2041 rbuf->buf = newmp;
2042 rbuf->fmp = NULL;
2043
2044 /* For secondary frag */
2045 mp->m_len = len;
2046 mp->m_flags &= ~M_PKTHDR;
2047
2048 /* For sendmp */
2049 sendmp->m_pkthdr.len += mp->m_len;
2050 } else {
2051 /*
2052 * It's the first segment of a multi descriptor
2053 * packet or a single segment which contains a full
2054 * packet.
2055 */
2056
2057 if (eop && (len <= rx_copy_len)) {
2058 /*
2059 				 * Optimization: this might be a small packet,
2060 				 * perhaps just a TCP ACK. Copy it into a new
2061 				 * mbuf and leave the old mbuf+cluster for re-use.
2062 */
2063 sendmp->m_data += ETHER_ALIGN;
2064 memcpy(mtod(sendmp, void *),
2065 mtod(mp, void *), len);
2066 IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2067 rbuf->flags |= IXGBE_RX_COPY;
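				/*
				 * Assumption about the refresh path:
				 * IXGBE_RX_COPY marks this slot's original
				 * mbuf+cluster as still loaded in the DMA map
				 * so it can be reused without reallocation.
				 */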
2068 } else {
2069 /* For long packet */
2070
2071 /* Update new (used in future) mbuf */
2072 newmp->m_pkthdr.len = newmp->m_len
2073 = rxr->mbuf_sz;
2074 IXGBE_M_ADJ(sc, rxr, newmp);
2075 rbuf->buf = newmp;
2076 rbuf->fmp = NULL;
2077
2078 /* For sendmp */
2079 sendmp = mp;
2080 }
2081
2082 			/* First descriptor of a non-packet-split chain */
2083 sendmp->m_pkthdr.len = sendmp->m_len = len;
2084 }
2085 ++processed;
2086
2087 /* Pass the head pointer on */
2088 if (eop == 0) {
2089 nbuf->fmp = sendmp;
2090 sendmp = NULL;
2091 mp->m_next = nbuf->buf;
2092 } else { /* Sending this frame */
2093 m_set_rcvif(sendmp, ifp);
2094 ++rxr->packets;
2095 IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2096 			/* Capture data for adaptive interrupt moderation (AIM) */
2097 rxr->bytes += sendmp->m_pkthdr.len;
2098 IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2099 /* Process vlan info */
2100 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2101 vtag = le16toh(cur->wb.upper.vlan);
2102 if (vtag) {
2103 vlan_set_tag(sendmp, vtag);
2104 }
2105 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2106 ixgbe_rx_checksum(staterr, sendmp, ptype,
2107 &sc->stats.pf);
2108 }
2109
2110 #if 0 /* FreeBSD */
2111 /*
2112 * In case of multiqueue, we have RXCSUM.PCSD bit set
2113 * and never cleared. This means we have RSS hash
2114 * available to be used.
2115 */
2116 if (sc->num_queues > 1) {
2117 sendmp->m_pkthdr.flowid =
2118 le32toh(cur->wb.lower.hi_dword.rss);
2119 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2120 case IXGBE_RXDADV_RSSTYPE_IPV4:
2121 M_HASHTYPE_SET(sendmp,
2122 M_HASHTYPE_RSS_IPV4);
2123 break;
2124 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2125 M_HASHTYPE_SET(sendmp,
2126 M_HASHTYPE_RSS_TCP_IPV4);
2127 break;
2128 case IXGBE_RXDADV_RSSTYPE_IPV6:
2129 M_HASHTYPE_SET(sendmp,
2130 M_HASHTYPE_RSS_IPV6);
2131 break;
2132 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2133 M_HASHTYPE_SET(sendmp,
2134 M_HASHTYPE_RSS_TCP_IPV6);
2135 break;
2136 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2137 M_HASHTYPE_SET(sendmp,
2138 M_HASHTYPE_RSS_IPV6_EX);
2139 break;
2140 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2141 M_HASHTYPE_SET(sendmp,
2142 M_HASHTYPE_RSS_TCP_IPV6_EX);
2143 break;
2144 #if __FreeBSD_version > 1100000
2145 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2146 M_HASHTYPE_SET(sendmp,
2147 M_HASHTYPE_RSS_UDP_IPV4);
2148 break;
2149 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2150 M_HASHTYPE_SET(sendmp,
2151 M_HASHTYPE_RSS_UDP_IPV6);
2152 break;
2153 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2154 M_HASHTYPE_SET(sendmp,
2155 M_HASHTYPE_RSS_UDP_IPV6_EX);
2156 break;
2157 #endif
2158 default:
2159 M_HASHTYPE_SET(sendmp,
2160 M_HASHTYPE_OPAQUE_HASH);
2161 }
2162 } else {
2163 sendmp->m_pkthdr.flowid = que->msix;
2164 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2165 }
2166 #endif
2167 }
2168 next_desc:
2169 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2170 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2171
2172 /* Advance our pointers to the next descriptor. */
2173 if (++i == rxr->num_desc) {
2174 wraparound = true;
2175 i = 0;
2176 }
2177 rxr->next_to_check = i;
2178
2179 /* Now send to the stack or do LRO */
2180 if (sendmp != NULL)
2181 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2182
2183 /* Every 8 descriptors we go to refresh mbufs */
2184 if (processed == 8) {
2185 ixgbe_refresh_mbufs(rxr, i);
2186 processed = 0;
2187 }
2188 }
2189
2190 /* Save the current status */
2191 rxr->discard_multidesc = discard_multidesc;
2192
2193 /* Refresh any remaining buf structs */
2194 if (ixgbe_rx_unrefreshed(rxr))
2195 ixgbe_refresh_mbufs(rxr, i);
2196
2197 IXGBE_RX_UNLOCK(rxr);
2198
2199 #ifdef LRO
2200 /*
2201 * Flush any outstanding LRO work
2202 */
2203 tcp_lro_flush_all(lro);
2204 #endif /* LRO */
2205
2206 /*
2207 * Still have cleaning to do?
2208 */
2209 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2210 return (TRUE);
2211
2212 return (FALSE);
2213 } /* ixgbe_rxeof */
2214
2215
2216 /************************************************************************
2217 * ixgbe_rx_checksum
2218 *
2219 * Verify that the hardware indicated that the checksum is valid.
2220  * Inform the stack about the status of the checksum so that the
2221  * stack doesn't spend time verifying it again.
2222 ************************************************************************/
2223 static void
2224 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2225 struct ixgbe_hw_stats *stats)
2226 {
2227 u16 status = (u16)staterr;
2228 u8 errors = (u8)(staterr >> 24);
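	/*
	 * The assignments above split staterr: the low word carries the
	 * status bits (e.g. IPCS, L4CS) and the top byte the error bits
	 * (e.g. IPE, TCPE) of the advanced RX descriptor write-back.
	 */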
2229 #if 0
2230 bool sctp = false;
2231
2232 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2233 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2234 sctp = true;
2235 #endif
2236
2237 /* IPv4 checksum */
2238 if (status & IXGBE_RXD_STAT_IPCS) {
2239 IXGBE_EVC_ADD(&stats->ipcs, 1);
2240 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2241 /* IP Checksum Good */
2242 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2243 } else {
2244 IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2245 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2246 }
2247 }
2248 /* TCP/UDP/SCTP checksum */
2249 if (status & IXGBE_RXD_STAT_L4CS) {
2250 IXGBE_EVC_ADD(&stats->l4cs, 1);
2251 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2252 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2253 mp->m_pkthdr.csum_flags |= type;
2254 } else {
2255 IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2256 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2257 }
2258 }
2259 } /* ixgbe_rx_checksum */
2260
2261 /************************************************************************
2262 * ixgbe_dma_malloc
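 *
 * Allocate a physically contiguous, DMA-safe region of 'size' bytes:
 * create a DMA tag, allocate and map the memory, then create and load
 * a DMA map so that dma_vaddr/dma_paddr are ready for use. On failure,
 * the steps already completed are unwound in reverse order.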
2263 ************************************************************************/
2264 int
2265 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2266 struct ixgbe_dma_alloc *dma, const int mapflags)
2267 {
2268 device_t dev = sc->dev;
2269 int r, rsegs;
2270
2271 r = ixgbe_dma_tag_create(
2272 /* parent */ sc->osdep.dmat,
2273 /* alignment */ DBA_ALIGN,
2274 /* bounds */ 0,
2275 /* maxsize */ size,
2276 /* nsegments */ 1,
2277 /* maxsegsize */ size,
2278 /* flags */ BUS_DMA_ALLOCNOW,
2279 &dma->dma_tag);
2280 if (r != 0) {
2281 aprint_error_dev(dev,
2282 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2283 r);
2284 goto fail_0;
2285 }
2286
2287 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2288 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2289 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2290 if (r != 0) {
2291 aprint_error_dev(dev,
2292 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2293 goto fail_1;
2294 }
2295
2296 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2297 size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2298 if (r != 0) {
2299 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2300 __func__, r);
2301 goto fail_2;
2302 }
2303
2304 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2305 if (r != 0) {
2306 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2307 __func__, r);
2308 goto fail_3;
2309 }
2310
2311 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2312 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2313 if (r != 0) {
2314 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2315 __func__, r);
2316 goto fail_4;
2317 }
2318 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2319 dma->dma_size = size;
2320 return 0;
2321 fail_4:
2322 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2323 fail_3:
2324 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2325 fail_2:
2326 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2327 fail_1:
2328 ixgbe_dma_tag_destroy(dma->dma_tag);
2329 fail_0:
2330
2331 return (r);
2332 } /* ixgbe_dma_malloc */
2333
2334 /************************************************************************
2335 * ixgbe_dma_free
2336 ************************************************************************/
2337 void
2338 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2339 {
2340 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2341 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2342 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2343 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2344 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2345 ixgbe_dma_tag_destroy(dma->dma_tag);
2346 } /* ixgbe_dma_free */
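
/*
 * Minimal usage sketch for the two helpers above (illustrative only;
 * 'size' and 'dma' are hypothetical locals of the caller):
 *
 *	struct ixgbe_dma_alloc dma;
 *
 *	if (ixgbe_dma_malloc(sc, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr and program dma.dma_paddr ...
 *		ixgbe_dma_free(sc, &dma);
 *	}
 */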
2347
2348
2349 /************************************************************************
2350 * ixgbe_allocate_queues
2351 *
2352 * Allocate memory for the transmit and receive rings, and then
2353  * the descriptors associated with each; called only once at attach.
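 *
 * On failure, the descriptor DMA memory and the ring/queue arrays
 * allocated so far are released before an error is returned.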
2354 ************************************************************************/
2355 int
2356 ixgbe_allocate_queues(struct ixgbe_softc *sc)
2357 {
2358 device_t dev = sc->dev;
2359 struct ix_queue *que;
2360 struct tx_ring *txr;
2361 struct rx_ring *rxr;
2362 int rsize, tsize, error = IXGBE_SUCCESS;
2363 int txconf = 0, rxconf = 0;
2364
2365 /* First, allocate the top level queue structs */
2366 sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
2367 KM_SLEEP);
2368
2369 /* Second, allocate the TX ring struct memory */
2370 sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
2371 KM_SLEEP);
2372
2373 /* Third, allocate the RX ring */
2374 sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
2375 KM_SLEEP);
2376
2377 /* For the ring itself */
2378 tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
2379 KASSERT((tsize % DBA_ALIGN) == 0);
2380
2381 /*
2382 	 * Now set up the TX queues. txconf is needed to handle the
2383 	 * possibility that things fail midcourse, so that we can
2384 	 * undo the memory allocations gracefully.
2385 */
2386 for (int i = 0; i < sc->num_queues; i++, txconf++) {
2387 /* Set up some basics */
2388 txr = &sc->tx_rings[i];
2389 txr->sc = sc;
2390 txr->txr_interq = NULL;
2391 /* In case SR-IOV is enabled, align the index properly */
2392 #ifdef PCI_IOV
2393 txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2394 i);
2395 #else
2396 txr->me = i;
2397 #endif
2398 txr->num_desc = sc->num_tx_desc;
2399
2400 /* Initialize the TX side lock */
2401 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2402
2403 if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2404 BUS_DMA_NOWAIT)) {
2405 aprint_error_dev(dev,
2406 "Unable to allocate TX Descriptor memory\n");
2407 error = ENOMEM;
2408 goto err_tx_desc;
2409 }
2410 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2411 bzero((void *)txr->tx_base, tsize);
2412
2413 /* Now allocate transmit buffers for the ring */
2414 if (ixgbe_allocate_transmit_buffers(txr)) {
2415 aprint_error_dev(dev,
2416 "Critical Failure setting up transmit buffers\n");
2417 error = ENOMEM;
2418 goto err_tx_desc;
2419 }
2420 if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2421 /* Allocate a buf ring */
2422 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2423 if (txr->txr_interq == NULL) {
2424 aprint_error_dev(dev,
2425 "Critical Failure setting up buf ring\n");
2426 error = ENOMEM;
2427 goto err_tx_desc;
2428 }
2429 }
2430 }
2431
2432 /*
2433 * Next the RX queues...
2434 */
2435 rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
2436 KASSERT((rsize % DBA_ALIGN) == 0);
2437 for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2438 rxr = &sc->rx_rings[i];
2439 /* Set up some basics */
2440 rxr->sc = sc;
2441 #ifdef PCI_IOV
2442 /* In case SR-IOV is enabled, align the index properly */
2443 rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2444 i);
2445 #else
2446 rxr->me = i;
2447 #endif
2448 rxr->num_desc = sc->num_rx_desc;
2449
2450 /* Initialize the RX side lock */
2451 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2452
2453 if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2454 BUS_DMA_NOWAIT)) {
2455 aprint_error_dev(dev,
2456 "Unable to allocate RxDescriptor memory\n");
2457 error = ENOMEM;
2458 goto err_rx_desc;
2459 }
2460 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2461 bzero((void *)rxr->rx_base, rsize);
2462
2463 /* Allocate receive buffers for the ring */
2464 if (ixgbe_allocate_receive_buffers(rxr)) {
2465 aprint_error_dev(dev,
2466 "Critical Failure setting up receive buffers\n");
2467 error = ENOMEM;
2468 goto err_rx_desc;
2469 }
2470 }
2471
2472 /*
2473 * Finally set up the queue holding structs
2474 */
2475 for (int i = 0; i < sc->num_queues; i++) {
2476 que = &sc->queues[i];
2477 que->sc = sc;
2478 que->me = i;
2479 que->txr = &sc->tx_rings[i];
2480 que->rxr = &sc->rx_rings[i];
2481
2482 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2483 que->disabled_count = 0;
2484 }
2485
2486 return (0);
2487
2488 err_rx_desc:
2489 for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2490 ixgbe_dma_free(sc, &rxr->rxdma);
2491 err_tx_desc:
2492 for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2493 ixgbe_dma_free(sc, &txr->txdma);
2494 kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
2495 kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
2496 kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2497 return (error);
2498 } /* ixgbe_allocate_queues */
2499
2500 /************************************************************************
2501 * ixgbe_free_queues
2502 *
2503 * Free descriptors for the transmit and receive rings, and then
2504 * the memory associated with each.
2505 ************************************************************************/
2506 void
2507 ixgbe_free_queues(struct ixgbe_softc *sc)
2508 {
2509 struct ix_queue *que;
2510 int i;
2511
2512 ixgbe_free_transmit_structures(sc);
2513 ixgbe_free_receive_structures(sc);
2514 for (i = 0; i < sc->num_queues; i++) {
2515 que = &sc->queues[i];
2516 mutex_destroy(&que->dc_mtx);
2517 }
2518 kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2519 } /* ixgbe_free_queues */
2520