1 /* $NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.117 2024/06/29 12:11:12 riastradh Exp $");
68 
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71 
72 #include "ixgbe.h"
73 
74 #ifdef RSC
75 /*
76  * HW RSC control:
77  *  This feature only works with IPv4
78  *  and only on 82599 and later parts.
79  *  It also breaks IP forwarding, and
80  *  unlike LRO that cannot be controlled
81  *  by the stack.  For these reasons it
82  *  is left off by default, with no
83  *  tunable interface; enabling it
84  *  requires changing this setting and
85  *  recompiling.
86  */
87 static bool ixgbe_rsc_enable = FALSE;
88 #endif
89 
90 #ifdef IXGBE_FDIR
91 /*
92  * For Flow Director: the rate at which
93  * we sample TX packets for the filter
94  * pool; with the default of 20, every
95  * 20th packet is probed.
96  *
97  * This feature can be disabled by
98  * setting this to 0.
99  */
100 static int atr_sample_rate = 20;
101 #endif
102 
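/*
 * If the whole frame still fits in the cluster after shifting, trim
 * ETHER_ALIGN (2) bytes from the front of a fresh RX mbuf so the IP
 * header that follows the 14-byte Ethernet header lands on a 4-byte
 * boundary.
 */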
103 #define IXGBE_M_ADJ(sc, rxr, mp)					\
104 	if (sc->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
105 		m_adj(mp, ETHER_ALIGN)
106 
107 /************************************************************************
108  *  Local Function prototypes
109  ************************************************************************/
110 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
111 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
112 static int           ixgbe_setup_receive_ring(struct rx_ring *);
113 static void          ixgbe_free_receive_buffers(struct rx_ring *);
114 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
115                                        struct ixgbe_hw_stats *);
116 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
117 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
118 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
119 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
120                                         struct mbuf *, u32 *, u32 *);
121 static int           ixgbe_tso_setup(struct tx_ring *,
122                                      struct mbuf *, u32 *, u32 *);
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125                                     struct mbuf *, u32);
126 static int           ixgbe_dma_malloc(struct ixgbe_softc *, bus_size_t,
127                                       struct ixgbe_dma_alloc *, int);
128 static void          ixgbe_dma_free(struct ixgbe_softc *, struct ixgbe_dma_alloc *);
129 #ifdef RSC
130 static void	     ixgbe_setup_hw_rsc(struct rx_ring *);
131 #endif
132 
133 /************************************************************************
134  * ixgbe_legacy_start_locked - Transmit entry point
135  *
136  *   Called by the stack to initiate a transmit.
137  *   The driver will remain in this routine as long as there are
138  *   packets to transmit and transmit resources are available.
139  *   In case resources are not available, the stack is notified
140  *   and the packet is requeued.
141  ************************************************************************/
142 int
143 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
144 {
145 	int rc;
146 	struct mbuf    *m_head;
147 	struct ixgbe_softc *sc = txr->sc;
148 
149 	IXGBE_TX_LOCK_ASSERT(txr);
150 
151 	if (sc->link_active != LINK_STATE_UP) {
152 		/*
153 		 * Discard all packets buffered in the IFQ to avoid
154 		 * sending stale packets when the link comes back up.
155 		 */
156 		ixgbe_drain(ifp, txr);
157 		return (ENETDOWN);
158 	}
159 	if ((ifp->if_flags & IFF_RUNNING) == 0)
160 		return (ENETDOWN);
161 	if (txr->txr_no_space)
162 		return (ENETDOWN);
163 
164 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
165 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
166 			break;
167 
168 		IFQ_POLL(&ifp->if_snd, m_head);
169 		if (m_head == NULL)
170 			break;
171 
172 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
173 			break;
174 		}
175 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
176 		if (rc != 0) {
177 			m_freem(m_head);
178 			continue;
179 		}
180 
181 		/* Send a copy of the frame to the BPF listener */
182 		bpf_mtap(ifp, m_head, BPF_D_OUT);
183 	}
184 
185 	return IXGBE_SUCCESS;
186 } /* ixgbe_legacy_start_locked */
187 
188 /************************************************************************
189  * ixgbe_legacy_start
190  *
191  *   Called by the stack, this always uses the first tx ring,
192  *   and should not be used with multiqueue tx enabled.
193  ************************************************************************/
194 void
195 ixgbe_legacy_start(struct ifnet *ifp)
196 {
197 	struct ixgbe_softc *sc = ifp->if_softc;
198 	struct tx_ring *txr = sc->tx_rings;
199 
200 	if (ifp->if_flags & IFF_RUNNING) {
201 		IXGBE_TX_LOCK(txr);
202 		ixgbe_legacy_start_locked(ifp, txr);
203 		IXGBE_TX_UNLOCK(txr);
204 	}
205 } /* ixgbe_legacy_start */
206 
207 /************************************************************************
208  * ixgbe_mq_start - Multiqueue Transmit Entry Point
209  *
210  *   (if_transmit function)
211  ************************************************************************/
212 int
213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
214 {
215 	struct ixgbe_softc *sc = ifp->if_softc;
216 	struct tx_ring	*txr;
217 	int		i;
218 #ifdef RSS
219 	uint32_t bucket_id;
220 #endif
221 
222 	/*
223 	 * When doing RSS, map it to the same outbound queue
224 	 * as the incoming flow would be mapped to.
225 	 *
226 	 * If everything is set up correctly, it should be the
227 	 * same bucket the current CPU would be mapped to.
228 	 */
229 #ifdef RSS
230 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
231 		if ((sc->feat_en & IXGBE_FEATURE_RSS) &&
232 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
233 		    &bucket_id) == 0)) {
234 			i = bucket_id % sc->num_queues;
235 #ifdef IXGBE_DEBUG
236 			if (bucket_id > sc->num_queues)
237 				if_printf(ifp,
238 				    "bucket_id (%d) > num_queues (%d)\n",
239 				    bucket_id, sc->num_queues);
240 #endif
241 		} else
242 			i = m->m_pkthdr.flowid % sc->num_queues;
243 	} else
244 #endif /* RSS */
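		/*
		 * Default mapping when RSS is not used: hash the sending
		 * CPU onto a TX queue so each CPU tends to stick to one
		 * ring and lock contention stays low.
		 */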
245 		i = (cpu_index(curcpu()) % ncpu) % sc->num_queues;
246 
247 	/* Check for a hung queue and pick alternative */
248 	if (((1ULL << i) & sc->active_queues) == 0)
249 		i = ffs64(sc->active_queues);
250 
251 	txr = &sc->tx_rings[i];
252 
253 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
254 		m_freem(m);
255 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
256 		return ENOBUFS;
257 	}
258 #ifdef IXGBE_ALWAYS_TXDEFER
259 	kpreempt_disable();
260 	softint_schedule(txr->txr_si);
261 	kpreempt_enable();
262 #else
263 	if (IXGBE_TX_TRYLOCK(txr)) {
264 		ixgbe_mq_start_locked(ifp, txr);
265 		IXGBE_TX_UNLOCK(txr);
266 	} else {
267 		if (sc->txrx_use_workqueue) {
268 			u_int *enqueued;
269 
270 			/*
271 			 * This function itself is not called in interrupt
272 			 * context, but it can be called in fast softint
273 			 * context right after receiving forwarded packets.
274 			 * The workqueue must therefore be protected against
275 			 * double enqueueing when the machine handles both
276 			 * locally generated and forwarded packets.
277 			 */
278 			enqueued = percpu_getref(sc->txr_wq_enqueued);
279 			if (*enqueued == 0) {
280 				*enqueued = 1;
281 				percpu_putref(sc->txr_wq_enqueued);
282 				workqueue_enqueue(sc->txr_wq,
283 				    &txr->wq_cookie, curcpu());
284 			} else
285 				percpu_putref(sc->txr_wq_enqueued);
286 		} else {
287 			kpreempt_disable();
288 			softint_schedule(txr->txr_si);
289 			kpreempt_enable();
290 		}
291 	}
292 #endif
293 
294 	return (0);
295 } /* ixgbe_mq_start */
296 
297 /************************************************************************
298  * ixgbe_mq_start_locked
299  ************************************************************************/
300 int
301 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
302 {
303 	struct mbuf    *next;
304 	int            enqueued = 0, err = 0;
305 
306 	if (txr->sc->link_active != LINK_STATE_UP) {
307 		/*
308 		 * Discard all packets buffered in txr_interq to avoid
309 		 * sending stale packets when the link comes back up.
310 		 */
311 		ixgbe_drain(ifp, txr);
312 		return (ENETDOWN);
313 	}
314 	if ((ifp->if_flags & IFF_RUNNING) == 0)
315 		return (ENETDOWN);
316 	if (txr->txr_no_space)
317 		return (ENETDOWN);
318 
319 	/* Process the queue */
320 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
321 		if ((err = ixgbe_xmit(txr, next)) != 0) {
322 			m_freem(next);
323 			/* All errors are counted in ixgbe_xmit() */
324 			break;
325 		}
326 		enqueued++;
327 #if __FreeBSD_version >= 1100036
328 		/*
329 		 * Since we're looking at the tx ring, we can check
330 		 * to see if we're a VF by examining our tail register
331 		 * address.
332 		 */
333 		if ((txr->sc->feat_en & IXGBE_FEATURE_VF) &&
334 		    (next->m_flags & M_MCAST))
335 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
336 #endif
337 		/* Send a copy of the frame to the BPF listener */
338 		bpf_mtap(ifp, next, BPF_D_OUT);
339 		if ((ifp->if_flags & IFF_RUNNING) == 0)
340 			break;
341 	}
342 
343 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->sc))
344 		ixgbe_txeof(txr);
345 
346 	return (err);
347 } /* ixgbe_mq_start_locked */
348 
349 /************************************************************************
350  * ixgbe_deferred_mq_start
351  *
352  *   Called from a softint and workqueue (indirectly) to drain queued
353  *   transmit packets.
354  ************************************************************************/
355 void
356 ixgbe_deferred_mq_start(void *arg)
357 {
358 	struct tx_ring *txr = arg;
359 	struct ixgbe_softc *sc = txr->sc;
360 	struct ifnet   *ifp = sc->ifp;
361 
362 	IXGBE_TX_LOCK(txr);
363 	if (pcq_peek(txr->txr_interq) != NULL)
364 		ixgbe_mq_start_locked(ifp, txr);
365 	IXGBE_TX_UNLOCK(txr);
366 } /* ixgbe_deferred_mq_start */
367 
368 /************************************************************************
369  * ixgbe_deferred_mq_start_work
370  *
371  *   Called from a workqueue to drain queued transmit packets.
372  ************************************************************************/
373 void
374 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
375 {
376 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
377 	struct ixgbe_softc *sc = txr->sc;
378 	u_int *enqueued = percpu_getref(sc->txr_wq_enqueued);
379 	*enqueued = 0;
380 	percpu_putref(sc->txr_wq_enqueued);
381 
382 	ixgbe_deferred_mq_start(txr);
383 } /* ixgbe_deferred_mq_start_work */
384 
385 /************************************************************************
386  * ixgbe_drain_all
387  ************************************************************************/
388 void
389 ixgbe_drain_all(struct ixgbe_softc *sc)
390 {
391 	struct ifnet *ifp = sc->ifp;
392 	struct ix_queue *que = sc->queues;
393 
394 	for (int i = 0; i < sc->num_queues; i++, que++) {
395 		struct tx_ring  *txr = que->txr;
396 
397 		IXGBE_TX_LOCK(txr);
398 		ixgbe_drain(ifp, txr);
399 		IXGBE_TX_UNLOCK(txr);
400 	}
401 }
402 
403 /************************************************************************
404  * ixgbe_xmit
405  *
406  *   Maps the mbufs to tx descriptors, allowing the
407  *   TX engine to transmit the packets.
408  *
409  *   Return 0 on success, positive on failure
410  ************************************************************************/
411 static int
412 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
413 {
414 	struct ixgbe_softc      *sc = txr->sc;
415 	struct ixgbe_tx_buf     *txbuf;
416 	union ixgbe_adv_tx_desc *txd = NULL;
417 	struct ifnet	        *ifp = sc->ifp;
418 	int                     i, j, error;
419 	int                     first;
420 	u32                     olinfo_status = 0, cmd_type_len;
421 	bool                    remap = TRUE;
422 	bus_dmamap_t            map;
423 
424 	/* Basic descriptor defines */
425 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
426 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
427 
428 	if (vlan_has_tag(m_head))
429 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
430 
431 	/*
432 	 * It is important to capture the first descriptor
433 	 * used, because its tx_buffer entry is where we later
434 	 * record the EOP descriptor the hardware reports on.
435 	 */
436 	first = txr->next_avail_desc;
437 	txbuf = &txr->tx_buffers[first];
438 	map = txbuf->map;
439 
440 	/*
441 	 * Map the packet for DMA.
442 	 */
443 retry:
444 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
445 	    BUS_DMA_NOWAIT);
446 
447 	if (__predict_false(error)) {
448 		struct mbuf *m;
449 
450 		switch (error) {
451 		case EAGAIN:
452 			txr->q_eagain_tx_dma_setup++;
453 			return EAGAIN;
454 		case ENOMEM:
455 			txr->q_enomem_tx_dma_setup++;
456 			return EAGAIN;
457 		case EFBIG:
458 			/* Try it again? - one try */
459 			if (remap == TRUE) {
460 				remap = FALSE;
461 				/*
462 				 * XXX: m_defrag will choke on
463 				 * non-MCLBYTES-sized clusters
464 				 */
465 				txr->q_efbig_tx_dma_setup++;
466 				m = m_defrag(m_head, M_NOWAIT);
467 				if (m == NULL) {
468 					txr->q_mbuf_defrag_failed++;
469 					return ENOBUFS;
470 				}
471 				m_head = m;
472 				goto retry;
473 			} else {
474 				txr->q_efbig2_tx_dma_setup++;
475 				return error;
476 			}
477 		case EINVAL:
478 			txr->q_einval_tx_dma_setup++;
479 			return error;
480 		default:
481 			txr->q_other_tx_dma_setup++;
482 			return error;
483 		}
484 	}
485 
486 	/* Make certain there are enough descriptors */
487 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
488 		txr->txr_no_space = true;
489 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
490 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
491 		return EAGAIN;
492 	}
493 
494 	/*
495 	 * Set up the appropriate offload context if requested;
496 	 * this may consume one TX descriptor.
497 	 */
498 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
499 	if (__predict_false(error)) {
500 		return (error);
501 	}
502 
503 #ifdef IXGBE_FDIR
504 	/* Do the flow director magic */
505 	if ((sc->feat_en & IXGBE_FEATURE_FDIR) &&
506 	    (txr->atr_sample) && (!sc->fdir_reinit)) {
507 		++txr->atr_count;
508 		if (txr->atr_count >= atr_sample_rate) {
509 			ixgbe_atr(txr, m_head);
510 			txr->atr_count = 0;
511 		}
512 	}
513 #endif
514 
515 	olinfo_status |= IXGBE_ADVTXD_CC;
516 	i = txr->next_avail_desc;
517 	for (j = 0; j < map->dm_nsegs; j++) {
518 		bus_size_t seglen;
519 		uint64_t segaddr;
520 
521 		txbuf = &txr->tx_buffers[i];
522 		txd = &txr->tx_base[i];
523 		seglen = map->dm_segs[j].ds_len;
524 		segaddr = htole64(map->dm_segs[j].ds_addr);
525 
526 		txd->read.buffer_addr = segaddr;
527 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
528 		txd->read.olinfo_status = htole32(olinfo_status);
529 
530 		if (++i == txr->num_desc)
531 			i = 0;
532 	}
533 
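	/*
	 * Mark the final descriptor of the packet: EOP ends the packet and
	 * RS asks the hardware to write back completion status, which
	 * ixgbe_txeof() later checks via the DD bit.
	 */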
534 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
535 	txr->tx_avail -= map->dm_nsegs;
536 	txr->next_avail_desc = i;
537 
538 	txbuf->m_head = m_head;
539 	/*
540 	 * Here we swap the maps so the last descriptor,
541 	 * which gets the completion interrupt, has the
542 	 * real map, and the first descriptor gets the
543 	 * unused map from this descriptor.
544 	 */
545 	txr->tx_buffers[first].map = txbuf->map;
546 	txbuf->map = map;
547 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
548 	    BUS_DMASYNC_PREWRITE);
549 
550 	/* Set the EOP descriptor that will be marked done */
551 	txbuf = &txr->tx_buffers[first];
552 	txbuf->eop = txd;
553 
554 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
555 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
556 	/*
557 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
558 	 * hardware that this frame is available to transmit.
559 	 */
560 	IXGBE_EVC_ADD(&txr->total_packets, 1);
561 	IXGBE_WRITE_REG(&sc->hw, txr->tail, i);
562 
563 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
564 	if_statadd_ref(ifp, nsr, if_obytes, m_head->m_pkthdr.len);
565 	if (m_head->m_flags & M_MCAST)
566 		if_statinc_ref(ifp, nsr, if_omcasts);
567 	IF_STAT_PUTREF(ifp);
568 
569 	/* Mark queue as having work */
570 	if (txr->busy == 0)
571 		txr->busy = 1;
572 
573 	return (0);
574 } /* ixgbe_xmit */
575 
576 /************************************************************************
577  * ixgbe_drain
578  ************************************************************************/
579 static void
580 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
581 {
582 	struct mbuf *m;
583 
584 	IXGBE_TX_LOCK_ASSERT(txr);
585 
586 	if (txr->me == 0) {
587 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
588 			IFQ_DEQUEUE(&ifp->if_snd, m);
589 			m_freem(m);
590 			IF_DROP(&ifp->if_snd);
591 		}
592 	}
593 
594 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
595 		m_freem(m);
596 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
597 	}
598 }
599 
600 /************************************************************************
601  * ixgbe_allocate_transmit_buffers
602  *
603  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
604  *   the information needed to transmit a packet on the wire. This is
605  *   called only once at attach; setup is done on every reset.
606  ************************************************************************/
607 static int
608 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
609 {
610 	struct ixgbe_softc  *sc = txr->sc;
611 	device_t            dev = sc->dev;
612 	struct ixgbe_tx_buf *txbuf;
613 	int                 error, i;
614 
615 	/*
616 	 * Setup DMA descriptor areas.
617 	 */
618 	error = ixgbe_dma_tag_create(
619 	         /*      parent */ sc->osdep.dmat,
620 	         /*   alignment */ 1,
621 	         /*      bounds */ 0,
622 	         /*     maxsize */ IXGBE_TSO_SIZE,
623 	         /*   nsegments */ sc->num_segs,
624 	         /*  maxsegsize */ PAGE_SIZE,
625 	         /*       flags */ 0,
626 	                           &txr->txtag);
627 	if (error != 0) {
628 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
629 		goto fail;
630 	}
631 
632 	txr->tx_buffers = kmem_zalloc(sizeof(struct ixgbe_tx_buf) *
633 	    sc->num_tx_desc, KM_SLEEP);
634 
635 	/* Create the descriptor buffer dma maps */
636 	txbuf = txr->tx_buffers;
637 	for (i = 0; i < sc->num_tx_desc; i++, txbuf++) {
638 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
639 		if (error != 0) {
640 			aprint_error_dev(dev,
641 			    "Unable to create TX DMA map (%d)\n", error);
642 			goto fail;
643 		}
644 	}
645 
646 	return 0;
647 fail:
648 	/* Free everything; this handles the case where we failed partway */
649 #if 0 /* XXX was FreeBSD */
650 	ixgbe_free_transmit_structures(sc);
651 #else
652 	ixgbe_free_transmit_buffers(txr);
653 #endif
654 	return (error);
655 } /* ixgbe_allocate_transmit_buffers */
656 
657 /************************************************************************
658  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
659  ************************************************************************/
660 static void
661 ixgbe_setup_transmit_ring(struct tx_ring *txr)
662 {
663 	struct ixgbe_softc    *sc = txr->sc;
664 	struct ixgbe_tx_buf   *txbuf;
665 #ifdef DEV_NETMAP
666 	struct netmap_adapter *na = NA(sc->ifp);
667 	struct netmap_slot    *slot;
668 #endif /* DEV_NETMAP */
669 
670 	/* Clear the old ring contents */
671 	IXGBE_TX_LOCK(txr);
672 
673 #ifdef DEV_NETMAP
674 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
675 		/*
676 		 * (under lock): if in netmap mode, do some consistency
677 		 * checks and set slot to entry 0 of the netmap ring.
678 		 */
679 		slot = netmap_reset(na, NR_TX, txr->me, 0);
680 	}
681 #endif /* DEV_NETMAP */
682 
683 	bzero((void *)txr->tx_base,
684 	    (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
685 	/* Reset indices */
686 	txr->next_avail_desc = 0;
687 	txr->next_to_clean = 0;
688 
689 	/* Free any existing tx buffers. */
690 	txbuf = txr->tx_buffers;
691 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
692 		if (txbuf->m_head != NULL) {
693 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
694 			    0, txbuf->m_head->m_pkthdr.len,
695 			    BUS_DMASYNC_POSTWRITE);
696 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
697 			m_freem(txbuf->m_head);
698 			txbuf->m_head = NULL;
699 		}
700 
701 #ifdef DEV_NETMAP
702 		/*
703 		 * In netmap mode, set the map for the packet buffer.
704 		 * NOTE: Some drivers (not this one) also need to set
705 		 * the physical buffer address in the NIC ring.
706 		 * Slots in the netmap ring (indexed by "si") are
707 		 * kring->nkr_hwofs positions "ahead" wrt the
708 		 * corresponding slot in the NIC ring. In some drivers
709 		 * (not here) nkr_hwofs can be negative. Function
710 		 * netmap_idx_n2k() handles wraparounds properly.
711 		 */
712 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
713 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
714 			netmap_load_map(na, txr->txtag,
715 			    txbuf->map, NMB(na, slot + si));
716 		}
717 #endif /* DEV_NETMAP */
718 
719 		/* Clear the EOP descriptor pointer */
720 		txbuf->eop = NULL;
721 	}
722 
723 #ifdef IXGBE_FDIR
724 	/* Set the rate at which we sample packets */
725 	if (sc->feat_en & IXGBE_FEATURE_FDIR)
726 		txr->atr_sample = atr_sample_rate;
727 #endif
728 
729 	/* Set number of descriptors available */
730 	txr->tx_avail = sc->num_tx_desc;
731 
732 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
733 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
734 	IXGBE_TX_UNLOCK(txr);
735 } /* ixgbe_setup_transmit_ring */
736 
737 /************************************************************************
738  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
739  ************************************************************************/
740 int
741 ixgbe_setup_transmit_structures(struct ixgbe_softc *sc)
742 {
743 	struct tx_ring *txr = sc->tx_rings;
744 
745 	for (int i = 0; i < sc->num_queues; i++, txr++)
746 		ixgbe_setup_transmit_ring(txr);
747 
748 	return (0);
749 } /* ixgbe_setup_transmit_structures */
750 
751 /************************************************************************
752  * ixgbe_free_transmit_structures - Free all transmit rings.
753  ************************************************************************/
754 void
755 ixgbe_free_transmit_structures(struct ixgbe_softc *sc)
756 {
757 	struct tx_ring *txr = sc->tx_rings;
758 
759 	for (int i = 0; i < sc->num_queues; i++, txr++) {
760 		ixgbe_free_transmit_buffers(txr);
761 		ixgbe_dma_free(sc, &txr->txdma);
762 		IXGBE_TX_LOCK_DESTROY(txr);
763 	}
764 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
765 } /* ixgbe_free_transmit_structures */
766 
767 /************************************************************************
768  * ixgbe_free_transmit_buffers
769  *
770  *   Free transmit ring related data structures.
771  ************************************************************************/
772 static void
773 ixgbe_free_transmit_buffers(struct tx_ring *txr)
774 {
775 	struct ixgbe_softc  *sc = txr->sc;
776 	struct ixgbe_tx_buf *tx_buffer;
777 	int                 i;
778 
779 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
780 
781 	if (txr->tx_buffers == NULL)
782 		return;
783 
784 	tx_buffer = txr->tx_buffers;
785 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
786 		if (tx_buffer->m_head != NULL) {
787 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
788 			    0, tx_buffer->m_head->m_pkthdr.len,
789 			    BUS_DMASYNC_POSTWRITE);
790 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
791 			m_freem(tx_buffer->m_head);
792 			tx_buffer->m_head = NULL;
793 			if (tx_buffer->map != NULL) {
794 				ixgbe_dmamap_destroy(txr->txtag,
795 				    tx_buffer->map);
796 				tx_buffer->map = NULL;
797 			}
798 		} else if (tx_buffer->map != NULL) {
799 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
800 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
801 			tx_buffer->map = NULL;
802 		}
803 	}
804 	if (txr->txr_interq != NULL) {
805 		struct mbuf *m;
806 
807 		while ((m = pcq_get(txr->txr_interq)) != NULL)
808 			m_freem(m);
809 		pcq_destroy(txr->txr_interq);
810 	}
811 	if (txr->tx_buffers != NULL) {
812 		kmem_free(txr->tx_buffers,
813 		    sizeof(struct ixgbe_tx_buf) * sc->num_tx_desc);
814 		txr->tx_buffers = NULL;
815 	}
816 	if (txr->txtag != NULL) {
817 		ixgbe_dma_tag_destroy(txr->txtag);
818 		txr->txtag = NULL;
819 	}
820 } /* ixgbe_free_transmit_buffers */
821 
822 /************************************************************************
823  * ixgbe_tx_ctx_setup
824  *
825  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
826  ************************************************************************/
827 static int
828 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
829     u32 *cmd_type_len, u32 *olinfo_status)
830 {
831 	struct ixgbe_softc               *sc = txr->sc;
832 	struct ixgbe_adv_tx_context_desc *TXD;
833 	struct ether_vlan_header         *eh;
834 #ifdef INET
835 	struct ip                        *ip;
836 #endif
837 #ifdef INET6
838 	struct ip6_hdr                   *ip6;
839 #endif
840 	int                              ehdrlen, ip_hlen = 0;
841 	int                              offload = TRUE;
842 	int                              ctxd = txr->next_avail_desc;
843 	u32                              vlan_macip_lens = 0;
844 	u32                              type_tucmd_mlhl = 0;
845 	u16                              vtag = 0;
846 	u16                              etype;
847 	u8                               ipproto = 0;
848 	char                             *l3d;
849 
850 	/* First check if TSO is to be used */
851 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
852 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
853 
854 		if (rv != 0)
855 			IXGBE_EVC_ADD(&sc->tso_err, 1);
856 		return rv;
857 	}
858 
859 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
860 		offload = FALSE;
861 
862 	/* Indicate the whole packet as payload when not doing TSO */
863 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
864 
865 	/*
866 	 * In advanced descriptors the vlan tag must
867 	 * be placed into the context descriptor. Hence
868 	 * we need to make one even if not doing offloads.
869 	 */
870 	if (vlan_has_tag(mp)) {
871 		vtag = htole16(vlan_get_tag(mp));
872 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
873 	} else if (!(txr->sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
874 	           (offload == FALSE))
875 		return (0);
876 
877 	/*
878 	 * Determine where frame payload starts.
879 	 * Jump over vlan headers if already present,
880 	 * helpful for QinQ too.
881 	 */
882 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
883 	eh = mtod(mp, struct ether_vlan_header *);
884 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
885 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
886 		etype = ntohs(eh->evl_proto);
887 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888 	} else {
889 		etype = ntohs(eh->evl_encap_proto);
890 		ehdrlen = ETHER_HDR_LEN;
891 	}
892 
893 	/* Set the ether header length */
894 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
895 
896 	if (offload == FALSE)
897 		goto no_offloads;
898 
899 	/*
900 	 * If the first mbuf only includes the ethernet header,
901 	 * jump to the next one
902 	 * XXX: This assumes the stack splits mbufs containing headers
903 	 *      on header boundaries
904 	 * XXX: And assumes the entire IP header is contained in one mbuf
905 	 */
906 	if (mp->m_len == ehdrlen && mp->m_next)
907 		l3d = mtod(mp->m_next, char *);
908 	else
909 		l3d = mtod(mp, char *) + ehdrlen;
910 
911 	switch (etype) {
912 #ifdef INET
913 	case ETHERTYPE_IP:
914 		ip = (struct ip *)(l3d);
915 		ip_hlen = ip->ip_hl << 2;
916 		ipproto = ip->ip_p;
917 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
918 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
919 		    ip->ip_sum == 0);
920 		break;
921 #endif
922 #ifdef INET6
923 	case ETHERTYPE_IPV6:
924 		ip6 = (struct ip6_hdr *)(l3d);
925 		ip_hlen = sizeof(struct ip6_hdr);
926 		ipproto = ip6->ip6_nxt;
927 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
928 		break;
929 #endif
930 	default:
931 		offload = false;
932 		break;
933 	}
934 
935 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
936 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937 
938 	vlan_macip_lens |= ip_hlen;
939 
940 	/* No support for offloads for non-L4 next headers */
941 	switch (ipproto) {
942 	case IPPROTO_TCP:
943 		if (mp->m_pkthdr.csum_flags &
944 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
945 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
946 		else
947 			offload = false;
948 		break;
949 	case IPPROTO_UDP:
950 		if (mp->m_pkthdr.csum_flags &
951 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
952 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
953 		else
954 			offload = false;
955 		break;
956 	default:
957 		offload = false;
958 		break;
959 	}
960 
961 	if (offload) /* Insert L4 checksum into data descriptors */
962 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
963 
964 no_offloads:
965 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 
967 	/* Now ready a context descriptor */
968 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
969 
970 	/* Now copy bits into descriptor */
971 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
972 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
973 	TXD->seqnum_seed = htole32(0);
974 	TXD->mss_l4len_idx = htole32(0);
975 
976 	/* We've consumed the first desc, adjust counters */
977 	if (++ctxd == txr->num_desc)
978 		ctxd = 0;
979 	txr->next_avail_desc = ctxd;
980 	--txr->tx_avail;
981 
982 	return (0);
983 } /* ixgbe_tx_ctx_setup */
984 
985 /************************************************************************
986  * ixgbe_tso_setup
987  *
988  *   Setup work for hardware segmentation offload (TSO) on
989  *   adapters using advanced tx descriptors
990  ************************************************************************/
991 static int
992 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
993     u32 *olinfo_status)
994 {
995 	struct ixgbe_adv_tx_context_desc *TXD;
996 	struct ether_vlan_header         *eh;
997 #ifdef INET6
998 	struct ip6_hdr                   *ip6;
999 #endif
1000 #ifdef INET
1001 	struct ip                        *ip;
1002 #endif
1003 	struct tcphdr                    *th;
1004 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
1005 	u32                              vlan_macip_lens = 0;
1006 	u32                              type_tucmd_mlhl = 0;
1007 	u32                              mss_l4len_idx = 0, paylen;
1008 	u16                              vtag = 0, eh_type;
1009 
1010 	/*
1011 	 * Determine where frame payload starts.
1012 	 * Jump over vlan headers if already present
1013 	 */
1014 	eh = mtod(mp, struct ether_vlan_header *);
1015 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1016 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1017 		eh_type = eh->evl_proto;
1018 	} else {
1019 		ehdrlen = ETHER_HDR_LEN;
1020 		eh_type = eh->evl_encap_proto;
1021 	}
1022 
1023 	switch (ntohs(eh_type)) {
1024 #ifdef INET
1025 	case ETHERTYPE_IP:
1026 		ip = (struct ip *)(mp->m_data + ehdrlen);
1027 		if (ip->ip_p != IPPROTO_TCP)
1028 			return (ENXIO);
1029 		ip->ip_sum = 0;
1030 		ip_hlen = ip->ip_hl << 2;
1031 		th = (struct tcphdr *)((char *)ip + ip_hlen);
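		/*
		 * Seed th_sum with the pseudo-header checksum (computed
		 * without the length); the hardware inserts the final TCP
		 * checksum for each segment it generates.
		 */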
1032 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1033 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1034 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1035 		/* Tell transmit desc to also do IPv4 checksum. */
1036 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1037 		break;
1038 #endif
1039 #ifdef INET6
1040 	case ETHERTYPE_IPV6:
1041 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1042 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1043 		if (ip6->ip6_nxt != IPPROTO_TCP)
1044 			return (ENXIO);
1045 		ip_hlen = sizeof(struct ip6_hdr);
1046 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1047 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1048 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1049 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1050 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1051 		break;
1052 #endif
1053 	default:
1054 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1055 		    __func__, ntohs(eh_type));
1056 		break;
1057 	}
1058 
1059 	ctxd = txr->next_avail_desc;
1060 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1061 
1062 	tcp_hlen = th->th_off << 2;
1063 
1064 	/* This is used in the transmit desc in encap */
1065 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1066 
1067 	/* VLAN MACLEN IPLEN */
1068 	if (vlan_has_tag(mp)) {
1069 		vtag = htole16(vlan_get_tag(mp));
1070 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1071 	}
1072 
1073 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1074 	vlan_macip_lens |= ip_hlen;
1075 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1076 
1077 	/* ADV DTYPE TUCMD */
1078 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1079 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1080 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1081 
1082 	/* MSS L4LEN IDX */
1083 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1084 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1085 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1086 
1087 	TXD->seqnum_seed = htole32(0);
1088 
1089 	if (++ctxd == txr->num_desc)
1090 		ctxd = 0;
1091 
1092 	txr->tx_avail--;
1093 	txr->next_avail_desc = ctxd;
1094 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1095 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1096 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1097 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
1098 
1099 	return (0);
1100 } /* ixgbe_tso_setup */
1101 
1102 
1103 /************************************************************************
1104  * ixgbe_txeof
1105  *
1106  *   Examine each tx_buffer in the used queue. If the hardware is done
1107  *   processing the packet then free associated resources. The
1108  *   tx_buffer is put back on the free queue.
1109  ************************************************************************/
1110 bool
1111 ixgbe_txeof(struct tx_ring *txr)
1112 {
1113 	struct ixgbe_softc	*sc = txr->sc;
1114 	struct ifnet		*ifp = sc->ifp;
1115 	struct ixgbe_tx_buf	*buf;
1116 	union ixgbe_adv_tx_desc *txd;
1117 	u32			work, processed = 0;
1118 	u32			limit = sc->tx_process_limit;
1119 	u16			avail;
1120 
1121 	KASSERT(mutex_owned(&txr->tx_mtx));
1122 
1123 #ifdef DEV_NETMAP
1124 	if ((sc->feat_en & IXGBE_FEATURE_NETMAP) &&
1125 	    (sc->ifp->if_capenable & IFCAP_NETMAP)) {
1126 		struct netmap_adapter *na = NA(sc->ifp);
1127 		struct netmap_kring *kring = na->tx_rings[txr->me];
1128 		txd = txr->tx_base;
1129 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1130 		    BUS_DMASYNC_POSTREAD);
1131 		/*
1132 		 * In netmap mode, all the work is done in the context
1133 		 * of the client thread. Interrupt handlers only wake up
1134 		 * clients, which may be sleeping on individual rings
1135 		 * or on a global resource for all rings.
1136 		 * To implement tx interrupt mitigation, we wake up the client
1137 		 * thread roughly every half ring, even if the NIC interrupts
1138 		 * more frequently. This is implemented as follows:
1139 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1140 		 *   the slot that should wake up the thread (nkr_num_slots
1141 		 *   means the user thread should not be woken up);
1142 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1143 		 *   or the slot has the DD bit set.
1144 		 */
1145 		if (kring->nr_kflags < kring->nkr_num_slots &&
1146 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1147 			netmap_tx_irq(ifp, txr->me);
1148 		}
1149 		return false;
1150 	}
1151 #endif /* DEV_NETMAP */
1152 
1153 	if (txr->tx_avail == txr->num_desc) {
1154 		txr->busy = 0;
1155 		return false;
1156 	}
1157 
1158 	/* Get work starting point */
1159 	work = txr->next_to_clean;
1160 	buf = &txr->tx_buffers[work];
1161 	txd = &txr->tx_base[work];
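	/*
	 * Track 'work' as a negative offset from the end of the ring;
	 * it reaches zero exactly when the index wraps, which the
	 * "if (!work)" checks below use to reset buf and txd.
	 */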
1162 	work -= txr->num_desc; /* The distance to ring end */
1163 	avail = txr->tx_avail;
1164 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1165 	    BUS_DMASYNC_POSTREAD);
1166 
1167 	do {
1168 		union ixgbe_adv_tx_desc *eop = buf->eop;
1169 		if (eop == NULL) /* No work */
1170 			break;
1171 
1172 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1173 			break;	/* I/O not complete */
1174 
1175 		if (buf->m_head) {
1176 			txr->bytes += buf->m_head->m_pkthdr.len;
1177 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1178 			    0, buf->m_head->m_pkthdr.len,
1179 			    BUS_DMASYNC_POSTWRITE);
1180 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1181 			m_freem(buf->m_head);
1182 			buf->m_head = NULL;
1183 		}
1184 		buf->eop = NULL;
1185 		++avail;
1186 
1187 		/* Clean the whole range when the packet spans multiple descriptors */
1188 		while (txd != eop) {
1189 			++txd;
1190 			++buf;
1191 			++work;
1192 			/* wrap the ring? */
1193 			if (__predict_false(!work)) {
1194 				work -= txr->num_desc;
1195 				buf = txr->tx_buffers;
1196 				txd = txr->tx_base;
1197 			}
1198 			if (buf->m_head) {
1199 				txr->bytes +=
1200 				    buf->m_head->m_pkthdr.len;
1201 				bus_dmamap_sync(txr->txtag->dt_dmat,
1202 				    buf->map,
1203 				    0, buf->m_head->m_pkthdr.len,
1204 				    BUS_DMASYNC_POSTWRITE);
1205 				ixgbe_dmamap_unload(txr->txtag,
1206 				    buf->map);
1207 				m_freem(buf->m_head);
1208 				buf->m_head = NULL;
1209 			}
1210 			++avail;
1211 			buf->eop = NULL;
1212 
1213 		}
1214 		++processed;
1215 
1216 		/* Try the next packet */
1217 		++txd;
1218 		++buf;
1219 		++work;
1220 		/* reset with a wrap */
1221 		if (__predict_false(!work)) {
1222 			work -= txr->num_desc;
1223 			buf = txr->tx_buffers;
1224 			txd = txr->tx_base;
1225 		}
1226 		prefetch(txd);
1227 	} while (__predict_true(--limit));
1228 
1229 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1230 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1231 
1232 	work += txr->num_desc;
1233 	txr->next_to_clean = work;
1234 	if (processed) {
1235 		txr->tx_avail = avail;
1236 		txr->txr_no_space = false;
1237 		txr->packets += processed;
1238 		if_statadd(ifp, if_opackets, processed);
1239 	}
1240 
1241 	/*
1242 	 * Queue hang detection: we know there is work
1243 	 * outstanding, or the early return above would
1244 	 * have been taken, so increment busy if nothing
1245 	 * got cleaned; local_timer then checks this and
1246 	 * marks the queue HUNG if it exceeds the maximum
1247 	 * number of attempts.
1248 	 */
1249 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1250 		++txr->busy;
1251 	/*
1252 	 * If anything was cleaned we reset the state to 1;
1253 	 * note this will clear HUNG if it is set.
1254 	 */
1255 	if (processed)
1256 		txr->busy = 1;
1257 
1258 	if (txr->tx_avail == txr->num_desc)
1259 		txr->busy = 0;
1260 
1261 	return ((limit > 0) ? false : true);
1262 } /* ixgbe_txeof */
1263 
1264 #ifdef RSC
1265 /************************************************************************
1266  * ixgbe_rsc_count
1267  *
1268  *   Used to detect a descriptor that has been merged by Hardware RSC.
1269  ************************************************************************/
1270 static inline u32
1271 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1272 {
1273 	return (le32toh(rx->wb.lower.lo_dword.data) &
1274 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1275 } /* ixgbe_rsc_count */
1276 
1277 /************************************************************************
1278  * ixgbe_setup_hw_rsc
1279  *
1280  *   Initialize Hardware RSC (LRO) feature on 82599
1281  *   for an RX ring, this is toggled by the LRO capability
1282  *   even though it is transparent to the stack.
1283  *
1284  *   NOTE: Since this HW feature only works with IPv4 and
1285  *         testing has shown soft LRO to be as effective,
1286  *         this feature will be disabled by default.
1287  ************************************************************************/
1288 static void
1289 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1290 {
1291 	struct ixgbe_softc *sc = rxr->sc;
1292 	struct ixgbe_hw	*hw = &sc->hw;
1293 	u32		rscctrl, rdrxctl;
1294 
1295 	/* If turning LRO/RSC off we need to disable it */
1296 	if ((sc->ifp->if_capenable & IFCAP_LRO) == 0) {
1297 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1298 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1299 		return;
1300 	}
1301 
1302 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1303 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1304 #ifdef DEV_NETMAP
1305 	/* Always strip CRC unless Netmap disabled it */
1306 	if (!(sc->feat_en & IXGBE_FEATURE_NETMAP) ||
1307 	    !(sc->ifp->if_capenable & IFCAP_NETMAP) ||
1308 	    ix_crcstrip)
1309 #endif /* DEV_NETMAP */
1310 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1311 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1312 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1313 
1314 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1315 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1316 	/*
1317 	 * Limit the total number of descriptors that
1318 	 * can be combined, so it does not exceed 64K
1319 	 */
1320 	if (rxr->mbuf_sz == MCLBYTES)
1321 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1322 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1323 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1324 	else if (rxr->mbuf_sz == MJUM9BYTES)
1325 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1326 	else  /* Using 16K cluster */
1327 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1328 
1329 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1330 
1331 	/* Enable TCP header recognition */
1332 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1333 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1334 
1335 	/* Disable RSC for ACK packets */
1336 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1337 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1338 
1339 	rxr->hw_rsc = TRUE;
1340 } /* ixgbe_setup_hw_rsc */
1341 #endif
1342 
1343 /************************************************************************
1344  * ixgbe_refresh_mbufs
1345  *
1346  *   Refresh mbuf buffers for RX descriptor rings
1347  *    - now keeps its own state so discards due to resource
1348  *      exhaustion are unnecessary; if an mbuf cannot be obtained
1349  *      it just returns, keeping its placeholder, so it can simply
1350  *      be called again later to retry.
1351  ************************************************************************/
1352 static void
1353 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1354 {
1355 	struct ixgbe_softc  *sc = rxr->sc;
1356 	struct ixgbe_rx_buf *rxbuf;
1357 	struct mbuf         *mp;
1358 	int                 i, error;
1359 	bool                refreshed = false;
1360 
1361 	i = rxr->next_to_refresh;
1362 	/* next_to_refresh points to the previous one */
1363 	if (++i == rxr->num_desc)
1364 		i = 0;
1365 
1366 	while (i != limit) {
1367 		rxbuf = &rxr->rx_buffers[i];
1368 		if (__predict_false(rxbuf->buf == NULL)) {
1369 			mp = ixgbe_getcl();
1370 			if (mp == NULL) {
1371 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1372 				goto update;
1373 			}
1374 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1375 			IXGBE_M_ADJ(sc, rxr, mp);
1376 		} else
1377 			mp = rxbuf->buf;
1378 
1379 		/* If we're dealing with an mbuf that was copied rather
1380 		 * than replaced, there's no need to go through busdma.
1381 		 */
1382 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1383 			/* Get the memory mapping */
1384 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1385 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1386 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1387 			if (__predict_false(error != 0)) {
1388 				device_printf(sc->dev, "Refresh mbufs: "
1389 				    "payload dmamap load failure - %d\n",
1390 				    error);
1391 				m_free(mp);
1392 				rxbuf->buf = NULL;
1393 				goto update;
1394 			}
1395 			rxbuf->buf = mp;
1396 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1397 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1398 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1399 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1400 		} else {
1401 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1402 			rxbuf->flags &= ~IXGBE_RX_COPY;
1403 		}
1404 
1405 		refreshed = true;
1406 		/* next_to_refresh points to the previous one */
1407 		rxr->next_to_refresh = i;
1408 		if (++i == rxr->num_desc)
1409 			i = 0;
1410 	}
1411 
1412 update:
1413 	if (refreshed) /* Update hardware tail index */
1414 		IXGBE_WRITE_REG(&sc->hw, rxr->tail, rxr->next_to_refresh);
1415 
1416 	return;
1417 } /* ixgbe_refresh_mbufs */
1418 
1419 /************************************************************************
1420  * ixgbe_allocate_receive_buffers
1421  *
1422  *   Allocate memory for rx_buffer structures. Since we use one
1423  *   rx_buffer per received packet, the maximum number of rx_buffers
1424  *   that we'll need is equal to the number of receive descriptors
1425  *   that we've allocated.
1426  ************************************************************************/
1427 static int
1428 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1429 {
1430 	struct ixgbe_softc  *sc = rxr->sc;
1431 	device_t            dev = sc->dev;
1432 	struct ixgbe_rx_buf *rxbuf;
1433 	int                 bsize, error;
1434 
1435 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1436 	rxr->rx_buffers = kmem_zalloc(bsize, KM_SLEEP);
1437 
1438 	error = ixgbe_dma_tag_create(
1439 	         /*      parent */ sc->osdep.dmat,
1440 	         /*   alignment */ 1,
1441 	         /*      bounds */ 0,
1442 	         /*     maxsize */ MJUM16BYTES,
1443 	         /*   nsegments */ 1,
1444 	         /*  maxsegsize */ MJUM16BYTES,
1445 	         /*       flags */ 0,
1446 	                           &rxr->ptag);
1447 	if (error != 0) {
1448 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1449 		goto fail;
1450 	}
1451 
1452 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1453 		rxbuf = &rxr->rx_buffers[i];
1454 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1455 		if (error) {
1456 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1457 			goto fail;
1458 		}
1459 	}
1460 
1461 	return (0);
1462 
1463 fail:
1464 	/* Frees all, but can handle partial completion */
1465 	ixgbe_free_receive_structures(sc);
1466 
1467 	return (error);
1468 } /* ixgbe_allocate_receive_buffers */
1469 
1470 /************************************************************************
1471  * ixgbe_free_receive_ring
1472  ************************************************************************/
1473 static void
1474 ixgbe_free_receive_ring(struct rx_ring *rxr)
1475 {
1476 	for (int i = 0; i < rxr->num_desc; i++) {
1477 		ixgbe_rx_discard(rxr, i);
1478 	}
1479 } /* ixgbe_free_receive_ring */
1480 
1481 /************************************************************************
1482  * ixgbe_setup_receive_ring
1483  *
1484  *   Initialize a receive ring and its buffers.
1485  ************************************************************************/
1486 static int
1487 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1488 {
1489 	struct ixgbe_softc    *sc;
1490 	struct ixgbe_rx_buf   *rxbuf;
1491 #ifdef LRO
1492 	struct ifnet          *ifp;
1493 	struct lro_ctrl       *lro = &rxr->lro;
1494 #endif /* LRO */
1495 #ifdef DEV_NETMAP
1496 	struct netmap_adapter *na = NA(rxr->sc->ifp);
1497 	struct netmap_slot    *slot;
1498 #endif /* DEV_NETMAP */
1499 	int                   rsize, error = 0;
1500 
1501 	sc = rxr->sc;
1502 #ifdef LRO
1503 	ifp = sc->ifp;
1504 #endif /* LRO */
1505 
1506 	/* Clear the ring contents */
1507 	IXGBE_RX_LOCK(rxr);
1508 
1509 #ifdef DEV_NETMAP
1510 	if (sc->feat_en & IXGBE_FEATURE_NETMAP)
1511 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1512 #endif /* DEV_NETMAP */
1513 
1514 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
1515 	KASSERT((rsize % DBA_ALIGN) == 0);
1516 	bzero((void *)rxr->rx_base, rsize);
1517 	/* Cache the size */
1518 	rxr->mbuf_sz = sc->rx_mbuf_sz;
1519 
1520 	/* Free current RX buffer structs and their mbufs */
1521 	ixgbe_free_receive_ring(rxr);
1522 
1523 	/* Now replenish the mbufs */
1524 	for (int i = 0; i < rxr->num_desc; i++) {
1525 		struct mbuf *mp;
1526 
1527 		rxbuf = &rxr->rx_buffers[i];
1528 
1529 #ifdef DEV_NETMAP
1530 		/*
1531 		 * In netmap mode, fill the map and set the buffer
1532 		 * address in the NIC ring, considering the offset
1533 		 * between the netmap and NIC rings (see comment in
1534 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1535 		 * an mbuf, so end the block with a continue;
1536 		 */
1537 		if ((sc->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1538 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], i);
1539 			uint64_t paddr;
1540 			void *addr;
1541 
1542 			addr = PNMB(na, slot + sj, &paddr);
1543 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1544 			/* Update descriptor and the cached value */
1545 			rxr->rx_base[i].read.pkt_addr = htole64(paddr);
1546 			rxbuf->addr = htole64(paddr);
1547 			continue;
1548 		}
1549 #endif /* DEV_NETMAP */
1550 
1551 		rxbuf->flags = 0;
1552 		rxbuf->buf = ixgbe_getcl();
1553 		if (rxbuf->buf == NULL) {
1554 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1555 			error = ENOBUFS;
1556 			goto fail;
1557 		}
1558 		mp = rxbuf->buf;
1559 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1560 		IXGBE_M_ADJ(sc, rxr, mp);
1561 		/* Get the memory mapping */
1562 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1563 		    mp, BUS_DMA_NOWAIT);
1564 		if (error != 0) {
1565 			/*
1566 			 * Clear this entry for later cleanup in
1567 			 * ixgbe_rx_discard(), which is called via
1568 			 * ixgbe_free_receive_ring().
1569 			 */
1570 			m_freem(mp);
1571 			rxbuf->buf = NULL;
1572 			goto fail;
1573 		}
1574 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1575 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1576 		/* Update the descriptor and the cached value */
1577 		rxr->rx_base[i].read.pkt_addr =
1578 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1579 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1580 	}
1581 
1582 	/* Setup our descriptor indices */
1583 	rxr->next_to_check = 0;
1584 	rxr->next_to_refresh = sc->num_rx_desc - 1; /* Fully allocated */
1585 #ifdef LRO
1586 	rxr->lro_enabled = FALSE;
1587 #endif
1588 	rxr->discard_multidesc = false;
1589 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1590 #if 0 /* NetBSD */
1591 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1592 #if 1	/* Fix inconsistency */
1593 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1594 #endif
1595 #endif
1596 	rxr->vtag_strip = FALSE;
1597 
1598 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1599 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1600 
1601 	/*
1602 	 * Now set up the LRO interface
1603 	 */
1604 #ifdef RSC
1605 	if (ixgbe_rsc_enable)
1606 		ixgbe_setup_hw_rsc(rxr);
1607 #endif
1608 #ifdef LRO
1609 #ifdef RSC
1610 	else
1611 #endif
1612 	if (ifp->if_capenable & IFCAP_LRO) {
1613 		device_t dev = sc->dev;
1614 		int err = tcp_lro_init(lro);
1615 		if (err) {
1616 			device_printf(dev, "LRO Initialization failed!\n");
1617 			goto fail;
1618 		}
1619 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1620 		rxr->lro_enabled = TRUE;
1621 		lro->ifp = sc->ifp;
1622 	}
1623 #endif /* LRO */
1624 
1625 	IXGBE_RX_UNLOCK(rxr);
1626 
1627 	return (0);
1628 
1629 fail:
1630 	ixgbe_free_receive_ring(rxr);
1631 	IXGBE_RX_UNLOCK(rxr);
1632 
1633 	return (error);
1634 } /* ixgbe_setup_receive_ring */
1635 
1636 /************************************************************************
1637  * ixgbe_setup_receive_structures - Initialize all receive rings.
1638  ************************************************************************/
1639 int
1640 ixgbe_setup_receive_structures(struct ixgbe_softc *sc)
1641 {
1642 	struct rx_ring *rxr = sc->rx_rings;
1643 	int            j;
1644 
1645 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1646 	for (j = 0; j < sc->num_queues; j++, rxr++)
1647 		if (ixgbe_setup_receive_ring(rxr))
1648 			goto fail;
1649 
1650 	return (0);
1651 fail:
1652 	/*
1653 	 * Free the RX buffers allocated so far. We only handle the
1654 	 * rings that completed; the failing ring has already cleaned
1655 	 * up after itself. Ring 'j' failed, so it is the terminus.
1656 	 */
1657 	for (int i = 0; i < j; ++i) {
1658 		rxr = &sc->rx_rings[i];
1659 		IXGBE_RX_LOCK(rxr);
1660 		ixgbe_free_receive_ring(rxr);
1661 		IXGBE_RX_UNLOCK(rxr);
1662 	}
1663 
1664 	return (ENOBUFS);
1665 } /* ixgbe_setup_receive_structures */
1666 
1667 
1668 /************************************************************************
1669  * ixgbe_free_receive_structures - Free all receive rings.
1670  ************************************************************************/
1671 void
1672 ixgbe_free_receive_structures(struct ixgbe_softc *sc)
1673 {
1674 	struct rx_ring *rxr = sc->rx_rings;
1675 
1676 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1677 
1678 	for (int i = 0; i < sc->num_queues; i++, rxr++) {
1679 		ixgbe_free_receive_buffers(rxr);
1680 #ifdef LRO
1681 		/* Free LRO memory */
1682 		tcp_lro_free(&rxr->lro);
1683 #endif /* LRO */
1684 		/* Free the ring memory as well */
1685 		ixgbe_dma_free(sc, &rxr->rxdma);
1686 		IXGBE_RX_LOCK_DESTROY(rxr);
1687 	}
1688 
1689 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
1690 } /* ixgbe_free_receive_structures */
1691 
1692 
1693 /************************************************************************
1694  * ixgbe_free_receive_buffers - Free receive ring data structures
1695  ************************************************************************/
1696 static void
1697 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1698 {
1699 	struct ixgbe_softc  *sc = rxr->sc;
1700 	struct ixgbe_rx_buf *rxbuf;
1701 
1702 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1703 
1704 	/* Cleanup any existing buffers */
1705 	if (rxr->rx_buffers != NULL) {
1706 		for (int i = 0; i < sc->num_rx_desc; i++) {
1707 			rxbuf = &rxr->rx_buffers[i];
1708 			ixgbe_rx_discard(rxr, i);
1709 			if (rxbuf->pmap != NULL) {
1710 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1711 				rxbuf->pmap = NULL;
1712 			}
1713 		}
1714 
1715 		if (rxr->rx_buffers != NULL) {
1716 			kmem_free(rxr->rx_buffers,
1717 			    sizeof(struct ixgbe_rx_buf) * rxr->num_desc);
1718 			rxr->rx_buffers = NULL;
1719 		}
1720 	}
1721 
1722 	if (rxr->ptag != NULL) {
1723 		ixgbe_dma_tag_destroy(rxr->ptag);
1724 		rxr->ptag = NULL;
1725 	}
1726 
1727 	return;
1728 } /* ixgbe_free_receive_buffers */
1729 
1730 /************************************************************************
1731  * ixgbe_rx_input
1732  ************************************************************************/
1733 static __inline void
1734 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1735     u32 ptype)
1736 {
1737 	struct ixgbe_softc *sc = ifp->if_softc;
1738 
1739 #ifdef LRO
1740 	struct ethercom *ec = &sc->osdep.ec;
1741 
1742 	/*
1743 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1744 	 * has been verified by hardware, and which carry no VLAN tag in the
1745 	 * Ethernet header.  For IPv6 we do not yet support extension headers.
1746 	 */
1747 	if (rxr->lro_enabled &&
1748 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1749 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1750 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1751 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1752 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1753 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1754 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1755 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1756 		/*
1757 		 * Send to the stack if:
1758 		 *  - LRO not enabled, or
1759 		 *  - no LRO resources, or
1760 		 *  - lro enqueue fails
1761 		 */
1762 		if (rxr->lro.lro_cnt != 0)
1763 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1764 				return;
1765 	}
1766 #endif /* LRO */
1767 
1768 	if_percpuq_enqueue(sc->ipq, m);
1769 } /* ixgbe_rx_input */
1770 
1771 /************************************************************************
1772  * ixgbe_rx_discard
1773  ************************************************************************/
1774 static __inline void
1775 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1776 {
1777 	struct ixgbe_rx_buf *rbuf;
1778 
1779 	rbuf = &rxr->rx_buffers[i];
1780 
1781 	/*
1782 	 * With advanced descriptors the writeback clobbers the buffer addrs,
1783 	 * so it's easier to just free the existing mbufs and take the normal
1784 	 * refresh path to get new buffers and mapping.
1785 	 */
1786 
1787 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1788 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1789 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1790 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1791 		m_freem(rbuf->fmp);
1792 		rbuf->fmp = NULL;
1793 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1794 	} else if (rbuf->buf) {
1795 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1796 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1797 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1798 		m_free(rbuf->buf);
1799 		rbuf->buf = NULL;
1800 	}
1801 
1802 	rbuf->flags = 0;
1803 
1804 	return;
1805 } /* ixgbe_rx_discard */
1806 
1807 
1808 /************************************************************************
1809  * ixgbe_rxeof
1810  *
1811  *   Executes in interrupt context. Replenishes the mbufs in the
1812  *   descriptor ring and passes data that has been DMA'd into host
1813  *   memory up to the upper layer.
1814  *
1815  *   Returns TRUE if more work remains, FALSE when the ring is clean.
1816  ************************************************************************/
1817 bool
1818 ixgbe_rxeof(struct ix_queue *que)
1819 {
1820 	struct ixgbe_softc	*sc = que->sc;
1821 	struct rx_ring		*rxr = que->rxr;
1822 	struct ifnet		*ifp = sc->ifp;
1823 #ifdef LRO
1824 	struct lro_ctrl		*lro = &rxr->lro;
1825 #endif /* LRO */
1826 	union ixgbe_adv_rx_desc	*cur;
1827 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1828 	int			i, nextp, processed = 0;
1829 	u32			staterr = 0;
1830 	u32			loopcount = 0, numdesc;
1831 	u32			limit = sc->rx_process_limit;
1832 	u32			rx_copy_len = sc->rx_copy_len;
1833 	bool			discard_multidesc = rxr->discard_multidesc;
1834 	bool			wraparound = false;
1835 	unsigned int		syncremain;
1836 #ifdef RSS
1837 	u16			pkt_info;
1838 #endif
1839 
1840 	IXGBE_RX_LOCK(rxr);
1841 
1842 #ifdef DEV_NETMAP
1843 	if (sc->feat_en & IXGBE_FEATURE_NETMAP) {
1844 		/* Same as the txeof routine: wakeup clients on intr. */
1845 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1846 			IXGBE_RX_UNLOCK(rxr);
1847 			return (FALSE);
1848 		}
1849 	}
1850 #endif /* DEV_NETMAP */
1851 
1852 	/* Sync the ring; the size is rx_process_limit, or the first half on wrap */
1853 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1854 		/* Non-wraparound */
1855 		numdesc = limit;
1856 		syncremain = 0;
1857 	} else {
1858 		/* Wraparound. Sync the first half. */
1859 		numdesc = rxr->num_desc - rxr->next_to_check;
1860 
1861 		/* Set the size of the last half */
1862 		syncremain = limit - numdesc;
1863 	}
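	/*
	 * Sync the first part here; any remaining descriptors (syncremain,
	 * starting at index 0) are synced inside the loop after wraparound.
	 */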
1864 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1865 	    rxr->rxdma.dma_map,
1866 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1867 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1868 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1869 
1870 	/*
1871 	 * The maximum number of iterations is rx_process_limit. If
1872 	 * discard_multidesc is true, keep processing so that a broken
1873 	 * packet is not handed to the upper layer.
1874 	 */
1875 	for (i = rxr->next_to_check;
1876 	     (loopcount < limit) || (discard_multidesc == true);) {
1877 
1878 		struct mbuf *sendmp, *mp;
1879 		struct mbuf *newmp;
1880 #ifdef RSC
1881 		u32         rsc;
1882 #endif
1883 		u32         ptype;
1884 		u16         len;
1885 		u16         vtag = 0;
1886 		bool        eop;
1887 		bool        discard = false;
1888 
1889 		if (wraparound) {
1890 			/* Sync the last half. */
1891 			KASSERT(syncremain != 0);
1892 			numdesc = syncremain;
1893 			wraparound = false;
1894 		} else if (__predict_false(loopcount >= limit)) {
1895 			KASSERT(discard_multidesc == true);
1896 			numdesc = 1;
1897 		} else
1898 			numdesc = 0;
1899 
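		/*
		 * numdesc != 0 means more of the ring must be synced for
		 * reading before the current descriptor can be examined.
		 */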
1900 		if (numdesc != 0)
1901 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1902 			    rxr->rxdma.dma_map, 0,
1903 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1904 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1905 
1906 		cur = &rxr->rx_base[i];
1907 		staterr = le32toh(cur->wb.upper.status_error);
1908 #ifdef RSS
1909 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1910 #endif
1911 
1912 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1913 			break;
1914 
1915 		loopcount++;
1916 		sendmp = newmp = NULL;
1917 		nbuf = NULL;
1918 #ifdef RSC
1919 		rsc = 0;
1920 #endif
1921 		cur->wb.upper.status_error = 0;
1922 		rbuf = &rxr->rx_buffers[i];
1923 		mp = rbuf->buf;
1924 
1925 		len = le16toh(cur->wb.upper.length);
1926 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1927 		    IXGBE_RXDADV_PKTTYPE_MASK;
1928 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1929 
1930 		/* Make sure bad packets are discarded */
1931 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1932 #if __FreeBSD_version >= 1100036
1933 			if (sc->feat_en & IXGBE_FEATURE_VF)
1934 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1935 #endif
1936 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1937 			ixgbe_rx_discard(rxr, i);
1938 			discard_multidesc = false;
1939 			goto next_desc;
1940 		}
1941 
1942 		if (__predict_false(discard_multidesc))
1943 			discard = true;
1944 		else {
1945 			/* Pre-alloc new mbuf. */
1946 
1947 			if ((rbuf->fmp == NULL) &&
1948 			    eop && (len <= rx_copy_len)) {
1949 				/* For short packet. See below. */
1950 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1951 				if (__predict_false(sendmp == NULL)) {
1952 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1953 					discard = true;
1954 				}
1955 			} else {
1956 				/* For long packet. */
1957 				newmp = ixgbe_getcl();
1958 				if (__predict_false(newmp == NULL)) {
1959 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1960 					discard = true;
1961 				}
1962 			}
1963 		}
1964 
1965 		if (__predict_false(discard)) {
1966 			/*
1967 			 * Descriptor initialization is already done by the
1968 			 * above code (cur->wb.upper.status_error = 0).
1969 			 * So we can reuse the current rbuf->buf for a new packet.
1970 			 *
1971 			 * Rewrite the buffer addr, see comment in
1972 			 * ixgbe_rx_discard().
1973 			 */
1974 			cur->read.pkt_addr = rbuf->addr;
1975 			m_freem(rbuf->fmp);
1976 			rbuf->fmp = NULL;
1977 			if (!eop) {
1978 				/* Discard the entire packet. */
1979 				discard_multidesc = true;
1980 			} else
1981 				discard_multidesc = false;
1982 			goto next_desc;
1983 		}
1984 		discard_multidesc = false;
1985 
1986 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1987 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1988 
1989 		/*
1990 		 * On the 82599, which supports a hardware
1991 		 * LRO (called HW RSC), packets need not be
1992 		 * fragmented across sequential descriptors;
1993 		 * instead the next descriptor is indicated
1994 		 * in bits of the current descriptor.
1995 		 * This also means that we might process
1996 		 * more than one packet at a time, something
1997 		 * that was never true before, and it
1998 		 * required eliminating the global chain
1999 		 * pointers in favor of what we do here.  -jfv
2000 		 */
2001 		if (!eop) {
2002 			/*
2003 			 * Figure out the next descriptor
2004 			 * of this frame.
2005 			 */
2006 #ifdef RSC
2007 			if (rxr->hw_rsc == TRUE) {
2008 				rsc = ixgbe_rsc_count(cur);
2009 				rxr->rsc_num += (rsc - 1);
2010 			}
2011 			if (rsc) { /* Get hardware index */
2012 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2013 				    IXGBE_RXDADV_NEXTP_SHIFT);
2014 			} else
2015 #endif
2016 			{ /* Just sequential */
2017 				nextp = i + 1;
2018 				if (nextp == sc->num_rx_desc)
2019 					nextp = 0;
2020 			}
2021 			nbuf = &rxr->rx_buffers[nextp];
2022 			prefetch(nbuf);
2023 		}
2024 		/*
2025 		 * Rather than using the fmp/lmp global pointers
2026 		 * we now keep the head of a packet chain in the
2027 		 * buffer struct and pass this along from one
2028 		 * descriptor to the next, until we get EOP.
2029 		 */
2030 		/*
2031 		 * See if there is a stored head for this chain,
2032 		 * which tells us whether this is a continuation.
2033 		 */
2034 		if (rbuf->fmp != NULL) {
2035 			/* Secondary frag */
2036 			sendmp = rbuf->fmp;
2037 
2038 			/* Update new (used in future) mbuf */
2039 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2040 			IXGBE_M_ADJ(sc, rxr, newmp);
2041 			rbuf->buf = newmp;
2042 			rbuf->fmp = NULL;
2043 
2044 			/* For secondary frag */
2045 			mp->m_len = len;
2046 			mp->m_flags &= ~M_PKTHDR;
2047 
2048 			/* For sendmp */
2049 			sendmp->m_pkthdr.len += mp->m_len;
2050 		} else {
2051 			/*
2052 			 * It's the first segment of a multi descriptor
2053 			 * packet or a single segment which contains a full
2054 			 * packet.
2055 			 */
2056 
2057 			if (eop && (len <= rx_copy_len)) {
2058 				/*
2059 				 * Optimize: this is likely a small packet,
2060 				 * perhaps just a TCP ACK. Copy it into a new
2061 				 * mbuf and leave the old mbuf+cluster for reuse.
2062 				 */
2063 				sendmp->m_data += ETHER_ALIGN;
2064 				memcpy(mtod(sendmp, void *),
2065 				    mtod(mp, void *), len);
2066 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2067 				rbuf->flags |= IXGBE_RX_COPY;
2068 			} else {
2069 				/* For long packet */
2070 
2071 				/* Update new (used in future) mbuf */
2072 				newmp->m_pkthdr.len = newmp->m_len
2073 				    = rxr->mbuf_sz;
2074 				IXGBE_M_ADJ(sc, rxr, newmp);
2075 				rbuf->buf = newmp;
2076 				rbuf->fmp = NULL;
2077 
2078 				/* For sendmp */
2079 				sendmp = mp;
2080 			}
2081 
2082 			/* first desc of a non-ps chain */
2083 			sendmp->m_pkthdr.len = sendmp->m_len = len;
2084 		}
2085 		++processed;
2086 
2087 		/* Pass the head pointer on */
2088 		if (eop == 0) {
2089 			nbuf->fmp = sendmp;
2090 			sendmp = NULL;
2091 			mp->m_next = nbuf->buf;
2092 		} else { /* Sending this frame */
2093 			m_set_rcvif(sendmp, ifp);
2094 			++rxr->packets;
2095 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2096 			/* capture data for AIM */
2097 			rxr->bytes += sendmp->m_pkthdr.len;
2098 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2099 			/* Process vlan info */
2100 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2101 				vtag = le16toh(cur->wb.upper.vlan);
2102 			if (vtag) {
2103 				vlan_set_tag(sendmp, vtag);
2104 			}
2105 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2106 				ixgbe_rx_checksum(staterr, sendmp, ptype,
2107 				   &sc->stats.pf);
2108 			}
2109 
2110 #if 0 /* FreeBSD */
2111 			/*
2112 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
2113 			 * and never cleared. This means we have RSS hash
2114 			 * available to be used.
2115 			 */
2116 			if (sc->num_queues > 1) {
2117 				sendmp->m_pkthdr.flowid =
2118 				    le32toh(cur->wb.lower.hi_dword.rss);
2119 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2120 				case IXGBE_RXDADV_RSSTYPE_IPV4:
2121 					M_HASHTYPE_SET(sendmp,
2122 					    M_HASHTYPE_RSS_IPV4);
2123 					break;
2124 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2125 					M_HASHTYPE_SET(sendmp,
2126 					    M_HASHTYPE_RSS_TCP_IPV4);
2127 					break;
2128 				case IXGBE_RXDADV_RSSTYPE_IPV6:
2129 					M_HASHTYPE_SET(sendmp,
2130 					    M_HASHTYPE_RSS_IPV6);
2131 					break;
2132 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2133 					M_HASHTYPE_SET(sendmp,
2134 					    M_HASHTYPE_RSS_TCP_IPV6);
2135 					break;
2136 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2137 					M_HASHTYPE_SET(sendmp,
2138 					    M_HASHTYPE_RSS_IPV6_EX);
2139 					break;
2140 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2141 					M_HASHTYPE_SET(sendmp,
2142 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
2143 					break;
2144 #if __FreeBSD_version > 1100000
2145 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2146 					M_HASHTYPE_SET(sendmp,
2147 					    M_HASHTYPE_RSS_UDP_IPV4);
2148 					break;
2149 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2150 					M_HASHTYPE_SET(sendmp,
2151 					    M_HASHTYPE_RSS_UDP_IPV6);
2152 					break;
2153 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2154 					M_HASHTYPE_SET(sendmp,
2155 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2156 					break;
2157 #endif
2158 				default:
2159 					M_HASHTYPE_SET(sendmp,
2160 					    M_HASHTYPE_OPAQUE_HASH);
2161 				}
2162 			} else {
2163 				sendmp->m_pkthdr.flowid = que->msix;
2164 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2165 			}
2166 #endif
2167 		}
2168 next_desc:
2169 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2170 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2171 
2172 		/* Advance our pointers to the next descriptor. */
2173 		if (++i == rxr->num_desc) {
2174 			wraparound = true;
2175 			i = 0;
2176 		}
2177 		rxr->next_to_check = i;
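		/* next_to_check persists in the ring so a later call resumes here. */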
2178 
2179 		/* Now send to the stack or do LRO */
2180 		if (sendmp != NULL)
2181 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2182 
2183 		/* Every 8 descriptors we go to refresh mbufs */
2184 		if (processed == 8) {
2185 			ixgbe_refresh_mbufs(rxr, i);
2186 			processed = 0;
2187 		}
2188 	}
2189 
2190 	/* Save the current status */
2191 	rxr->discard_multidesc = discard_multidesc;
2192 
2193 	/* Refresh any remaining buf structs */
2194 	if (ixgbe_rx_unrefreshed(rxr))
2195 		ixgbe_refresh_mbufs(rxr, i);
2196 
2197 	IXGBE_RX_UNLOCK(rxr);
2198 
2199 #ifdef LRO
2200 	/*
2201 	 * Flush any outstanding LRO work
2202 	 */
2203 	tcp_lro_flush_all(lro);
2204 #endif /* LRO */
2205 
2206 	/*
2207 	 * Still have cleaning to do?
2208 	 */
2209 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2210 		return (TRUE);
2211 
2212 	return (FALSE);
2213 } /* ixgbe_rxeof */
2214 
2215 
2216 /************************************************************************
2217  * ixgbe_rx_checksum
2218  *
2219  *   Verify that the hardware indicated that the checksum is valid.
2220  *   Inform the stack about the status of checksum so that stack
2221  *   Inform the stack about the checksum status so that the stack
2222  *   doesn't spend time re-verifying the checksum.
2223 static void
2224 ixgbe_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype,
2225     struct ixgbe_hw_stats *stats)
2226 {
2227 	u16  status = (u16)staterr;
2228 	u8   errors = (u8)(staterr >> 24);
2229 #if 0
2230 	bool sctp = false;
2231 
2232 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2233 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2234 		sctp = true;
2235 #endif
2236 
2237 	/* IPv4 checksum */
2238 	if (status & IXGBE_RXD_STAT_IPCS) {
2239 		IXGBE_EVC_ADD(&stats->ipcs, 1);
2240 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2241 			/* IP Checksum Good */
2242 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2243 		} else {
2244 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2245 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2246 		}
2247 	}
2248 	/* TCP/UDP/SCTP checksum */
2249 	if (status & IXGBE_RXD_STAT_L4CS) {
2250 		IXGBE_EVC_ADD(&stats->l4cs, 1);
2251 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2252 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2253 			mp->m_pkthdr.csum_flags |= type;
2254 		} else {
2255 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2256 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2257 		}
2258 	}
2259 } /* ixgbe_rx_checksum */
2260 
2261 /************************************************************************
2262  * ixgbe_dma_malloc
2263  ************************************************************************/
2264 int
2265 ixgbe_dma_malloc(struct ixgbe_softc *sc, const bus_size_t size,
2266 		struct ixgbe_dma_alloc *dma, const int mapflags)
2267 {
2268 	device_t dev = sc->dev;
2269 	int      r, rsegs;
2270 
2271 	r = ixgbe_dma_tag_create(
2272 	     /*      parent */ sc->osdep.dmat,
2273 	     /*   alignment */ DBA_ALIGN,
2274 	     /*      bounds */ 0,
2275 	     /*     maxsize */ size,
2276 	     /*   nsegments */ 1,
2277 	     /*  maxsegsize */ size,
2278 	     /*       flags */ BUS_DMA_ALLOCNOW,
2279 			       &dma->dma_tag);
2280 	if (r != 0) {
2281 		aprint_error_dev(dev,
2282 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2283 		    r);
2284 		goto fail_0;
2285 	}
2286 
2287 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2288 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2289 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2290 	if (r != 0) {
2291 		aprint_error_dev(dev,
2292 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2293 		goto fail_1;
2294 	}
2295 
2296 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2297 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2298 	if (r != 0) {
2299 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2300 		    __func__, r);
2301 		goto fail_2;
2302 	}
2303 
2304 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2305 	if (r != 0) {
2306 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2307 		    __func__, r);
2308 		goto fail_3;
2309 	}
2310 
2311 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2312 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2313 	if (r != 0) {
2314 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2315 		    __func__, r);
2316 		goto fail_4;
2317 	}
2318 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2319 	dma->dma_size = size;
2320 	return 0;
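	/*
	 * Error unwind: each label below releases the resource acquired
	 * just before the corresponding goto, in reverse order.
	 */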
2321 fail_4:
2322 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2323 fail_3:
2324 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2325 fail_2:
2326 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2327 fail_1:
2328 	ixgbe_dma_tag_destroy(dma->dma_tag);
2329 fail_0:
2330 
2331 	return (r);
2332 } /* ixgbe_dma_malloc */
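
/*
 * Illustrative usage sketch (not part of the driver): how ixgbe_dma_malloc()
 * and ixgbe_dma_free() are typically paired.  The size and variable names
 * below are assumptions for illustration only.
 *
 *	struct ixgbe_dma_alloc ring;
 *
 *	if (ixgbe_dma_malloc(sc, 4096, &ring, BUS_DMA_NOWAIT) == 0) {
 *		(void)ring.dma_vaddr;	// kernel virtual mapping of the area
 *		(void)ring.dma_paddr;	// bus address to program into the NIC
 *		ixgbe_dma_free(sc, &ring);
 *	}
 */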
2333 
2334 /************************************************************************
2335  * ixgbe_dma_free
2336  ************************************************************************/
2337 void
2338 ixgbe_dma_free(struct ixgbe_softc *sc, struct ixgbe_dma_alloc *dma)
2339 {
2340 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2341 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2342 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2343 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2344 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2345 	ixgbe_dma_tag_destroy(dma->dma_tag);
2346 } /* ixgbe_dma_free */
2347 
2348 
2349 /************************************************************************
2350  * ixgbe_allocate_queues
2351  *
2352  *   Allocate memory for the transmit and receive rings, and then
2353  *   the descriptors associated with each, called only once at attach.
2354  ************************************************************************/
2355 int
2356 ixgbe_allocate_queues(struct ixgbe_softc *sc)
2357 {
2358 	device_t	dev = sc->dev;
2359 	struct ix_queue	*que;
2360 	struct tx_ring	*txr;
2361 	struct rx_ring	*rxr;
2362 	int             rsize, tsize, error = IXGBE_SUCCESS;
2363 	int             txconf = 0, rxconf = 0;
2364 
2365 	/* First, allocate the top level queue structs */
2366 	sc->queues = kmem_zalloc(sizeof(struct ix_queue) * sc->num_queues,
2367 	    KM_SLEEP);
2368 
2369 	/* Second, allocate the TX ring struct memory */
2370 	sc->tx_rings = kmem_zalloc(sizeof(struct tx_ring) * sc->num_queues,
2371 	    KM_SLEEP);
2372 
2373 	/* Third, allocate the RX ring */
2374 	sc->rx_rings = kmem_zalloc(sizeof(struct rx_ring) * sc->num_queues,
2375 	    KM_SLEEP);
2376 
2377 	/* For the ring itself */
2378 	tsize = sc->num_tx_desc * sizeof(union ixgbe_adv_tx_desc);
2379 	KASSERT((tsize % DBA_ALIGN) == 0);
2380 
2381 	/*
2382 	 * Now set up the TX queues; txconf is needed to handle the
2383 	 * possibility that things fail midcourse and we need to
2384 	 * undo the allocations gracefully.
2385 	 */
2386 	for (int i = 0; i < sc->num_queues; i++, txconf++) {
2387 		/* Set up some basics */
2388 		txr = &sc->tx_rings[i];
2389 		txr->sc = sc;
2390 		txr->txr_interq = NULL;
2391 		/* In case SR-IOV is enabled, align the index properly */
2392 #ifdef PCI_IOV
2393 		txr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2394 		    i);
2395 #else
2396 		txr->me = i;
2397 #endif
2398 		txr->num_desc = sc->num_tx_desc;
2399 
2400 		/* Initialize the TX side lock */
2401 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2402 
2403 		if (ixgbe_dma_malloc(sc, tsize, &txr->txdma,
2404 		    BUS_DMA_NOWAIT)) {
2405 			aprint_error_dev(dev,
2406 			    "Unable to allocate TX Descriptor memory\n");
2407 			error = ENOMEM;
2408 			goto err_tx_desc;
2409 		}
2410 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2411 		bzero((void *)txr->tx_base, tsize);
2412 
2413 		/* Now allocate transmit buffers for the ring */
2414 		if (ixgbe_allocate_transmit_buffers(txr)) {
2415 			aprint_error_dev(dev,
2416 			    "Critical Failure setting up transmit buffers\n");
2417 			error = ENOMEM;
2418 			goto err_tx_desc;
2419 		}
2420 		if (!(sc->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2421 			/* Allocate a buf ring */
2422 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2423 			if (txr->txr_interq == NULL) {
2424 				aprint_error_dev(dev,
2425 				    "Critical Failure setting up buf ring\n");
2426 				error = ENOMEM;
2427 				goto err_tx_desc;
2428 			}
2429 		}
2430 	}
2431 
2432 	/*
2433 	 * Next the RX queues...
2434 	 */
2435 	rsize = sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc);
2436 	KASSERT((rsize % DBA_ALIGN) == 0);
2437 	for (int i = 0; i < sc->num_queues; i++, rxconf++) {
2438 		rxr = &sc->rx_rings[i];
2439 		/* Set up some basics */
2440 		rxr->sc = sc;
2441 #ifdef PCI_IOV
2442 		/* In case SR-IOV is enabled, align the index properly */
2443 		rxr->me = ixgbe_vf_que_index(sc->iov_mode, sc->pool,
2444 		    i);
2445 #else
2446 		rxr->me = i;
2447 #endif
2448 		rxr->num_desc = sc->num_rx_desc;
2449 
2450 		/* Initialize the RX side lock */
2451 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2452 
2453 		if (ixgbe_dma_malloc(sc, rsize, &rxr->rxdma,
2454 		    BUS_DMA_NOWAIT)) {
2455 			aprint_error_dev(dev,
2456 			    "Unable to allocate RX Descriptor memory\n");
2457 			error = ENOMEM;
2458 			goto err_rx_desc;
2459 		}
2460 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2461 		bzero((void *)rxr->rx_base, rsize);
2462 
2463 		/* Allocate receive buffers for the ring */
2464 		if (ixgbe_allocate_receive_buffers(rxr)) {
2465 			aprint_error_dev(dev,
2466 			    "Critical Failure setting up receive buffers\n");
2467 			error = ENOMEM;
2468 			goto err_rx_desc;
2469 		}
2470 	}
2471 
2472 	/*
2473 	 * Finally set up the queue holding structs
2474 	 */
2475 	for (int i = 0; i < sc->num_queues; i++) {
2476 		que = &sc->queues[i];
2477 		que->sc = sc;
2478 		que->me = i;
2479 		que->txr = &sc->tx_rings[i];
2480 		que->rxr = &sc->rx_rings[i];
2481 
2482 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2483 		que->disabled_count = 0;
2484 	}
2485 
2486 	return (0);
2487 
2488 err_rx_desc:
2489 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2490 		ixgbe_dma_free(sc, &rxr->rxdma);
2491 err_tx_desc:
2492 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2493 		ixgbe_dma_free(sc, &txr->txdma);
2494 	kmem_free(sc->rx_rings, sizeof(struct rx_ring) * sc->num_queues);
2495 	kmem_free(sc->tx_rings, sizeof(struct tx_ring) * sc->num_queues);
2496 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2497 	return (error);
2498 } /* ixgbe_allocate_queues */
2499 
2500 /************************************************************************
2501  * ixgbe_free_queues
2502  *
2503  *   Free descriptors for the transmit and receive rings, and then
2504  *   the memory associated with each.
2505  ************************************************************************/
2506 void
2507 ixgbe_free_queues(struct ixgbe_softc *sc)
2508 {
2509 	struct ix_queue *que;
2510 	int i;
2511 
2512 	ixgbe_free_transmit_structures(sc);
2513 	ixgbe_free_receive_structures(sc);
2514 	for (i = 0; i < sc->num_queues; i++) {
2515 		que = &sc->queues[i];
2516 		mutex_destroy(&que->dc_mtx);
2517 	}
2518 	kmem_free(sc->queues, sizeof(struct ix_queue) * sc->num_queues);
2519 } /* ixgbe_free_queues */
2520