1 /* $NetBSD: ix_txrx.c,v 1.41 2018/04/25 08:46:19 msaitoh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 
69 #include "ixgbe.h"
70 
71 /*
72  * HW RSC control:
 73  *  This feature works only with IPv4,
 74  *  and only on 82599 and later devices.
 75  *  It also causes IP forwarding to fail,
 76  *  which, unlike with LRO, cannot be
 77  *  controlled by the stack. For all these
 78  *  reasons it is best left off, with no
 79  *  tuneable interface; enabling it requires
 80  *  recompiling with the flag below set
 81  *  to TRUE.
82  */
83 static bool ixgbe_rsc_enable = FALSE;
84 
85 /*
86  * For Flow Director: this is the
 87  * sampling interval for TX packets fed
 88  * to the filter pool; with the default
 89  * of 20, every 20th packet is probed.
90  *
91  * This feature can be disabled by
92  * setting this to 0.
93  */
94 static int atr_sample_rate = 20;
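
/*
 * Illustrative sketch of how the sample rate is applied in ixgbe_xmit()
 * below (values are the defaults, shown only for clarity):
 *
 *	txr->atr_count++;                        // one per transmitted packet
 *	if (txr->atr_count >= atr_sample_rate) { // i.e. every 20th packet
 *		ixgbe_atr(txr, m_head);          // push this flow into the filter pool
 *		txr->atr_count = 0;
 *	}
 */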
95 
96 /************************************************************************
97  *  Local Function prototypes
98  ************************************************************************/
99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
104                                        struct ixgbe_hw_stats *);
105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
109                                         struct mbuf *, u32 *, u32 *);
110 static int           ixgbe_tso_setup(struct tx_ring *,
111                                      struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114                                     struct mbuf *, u32);
115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
116                                       struct ixgbe_dma_alloc *, int);
117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118 
119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
120 
121 /************************************************************************
122  * ixgbe_legacy_start_locked - Transmit entry point
123  *
124  *   Called by the stack to initiate a transmit.
125  *   The driver will remain in this routine as long as there are
126  *   packets to transmit and transmit resources are available.
127  *   In case resources are not available, the stack is notified
128  *   and the packet is requeued.
129  ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 	int rc;
134 	struct mbuf    *m_head;
135 	struct adapter *adapter = txr->adapter;
136 
137 	IXGBE_TX_LOCK_ASSERT(txr);
138 
139 	if (!adapter->link_active) {
140 		/*
 141 		 * Discard all packets buffered in IFQ to avoid
 142 		 * sending stale packets when the link comes back up.
143 		 */
144 		ixgbe_drain(ifp, txr);
145 		return (ENETDOWN);
146 	}
147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
148 		return (ENETDOWN);
149 
150 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
151 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
152 			break;
153 
154 		IFQ_POLL(&ifp->if_snd, m_head);
155 		if (m_head == NULL)
156 			break;
157 
158 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
159 			break;
160 		}
161 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
162 		if (rc != 0) {
163 			m_freem(m_head);
164 			continue;
165 		}
166 
167 		/* Send a copy of the frame to the BPF listener */
168 		bpf_mtap(ifp, m_head);
169 	}
170 
171 	return IXGBE_SUCCESS;
172 } /* ixgbe_legacy_start_locked */
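
/*
 * Note on the dequeue pattern above: IFQ_POLL() only peeks at the head of
 * if_snd, so if ixgbe_xmit() returns EAGAIN (no descriptors or a transient
 * DMA shortage) the packet stays queued and is retried on the next start
 * call.  Only after ixgbe_xmit() has accepted or permanently rejected the
 * mbuf is it removed with IFQ_DEQUEUE().
 */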
173 
174 /************************************************************************
175  * ixgbe_legacy_start
176  *
177  *   Called by the stack, this always uses the first tx ring,
178  *   and should not be used with multiqueue tx enabled.
179  ************************************************************************/
180 void
181 ixgbe_legacy_start(struct ifnet *ifp)
182 {
183 	struct adapter *adapter = ifp->if_softc;
184 	struct tx_ring *txr = adapter->tx_rings;
185 
186 	if (ifp->if_flags & IFF_RUNNING) {
187 		IXGBE_TX_LOCK(txr);
188 		ixgbe_legacy_start_locked(ifp, txr);
189 		IXGBE_TX_UNLOCK(txr);
190 	}
191 } /* ixgbe_legacy_start */
192 
193 /************************************************************************
194  * ixgbe_mq_start - Multiqueue Transmit Entry Point
195  *
196  *   (if_transmit function)
197  ************************************************************************/
198 int
199 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
200 {
201 	struct adapter	*adapter = ifp->if_softc;
202 	struct tx_ring	*txr;
203 	int 		i, err = 0;
204 #ifdef RSS
205 	uint32_t bucket_id;
206 #endif
207 
208 	/*
209 	 * When doing RSS, map it to the same outbound queue
210 	 * as the incoming flow would be mapped to.
211 	 *
 212 	 * If everything is set up correctly, it should be the
 213 	 * same bucket that the current CPU maps to.
214 	 */
215 #ifdef RSS
216 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
218 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
219 		    &bucket_id) == 0)) {
220 			i = bucket_id % adapter->num_queues;
221 #ifdef IXGBE_DEBUG
222 			if (bucket_id > adapter->num_queues)
223 				if_printf(ifp,
224 				    "bucket_id (%d) > num_queues (%d)\n",
225 				    bucket_id, adapter->num_queues);
226 #endif
227 		} else
228 			i = m->m_pkthdr.flowid % adapter->num_queues;
229 	} else
 230 #endif /* RSS */
231 		i = cpu_index(curcpu()) % adapter->num_queues;
232 
233 	/* Check for a hung queue and pick alternative */
234 	if (((1 << i) & adapter->active_queues) == 0)
235 		i = ffs64(adapter->active_queues);
236 
237 	txr = &adapter->tx_rings[i];
238 
239 	err = pcq_put(txr->txr_interq, m);
240 	if (err == false) {
241 		m_freem(m);
242 		txr->pcq_drops.ev_count++;
 243 		return (ENOBUFS);
244 	}
245 	if (IXGBE_TX_TRYLOCK(txr)) {
246 		ixgbe_mq_start_locked(ifp, txr);
247 		IXGBE_TX_UNLOCK(txr);
248 	} else {
249 		if (adapter->txrx_use_workqueue) {
250 			/*
251 			 * This function itself is not called in interrupt
 252 			 * context, but it can be called in fast softint
 253 			 * context right after receiving forwarded packets.
 254 			 * The workqueue must therefore be protected against
 255 			 * being enqueued twice when the machine handles both
 256 			 * locally generated and forwarded packets.
257 			 */
258 			u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
259 			if (*enqueued == 0) {
260 				*enqueued = 1;
261 				percpu_putref(adapter->txr_wq_enqueued);
262 				workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
263 			} else
264 				percpu_putref(adapter->txr_wq_enqueued);
265 		} else
266 			softint_schedule(txr->txr_si);
267 	}
268 
269 	return (0);
270 } /* ixgbe_mq_start */
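
/*
 * Queue selection sketch for ixgbe_mq_start() (illustrative numbers only):
 * with num_queues = 8 and no RSS hash available, a packet submitted from
 * CPU 11 is mapped to ring cpu_index % 8 = 3.  If bit 3 is clear in
 * adapter->active_queues (the ring is suspected hung), the packet is moved
 * to the first active ring instead, as found by ffs64().
 */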
271 
272 /************************************************************************
273  * ixgbe_mq_start_locked
274  ************************************************************************/
275 int
276 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
277 {
278 	struct mbuf    *next;
279 	int            enqueued = 0, err = 0;
280 
281 	if (!txr->adapter->link_active) {
282 		/*
 283 		 * Discard all packets buffered in txr_interq to avoid
 284 		 * sending stale packets when the link comes back up.
285 		 */
286 		ixgbe_drain(ifp, txr);
287 		return (ENETDOWN);
288 	}
289 	if ((ifp->if_flags & IFF_RUNNING) == 0)
290 		return (ENETDOWN);
291 
292 	/* Process the queue */
293 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
294 		if ((err = ixgbe_xmit(txr, next)) != 0) {
295 			m_freem(next);
296 			/* All errors are counted in ixgbe_xmit() */
297 			break;
298 		}
299 		enqueued++;
300 #if __FreeBSD_version >= 1100036
301 		/*
302 		 * Since we're looking at the tx ring, we can check
 303 		 * to see if we're a VF by examining our tail register
304 		 * address.
305 		 */
306 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
307 		    (next->m_flags & M_MCAST))
308 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
309 #endif
310 		/* Send a copy of the frame to the BPF listener */
311 		bpf_mtap(ifp, next);
312 		if ((ifp->if_flags & IFF_RUNNING) == 0)
313 			break;
314 	}
315 
316 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
317 		ixgbe_txeof(txr);
318 
319 	return (err);
320 } /* ixgbe_mq_start_locked */
321 
322 /************************************************************************
323  * ixgbe_deferred_mq_start
324  *
325  *   Called from a softint and workqueue (indirectly) to drain queued
326  *   transmit packets.
327  ************************************************************************/
328 void
329 ixgbe_deferred_mq_start(void *arg)
330 {
331 	struct tx_ring *txr = arg;
332 	struct adapter *adapter = txr->adapter;
333 	struct ifnet   *ifp = adapter->ifp;
334 
335 	IXGBE_TX_LOCK(txr);
336 	if (pcq_peek(txr->txr_interq) != NULL)
337 		ixgbe_mq_start_locked(ifp, txr);
338 	IXGBE_TX_UNLOCK(txr);
339 } /* ixgbe_deferred_mq_start */
340 
341 /************************************************************************
342  * ixgbe_deferred_mq_start_work
343  *
344  *   Called from a workqueue to drain queued transmit packets.
345  ************************************************************************/
346 void
347 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
348 {
349 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
350 	struct adapter *adapter = txr->adapter;
351 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
352 	*enqueued = 0;
353 	percpu_putref(adapter->txr_wq_enqueued);
354 
355 	ixgbe_deferred_mq_start(txr);
 356 } /* ixgbe_deferred_mq_start_work */
357 
358 /************************************************************************
359  * ixgbe_drain_all
360  ************************************************************************/
361 void
362 ixgbe_drain_all(struct adapter *adapter)
363 {
364 	struct ifnet *ifp = adapter->ifp;
365 	struct ix_queue *que = adapter->queues;
366 
367 	for (int i = 0; i < adapter->num_queues; i++, que++) {
368 		struct tx_ring  *txr = que->txr;
369 
370 		IXGBE_TX_LOCK(txr);
371 		ixgbe_drain(ifp, txr);
372 		IXGBE_TX_UNLOCK(txr);
373 	}
374 }
375 
376 /************************************************************************
377  * ixgbe_xmit
378  *
379  *   Maps the mbufs to tx descriptors, allowing the
380  *   TX engine to transmit the packets.
381  *
382  *   Return 0 on success, positive on failure
383  ************************************************************************/
384 static int
385 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
386 {
387 	struct adapter          *adapter = txr->adapter;
388 	struct ixgbe_tx_buf     *txbuf;
389 	union ixgbe_adv_tx_desc *txd = NULL;
390 	struct ifnet	        *ifp = adapter->ifp;
391 	int                     i, j, error;
392 	int                     first;
393 	u32                     olinfo_status = 0, cmd_type_len;
394 	bool                    remap = TRUE;
395 	bus_dmamap_t            map;
396 
397 	/* Basic descriptor defines */
398 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
399 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
400 
401 	if (vlan_has_tag(m_head))
402 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
403 
404 	/*
 405 	 * It is important to capture the first descriptor used,
 406 	 * because its tx_buffer entry will record the EOP
 407 	 * descriptor that we tell the hardware to report back.
408 	 */
409 	first = txr->next_avail_desc;
410 	txbuf = &txr->tx_buffers[first];
411 	map = txbuf->map;
412 
413 	/*
414 	 * Map the packet for DMA.
415 	 */
416 retry:
417 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
418 	    BUS_DMA_NOWAIT);
419 
420 	if (__predict_false(error)) {
421 		struct mbuf *m;
422 
423 		switch (error) {
424 		case EAGAIN:
425 			txr->q_eagain_tx_dma_setup++;
426 			return EAGAIN;
427 		case ENOMEM:
428 			txr->q_enomem_tx_dma_setup++;
429 			return EAGAIN;
430 		case EFBIG:
431 			/* Try it again? - one try */
432 			if (remap == TRUE) {
433 				remap = FALSE;
434 				/*
435 				 * XXX: m_defrag will choke on
436 				 * non-MCLBYTES-sized clusters
437 				 */
438 				txr->q_efbig_tx_dma_setup++;
439 				m = m_defrag(m_head, M_NOWAIT);
440 				if (m == NULL) {
441 					txr->q_mbuf_defrag_failed++;
442 					return ENOBUFS;
443 				}
444 				m_head = m;
445 				goto retry;
446 			} else {
447 				txr->q_efbig2_tx_dma_setup++;
448 				return error;
449 			}
450 		case EINVAL:
451 			txr->q_einval_tx_dma_setup++;
452 			return error;
453 		default:
454 			txr->q_other_tx_dma_setup++;
455 			return error;
456 		}
457 	}
458 
459 	/* Make certain there are enough descriptors */
460 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
461 		txr->no_desc_avail.ev_count++;
462 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
463 		return EAGAIN;
464 	}
465 
466 	/*
467 	 * Set up the appropriate offload context
468 	 * this will consume the first descriptor
469 	 */
470 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
471 	if (__predict_false(error)) {
472 		return (error);
473 	}
474 
475 	/* Do the flow director magic */
476 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
477 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
478 		++txr->atr_count;
479 		if (txr->atr_count >= atr_sample_rate) {
480 			ixgbe_atr(txr, m_head);
481 			txr->atr_count = 0;
482 		}
483 	}
484 
485 	olinfo_status |= IXGBE_ADVTXD_CC;
486 	i = txr->next_avail_desc;
487 	for (j = 0; j < map->dm_nsegs; j++) {
488 		bus_size_t seglen;
489 		bus_addr_t segaddr;
490 
491 		txbuf = &txr->tx_buffers[i];
492 		txd = &txr->tx_base[i];
493 		seglen = map->dm_segs[j].ds_len;
494 		segaddr = htole64(map->dm_segs[j].ds_addr);
495 
496 		txd->read.buffer_addr = segaddr;
497 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
498 		txd->read.olinfo_status = htole32(olinfo_status);
499 
500 		if (++i == txr->num_desc)
501 			i = 0;
502 	}
503 
504 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
505 	txr->tx_avail -= map->dm_nsegs;
506 	txr->next_avail_desc = i;
507 
508 	txbuf->m_head = m_head;
509 	/*
 510 	 * Here we swap the maps so that the last descriptor,
 511 	 * which gets the completion interrupt, holds the real
 512 	 * (loaded) map, and the first descriptor gets the
 513 	 * unused map from this last descriptor.
514 	 */
515 	txr->tx_buffers[first].map = txbuf->map;
516 	txbuf->map = map;
517 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
518 	    BUS_DMASYNC_PREWRITE);
519 
520 	/* Set the EOP descriptor that will be marked done */
521 	txbuf = &txr->tx_buffers[first];
522 	txbuf->eop = txd;
523 
524 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
525 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
526 	/*
 527 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
528 	 * hardware that this frame is available to transmit.
529 	 */
530 	++txr->total_packets.ev_count;
531 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
532 
533 	/*
534 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
535 	 */
536 	ifp->if_obytes += m_head->m_pkthdr.len;
537 	if (m_head->m_flags & M_MCAST)
538 		ifp->if_omcasts++;
539 
540 	/* Mark queue as having work */
541 	if (txr->busy == 0)
542 		txr->busy = 1;
543 
544 	return (0);
545 } /* ixgbe_xmit */
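
/*
 * Descriptor accounting sketch for ixgbe_xmit() (hypothetical packet): an
 * mbuf chain that bus_dmamap_load_mbuf() maps into 3 segments consumes one
 * context descriptor (via ixgbe_tx_ctx_setup()) plus 3 data descriptors,
 * with the EOP and RS bits set only on the last one.  The "dm_nsegs + 2"
 * headroom check above leaves room for the context descriptor and keeps
 * the ring from being filled completely.
 */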
546 
547 /************************************************************************
548  * ixgbe_drain
549  ************************************************************************/
550 static void
551 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
552 {
553 	struct mbuf *m;
554 
555 	IXGBE_TX_LOCK_ASSERT(txr);
556 
557 	if (txr->me == 0) {
558 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
559 			IFQ_DEQUEUE(&ifp->if_snd, m);
560 			m_freem(m);
561 			IF_DROP(&ifp->if_snd);
562 		}
563 	}
564 
565 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
566 		m_freem(m);
567 		txr->pcq_drops.ev_count++;
568 	}
569 }
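
/*
 * Note: if_snd is drained only when txr->me == 0 because the legacy
 * (non-multiqueue) transmit path feeds every packet through ring 0; the
 * per-ring pcq used by ixgbe_mq_start() is drained for every ring.
 */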
570 
571 /************************************************************************
572  * ixgbe_allocate_transmit_buffers
573  *
574  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
575  *   the information needed to transmit a packet on the wire. This is
 576  *   called only once at attach; setup is done on every reset.
577  ************************************************************************/
578 static int
579 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
580 {
581 	struct adapter      *adapter = txr->adapter;
582 	device_t            dev = adapter->dev;
583 	struct ixgbe_tx_buf *txbuf;
584 	int                 error, i;
585 
586 	/*
587 	 * Setup DMA descriptor areas.
588 	 */
589 	error = ixgbe_dma_tag_create(
590 	         /*      parent */ adapter->osdep.dmat,
591 	         /*   alignment */ 1,
592 	         /*      bounds */ 0,
593 	         /*     maxsize */ IXGBE_TSO_SIZE,
594 	         /*   nsegments */ adapter->num_segs,
595 	         /*  maxsegsize */ PAGE_SIZE,
596 	         /*       flags */ 0,
597 	                           &txr->txtag);
598 	if (error != 0) {
 599 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
600 		goto fail;
601 	}
602 
603 	txr->tx_buffers =
604 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
605 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
606 	if (txr->tx_buffers == NULL) {
607 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
608 		error = ENOMEM;
609 		goto fail;
610 	}
611 
612 	/* Create the descriptor buffer dma maps */
613 	txbuf = txr->tx_buffers;
614 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
615 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
616 		if (error != 0) {
617 			aprint_error_dev(dev,
618 			    "Unable to create TX DMA map (%d)\n", error);
619 			goto fail;
620 		}
621 	}
622 
623 	return 0;
624 fail:
 625 	/* Free everything; this handles the case where we failed part-way through */
626 #if 0 /* XXX was FreeBSD */
627 	ixgbe_free_transmit_structures(adapter);
628 #else
629 	ixgbe_free_transmit_buffers(txr);
630 #endif
631 	return (error);
632 } /* ixgbe_allocate_transmit_buffers */
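
/*
 * Sketch of what the TX DMA tag above allows (a restatement of the tag
 * parameters, not new behaviour): a single transmit mapping may span up to
 * IXGBE_TSO_SIZE bytes, split across at most adapter->num_segs segments of
 * at most PAGE_SIZE bytes each, with no alignment constraint (alignment 1).
 * Each of the num_tx_desc tx_buffer slots then gets its own bus_dmamap
 * created from this tag.
 */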
633 
634 /************************************************************************
635  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
636  ************************************************************************/
637 static void
638 ixgbe_setup_transmit_ring(struct tx_ring *txr)
639 {
640 	struct adapter        *adapter = txr->adapter;
641 	struct ixgbe_tx_buf   *txbuf;
642 #ifdef DEV_NETMAP
643 	struct netmap_adapter *na = NA(adapter->ifp);
644 	struct netmap_slot    *slot;
645 #endif /* DEV_NETMAP */
646 
647 	/* Clear the old ring contents */
648 	IXGBE_TX_LOCK(txr);
649 
650 #ifdef DEV_NETMAP
651 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
652 		/*
653 		 * (under lock): if in netmap mode, do some consistency
654 		 * checks and set slot to entry 0 of the netmap ring.
655 		 */
656 		slot = netmap_reset(na, NR_TX, txr->me, 0);
657 	}
658 #endif /* DEV_NETMAP */
659 
660 	bzero((void *)txr->tx_base,
661 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
662 	/* Reset indices */
663 	txr->next_avail_desc = 0;
664 	txr->next_to_clean = 0;
665 
666 	/* Free any existing tx buffers. */
667 	txbuf = txr->tx_buffers;
668 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
669 		if (txbuf->m_head != NULL) {
670 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
671 			    0, txbuf->m_head->m_pkthdr.len,
672 			    BUS_DMASYNC_POSTWRITE);
673 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
674 			m_freem(txbuf->m_head);
675 			txbuf->m_head = NULL;
676 		}
677 
678 #ifdef DEV_NETMAP
679 		/*
680 		 * In netmap mode, set the map for the packet buffer.
681 		 * NOTE: Some drivers (not this one) also need to set
682 		 * the physical buffer address in the NIC ring.
683 		 * Slots in the netmap ring (indexed by "si") are
684 		 * kring->nkr_hwofs positions "ahead" wrt the
685 		 * corresponding slot in the NIC ring. In some drivers
686 		 * (not here) nkr_hwofs can be negative. Function
687 		 * netmap_idx_n2k() handles wraparounds properly.
688 		 */
689 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
690 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
691 			netmap_load_map(na, txr->txtag,
692 			    txbuf->map, NMB(na, slot + si));
693 		}
694 #endif /* DEV_NETMAP */
695 
696 		/* Clear the EOP descriptor pointer */
697 		txbuf->eop = NULL;
698 	}
699 
700 	/* Set the rate at which we sample packets */
701 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
702 		txr->atr_sample = atr_sample_rate;
703 
704 	/* Set number of descriptors available */
705 	txr->tx_avail = adapter->num_tx_desc;
706 
707 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
708 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
709 	IXGBE_TX_UNLOCK(txr);
710 } /* ixgbe_setup_transmit_ring */
711 
712 /************************************************************************
713  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
714  ************************************************************************/
715 int
716 ixgbe_setup_transmit_structures(struct adapter *adapter)
717 {
718 	struct tx_ring *txr = adapter->tx_rings;
719 
720 	for (int i = 0; i < adapter->num_queues; i++, txr++)
721 		ixgbe_setup_transmit_ring(txr);
722 
723 	return (0);
724 } /* ixgbe_setup_transmit_structures */
725 
726 /************************************************************************
727  * ixgbe_free_transmit_structures - Free all transmit rings.
728  ************************************************************************/
729 void
730 ixgbe_free_transmit_structures(struct adapter *adapter)
731 {
732 	struct tx_ring *txr = adapter->tx_rings;
733 
734 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
735 		ixgbe_free_transmit_buffers(txr);
736 		ixgbe_dma_free(adapter, &txr->txdma);
737 		IXGBE_TX_LOCK_DESTROY(txr);
738 	}
739 	free(adapter->tx_rings, M_DEVBUF);
740 } /* ixgbe_free_transmit_structures */
741 
742 /************************************************************************
743  * ixgbe_free_transmit_buffers
744  *
745  *   Free transmit ring related data structures.
746  ************************************************************************/
747 static void
748 ixgbe_free_transmit_buffers(struct tx_ring *txr)
749 {
750 	struct adapter      *adapter = txr->adapter;
751 	struct ixgbe_tx_buf *tx_buffer;
752 	int                 i;
753 
754 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
755 
756 	if (txr->tx_buffers == NULL)
757 		return;
758 
759 	tx_buffer = txr->tx_buffers;
760 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
761 		if (tx_buffer->m_head != NULL) {
762 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
763 			    0, tx_buffer->m_head->m_pkthdr.len,
764 			    BUS_DMASYNC_POSTWRITE);
765 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
766 			m_freem(tx_buffer->m_head);
767 			tx_buffer->m_head = NULL;
768 			if (tx_buffer->map != NULL) {
769 				ixgbe_dmamap_destroy(txr->txtag,
770 				    tx_buffer->map);
771 				tx_buffer->map = NULL;
772 			}
773 		} else if (tx_buffer->map != NULL) {
774 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
775 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
776 			tx_buffer->map = NULL;
777 		}
778 	}
779 	if (txr->txr_interq != NULL) {
780 		struct mbuf *m;
781 
782 		while ((m = pcq_get(txr->txr_interq)) != NULL)
783 			m_freem(m);
784 		pcq_destroy(txr->txr_interq);
785 	}
786 	if (txr->tx_buffers != NULL) {
787 		free(txr->tx_buffers, M_DEVBUF);
788 		txr->tx_buffers = NULL;
789 	}
790 	if (txr->txtag != NULL) {
791 		ixgbe_dma_tag_destroy(txr->txtag);
792 		txr->txtag = NULL;
793 	}
794 } /* ixgbe_free_transmit_buffers */
795 
796 /************************************************************************
797  * ixgbe_tx_ctx_setup
798  *
799  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
800  ************************************************************************/
801 static int
802 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
803     u32 *cmd_type_len, u32 *olinfo_status)
804 {
805 	struct adapter                   *adapter = txr->adapter;
806 	struct ixgbe_adv_tx_context_desc *TXD;
807 	struct ether_vlan_header         *eh;
808 #ifdef INET
809 	struct ip                        *ip;
810 #endif
811 #ifdef INET6
812 	struct ip6_hdr                   *ip6;
813 #endif
814 	int                              ehdrlen, ip_hlen = 0;
815 	int                              offload = TRUE;
816 	int                              ctxd = txr->next_avail_desc;
817 	u32                              vlan_macip_lens = 0;
818 	u32                              type_tucmd_mlhl = 0;
819 	u16                              vtag = 0;
820 	u16                              etype;
821 	u8                               ipproto = 0;
822 	char                             *l3d;
823 
824 
825 	/* First check if TSO is to be used */
826 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
827 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
828 
829 		if (rv != 0)
830 			++adapter->tso_err.ev_count;
831 		return rv;
832 	}
833 
834 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
835 		offload = FALSE;
836 
837 	/* Indicate the whole packet as payload when not doing TSO */
838 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
839 
840 	/* Now ready a context descriptor */
841 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
842 
843 	/*
844 	 * In advanced descriptors the vlan tag must
845 	 * be placed into the context descriptor. Hence
846 	 * we need to make one even if not doing offloads.
847 	 */
848 	if (vlan_has_tag(mp)) {
849 		vtag = htole16(vlan_get_tag(mp));
850 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
851 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
852 	           (offload == FALSE))
853 		return (0);
854 
855 	/*
856 	 * Determine where frame payload starts.
857 	 * Jump over vlan headers if already present,
858 	 * helpful for QinQ too.
859 	 */
860 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
861 	eh = mtod(mp, struct ether_vlan_header *);
862 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
863 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
864 		etype = ntohs(eh->evl_proto);
865 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
866 	} else {
867 		etype = ntohs(eh->evl_encap_proto);
868 		ehdrlen = ETHER_HDR_LEN;
869 	}
870 
871 	/* Set the ether header length */
872 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
873 
874 	if (offload == FALSE)
875 		goto no_offloads;
876 
877 	/*
878 	 * If the first mbuf only includes the ethernet header,
879 	 * jump to the next one
880 	 * XXX: This assumes the stack splits mbufs containing headers
881 	 *      on header boundaries
882 	 * XXX: And assumes the entire IP header is contained in one mbuf
883 	 */
884 	if (mp->m_len == ehdrlen && mp->m_next)
885 		l3d = mtod(mp->m_next, char *);
886 	else
887 		l3d = mtod(mp, char *) + ehdrlen;
888 
889 	switch (etype) {
890 #ifdef INET
891 	case ETHERTYPE_IP:
892 		ip = (struct ip *)(l3d);
893 		ip_hlen = ip->ip_hl << 2;
894 		ipproto = ip->ip_p;
895 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
896 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
897 		    ip->ip_sum == 0);
898 		break;
899 #endif
900 #ifdef INET6
901 	case ETHERTYPE_IPV6:
902 		ip6 = (struct ip6_hdr *)(l3d);
903 		ip_hlen = sizeof(struct ip6_hdr);
904 		ipproto = ip6->ip6_nxt;
905 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
906 		break;
907 #endif
908 	default:
909 		offload = false;
910 		break;
911 	}
912 
913 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
914 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
915 
916 	vlan_macip_lens |= ip_hlen;
917 
918 	/* No support for offloads for non-L4 next headers */
 919 	switch (ipproto) {
920 	case IPPROTO_TCP:
921 		if (mp->m_pkthdr.csum_flags &
922 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
923 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
924 		else
925 			offload = false;
926 		break;
927 	case IPPROTO_UDP:
928 		if (mp->m_pkthdr.csum_flags &
929 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
930 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
931 		else
932 			offload = false;
933 		break;
934 	default:
935 		offload = false;
936 		break;
937 	}
938 
939 	if (offload) /* Insert L4 checksum into data descriptors */
940 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
941 
942 no_offloads:
943 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
944 
945 	/* Now copy bits into descriptor */
946 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
947 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
948 	TXD->seqnum_seed = htole32(0);
949 	TXD->mss_l4len_idx = htole32(0);
950 
951 	/* We've consumed the first desc, adjust counters */
952 	if (++ctxd == txr->num_desc)
953 		ctxd = 0;
954 	txr->next_avail_desc = ctxd;
955 	--txr->tx_avail;
956 
957 	return (0);
958 } /* ixgbe_tx_ctx_setup */
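
/*
 * Field-packing sketch for the context descriptor built above, for a
 * hypothetical untagged IPv4/TCP frame with no IP options:
 *
 *	vlan_macip_lens = (0 << IXGBE_ADVTXD_VLAN_SHIFT)    // no VLAN tag
 *	                | (14 << IXGBE_ADVTXD_MACLEN_SHIFT) // ETHER_HDR_LEN
 *	                | 20;                               // ip_hlen
 *	type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT
 *	                | IXGBE_ADVTXD_TUCMD_IPV4
 *	                | IXGBE_ADVTXD_TUCMD_L4T_TCP;
 */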
959 
960 /************************************************************************
961  * ixgbe_tso_setup
962  *
963  *   Setup work for hardware segmentation offload (TSO) on
964  *   adapters using advanced tx descriptors
965  ************************************************************************/
966 static int
967 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
968     u32 *olinfo_status)
969 {
970 	struct ixgbe_adv_tx_context_desc *TXD;
971 	struct ether_vlan_header         *eh;
972 #ifdef INET6
973 	struct ip6_hdr                   *ip6;
974 #endif
975 #ifdef INET
976 	struct ip                        *ip;
977 #endif
978 	struct tcphdr                    *th;
979 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
980 	u32                              vlan_macip_lens = 0;
981 	u32                              type_tucmd_mlhl = 0;
982 	u32                              mss_l4len_idx = 0, paylen;
983 	u16                              vtag = 0, eh_type;
984 
985 	/*
986 	 * Determine where frame payload starts.
987 	 * Jump over vlan headers if already present
988 	 */
989 	eh = mtod(mp, struct ether_vlan_header *);
990 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
991 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
992 		eh_type = eh->evl_proto;
993 	} else {
994 		ehdrlen = ETHER_HDR_LEN;
995 		eh_type = eh->evl_encap_proto;
996 	}
997 
998 	switch (ntohs(eh_type)) {
999 #ifdef INET
1000 	case ETHERTYPE_IP:
1001 		ip = (struct ip *)(mp->m_data + ehdrlen);
1002 		if (ip->ip_p != IPPROTO_TCP)
1003 			return (ENXIO);
1004 		ip->ip_sum = 0;
1005 		ip_hlen = ip->ip_hl << 2;
1006 		th = (struct tcphdr *)((char *)ip + ip_hlen);
1007 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1008 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1009 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1010 		/* Tell transmit desc to also do IPv4 checksum. */
1011 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1012 		break;
1013 #endif
1014 #ifdef INET6
1015 	case ETHERTYPE_IPV6:
1016 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1017 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1018 		if (ip6->ip6_nxt != IPPROTO_TCP)
1019 			return (ENXIO);
1020 		ip_hlen = sizeof(struct ip6_hdr);
1021 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1022 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1023 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1024 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1025 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1026 		break;
1027 #endif
1028 	default:
1029 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1030 		    __func__, ntohs(eh_type));
1031 		break;
1032 	}
1033 
1034 	ctxd = txr->next_avail_desc;
1035 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1036 
1037 	tcp_hlen = th->th_off << 2;
1038 
1039 	/* This is used in the transmit desc in encap */
1040 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1041 
1042 	/* VLAN MACLEN IPLEN */
1043 	if (vlan_has_tag(mp)) {
1044 		vtag = htole16(vlan_get_tag(mp));
1045 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1046 	}
1047 
1048 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1049 	vlan_macip_lens |= ip_hlen;
1050 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1051 
1052 	/* ADV DTYPE TUCMD */
1053 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1054 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1055 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1056 
1057 	/* MSS L4LEN IDX */
1058 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1059 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1060 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1061 
1062 	TXD->seqnum_seed = htole32(0);
1063 
1064 	if (++ctxd == txr->num_desc)
1065 		ctxd = 0;
1066 
1067 	txr->tx_avail--;
1068 	txr->next_avail_desc = ctxd;
1069 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1070 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1071 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1072 	++txr->tso_tx.ev_count;
1073 
1074 	return (0);
1075 } /* ixgbe_tso_setup */
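
/*
 * TSO packing sketch (hypothetical values): for an MSS of 1448 bytes and a
 * 20-byte TCP header with no options,
 *
 *	mss_l4len_idx = (1448 << IXGBE_ADVTXD_MSS_SHIFT)
 *	              | (20 << IXGBE_ADVTXD_L4LEN_SHIFT);
 *
 * and paylen excludes all headers, so a 64000-byte m_pkthdr.len IPv4 frame
 * with a 14-byte ethernet header yields paylen = 64000 - 14 - 20 - 20.
 */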
1076 
1077 
1078 /************************************************************************
1079  * ixgbe_txeof
1080  *
1081  *   Examine each tx_buffer in the used queue. If the hardware is done
1082  *   processing the packet then free associated resources. The
1083  *   tx_buffer is put back on the free queue.
1084  ************************************************************************/
1085 bool
1086 ixgbe_txeof(struct tx_ring *txr)
1087 {
1088 	struct adapter		*adapter = txr->adapter;
1089 	struct ifnet		*ifp = adapter->ifp;
1090 	struct ixgbe_tx_buf	*buf;
1091 	union ixgbe_adv_tx_desc *txd;
1092 	u32			work, processed = 0;
1093 	u32			limit = adapter->tx_process_limit;
1094 
1095 	KASSERT(mutex_owned(&txr->tx_mtx));
1096 
1097 #ifdef DEV_NETMAP
1098 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1099 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1100 		struct netmap_adapter *na = NA(adapter->ifp);
1101 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1102 		txd = txr->tx_base;
1103 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1104 		    BUS_DMASYNC_POSTREAD);
1105 		/*
1106 		 * In netmap mode, all the work is done in the context
1107 		 * of the client thread. Interrupt handlers only wake up
1108 		 * clients, which may be sleeping on individual rings
1109 		 * or on a global resource for all rings.
1110 		 * To implement tx interrupt mitigation, we wake up the client
1111 		 * thread roughly every half ring, even if the NIC interrupts
1112 		 * more frequently. This is implemented as follows:
1113 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1114 		 *   the slot that should wake up the thread (nkr_num_slots
1115 		 *   means the user thread should not be woken up);
1116 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1117 		 *   or the slot has the DD bit set.
1118 		 */
1119 		if (!netmap_mitigate ||
1120 		    (kring->nr_kflags < kring->nkr_num_slots &&
1121 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1122 			netmap_tx_irq(ifp, txr->me);
1123 		}
1124 		return false;
1125 	}
1126 #endif /* DEV_NETMAP */
1127 
1128 	if (txr->tx_avail == txr->num_desc) {
1129 		txr->busy = 0;
1130 		return false;
1131 	}
1132 
1133 	/* Get work starting point */
1134 	work = txr->next_to_clean;
1135 	buf = &txr->tx_buffers[work];
1136 	txd = &txr->tx_base[work];
1137 	work -= txr->num_desc; /* The distance to ring end */
1138 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1139 	    BUS_DMASYNC_POSTREAD);
1140 
1141 	do {
1142 		union ixgbe_adv_tx_desc *eop = buf->eop;
1143 		if (eop == NULL) /* No work */
1144 			break;
1145 
1146 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1147 			break;	/* I/O not complete */
1148 
1149 		if (buf->m_head) {
1150 			txr->bytes += buf->m_head->m_pkthdr.len;
1151 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1152 			    0, buf->m_head->m_pkthdr.len,
1153 			    BUS_DMASYNC_POSTWRITE);
1154 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1155 			m_freem(buf->m_head);
1156 			buf->m_head = NULL;
1157 		}
1158 		buf->eop = NULL;
1159 		++txr->tx_avail;
1160 
 1161 		/* Clean the remaining descriptors of a multi-segment packet */
1162 		while (txd != eop) {
1163 			++txd;
1164 			++buf;
1165 			++work;
1166 			/* wrap the ring? */
1167 			if (__predict_false(!work)) {
1168 				work -= txr->num_desc;
1169 				buf = txr->tx_buffers;
1170 				txd = txr->tx_base;
1171 			}
1172 			if (buf->m_head) {
1173 				txr->bytes +=
1174 				    buf->m_head->m_pkthdr.len;
1175 				bus_dmamap_sync(txr->txtag->dt_dmat,
1176 				    buf->map,
1177 				    0, buf->m_head->m_pkthdr.len,
1178 				    BUS_DMASYNC_POSTWRITE);
1179 				ixgbe_dmamap_unload(txr->txtag,
1180 				    buf->map);
1181 				m_freem(buf->m_head);
1182 				buf->m_head = NULL;
1183 			}
1184 			++txr->tx_avail;
1185 			buf->eop = NULL;
1186 
1187 		}
1188 		++txr->packets;
1189 		++processed;
1190 		++ifp->if_opackets;
1191 
1192 		/* Try the next packet */
1193 		++txd;
1194 		++buf;
1195 		++work;
1196 		/* reset with a wrap */
1197 		if (__predict_false(!work)) {
1198 			work -= txr->num_desc;
1199 			buf = txr->tx_buffers;
1200 			txd = txr->tx_base;
1201 		}
1202 		prefetch(txd);
1203 	} while (__predict_true(--limit));
1204 
1205 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1206 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1207 
1208 	work += txr->num_desc;
1209 	txr->next_to_clean = work;
1210 
1211 	/*
 1212 	 * Queue hang detection: we know there is work
 1213 	 * outstanding, or the early return above would
 1214 	 * have been taken, so increment busy if nothing
 1215 	 * managed to get cleaned; the local timer then
 1216 	 * checks this counter and marks the queue HUNG
 1217 	 * once it exceeds the maximum number of attempts.
1218 	 */
1219 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1220 		++txr->busy;
1221 	/*
 1222 	 * If anything got cleaned, we reset the state to 1;
 1223 	 * note that this also turns off HUNG if it was set.
1224 	 */
1225 	if (processed)
1226 		txr->busy = 1;
1227 
1228 	if (txr->tx_avail == txr->num_desc)
1229 		txr->busy = 0;
1230 
1231 	return ((limit > 0) ? false : true);
1232 } /* ixgbe_txeof */
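
/*
 * Hang-detection lifecycle sketch (states taken from the code above and
 * from ixgbe_xmit()): "busy" is set to 1 when a frame is queued, bumped by
 * one each time ixgbe_txeof() runs without cleaning anything, reset to 1
 * whenever progress is made, and cleared to 0 once the ring is empty.  The
 * watchdog in the local timer treats a counter that keeps growing as a
 * hung queue (IXGBE_QUEUE_HUNG).
 */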
1233 
1234 /************************************************************************
1235  * ixgbe_rsc_count
1236  *
1237  *   Used to detect a descriptor that has been merged by Hardware RSC.
1238  ************************************************************************/
1239 static inline u32
1240 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1241 {
1242 	return (le32toh(rx->wb.lower.lo_dword.data) &
1243 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1244 } /* ixgbe_rsc_count */
1245 
1246 /************************************************************************
1247  * ixgbe_setup_hw_rsc
1248  *
 1249  *   Initialize the Hardware RSC (LRO) feature on 82599
 1250  *   for an RX ring; it is toggled by the LRO capability
 1251  *   even though it is transparent to the stack.
1252  *
1253  *   NOTE: Since this HW feature only works with IPv4 and
1254  *         testing has shown soft LRO to be as effective,
1255  *         this feature will be disabled by default.
1256  ************************************************************************/
1257 static void
1258 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1259 {
1260 	struct	adapter  *adapter = rxr->adapter;
1261 	struct	ixgbe_hw *hw = &adapter->hw;
1262 	u32              rscctrl, rdrxctl;
1263 
1264 	/* If turning LRO/RSC off we need to disable it */
1265 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1266 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
 1267 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1268 		return;
1269 	}
1270 
1271 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1272 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1273 #ifdef DEV_NETMAP
1274 	/* Always strip CRC unless Netmap disabled it */
1275 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1276 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1277 	    ix_crcstrip)
1278 #endif /* DEV_NETMAP */
1279 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1280 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1281 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1282 
1283 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1284 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1285 	/*
1286 	 * Limit the total number of descriptors that
1287 	 * can be combined, so it does not exceed 64K
1288 	 */
1289 	if (rxr->mbuf_sz == MCLBYTES)
1290 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1291 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1292 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1293 	else if (rxr->mbuf_sz == MJUM9BYTES)
1294 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1295 	else  /* Using 16K cluster */
1296 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1297 
1298 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1299 
1300 	/* Enable TCP header recognition */
1301 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1302 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1303 
1304 	/* Disable RSC for ACK packets */
1305 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1306 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1307 
1308 	rxr->hw_rsc = TRUE;
1309 } /* ixgbe_setup_hw_rsc */
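
/*
 * MAXDESC arithmetic sketch (assuming the usual cluster sizes: MCLBYTES =
 * 2KB, MJUMPAGESIZE = PAGE_SIZE = 4KB, MJUM9BYTES = 9KB, MJUM16BYTES =
 * 16KB): the coalesced length must stay under 64KB, so
 *
 *	16 descriptors * 2KB  = 32KB
 *	 8 descriptors * 4KB  = 32KB
 *	 4 descriptors * 9KB  = 36KB
 *	 1 descriptor  * 16KB = 16KB
 *
 * which is why the MAXDESC setting above shrinks as the cluster grows.
 */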
1310 
1311 /************************************************************************
1312  * ixgbe_refresh_mbufs
1313  *
1314  *   Refresh mbuf buffers for RX descriptor rings
 1315  *    - now keeps its own state, so discards due to resource
 1316  *      exhaustion are unnecessary; if an mbuf cannot be obtained,
 1317  *      it simply returns, keeping its placeholder, so it can be
 1318  *      called again to retry later.
1319  ************************************************************************/
1320 static void
1321 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1322 {
1323 	struct adapter      *adapter = rxr->adapter;
1324 	struct ixgbe_rx_buf *rxbuf;
1325 	struct mbuf         *mp;
1326 	int                 i, j, error;
1327 	bool                refreshed = false;
1328 
1329 	i = j = rxr->next_to_refresh;
1330 	/* Control the loop with one beyond */
1331 	if (++j == rxr->num_desc)
1332 		j = 0;
1333 
1334 	while (j != limit) {
1335 		rxbuf = &rxr->rx_buffers[i];
1336 		if (rxbuf->buf == NULL) {
1337 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1338 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1339 			if (mp == NULL) {
1340 				rxr->no_jmbuf.ev_count++;
1341 				goto update;
1342 			}
1343 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1344 				m_adj(mp, ETHER_ALIGN);
1345 		} else
1346 			mp = rxbuf->buf;
1347 
1348 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1349 
1350 		/* If we're dealing with an mbuf that was copied rather
1351 		 * than replaced, there's no need to go through busdma.
1352 		 */
1353 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1354 			/* Get the memory mapping */
1355 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1356 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1357 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1358 			if (error != 0) {
1359 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1360 				m_free(mp);
1361 				rxbuf->buf = NULL;
1362 				goto update;
1363 			}
1364 			rxbuf->buf = mp;
1365 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1366 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1367 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1368 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1369 		} else {
1370 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1371 			rxbuf->flags &= ~IXGBE_RX_COPY;
1372 		}
1373 
1374 		refreshed = true;
1375 		/* Next is precalculated */
1376 		i = j;
1377 		rxr->next_to_refresh = i;
1378 		if (++j == rxr->num_desc)
1379 			j = 0;
1380 	}
1381 
1382 update:
1383 	if (refreshed) /* Update hardware tail index */
1384 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1385 
1386 	return;
1387 } /* ixgbe_refresh_mbufs */
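
/*
 * Loop-control sketch for ixgbe_refresh_mbufs(): 'j' always runs one slot
 * ahead of 'i', so the walk stops one descriptor short of 'limit' and
 * rxr->next_to_refresh is left pointing at the next slot to be filled;
 * that index is what gets written to the hardware tail register when at
 * least one buffer was refreshed.
 */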
1388 
1389 /************************************************************************
1390  * ixgbe_allocate_receive_buffers
1391  *
1392  *   Allocate memory for rx_buffer structures. Since we use one
 1393  *   rx_buffer per received packet, the maximum number of rx_buffers
1394  *   that we'll need is equal to the number of receive descriptors
1395  *   that we've allocated.
1396  ************************************************************************/
1397 static int
1398 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1399 {
1400 	struct	adapter     *adapter = rxr->adapter;
1401 	device_t            dev = adapter->dev;
1402 	struct ixgbe_rx_buf *rxbuf;
1403 	int                 bsize, error;
1404 
1405 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1406 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1407 	    M_NOWAIT | M_ZERO);
1408 	if (rxr->rx_buffers == NULL) {
1409 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1410 		error = ENOMEM;
1411 		goto fail;
1412 	}
1413 
1414 	error = ixgbe_dma_tag_create(
1415 	         /*      parent */ adapter->osdep.dmat,
1416 	         /*   alignment */ 1,
1417 	         /*      bounds */ 0,
1418 	         /*     maxsize */ MJUM16BYTES,
1419 	         /*   nsegments */ 1,
1420 	         /*  maxsegsize */ MJUM16BYTES,
1421 	         /*       flags */ 0,
1422 	                           &rxr->ptag);
1423 	if (error != 0) {
1424 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1425 		goto fail;
1426 	}
1427 
1428 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1429 		rxbuf = &rxr->rx_buffers[i];
1430 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1431 		if (error) {
1432 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1433 			goto fail;
1434 		}
1435 	}
1436 
1437 	return (0);
1438 
1439 fail:
1440 	/* Frees all, but can handle partial completion */
1441 	ixgbe_free_receive_structures(adapter);
1442 
1443 	return (error);
1444 } /* ixgbe_allocate_receive_buffers */
1445 
1446 /************************************************************************
1447  * ixgbe_free_receive_ring
1448  ************************************************************************/
1449 static void
1450 ixgbe_free_receive_ring(struct rx_ring *rxr)
1451 {
1452 	for (int i = 0; i < rxr->num_desc; i++) {
1453 		ixgbe_rx_discard(rxr, i);
1454 	}
1455 } /* ixgbe_free_receive_ring */
1456 
1457 /************************************************************************
1458  * ixgbe_setup_receive_ring
1459  *
1460  *   Initialize a receive ring and its buffers.
1461  ************************************************************************/
1462 static int
1463 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1464 {
1465 	struct adapter        *adapter;
1466 	struct ixgbe_rx_buf   *rxbuf;
1467 #ifdef LRO
1468 	struct ifnet          *ifp;
1469 	struct lro_ctrl       *lro = &rxr->lro;
1470 #endif /* LRO */
1471 #ifdef DEV_NETMAP
1472 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1473 	struct netmap_slot    *slot;
1474 #endif /* DEV_NETMAP */
1475 	int                   rsize, error = 0;
1476 
1477 	adapter = rxr->adapter;
1478 #ifdef LRO
1479 	ifp = adapter->ifp;
1480 #endif /* LRO */
1481 
1482 	/* Clear the ring contents */
1483 	IXGBE_RX_LOCK(rxr);
1484 
1485 #ifdef DEV_NETMAP
1486 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1487 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1488 #endif /* DEV_NETMAP */
1489 
1490 	rsize = roundup2(adapter->num_rx_desc *
1491 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1492 	bzero((void *)rxr->rx_base, rsize);
1493 	/* Cache the size */
1494 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1495 
1496 	/* Free current RX buffer structs and their mbufs */
1497 	ixgbe_free_receive_ring(rxr);
1498 
1499 	/* Now replenish the mbufs */
1500 	for (int j = 0; j != rxr->num_desc; ++j) {
1501 		struct mbuf *mp;
1502 
1503 		rxbuf = &rxr->rx_buffers[j];
1504 
1505 #ifdef DEV_NETMAP
1506 		/*
1507 		 * In netmap mode, fill the map and set the buffer
1508 		 * address in the NIC ring, considering the offset
1509 		 * between the netmap and NIC rings (see comment in
1510 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1511 		 * an mbuf, so end the block with a continue;
1512 		 */
1513 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1514 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1515 			uint64_t paddr;
1516 			void *addr;
1517 
1518 			addr = PNMB(na, slot + sj, &paddr);
1519 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1520 			/* Update descriptor and the cached value */
1521 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1522 			rxbuf->addr = htole64(paddr);
1523 			continue;
1524 		}
1525 #endif /* DEV_NETMAP */
1526 
1527 		rxbuf->flags = 0;
1528 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1529 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1530 		if (rxbuf->buf == NULL) {
1531 			error = ENOBUFS;
1532 			goto fail;
1533 		}
1534 		mp = rxbuf->buf;
1535 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1536 		/* Get the memory mapping */
1537 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1538 		    mp, BUS_DMA_NOWAIT);
1539 		if (error != 0)
 1540 			goto fail;
1541 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1542 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1543 		/* Update the descriptor and the cached value */
1544 		rxr->rx_base[j].read.pkt_addr =
1545 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1546 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1547 	}
1548 
1549 
1550 	/* Setup our descriptor indices */
1551 	rxr->next_to_check = 0;
1552 	rxr->next_to_refresh = 0;
1553 	rxr->lro_enabled = FALSE;
1554 	rxr->rx_copies.ev_count = 0;
1555 #if 0 /* NetBSD */
1556 	rxr->rx_bytes.ev_count = 0;
1557 #if 1	/* Fix inconsistency */
1558 	rxr->rx_packets.ev_count = 0;
1559 #endif
1560 #endif
1561 	rxr->vtag_strip = FALSE;
1562 
1563 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1564 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1565 
1566 	/*
1567 	 * Now set up the LRO interface
1568 	 */
1569 	if (ixgbe_rsc_enable)
1570 		ixgbe_setup_hw_rsc(rxr);
1571 #ifdef LRO
1572 	else if (ifp->if_capenable & IFCAP_LRO) {
1573 		device_t dev = adapter->dev;
1574 		int err = tcp_lro_init(lro);
1575 		if (err) {
1576 			device_printf(dev, "LRO Initialization failed!\n");
1577 			goto fail;
1578 		}
1579 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1580 		rxr->lro_enabled = TRUE;
1581 		lro->ifp = adapter->ifp;
1582 	}
1583 #endif /* LRO */
1584 
1585 	IXGBE_RX_UNLOCK(rxr);
1586 
1587 	return (0);
1588 
1589 fail:
1590 	ixgbe_free_receive_ring(rxr);
1591 	IXGBE_RX_UNLOCK(rxr);
1592 
1593 	return (error);
1594 } /* ixgbe_setup_receive_ring */
1595 
1596 /************************************************************************
1597  * ixgbe_setup_receive_structures - Initialize all receive rings.
1598  ************************************************************************/
1599 int
1600 ixgbe_setup_receive_structures(struct adapter *adapter)
1601 {
1602 	struct rx_ring *rxr = adapter->rx_rings;
1603 	int            j;
1604 
1605 	/*
1606 	 * Now reinitialize our supply of jumbo mbufs.  The number
1607 	 * or size of jumbo mbufs may have changed.
1608 	 * Assume all of rxr->ptag are the same.
1609 	 */
1610 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat,
1611 	    (2 * adapter->num_rx_desc) * adapter->num_queues,
1612 	    adapter->rx_mbuf_sz);
1613 
1614 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1615 		if (ixgbe_setup_receive_ring(rxr))
1616 			goto fail;
1617 
1618 	return (0);
1619 fail:
1620 	/*
 1621 	 * Free the RX buffers allocated so far; we only handle
 1622 	 * the rings that completed, since the failing ring has
 1623 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1624 	 */
1625 	for (int i = 0; i < j; ++i) {
1626 		rxr = &adapter->rx_rings[i];
1627 		IXGBE_RX_LOCK(rxr);
1628 		ixgbe_free_receive_ring(rxr);
1629 		IXGBE_RX_UNLOCK(rxr);
1630 	}
1631 
1632 	return (ENOBUFS);
1633 } /* ixgbe_setup_receive_structures */
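
/*
 * Jumbo-cluster pool sizing sketch (hypothetical ring sizes): with 1024 RX
 * descriptors and 8 queues, ixgbe_jcl_reinit() above provisions
 * 2 * 1024 * 8 = 16384 clusters of rx_mbuf_sz bytes, i.e. roughly two
 * clusters per descriptor, which leaves headroom for buffers that are
 * still loaned out to the stack.
 */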
1634 
1635 
1636 /************************************************************************
1637  * ixgbe_free_receive_structures - Free all receive rings.
1638  ************************************************************************/
1639 void
1640 ixgbe_free_receive_structures(struct adapter *adapter)
1641 {
1642 	struct rx_ring *rxr = adapter->rx_rings;
1643 
1644 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1645 
1646 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1647 		ixgbe_free_receive_buffers(rxr);
1648 #ifdef LRO
1649 		/* Free LRO memory */
1650 		tcp_lro_free(&rxr->lro);
1651 #endif /* LRO */
1652 		/* Free the ring memory as well */
1653 		ixgbe_dma_free(adapter, &rxr->rxdma);
1654 		IXGBE_RX_LOCK_DESTROY(rxr);
1655 	}
1656 
1657 	free(adapter->rx_rings, M_DEVBUF);
1658 } /* ixgbe_free_receive_structures */
1659 
1660 
1661 /************************************************************************
1662  * ixgbe_free_receive_buffers - Free receive ring data structures
1663  ************************************************************************/
1664 static void
1665 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1666 {
1667 	struct adapter      *adapter = rxr->adapter;
1668 	struct ixgbe_rx_buf *rxbuf;
1669 
1670 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1671 
1672 	/* Cleanup any existing buffers */
1673 	if (rxr->rx_buffers != NULL) {
1674 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1675 			rxbuf = &rxr->rx_buffers[i];
1676 			ixgbe_rx_discard(rxr, i);
1677 			if (rxbuf->pmap != NULL) {
1678 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1679 				rxbuf->pmap = NULL;
1680 			}
1681 		}
1682 		if (rxr->rx_buffers != NULL) {
1683 			free(rxr->rx_buffers, M_DEVBUF);
1684 			rxr->rx_buffers = NULL;
1685 		}
1686 	}
1687 
1688 	if (rxr->ptag != NULL) {
1689 		ixgbe_dma_tag_destroy(rxr->ptag);
1690 		rxr->ptag = NULL;
1691 	}
1692 
1693 	return;
1694 } /* ixgbe_free_receive_buffers */
1695 
1696 /************************************************************************
1697  * ixgbe_rx_input
1698  ************************************************************************/
1699 static __inline void
1700 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1701     u32 ptype)
1702 {
1703 	struct adapter	*adapter = ifp->if_softc;
1704 
1705 #ifdef LRO
1706 	struct ethercom *ec = &adapter->osdep.ec;
1707 
1708 	/*
1709 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1710 	 * been computed by hardware, and which carry no VLAN tag in the
1711 	 * Ethernet header.  For IPv6 we do not yet support extension headers.
1712 	 */
1713 	if (rxr->lro_enabled &&
1714 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1715 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1716 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1717 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1718 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1719 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1720 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1721 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1722 		/*
1723 		 * Send to the stack if:
1724 		 *  - LRO not enabled, or
1725 		 *  - no LRO resources, or
1726 		 *  - lro enqueue fails
1727 		 */
1728 		if (rxr->lro.lro_cnt != 0)
1729 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1730 				return;
1731 	}
1732 #endif /* LRO */
1733 
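	/* Hand the packet to the network stack via the per-CPU input queue. */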
1734 	if_percpuq_enqueue(adapter->ipq, m);
1735 } /* ixgbe_rx_input */
1736 
1737 /************************************************************************
1738  * ixgbe_rx_discard
1739  ************************************************************************/
1740 static __inline void
1741 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1742 {
1743 	struct ixgbe_rx_buf *rbuf;
1744 
1745 	rbuf = &rxr->rx_buffers[i];
1746 
1747 	/*
1748 	 * With advanced descriptors the writeback
1749 	 * clobbers the buffer addresses, so it's easier
1750 	 * to just free the existing mbufs and take
1751 	 * the normal refresh path to get new buffers
1752 	 * and a new mapping.
1753 	 */
1754 
1755 	if (rbuf->fmp != NULL) {	/* Partial chain? */
1756 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1757 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1758 		m_freem(rbuf->fmp);
1759 		rbuf->fmp = NULL;
1760 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1761 	} else if (rbuf->buf) {
1762 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1763 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1764 		m_free(rbuf->buf);
1765 		rbuf->buf = NULL;
1766 	}
1767 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1768 
1769 	rbuf->flags = 0;
1770 
1771 	return;
1772 } /* ixgbe_rx_discard */
1773 
1774 
1775 /************************************************************************
1776  * ixgbe_rxeof
1777  *
1778  *   Executes in interrupt context. It replenishes the
1779  *   mbufs in the descriptor ring and passes data which
1780  *   has been DMA'd into host memory up to the upper layer.
1781  *
1782  *   Return TRUE for more work, FALSE for all clean.
1783  ************************************************************************/
1784 bool
1785 ixgbe_rxeof(struct ix_queue *que)
1786 {
1787 	struct adapter		*adapter = que->adapter;
1788 	struct rx_ring		*rxr = que->rxr;
1789 	struct ifnet		*ifp = adapter->ifp;
1790 #ifdef LRO
1791 	struct lro_ctrl		*lro = &rxr->lro;
1792 #endif /* LRO */
1793 	union ixgbe_adv_rx_desc	*cur;
1794 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1795 	int			i, nextp, processed = 0;
1796 	u32			staterr = 0;
1797 	u32			count = adapter->rx_process_limit;
1798 #ifdef RSS
1799 	u16			pkt_info;
1800 #endif
1801 
1802 	IXGBE_RX_LOCK(rxr);
1803 
1804 #ifdef DEV_NETMAP
1805 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1806 		/* Same as the txeof routine: wakeup clients on intr. */
1807 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1808 			IXGBE_RX_UNLOCK(rxr);
1809 			return (FALSE);
1810 		}
1811 	}
1812 #endif /* DEV_NETMAP */
1813 
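	/*
	 * Main receive loop: starting at next_to_check, process up to
	 * rx_process_limit descriptors, stopping early when a descriptor
	 * without the DD (descriptor done) bit set is reached or the
	 * interface is no longer running.
	 */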
1814 	for (i = rxr->next_to_check; count != 0;) {
1815 		struct mbuf *sendmp, *mp;
1816 		u32         rsc, ptype;
1817 		u16         len;
1818 		u16         vtag = 0;
1819 		bool        eop;
1820 
1821 		/* Sync the ring. */
1822 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1823 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1824 
1825 		cur = &rxr->rx_base[i];
1826 		staterr = le32toh(cur->wb.upper.status_error);
1827 #ifdef RSS
1828 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1829 #endif
1830 
1831 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1832 			break;
1833 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1834 			break;
1835 
1836 		count--;
1837 		sendmp = NULL;
1838 		nbuf = NULL;
1839 		rsc = 0;
1840 		cur->wb.upper.status_error = 0;
1841 		rbuf = &rxr->rx_buffers[i];
1842 		mp = rbuf->buf;
1843 
1844 		len = le16toh(cur->wb.upper.length);
1845 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1846 		    IXGBE_RXDADV_PKTTYPE_MASK;
1847 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1848 
1849 		/* Make sure bad packets are discarded */
1850 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1851 #if __FreeBSD_version >= 1100036
1852 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1853 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1854 #endif
1855 			rxr->rx_discarded.ev_count++;
1856 			ixgbe_rx_discard(rxr, i);
1857 			goto next_desc;
1858 		}
1859 
1860 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1861 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1862 
1863 		/*
1864 		 * On the 82599, which supports a hardware
1865 		 * LRO (called HW RSC), packets need not be
1866 		 * fragmented across sequential descriptors;
1867 		 * instead, the next descriptor is indicated
1868 		 * in bits of the current descriptor.
1869 		 * This also means that we might process
1870 		 * more than one packet at a time, something
1871 		 * that has never been true before.  It
1872 		 * required eliminating global chain pointers
1873 		 * in favor of what we are doing here.  -jfv
1874 		 */
1875 		if (!eop) {
1876 			/*
1877 			 * Figure out the next descriptor
1878 			 * of this frame.
1879 			 */
1880 			if (rxr->hw_rsc == TRUE) {
1881 				rsc = ixgbe_rsc_count(cur);
1882 				rxr->rsc_num += (rsc - 1);
1883 			}
1884 			if (rsc) { /* Get hardware index */
1885 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1886 				    IXGBE_RXDADV_NEXTP_SHIFT);
1887 			} else { /* Just sequential */
1888 				nextp = i + 1;
1889 				if (nextp == adapter->num_rx_desc)
1890 					nextp = 0;
1891 			}
1892 			nbuf = &rxr->rx_buffers[nextp];
1893 			prefetch(nbuf);
1894 		}
1895 		/*
1896 		 * Rather than using the fmp/lmp global pointers
1897 		 * we now keep the head of a packet chain in the
1898 		 * buffer struct and pass this along from one
1899 		 * descriptor to the next, until we get EOP.
1900 		 */
1901 		mp->m_len = len;
1902 		/*
1903 		 * See if there is a stored head mbuf
1904 		 * that tells us what this buffer is.
1905 		 */
1906 		sendmp = rbuf->fmp;
1907 		if (sendmp != NULL) {  /* secondary frag */
1908 			rbuf->buf = rbuf->fmp = NULL;
1909 			mp->m_flags &= ~M_PKTHDR;
1910 			sendmp->m_pkthdr.len += mp->m_len;
1911 		} else {
1912 			/*
1913 			 * Optimize.  This might be a small packet,
1914 			 * maybe just a TCP ACK.  Do a fast copy that
1915 			 * is cache aligned into a new mbuf, and
1916 			 * leave the old mbuf+cluster for re-use.
1917 			 */
1918 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1919 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1920 				if (sendmp != NULL) {
1921 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1922 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1923 					    len);
1924 					sendmp->m_len = len;
1925 					rxr->rx_copies.ev_count++;
1926 					rbuf->flags |= IXGBE_RX_COPY;
1927 				}
1928 			}
1929 			if (sendmp == NULL) {
1930 				rbuf->buf = rbuf->fmp = NULL;
1931 				sendmp = mp;
1932 			}
1933 
1934 		/* First descriptor of a non-packet-split chain */
1935 			sendmp->m_flags |= M_PKTHDR;
1936 			sendmp->m_pkthdr.len = mp->m_len;
1937 		}
1938 		++processed;
1939 
1940 		/* Pass the head pointer on */
1941 		if (eop == 0) {
1942 			nbuf->fmp = sendmp;
1943 			sendmp = NULL;
1944 			mp->m_next = nbuf->buf;
1945 		} else { /* Sending this frame */
1946 			m_set_rcvif(sendmp, ifp);
1947 			++rxr->packets;
1948 			rxr->rx_packets.ev_count++;
1949 			/* Capture data for AIM (adaptive interrupt moderation) */
1950 			rxr->bytes += sendmp->m_pkthdr.len;
1951 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1952 			/* Process vlan info */
1953 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1954 				vtag = le16toh(cur->wb.upper.vlan);
1955 			if (vtag) {
1956 				vlan_set_tag(sendmp, vtag);
1957 			}
1958 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1959 				ixgbe_rx_checksum(staterr, sendmp, ptype,
1960 				   &adapter->stats.pf);
1961 			}
1962 
1963 #if 0 /* FreeBSD */
1964 			/*
1965 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
1966 			 * and never cleared. This means we have RSS hash
1967 			 * available to be used.
1968 			 */
1969 			if (adapter->num_queues > 1) {
1970 				sendmp->m_pkthdr.flowid =
1971 				    le32toh(cur->wb.lower.hi_dword.rss);
1972 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1973 				    case IXGBE_RXDADV_RSSTYPE_IPV4:
1974 					M_HASHTYPE_SET(sendmp,
1975 					    M_HASHTYPE_RSS_IPV4);
1976 					break;
1977 				    case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1978 					M_HASHTYPE_SET(sendmp,
1979 					    M_HASHTYPE_RSS_TCP_IPV4);
1980 					break;
1981 				    case IXGBE_RXDADV_RSSTYPE_IPV6:
1982 					M_HASHTYPE_SET(sendmp,
1983 					    M_HASHTYPE_RSS_IPV6);
1984 					break;
1985 				    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1986 					M_HASHTYPE_SET(sendmp,
1987 					    M_HASHTYPE_RSS_TCP_IPV6);
1988 					break;
1989 				    case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1990 					M_HASHTYPE_SET(sendmp,
1991 					    M_HASHTYPE_RSS_IPV6_EX);
1992 					break;
1993 				    case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1994 					M_HASHTYPE_SET(sendmp,
1995 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1996 					break;
1997 #if __FreeBSD_version > 1100000
1998 				    case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1999 					M_HASHTYPE_SET(sendmp,
2000 					    M_HASHTYPE_RSS_UDP_IPV4);
2001 					break;
2002 				    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2003 					M_HASHTYPE_SET(sendmp,
2004 					    M_HASHTYPE_RSS_UDP_IPV6);
2005 					break;
2006 				    case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2007 					M_HASHTYPE_SET(sendmp,
2008 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2009 					break;
2010 #endif
2011 				    default:
2012 					M_HASHTYPE_SET(sendmp,
2013 					    M_HASHTYPE_OPAQUE_HASH);
2014 				}
2015 			} else {
2016 				sendmp->m_pkthdr.flowid = que->msix;
2017 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2018 			}
2019 #endif
2020 		}
2021 next_desc:
2022 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2023 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2024 
2025 		/* Advance our pointers to the next descriptor. */
2026 		if (++i == rxr->num_desc)
2027 			i = 0;
2028 
2029 		/* Now send to the stack or do LRO */
2030 		if (sendmp != NULL) {
2031 			rxr->next_to_check = i;
2032 			IXGBE_RX_UNLOCK(rxr);
2033 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2034 			IXGBE_RX_LOCK(rxr);
2035 			i = rxr->next_to_check;
2036 		}
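		/*
		 * Note: the ring lock is dropped around ixgbe_rx_input()
		 * above, presumably so the stack (and LRO) never run with
		 * IXGBE_RX_LOCK held; next_to_check is published before
		 * unlocking and reloaded afterwards.
		 */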
2037 
2038 		/* Every 8 descriptors we go to refresh mbufs */
2039 		if (processed == 8) {
2040 			ixgbe_refresh_mbufs(rxr, i);
2041 			processed = 0;
2042 		}
2043 	}
2044 
2045 	/* Refresh any remaining buf structs */
2046 	if (ixgbe_rx_unrefreshed(rxr))
2047 		ixgbe_refresh_mbufs(rxr, i);
2048 
2049 	rxr->next_to_check = i;
2050 
2051 	IXGBE_RX_UNLOCK(rxr);
2052 
2053 #ifdef LRO
2054 	/*
2055 	 * Flush any outstanding LRO work
2056 	 */
2057 	tcp_lro_flush_all(lro);
2058 #endif /* LRO */
2059 
2060 	/*
2061 	 * Still have cleaning to do?
2062 	 */
2063 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2064 		return (TRUE);
2065 
2066 	return (FALSE);
2067 } /* ixgbe_rxeof */
2068 
2069 
2070 /************************************************************************
2071  * ixgbe_rx_checksum
2072  *
2073  *   Verify that the hardware indicated that the checksum is valid.
2074  *   Inform the stack about the status of the checksum so that the
2075  *   stack doesn't spend time verifying it.
2076  ************************************************************************/
2077 static void
2078 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2079     struct ixgbe_hw_stats *stats)
2080 {
2081 	u16  status = (u16)staterr;
2082 	u8   errors = (u8)(staterr >> 24);
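	/* 'status' is the low 16 bits of staterr; 'errors' is bits 31:24. */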
2083 #if 0
2084 	bool sctp = false;
2085 
2086 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2087 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2088 		sctp = true;
2089 #endif
2090 
2091 	/* IPv4 checksum */
2092 	if (status & IXGBE_RXD_STAT_IPCS) {
2093 		stats->ipcs.ev_count++;
2094 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2095 			/* IP Checksum Good */
2096 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2097 		} else {
2098 			stats->ipcs_bad.ev_count++;
2099 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2100 		}
2101 	}
2102 	/* TCP/UDP/SCTP checksum */
2103 	if (status & IXGBE_RXD_STAT_L4CS) {
2104 		stats->l4cs.ev_count++;
2105 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2106 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2107 			mp->m_pkthdr.csum_flags |= type;
2108 		} else {
2109 			stats->l4cs_bad.ev_count++;
2110 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2111 		}
2112 	}
2113 } /* ixgbe_rx_checksum */
2114 
2115 /************************************************************************
2116  * ixgbe_dma_malloc
2117  ************************************************************************/
2118 int
2119 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2120 		struct ixgbe_dma_alloc *dma, const int mapflags)
2121 {
2122 	device_t dev = adapter->dev;
2123 	int      r, rsegs;
2124 
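	/*
	 * Follow the usual bus_dma(9) sequence: create a tag, allocate
	 * DMA-safe memory, map it into kernel virtual address space,
	 * create a DMA map, and load the map to obtain the bus address.
	 * The failure labels below unwind these steps in reverse order.
	 */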
2125 	r = ixgbe_dma_tag_create(
2126 	     /*      parent */ adapter->osdep.dmat,
2127 	     /*   alignment */ DBA_ALIGN,
2128 	     /*      bounds */ 0,
2129 	     /*     maxsize */ size,
2130 	     /*   nsegments */ 1,
2131 	     /*  maxsegsize */ size,
2132 	     /*       flags */ BUS_DMA_ALLOCNOW,
2133 			       &dma->dma_tag);
2134 	if (r != 0) {
2135 		aprint_error_dev(dev,
2136 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2137 		goto fail_0;
2138 	}
2139 
2140 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2141 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2142 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2143 	if (r != 0) {
2144 		aprint_error_dev(dev,
2145 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2146 		goto fail_1;
2147 	}
2148 
2149 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2150 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2151 	if (r != 0) {
2152 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2153 		    __func__, r);
2154 		goto fail_2;
2155 	}
2156 
2157 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2158 	if (r != 0) {
2159 		aprint_error_dev(dev,
2160 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2161 		goto fail_3;
2162 	}
2163 
2164 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2165 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2166 	if (r != 0) {
2167 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2168 		    __func__, r);
2169 		goto fail_4;
2170 	}
2171 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2172 	dma->dma_size = size;
2173 	return 0;
2174 fail_4:
2175 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2176 fail_3:
2177 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2178 fail_2:
2179 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2180 fail_1:
2181 	ixgbe_dma_tag_destroy(dma->dma_tag);
2182 fail_0:
2183 
2184 	return (r);
2185 } /* ixgbe_dma_malloc */
2186 
2187 /************************************************************************
2188  * ixgbe_dma_free
2189  ************************************************************************/
2190 void
2191 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2192 {
2193 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2194 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2195 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2196 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2197 	ixgbe_dma_tag_destroy(dma->dma_tag);
2198 } /* ixgbe_dma_free */
2199 
2200 
2201 /************************************************************************
2202  * ixgbe_allocate_queues
2203  *
2204  *   Allocate memory for the transmit and receive rings, and then
2205  *   the descriptors associated with each, called only once at attach.
2206  ************************************************************************/
2207 int
2208 ixgbe_allocate_queues(struct adapter *adapter)
2209 {
2210 	device_t	dev = adapter->dev;
2211 	struct ix_queue	*que;
2212 	struct tx_ring	*txr;
2213 	struct rx_ring	*rxr;
2214 	int             rsize, tsize, error = IXGBE_SUCCESS;
2215 	int             txconf = 0, rxconf = 0;
2216 
2217 	/* First, allocate the top level queue structs */
2218 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2219 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2220 	if (adapter->queues == NULL) {
2221 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
2222 		error = ENOMEM;
2223 		goto fail;
2224 	}
2225 
2226 	/* Second, allocate the TX ring struct memory */
2227 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2228 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2229 	if (adapter->tx_rings == NULL) {
2230 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2231 		error = ENOMEM;
2232 		goto tx_fail;
2233 	}
2234 
2235 	/* Third, allocate the RX ring */
2236 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2237 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2238 	if (adapter->rx_rings == NULL) {
2239 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2240 		error = ENOMEM;
2241 		goto rx_fail;
2242 	}
2243 
2244 	/* Size of the TX descriptor ring itself */
2245 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2246 	    DBA_ALIGN);
2247 
2248 	/*
2249 	 * Now set up the TX queues; txconf is needed to handle the
2250 	 * possibility that things fail midcourse and we need to
2251 	 * undo the memory allocations gracefully.
2252 	 */
2253 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2254 		/* Set up some basics */
2255 		txr = &adapter->tx_rings[i];
2256 		txr->adapter = adapter;
2257 		txr->txr_interq = NULL;
2258 		/* In case SR-IOV is enabled, align the index properly */
2259 #ifdef PCI_IOV
2260 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2261 		    i);
2262 #else
2263 		txr->me = i;
2264 #endif
2265 		txr->num_desc = adapter->num_tx_desc;
2266 
2267 		/* Initialize the TX side lock */
2268 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2269 
2270 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2271 		    BUS_DMA_NOWAIT)) {
2272 			aprint_error_dev(dev,
2273 			    "Unable to allocate TX Descriptor memory\n");
2274 			error = ENOMEM;
2275 			goto err_tx_desc;
2276 		}
2277 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2278 		bzero((void *)txr->tx_base, tsize);
2279 
2280 		/* Now allocate transmit buffers for the ring */
2281 		if (ixgbe_allocate_transmit_buffers(txr)) {
2282 			aprint_error_dev(dev,
2283 			    "Critical Failure setting up transmit buffers\n");
2284 			error = ENOMEM;
2285 			goto err_tx_desc;
2286 		}
2287 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2288 			/* Allocate a buf ring */
2289 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2290 			if (txr->txr_interq == NULL) {
2291 				aprint_error_dev(dev,
2292 				    "Critical Failure setting up buf ring\n");
2293 				error = ENOMEM;
2294 				goto err_tx_desc;
2295 			}
2296 		}
2297 	}
2298 
2299 	/*
2300 	 * Next the RX queues...
2301 	 */
2302 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2303 	    DBA_ALIGN);
2304 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2305 		rxr = &adapter->rx_rings[i];
2306 		/* Set up some basics */
2307 		rxr->adapter = adapter;
2308 #ifdef PCI_IOV
2309 		/* In case SR-IOV is enabled, align the index properly */
2310 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2311 		    i);
2312 #else
2313 		rxr->me = i;
2314 #endif
2315 		rxr->num_desc = adapter->num_rx_desc;
2316 
2317 		/* Initialize the RX side lock */
2318 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2319 
2320 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2321 		    BUS_DMA_NOWAIT)) {
2322 			aprint_error_dev(dev,
2323 			    "Unable to allocate RxDescriptor memory\n");
2324 			error = ENOMEM;
2325 			goto err_rx_desc;
2326 		}
2327 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2328 		bzero((void *)rxr->rx_base, rsize);
2329 
2330 		/* Allocate receive buffers for the ring */
2331 		if (ixgbe_allocate_receive_buffers(rxr)) {
2332 			aprint_error_dev(dev,
2333 			    "Critical Failure setting up receive buffers\n");
2334 			error = ENOMEM;
2335 			goto err_rx_desc;
2336 		}
2337 	}
2338 
2339 	/*
2340 	 * Finally set up the queue holding structs
2341 	 */
2342 	for (int i = 0; i < adapter->num_queues; i++) {
2343 		que = &adapter->queues[i];
2344 		que->adapter = adapter;
2345 		que->me = i;
2346 		que->txr = &adapter->tx_rings[i];
2347 		que->rxr = &adapter->rx_rings[i];
2348 
2349 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2350 		que->disabled_count = 0;
2351 	}
2352 
2353 	return (0);
2354 
2355 err_rx_desc:
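/*
 * Error unwind: rxconf and txconf count the rings that were fully set
 * up, and only their descriptor areas are freed here; the top-level
 * arrays are then released in reverse order of allocation.
 */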
2356 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2357 		ixgbe_dma_free(adapter, &rxr->rxdma);
2358 err_tx_desc:
2359 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2360 		ixgbe_dma_free(adapter, &txr->txdma);
2361 	free(adapter->rx_rings, M_DEVBUF);
2362 rx_fail:
2363 	free(adapter->tx_rings, M_DEVBUF);
2364 tx_fail:
2365 	free(adapter->queues, M_DEVBUF);
2366 fail:
2367 	return (error);
2368 } /* ixgbe_allocate_queues */
2369