xref: /netbsd-src/sys/dev/pci/ixgbe/ix_txrx.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* $NetBSD: ix_txrx.c,v 1.48 2018/06/26 06:48:01 msaitoh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 
69 #include "ixgbe.h"
70 
71 /*
72  * HW RSC control:
73  *  This feature only works with
74  *  IPv4, and only on 82599 and later.
75  *  It also causes IP forwarding to
76  *  fail and, unlike LRO, cannot be
77  *  controlled by the stack. For all
78  *  these reasons it is best left off,
79  *  with no tunable interface; enabling
80  *  it requires setting this to TRUE
81  *  and recompiling.
82  */
83 static bool ixgbe_rsc_enable = FALSE;
84 
85 /*
86  * For Flow Director: this is the
87  * sampling rate for TX packets; with
88  * the default of 20, every 20th packet
89  * is probed for the filter pool.
90  *
91  * This feature can be disabled by
92  * setting this to 0.
93  */
94 static int atr_sample_rate = 20;
95 
96 /************************************************************************
97  *  Local Function prototypes
98  ************************************************************************/
99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
104                                        struct ixgbe_hw_stats *);
105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
109                                         struct mbuf *, u32 *, u32 *);
110 static int           ixgbe_tso_setup(struct tx_ring *,
111                                      struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114                                     struct mbuf *, u32);
115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
116                                       struct ixgbe_dma_alloc *, int);
117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118 
119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
120 
121 /************************************************************************
122  * ixgbe_legacy_start_locked - Transmit entry point
123  *
124  *   Called by the stack to initiate a transmit.
125  *   The driver will remain in this routine as long as there are
126  *   packets to transmit and transmit resources are available.
127  *   In case resources are not available, the stack is notified
128  *   and the packet is requeued.
129  ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 	int rc;
134 	struct mbuf    *m_head;
135 	struct adapter *adapter = txr->adapter;
136 
137 	IXGBE_TX_LOCK_ASSERT(txr);
138 
139 	if (!adapter->link_active) {
140 		/*
141 		 * Discard all packets buffered in the IFQ to avoid
142 		 * sending stale packets when the link next comes up.
143 		 */
144 		ixgbe_drain(ifp, txr);
145 		return (ENETDOWN);
146 	}
147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
148 		return (ENETDOWN);
149 	if (txr->txr_no_space)
150 		return (ENETDOWN);
151 
152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 			break;
155 
156 		IFQ_POLL(&ifp->if_snd, m_head);
157 		if (m_head == NULL)
158 			break;
159 
160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 			break;
162 		}
163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 		if (rc != 0) {
165 			m_freem(m_head);
166 			continue;
167 		}
168 
169 		/* Send a copy of the frame to the BPF listener */
170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
171 	}
172 
173 	return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175 
176 /************************************************************************
177  * ixgbe_legacy_start
178  *
179  *   Called by the stack, this always uses the first tx ring,
180  *   and should not be used with multiqueue tx enabled.
181  ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 	struct adapter *adapter = ifp->if_softc;
186 	struct tx_ring *txr = adapter->tx_rings;
187 
188 	if (ifp->if_flags & IFF_RUNNING) {
189 		IXGBE_TX_LOCK(txr);
190 		ixgbe_legacy_start_locked(ifp, txr);
191 		IXGBE_TX_UNLOCK(txr);
192 	}
193 } /* ixgbe_legacy_start */
194 
195 /************************************************************************
196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
197  *
198  *   (if_transmit function)
199  ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 	struct adapter	*adapter = ifp->if_softc;
204 	struct tx_ring	*txr;
205 	int 		i, err = 0;
206 #ifdef RSS
207 	uint32_t bucket_id;
208 #endif
209 
210 	/*
211 	 * When doing RSS, map the packet to the same outbound queue
212 	 * that the incoming flow would be mapped to.
213 	 *
214 	 * If everything is set up correctly, that should be the
215 	 * same bucket the current CPU is assigned to.
216 	 */
217 #ifdef RSS
218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 		    &bucket_id) == 0)) {
222 			i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 			if (bucket_id > adapter->num_queues)
225 				if_printf(ifp,
226 				    "bucket_id (%d) > num_queues (%d)\n",
227 				    bucket_id, adapter->num_queues);
228 #endif
229 		} else
230 			i = m->m_pkthdr.flowid % adapter->num_queues;
231 	} else
232 #endif /* RSS */
233 		i = cpu_index(curcpu()) % adapter->num_queues;
234 
235 	/* Check for a hung queue and pick alternative */
236 	if (((1 << i) & adapter->active_queues) == 0)
237 		i = ffs64(adapter->active_queues);
238 
239 	txr = &adapter->tx_rings[i];
240 
241 	err = pcq_put(txr->txr_interq, m);
242 	if (err == false) {
243 		m_freem(m);
244 		txr->pcq_drops.ev_count++;
245 		return (err);
246 	}
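	/*
	 * Transmit immediately if the TX lock is available; otherwise
	 * defer the drain to a softint or, when txrx_use_workqueue is
	 * set, to the per-queue workqueue.
	 */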
247 	if (IXGBE_TX_TRYLOCK(txr)) {
248 		ixgbe_mq_start_locked(ifp, txr);
249 		IXGBE_TX_UNLOCK(txr);
250 	} else {
251 		if (adapter->txrx_use_workqueue) {
252 			u_int *enqueued;
253 
254 			/*
255 			 * This function itself is not called in interrupt
256 			 * context, but it can be called from fast softint
257 			 * context right after receiving forwarded packets.
258 			 * The workqueue must therefore be protected against
259 			 * double enqueueing when the machine handles both
260 			 * locally generated and forwarded packets.
261 			 */
262 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
263 			if (*enqueued == 0) {
264 				*enqueued = 1;
265 				percpu_putref(adapter->txr_wq_enqueued);
266 				workqueue_enqueue(adapter->txr_wq,
267 				    &txr->wq_cookie, curcpu());
268 			} else
269 				percpu_putref(adapter->txr_wq_enqueued);
270 		} else
271 			softint_schedule(txr->txr_si);
272 	}
273 
274 	return (0);
275 } /* ixgbe_mq_start */
276 
277 /************************************************************************
278  * ixgbe_mq_start_locked
279  ************************************************************************/
280 int
281 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
282 {
283 	struct mbuf    *next;
284 	int            enqueued = 0, err = 0;
285 
286 	if (!txr->adapter->link_active) {
287 		/*
288 		 * Discard all packets buffered in txr_interq to avoid
289 		 * sending stale packets when the link next comes up.
290 		 */
291 		ixgbe_drain(ifp, txr);
292 		return (ENETDOWN);
293 	}
294 	if ((ifp->if_flags & IFF_RUNNING) == 0)
295 		return (ENETDOWN);
296 	if (txr->txr_no_space)
297 		return (ENETDOWN);
298 
299 	/* Process the queue */
300 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
301 		if ((err = ixgbe_xmit(txr, next)) != 0) {
302 			m_freem(next);
303 			/* All errors are counted in ixgbe_xmit() */
304 			break;
305 		}
306 		enqueued++;
307 #if __FreeBSD_version >= 1100036
308 		/*
309 		 * Since we're looking at the tx ring, we can check
310 		 * to see if we're a VF by examining our tail register
311 		 * address.
312 		 */
313 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
314 		    (next->m_flags & M_MCAST))
315 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
316 #endif
317 		/* Send a copy of the frame to the BPF listener */
318 		bpf_mtap(ifp, next, BPF_D_OUT);
319 		if ((ifp->if_flags & IFF_RUNNING) == 0)
320 			break;
321 	}
322 
323 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
324 		ixgbe_txeof(txr);
325 
326 	return (err);
327 } /* ixgbe_mq_start_locked */
328 
329 /************************************************************************
330  * ixgbe_deferred_mq_start
331  *
332  *   Called from a softint and workqueue (indirectly) to drain queued
333  *   transmit packets.
334  ************************************************************************/
335 void
336 ixgbe_deferred_mq_start(void *arg)
337 {
338 	struct tx_ring *txr = arg;
339 	struct adapter *adapter = txr->adapter;
340 	struct ifnet   *ifp = adapter->ifp;
341 
342 	IXGBE_TX_LOCK(txr);
343 	if (pcq_peek(txr->txr_interq) != NULL)
344 		ixgbe_mq_start_locked(ifp, txr);
345 	IXGBE_TX_UNLOCK(txr);
346 } /* ixgbe_deferred_mq_start */
347 
348 /************************************************************************
349  * ixgbe_deferred_mq_start_work
350  *
351  *   Called from a workqueue to drain queued transmit packets.
352  ************************************************************************/
353 void
354 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
355 {
356 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
357 	struct adapter *adapter = txr->adapter;
358 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
359 	*enqueued = 0;
360 	percpu_putref(adapter->txr_wq_enqueued);
361 
362 	ixgbe_deferred_mq_start(txr);
363 } /* ixgbe_deferred_mq_start_work */
364 
365 /************************************************************************
366  * ixgbe_drain_all
367  ************************************************************************/
368 void
369 ixgbe_drain_all(struct adapter *adapter)
370 {
371 	struct ifnet *ifp = adapter->ifp;
372 	struct ix_queue *que = adapter->queues;
373 
374 	for (int i = 0; i < adapter->num_queues; i++, que++) {
375 		struct tx_ring  *txr = que->txr;
376 
377 		IXGBE_TX_LOCK(txr);
378 		ixgbe_drain(ifp, txr);
379 		IXGBE_TX_UNLOCK(txr);
380 	}
381 }
382 
383 /************************************************************************
384  * ixgbe_xmit
385  *
386  *   Maps the mbufs to tx descriptors, allowing the
387  *   TX engine to transmit the packets.
388  *
389  *   Return 0 on success, positive on failure
390  ************************************************************************/
391 static int
392 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
393 {
394 	struct adapter          *adapter = txr->adapter;
395 	struct ixgbe_tx_buf     *txbuf;
396 	union ixgbe_adv_tx_desc *txd = NULL;
397 	struct ifnet	        *ifp = adapter->ifp;
398 	int                     i, j, error;
399 	int                     first;
400 	u32                     olinfo_status = 0, cmd_type_len;
401 	bool                    remap = TRUE;
402 	bus_dmamap_t            map;
403 
404 	/* Basic descriptor defines */
405 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
406 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
407 
408 	if (vlan_has_tag(m_head))
409 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
410 
411 	/*
412 	 * It is important to capture the first descriptor
413 	 * used because it will contain the index of the
414 	 * one we tell the hardware to report back on.
415 	 */
416 	first = txr->next_avail_desc;
417 	txbuf = &txr->tx_buffers[first];
418 	map = txbuf->map;
419 
420 	/*
421 	 * Map the packet for DMA.
422 	 */
423 retry:
424 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
425 	    BUS_DMA_NOWAIT);
426 
427 	if (__predict_false(error)) {
428 		struct mbuf *m;
429 
430 		switch (error) {
431 		case EAGAIN:
432 			txr->q_eagain_tx_dma_setup++;
433 			return EAGAIN;
434 		case ENOMEM:
435 			txr->q_enomem_tx_dma_setup++;
436 			return EAGAIN;
437 		case EFBIG:
438 			/* Try it again? - one try */
439 			if (remap == TRUE) {
440 				remap = FALSE;
441 				/*
442 				 * XXX: m_defrag will choke on
443 				 * non-MCLBYTES-sized clusters
444 				 */
445 				txr->q_efbig_tx_dma_setup++;
446 				m = m_defrag(m_head, M_NOWAIT);
447 				if (m == NULL) {
448 					txr->q_mbuf_defrag_failed++;
449 					return ENOBUFS;
450 				}
451 				m_head = m;
452 				goto retry;
453 			} else {
454 				txr->q_efbig2_tx_dma_setup++;
455 				return error;
456 			}
457 		case EINVAL:
458 			txr->q_einval_tx_dma_setup++;
459 			return error;
460 		default:
461 			txr->q_other_tx_dma_setup++;
462 			return error;
463 		}
464 	}
465 
466 	/* Make certain there are enough descriptors */
467 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
468 		txr->txr_no_space = true;
469 		txr->no_desc_avail.ev_count++;
470 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
471 		return EAGAIN;
472 	}
473 
474 	/*
475 	 * Set up the appropriate offload context;
476 	 * this will consume the first descriptor.
477 	 */
478 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
479 	if (__predict_false(error)) {
480 		return (error);
481 	}
482 
483 	/* Do the flow director magic */
484 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
485 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
486 		++txr->atr_count;
487 		if (txr->atr_count >= atr_sample_rate) {
488 			ixgbe_atr(txr, m_head);
489 			txr->atr_count = 0;
490 		}
491 	}
492 
493 	olinfo_status |= IXGBE_ADVTXD_CC;
494 	i = txr->next_avail_desc;
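	/* Fill one advanced TX data descriptor per DMA segment */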
495 	for (j = 0; j < map->dm_nsegs; j++) {
496 		bus_size_t seglen;
497 		bus_addr_t segaddr;
498 
499 		txbuf = &txr->tx_buffers[i];
500 		txd = &txr->tx_base[i];
501 		seglen = map->dm_segs[j].ds_len;
502 		segaddr = htole64(map->dm_segs[j].ds_addr);
503 
504 		txd->read.buffer_addr = segaddr;
505 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
506 		txd->read.olinfo_status = htole32(olinfo_status);
507 
508 		if (++i == txr->num_desc)
509 			i = 0;
510 	}
511 
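	/*
	 * Mark the final descriptor: EOP ends the packet and RS asks the
	 * hardware to report status (writeback) when the packet completes.
	 */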
512 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
513 	txr->tx_avail -= map->dm_nsegs;
514 	txr->next_avail_desc = i;
515 
516 	txbuf->m_head = m_head;
517 	/*
518 	 * Here we swap the maps so the last descriptor,
519 	 * which gets the completion interrupt, has the
520 	 * real map, and the first descriptor gets the
521 	 * unused map from this descriptor.
522 	 */
523 	txr->tx_buffers[first].map = txbuf->map;
524 	txbuf->map = map;
525 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
526 	    BUS_DMASYNC_PREWRITE);
527 
528 	/* Set the EOP descriptor that will be marked done */
529 	txbuf = &txr->tx_buffers[first];
530 	txbuf->eop = txd;
531 
532 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
533 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
534 	/*
535 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
536 	 * hardware that this frame is available to transmit.
537 	 */
538 	++txr->total_packets.ev_count;
539 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
540 
541 	/*
542 	 * XXXX NOMPSAFE: ifp->if_data should be percpu.
543 	 */
544 	ifp->if_obytes += m_head->m_pkthdr.len;
545 	if (m_head->m_flags & M_MCAST)
546 		ifp->if_omcasts++;
547 
548 	/* Mark queue as having work */
549 	if (txr->busy == 0)
550 		txr->busy = 1;
551 
552 	return (0);
553 } /* ixgbe_xmit */
554 
555 /************************************************************************
556  * ixgbe_drain
557  ************************************************************************/
558 static void
559 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
560 {
561 	struct mbuf *m;
562 
563 	IXGBE_TX_LOCK_ASSERT(txr);
564 
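	/* Only the first ring (me == 0) drains the legacy if_snd queue */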
565 	if (txr->me == 0) {
566 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
567 			IFQ_DEQUEUE(&ifp->if_snd, m);
568 			m_freem(m);
569 			IF_DROP(&ifp->if_snd);
570 		}
571 	}
572 
573 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
574 		m_freem(m);
575 		txr->pcq_drops.ev_count++;
576 	}
577 }
578 
579 /************************************************************************
580  * ixgbe_allocate_transmit_buffers
581  *
582  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
583  *   the information needed to transmit a packet on the wire. This is
584  *   called only once at attach; setup is done on every reset.
585  ************************************************************************/
586 static int
587 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
588 {
589 	struct adapter      *adapter = txr->adapter;
590 	device_t            dev = adapter->dev;
591 	struct ixgbe_tx_buf *txbuf;
592 	int                 error, i;
593 
594 	/*
595 	 * Setup DMA descriptor areas.
596 	 */
597 	error = ixgbe_dma_tag_create(
598 	         /*      parent */ adapter->osdep.dmat,
599 	         /*   alignment */ 1,
600 	         /*      bounds */ 0,
601 	         /*     maxsize */ IXGBE_TSO_SIZE,
602 	         /*   nsegments */ adapter->num_segs,
603 	         /*  maxsegsize */ PAGE_SIZE,
604 	         /*       flags */ 0,
605 	                           &txr->txtag);
606 	if (error != 0) {
607 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
608 		goto fail;
609 	}
610 
611 	txr->tx_buffers =
612 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
613 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
614 	if (txr->tx_buffers == NULL) {
615 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
616 		error = ENOMEM;
617 		goto fail;
618 	}
619 
620 	/* Create the descriptor buffer dma maps */
621 	txbuf = txr->tx_buffers;
622 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
623 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
624 		if (error != 0) {
625 			aprint_error_dev(dev,
626 			    "Unable to create TX DMA map (%d)\n", error);
627 			goto fail;
628 		}
629 	}
630 
631 	return 0;
632 fail:
633 	/* We free everything; this handles the case where we failed partway through */
634 #if 0 /* XXX was FreeBSD */
635 	ixgbe_free_transmit_structures(adapter);
636 #else
637 	ixgbe_free_transmit_buffers(txr);
638 #endif
639 	return (error);
640 } /* ixgbe_allocate_transmit_buffers */
641 
642 /************************************************************************
643  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
644  ************************************************************************/
645 static void
646 ixgbe_setup_transmit_ring(struct tx_ring *txr)
647 {
648 	struct adapter        *adapter = txr->adapter;
649 	struct ixgbe_tx_buf   *txbuf;
650 #ifdef DEV_NETMAP
651 	struct netmap_adapter *na = NA(adapter->ifp);
652 	struct netmap_slot    *slot;
653 #endif /* DEV_NETMAP */
654 
655 	/* Clear the old ring contents */
656 	IXGBE_TX_LOCK(txr);
657 
658 #ifdef DEV_NETMAP
659 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
660 		/*
661 		 * (under lock): if in netmap mode, do some consistency
662 		 * checks and set slot to entry 0 of the netmap ring.
663 		 */
664 		slot = netmap_reset(na, NR_TX, txr->me, 0);
665 	}
666 #endif /* DEV_NETMAP */
667 
668 	bzero((void *)txr->tx_base,
669 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
670 	/* Reset indices */
671 	txr->next_avail_desc = 0;
672 	txr->next_to_clean = 0;
673 
674 	/* Free any existing tx buffers. */
675 	txbuf = txr->tx_buffers;
676 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
677 		if (txbuf->m_head != NULL) {
678 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
679 			    0, txbuf->m_head->m_pkthdr.len,
680 			    BUS_DMASYNC_POSTWRITE);
681 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
682 			m_freem(txbuf->m_head);
683 			txbuf->m_head = NULL;
684 		}
685 
686 #ifdef DEV_NETMAP
687 		/*
688 		 * In netmap mode, set the map for the packet buffer.
689 		 * NOTE: Some drivers (not this one) also need to set
690 		 * the physical buffer address in the NIC ring.
691 		 * Slots in the netmap ring (indexed by "si") are
692 		 * kring->nkr_hwofs positions "ahead" wrt the
693 		 * corresponding slot in the NIC ring. In some drivers
694 		 * (not here) nkr_hwofs can be negative. Function
695 		 * netmap_idx_n2k() handles wraparounds properly.
696 		 */
697 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
698 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
699 			netmap_load_map(na, txr->txtag,
700 			    txbuf->map, NMB(na, slot + si));
701 		}
702 #endif /* DEV_NETMAP */
703 
704 		/* Clear the EOP descriptor pointer */
705 		txbuf->eop = NULL;
706 	}
707 
708 	/* Set the rate at which we sample packets */
709 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
710 		txr->atr_sample = atr_sample_rate;
711 
712 	/* Set number of descriptors available */
713 	txr->tx_avail = adapter->num_tx_desc;
714 
715 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
716 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
717 	IXGBE_TX_UNLOCK(txr);
718 } /* ixgbe_setup_transmit_ring */
719 
720 /************************************************************************
721  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
722  ************************************************************************/
723 int
724 ixgbe_setup_transmit_structures(struct adapter *adapter)
725 {
726 	struct tx_ring *txr = adapter->tx_rings;
727 
728 	for (int i = 0; i < adapter->num_queues; i++, txr++)
729 		ixgbe_setup_transmit_ring(txr);
730 
731 	return (0);
732 } /* ixgbe_setup_transmit_structures */
733 
734 /************************************************************************
735  * ixgbe_free_transmit_structures - Free all transmit rings.
736  ************************************************************************/
737 void
738 ixgbe_free_transmit_structures(struct adapter *adapter)
739 {
740 	struct tx_ring *txr = adapter->tx_rings;
741 
742 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
743 		ixgbe_free_transmit_buffers(txr);
744 		ixgbe_dma_free(adapter, &txr->txdma);
745 		IXGBE_TX_LOCK_DESTROY(txr);
746 	}
747 	free(adapter->tx_rings, M_DEVBUF);
748 } /* ixgbe_free_transmit_structures */
749 
750 /************************************************************************
751  * ixgbe_free_transmit_buffers
752  *
753  *   Free transmit ring related data structures.
754  ************************************************************************/
755 static void
756 ixgbe_free_transmit_buffers(struct tx_ring *txr)
757 {
758 	struct adapter      *adapter = txr->adapter;
759 	struct ixgbe_tx_buf *tx_buffer;
760 	int                 i;
761 
762 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
763 
764 	if (txr->tx_buffers == NULL)
765 		return;
766 
767 	tx_buffer = txr->tx_buffers;
768 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
769 		if (tx_buffer->m_head != NULL) {
770 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
771 			    0, tx_buffer->m_head->m_pkthdr.len,
772 			    BUS_DMASYNC_POSTWRITE);
773 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
774 			m_freem(tx_buffer->m_head);
775 			tx_buffer->m_head = NULL;
776 			if (tx_buffer->map != NULL) {
777 				ixgbe_dmamap_destroy(txr->txtag,
778 				    tx_buffer->map);
779 				tx_buffer->map = NULL;
780 			}
781 		} else if (tx_buffer->map != NULL) {
782 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
783 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
784 			tx_buffer->map = NULL;
785 		}
786 	}
787 	if (txr->txr_interq != NULL) {
788 		struct mbuf *m;
789 
790 		while ((m = pcq_get(txr->txr_interq)) != NULL)
791 			m_freem(m);
792 		pcq_destroy(txr->txr_interq);
793 	}
794 	if (txr->tx_buffers != NULL) {
795 		free(txr->tx_buffers, M_DEVBUF);
796 		txr->tx_buffers = NULL;
797 	}
798 	if (txr->txtag != NULL) {
799 		ixgbe_dma_tag_destroy(txr->txtag);
800 		txr->txtag = NULL;
801 	}
802 } /* ixgbe_free_transmit_buffers */
803 
804 /************************************************************************
805  * ixgbe_tx_ctx_setup
806  *
807  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
808  ************************************************************************/
809 static int
810 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
811     u32 *cmd_type_len, u32 *olinfo_status)
812 {
813 	struct adapter                   *adapter = txr->adapter;
814 	struct ixgbe_adv_tx_context_desc *TXD;
815 	struct ether_vlan_header         *eh;
816 #ifdef INET
817 	struct ip                        *ip;
818 #endif
819 #ifdef INET6
820 	struct ip6_hdr                   *ip6;
821 #endif
822 	int                              ehdrlen, ip_hlen = 0;
823 	int                              offload = TRUE;
824 	int                              ctxd = txr->next_avail_desc;
825 	u32                              vlan_macip_lens = 0;
826 	u32                              type_tucmd_mlhl = 0;
827 	u16                              vtag = 0;
828 	u16                              etype;
829 	u8                               ipproto = 0;
830 	char                             *l3d;
831 
832 
833 	/* First check if TSO is to be used */
834 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
835 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
836 
837 		if (rv != 0)
838 			++adapter->tso_err.ev_count;
839 		return rv;
840 	}
841 
842 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
843 		offload = FALSE;
844 
845 	/* Indicate the whole packet as payload when not doing TSO */
846 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
847 
848 	/* Now ready a context descriptor */
849 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
850 
851 	/*
852 	 * In advanced descriptors the vlan tag must
853 	 * be placed into the context descriptor. Hence
854 	 * we need to make one even if not doing offloads.
855 	 */
856 	if (vlan_has_tag(mp)) {
857 		vtag = htole16(vlan_get_tag(mp));
858 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
859 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
860 	           (offload == FALSE))
861 		return (0);
862 
863 	/*
864 	 * Determine where frame payload starts.
865 	 * Jump over vlan headers if already present,
866 	 * helpful for QinQ too.
867 	 */
868 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
869 	eh = mtod(mp, struct ether_vlan_header *);
870 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
871 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
872 		etype = ntohs(eh->evl_proto);
873 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
874 	} else {
875 		etype = ntohs(eh->evl_encap_proto);
876 		ehdrlen = ETHER_HDR_LEN;
877 	}
878 
879 	/* Set the ether header length */
880 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
881 
882 	if (offload == FALSE)
883 		goto no_offloads;
884 
885 	/*
886 	 * If the first mbuf only includes the ethernet header,
887 	 * jump to the next one
888 	 * XXX: This assumes the stack splits mbufs containing headers
889 	 *      on header boundaries
890 	 * XXX: And assumes the entire IP header is contained in one mbuf
891 	 */
892 	if (mp->m_len == ehdrlen && mp->m_next)
893 		l3d = mtod(mp->m_next, char *);
894 	else
895 		l3d = mtod(mp, char *) + ehdrlen;
896 
897 	switch (etype) {
898 #ifdef INET
899 	case ETHERTYPE_IP:
900 		ip = (struct ip *)(l3d);
901 		ip_hlen = ip->ip_hl << 2;
902 		ipproto = ip->ip_p;
903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
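		/*
		 * If IPv4 checksum offload was requested, the stack must
		 * have left ip_sum zeroed for the hardware to fill in.
		 */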
904 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
905 		    ip->ip_sum == 0);
906 		break;
907 #endif
908 #ifdef INET6
909 	case ETHERTYPE_IPV6:
910 		ip6 = (struct ip6_hdr *)(l3d);
911 		ip_hlen = sizeof(struct ip6_hdr);
912 		ipproto = ip6->ip6_nxt;
913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
914 		break;
915 #endif
916 	default:
917 		offload = false;
918 		break;
919 	}
920 
921 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
922 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
923 
924 	vlan_macip_lens |= ip_hlen;
925 
926 	/* No support for offloads for non-L4 next headers */
927 	switch (ipproto) {
928 	case IPPROTO_TCP:
929 		if (mp->m_pkthdr.csum_flags &
930 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
931 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
932 		else
933 			offload = false;
934 		break;
935 	case IPPROTO_UDP:
936 		if (mp->m_pkthdr.csum_flags &
937 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
938 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
939 		else
940 			offload = false;
941 		break;
942 	default:
943 		offload = false;
944 		break;
945 	}
946 
947 	if (offload) /* Insert L4 checksum into data descriptors */
948 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
949 
950 no_offloads:
951 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
952 
953 	/* Now copy bits into descriptor */
954 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
955 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
956 	TXD->seqnum_seed = htole32(0);
957 	TXD->mss_l4len_idx = htole32(0);
958 
959 	/* We've consumed the first desc, adjust counters */
960 	if (++ctxd == txr->num_desc)
961 		ctxd = 0;
962 	txr->next_avail_desc = ctxd;
963 	--txr->tx_avail;
964 
965 	return (0);
966 } /* ixgbe_tx_ctx_setup */
967 
968 /************************************************************************
969  * ixgbe_tso_setup
970  *
971  *   Setup work for hardware segmentation offload (TSO) on
972  *   adapters using advanced tx descriptors
973  ************************************************************************/
974 static int
975 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
976     u32 *olinfo_status)
977 {
978 	struct ixgbe_adv_tx_context_desc *TXD;
979 	struct ether_vlan_header         *eh;
980 #ifdef INET6
981 	struct ip6_hdr                   *ip6;
982 #endif
983 #ifdef INET
984 	struct ip                        *ip;
985 #endif
986 	struct tcphdr                    *th;
987 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
988 	u32                              vlan_macip_lens = 0;
989 	u32                              type_tucmd_mlhl = 0;
990 	u32                              mss_l4len_idx = 0, paylen;
991 	u16                              vtag = 0, eh_type;
992 
993 	/*
994 	 * Determine where frame payload starts.
995 	 * Jump over vlan headers if already present
996 	 */
997 	eh = mtod(mp, struct ether_vlan_header *);
998 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
999 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1000 		eh_type = eh->evl_proto;
1001 	} else {
1002 		ehdrlen = ETHER_HDR_LEN;
1003 		eh_type = eh->evl_encap_proto;
1004 	}
1005 
1006 	switch (ntohs(eh_type)) {
1007 #ifdef INET
1008 	case ETHERTYPE_IP:
1009 		ip = (struct ip *)(mp->m_data + ehdrlen);
1010 		if (ip->ip_p != IPPROTO_TCP)
1011 			return (ENXIO);
1012 		ip->ip_sum = 0;
1013 		ip_hlen = ip->ip_hl << 2;
1014 		th = (struct tcphdr *)((char *)ip + ip_hlen);
1015 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1016 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1017 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1018 		/* Tell transmit desc to also do IPv4 checksum. */
1019 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1020 		break;
1021 #endif
1022 #ifdef INET6
1023 	case ETHERTYPE_IPV6:
1024 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1025 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1026 		if (ip6->ip6_nxt != IPPROTO_TCP)
1027 			return (ENXIO);
1028 		ip_hlen = sizeof(struct ip6_hdr);
1029 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1030 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1031 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1032 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1033 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1034 		break;
1035 #endif
1036 	default:
1037 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1038 		    __func__, ntohs(eh_type));
1039 		break;
1040 	}
1041 
1042 	ctxd = txr->next_avail_desc;
1043 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1044 
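	/* th_off is in 32-bit words; shift to get the TCP header length in bytes */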
1045 	tcp_hlen = th->th_off << 2;
1046 
1047 	/* This is used in the transmit desc in encap */
1048 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1049 
1050 	/* VLAN MACLEN IPLEN */
1051 	if (vlan_has_tag(mp)) {
1052 		vtag = htole16(vlan_get_tag(mp));
1053 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1054 	}
1055 
1056 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1057 	vlan_macip_lens |= ip_hlen;
1058 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1059 
1060 	/* ADV DTYPE TUCMD */
1061 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1062 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1063 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1064 
1065 	/* MSS L4LEN IDX */
1066 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1067 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1068 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1069 
1070 	TXD->seqnum_seed = htole32(0);
1071 
1072 	if (++ctxd == txr->num_desc)
1073 		ctxd = 0;
1074 
1075 	txr->tx_avail--;
1076 	txr->next_avail_desc = ctxd;
1077 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1078 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1079 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1080 	++txr->tso_tx.ev_count;
1081 
1082 	return (0);
1083 } /* ixgbe_tso_setup */
1084 
1085 
1086 /************************************************************************
1087  * ixgbe_txeof
1088  *
1089  *   Examine each tx_buffer in the used queue. If the hardware is done
1090  *   processing the packet then free associated resources. The
1091  *   tx_buffer is put back on the free queue.
1092  ************************************************************************/
1093 bool
1094 ixgbe_txeof(struct tx_ring *txr)
1095 {
1096 	struct adapter		*adapter = txr->adapter;
1097 	struct ifnet		*ifp = adapter->ifp;
1098 	struct ixgbe_tx_buf	*buf;
1099 	union ixgbe_adv_tx_desc *txd;
1100 	u32			work, processed = 0;
1101 	u32			limit = adapter->tx_process_limit;
1102 
1103 	KASSERT(mutex_owned(&txr->tx_mtx));
1104 
1105 #ifdef DEV_NETMAP
1106 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1107 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1108 		struct netmap_adapter *na = NA(adapter->ifp);
1109 		struct netmap_kring *kring = &na->tx_rings[txr->me];
1110 		txd = txr->tx_base;
1111 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1112 		    BUS_DMASYNC_POSTREAD);
1113 		/*
1114 		 * In netmap mode, all the work is done in the context
1115 		 * of the client thread. Interrupt handlers only wake up
1116 		 * clients, which may be sleeping on individual rings
1117 		 * or on a global resource for all rings.
1118 		 * To implement tx interrupt mitigation, we wake up the client
1119 		 * thread roughly every half ring, even if the NIC interrupts
1120 		 * more frequently. This is implemented as follows:
1121 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1122 		 *   the slot that should wake up the thread (nkr_num_slots
1123 		 *   means the user thread should not be woken up);
1124 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1125 		 *   or the slot has the DD bit set.
1126 		 */
1127 		if (!netmap_mitigate ||
1128 		    (kring->nr_kflags < kring->nkr_num_slots &&
1129 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1130 			netmap_tx_irq(ifp, txr->me);
1131 		}
1132 		return false;
1133 	}
1134 #endif /* DEV_NETMAP */
1135 
1136 	if (txr->tx_avail == txr->num_desc) {
1137 		txr->busy = 0;
1138 		return false;
1139 	}
1140 
1141 	/* Get work starting point */
1142 	work = txr->next_to_clean;
1143 	buf = &txr->tx_buffers[work];
1144 	txd = &txr->tx_base[work];
1145 	work -= txr->num_desc; /* The distance to ring end */
1146 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1147 	    BUS_DMASYNC_POSTREAD);
1148 
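	/*
	 * Walk the ring from next_to_clean, freeing each packet whose
	 * EOP descriptor has the DD (done) bit set, until we reach
	 * incomplete work or exhaust the cleanup limit.
	 */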
1149 	do {
1150 		union ixgbe_adv_tx_desc *eop = buf->eop;
1151 		if (eop == NULL) /* No work */
1152 			break;
1153 
1154 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1155 			break;	/* I/O not complete */
1156 
1157 		if (buf->m_head) {
1158 			txr->bytes += buf->m_head->m_pkthdr.len;
1159 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1160 			    0, buf->m_head->m_pkthdr.len,
1161 			    BUS_DMASYNC_POSTWRITE);
1162 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1163 			m_freem(buf->m_head);
1164 			buf->m_head = NULL;
1165 		}
1166 		buf->eop = NULL;
1167 		txr->txr_no_space = false;
1168 		++txr->tx_avail;
1169 
1170 		/* Clean the remaining descriptors of a multi-segment packet */
1171 		while (txd != eop) {
1172 			++txd;
1173 			++buf;
1174 			++work;
1175 			/* wrap the ring? */
1176 			if (__predict_false(!work)) {
1177 				work -= txr->num_desc;
1178 				buf = txr->tx_buffers;
1179 				txd = txr->tx_base;
1180 			}
1181 			if (buf->m_head) {
1182 				txr->bytes +=
1183 				    buf->m_head->m_pkthdr.len;
1184 				bus_dmamap_sync(txr->txtag->dt_dmat,
1185 				    buf->map,
1186 				    0, buf->m_head->m_pkthdr.len,
1187 				    BUS_DMASYNC_POSTWRITE);
1188 				ixgbe_dmamap_unload(txr->txtag,
1189 				    buf->map);
1190 				m_freem(buf->m_head);
1191 				buf->m_head = NULL;
1192 			}
1193 			++txr->tx_avail;
1194 			buf->eop = NULL;
1195 
1196 		}
1197 		++txr->packets;
1198 		++processed;
1199 		++ifp->if_opackets;
1200 
1201 		/* Try the next packet */
1202 		++txd;
1203 		++buf;
1204 		++work;
1205 		/* reset with a wrap */
1206 		if (__predict_false(!work)) {
1207 			work -= txr->num_desc;
1208 			buf = txr->tx_buffers;
1209 			txd = txr->tx_base;
1210 		}
1211 		prefetch(txd);
1212 	} while (__predict_true(--limit));
1213 
1214 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1215 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1216 
1217 	work += txr->num_desc;
1218 	txr->next_to_clean = work;
1219 
1220 	/*
1221 	 * Queue hang detection: we know there is
1222 	 * work outstanding, or the early return above
1223 	 * would have been taken. Increment busy
1224 	 * if nothing managed to get cleaned; then
1225 	 * local_timer will check it and mark the
1226 	 * queue HUNG if it exceeds the maximum attempts.
1227 	 */
1228 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1229 		++txr->busy;
1230 	/*
1231 	 * If anything was cleaned, we reset the state to 1;
1232 	 * note this turns off HUNG if it was set.
1233 	 */
1234 	if (processed)
1235 		txr->busy = 1;
1236 
1237 	if (txr->tx_avail == txr->num_desc)
1238 		txr->busy = 0;
1239 
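	/* Return true when the cleanup limit was exhausted and more work may remain */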
1240 	return ((limit > 0) ? false : true);
1241 } /* ixgbe_txeof */
1242 
1243 /************************************************************************
1244  * ixgbe_rsc_count
1245  *
1246  *   Used to detect a descriptor that has been merged by Hardware RSC.
1247  ************************************************************************/
1248 static inline u32
1249 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1250 {
1251 	return (le32toh(rx->wb.lower.lo_dword.data) &
1252 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1253 } /* ixgbe_rsc_count */
1254 
1255 /************************************************************************
1256  * ixgbe_setup_hw_rsc
1257  *
1258  *   Initialize Hardware RSC (LRO) feature on 82599
1259  *   for an RX ring; it is toggled by the LRO capability
1260  *   even though it is transparent to the stack.
1261  *
1262  *   NOTE: Since this HW feature only works with IPv4 and
1263  *         testing has shown soft LRO to be as effective,
1264  *         this feature will be disabled by default.
1265  ************************************************************************/
1266 static void
1267 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1268 {
1269 	struct	adapter  *adapter = rxr->adapter;
1270 	struct	ixgbe_hw *hw = &adapter->hw;
1271 	u32              rscctrl, rdrxctl;
1272 
1273 	/* If turning LRO/RSC off we need to disable it */
1274 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1275 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1276 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1277 		return;
1278 	}
1279 
1280 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1281 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1282 #ifdef DEV_NETMAP
1283 	/* Always strip CRC unless Netmap disabled it */
1284 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1285 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1286 	    ix_crcstrip)
1287 #endif /* DEV_NETMAP */
1288 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1289 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1290 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1291 
1292 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1293 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1294 	/*
1295 	 * Limit the total number of descriptors that
1296 	 * can be combined, so it does not exceed 64K
1297 	 */
1298 	if (rxr->mbuf_sz == MCLBYTES)
1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1300 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1301 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1302 	else if (rxr->mbuf_sz == MJUM9BYTES)
1303 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1304 	else  /* Using 16K cluster */
1305 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1306 
1307 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1308 
1309 	/* Enable TCP header recognition */
1310 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1311 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1312 
1313 	/* Disable RSC for ACK packets */
1314 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1315 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1316 
1317 	rxr->hw_rsc = TRUE;
1318 } /* ixgbe_setup_hw_rsc */
1319 
1320 /************************************************************************
1321  * ixgbe_refresh_mbufs
1322  *
1323  *   Refresh mbuf buffers for RX descriptor rings
1324  *    - now keeps its own state, so discards due to resource
1325  *      exhaustion are unnecessary. If an mbuf cannot be obtained,
1326  *      the routine just returns, keeping its placeholder, and can
1327  *      simply be called again later to retry.
1328  ************************************************************************/
1329 static void
1330 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1331 {
1332 	struct adapter      *adapter = rxr->adapter;
1333 	struct ixgbe_rx_buf *rxbuf;
1334 	struct mbuf         *mp;
1335 	int                 i, j, error;
1336 	bool                refreshed = false;
1337 
1338 	i = j = rxr->next_to_refresh;
1339 	/* j runs one slot ahead of i; the loop stops when j reaches limit */
1340 	if (++j == rxr->num_desc)
1341 		j = 0;
1342 
1343 	while (j != limit) {
1344 		rxbuf = &rxr->rx_buffers[i];
1345 		if (rxbuf->buf == NULL) {
1346 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1347 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1348 			if (mp == NULL) {
1349 				rxr->no_jmbuf.ev_count++;
1350 				goto update;
1351 			}
1352 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1353 				m_adj(mp, ETHER_ALIGN);
1354 		} else
1355 			mp = rxbuf->buf;
1356 
1357 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1358 
1359 		/* If we're dealing with an mbuf that was copied rather
1360 		 * than replaced, there's no need to go through busdma.
1361 		 */
1362 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1363 			/* Get the memory mapping */
1364 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1365 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1366 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1367 			if (error != 0) {
1368 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1369 				m_free(mp);
1370 				rxbuf->buf = NULL;
1371 				goto update;
1372 			}
1373 			rxbuf->buf = mp;
1374 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1375 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1376 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1377 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1378 		} else {
1379 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1380 			rxbuf->flags &= ~IXGBE_RX_COPY;
1381 		}
1382 
1383 		refreshed = true;
1384 		/* Next is precalculated */
1385 		i = j;
1386 		rxr->next_to_refresh = i;
1387 		if (++j == rxr->num_desc)
1388 			j = 0;
1389 	}
1390 
1391 update:
1392 	if (refreshed) /* Update hardware tail index */
1393 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1394 
1395 	return;
1396 } /* ixgbe_refresh_mbufs */
1397 
1398 /************************************************************************
1399  * ixgbe_allocate_receive_buffers
1400  *
1401  *   Allocate memory for rx_buffer structures. Since we use one
1402  *   rx_buffer per received packet, the maximum number of rx_buffers
1403  *   that we'll need is equal to the number of receive descriptors
1404  *   that we've allocated.
1405  ************************************************************************/
1406 static int
1407 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1408 {
1409 	struct	adapter     *adapter = rxr->adapter;
1410 	device_t            dev = adapter->dev;
1411 	struct ixgbe_rx_buf *rxbuf;
1412 	int                 bsize, error;
1413 
1414 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1415 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1416 	    M_NOWAIT | M_ZERO);
1417 	if (rxr->rx_buffers == NULL) {
1418 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1419 		error = ENOMEM;
1420 		goto fail;
1421 	}
1422 
1423 	error = ixgbe_dma_tag_create(
1424 	         /*      parent */ adapter->osdep.dmat,
1425 	         /*   alignment */ 1,
1426 	         /*      bounds */ 0,
1427 	         /*     maxsize */ MJUM16BYTES,
1428 	         /*   nsegments */ 1,
1429 	         /*  maxsegsize */ MJUM16BYTES,
1430 	         /*       flags */ 0,
1431 	                           &rxr->ptag);
1432 	if (error != 0) {
1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1434 		goto fail;
1435 	}
1436 
1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1438 		rxbuf = &rxr->rx_buffers[i];
1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1440 		if (error) {
1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1442 			goto fail;
1443 		}
1444 	}
1445 
1446 	return (0);
1447 
1448 fail:
1449 	/* Frees all, but can handle partial completion */
1450 	ixgbe_free_receive_structures(adapter);
1451 
1452 	return (error);
1453 } /* ixgbe_allocate_receive_buffers */
1454 
1455 /************************************************************************
1456  * ixgbe_free_receive_ring
1457  ************************************************************************/
1458 static void
1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
1460 {
1461 	for (int i = 0; i < rxr->num_desc; i++) {
1462 		ixgbe_rx_discard(rxr, i);
1463 	}
1464 } /* ixgbe_free_receive_ring */
1465 
1466 /************************************************************************
1467  * ixgbe_setup_receive_ring
1468  *
1469  *   Initialize a receive ring and its buffers.
1470  ************************************************************************/
1471 static int
1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1473 {
1474 	struct adapter        *adapter;
1475 	struct ixgbe_rx_buf   *rxbuf;
1476 #ifdef LRO
1477 	struct ifnet          *ifp;
1478 	struct lro_ctrl       *lro = &rxr->lro;
1479 #endif /* LRO */
1480 #ifdef DEV_NETMAP
1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1482 	struct netmap_slot    *slot;
1483 #endif /* DEV_NETMAP */
1484 	int                   rsize, error = 0;
1485 
1486 	adapter = rxr->adapter;
1487 #ifdef LRO
1488 	ifp = adapter->ifp;
1489 #endif /* LRO */
1490 
1491 	/* Clear the ring contents */
1492 	IXGBE_RX_LOCK(rxr);
1493 
1494 #ifdef DEV_NETMAP
1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1497 #endif /* DEV_NETMAP */
1498 
1499 	rsize = roundup2(adapter->num_rx_desc *
1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1501 	bzero((void *)rxr->rx_base, rsize);
1502 	/* Cache the size */
1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1504 
1505 	/* Free current RX buffer structs and their mbufs */
1506 	ixgbe_free_receive_ring(rxr);
1507 
1508 	/* Now replenish the mbufs */
1509 	for (int j = 0; j != rxr->num_desc; ++j) {
1510 		struct mbuf *mp;
1511 
1512 		rxbuf = &rxr->rx_buffers[j];
1513 
1514 #ifdef DEV_NETMAP
1515 		/*
1516 		 * In netmap mode, fill the map and set the buffer
1517 		 * address in the NIC ring, considering the offset
1518 		 * between the netmap and NIC rings (see comment in
1519 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1520 		 * an mbuf, so end the block with a continue;
1521 		 */
1522 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1523 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1524 			uint64_t paddr;
1525 			void *addr;
1526 
1527 			addr = PNMB(na, slot + sj, &paddr);
1528 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1529 			/* Update descriptor and the cached value */
1530 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1531 			rxbuf->addr = htole64(paddr);
1532 			continue;
1533 		}
1534 #endif /* DEV_NETMAP */
1535 
1536 		rxbuf->flags = 0;
1537 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1538 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1539 		if (rxbuf->buf == NULL) {
1540 			error = ENOBUFS;
1541 			goto fail;
1542 		}
1543 		mp = rxbuf->buf;
1544 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1545 		/* Get the memory mapping */
1546 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1547 		    mp, BUS_DMA_NOWAIT);
1548 		if (error != 0)
1549 			goto fail;
1550 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1551 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1552 		/* Update the descriptor and the cached value */
1553 		rxr->rx_base[j].read.pkt_addr =
1554 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1555 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1556 	}
1557 
1558 
1559 	/* Setup our descriptor indices */
1560 	rxr->next_to_check = 0;
1561 	rxr->next_to_refresh = 0;
1562 	rxr->lro_enabled = FALSE;
1563 	rxr->rx_copies.ev_count = 0;
1564 #if 0 /* NetBSD */
1565 	rxr->rx_bytes.ev_count = 0;
1566 #if 1	/* Fix inconsistency */
1567 	rxr->rx_packets.ev_count = 0;
1568 #endif
1569 #endif
1570 	rxr->vtag_strip = FALSE;
1571 
1572 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1573 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1574 
1575 	/*
1576 	 * Now set up the LRO interface
1577 	 */
1578 	if (ixgbe_rsc_enable)
1579 		ixgbe_setup_hw_rsc(rxr);
1580 #ifdef LRO
1581 	else if (ifp->if_capenable & IFCAP_LRO) {
1582 		device_t dev = adapter->dev;
1583 		int err = tcp_lro_init(lro);
1584 		if (err) {
1585 			device_printf(dev, "LRO Initialization failed!\n");
1586 			goto fail;
1587 		}
1588 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1589 		rxr->lro_enabled = TRUE;
1590 		lro->ifp = adapter->ifp;
1591 	}
1592 #endif /* LRO */
1593 
1594 	IXGBE_RX_UNLOCK(rxr);
1595 
1596 	return (0);
1597 
1598 fail:
1599 	ixgbe_free_receive_ring(rxr);
1600 	IXGBE_RX_UNLOCK(rxr);
1601 
1602 	return (error);
1603 } /* ixgbe_setup_receive_ring */
1604 
1605 /************************************************************************
1606  * ixgbe_setup_receive_structures - Initialize all receive rings.
1607  ************************************************************************/
1608 int
1609 ixgbe_setup_receive_structures(struct adapter *adapter)
1610 {
1611 	struct rx_ring *rxr = adapter->rx_rings;
1612 	int            j;
1613 
1614 	/*
1615 	 * Now reinitialize our supply of jumbo mbufs.  The number
1616 	 * or size of jumbo mbufs may have changed.
1617 	 * Assume all of the rxr->ptag tags are the same.
1618 	 */
1619 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat,
1620 	    (2 * adapter->num_rx_desc) * adapter->num_queues,
1621 	    adapter->rx_mbuf_sz);
1622 
1623 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1624 		if (ixgbe_setup_receive_ring(rxr))
1625 			goto fail;
1626 
1627 	return (0);
1628 fail:
1629 	/*
1630 	 * Free the RX buffers allocated so far; we only handle
1631 	 * the rings that completed, since the failing ring will have
1632 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
1633 	 */
1634 	for (int i = 0; i < j; ++i) {
1635 		rxr = &adapter->rx_rings[i];
1636 		IXGBE_RX_LOCK(rxr);
1637 		ixgbe_free_receive_ring(rxr);
1638 		IXGBE_RX_UNLOCK(rxr);
1639 	}
1640 
1641 	return (ENOBUFS);
1642 } /* ixgbe_setup_receive_structures */
1643 
1644 
1645 /************************************************************************
1646  * ixgbe_free_receive_structures - Free all receive rings.
1647  ************************************************************************/
1648 void
1649 ixgbe_free_receive_structures(struct adapter *adapter)
1650 {
1651 	struct rx_ring *rxr = adapter->rx_rings;
1652 
1653 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1654 
1655 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
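		/* Free the mbufs, DMA maps and buffer array for this ring */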
1656 		ixgbe_free_receive_buffers(rxr);
1657 #ifdef LRO
1658 		/* Free LRO memory */
1659 		tcp_lro_free(&rxr->lro);
1660 #endif /* LRO */
1661 		/* Free the ring memory as well */
1662 		ixgbe_dma_free(adapter, &rxr->rxdma);
1663 		IXGBE_RX_LOCK_DESTROY(rxr);
1664 	}
1665 
1666 	free(adapter->rx_rings, M_DEVBUF);
1667 } /* ixgbe_free_receive_structures */
1668 
1669 
1670 /************************************************************************
1671  * ixgbe_free_receive_buffers - Free receive ring data structures
1672  ************************************************************************/
1673 static void
1674 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1675 {
1676 	struct adapter      *adapter = rxr->adapter;
1677 	struct ixgbe_rx_buf *rxbuf;
1678 
1679 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1680 
1681 	/* Cleanup any existing buffers */
1682 	if (rxr->rx_buffers != NULL) {
1683 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1684 			rxbuf = &rxr->rx_buffers[i];
1685 			ixgbe_rx_discard(rxr, i);
1686 			if (rxbuf->pmap != NULL) {
1687 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1688 				rxbuf->pmap = NULL;
1689 			}
1690 		}
1691 		if (rxr->rx_buffers != NULL) {
1692 			free(rxr->rx_buffers, M_DEVBUF);
1693 			rxr->rx_buffers = NULL;
1694 		}
1695 	}
1696 
1697 	if (rxr->ptag != NULL) {
1698 		ixgbe_dma_tag_destroy(rxr->ptag);
1699 		rxr->ptag = NULL;
1700 	}
1701 
1702 	return;
1703 } /* ixgbe_free_receive_buffers */
1704 
1705 /************************************************************************
1706  * ixgbe_rx_input
1707  ************************************************************************/
1708 static __inline void
1709 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1710     u32 ptype)
1711 {
1712 	struct adapter	*adapter = ifp->if_softc;
1713 
1714 #ifdef LRO
1715 	struct ethercom *ec = &adapter->osdep.ec;
1716 
1717 	/*
1718 	 * At the moment, LRO is only for IP/TCP packets whose TCP checksum
1719 	 * was computed by hardware and which carry no VLAN tag in the
1720 	 * ethernet header.  For IPv6 we do not yet support extension headers.
1721 	 */
1722 	if (rxr->lro_enabled &&
1723 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1724 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1725 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1726 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1727 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1728 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1729 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1730 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1731 		/*
1732 		 * Send to the stack if:
1733 		 *  - LRO not enabled, or
1734 		 *  - no LRO resources, or
1735 		 *  - lro enqueue fails
1736 		 */
1737 		if (rxr->lro.lro_cnt != 0)
1738 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1739 				return;
1740 	}
1741 #endif /* LRO */
1742 
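	/* Hand the packet to the stack via the per-CPU input queue */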
1743 	if_percpuq_enqueue(adapter->ipq, m);
1744 } /* ixgbe_rx_input */
1745 
1746 /************************************************************************
1747  * ixgbe_rx_discard
1748  ************************************************************************/
1749 static __inline void
1750 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1751 {
1752 	struct ixgbe_rx_buf *rbuf;
1753 
1754 	rbuf = &rxr->rx_buffers[i];
1755 
1756 	/*
1757 	 * With advanced descriptors the writeback
1758 	 * clobbers the buffer addresses, so it's easier
1759 	 * to just free the existing mbufs and take
1760 	 * the normal refresh path to get new buffers
1761 	 * and mappings.
1762 	 */
1763 
1764 	if (rbuf->fmp != NULL) { /* Partial chain? */
1765 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1766 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1767 		m_freem(rbuf->fmp);
1768 		rbuf->fmp = NULL;
1769 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1770 	} else if (rbuf->buf) {
1771 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1772 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1773 		m_free(rbuf->buf);
1774 		rbuf->buf = NULL;
1775 	}
1776 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1777 
1778 	rbuf->flags = 0;
1779 
1780 	return;
1781 } /* ixgbe_rx_discard */
1782 
1783 
1784 /************************************************************************
1785  * ixgbe_rxeof
1786  *
1787  *   Executes in interrupt context. It replenishes the
1788  *   mbufs in the descriptor ring and sends data which has
1789  *   been DMA'ed into host memory to the upper layer.
1790  *
1791  *   Return TRUE for more work, FALSE for all clean.
1792  ************************************************************************/
1793 bool
1794 ixgbe_rxeof(struct ix_queue *que)
1795 {
1796 	struct adapter		*adapter = que->adapter;
1797 	struct rx_ring		*rxr = que->rxr;
1798 	struct ifnet		*ifp = adapter->ifp;
1799 #ifdef LRO
1800 	struct lro_ctrl		*lro = &rxr->lro;
1801 #endif /* LRO */
1802 	union ixgbe_adv_rx_desc	*cur;
1803 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1804 	int			i, nextp, processed = 0;
1805 	u32			staterr = 0;
1806 	u32			count = adapter->rx_process_limit;
1807 #ifdef RSS
1808 	u16			pkt_info;
1809 #endif
1810 
1811 	IXGBE_RX_LOCK(rxr);
1812 
1813 #ifdef DEV_NETMAP
1814 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1815 		/* Same as the txeof routine: wakeup clients on intr. */
1816 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1817 			IXGBE_RX_UNLOCK(rxr);
1818 			return (FALSE);
1819 		}
1820 	}
1821 #endif /* DEV_NETMAP */
1822 
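	/* Main loop: process up to rx_process_limit completed descriptors */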
1823 	for (i = rxr->next_to_check; count != 0;) {
1824 		struct mbuf *sendmp, *mp;
1825 		u32         rsc, ptype;
1826 		u16         len;
1827 		u16         vtag = 0;
1828 		bool        eop;
1829 
1830 		/* Sync the ring. */
1831 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1832 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1833 
1834 		cur = &rxr->rx_base[i];
1835 		staterr = le32toh(cur->wb.upper.status_error);
1836 #ifdef RSS
1837 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1838 #endif
1839 
1840 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1841 			break;
1842 
1843 		count--;
1844 		sendmp = NULL;
1845 		nbuf = NULL;
1846 		rsc = 0;
1847 		cur->wb.upper.status_error = 0;
1848 		rbuf = &rxr->rx_buffers[i];
1849 		mp = rbuf->buf;
1850 
1851 		len = le16toh(cur->wb.upper.length);
1852 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1853 		    IXGBE_RXDADV_PKTTYPE_MASK;
1854 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1855 
1856 		/* Make sure bad packets are discarded */
1857 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1858 #if __FreeBSD_version >= 1100036
1859 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1860 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1861 #endif
1862 			rxr->rx_discarded.ev_count++;
1863 			ixgbe_rx_discard(rxr, i);
1864 			goto next_desc;
1865 		}
1866 
1867 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1868 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1869 
1870 		/*
1871 		 * On the 82599, which supports a hardware
1872 		 * LRO (called HW RSC), packets need not be
1873 		 * fragmented across sequential descriptors;
1874 		 * instead the next descriptor is indicated
1875 		 * in bits of the current descriptor.
1876 		 * This also means that we might process
1877 		 * more than one packet at a time, something
1878 		 * that had never been true before.  It
1879 		 * required eliminating global chain pointers
1880 		 * in favor of what we are doing here.  -jfv
1881 		 */
1882 		if (!eop) {
1883 			/*
1884 			 * Figure out the next descriptor
1885 			 * of this frame.
1886 			 */
1887 			if (rxr->hw_rsc == TRUE) {
1888 				rsc = ixgbe_rsc_count(cur);
1889 				rxr->rsc_num += (rsc - 1);
1890 			}
1891 			if (rsc) { /* Get hardware index */
1892 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1893 				    IXGBE_RXDADV_NEXTP_SHIFT);
1894 			} else { /* Just sequential */
1895 				nextp = i + 1;
1896 				if (nextp == adapter->num_rx_desc)
1897 					nextp = 0;
1898 			}
1899 			nbuf = &rxr->rx_buffers[nextp];
1900 			prefetch(nbuf);
1901 		}
1902 		/*
1903 		 * Rather than using the fmp/lmp global pointers
1904 		 * we now keep the head of a packet chain in the
1905 		 * buffer struct and pass this along from one
1906 		 * descriptor to the next, until we get EOP.
1907 		 */
1908 		mp->m_len = len;
1909 		/*
1910 		 * See if there is a stored head of a packet chain;
1911 		 * if so, this buffer is a secondary fragment.
1912 		 */
1913 		sendmp = rbuf->fmp;
1914 		if (sendmp != NULL) {  /* secondary frag */
1915 			rbuf->buf = rbuf->fmp = NULL;
1916 			mp->m_flags &= ~M_PKTHDR;
1917 			sendmp->m_pkthdr.len += mp->m_len;
1918 		} else {
1919 			/*
1920 			 * Optimize.  This might be a small packet,
1921 			 * maybe just a TCP ACK.  Do a fast copy that
1922 			 * is cache aligned into a new mbuf, and
1923 			 * leave the old mbuf+cluster for re-use.
1924 			 */
1925 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1926 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1927 				if (sendmp != NULL) {
1928 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1929 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1930 					    len);
1931 					sendmp->m_len = len;
1932 					rxr->rx_copies.ev_count++;
1933 					rbuf->flags |= IXGBE_RX_COPY;
1934 				}
1935 			}
1936 			if (sendmp == NULL) {
1937 				rbuf->buf = rbuf->fmp = NULL;
1938 				sendmp = mp;
1939 			}
1940 
1941 			/* First descriptor of a non-packet-split chain */
1942 			sendmp->m_flags |= M_PKTHDR;
1943 			sendmp->m_pkthdr.len = mp->m_len;
1944 		}
1945 		++processed;
1946 
1947 		/* Pass the head pointer on */
1948 		if (eop == 0) {
1949 			nbuf->fmp = sendmp;
1950 			sendmp = NULL;
1951 			mp->m_next = nbuf->buf;
1952 		} else { /* Sending this frame */
1953 			m_set_rcvif(sendmp, ifp);
1954 			++rxr->packets;
1955 			rxr->rx_packets.ev_count++;
1956 			/* capture data for AIM */
1957 			rxr->bytes += sendmp->m_pkthdr.len;
1958 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1959 			/* Process vlan info */
1960 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1961 				vtag = le16toh(cur->wb.upper.vlan);
1962 			if (vtag) {
1963 				vlan_set_tag(sendmp, vtag);
1964 			}
1965 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1966 				ixgbe_rx_checksum(staterr, sendmp, ptype,
1967 				   &adapter->stats.pf);
1968 			}
1969 
1970 #if 0 /* FreeBSD */
1971 			/*
1972 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
1973 			 * and never cleared. This means we have RSS hash
1974 			 * available to be used.
1975 			 */
1976 			if (adapter->num_queues > 1) {
1977 				sendmp->m_pkthdr.flowid =
1978 				    le32toh(cur->wb.lower.hi_dword.rss);
1979 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1980 				case IXGBE_RXDADV_RSSTYPE_IPV4:
1981 					M_HASHTYPE_SET(sendmp,
1982 					    M_HASHTYPE_RSS_IPV4);
1983 					break;
1984 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1985 					M_HASHTYPE_SET(sendmp,
1986 					    M_HASHTYPE_RSS_TCP_IPV4);
1987 					break;
1988 				case IXGBE_RXDADV_RSSTYPE_IPV6:
1989 					M_HASHTYPE_SET(sendmp,
1990 					    M_HASHTYPE_RSS_IPV6);
1991 					break;
1992 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1993 					M_HASHTYPE_SET(sendmp,
1994 					    M_HASHTYPE_RSS_TCP_IPV6);
1995 					break;
1996 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1997 					M_HASHTYPE_SET(sendmp,
1998 					    M_HASHTYPE_RSS_IPV6_EX);
1999 					break;
2000 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2001 					M_HASHTYPE_SET(sendmp,
2002 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
2003 					break;
2004 #if __FreeBSD_version > 1100000
2005 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2006 					M_HASHTYPE_SET(sendmp,
2007 					    M_HASHTYPE_RSS_UDP_IPV4);
2008 					break;
2009 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2010 					M_HASHTYPE_SET(sendmp,
2011 					    M_HASHTYPE_RSS_UDP_IPV6);
2012 					break;
2013 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2014 					M_HASHTYPE_SET(sendmp,
2015 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2016 					break;
2017 #endif
2018 				default:
2019 					M_HASHTYPE_SET(sendmp,
2020 					    M_HASHTYPE_OPAQUE_HASH);
2021 				}
2022 			} else {
2023 				sendmp->m_pkthdr.flowid = que->msix;
2024 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2025 			}
2026 #endif
2027 		}
2028 next_desc:
2029 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2030 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2031 
2032 		/* Advance our pointers to the next descriptor. */
2033 		if (++i == rxr->num_desc)
2034 			i = 0;
2035 
2036 		/* Now send to the stack or do LRO */
2037 		if (sendmp != NULL) {
2038 			rxr->next_to_check = i;
2039 			IXGBE_RX_UNLOCK(rxr);
2040 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2041 			IXGBE_RX_LOCK(rxr);
2042 			i = rxr->next_to_check;
2043 		}
2044 
2045 		/* Every 8 descriptors we go to refresh mbufs */
2046 		if (processed == 8) {
2047 			ixgbe_refresh_mbufs(rxr, i);
2048 			processed = 0;
2049 		}
2050 	}
2051 
2052 	/* Refresh any remaining buf structs */
2053 	if (ixgbe_rx_unrefreshed(rxr))
2054 		ixgbe_refresh_mbufs(rxr, i);
2055 
2056 	rxr->next_to_check = i;
2057 
2058 	IXGBE_RX_UNLOCK(rxr);
2059 
2060 #ifdef LRO
2061 	/*
2062 	 * Flush any outstanding LRO work
2063 	 */
2064 	tcp_lro_flush_all(lro);
2065 #endif /* LRO */
2066 
2067 	/*
2068 	 * Still have cleaning to do?
2069 	 */
2070 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2071 		return (TRUE);
2072 
2073 	return (FALSE);
2074 } /* ixgbe_rxeof */
2075 
2076 
2077 /************************************************************************
2078  * ixgbe_rx_checksum
2079  *
2080  *   Verify that the hardware indicated that the checksum is valid.
2081  *   Inform the stack about the status of the checksum so that the
2082  *   stack doesn't spend time verifying it.
2083  ************************************************************************/
2084 static void
2085 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2086     struct ixgbe_hw_stats *stats)
2087 {
2088 	u16  status = (u16)staterr;
2089 	u8   errors = (u8)(staterr >> 24);
2090 #if 0
2091 	bool sctp = false;
2092 
2093 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2094 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2095 		sctp = true;
2096 #endif
2097 
2098 	/* IPv4 checksum */
2099 	if (status & IXGBE_RXD_STAT_IPCS) {
2100 		stats->ipcs.ev_count++;
2101 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2102 			/* IP Checksum Good */
2103 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2104 		} else {
2105 			stats->ipcs_bad.ev_count++;
2106 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2107 		}
2108 	}
2109 	/* TCP/UDP/SCTP checksum */
2110 	if (status & IXGBE_RXD_STAT_L4CS) {
2111 		stats->l4cs.ev_count++;
2112 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2113 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2114 			mp->m_pkthdr.csum_flags |= type;
2115 		} else {
2116 			stats->l4cs_bad.ev_count++;
2117 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2118 		}
2119 	}
2120 } /* ixgbe_rx_checksum */
2121 
2122 /************************************************************************
2123  * ixgbe_dma_malloc
2124  ************************************************************************/
2125 int
2126 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2127 		struct ixgbe_dma_alloc *dma, const int mapflags)
2128 {
2129 	device_t dev = adapter->dev;
2130 	int      r, rsegs;
2131 
2132 	r = ixgbe_dma_tag_create(
2133 	     /*      parent */ adapter->osdep.dmat,
2134 	     /*   alignment */ DBA_ALIGN,
2135 	     /*      bounds */ 0,
2136 	     /*     maxsize */ size,
2137 	     /*   nsegments */ 1,
2138 	     /*  maxsegsize */ size,
2139 	     /*       flags */ BUS_DMA_ALLOCNOW,
2140 			       &dma->dma_tag);
2141 	if (r != 0) {
2142 		aprint_error_dev(dev,
2143 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2144 		    r);
2145 		goto fail_0;
2146 	}
2147 
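	/* Allocate the DMA-safe memory segment */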
2148 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2149 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2150 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2151 	if (r != 0) {
2152 		aprint_error_dev(dev,
2153 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2154 		goto fail_1;
2155 	}
2156 
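	/* Map the segment into kernel virtual address space */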
2157 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2158 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2159 	if (r != 0) {
2160 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2161 		    __func__, r);
2162 		goto fail_2;
2163 	}
2164 
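	/* Create a DMA map for loading the memory */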
2165 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2166 	if (r != 0) {
2167 		aprint_error_dev(dev,
2168 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2169 		goto fail_3;
2170 	}
2171 
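	/* Load the map so we can obtain the physical (bus) address */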
2172 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2173 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2174 	if (r != 0) {
2175 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2176 		    __func__, r);
2177 		goto fail_4;
2178 	}
2179 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2180 	dma->dma_size = size;
2181 	return 0;
2182 fail_4:
2183 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2184 fail_3:
2185 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2186 fail_2:
2187 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2188 fail_1:
2189 	ixgbe_dma_tag_destroy(dma->dma_tag);
2190 fail_0:
2191 
2192 	return (r);
2193 } /* ixgbe_dma_malloc */
2194 
2195 /************************************************************************
2196  * ixgbe_dma_free
2197  ************************************************************************/
2198 void
2199 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2200 {
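	/* Sync and unload the map, then free the memory and the tag */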
2201 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2202 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2203 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2204 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2205 	ixgbe_dma_tag_destroy(dma->dma_tag);
2206 } /* ixgbe_dma_free */
2207 
2208 
2209 /************************************************************************
2210  * ixgbe_allocate_queues
2211  *
2212  *   Allocate memory for the transmit and receive rings, and then
2213  *   the descriptors associated with each, called only once at attach.
2214  ************************************************************************/
2215 int
2216 ixgbe_allocate_queues(struct adapter *adapter)
2217 {
2218 	device_t	dev = adapter->dev;
2219 	struct ix_queue	*que;
2220 	struct tx_ring	*txr;
2221 	struct rx_ring	*rxr;
2222 	int             rsize, tsize, error = IXGBE_SUCCESS;
2223 	int             txconf = 0, rxconf = 0;
2224 
2225 	/* First, allocate the top level queue structs */
2226 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2227 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2228 	if (adapter->queues == NULL) {
2229 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
2230 		error = ENOMEM;
2231 		goto fail;
2232 	}
2233 
2234 	/* Second, allocate the TX ring struct memory */
2235 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2236 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2237 	if (adapter->tx_rings == NULL) {
2238 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2239 		error = ENOMEM;
2240 		goto tx_fail;
2241 	}
2242 
2243 	/* Third, allocate the RX ring */
2244 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2245 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2246 	if (adapter->rx_rings == NULL) {
2247 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2248 		error = ENOMEM;
2249 		goto rx_fail;
2250 	}
2251 
2252 	/* For the ring itself */
2253 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2254 	    DBA_ALIGN);
2255 
2256 	/*
2257 	 * Now set up the TX queues.  txconf is needed to handle the
2258 	 * possibility that things fail midcourse and we need to
2259 	 * free the memory allocated so far gracefully.
2260 	 */
2261 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2262 		/* Set up some basics */
2263 		txr = &adapter->tx_rings[i];
2264 		txr->adapter = adapter;
2265 		txr->txr_interq = NULL;
2266 		/* In case SR-IOV is enabled, align the index properly */
2267 #ifdef PCI_IOV
2268 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2269 		    i);
2270 #else
2271 		txr->me = i;
2272 #endif
2273 		txr->num_desc = adapter->num_tx_desc;
2274 
2275 		/* Initialize the TX side lock */
2276 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2277 
2278 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2279 		    BUS_DMA_NOWAIT)) {
2280 			aprint_error_dev(dev,
2281 			    "Unable to allocate TX Descriptor memory\n");
2282 			error = ENOMEM;
2283 			goto err_tx_desc;
2284 		}
2285 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2286 		bzero((void *)txr->tx_base, tsize);
2287 
2288 		/* Now allocate transmit buffers for the ring */
2289 		if (ixgbe_allocate_transmit_buffers(txr)) {
2290 			aprint_error_dev(dev,
2291 			    "Critical Failure setting up transmit buffers\n");
2292 			error = ENOMEM;
2293 			goto err_tx_desc;
2294 		}
2295 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2296 			/* Allocate a buf ring */
2297 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2298 			if (txr->txr_interq == NULL) {
2299 				aprint_error_dev(dev,
2300 				    "Critical Failure setting up buf ring\n");
2301 				error = ENOMEM;
2302 				goto err_tx_desc;
2303 			}
2304 		}
2305 	}
2306 
2307 	/*
2308 	 * Next the RX queues...
2309 	 */
2310 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2311 	    DBA_ALIGN);
2312 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2313 		rxr = &adapter->rx_rings[i];
2314 		/* Set up some basics */
2315 		rxr->adapter = adapter;
2316 #ifdef PCI_IOV
2317 		/* In case SR-IOV is enabled, align the index properly */
2318 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2319 		    i);
2320 #else
2321 		rxr->me = i;
2322 #endif
2323 		rxr->num_desc = adapter->num_rx_desc;
2324 
2325 		/* Initialize the RX side lock */
2326 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2327 
2328 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2329 		    BUS_DMA_NOWAIT)) {
2330 			aprint_error_dev(dev,
2331 			    "Unable to allocate RX Descriptor memory\n");
2332 			error = ENOMEM;
2333 			goto err_rx_desc;
2334 		}
2335 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2336 		bzero((void *)rxr->rx_base, rsize);
2337 
2338 		/* Allocate receive buffers for the ring */
2339 		if (ixgbe_allocate_receive_buffers(rxr)) {
2340 			aprint_error_dev(dev,
2341 			    "Critical Failure setting up receive buffers\n");
2342 			error = ENOMEM;
2343 			goto err_rx_desc;
2344 		}
2345 	}
2346 
2347 	/*
2348 	 * Finally set up the queue holding structs
2349 	 */
2350 	for (int i = 0; i < adapter->num_queues; i++) {
2351 		que = &adapter->queues[i];
2352 		que->adapter = adapter;
2353 		que->me = i;
2354 		que->txr = &adapter->tx_rings[i];
2355 		que->rxr = &adapter->rx_rings[i];
2356 
2357 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2358 		que->disabled_count = 0;
2359 	}
2360 
2361 	return (0);
2362 
2363 err_rx_desc:
2364 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2365 		ixgbe_dma_free(adapter, &rxr->rxdma);
2366 err_tx_desc:
2367 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2368 		ixgbe_dma_free(adapter, &txr->txdma);
2369 	free(adapter->rx_rings, M_DEVBUF);
2370 rx_fail:
2371 	free(adapter->tx_rings, M_DEVBUF);
2372 tx_fail:
2373 	free(adapter->queues, M_DEVBUF);
2374 fail:
2375 	return (error);
2376 } /* ixgbe_allocate_queues */
2377