xref: /netbsd-src/sys/dev/pci/ixgbe/ix_txrx.c (revision 8ecbf5f02b752fcb7debe1a8fab1dc82602bc760)
1 /* $NetBSD: ix_txrx.c,v 1.63 2020/04/17 02:21:25 msaitoh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 
69 #include "ixgbe.h"
70 
71 /*
72  * HW RSC control:
73  *  This feature works only with IPv4 and
74  *  only on 82599 and later devices.
75  *  It also breaks IP forwarding, which the
76  *  stack cannot work around the way it can
77  *  with software LRO.  For these reasons it
78  *  is left disabled by default and there is
79  *  no tunable interface; enabling it requires
80  *  setting the initializer below to TRUE and
81  *  recompiling the driver.
82  */
83 static bool ixgbe_rsc_enable = FALSE;
84 
85 /*
86  * For Flow Director: this is the sample
87  * rate of TX packets for the filter pool.
88  * With the default of 20, every 20th
89  * packet is probed.
90  *
91  * Setting this to 0 disables the
92  * feature.
93  */
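/*
 * Where this is consumed in this file: ixgbe_setup_transmit_ring() copies
 * it into txr->atr_sample when IXGBE_FEATURE_FDIR is enabled, and
 * ixgbe_xmit() then calls ixgbe_atr() once every atr_sample_rate packets.
 */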
94 static int atr_sample_rate = 20;
95 
96 /************************************************************************
97  *  Local Function prototypes
98  ************************************************************************/
99 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int           ixgbe_setup_receive_ring(struct rx_ring *);
102 static void          ixgbe_free_receive_buffers(struct rx_ring *);
103 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
104                                        struct ixgbe_hw_stats *);
105 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
109                                         struct mbuf *, u32 *, u32 *);
110 static int           ixgbe_tso_setup(struct tx_ring *,
111                                      struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114                                     struct mbuf *, u32);
115 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
116                                       struct ixgbe_dma_alloc *, int);
117 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118 
119 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
120 
121 /************************************************************************
122  * ixgbe_legacy_start_locked - Transmit entry point
123  *
124  *   Called by the stack to initiate a transmit.
125  *   The driver will remain in this routine as long as there are
126  *   packets to transmit and transmit resources are available.
127  *   In case resources are not available, the stack is notified
128  *   and the packet is requeued.
129  ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 	int rc;
134 	struct mbuf    *m_head;
135 	struct adapter *adapter = txr->adapter;
136 
137 	IXGBE_TX_LOCK_ASSERT(txr);
138 
139 	if (adapter->link_active != LINK_STATE_UP) {
140 		/*
141 		 * Discard all packets buffered in the IFQ to avoid
142 		 * sending stale packets when the link next comes up.
143 		 */
144 		ixgbe_drain(ifp, txr);
145 		return (ENETDOWN);
146 	}
147 	if ((ifp->if_flags & IFF_RUNNING) == 0)
148 		return (ENETDOWN);
149 	if (txr->txr_no_space)
150 		return (ENETDOWN);
151 
152 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 			break;
155 
156 		IFQ_POLL(&ifp->if_snd, m_head);
157 		if (m_head == NULL)
158 			break;
159 
160 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 			break;
162 		}
163 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 		if (rc != 0) {
165 			m_freem(m_head);
166 			continue;
167 		}
168 
169 		/* Send a copy of the frame to the BPF listener */
170 		bpf_mtap(ifp, m_head, BPF_D_OUT);
171 	}
172 
173 	return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175 
176 /************************************************************************
177  * ixgbe_legacy_start
178  *
179  *   Called by the stack, this always uses the first tx ring,
180  *   and should not be used with multiqueue tx enabled.
181  ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 	struct adapter *adapter = ifp->if_softc;
186 	struct tx_ring *txr = adapter->tx_rings;
187 
188 	if (ifp->if_flags & IFF_RUNNING) {
189 		IXGBE_TX_LOCK(txr);
190 		ixgbe_legacy_start_locked(ifp, txr);
191 		IXGBE_TX_UNLOCK(txr);
192 	}
193 } /* ixgbe_legacy_start */
194 
195 /************************************************************************
196  * ixgbe_mq_start - Multiqueue Transmit Entry Point
197  *
198  *   (if_transmit function)
199  ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 	struct adapter	*adapter = ifp->if_softc;
204 	struct tx_ring	*txr;
205 	int 		i;
206 #ifdef RSS
207 	uint32_t bucket_id;
208 #endif
209 
210 	/*
211 	 * When doing RSS, map it to the same outbound queue
212 	 * as the incoming flow would be mapped to.
213 	 *
214 	 * If everything is set up correctly, it should be the
215 	 * same bucket as the one the current CPU maps to.
216 	 */
217 #ifdef RSS
218 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 		    &bucket_id) == 0)) {
222 			i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 			if (bucket_id > adapter->num_queues)
225 				if_printf(ifp,
226 				    "bucket_id (%d) > num_queues (%d)\n",
227 				    bucket_id, adapter->num_queues);
228 #endif
229 		} else
230 			i = m->m_pkthdr.flowid % adapter->num_queues;
231 	} else
232 #endif /* RSS */
233 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
234 
235 	/* Check for a hung queue and pick alternative */
236 	if (((1ULL << i) & adapter->active_queues) == 0)
237 		i = ffs64(adapter->active_queues);
238 
239 	txr = &adapter->tx_rings[i];
240 
241 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
242 		m_freem(m);
243 		txr->pcq_drops.ev_count++;
244 		return ENOBUFS;
245 	}
246 	if (IXGBE_TX_TRYLOCK(txr)) {
247 		ixgbe_mq_start_locked(ifp, txr);
248 		IXGBE_TX_UNLOCK(txr);
249 	} else {
250 		if (adapter->txrx_use_workqueue) {
251 			u_int *enqueued;
252 
253 			/*
254 			 * This function itself is not called in interrupt
255 			 * context, but it can be called in fast softint
256 			 * context right after receiving forwarded packets.
257 			 * The workqueue must therefore be protected from
258 			 * being enqueued twice when the machine handles both
259 			 * locally generated and forwarded packets.
260 			 */
261 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
262 			if (*enqueued == 0) {
263 				*enqueued = 1;
264 				percpu_putref(adapter->txr_wq_enqueued);
265 				workqueue_enqueue(adapter->txr_wq,
266 				    &txr->wq_cookie, curcpu());
267 			} else
268 				percpu_putref(adapter->txr_wq_enqueued);
269 		} else {
270 			kpreempt_disable();
271 			softint_schedule(txr->txr_si);
272 			kpreempt_enable();
273 		}
274 	}
275 
276 	return (0);
277 } /* ixgbe_mq_start */
278 
279 /************************************************************************
280  * ixgbe_mq_start_locked
281  ************************************************************************/
282 int
283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
284 {
285 	struct mbuf    *next;
286 	int            enqueued = 0, err = 0;
287 
288 	if (txr->adapter->link_active != LINK_STATE_UP) {
289 		/*
290 		 * Discard all packets buffered in txr_interq to avoid
291 		 * sending stale packets when the link next comes up.
292 		 */
293 		ixgbe_drain(ifp, txr);
294 		return (ENETDOWN);
295 	}
296 	if ((ifp->if_flags & IFF_RUNNING) == 0)
297 		return (ENETDOWN);
298 	if (txr->txr_no_space)
299 		return (ENETDOWN);
300 
301 	/* Process the queue */
302 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
303 		if ((err = ixgbe_xmit(txr, next)) != 0) {
304 			m_freem(next);
305 			/* All errors are counted in ixgbe_xmit() */
306 			break;
307 		}
308 		enqueued++;
309 #if __FreeBSD_version >= 1100036
310 		/*
311 		 * Since we're looking at the tx ring, we can check
312 		 * to see if we're a VF by examining our tail register
313 		 * address.
314 		 */
315 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
316 		    (next->m_flags & M_MCAST))
317 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
318 #endif
319 		/* Send a copy of the frame to the BPF listener */
320 		bpf_mtap(ifp, next, BPF_D_OUT);
321 		if ((ifp->if_flags & IFF_RUNNING) == 0)
322 			break;
323 	}
324 
325 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
326 		ixgbe_txeof(txr);
327 
328 	return (err);
329 } /* ixgbe_mq_start_locked */
330 
331 /************************************************************************
332  * ixgbe_deferred_mq_start
333  *
334  *   Called from a softint, and indirectly from a workqueue, to drain
335  *   queued transmit packets.
336  ************************************************************************/
337 void
338 ixgbe_deferred_mq_start(void *arg)
339 {
340 	struct tx_ring *txr = arg;
341 	struct adapter *adapter = txr->adapter;
342 	struct ifnet   *ifp = adapter->ifp;
343 
344 	IXGBE_TX_LOCK(txr);
345 	if (pcq_peek(txr->txr_interq) != NULL)
346 		ixgbe_mq_start_locked(ifp, txr);
347 	IXGBE_TX_UNLOCK(txr);
348 } /* ixgbe_deferred_mq_start */
349 
350 /************************************************************************
351  * ixgbe_deferred_mq_start_work
352  *
353  *   Called from a workqueue to drain queued transmit packets.
354  ************************************************************************/
355 void
356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
357 {
358 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
359 	struct adapter *adapter = txr->adapter;
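	/*
	 * Clear the per-CPU "enqueued" marker that ixgbe_mq_start() set
	 * before scheduling us, so this queue can be put on the workqueue
	 * again.
	 */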
360 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
361 	*enqueued = 0;
362 	percpu_putref(adapter->txr_wq_enqueued);
363 
364 	ixgbe_deferred_mq_start(txr);
365 } /* ixgbe_deferred_mq_start_work */
366 
367 /************************************************************************
368  * ixgbe_drain_all
369  ************************************************************************/
370 void
371 ixgbe_drain_all(struct adapter *adapter)
372 {
373 	struct ifnet *ifp = adapter->ifp;
374 	struct ix_queue *que = adapter->queues;
375 
376 	for (int i = 0; i < adapter->num_queues; i++, que++) {
377 		struct tx_ring  *txr = que->txr;
378 
379 		IXGBE_TX_LOCK(txr);
380 		ixgbe_drain(ifp, txr);
381 		IXGBE_TX_UNLOCK(txr);
382 	}
383 }
384 
385 /************************************************************************
386  * ixgbe_xmit
387  *
388  *   Maps the mbufs to tx descriptors, allowing the
389  *   TX engine to transmit the packets.
390  *
391  *   Return 0 on success, positive on failure
392  ************************************************************************/
393 static int
394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
395 {
396 	struct adapter          *adapter = txr->adapter;
397 	struct ixgbe_tx_buf     *txbuf;
398 	union ixgbe_adv_tx_desc *txd = NULL;
399 	struct ifnet	        *ifp = adapter->ifp;
400 	int                     i, j, error;
401 	int                     first;
402 	u32                     olinfo_status = 0, cmd_type_len;
403 	bool                    remap = TRUE;
404 	bus_dmamap_t            map;
405 
406 	/* Basic descriptor defines */
407 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
408 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
409 
410 	if (vlan_has_tag(m_head))
411 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
412 
413 	/*
414 	 * Important to capture the first descriptor
415 	 * used because it will contain the index of
416 	 * the one we tell the hardware to report back
417 	 */
418 	first = txr->next_avail_desc;
419 	txbuf = &txr->tx_buffers[first];
420 	map = txbuf->map;
421 
422 	/*
423 	 * Map the packet for DMA.
424 	 */
425 retry:
426 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
427 	    BUS_DMA_NOWAIT);
428 
429 	if (__predict_false(error)) {
430 		struct mbuf *m;
431 
432 		switch (error) {
433 		case EAGAIN:
434 			txr->q_eagain_tx_dma_setup++;
435 			return EAGAIN;
436 		case ENOMEM:
437 			txr->q_enomem_tx_dma_setup++;
438 			return EAGAIN;
439 		case EFBIG:
440 			/* Try it again? - one try */
441 			if (remap == TRUE) {
442 				remap = FALSE;
443 				/*
444 				 * XXX: m_defrag will choke on
445 				 * non-MCLBYTES-sized clusters
446 				 */
447 				txr->q_efbig_tx_dma_setup++;
448 				m = m_defrag(m_head, M_NOWAIT);
449 				if (m == NULL) {
450 					txr->q_mbuf_defrag_failed++;
451 					return ENOBUFS;
452 				}
453 				m_head = m;
454 				goto retry;
455 			} else {
456 				txr->q_efbig2_tx_dma_setup++;
457 				return error;
458 			}
459 		case EINVAL:
460 			txr->q_einval_tx_dma_setup++;
461 			return error;
462 		default:
463 			txr->q_other_tx_dma_setup++;
464 			return error;
465 		}
466 	}
467 
468 	/* Make certain there are enough descriptors */
469 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
470 		txr->txr_no_space = true;
471 		txr->no_desc_avail.ev_count++;
472 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
473 		return EAGAIN;
474 	}
475 
476 	/*
477 	 * Set up the appropriate offload context;
478 	 * this will consume the first descriptor.
479 	 */
480 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
481 	if (__predict_false(error)) {
482 		return (error);
483 	}
484 
485 	/* Do the flow director magic */
486 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
487 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
488 		++txr->atr_count;
489 		if (txr->atr_count >= atr_sample_rate) {
490 			ixgbe_atr(txr, m_head);
491 			txr->atr_count = 0;
492 		}
493 	}
494 
495 	olinfo_status |= IXGBE_ADVTXD_CC;
496 	i = txr->next_avail_desc;
497 	for (j = 0; j < map->dm_nsegs; j++) {
498 		bus_size_t seglen;
499 		bus_addr_t segaddr;
500 
501 		txbuf = &txr->tx_buffers[i];
502 		txd = &txr->tx_base[i];
503 		seglen = map->dm_segs[j].ds_len;
504 		segaddr = htole64(map->dm_segs[j].ds_addr);
505 
506 		txd->read.buffer_addr = segaddr;
507 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
508 		txd->read.olinfo_status = htole32(olinfo_status);
509 
510 		if (++i == txr->num_desc)
511 			i = 0;
512 	}
513 
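	/*
	 * Mark the final descriptor of the packet: EOP ends the packet and
	 * RS asks the hardware to write back status (the DD bit checked in
	 * ixgbe_txeof()) once it has been transmitted.
	 */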
514 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
515 	txr->tx_avail -= map->dm_nsegs;
516 	txr->next_avail_desc = i;
517 
518 	txbuf->m_head = m_head;
519 	/*
520 	 * Here we swap the map so the last descriptor,
521 	 * which gets the completion interrupt, has the
522 	 * real map, and the first descriptor gets the
523 	 * unused map from this descriptor.
524 	 */
525 	txr->tx_buffers[first].map = txbuf->map;
526 	txbuf->map = map;
527 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
528 	    BUS_DMASYNC_PREWRITE);
529 
530 	/* Set the EOP descriptor that will be marked done */
531 	txbuf = &txr->tx_buffers[first];
532 	txbuf->eop = txd;
533 
534 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
535 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
536 	/*
537 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
538 	 * hardware that this frame is available to transmit.
539 	 */
540 	++txr->total_packets.ev_count;
541 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
542 
543 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
544 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
545 	if (m_head->m_flags & M_MCAST)
546 		if_statinc_ref(nsr, if_omcasts);
547 	IF_STAT_PUTREF(ifp);
548 
549 	/* Mark queue as having work */
550 	if (txr->busy == 0)
551 		txr->busy = 1;
552 
553 	return (0);
554 } /* ixgbe_xmit */
555 
556 /************************************************************************
557  * ixgbe_drain
558  ************************************************************************/
559 static void
560 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
561 {
562 	struct mbuf *m;
563 
564 	IXGBE_TX_LOCK_ASSERT(txr);
565 
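	/*
	 * Only the first TX ring services the legacy if_snd queue (see
	 * ixgbe_legacy_start()), so drain it from ring 0 only.
	 */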
566 	if (txr->me == 0) {
567 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
568 			IFQ_DEQUEUE(&ifp->if_snd, m);
569 			m_freem(m);
570 			IF_DROP(&ifp->if_snd);
571 		}
572 	}
573 
574 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
575 		m_freem(m);
576 		txr->pcq_drops.ev_count++;
577 	}
578 }
579 
580 /************************************************************************
581  * ixgbe_allocate_transmit_buffers
582  *
583  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
584  *   the information needed to transmit a packet on the wire. This is
585  *   called only once at attach; setup is done on every reset.
586  ************************************************************************/
587 static int
588 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
589 {
590 	struct adapter      *adapter = txr->adapter;
591 	device_t            dev = adapter->dev;
592 	struct ixgbe_tx_buf *txbuf;
593 	int                 error, i;
594 
595 	/*
596 	 * Setup DMA descriptor areas.
597 	 */
598 	error = ixgbe_dma_tag_create(
599 	         /*      parent */ adapter->osdep.dmat,
600 	         /*   alignment */ 1,
601 	         /*      bounds */ 0,
602 	         /*     maxsize */ IXGBE_TSO_SIZE,
603 	         /*   nsegments */ adapter->num_segs,
604 	         /*  maxsegsize */ PAGE_SIZE,
605 	         /*       flags */ 0,
606 	                           &txr->txtag);
607 	if (error != 0) {
608 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
609 		goto fail;
610 	}
611 
612 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
613 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
614 
615 	/* Create the descriptor buffer dma maps */
616 	txbuf = txr->tx_buffers;
617 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
618 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
619 		if (error != 0) {
620 			aprint_error_dev(dev,
621 			    "Unable to create TX DMA map (%d)\n", error);
622 			goto fail;
623 		}
624 	}
625 
626 	return 0;
627 fail:
628 	/* Free everything; this handles the case where we failed part-way. */
629 #if 0 /* XXX was FreeBSD */
630 	ixgbe_free_transmit_structures(adapter);
631 #else
632 	ixgbe_free_transmit_buffers(txr);
633 #endif
634 	return (error);
635 } /* ixgbe_allocate_transmit_buffers */
636 
637 /************************************************************************
638  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
639  ************************************************************************/
640 static void
641 ixgbe_setup_transmit_ring(struct tx_ring *txr)
642 {
643 	struct adapter        *adapter = txr->adapter;
644 	struct ixgbe_tx_buf   *txbuf;
645 #ifdef DEV_NETMAP
646 	struct netmap_adapter *na = NA(adapter->ifp);
647 	struct netmap_slot    *slot;
648 #endif /* DEV_NETMAP */
649 
650 	/* Clear the old ring contents */
651 	IXGBE_TX_LOCK(txr);
652 
653 #ifdef DEV_NETMAP
654 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
655 		/*
656 		 * (under lock): if in netmap mode, do some consistency
657 		 * checks and set slot to entry 0 of the netmap ring.
658 		 */
659 		slot = netmap_reset(na, NR_TX, txr->me, 0);
660 	}
661 #endif /* DEV_NETMAP */
662 
663 	bzero((void *)txr->tx_base,
664 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
665 	/* Reset indices */
666 	txr->next_avail_desc = 0;
667 	txr->next_to_clean = 0;
668 
669 	/* Free any existing tx buffers. */
670 	txbuf = txr->tx_buffers;
671 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
672 		if (txbuf->m_head != NULL) {
673 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
674 			    0, txbuf->m_head->m_pkthdr.len,
675 			    BUS_DMASYNC_POSTWRITE);
676 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
677 			m_freem(txbuf->m_head);
678 			txbuf->m_head = NULL;
679 		}
680 
681 #ifdef DEV_NETMAP
682 		/*
683 		 * In netmap mode, set the map for the packet buffer.
684 		 * NOTE: Some drivers (not this one) also need to set
685 		 * the physical buffer address in the NIC ring.
686 		 * Slots in the netmap ring (indexed by "si") are
687 		 * kring->nkr_hwofs positions "ahead" wrt the
688 		 * corresponding slot in the NIC ring. In some drivers
689 		 * (not here) nkr_hwofs can be negative. Function
690 		 * netmap_idx_n2k() handles wraparounds properly.
691 		 */
692 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
693 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
694 			netmap_load_map(na, txr->txtag,
695 			    txbuf->map, NMB(na, slot + si));
696 		}
697 #endif /* DEV_NETMAP */
698 
699 		/* Clear the EOP descriptor pointer */
700 		txbuf->eop = NULL;
701 	}
702 
703 	/* Set the rate at which we sample packets */
704 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
705 		txr->atr_sample = atr_sample_rate;
706 
707 	/* Set number of descriptors available */
708 	txr->tx_avail = adapter->num_tx_desc;
709 
710 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
711 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
712 	IXGBE_TX_UNLOCK(txr);
713 } /* ixgbe_setup_transmit_ring */
714 
715 /************************************************************************
716  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
717  ************************************************************************/
718 int
719 ixgbe_setup_transmit_structures(struct adapter *adapter)
720 {
721 	struct tx_ring *txr = adapter->tx_rings;
722 
723 	for (int i = 0; i < adapter->num_queues; i++, txr++)
724 		ixgbe_setup_transmit_ring(txr);
725 
726 	return (0);
727 } /* ixgbe_setup_transmit_structures */
728 
729 /************************************************************************
730  * ixgbe_free_transmit_structures - Free all transmit rings.
731  ************************************************************************/
732 void
733 ixgbe_free_transmit_structures(struct adapter *adapter)
734 {
735 	struct tx_ring *txr = adapter->tx_rings;
736 
737 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
738 		ixgbe_free_transmit_buffers(txr);
739 		ixgbe_dma_free(adapter, &txr->txdma);
740 		IXGBE_TX_LOCK_DESTROY(txr);
741 	}
742 	free(adapter->tx_rings, M_DEVBUF);
743 } /* ixgbe_free_transmit_structures */
744 
745 /************************************************************************
746  * ixgbe_free_transmit_buffers
747  *
748  *   Free transmit ring related data structures.
749  ************************************************************************/
750 static void
751 ixgbe_free_transmit_buffers(struct tx_ring *txr)
752 {
753 	struct adapter      *adapter = txr->adapter;
754 	struct ixgbe_tx_buf *tx_buffer;
755 	int                 i;
756 
757 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
758 
759 	if (txr->tx_buffers == NULL)
760 		return;
761 
762 	tx_buffer = txr->tx_buffers;
763 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
764 		if (tx_buffer->m_head != NULL) {
765 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
766 			    0, tx_buffer->m_head->m_pkthdr.len,
767 			    BUS_DMASYNC_POSTWRITE);
768 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
769 			m_freem(tx_buffer->m_head);
770 			tx_buffer->m_head = NULL;
771 			if (tx_buffer->map != NULL) {
772 				ixgbe_dmamap_destroy(txr->txtag,
773 				    tx_buffer->map);
774 				tx_buffer->map = NULL;
775 			}
776 		} else if (tx_buffer->map != NULL) {
777 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
778 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
779 			tx_buffer->map = NULL;
780 		}
781 	}
782 	if (txr->txr_interq != NULL) {
783 		struct mbuf *m;
784 
785 		while ((m = pcq_get(txr->txr_interq)) != NULL)
786 			m_freem(m);
787 		pcq_destroy(txr->txr_interq);
788 	}
789 	if (txr->tx_buffers != NULL) {
790 		free(txr->tx_buffers, M_DEVBUF);
791 		txr->tx_buffers = NULL;
792 	}
793 	if (txr->txtag != NULL) {
794 		ixgbe_dma_tag_destroy(txr->txtag);
795 		txr->txtag = NULL;
796 	}
797 } /* ixgbe_free_transmit_buffers */
798 
799 /************************************************************************
800  * ixgbe_tx_ctx_setup
801  *
802  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
803  ************************************************************************/
804 static int
805 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
806     u32 *cmd_type_len, u32 *olinfo_status)
807 {
808 	struct adapter                   *adapter = txr->adapter;
809 	struct ixgbe_adv_tx_context_desc *TXD;
810 	struct ether_vlan_header         *eh;
811 #ifdef INET
812 	struct ip                        *ip;
813 #endif
814 #ifdef INET6
815 	struct ip6_hdr                   *ip6;
816 #endif
817 	int                              ehdrlen, ip_hlen = 0;
818 	int                              offload = TRUE;
819 	int                              ctxd = txr->next_avail_desc;
820 	u32                              vlan_macip_lens = 0;
821 	u32                              type_tucmd_mlhl = 0;
822 	u16                              vtag = 0;
823 	u16                              etype;
824 	u8                               ipproto = 0;
825 	char                             *l3d;
826 
827 
828 	/* First check if TSO is to be used */
829 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
830 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
831 
832 		if (rv != 0)
833 			++adapter->tso_err.ev_count;
834 		return rv;
835 	}
836 
837 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
838 		offload = FALSE;
839 
840 	/* Indicate the whole packet as payload when not doing TSO */
841 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
842 
843 	/* Now ready a context descriptor */
844 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
845 
846 	/*
847 	 * In advanced descriptors the vlan tag must
848 	 * be placed into the context descriptor. Hence
849 	 * we need to make one even if not doing offloads.
850 	 */
851 	if (vlan_has_tag(mp)) {
852 		vtag = htole16(vlan_get_tag(mp));
853 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
854 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
855 	           (offload == FALSE))
856 		return (0);
857 
858 	/*
859 	 * Determine where frame payload starts.
860 	 * Jump over vlan headers if already present,
861 	 * helpful for QinQ too.
862 	 */
863 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
864 	eh = mtod(mp, struct ether_vlan_header *);
865 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
866 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
867 		etype = ntohs(eh->evl_proto);
868 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
869 	} else {
870 		etype = ntohs(eh->evl_encap_proto);
871 		ehdrlen = ETHER_HDR_LEN;
872 	}
873 
874 	/* Set the ether header length */
875 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
876 
877 	if (offload == FALSE)
878 		goto no_offloads;
879 
880 	/*
881 	 * If the first mbuf only includes the ethernet header,
882 	 * jump to the next one
883 	 * XXX: This assumes the stack splits mbufs containing headers
884 	 *      on header boundaries
885 	 * XXX: And assumes the entire IP header is contained in one mbuf
886 	 */
887 	if (mp->m_len == ehdrlen && mp->m_next)
888 		l3d = mtod(mp->m_next, char *);
889 	else
890 		l3d = mtod(mp, char *) + ehdrlen;
891 
892 	switch (etype) {
893 #ifdef INET
894 	case ETHERTYPE_IP:
895 		ip = (struct ip *)(l3d);
896 		ip_hlen = ip->ip_hl << 2;
897 		ipproto = ip->ip_p;
898 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
899 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
900 		    ip->ip_sum == 0);
901 		break;
902 #endif
903 #ifdef INET6
904 	case ETHERTYPE_IPV6:
905 		ip6 = (struct ip6_hdr *)(l3d);
906 		ip_hlen = sizeof(struct ip6_hdr);
907 		ipproto = ip6->ip6_nxt;
908 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
909 		break;
910 #endif
911 	default:
912 		offload = false;
913 		break;
914 	}
915 
916 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
917 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
918 
919 	vlan_macip_lens |= ip_hlen;
920 
921 	/* No support for offloads for non-L4 next headers */
922 	switch (ipproto) {
923 	case IPPROTO_TCP:
924 		if (mp->m_pkthdr.csum_flags &
925 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
926 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
927 		else
928 			offload = false;
929 		break;
930 	case IPPROTO_UDP:
931 		if (mp->m_pkthdr.csum_flags &
932 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
933 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
934 		else
935 			offload = false;
936 		break;
937 	default:
938 		offload = false;
939 		break;
940 	}
941 
942 	if (offload) /* Insert L4 checksum into data descriptors */
943 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
944 
945 no_offloads:
946 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
947 
948 	/* Now copy bits into descriptor */
949 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
950 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
951 	TXD->seqnum_seed = htole32(0);
952 	TXD->mss_l4len_idx = htole32(0);
953 
954 	/* We've consumed the first desc, adjust counters */
955 	if (++ctxd == txr->num_desc)
956 		ctxd = 0;
957 	txr->next_avail_desc = ctxd;
958 	--txr->tx_avail;
959 
960 	return (0);
961 } /* ixgbe_tx_ctx_setup */
962 
963 /************************************************************************
964  * ixgbe_tso_setup
965  *
966  *   Setup work for hardware segmentation offload (TSO) on
967  *   adapters using advanced tx descriptors
968  ************************************************************************/
969 static int
970 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
971     u32 *olinfo_status)
972 {
973 	struct ixgbe_adv_tx_context_desc *TXD;
974 	struct ether_vlan_header         *eh;
975 #ifdef INET6
976 	struct ip6_hdr                   *ip6;
977 #endif
978 #ifdef INET
979 	struct ip                        *ip;
980 #endif
981 	struct tcphdr                    *th;
982 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
983 	u32                              vlan_macip_lens = 0;
984 	u32                              type_tucmd_mlhl = 0;
985 	u32                              mss_l4len_idx = 0, paylen;
986 	u16                              vtag = 0, eh_type;
987 
988 	/*
989 	 * Determine where frame payload starts.
990 	 * Jump over vlan headers if already present
991 	 */
992 	eh = mtod(mp, struct ether_vlan_header *);
993 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
994 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
995 		eh_type = eh->evl_proto;
996 	} else {
997 		ehdrlen = ETHER_HDR_LEN;
998 		eh_type = eh->evl_encap_proto;
999 	}
1000 
1001 	switch (ntohs(eh_type)) {
1002 #ifdef INET
1003 	case ETHERTYPE_IP:
1004 		ip = (struct ip *)(mp->m_data + ehdrlen);
1005 		if (ip->ip_p != IPPROTO_TCP)
1006 			return (ENXIO);
1007 		ip->ip_sum = 0;
1008 		ip_hlen = ip->ip_hl << 2;
1009 		th = (struct tcphdr *)((char *)ip + ip_hlen);
1010 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1011 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1012 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1013 		/* Tell transmit desc to also do IPv4 checksum. */
1014 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1015 		break;
1016 #endif
1017 #ifdef INET6
1018 	case ETHERTYPE_IPV6:
1019 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1020 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1021 		if (ip6->ip6_nxt != IPPROTO_TCP)
1022 			return (ENXIO);
1023 		ip_hlen = sizeof(struct ip6_hdr);
1024 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1025 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1026 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1027 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1028 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1029 		break;
1030 #endif
1031 	default:
1032 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1033 		    __func__, ntohs(eh_type));
1034 		break;
1035 	}
1036 
1037 	ctxd = txr->next_avail_desc;
1038 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1039 
1040 	tcp_hlen = th->th_off << 2;
1041 
1042 	/* This is used in the transmit desc in encap */
1043 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1044 
1045 	/* VLAN MACLEN IPLEN */
1046 	if (vlan_has_tag(mp)) {
1047 		vtag = htole16(vlan_get_tag(mp));
1048 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1049 	}
1050 
1051 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1052 	vlan_macip_lens |= ip_hlen;
1053 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1054 
1055 	/* ADV DTYPE TUCMD */
1056 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1057 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1058 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1059 
1060 	/* MSS L4LEN IDX */
1061 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1062 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1063 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1064 
1065 	TXD->seqnum_seed = htole32(0);
1066 
1067 	if (++ctxd == txr->num_desc)
1068 		ctxd = 0;
1069 
1070 	txr->tx_avail--;
1071 	txr->next_avail_desc = ctxd;
1072 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1073 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1074 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1075 	++txr->tso_tx.ev_count;
1076 
1077 	return (0);
1078 } /* ixgbe_tso_setup */
1079 
1080 
1081 /************************************************************************
1082  * ixgbe_txeof
1083  *
1084  *   Examine each tx_buffer in the used queue. If the hardware is done
1085  *   processing the packet then free associated resources. The
1086  *   tx_buffer is put back on the free queue.
1087  ************************************************************************/
1088 bool
1089 ixgbe_txeof(struct tx_ring *txr)
1090 {
1091 	struct adapter		*adapter = txr->adapter;
1092 	struct ifnet		*ifp = adapter->ifp;
1093 	struct ixgbe_tx_buf	*buf;
1094 	union ixgbe_adv_tx_desc *txd;
1095 	u32			work, processed = 0;
1096 	u32			limit = adapter->tx_process_limit;
1097 
1098 	KASSERT(mutex_owned(&txr->tx_mtx));
1099 
1100 #ifdef DEV_NETMAP
1101 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1102 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1103 		struct netmap_adapter *na = NA(adapter->ifp);
1104 		struct netmap_kring *kring = na->tx_rings[txr->me];
1105 		txd = txr->tx_base;
1106 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1107 		    BUS_DMASYNC_POSTREAD);
1108 		/*
1109 		 * In netmap mode, all the work is done in the context
1110 		 * of the client thread. Interrupt handlers only wake up
1111 		 * clients, which may be sleeping on individual rings
1112 		 * or on a global resource for all rings.
1113 		 * To implement tx interrupt mitigation, we wake up the client
1114 		 * thread roughly every half ring, even if the NIC interrupts
1115 		 * more frequently. This is implemented as follows:
1116 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1117 		 *   the slot that should wake up the thread (nkr_num_slots
1118 		 *   means the user thread should not be woken up);
1119 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1120 		 *   or the slot has the DD bit set.
1121 		 */
1122 		if (kring->nr_kflags < kring->nkr_num_slots &&
1123 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
1124 			netmap_tx_irq(ifp, txr->me);
1125 		}
1126 		return false;
1127 	}
1128 #endif /* DEV_NETMAP */
1129 
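	/* The ring is already completely clean; nothing to do. */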
1130 	if (txr->tx_avail == txr->num_desc) {
1131 		txr->busy = 0;
1132 		return false;
1133 	}
1134 
1135 	/* Get work starting point */
1136 	work = txr->next_to_clean;
1137 	buf = &txr->tx_buffers[work];
1138 	txd = &txr->tx_base[work];
1139 	work -= txr->num_desc; /* The distance to ring end */
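	/*
	 * "work" now holds the negative distance from the current slot to
	 * the end of the ring; it is incremented per descriptor below and
	 * reaches zero exactly when the index must wrap back to entry 0.
	 */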
1140 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1141 	    BUS_DMASYNC_POSTREAD);
1142 
1143 	do {
1144 		union ixgbe_adv_tx_desc *eop = buf->eop;
1145 		if (eop == NULL) /* No work */
1146 			break;
1147 
1148 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1149 			break;	/* I/O not complete */
1150 
1151 		if (buf->m_head) {
1152 			txr->bytes += buf->m_head->m_pkthdr.len;
1153 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1154 			    0, buf->m_head->m_pkthdr.len,
1155 			    BUS_DMASYNC_POSTWRITE);
1156 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1157 			m_freem(buf->m_head);
1158 			buf->m_head = NULL;
1159 		}
1160 		buf->eop = NULL;
1161 		txr->txr_no_space = false;
1162 		++txr->tx_avail;
1163 
1164 		/* We clean the range if multi segment */
1165 		while (txd != eop) {
1166 			++txd;
1167 			++buf;
1168 			++work;
1169 			/* wrap the ring? */
1170 			if (__predict_false(!work)) {
1171 				work -= txr->num_desc;
1172 				buf = txr->tx_buffers;
1173 				txd = txr->tx_base;
1174 			}
1175 			if (buf->m_head) {
1176 				txr->bytes +=
1177 				    buf->m_head->m_pkthdr.len;
1178 				bus_dmamap_sync(txr->txtag->dt_dmat,
1179 				    buf->map,
1180 				    0, buf->m_head->m_pkthdr.len,
1181 				    BUS_DMASYNC_POSTWRITE);
1182 				ixgbe_dmamap_unload(txr->txtag,
1183 				    buf->map);
1184 				m_freem(buf->m_head);
1185 				buf->m_head = NULL;
1186 			}
1187 			++txr->tx_avail;
1188 			buf->eop = NULL;
1189 
1190 		}
1191 		++txr->packets;
1192 		++processed;
1193 		if_statinc(ifp, if_opackets);
1194 
1195 		/* Try the next packet */
1196 		++txd;
1197 		++buf;
1198 		++work;
1199 		/* reset with a wrap */
1200 		if (__predict_false(!work)) {
1201 			work -= txr->num_desc;
1202 			buf = txr->tx_buffers;
1203 			txd = txr->tx_base;
1204 		}
1205 		prefetch(txd);
1206 	} while (__predict_true(--limit));
1207 
1208 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1209 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1210 
1211 	work += txr->num_desc;
1212 	txr->next_to_clean = work;
1213 
1214 	/*
1215 	 * Queue hang detection: we know there is work
1216 	 * outstanding or the early return above would
1217 	 * have been taken, so increment busy if nothing
1218 	 * managed to get cleaned; the local timer then
1219 	 * checks this counter and marks the queue HUNG
1220 	 * once it exceeds the maximum number of attempts.
1221 	 */
1222 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1223 		++txr->busy;
1224 	/*
1225 	 * If anything got cleaned we reset the state to 1;
1226 	 * note this also clears HUNG if it was set.
1227 	 */
1228 	if (processed)
1229 		txr->busy = 1;
1230 
1231 	if (txr->tx_avail == txr->num_desc)
1232 		txr->busy = 0;
1233 
1234 	return ((limit > 0) ? false : true);
1235 } /* ixgbe_txeof */
1236 
1237 /************************************************************************
1238  * ixgbe_rsc_count
1239  *
1240  *   Used to detect a descriptor that has been merged by Hardware RSC.
1241  ************************************************************************/
1242 static inline u32
1243 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1244 {
1245 	return (le32toh(rx->wb.lower.lo_dword.data) &
1246 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1247 } /* ixgbe_rsc_count */
1248 
1249 /************************************************************************
1250  * ixgbe_setup_hw_rsc
1251  *
1252  *   Initialize Hardware RSC (LRO) feature on 82599
1253  *   for an RX ring; this is toggled by the LRO capability
1254  *   even though it is transparent to the stack.
1255  *
1256  *   NOTE: Since this HW feature only works with IPv4 and
1257  *         testing has shown soft LRO to be as effective,
1258  *         this feature will be disabled by default.
1259  ************************************************************************/
1260 static void
1261 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1262 {
1263 	struct	adapter  *adapter = rxr->adapter;
1264 	struct	ixgbe_hw *hw = &adapter->hw;
1265 	u32              rscctrl, rdrxctl;
1266 
1267 	/* If turning LRO/RSC off we need to disable it */
1268 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1269 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1270 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1271 		return;
1272 	}
1273 
1274 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1275 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1276 #ifdef DEV_NETMAP
1277 	/* Always strip CRC unless Netmap disabled it */
1278 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1279 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1280 	    ix_crcstrip)
1281 #endif /* DEV_NETMAP */
1282 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1283 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1284 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1285 
1286 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1287 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1288 	/*
1289 	 * Limit the total number of descriptors that
1290 	 * can be combined, so it does not exceed 64K
1291 	 */
1292 	if (rxr->mbuf_sz == MCLBYTES)
1293 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1294 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1295 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1296 	else if (rxr->mbuf_sz == MJUM9BYTES)
1297 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1298 	else  /* Using 16K cluster */
1299 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
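	/*
	 * A quick sanity check on the limit above (assuming MCLBYTES = 2 KB
	 * and MJUMPAGESIZE = PAGE_SIZE = 4 KB): 16 * 2 KB = 32 KB,
	 * 8 * 4 KB = 32 KB, 4 * 9 KB = 36 KB and 1 * 16 KB = 16 KB,
	 * all of which stay below 64 KB.
	 */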
1300 
1301 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1302 
1303 	/* Enable TCP header recognition */
1304 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1305 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1306 
1307 	/* Disable RSC for ACK packets */
1308 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1309 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1310 
1311 	rxr->hw_rsc = TRUE;
1312 } /* ixgbe_setup_hw_rsc */
1313 
1314 /************************************************************************
1315  * ixgbe_refresh_mbufs
1316  *
1317  *   Refresh mbuf buffers for RX descriptor rings
1318  *    - keeps its own state, so discards due to resource
1319  *      exhaustion are unnecessary; if an mbuf cannot be obtained
1320  *      the routine simply returns, keeping its placeholder, and
1321  *      can be called again later to retry.
1322  ************************************************************************/
1323 static void
1324 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1325 {
1326 	struct adapter      *adapter = rxr->adapter;
1327 	struct ixgbe_rx_buf *rxbuf;
1328 	struct mbuf         *mp;
1329 	int                 i, j, error;
1330 	bool                refreshed = false;
1331 
1332 	i = j = rxr->next_to_refresh;
1333 	/* Control the loop with one beyond */
1334 	if (++j == rxr->num_desc)
1335 		j = 0;
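	/*
	 * "i" is the slot being refreshed and "j" always runs one slot
	 * ahead of it; the loop below refreshes buffers up to, but not
	 * including, "limit".
	 */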
1336 
1337 	while (j != limit) {
1338 		rxbuf = &rxr->rx_buffers[i];
1339 		if (rxbuf->buf == NULL) {
1340 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1341 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1342 			if (mp == NULL) {
1343 				rxr->no_jmbuf.ev_count++;
1344 				goto update;
1345 			}
1346 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1347 				m_adj(mp, ETHER_ALIGN);
1348 		} else
1349 			mp = rxbuf->buf;
1350 
1351 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1352 
1353 		/* If we're dealing with an mbuf that was copied rather
1354 		 * than replaced, there's no need to go through busdma.
1355 		 */
1356 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1357 			/* Get the memory mapping */
1358 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1359 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1360 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1361 			if (error != 0) {
1362 				device_printf(adapter->dev, "Refresh mbufs: "
1363 				    "payload dmamap load failure - %d\n",
1364 				    error);
1365 				m_free(mp);
1366 				rxbuf->buf = NULL;
1367 				goto update;
1368 			}
1369 			rxbuf->buf = mp;
1370 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1371 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1372 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1373 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1374 		} else {
1375 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1376 			rxbuf->flags &= ~IXGBE_RX_COPY;
1377 		}
1378 
1379 		refreshed = true;
1380 		/* Next is precalculated */
1381 		i = j;
1382 		rxr->next_to_refresh = i;
1383 		if (++j == rxr->num_desc)
1384 			j = 0;
1385 	}
1386 
1387 update:
1388 	if (refreshed) /* Update hardware tail index */
1389 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1390 
1391 	return;
1392 } /* ixgbe_refresh_mbufs */
1393 
1394 /************************************************************************
1395  * ixgbe_allocate_receive_buffers
1396  *
1397  *   Allocate memory for rx_buffer structures. Since we use one
1398  *   rx_buffer per received packet, the maximum number of rx_buffers
1399  *   that we'll need is equal to the number of receive descriptors
1400  *   that we've allocated.
1401  ************************************************************************/
1402 static int
1403 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1404 {
1405 	struct adapter      *adapter = rxr->adapter;
1406 	device_t            dev = adapter->dev;
1407 	struct ixgbe_rx_buf *rxbuf;
1408 	int                 bsize, error;
1409 
1410 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1411 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1412 
1413 	error = ixgbe_dma_tag_create(
1414 	         /*      parent */ adapter->osdep.dmat,
1415 	         /*   alignment */ 1,
1416 	         /*      bounds */ 0,
1417 	         /*     maxsize */ MJUM16BYTES,
1418 	         /*   nsegments */ 1,
1419 	         /*  maxsegsize */ MJUM16BYTES,
1420 	         /*       flags */ 0,
1421 	                           &rxr->ptag);
1422 	if (error != 0) {
1423 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1424 		goto fail;
1425 	}
1426 
1427 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1428 		rxbuf = &rxr->rx_buffers[i];
1429 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1430 		if (error) {
1431 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1432 			goto fail;
1433 		}
1434 	}
1435 
1436 	return (0);
1437 
1438 fail:
1439 	/* Frees all, but can handle partial completion */
1440 	ixgbe_free_receive_structures(adapter);
1441 
1442 	return (error);
1443 } /* ixgbe_allocate_receive_buffers */
1444 
1445 /************************************************************************
1446  * ixgbe_free_receive_ring
1447  ************************************************************************/
1448 static void
1449 ixgbe_free_receive_ring(struct rx_ring *rxr)
1450 {
1451 	for (int i = 0; i < rxr->num_desc; i++) {
1452 		ixgbe_rx_discard(rxr, i);
1453 	}
1454 } /* ixgbe_free_receive_ring */
1455 
1456 /************************************************************************
1457  * ixgbe_setup_receive_ring
1458  *
1459  *   Initialize a receive ring and its buffers.
1460  ************************************************************************/
1461 static int
1462 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1463 {
1464 	struct adapter        *adapter;
1465 	struct ixgbe_rx_buf   *rxbuf;
1466 #ifdef LRO
1467 	struct ifnet          *ifp;
1468 	struct lro_ctrl       *lro = &rxr->lro;
1469 #endif /* LRO */
1470 #ifdef DEV_NETMAP
1471 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1472 	struct netmap_slot    *slot;
1473 #endif /* DEV_NETMAP */
1474 	int                   rsize, error = 0;
1475 
1476 	adapter = rxr->adapter;
1477 #ifdef LRO
1478 	ifp = adapter->ifp;
1479 #endif /* LRO */
1480 
1481 	/* Clear the ring contents */
1482 	IXGBE_RX_LOCK(rxr);
1483 
1484 #ifdef DEV_NETMAP
1485 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1486 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1487 #endif /* DEV_NETMAP */
1488 
1489 	rsize = roundup2(adapter->num_rx_desc *
1490 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1491 	bzero((void *)rxr->rx_base, rsize);
1492 	/* Cache the size */
1493 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1494 
1495 	/* Free current RX buffer structs and their mbufs */
1496 	ixgbe_free_receive_ring(rxr);
1497 
1498 	IXGBE_RX_UNLOCK(rxr);
1499 	/*
1500 	 * Now reinitialize our supply of jumbo mbufs.  The number
1501 	 * or size of jumbo mbufs may have changed.
1502 	 * Assume all of rxr->ptag are the same.
1503 	 */
1504 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1505 	    (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1506 
1507 	IXGBE_RX_LOCK(rxr);
1508 
1509 	/* Now replenish the mbufs */
1510 	for (int j = 0; j != rxr->num_desc; ++j) {
1511 		struct mbuf *mp;
1512 
1513 		rxbuf = &rxr->rx_buffers[j];
1514 
1515 #ifdef DEV_NETMAP
1516 		/*
1517 		 * In netmap mode, fill the map and set the buffer
1518 		 * address in the NIC ring, considering the offset
1519 		 * between the netmap and NIC rings (see comment in
1520 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1521 		 * an mbuf, so end the block with a continue;
1522 		 */
1523 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1524 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1525 			uint64_t paddr;
1526 			void *addr;
1527 
1528 			addr = PNMB(na, slot + sj, &paddr);
1529 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1530 			/* Update descriptor and the cached value */
1531 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1532 			rxbuf->addr = htole64(paddr);
1533 			continue;
1534 		}
1535 #endif /* DEV_NETMAP */
1536 
1537 		rxbuf->flags = 0;
1538 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1539 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1540 		if (rxbuf->buf == NULL) {
1541 			error = ENOBUFS;
1542 			goto fail;
1543 		}
1544 		mp = rxbuf->buf;
1545 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1546 		/* Get the memory mapping */
1547 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1548 		    mp, BUS_DMA_NOWAIT);
1549 		if (error != 0)
1550 			goto fail;
1551 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1552 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1553 		/* Update the descriptor and the cached value */
1554 		rxr->rx_base[j].read.pkt_addr =
1555 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1556 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1557 	}
1558 
1559 	/* Setup our descriptor indices */
1560 	rxr->next_to_check = 0;
1561 	rxr->next_to_refresh = 0;
1562 	rxr->lro_enabled = FALSE;
1563 	rxr->rx_copies.ev_count = 0;
1564 #if 0 /* NetBSD */
1565 	rxr->rx_bytes.ev_count = 0;
1566 #if 1	/* Fix inconsistency */
1567 	rxr->rx_packets.ev_count = 0;
1568 #endif
1569 #endif
1570 	rxr->vtag_strip = FALSE;
1571 
1572 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1573 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1574 
1575 	/*
1576 	 * Now set up the LRO interface
1577 	 */
1578 	if (ixgbe_rsc_enable)
1579 		ixgbe_setup_hw_rsc(rxr);
1580 #ifdef LRO
1581 	else if (ifp->if_capenable & IFCAP_LRO) {
1582 		device_t dev = adapter->dev;
1583 		int err = tcp_lro_init(lro);
1584 		if (err) {
1585 			device_printf(dev, "LRO Initialization failed!\n");
1586 			goto fail;
1587 		}
1588 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1589 		rxr->lro_enabled = TRUE;
1590 		lro->ifp = adapter->ifp;
1591 	}
1592 #endif /* LRO */
1593 
1594 	IXGBE_RX_UNLOCK(rxr);
1595 
1596 	return (0);
1597 
1598 fail:
1599 	ixgbe_free_receive_ring(rxr);
1600 	IXGBE_RX_UNLOCK(rxr);
1601 
1602 	return (error);
1603 } /* ixgbe_setup_receive_ring */
1604 
1605 /************************************************************************
1606  * ixgbe_setup_receive_structures - Initialize all receive rings.
1607  ************************************************************************/
1608 int
1609 ixgbe_setup_receive_structures(struct adapter *adapter)
1610 {
1611 	struct rx_ring *rxr = adapter->rx_rings;
1612 	int            j;
1613 
1614 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1615 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1616 		if (ixgbe_setup_receive_ring(rxr))
1617 			goto fail;
1618 
1619 	return (0);
1620 fail:
1621 	/*
1622 	 * Free the RX buffers allocated so far; we only handle
1623 	 * the rings that completed, since the failing ring has
1624 	 * cleaned up after itself.  Ring 'j' failed, so it is the terminus.
1625 	 */
1626 	for (int i = 0; i < j; ++i) {
1627 		rxr = &adapter->rx_rings[i];
1628 		IXGBE_RX_LOCK(rxr);
1629 		ixgbe_free_receive_ring(rxr);
1630 		IXGBE_RX_UNLOCK(rxr);
1631 	}
1632 
1633 	return (ENOBUFS);
1634 } /* ixgbe_setup_receive_structures */
1635 
1636 
1637 /************************************************************************
1638  * ixgbe_free_receive_structures - Free all receive rings.
1639  ************************************************************************/
1640 void
1641 ixgbe_free_receive_structures(struct adapter *adapter)
1642 {
1643 	struct rx_ring *rxr = adapter->rx_rings;
1644 
1645 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1646 
1647 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1648 		ixgbe_free_receive_buffers(rxr);
1649 #ifdef LRO
1650 		/* Free LRO memory */
1651 		tcp_lro_free(&rxr->lro);
1652 #endif /* LRO */
1653 		/* Free the ring memory as well */
1654 		ixgbe_dma_free(adapter, &rxr->rxdma);
1655 		IXGBE_RX_LOCK_DESTROY(rxr);
1656 	}
1657 
1658 	free(adapter->rx_rings, M_DEVBUF);
1659 } /* ixgbe_free_receive_structures */
1660 
1661 
1662 /************************************************************************
1663  * ixgbe_free_receive_buffers - Free receive ring data structures
1664  ************************************************************************/
1665 static void
1666 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1667 {
1668 	struct adapter      *adapter = rxr->adapter;
1669 	struct ixgbe_rx_buf *rxbuf;
1670 
1671 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1672 
1673 	/* Cleanup any existing buffers */
1674 	if (rxr->rx_buffers != NULL) {
1675 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1676 			rxbuf = &rxr->rx_buffers[i];
1677 			ixgbe_rx_discard(rxr, i);
1678 			if (rxbuf->pmap != NULL) {
1679 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1680 				rxbuf->pmap = NULL;
1681 			}
1682 		}
1683 
1684 		/* NetBSD specific. See ixgbe_netbsd.c */
1685 		ixgbe_jcl_destroy(adapter, rxr);
1686 
1687 		if (rxr->rx_buffers != NULL) {
1688 			free(rxr->rx_buffers, M_DEVBUF);
1689 			rxr->rx_buffers = NULL;
1690 		}
1691 	}
1692 
1693 	if (rxr->ptag != NULL) {
1694 		ixgbe_dma_tag_destroy(rxr->ptag);
1695 		rxr->ptag = NULL;
1696 	}
1697 
1698 	return;
1699 } /* ixgbe_free_receive_buffers */
1700 
1701 /************************************************************************
1702  * ixgbe_rx_input
1703  ************************************************************************/
1704 static __inline void
1705 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1706     u32 ptype)
1707 {
1708 	struct adapter	*adapter = ifp->if_softc;
1709 
1710 #ifdef LRO
1711 	struct ethercom *ec = &adapter->osdep.ec;
1712 
1713 	/*
1714 	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1715 	 * of the packet must have been computed by hardware, with no VLAN tag
1716 	 * in the Ethernet header.  For IPv6 we do not yet support ext. headers.
1717 	 */
1718 	if (rxr->lro_enabled &&
1719 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1720 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1721 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1722 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1723 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1724 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1725 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1726 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1727 		/*
1728 		 * Send to the stack if:
1729 		 *  - LRO not enabled, or
1730 		 *  - no LRO resources, or
1731 		 *  - lro enqueue fails
1732 		 */
1733 		if (rxr->lro.lro_cnt != 0)
1734 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1735 				return;
1736 	}
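	/*
	 * If tcp_lro_rx() accepted the mbuf (returned 0) it now owns it and
	 * we are done; on any other outcome we fall through and hand the
	 * packet to the stack directly below.
	 */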
1737 #endif /* LRO */
1738 
1739 	if_percpuq_enqueue(adapter->ipq, m);
1740 } /* ixgbe_rx_input */
1741 
1742 /************************************************************************
1743  * ixgbe_rx_discard
1744  ************************************************************************/
1745 static __inline void
1746 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1747 {
1748 	struct ixgbe_rx_buf *rbuf;
1749 
1750 	rbuf = &rxr->rx_buffers[i];
1751 
1752 	/*
1753 	 * With advanced descriptors the writeback
1754 	 * clobbers the buffer addresses, so it is
1755 	 * easier to just free the existing mbufs and
1756 	 * take the normal refresh path to get new
1757 	 * buffers and mappings.
1758 	 */
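	/*
	 * This is used both from the rxeof error path and when tearing the
	 * ring down in ixgbe_free_receive_buffers(), so it must cope with
	 * rbuf->buf and rbuf->fmp already being NULL.
	 */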
1759 
1760 	if (rbuf->fmp != NULL) { /* Partial chain? */
1761 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1762 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1763 		m_freem(rbuf->fmp);
1764 		rbuf->fmp = NULL;
1765 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1766 	} else if (rbuf->buf) {
1767 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1768 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1769 		m_free(rbuf->buf);
1770 		rbuf->buf = NULL;
1771 	}
1772 	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1773 
1774 	rbuf->flags = 0;
1775 
1776 	return;
1777 } /* ixgbe_rx_discard */
1778 
1779 
1780 /************************************************************************
1781  * ixgbe_rxeof
1782  *
1783  *   Executes in interrupt context. It replenishes the
1784  *   mbufs in the descriptor ring and passes data which
1785  *   has been DMA'd into host memory up to the upper layer.
1786  *
1787  *   Return TRUE for more work, FALSE for all clean.
1788  ************************************************************************/
1789 bool
1790 ixgbe_rxeof(struct ix_queue *que)
1791 {
1792 	struct adapter		*adapter = que->adapter;
1793 	struct rx_ring		*rxr = que->rxr;
1794 	struct ifnet		*ifp = adapter->ifp;
1795 #ifdef LRO
1796 	struct lro_ctrl		*lro = &rxr->lro;
1797 #endif /* LRO */
1798 	union ixgbe_adv_rx_desc	*cur;
1799 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1800 	int			i, nextp, processed = 0;
1801 	u32			staterr = 0;
1802 	u32			count = adapter->rx_process_limit;
1803 #ifdef RSS
1804 	u16			pkt_info;
1805 #endif
1806 
1807 	IXGBE_RX_LOCK(rxr);
1808 
1809 #ifdef DEV_NETMAP
1810 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1811 		/* Same as the txeof routine: wake up clients on interrupt. */
1812 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1813 			IXGBE_RX_UNLOCK(rxr);
1814 			return (FALSE);
1815 		}
1816 	}
1817 #endif /* DEV_NETMAP */
1818 
1819 	for (i = rxr->next_to_check; count != 0;) {
1820 		struct mbuf *sendmp, *mp;
1821 		u32         rsc, ptype;
1822 		u16         len;
1823 		u16         vtag = 0;
1824 		bool        eop;
1825 
1826 		/* Sync the ring. */
1827 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1828 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1829 
1830 		cur = &rxr->rx_base[i];
1831 		staterr = le32toh(cur->wb.upper.status_error);
1832 #ifdef RSS
1833 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1834 #endif
1835 
1836 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1837 			break;
1838 
1839 		count--;
1840 		sendmp = NULL;
1841 		nbuf = NULL;
1842 		rsc = 0;
1843 		cur->wb.upper.status_error = 0;
1844 		rbuf = &rxr->rx_buffers[i];
1845 		mp = rbuf->buf;
1846 
1847 		len = le16toh(cur->wb.upper.length);
1848 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1849 		    IXGBE_RXDADV_PKTTYPE_MASK;
1850 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1851 
1852 		/* Make sure bad packets are discarded */
1853 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1854 #if __FreeBSD_version >= 1100036
1855 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1856 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1857 #endif
1858 			rxr->rx_discarded.ev_count++;
1859 			ixgbe_rx_discard(rxr, i);
1860 			goto next_desc;
1861 		}
1862 
1863 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1864 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1865 
1866 		/*
1867 		 * On the 82599, which supports hardware
1868 		 * LRO (called HW RSC), packets need not be
1869 		 * fragmented across sequential descriptors;
1870 		 * instead the next descriptor is indicated
1871 		 * in bits of the current descriptor.  This
1872 		 * also means that we might process more
1873 		 * than one packet at a time, something that
1874 		 * has never been true before, and it required
1875 		 * eliminating the global chain pointers in
1876 		 * favor of what we are doing here.  -jfv
1877 		 */
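		/*
		 * When hardware RSC is active, ixgbe_rsc_count() tells us how
		 * many descriptors were coalesced (for the rsc_num statistic),
		 * and a non-zero count means the next buffer of this frame is
		 * found via the NEXTP field rather than sequentially.
		 */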
1878 		if (!eop) {
1879 			/*
1880 			 * Figure out the next descriptor
1881 			 * of this frame.
1882 			 */
1883 			if (rxr->hw_rsc == TRUE) {
1884 				rsc = ixgbe_rsc_count(cur);
1885 				rxr->rsc_num += (rsc - 1);
1886 			}
1887 			if (rsc) { /* Get hardware index */
1888 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1889 				    IXGBE_RXDADV_NEXTP_SHIFT);
1890 			} else { /* Just sequential */
1891 				nextp = i + 1;
1892 				if (nextp == adapter->num_rx_desc)
1893 					nextp = 0;
1894 			}
1895 			nbuf = &rxr->rx_buffers[nextp];
1896 			prefetch(nbuf);
1897 		}
1898 		/*
1899 		 * Rather than using the fmp/lmp global pointers
1900 		 * we now keep the head of a packet chain in the
1901 		 * buffer struct and pass this along from one
1902 		 * descriptor to the next, until we get EOP.
1903 		 */
1904 		mp->m_len = len;
1905 		/*
1906 		 * See if there is a stored head from a previous
1907 		 * descriptor that this buffer continues.
1908 		 */
1909 		sendmp = rbuf->fmp;
1910 		if (sendmp != NULL) {  /* secondary frag */
1911 			rbuf->buf = rbuf->fmp = NULL;
1912 			mp->m_flags &= ~M_PKTHDR;
1913 			sendmp->m_pkthdr.len += mp->m_len;
1914 		} else {
1915 			/*
1916 			 * Optimize.  This might be a small packet,
1917 			 * maybe just a TCP ACK.  Do a fast copy that
1918 			 * is cache aligned into a new mbuf, and
1919 			 * leave the old mbuf+cluster for re-use.
1920 			 */
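			/*
			 * On a successful copy the original mbuf and its DMA
			 * map are left loaded; the IXGBE_RX_COPY flag set below
			 * presumably lets the refresh path reuse them as-is
			 * instead of allocating and mapping a fresh cluster.
			 */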
1921 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1922 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1923 				if (sendmp != NULL) {
1924 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1925 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1926 					    len);
1927 					sendmp->m_len = len;
1928 					rxr->rx_copies.ev_count++;
1929 					rbuf->flags |= IXGBE_RX_COPY;
1930 				}
1931 			}
1932 			if (sendmp == NULL) {
1933 				rbuf->buf = rbuf->fmp = NULL;
1934 				sendmp = mp;
1935 			}
1936 
1937 			/* first desc of a non-ps chain */
1938 			sendmp->m_flags |= M_PKTHDR;
1939 			sendmp->m_pkthdr.len = mp->m_len;
1940 		}
1941 		++processed;
1942 
1943 		/* Pass the head pointer on */
1944 		if (eop == 0) {
1945 			nbuf->fmp = sendmp;
1946 			sendmp = NULL;
1947 			mp->m_next = nbuf->buf;
1948 		} else { /* Sending this frame */
1949 			m_set_rcvif(sendmp, ifp);
1950 			++rxr->packets;
1951 			rxr->rx_packets.ev_count++;
1952 			/* capture data for AIM */
1953 			rxr->bytes += sendmp->m_pkthdr.len;
1954 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1955 			/* Process vlan info */
1956 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1957 				vtag = le16toh(cur->wb.upper.vlan);
1958 			if (vtag) {
1959 				vlan_set_tag(sendmp, vtag);
1960 			}
1961 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1962 				ixgbe_rx_checksum(staterr, sendmp, ptype,
1963 				   &adapter->stats.pf);
1964 			}
1965 
1966 #if 0 /* FreeBSD */
1967 			/*
1968 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
1969 			 * and never cleared. This means we have RSS hash
1970 			 * available to be used.
1971 			 */
1972 			if (adapter->num_queues > 1) {
1973 				sendmp->m_pkthdr.flowid =
1974 				    le32toh(cur->wb.lower.hi_dword.rss);
1975 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1976 				case IXGBE_RXDADV_RSSTYPE_IPV4:
1977 					M_HASHTYPE_SET(sendmp,
1978 					    M_HASHTYPE_RSS_IPV4);
1979 					break;
1980 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1981 					M_HASHTYPE_SET(sendmp,
1982 					    M_HASHTYPE_RSS_TCP_IPV4);
1983 					break;
1984 				case IXGBE_RXDADV_RSSTYPE_IPV6:
1985 					M_HASHTYPE_SET(sendmp,
1986 					    M_HASHTYPE_RSS_IPV6);
1987 					break;
1988 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1989 					M_HASHTYPE_SET(sendmp,
1990 					    M_HASHTYPE_RSS_TCP_IPV6);
1991 					break;
1992 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1993 					M_HASHTYPE_SET(sendmp,
1994 					    M_HASHTYPE_RSS_IPV6_EX);
1995 					break;
1996 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1997 					M_HASHTYPE_SET(sendmp,
1998 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1999 					break;
2000 #if __FreeBSD_version > 1100000
2001 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2002 					M_HASHTYPE_SET(sendmp,
2003 					    M_HASHTYPE_RSS_UDP_IPV4);
2004 					break;
2005 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2006 					M_HASHTYPE_SET(sendmp,
2007 					    M_HASHTYPE_RSS_UDP_IPV6);
2008 					break;
2009 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2010 					M_HASHTYPE_SET(sendmp,
2011 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2012 					break;
2013 #endif
2014 				default:
2015 					M_HASHTYPE_SET(sendmp,
2016 					    M_HASHTYPE_OPAQUE_HASH);
2017 				}
2018 			} else {
2019 				sendmp->m_pkthdr.flowid = que->msix;
2020 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2021 			}
2022 #endif
2023 		}
2024 next_desc:
2025 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2026 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2027 
2028 		/* Advance our pointers to the next descriptor. */
2029 		if (++i == rxr->num_desc)
2030 			i = 0;
2031 
2032 		/* Now send to the stack or do LRO */
2033 		if (sendmp != NULL) {
2034 			rxr->next_to_check = i;
2035 			IXGBE_RX_UNLOCK(rxr);
2036 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2037 			IXGBE_RX_LOCK(rxr);
2038 			i = rxr->next_to_check;
2039 		}
2040 
2041 		/* Every 8 descriptors we go to refresh mbufs */
2042 		if (processed == 8) {
2043 			ixgbe_refresh_mbufs(rxr, i);
2044 			processed = 0;
2045 		}
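		/*
		 * Refreshing in small batches like this keeps descriptors
		 * flowing back to the hardware during a long cleanup pass,
		 * presumably at the cost of more frequent tail updates.
		 */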
2046 	}
2047 
2048 	/* Refresh any remaining buf structs */
2049 	if (ixgbe_rx_unrefreshed(rxr))
2050 		ixgbe_refresh_mbufs(rxr, i);
2051 
2052 	rxr->next_to_check = i;
2053 
2054 	IXGBE_RX_UNLOCK(rxr);
2055 
2056 #ifdef LRO
2057 	/*
2058 	 * Flush any outstanding LRO work
2059 	 */
2060 	tcp_lro_flush_all(lro);
2061 #endif /* LRO */
2062 
2063 	/*
2064 	 * Still have cleaning to do?
2065 	 */
2066 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2067 		return (TRUE);
2068 
2069 	return (FALSE);
2070 } /* ixgbe_rxeof */
2071 
2072 
2073 /************************************************************************
2074  * ixgbe_rx_checksum
2075  *
2076  *   Verify that the hardware indicated that the checksum is valid.
2077  *   Inform the stack about the status of checksum so that stack
2078  *   Inform the stack about the status of the checksum so that the
2079  *   stack doesn't spend time verifying it.
2080 static void
2081 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2082     struct ixgbe_hw_stats *stats)
2083 {
2084 	u16  status = (u16)staterr;
2085 	u8   errors = (u8)(staterr >> 24);
2086 #if 0
2087 	bool sctp = false;
2088 
2089 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2090 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2091 		sctp = true;
2092 #endif
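	/*
	 * The low 16 bits of staterr carry the IXGBE_RXD_STAT_* bits and
	 * bits 31:24 carry the IXGBE_RXD_ERR_* bits, hence the two casts
	 * above.
	 */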
2093 
2094 	/* IPv4 checksum */
2095 	if (status & IXGBE_RXD_STAT_IPCS) {
2096 		stats->ipcs.ev_count++;
2097 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2098 			/* IP Checksum Good */
2099 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2100 		} else {
2101 			stats->ipcs_bad.ev_count++;
2102 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2103 		}
2104 	}
2105 	/* TCP/UDP/SCTP checksum */
2106 	if (status & IXGBE_RXD_STAT_L4CS) {
2107 		stats->l4cs.ev_count++;
2108 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2109 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2110 			mp->m_pkthdr.csum_flags |= type;
2111 		} else {
2112 			stats->l4cs_bad.ev_count++;
2113 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2114 		}
2115 	}
2116 } /* ixgbe_rx_checksum */
2117 
2118 /************************************************************************
2119  * ixgbe_dma_malloc
2120  ************************************************************************/
2121 int
2122 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2123 		struct ixgbe_dma_alloc *dma, const int mapflags)
2124 {
2125 	device_t dev = adapter->dev;
2126 	int      r, rsegs;
2127 
2128 	r = ixgbe_dma_tag_create(
2129 	     /*      parent */ adapter->osdep.dmat,
2130 	     /*   alignment */ DBA_ALIGN,
2131 	     /*      bounds */ 0,
2132 	     /*     maxsize */ size,
2133 	     /*   nsegments */ 1,
2134 	     /*  maxsegsize */ size,
2135 	     /*       flags */ BUS_DMA_ALLOCNOW,
2136 			       &dma->dma_tag);
2137 	if (r != 0) {
2138 		aprint_error_dev(dev,
2139 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2140 		    r);
2141 		goto fail_0;
2142 	}
2143 
2144 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2145 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2146 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2147 	if (r != 0) {
2148 		aprint_error_dev(dev,
2149 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2150 		goto fail_1;
2151 	}
2152 
2153 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2154 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2155 	if (r != 0) {
2156 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2157 		    __func__, r);
2158 		goto fail_2;
2159 	}
2160 
2161 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2162 	if (r != 0) {
2163 		aprint_error_dev(dev,
2164 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2165 		goto fail_3;
2166 	}
2167 
2168 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2169 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2170 	if (r != 0) {
2171 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2172 		    __func__, r);
2173 		goto fail_4;
2174 	}
2175 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2176 	dma->dma_size = size;
2177 	return 0;
2178 fail_4:
2179 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2180 fail_3:
2181 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2182 fail_2:
2183 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2184 fail_1:
2185 	ixgbe_dma_tag_destroy(dma->dma_tag);
2186 fail_0:
2187 
2188 	return (r);
2189 } /* ixgbe_dma_malloc */
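/*
 * Note: every successful ixgbe_dma_malloc() is expected to be balanced by
 * a matching ixgbe_dma_free(); see ixgbe_allocate_queues() below, which
 * allocates the TX/RX descriptor rings this way and unwinds with
 * ixgbe_dma_free() on failure.
 */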
2190 
2191 /************************************************************************
2192  * ixgbe_dma_free
2193  ************************************************************************/
2194 void
2195 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2196 {
2197 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2198 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2199 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2200 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2201 	ixgbe_dma_tag_destroy(dma->dma_tag);
2202 } /* ixgbe_dma_free */
2203 
2204 
2205 /************************************************************************
2206  * ixgbe_allocate_queues
2207  *
2208  *   Allocate memory for the transmit and receive rings, and then
2209  *   the descriptors associated with each, called only once at attach.
2210  ************************************************************************/
2211 int
2212 ixgbe_allocate_queues(struct adapter *adapter)
2213 {
2214 	device_t	dev = adapter->dev;
2215 	struct ix_queue	*que;
2216 	struct tx_ring	*txr;
2217 	struct rx_ring	*rxr;
2218 	int             rsize, tsize, error = IXGBE_SUCCESS;
2219 	int             txconf = 0, rxconf = 0;
2220 
2221 	/* First, allocate the top level queue structs */
2222 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2223 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2224 
2225 	/* Second, allocate the TX ring struct memory */
2226 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
2227 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2228 
2229 	/* Third, allocate the RX ring struct memory */
2230 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2231 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2232 
2233 	/* For the ring itself */
2234 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2235 	    DBA_ALIGN);
2236 
2237 	/*
2238 	 * Now set up the TX queues; txconf is needed to handle the
2239 	 * possibility that things fail midcourse and we need to
2240 	 * undo the memory allocations gracefully.
2241 	 */
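	/*
	 * txconf (and rxconf below) count how many rings have been fully set
	 * up, so the err_tx_desc/err_rx_desc unwind loops at the end of this
	 * function free the descriptor memory of only those completed rings.
	 */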
2242 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2243 		/* Set up some basics */
2244 		txr = &adapter->tx_rings[i];
2245 		txr->adapter = adapter;
2246 		txr->txr_interq = NULL;
2247 		/* In case SR-IOV is enabled, align the index properly */
2248 #ifdef PCI_IOV
2249 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2250 		    i);
2251 #else
2252 		txr->me = i;
2253 #endif
2254 		txr->num_desc = adapter->num_tx_desc;
2255 
2256 		/* Initialize the TX side lock */
2257 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2258 
2259 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2260 		    BUS_DMA_NOWAIT)) {
2261 			aprint_error_dev(dev,
2262 			    "Unable to allocate TX Descriptor memory\n");
2263 			error = ENOMEM;
2264 			goto err_tx_desc;
2265 		}
2266 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2267 		bzero((void *)txr->tx_base, tsize);
2268 
2269 		/* Now allocate transmit buffers for the ring */
2270 		if (ixgbe_allocate_transmit_buffers(txr)) {
2271 			aprint_error_dev(dev,
2272 			    "Critical Failure setting up transmit buffers\n");
2273 			error = ENOMEM;
2274 			goto err_tx_desc;
2275 		}
2276 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2277 			/* Allocate a buf ring */
2278 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2279 			if (txr->txr_interq == NULL) {
2280 				aprint_error_dev(dev,
2281 				    "Critical Failure setting up buf ring\n");
2282 				error = ENOMEM;
2283 				goto err_tx_desc;
2284 			}
2285 		}
2286 	}
2287 
2288 	/*
2289 	 * Next the RX queues...
2290 	 */
2291 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2292 	    DBA_ALIGN);
2293 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2294 		rxr = &adapter->rx_rings[i];
2295 		/* Set up some basics */
2296 		rxr->adapter = adapter;
2297 #ifdef PCI_IOV
2298 		/* In case SR-IOV is enabled, align the index properly */
2299 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2300 		    i);
2301 #else
2302 		rxr->me = i;
2303 #endif
2304 		rxr->num_desc = adapter->num_rx_desc;
2305 
2306 		/* Initialize the RX side lock */
2307 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2308 
2309 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2310 		    BUS_DMA_NOWAIT)) {
2311 			aprint_error_dev(dev,
2312 			    "Unable to allocate RX Descriptor memory\n");
2313 			error = ENOMEM;
2314 			goto err_rx_desc;
2315 		}
2316 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2317 		bzero((void *)rxr->rx_base, rsize);
2318 
2319 		/* Allocate receive buffers for the ring */
2320 		if (ixgbe_allocate_receive_buffers(rxr)) {
2321 			aprint_error_dev(dev,
2322 			    "Critical Failure setting up receive buffers\n");
2323 			error = ENOMEM;
2324 			goto err_rx_desc;
2325 		}
2326 	}
2327 
2328 	/*
2329 	 * Finally set up the queue holding structs
2330 	 */
2331 	for (int i = 0; i < adapter->num_queues; i++) {
2332 		que = &adapter->queues[i];
2333 		que->adapter = adapter;
2334 		que->me = i;
2335 		que->txr = &adapter->tx_rings[i];
2336 		que->rxr = &adapter->rx_rings[i];
2337 
2338 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2339 		que->disabled_count = 0;
2340 	}
2341 
2342 	return (0);
2343 
2344 err_rx_desc:
2345 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2346 		ixgbe_dma_free(adapter, &rxr->rxdma);
2347 err_tx_desc:
2348 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2349 		ixgbe_dma_free(adapter, &txr->txdma);
2350 	free(adapter->rx_rings, M_DEVBUF);
2351 	free(adapter->tx_rings, M_DEVBUF);
2352 	free(adapter->queues, M_DEVBUF);
2353 	return (error);
2354 } /* ixgbe_allocate_queues */
2355 
2356 /************************************************************************
2357  * ixgbe_free_queues
2358  *
2359  *   Free descriptors for the transmit and receive rings, and then
2360  *   the memory associated with each.
2361  ************************************************************************/
2362 void
2363 ixgbe_free_queues(struct adapter *adapter)
2364 {
2365 	struct ix_queue *que;
2366 	int i;
2367 
2368 	ixgbe_free_transmit_structures(adapter);
2369 	ixgbe_free_receive_structures(adapter);
2370 	for (i = 0; i < adapter->num_queues; i++) {
2371 		que = &adapter->queues[i];
2372 		mutex_destroy(&que->dc_mtx);
2373 	}
2374 	free(adapter->queues, M_DEVBUF);
2375 } /* ixgbe_free_queues */
2376