xref: /netbsd-src/sys/dev/pci/ixgbe/ix_txrx.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* $NetBSD: ix_txrx.c,v 1.79 2021/05/27 06:11:34 msaitoh Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.79 2021/05/27 06:11:34 msaitoh Exp $");
68 
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71 
72 #include "ixgbe.h"
73 
74 /*
75  * HW RSC control:
76  *  This feature only works with
77  *  IPv4, and only on 82599 and later.
78  *  It also causes IP forwarding to
79  *  fail, and unlike LRO that cannot be
80  *  controlled by the stack. For these
81  *  reasons it is best left off, with no
82  *  tunable interface; enabling it
83  *  requires changing this setting and
84  *  recompiling.
85  */
86 static bool ixgbe_rsc_enable = FALSE;
87 
88 /*
89  * For Flow Director: this is the
90  * number of TX packets we sample
91  * for the filter pool; this means
92  * every 20th packet will be probed.
93  *
94  * This feature can be disabled by
95  * setting this to 0.
96  */
97 static int atr_sample_rate = 20;
98 
99 /************************************************************************
100  *  Local Function prototypes
101  ************************************************************************/
102 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
103 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
104 static int           ixgbe_setup_receive_ring(struct rx_ring *);
105 static void          ixgbe_free_receive_buffers(struct rx_ring *);
106 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
107                                        struct ixgbe_hw_stats *);
108 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
109 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
110 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
111 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
112                                         struct mbuf *, u32 *, u32 *);
113 static int           ixgbe_tso_setup(struct tx_ring *,
114                                      struct mbuf *, u32 *, u32 *);
115 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
116 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
117                                     struct mbuf *, u32);
118 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
119                                       struct ixgbe_dma_alloc *, int);
120 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
121 
122 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
123 
124 /************************************************************************
125  * ixgbe_legacy_start_locked - Transmit entry point
126  *
127  *   Called by the stack to initiate a transmit.
128  *   The driver will remain in this routine as long as there are
129  *   packets to transmit and transmit resources are available.
130  *   In case resources are not available, the stack is notified
131  *   and the packet is requeued.
132  ************************************************************************/
133 int
134 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
135 {
136 	int rc;
137 	struct mbuf    *m_head;
138 	struct adapter *adapter = txr->adapter;
139 
140 	IXGBE_TX_LOCK_ASSERT(txr);
141 
142 	if (adapter->link_active != LINK_STATE_UP) {
143 		/*
144 		 * Discard all packets buffered in the IFQ to avoid
145 		 * sending stale packets when the link next comes up.
146 		 */
147 		ixgbe_drain(ifp, txr);
148 		return (ENETDOWN);
149 	}
150 	if ((ifp->if_flags & IFF_RUNNING) == 0)
151 		return (ENETDOWN);
152 	if (txr->txr_no_space)
153 		return (ENETDOWN);
154 
155 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
156 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
157 			break;
158 
159 		IFQ_POLL(&ifp->if_snd, m_head);
160 		if (m_head == NULL)
161 			break;
162 
163 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
164 			break;
165 		}
166 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
167 		if (rc != 0) {
168 			m_freem(m_head);
169 			continue;
170 		}
171 
172 		/* Send a copy of the frame to the BPF listener */
173 		bpf_mtap(ifp, m_head, BPF_D_OUT);
174 	}
175 
176 	return IXGBE_SUCCESS;
177 } /* ixgbe_legacy_start_locked */
178 
179 /************************************************************************
180  * ixgbe_legacy_start
181  *
182  *   Called by the stack, this always uses the first tx ring,
183  *   and should not be used with multiqueue tx enabled.
184  ************************************************************************/
185 void
186 ixgbe_legacy_start(struct ifnet *ifp)
187 {
188 	struct adapter *adapter = ifp->if_softc;
189 	struct tx_ring *txr = adapter->tx_rings;
190 
191 	if (ifp->if_flags & IFF_RUNNING) {
192 		IXGBE_TX_LOCK(txr);
193 		ixgbe_legacy_start_locked(ifp, txr);
194 		IXGBE_TX_UNLOCK(txr);
195 	}
196 } /* ixgbe_legacy_start */
197 
198 /************************************************************************
199  * ixgbe_mq_start - Multiqueue Transmit Entry Point
200  *
201  *   (if_transmit function)
202  ************************************************************************/
203 int
204 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
205 {
206 	struct adapter	*adapter = ifp->if_softc;
207 	struct tx_ring	*txr;
208 	int		i;
209 #ifdef RSS
210 	uint32_t bucket_id;
211 #endif
212 
213 	/*
214 	 * When doing RSS, map the packet to the same outbound
215 	 * queue that the incoming flow would be mapped to.
216 	 *
217 	 * If everything is set up correctly, it should be the
218 	 * same bucket that the current CPU is assigned to.
219 	 */
220 #ifdef RSS
221 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
222 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
223 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
224 		    &bucket_id) == 0)) {
225 			i = bucket_id % adapter->num_queues;
226 #ifdef IXGBE_DEBUG
227 			if (bucket_id > adapter->num_queues)
228 				if_printf(ifp,
229 				    "bucket_id (%d) > num_queues (%d)\n",
230 				    bucket_id, adapter->num_queues);
231 #endif
232 		} else
233 			i = m->m_pkthdr.flowid % adapter->num_queues;
234 	} else
235 #endif /* RSS */
236 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
237 
238 	/* Check for a hung queue and pick alternative */
239 	if (((1ULL << i) & adapter->active_queues) == 0)
240 		i = ffs64(adapter->active_queues);
241 
242 	txr = &adapter->tx_rings[i];
243 
244 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
245 		m_freem(m);
246 		txr->pcq_drops.ev_count++;
247 		return ENOBUFS;
248 	}
249 	if (IXGBE_TX_TRYLOCK(txr)) {
250 		ixgbe_mq_start_locked(ifp, txr);
251 		IXGBE_TX_UNLOCK(txr);
252 	} else {
253 		if (adapter->txrx_use_workqueue) {
254 			u_int *enqueued;
255 
256 			/*
257 			 * This function itself is not called in interrupt
258 			 * context; however, it can be called in fast softint
259 			 * context right after receiving forwarded packets.
260 			 * The workqueue must therefore be protected against
261 			 * double enqueueing when the machine handles both
262 			 * locally generated and forwarded packets.
263 			 */
264 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
265 			if (*enqueued == 0) {
266 				*enqueued = 1;
267 				percpu_putref(adapter->txr_wq_enqueued);
268 				workqueue_enqueue(adapter->txr_wq,
269 				    &txr->wq_cookie, curcpu());
270 			} else
271 				percpu_putref(adapter->txr_wq_enqueued);
272 		} else {
273 			kpreempt_disable();
274 			softint_schedule(txr->txr_si);
275 			kpreempt_enable();
276 		}
277 	}
278 
279 	return (0);
280 } /* ixgbe_mq_start */
281 
282 /************************************************************************
283  * ixgbe_mq_start_locked - Drain the software queue (txr_interq) onto the ring
284  ************************************************************************/
285 int
286 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
287 {
288 	struct mbuf    *next;
289 	int            enqueued = 0, err = 0;
290 
291 	if (txr->adapter->link_active != LINK_STATE_UP) {
292 		/*
293 		 * Discard all packets buffered in txr_interq to avoid
294 		 * sending stale packets when the link next comes up.
295 		 */
296 		ixgbe_drain(ifp, txr);
297 		return (ENETDOWN);
298 	}
299 	if ((ifp->if_flags & IFF_RUNNING) == 0)
300 		return (ENETDOWN);
301 	if (txr->txr_no_space)
302 		return (ENETDOWN);
303 
304 	/* Process the queue */
305 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
306 		if ((err = ixgbe_xmit(txr, next)) != 0) {
307 			m_freem(next);
308 			/* All errors are counted in ixgbe_xmit() */
309 			break;
310 		}
311 		enqueued++;
312 #if __FreeBSD_version >= 1100036
313 		/*
314 		 * Since we're looking at the tx ring, we can check
315 		 * to see if we're a VF by examining our tail register
316 		 * address.
317 		 */
318 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
319 		    (next->m_flags & M_MCAST))
320 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
321 #endif
322 		/* Send a copy of the frame to the BPF listener */
323 		bpf_mtap(ifp, next, BPF_D_OUT);
324 		if ((ifp->if_flags & IFF_RUNNING) == 0)
325 			break;
326 	}
327 
328 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
329 		ixgbe_txeof(txr);
330 
331 	return (err);
332 } /* ixgbe_mq_start_locked */
333 
334 /************************************************************************
335  * ixgbe_deferred_mq_start
336  *
337  *   Called from a softint, and indirectly from a workqueue, to drain
338  *   queued transmit packets.
339  ************************************************************************/
340 void
341 ixgbe_deferred_mq_start(void *arg)
342 {
343 	struct tx_ring *txr = arg;
344 	struct adapter *adapter = txr->adapter;
345 	struct ifnet   *ifp = adapter->ifp;
346 
347 	IXGBE_TX_LOCK(txr);
348 	if (pcq_peek(txr->txr_interq) != NULL)
349 		ixgbe_mq_start_locked(ifp, txr);
350 	IXGBE_TX_UNLOCK(txr);
351 } /* ixgbe_deferred_mq_start */
352 
353 /************************************************************************
354  * ixgbe_deferred_mq_start_work
355  *
356  *   Called from a workqueue to drain queued transmit packets.
357  ************************************************************************/
358 void
359 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
360 {
361 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
362 	struct adapter *adapter = txr->adapter;
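	/* Clear the per-CPU flag so ixgbe_mq_start() can enqueue this work again */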
363 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
364 	*enqueued = 0;
365 	percpu_putref(adapter->txr_wq_enqueued);
366 
367 	ixgbe_deferred_mq_start(txr);
368 } /* ixgbe_deferred_mq_start_work */
369 
370 /************************************************************************
371  * ixgbe_drain_all - Drain the transmit queues of all rings
372  ************************************************************************/
373 void
374 ixgbe_drain_all(struct adapter *adapter)
375 {
376 	struct ifnet *ifp = adapter->ifp;
377 	struct ix_queue *que = adapter->queues;
378 
379 	for (int i = 0; i < adapter->num_queues; i++, que++) {
380 		struct tx_ring  *txr = que->txr;
381 
382 		IXGBE_TX_LOCK(txr);
383 		ixgbe_drain(ifp, txr);
384 		IXGBE_TX_UNLOCK(txr);
385 	}
386 }
387 
388 /************************************************************************
389  * ixgbe_xmit
390  *
391  *   Maps the mbufs to tx descriptors, allowing the
392  *   TX engine to transmit the packets.
393  *
394  *   Return 0 on success, positive on failure
395  ************************************************************************/
396 static int
397 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
398 {
399 	struct adapter          *adapter = txr->adapter;
400 	struct ixgbe_tx_buf     *txbuf;
401 	union ixgbe_adv_tx_desc *txd = NULL;
402 	struct ifnet	        *ifp = adapter->ifp;
403 	int                     i, j, error;
404 	int                     first;
405 	u32                     olinfo_status = 0, cmd_type_len;
406 	bool                    remap = TRUE;
407 	bus_dmamap_t            map;
408 
409 	/* Basic descriptor defines */
410 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
411 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
412 
413 	if (vlan_has_tag(m_head))
414 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
415 
416 	/*
417 	 * It is important to capture the first descriptor
418 	 * used because it will contain the index of the
419 	 * one we tell the hardware to report back.
420 	 */
421 	first = txr->next_avail_desc;
422 	txbuf = &txr->tx_buffers[first];
423 	map = txbuf->map;
424 
425 	/*
426 	 * Map the packet for DMA.
427 	 */
428 retry:
429 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
430 	    BUS_DMA_NOWAIT);
431 
432 	if (__predict_false(error)) {
433 		struct mbuf *m;
434 
435 		switch (error) {
436 		case EAGAIN:
437 			txr->q_eagain_tx_dma_setup++;
438 			return EAGAIN;
439 		case ENOMEM:
440 			txr->q_enomem_tx_dma_setup++;
441 			return EAGAIN;
442 		case EFBIG:
443 			/* Try it again? - one try */
444 			if (remap == TRUE) {
445 				remap = FALSE;
446 				/*
447 				 * XXX: m_defrag will choke on
448 				 * non-MCLBYTES-sized clusters
449 				 */
450 				txr->q_efbig_tx_dma_setup++;
451 				m = m_defrag(m_head, M_NOWAIT);
452 				if (m == NULL) {
453 					txr->q_mbuf_defrag_failed++;
454 					return ENOBUFS;
455 				}
456 				m_head = m;
457 				goto retry;
458 			} else {
459 				txr->q_efbig2_tx_dma_setup++;
460 				return error;
461 			}
462 		case EINVAL:
463 			txr->q_einval_tx_dma_setup++;
464 			return error;
465 		default:
466 			txr->q_other_tx_dma_setup++;
467 			return error;
468 		}
469 	}
470 
471 	/* Make certain there are enough descriptors */
472 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
473 		txr->txr_no_space = true;
474 		txr->no_desc_avail.ev_count++;
475 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
476 		return EAGAIN;
477 	}
478 
479 	/*
480 	 * Set up the appropriate offload context;
481 	 * this will consume the first descriptor.
482 	 */
483 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
484 	if (__predict_false(error)) {
485 		return (error);
486 	}
487 
488 #ifdef IXGBE_FDIR
489 	/* Do the flow director magic */
490 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
491 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
492 		++txr->atr_count;
493 		if (txr->atr_count >= atr_sample_rate) {
494 			ixgbe_atr(txr, m_head);
495 			txr->atr_count = 0;
496 		}
497 	}
498 #endif
499 
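	/* CC (Check Context): have the hardware apply the Tx offload context */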
500 	olinfo_status |= IXGBE_ADVTXD_CC;
501 	i = txr->next_avail_desc;
502 	for (j = 0; j < map->dm_nsegs; j++) {
503 		bus_size_t seglen;
504 		uint64_t segaddr;
505 
506 		txbuf = &txr->tx_buffers[i];
507 		txd = &txr->tx_base[i];
508 		seglen = map->dm_segs[j].ds_len;
509 		segaddr = htole64(map->dm_segs[j].ds_addr);
510 
511 		txd->read.buffer_addr = segaddr;
512 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
513 		txd->read.olinfo_status = htole32(olinfo_status);
514 
515 		if (++i == txr->num_desc)
516 			i = 0;
517 	}
518 
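	/* Last descriptor: mark End Of Packet and request a status report (RS) */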
519 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
520 	txr->tx_avail -= map->dm_nsegs;
521 	txr->next_avail_desc = i;
522 
523 	txbuf->m_head = m_head;
524 	/*
525 	 * Here we swap the map so the last descriptor,
526 	 * which gets the completion interrupt, has the
527 	 * real map, and the first descriptor gets the
528 	 * unused map from this descriptor.
529 	 */
530 	txr->tx_buffers[first].map = txbuf->map;
531 	txbuf->map = map;
532 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
533 	    BUS_DMASYNC_PREWRITE);
534 
535 	/* Set the EOP descriptor that will be marked done */
536 	txbuf = &txr->tx_buffers[first];
537 	txbuf->eop = txd;
538 
539 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
540 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
541 	/*
542 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
543 	 * hardware that this frame is available to transmit.
544 	 */
545 	++txr->total_packets.ev_count;
546 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
547 
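	/* Update the interface's output byte and multicast counters */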
548 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
549 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
550 	if (m_head->m_flags & M_MCAST)
551 		if_statinc_ref(nsr, if_omcasts);
552 	IF_STAT_PUTREF(ifp);
553 
554 	/* Mark queue as having work */
555 	if (txr->busy == 0)
556 		txr->busy = 1;
557 
558 	return (0);
559 } /* ixgbe_xmit */
560 
561 /************************************************************************
562  * ixgbe_drain - Discard all packets queued for transmission on a ring
563  ************************************************************************/
564 static void
565 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
566 {
567 	struct mbuf *m;
568 
569 	IXGBE_TX_LOCK_ASSERT(txr);
570 
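	/* The shared if_snd queue is drained only once, by the first ring */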
571 	if (txr->me == 0) {
572 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
573 			IFQ_DEQUEUE(&ifp->if_snd, m);
574 			m_freem(m);
575 			IF_DROP(&ifp->if_snd);
576 		}
577 	}
578 
579 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
580 		m_freem(m);
581 		txr->pcq_drops.ev_count++;
582 	}
583 }
584 
585 /************************************************************************
586  * ixgbe_allocate_transmit_buffers
587  *
588  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
589  *   the information needed to transmit a packet on the wire. This is
590  *   called only once at attach; setup is done every reset.
591  ************************************************************************/
592 static int
593 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
594 {
595 	struct adapter      *adapter = txr->adapter;
596 	device_t            dev = adapter->dev;
597 	struct ixgbe_tx_buf *txbuf;
598 	int                 error, i;
599 
600 	/*
601 	 * Setup DMA descriptor areas.
602 	 */
603 	error = ixgbe_dma_tag_create(
604 	         /*      parent */ adapter->osdep.dmat,
605 	         /*   alignment */ 1,
606 	         /*      bounds */ 0,
607 	         /*     maxsize */ IXGBE_TSO_SIZE,
608 	         /*   nsegments */ adapter->num_segs,
609 	         /*  maxsegsize */ PAGE_SIZE,
610 	         /*       flags */ 0,
611 	                           &txr->txtag);
612 	if (error != 0) {
613 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
614 		goto fail;
615 	}
616 
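	/* M_WAITOK allocations never fail, so no NULL check is needed */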
617 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
618 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
619 
620 	/* Create the descriptor buffer dma maps */
621 	txbuf = txr->tx_buffers;
622 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
623 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
624 		if (error != 0) {
625 			aprint_error_dev(dev,
626 			    "Unable to create TX DMA map (%d)\n", error);
627 			goto fail;
628 		}
629 	}
630 
631 	return 0;
632 fail:
633 	/* We free all; this handles the case where we are in the middle */
634 #if 0 /* XXX was FreeBSD */
635 	ixgbe_free_transmit_structures(adapter);
636 #else
637 	ixgbe_free_transmit_buffers(txr);
638 #endif
639 	return (error);
640 } /* ixgbe_allocate_transmit_buffers */
641 
642 /************************************************************************
643  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
644  ************************************************************************/
645 static void
646 ixgbe_setup_transmit_ring(struct tx_ring *txr)
647 {
648 	struct adapter        *adapter = txr->adapter;
649 	struct ixgbe_tx_buf   *txbuf;
650 #ifdef DEV_NETMAP
651 	struct netmap_adapter *na = NA(adapter->ifp);
652 	struct netmap_slot    *slot;
653 #endif /* DEV_NETMAP */
654 
655 	/* Clear the old ring contents */
656 	IXGBE_TX_LOCK(txr);
657 
658 #ifdef DEV_NETMAP
659 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
660 		/*
661 		 * (under lock): if in netmap mode, do some consistency
662 		 * checks and set slot to entry 0 of the netmap ring.
663 		 */
664 		slot = netmap_reset(na, NR_TX, txr->me, 0);
665 	}
666 #endif /* DEV_NETMAP */
667 
668 	bzero((void *)txr->tx_base,
669 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
670 	/* Reset indices */
671 	txr->next_avail_desc = 0;
672 	txr->next_to_clean = 0;
673 
674 	/* Free any existing tx buffers. */
675 	txbuf = txr->tx_buffers;
676 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
677 		if (txbuf->m_head != NULL) {
678 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
679 			    0, txbuf->m_head->m_pkthdr.len,
680 			    BUS_DMASYNC_POSTWRITE);
681 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
682 			m_freem(txbuf->m_head);
683 			txbuf->m_head = NULL;
684 		}
685 
686 #ifdef DEV_NETMAP
687 		/*
688 		 * In netmap mode, set the map for the packet buffer.
689 		 * NOTE: Some drivers (not this one) also need to set
690 		 * the physical buffer address in the NIC ring.
691 		 * Slots in the netmap ring (indexed by "si") are
692 		 * kring->nkr_hwofs positions "ahead" wrt the
693 		 * corresponding slot in the NIC ring. In some drivers
694 		 * (not here) nkr_hwofs can be negative. Function
695 		 * netmap_idx_n2k() handles wraparounds properly.
696 		 */
697 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
698 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
699 			netmap_load_map(na, txr->txtag,
700 			    txbuf->map, NMB(na, slot + si));
701 		}
702 #endif /* DEV_NETMAP */
703 
704 		/* Clear the EOP descriptor pointer */
705 		txbuf->eop = NULL;
706 	}
707 
708 	/* Set the rate at which we sample packets */
709 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
710 		txr->atr_sample = atr_sample_rate;
711 
712 	/* Set number of descriptors available */
713 	txr->tx_avail = adapter->num_tx_desc;
714 
715 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
716 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
717 	IXGBE_TX_UNLOCK(txr);
718 } /* ixgbe_setup_transmit_ring */
719 
720 /************************************************************************
721  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
722  ************************************************************************/
723 int
724 ixgbe_setup_transmit_structures(struct adapter *adapter)
725 {
726 	struct tx_ring *txr = adapter->tx_rings;
727 
728 	for (int i = 0; i < adapter->num_queues; i++, txr++)
729 		ixgbe_setup_transmit_ring(txr);
730 
731 	return (0);
732 } /* ixgbe_setup_transmit_structures */
733 
734 /************************************************************************
735  * ixgbe_free_transmit_structures - Free all transmit rings.
736  ************************************************************************/
737 void
738 ixgbe_free_transmit_structures(struct adapter *adapter)
739 {
740 	struct tx_ring *txr = adapter->tx_rings;
741 
742 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
743 		ixgbe_free_transmit_buffers(txr);
744 		ixgbe_dma_free(adapter, &txr->txdma);
745 		IXGBE_TX_LOCK_DESTROY(txr);
746 	}
747 	free(adapter->tx_rings, M_DEVBUF);
748 } /* ixgbe_free_transmit_structures */
749 
750 /************************************************************************
751  * ixgbe_free_transmit_buffers
752  *
753  *   Free transmit ring related data structures.
754  ************************************************************************/
755 static void
756 ixgbe_free_transmit_buffers(struct tx_ring *txr)
757 {
758 	struct adapter      *adapter = txr->adapter;
759 	struct ixgbe_tx_buf *tx_buffer;
760 	int                 i;
761 
762 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
763 
764 	if (txr->tx_buffers == NULL)
765 		return;
766 
767 	tx_buffer = txr->tx_buffers;
768 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
769 		if (tx_buffer->m_head != NULL) {
770 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
771 			    0, tx_buffer->m_head->m_pkthdr.len,
772 			    BUS_DMASYNC_POSTWRITE);
773 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
774 			m_freem(tx_buffer->m_head);
775 			tx_buffer->m_head = NULL;
776 			if (tx_buffer->map != NULL) {
777 				ixgbe_dmamap_destroy(txr->txtag,
778 				    tx_buffer->map);
779 				tx_buffer->map = NULL;
780 			}
781 		} else if (tx_buffer->map != NULL) {
782 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
783 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
784 			tx_buffer->map = NULL;
785 		}
786 	}
787 	if (txr->txr_interq != NULL) {
788 		struct mbuf *m;
789 
790 		while ((m = pcq_get(txr->txr_interq)) != NULL)
791 			m_freem(m);
792 		pcq_destroy(txr->txr_interq);
793 	}
794 	if (txr->tx_buffers != NULL) {
795 		free(txr->tx_buffers, M_DEVBUF);
796 		txr->tx_buffers = NULL;
797 	}
798 	if (txr->txtag != NULL) {
799 		ixgbe_dma_tag_destroy(txr->txtag);
800 		txr->txtag = NULL;
801 	}
802 } /* ixgbe_free_transmit_buffers */
803 
804 /************************************************************************
805  * ixgbe_tx_ctx_setup
806  *
807  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
808  ************************************************************************/
809 static int
810 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
811     u32 *cmd_type_len, u32 *olinfo_status)
812 {
813 	struct adapter                   *adapter = txr->adapter;
814 	struct ixgbe_adv_tx_context_desc *TXD;
815 	struct ether_vlan_header         *eh;
816 #ifdef INET
817 	struct ip                        *ip;
818 #endif
819 #ifdef INET6
820 	struct ip6_hdr                   *ip6;
821 #endif
822 	int                              ehdrlen, ip_hlen = 0;
823 	int                              offload = TRUE;
824 	int                              ctxd = txr->next_avail_desc;
825 	u32                              vlan_macip_lens = 0;
826 	u32                              type_tucmd_mlhl = 0;
827 	u16                              vtag = 0;
828 	u16                              etype;
829 	u8                               ipproto = 0;
830 	char                             *l3d;
831 
832 
833 	/* First check if TSO is to be used */
834 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
835 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
836 
837 		if (rv != 0)
838 			++adapter->tso_err.ev_count;
839 		return rv;
840 	}
841 
842 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
843 		offload = FALSE;
844 
845 	/* Indicate the whole packet as payload when not doing TSO */
846 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
847 
848 	/* Now ready a context descriptor */
849 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
850 
851 	/*
852 	 * In advanced descriptors the vlan tag must
853 	 * be placed into the context descriptor. Hence
854 	 * we need to make one even if not doing offloads.
855 	 */
856 	if (vlan_has_tag(mp)) {
857 		vtag = htole16(vlan_get_tag(mp));
858 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
859 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
860 	           (offload == FALSE))
861 		return (0);
862 
863 	/*
864 	 * Determine where frame payload starts.
865 	 * Jump over VLAN headers if already present;
866 	 * this also helps with QinQ.
867 	 */
868 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
869 	eh = mtod(mp, struct ether_vlan_header *);
870 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
871 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
872 		etype = ntohs(eh->evl_proto);
873 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
874 	} else {
875 		etype = ntohs(eh->evl_encap_proto);
876 		ehdrlen = ETHER_HDR_LEN;
877 	}
878 
879 	/* Set the ether header length */
880 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
881 
882 	if (offload == FALSE)
883 		goto no_offloads;
884 
885 	/*
886 	 * If the first mbuf only includes the ethernet header,
887 	 * jump to the next one
888 	 * XXX: This assumes the stack splits mbufs containing headers
889 	 *      on header boundaries
890 	 * XXX: And assumes the entire IP header is contained in one mbuf
891 	 */
892 	if (mp->m_len == ehdrlen && mp->m_next)
893 		l3d = mtod(mp->m_next, char *);
894 	else
895 		l3d = mtod(mp, char *) + ehdrlen;
896 
897 	switch (etype) {
898 #ifdef INET
899 	case ETHERTYPE_IP:
900 		ip = (struct ip *)(l3d);
901 		ip_hlen = ip->ip_hl << 2;
902 		ipproto = ip->ip_p;
903 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
904 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
905 		    ip->ip_sum == 0);
906 		break;
907 #endif
908 #ifdef INET6
909 	case ETHERTYPE_IPV6:
910 		ip6 = (struct ip6_hdr *)(l3d);
911 		ip_hlen = sizeof(struct ip6_hdr);
912 		ipproto = ip6->ip6_nxt;
913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
914 		break;
915 #endif
916 	default:
917 		offload = false;
918 		break;
919 	}
920 
921 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
922 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
923 
924 	vlan_macip_lens |= ip_hlen;
925 
926 	/* No support for offloads for non-L4 next headers */
927 	switch (ipproto) {
928 	case IPPROTO_TCP:
929 		if (mp->m_pkthdr.csum_flags &
930 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
931 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
932 		else
933 			offload = false;
934 		break;
935 	case IPPROTO_UDP:
936 		if (mp->m_pkthdr.csum_flags &
937 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
938 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
939 		else
940 			offload = false;
941 		break;
942 	default:
943 		offload = false;
944 		break;
945 	}
946 
947 	if (offload) /* Insert L4 checksum into data descriptors */
948 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
949 
950 no_offloads:
951 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
952 
953 	/* Now copy bits into descriptor */
954 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
955 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
956 	TXD->seqnum_seed = htole32(0);
957 	TXD->mss_l4len_idx = htole32(0);
958 
959 	/* We've consumed the first desc, adjust counters */
960 	if (++ctxd == txr->num_desc)
961 		ctxd = 0;
962 	txr->next_avail_desc = ctxd;
963 	--txr->tx_avail;
964 
965 	return (0);
966 } /* ixgbe_tx_ctx_setup */
967 
968 /************************************************************************
969  * ixgbe_tso_setup
970  *
971  *   Setup work for hardware segmentation offload (TSO) on
972  *   adapters using advanced tx descriptors
973  ************************************************************************/
974 static int
975 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
976     u32 *olinfo_status)
977 {
978 	struct ixgbe_adv_tx_context_desc *TXD;
979 	struct ether_vlan_header         *eh;
980 #ifdef INET6
981 	struct ip6_hdr                   *ip6;
982 #endif
983 #ifdef INET
984 	struct ip                        *ip;
985 #endif
986 	struct tcphdr                    *th;
987 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
988 	u32                              vlan_macip_lens = 0;
989 	u32                              type_tucmd_mlhl = 0;
990 	u32                              mss_l4len_idx = 0, paylen;
991 	u16                              vtag = 0, eh_type;
992 
993 	/*
994 	 * Determine where frame payload starts.
995 	 * Jump over vlan headers if already present
996 	 */
997 	eh = mtod(mp, struct ether_vlan_header *);
998 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
999 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1000 		eh_type = eh->evl_proto;
1001 	} else {
1002 		ehdrlen = ETHER_HDR_LEN;
1003 		eh_type = eh->evl_encap_proto;
1004 	}
1005 
1006 	switch (ntohs(eh_type)) {
1007 #ifdef INET
1008 	case ETHERTYPE_IP:
1009 		ip = (struct ip *)(mp->m_data + ehdrlen);
1010 		if (ip->ip_p != IPPROTO_TCP)
1011 			return (ENXIO);
1012 		ip->ip_sum = 0;
1013 		ip_hlen = ip->ip_hl << 2;
1014 		th = (struct tcphdr *)((char *)ip + ip_hlen);
1015 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1016 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1017 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1018 		/* Tell transmit desc to also do IPv4 checksum. */
1019 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1020 		break;
1021 #endif
1022 #ifdef INET6
1023 	case ETHERTYPE_IPV6:
1024 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1025 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1026 		if (ip6->ip6_nxt != IPPROTO_TCP)
1027 			return (ENXIO);
1028 		ip_hlen = sizeof(struct ip6_hdr);
1029 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1030 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1031 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1032 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1033 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1034 		break;
1035 #endif
1036 	default:
1037 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1038 		    __func__, ntohs(eh_type));
1039 		break;
1040 	}
1041 
1042 	ctxd = txr->next_avail_desc;
1043 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1044 
1045 	tcp_hlen = th->th_off << 2;
1046 
1047 	/* This is used in the transmit desc in encap */
1048 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1049 
1050 	/* VLAN MACLEN IPLEN */
1051 	if (vlan_has_tag(mp)) {
1052 		vtag = htole16(vlan_get_tag(mp));
1053 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1054 	}
1055 
1056 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1057 	vlan_macip_lens |= ip_hlen;
1058 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1059 
1060 	/* ADV DTYPE TUCMD */
1061 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1062 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1063 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1064 
1065 	/* MSS L4LEN IDX */
1066 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1067 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1068 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1069 
1070 	TXD->seqnum_seed = htole32(0);
1071 
1072 	if (++ctxd == txr->num_desc)
1073 		ctxd = 0;
1074 
1075 	txr->tx_avail--;
1076 	txr->next_avail_desc = ctxd;
1077 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1078 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1079 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1080 	++txr->tso_tx.ev_count;
1081 
1082 	return (0);
1083 } /* ixgbe_tso_setup */
1084 
1085 
1086 /************************************************************************
1087  * ixgbe_txeof
1088  *
1089  *   Examine each tx_buffer in the used queue. If the hardware is done
1090  *   processing the packet then free associated resources. The
1091  *   tx_buffer is put back on the free queue.
1092  ************************************************************************/
1093 bool
1094 ixgbe_txeof(struct tx_ring *txr)
1095 {
1096 	struct adapter		*adapter = txr->adapter;
1097 	struct ifnet		*ifp = adapter->ifp;
1098 	struct ixgbe_tx_buf	*buf;
1099 	union ixgbe_adv_tx_desc *txd;
1100 	u32			work, processed = 0;
1101 	u32			limit = adapter->tx_process_limit;
1102 
1103 	KASSERT(mutex_owned(&txr->tx_mtx));
1104 
1105 #ifdef DEV_NETMAP
1106 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1107 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1108 		struct netmap_adapter *na = NA(adapter->ifp);
1109 		struct netmap_kring *kring = na->tx_rings[txr->me];
1110 		txd = txr->tx_base;
1111 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1112 		    BUS_DMASYNC_POSTREAD);
1113 		/*
1114 		 * In netmap mode, all the work is done in the context
1115 		 * of the client thread. Interrupt handlers only wake up
1116 		 * clients, which may be sleeping on individual rings
1117 		 * or on a global resource for all rings.
1118 		 * To implement tx interrupt mitigation, we wake up the client
1119 		 * thread roughly every half ring, even if the NIC interrupts
1120 		 * more frequently. This is implemented as follows:
1121 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1122 		 *   the slot that should wake up the thread (nkr_num_slots
1123 		 *   means the user thread should not be woken up);
1124 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1125 		 *   or the slot has the DD bit set.
1126 		 */
1127 		if (kring->nr_kflags < kring->nkr_num_slots &&
1128 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1129 			netmap_tx_irq(ifp, txr->me);
1130 		}
1131 		return false;
1132 	}
1133 #endif /* DEV_NETMAP */
1134 
1135 	if (txr->tx_avail == txr->num_desc) {
1136 		txr->busy = 0;
1137 		return false;
1138 	}
1139 
1140 	/* Get work starting point */
1141 	work = txr->next_to_clean;
1142 	buf = &txr->tx_buffers[work];
1143 	txd = &txr->tx_base[work];
1144 	work -= txr->num_desc; /* The distance to ring end */
1145 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1146 	    BUS_DMASYNC_POSTREAD);
1147 
1148 	do {
1149 		union ixgbe_adv_tx_desc *eop = buf->eop;
1150 		if (eop == NULL) /* No work */
1151 			break;
1152 
1153 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1154 			break;	/* I/O not complete */
1155 
1156 		if (buf->m_head) {
1157 			txr->bytes += buf->m_head->m_pkthdr.len;
1158 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1159 			    0, buf->m_head->m_pkthdr.len,
1160 			    BUS_DMASYNC_POSTWRITE);
1161 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1162 			m_freem(buf->m_head);
1163 			buf->m_head = NULL;
1164 		}
1165 		buf->eop = NULL;
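		/* A descriptor was reclaimed, so the ring is no longer out of space */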
1166 		txr->txr_no_space = false;
1167 		++txr->tx_avail;
1168 
1169 		/* We clean the range if multi segment */
1170 		while (txd != eop) {
1171 			++txd;
1172 			++buf;
1173 			++work;
1174 			/* wrap the ring? */
1175 			if (__predict_false(!work)) {
1176 				work -= txr->num_desc;
1177 				buf = txr->tx_buffers;
1178 				txd = txr->tx_base;
1179 			}
1180 			if (buf->m_head) {
1181 				txr->bytes +=
1182 				    buf->m_head->m_pkthdr.len;
1183 				bus_dmamap_sync(txr->txtag->dt_dmat,
1184 				    buf->map,
1185 				    0, buf->m_head->m_pkthdr.len,
1186 				    BUS_DMASYNC_POSTWRITE);
1187 				ixgbe_dmamap_unload(txr->txtag,
1188 				    buf->map);
1189 				m_freem(buf->m_head);
1190 				buf->m_head = NULL;
1191 			}
1192 			++txr->tx_avail;
1193 			buf->eop = NULL;
1194 
1195 		}
1196 		++txr->packets;
1197 		++processed;
1198 		if_statinc(ifp, if_opackets);
1199 
1200 		/* Try the next packet */
1201 		++txd;
1202 		++buf;
1203 		++work;
1204 		/* reset with a wrap */
1205 		if (__predict_false(!work)) {
1206 			work -= txr->num_desc;
1207 			buf = txr->tx_buffers;
1208 			txd = txr->tx_base;
1209 		}
1210 		prefetch(txd);
1211 	} while (__predict_true(--limit));
1212 
1213 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1214 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1215 
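	/* 'work' was biased by -num_desc above; convert it back to a ring index */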
1216 	work += txr->num_desc;
1217 	txr->next_to_clean = work;
1218 
1219 	/*
1220 	 * Queue hang detection: we know there is
1221 	 * work outstanding or the early return above
1222 	 * would have been taken, so increment busy
1223 	 * if nothing managed to get cleaned; then
1224 	 * local_timer will check it and mark the queue
1225 	 * as HUNG if it exceeds a MAX number of attempts.
1226 	 */
1227 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1228 		++txr->busy;
1229 	/*
1230 	 * If anything gets cleaned we reset the state to 1;
1231 	 * note this will turn off HUNG if it's set.
1232 	 */
1233 	if (processed)
1234 		txr->busy = 1;
1235 
1236 	if (txr->tx_avail == txr->num_desc)
1237 		txr->busy = 0;
1238 
1239 	return ((limit > 0) ? false : true);
1240 } /* ixgbe_txeof */
1241 
1242 /************************************************************************
1243  * ixgbe_rsc_count
1244  *
1245  *   Used to detect a descriptor that has been merged by Hardware RSC.
1246  ************************************************************************/
1247 static inline u32
1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1249 {
1250 	return (le32toh(rx->wb.lower.lo_dword.data) &
1251 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1252 } /* ixgbe_rsc_count */
1253 
1254 /************************************************************************
1255  * ixgbe_setup_hw_rsc
1256  *
1257  *   Initialize the Hardware RSC (LRO) feature on 82599
1258  *   for an RX ring; it is toggled by the LRO capability
1259  *   even though it is transparent to the stack.
1260  *
1261  *   NOTE: Since this HW feature only works with IPv4 and
1262  *         testing has shown soft LRO to be as effective,
1263  *         this feature will be disabled by default.
1264  ************************************************************************/
1265 static void
1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1267 {
1268 	struct	adapter  *adapter = rxr->adapter;
1269 	struct	ixgbe_hw *hw = &adapter->hw;
1270 	u32              rscctrl, rdrxctl;
1271 
1272 	/* If turning LRO/RSC off we need to disable it */
1273 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1274 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1275 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1276 		return;
1277 	}
1278 
1279 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1280 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1281 #ifdef DEV_NETMAP
1282 	/* Always strip CRC unless Netmap disabled it */
1283 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1284 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1285 	    ix_crcstrip)
1286 #endif /* DEV_NETMAP */
1287 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1288 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1289 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1290 
1291 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1292 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1293 	/*
1294 	 * Limit the total number of descriptors that
1295 	 * can be combined, so it does not exceed 64K
1296 	 */
1297 	if (rxr->mbuf_sz == MCLBYTES)
1298 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1299 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1300 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1301 	else if (rxr->mbuf_sz == MJUM9BYTES)
1302 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1303 	else  /* Using 16K cluster */
1304 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1305 
1306 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1307 
1308 	/* Enable TCP header recognition */
1309 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1310 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1311 
1312 	/* Disable RSC for ACK packets */
1313 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1314 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1315 
1316 	rxr->hw_rsc = TRUE;
1317 } /* ixgbe_setup_hw_rsc */
1318 
1319 /************************************************************************
1320  * ixgbe_refresh_mbufs
1321  *
1322  *   Refresh mbuf buffers for RX descriptor rings
1323  *    - now keeps its own state so discards due to resource
1324  *      exhaustion are unnecessary, if an mbuf cannot be obtained
1325  *      it just returns, keeping its placeholder, thus it can simply
1326  *      be recalled to try again.
1327  *
1328  *   XXX NetBSD TODO:
1329  *    - The ixgbe_rxeof() function always preallocates mbuf cluster (jcl),
1330  *      so the ixgbe_refresh_mbufs() function can be simplified.
1331  *
1332  ************************************************************************/
1333 static void
1334 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1335 {
1336 	struct adapter      *adapter = rxr->adapter;
1337 	struct ixgbe_rx_buf *rxbuf;
1338 	struct mbuf         *mp;
1339 	int                 i, j, error;
1340 	bool                refreshed = false;
1341 
1342 	i = j = rxr->next_to_refresh;
1343 	/* Control the loop with one beyond */
1344 	if (++j == rxr->num_desc)
1345 		j = 0;
1346 
1347 	while (j != limit) {
1348 		rxbuf = &rxr->rx_buffers[i];
1349 		if (rxbuf->buf == NULL) {
1350 			mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1351 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1352 			if (mp == NULL) {
1353 				rxr->no_jmbuf.ev_count++;
1354 				goto update;
1355 			}
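			/* Keep the IP header 4-byte aligned when the frame fits a cluster */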
1356 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1357 				m_adj(mp, ETHER_ALIGN);
1358 		} else
1359 			mp = rxbuf->buf;
1360 
1361 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1362 
1363 		/* If we're dealing with an mbuf that was copied rather
1364 		 * than replaced, there's no need to go through busdma.
1365 		 */
1366 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1367 			/* Get the memory mapping */
1368 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1369 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1370 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1371 			if (error != 0) {
1372 				device_printf(adapter->dev, "Refresh mbufs: "
1373 				    "payload dmamap load failure - %d\n",
1374 				    error);
1375 				m_free(mp);
1376 				rxbuf->buf = NULL;
1377 				goto update;
1378 			}
1379 			rxbuf->buf = mp;
1380 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1381 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1382 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1383 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1384 		} else {
1385 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1386 			rxbuf->flags &= ~IXGBE_RX_COPY;
1387 		}
1388 
1389 		refreshed = true;
1390 		/* Next is precalculated */
1391 		i = j;
1392 		rxr->next_to_refresh = i;
1393 		if (++j == rxr->num_desc)
1394 			j = 0;
1395 	}
1396 
1397 update:
1398 	if (refreshed) /* Update hardware tail index */
1399 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1400 
1401 	return;
1402 } /* ixgbe_refresh_mbufs */
1403 
1404 /************************************************************************
1405  * ixgbe_allocate_receive_buffers
1406  *
1407  *   Allocate memory for rx_buffer structures. Since we use one
1408  *   rx_buffer per received packet, the maximum number of rx_buffers
1409  *   that we'll need is equal to the number of receive descriptors
1410  *   that we've allocated.
1411  ************************************************************************/
1412 static int
1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1414 {
1415 	struct adapter      *adapter = rxr->adapter;
1416 	device_t            dev = adapter->dev;
1417 	struct ixgbe_rx_buf *rxbuf;
1418 	int                 bsize, error;
1419 
1420 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1421 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1422 
1423 	error = ixgbe_dma_tag_create(
1424 	         /*      parent */ adapter->osdep.dmat,
1425 	         /*   alignment */ 1,
1426 	         /*      bounds */ 0,
1427 	         /*     maxsize */ MJUM16BYTES,
1428 	         /*   nsegments */ 1,
1429 	         /*  maxsegsize */ MJUM16BYTES,
1430 	         /*       flags */ 0,
1431 	                           &rxr->ptag);
1432 	if (error != 0) {
1433 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1434 		goto fail;
1435 	}
1436 
1437 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1438 		rxbuf = &rxr->rx_buffers[i];
1439 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1440 		if (error) {
1441 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1442 			goto fail;
1443 		}
1444 	}
1445 
1446 	return (0);
1447 
1448 fail:
1449 	/* Frees all, but can handle partial completion */
1450 	ixgbe_free_receive_structures(adapter);
1451 
1452 	return (error);
1453 } /* ixgbe_allocate_receive_buffers */
1454 
1455 /************************************************************************
1456  * ixgbe_free_receive_ring - Discard the mbufs attached to every RX descriptor
1457  ************************************************************************/
1458 static void
1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
1460 {
1461 	for (int i = 0; i < rxr->num_desc; i++) {
1462 		ixgbe_rx_discard(rxr, i);
1463 	}
1464 } /* ixgbe_free_receive_ring */
1465 
1466 /************************************************************************
1467  * ixgbe_setup_receive_ring
1468  *
1469  *   Initialize a receive ring and its buffers.
1470  ************************************************************************/
1471 static int
1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1473 {
1474 	struct adapter        *adapter;
1475 	struct ixgbe_rx_buf   *rxbuf;
1476 #ifdef LRO
1477 	struct ifnet          *ifp;
1478 	struct lro_ctrl       *lro = &rxr->lro;
1479 #endif /* LRO */
1480 #ifdef DEV_NETMAP
1481 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1482 	struct netmap_slot    *slot;
1483 #endif /* DEV_NETMAP */
1484 	int                   rsize, error = 0;
1485 
1486 	adapter = rxr->adapter;
1487 #ifdef LRO
1488 	ifp = adapter->ifp;
1489 #endif /* LRO */
1490 
1491 	/* Clear the ring contents */
1492 	IXGBE_RX_LOCK(rxr);
1493 
1494 #ifdef DEV_NETMAP
1495 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1496 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1497 #endif /* DEV_NETMAP */
1498 
1499 	rsize = roundup2(adapter->num_rx_desc *
1500 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1501 	bzero((void *)rxr->rx_base, rsize);
1502 	/* Cache the size */
1503 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1504 
1505 	/* Free current RX buffer structs and their mbufs */
1506 	ixgbe_free_receive_ring(rxr);
1507 
1508 	IXGBE_RX_UNLOCK(rxr);
1509 	/*
1510 	 * Now reinitialize our supply of jumbo mbufs.  The number
1511 	 * or size of jumbo mbufs may have changed.
1512 	 * Assume all of rxr->ptag are the same.
1513 	 */
1514 	ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1515 	    adapter->num_jcl, adapter->rx_mbuf_sz);
1516 
1517 	IXGBE_RX_LOCK(rxr);
1518 
1519 	/* Now replenish the mbufs */
1520 	for (int j = 0; j != rxr->num_desc; ++j) {
1521 		struct mbuf *mp;
1522 
1523 		rxbuf = &rxr->rx_buffers[j];
1524 
1525 #ifdef DEV_NETMAP
1526 		/*
1527 		 * In netmap mode, fill the map and set the buffer
1528 		 * address in the NIC ring, considering the offset
1529 		 * between the netmap and NIC rings (see comment in
1530 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1531 		 * an mbuf, so end the block with a continue;
1532 		 */
1533 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1534 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1535 			uint64_t paddr;
1536 			void *addr;
1537 
1538 			addr = PNMB(na, slot + sj, &paddr);
1539 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1540 			/* Update descriptor and the cached value */
1541 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1542 			rxbuf->addr = htole64(paddr);
1543 			continue;
1544 		}
1545 #endif /* DEV_NETMAP */
1546 
1547 		rxbuf->flags = 0;
1548 		rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1549 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1550 		if (rxbuf->buf == NULL) {
1551 			rxr->no_jmbuf.ev_count++;
1552 			error = ENOBUFS;
1553 			goto fail;
1554 		}
1555 		mp = rxbuf->buf;
1556 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1557 		/* Get the memory mapping */
1558 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1559 		    mp, BUS_DMA_NOWAIT);
1560 		if (error != 0) {
1561 			/*
1562 			 * Clear this entry for later cleanup in
1563 			 * ixgbe_rx_discard(), which is called via
1564 			 * ixgbe_free_receive_ring().
1565 			 */
1566 			m_freem(mp);
1567 			rxbuf->buf = NULL;
1568 			goto fail;
1569 		}
1570 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1571 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1572 		/* Update the descriptor and the cached value */
1573 		rxr->rx_base[j].read.pkt_addr =
1574 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1575 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1576 	}
1577 
1578 	/* Setup our descriptor indices */
1579 	rxr->next_to_check = 0;
1580 	rxr->next_to_refresh = 0;
1581 	rxr->lro_enabled = FALSE;
1582 	rxr->rx_copies.ev_count = 0;
1583 #if 0 /* NetBSD */
1584 	rxr->rx_bytes.ev_count = 0;
1585 #if 1	/* Fix inconsistency */
1586 	rxr->rx_packets.ev_count = 0;
1587 #endif
1588 #endif
1589 	rxr->vtag_strip = FALSE;
1590 
1591 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1592 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1593 
1594 	/*
1595 	 * Now set up the LRO interface
1596 	 */
1597 	if (ixgbe_rsc_enable)
1598 		ixgbe_setup_hw_rsc(rxr);
1599 #ifdef LRO
1600 	else if (ifp->if_capenable & IFCAP_LRO) {
1601 		device_t dev = adapter->dev;
1602 		int err = tcp_lro_init(lro);
1603 		if (err) {
1604 			device_printf(dev, "LRO Initialization failed!\n");
1605 			goto fail;
1606 		}
1607 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1608 		rxr->lro_enabled = TRUE;
1609 		lro->ifp = adapter->ifp;
1610 	}
1611 #endif /* LRO */
1612 
1613 	IXGBE_RX_UNLOCK(rxr);
1614 
1615 	return (0);
1616 
1617 fail:
1618 	ixgbe_free_receive_ring(rxr);
1619 	IXGBE_RX_UNLOCK(rxr);
1620 
1621 	return (error);
1622 } /* ixgbe_setup_receive_ring */
1623 
1624 /************************************************************************
1625  * ixgbe_setup_receive_structures - Initialize all receive rings.
1626  ************************************************************************/
1627 int
1628 ixgbe_setup_receive_structures(struct adapter *adapter)
1629 {
1630 	struct rx_ring *rxr = adapter->rx_rings;
1631 	int            j;
1632 
1633 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1634 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1635 		if (ixgbe_setup_receive_ring(rxr))
1636 			goto fail;
1637 
1638 	return (0);
1639 fail:
1640 	/*
1641 	 * Free RX buffers allocated so far; we will only handle
1642 	 * the rings that completed, as the failing case will have
1643 	 * cleaned up after itself. 'j' failed, so it's the terminus.
1644 	 */
1645 	for (int i = 0; i < j; ++i) {
1646 		rxr = &adapter->rx_rings[i];
1647 		IXGBE_RX_LOCK(rxr);
1648 		ixgbe_free_receive_ring(rxr);
1649 		IXGBE_RX_UNLOCK(rxr);
1650 	}
1651 
1652 	return (ENOBUFS);
1653 } /* ixgbe_setup_receive_structures */
1654 
1655 
1656 /************************************************************************
1657  * ixgbe_free_receive_structures - Free all receive rings.
1658  ************************************************************************/
1659 void
1660 ixgbe_free_receive_structures(struct adapter *adapter)
1661 {
1662 	struct rx_ring *rxr = adapter->rx_rings;
1663 
1664 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1665 
1666 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1667 		ixgbe_free_receive_buffers(rxr);
1668 #ifdef LRO
1669 		/* Free LRO memory */
1670 		tcp_lro_free(&rxr->lro);
1671 #endif /* LRO */
1672 		/* Free the ring memory as well */
1673 		ixgbe_dma_free(adapter, &rxr->rxdma);
1674 		IXGBE_RX_LOCK_DESTROY(rxr);
1675 	}
1676 
1677 	free(adapter->rx_rings, M_DEVBUF);
1678 } /* ixgbe_free_receive_structures */
1679 
1680 
1681 /************************************************************************
1682  * ixgbe_free_receive_buffers - Free receive ring data structures
1683  ************************************************************************/
1684 static void
1685 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1686 {
1687 	struct adapter      *adapter = rxr->adapter;
1688 	struct ixgbe_rx_buf *rxbuf;
1689 
1690 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1691 
1692 	/* Cleanup any existing buffers */
1693 	if (rxr->rx_buffers != NULL) {
1694 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1695 			rxbuf = &rxr->rx_buffers[i];
1696 			ixgbe_rx_discard(rxr, i);
1697 			if (rxbuf->pmap != NULL) {
1698 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1699 				rxbuf->pmap = NULL;
1700 			}
1701 		}
1702 
1703 		/* NetBSD specific. See ixgbe_netbsd.c */
1704 		ixgbe_jcl_destroy(adapter, rxr);
1705 
1706 		if (rxr->rx_buffers != NULL) {
1707 			free(rxr->rx_buffers, M_DEVBUF);
1708 			rxr->rx_buffers = NULL;
1709 		}
1710 	}
1711 
1712 	if (rxr->ptag != NULL) {
1713 		ixgbe_dma_tag_destroy(rxr->ptag);
1714 		rxr->ptag = NULL;
1715 	}
1716 
1717 	return;
1718 } /* ixgbe_free_receive_buffers */
1719 
1720 /************************************************************************
1721  * ixgbe_rx_input
1722  ************************************************************************/
1723 static __inline void
1724 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1725     u32 ptype)
1726 {
1727 	struct adapter	*adapter = ifp->if_softc;
1728 
1729 #ifdef LRO
1730 	struct ethercom *ec = &adapter->osdep.ec;
1731 
1732 	/*
1733 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1734 	 * been verified by hardware and which carry no VLAN tag in the
1735 	 * Ethernet header.  For IPv6 we do not yet support extension headers.
1736 	 */
1737 	if (rxr->lro_enabled &&
1738 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1739 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1740 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1741 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1742 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1743 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1744 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1745 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1746 		/*
1747 		 * Fall through and send to the stack if:
1748 		 *  - LRO is not enabled, or
1749 		 *  - there are no LRO resources, or
1750 		 *  - the LRO enqueue fails.
1751 		 */
1752 		if (rxr->lro.lro_cnt != 0)
1753 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1754 				return;
1755 	}
1756 #endif /* LRO */
1757 
1758 	if_percpuq_enqueue(adapter->ipq, m);
1759 } /* ixgbe_rx_input */
1760 
1761 /************************************************************************
1762  * ixgbe_rx_discard
1763  ************************************************************************/
1764 static __inline void
1765 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1766 {
1767 	struct ixgbe_rx_buf *rbuf;
1768 
1769 	rbuf = &rxr->rx_buffers[i];
1770 
1771 	/*
1772 	 * With advanced descriptors the writeback clobbers the buffer addrs,
1773 	 * so it's easier to just free the existing mbufs and take the normal
1774 	 * refresh path to get new buffers and mapping.
1775 	 */
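	/*
	 * (The refresh path relies on the physical address cached in
	 * rbuf->addr; see how ixgbe_rxeof() rewrites read.pkt_addr from it
	 * when a replacement mbuf cannot be allocated.)
	 */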
1776 
1777 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1778 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1779 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1780 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1781 		m_freem(rbuf->fmp);
1782 		rbuf->fmp = NULL;
1783 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1784 	} else if (rbuf->buf) {
1785 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1786 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1787 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1788 		m_free(rbuf->buf);
1789 		rbuf->buf = NULL;
1790 	}
1791 
1792 	rbuf->flags = 0;
1793 
1794 	return;
1795 } /* ixgbe_rx_discard */
1796 
1797 
1798 /************************************************************************
1799  * ixgbe_rxeof
1800  *
1801  *   Executes in interrupt context. It replenishes the
1802  *   mbufs in the descriptor ring and passes data which has
1803  *   been DMA'd into host memory up to the upper layer.
1804  *
1805  *   Return TRUE for more work, FALSE for all clean.
1806  ************************************************************************/
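/*
 * Illustrative sketch (not part of the driver): a hypothetical caller that
 * drains a queue based on the TRUE/FALSE return value documented above,
 * either rescheduling itself or re-enabling the queue interrupt.  The
 * softint handle (que_si) and ixgbe_enable_queue() are assumptions about
 * the surrounding driver code.
 *
 *	static void
 *	example_que_drain(struct ix_queue *que)
 *	{
 *		bool more = ixgbe_rxeof(que);
 *
 *		if (more)
 *			softint_schedule(que->que_si);
 *		else
 *			ixgbe_enable_queue(que->adapter, que->msix);
 *	}
 */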
1807 bool
1808 ixgbe_rxeof(struct ix_queue *que)
1809 {
1810 	struct adapter		*adapter = que->adapter;
1811 	struct rx_ring		*rxr = que->rxr;
1812 	struct ifnet		*ifp = adapter->ifp;
1813 #ifdef LRO
1814 	struct lro_ctrl		*lro = &rxr->lro;
1815 #endif /* LRO */
1816 	union ixgbe_adv_rx_desc	*cur;
1817 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1818 	int			i, nextp, processed = 0;
1819 	u32			staterr = 0;
1820 	u32			count = 0;
1821 	u32			limit = adapter->rx_process_limit;
1822 	bool			discard_multidesc = false;
1823 #ifdef RSS
1824 	u16			pkt_info;
1825 #endif
1826 
1827 	IXGBE_RX_LOCK(rxr);
1828 
1829 #ifdef DEV_NETMAP
1830 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1831 		/* Same as the txeof routine: wakeup clients on intr. */
1832 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1833 			IXGBE_RX_UNLOCK(rxr);
1834 			return (FALSE);
1835 		}
1836 	}
1837 #endif /* DEV_NETMAP */
1838 
1839 	/*
1840 	 * The maximum number of loop iterations is rx_process_limit. If
1841 	 * discard_multidesc is true, keep processing so that a broken
1842 	 * multi-descriptor packet is not handed to the upper layer.
1843 	 */
1844 	for (i = rxr->next_to_check;
1845 	     (count < limit) || (discard_multidesc == true);) {
1846 
1847 		struct mbuf *sendmp, *mp;
1848 		struct mbuf *newmp;
1849 		u32         rsc, ptype;
1850 		u16         len;
1851 		u16         vtag = 0;
1852 		bool        eop;
1853 
1854 		/* Sync the ring. */
1855 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1856 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1857 
1858 		cur = &rxr->rx_base[i];
1859 		staterr = le32toh(cur->wb.upper.status_error);
1860 #ifdef RSS
1861 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1862 #endif
1863 
1864 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1865 			break;
1866 
1867 		count++;
1868 		sendmp = NULL;
1869 		nbuf = NULL;
1870 		rsc = 0;
1871 		cur->wb.upper.status_error = 0;
1872 		rbuf = &rxr->rx_buffers[i];
1873 		mp = rbuf->buf;
1874 
1875 		len = le16toh(cur->wb.upper.length);
1876 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1877 		    IXGBE_RXDADV_PKTTYPE_MASK;
1878 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1879 
1880 		/* Make sure bad packets are discarded */
1881 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1882 #if __FreeBSD_version >= 1100036
1883 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1884 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1885 #endif
1886 			rxr->rx_discarded.ev_count++;
1887 			ixgbe_rx_discard(rxr, i);
1888 			discard_multidesc = false;
1889 			goto next_desc;
1890 		}
1891 
1892 		/* pre-alloc new mbuf */
1893 		if (!discard_multidesc)
1894 			newmp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT, MT_DATA,
1895 			    M_PKTHDR, rxr->mbuf_sz);
1896 		else
1897 			newmp = NULL;
1898 		if (newmp == NULL) {
1899 			rxr->no_jmbuf.ev_count++;
1900 			/*
1901 			 * Descriptor initialization is already done by the
1902 			 * above code (cur->wb.upper.status_error = 0).
1903 			 * So, we can reuse current rbuf->buf for new packet.
1904 			 *
1905 			 * Rewrite the buffer addr, see comment in
1906 			 * ixgbe_rx_discard().
1907 			 */
1908 			cur->read.pkt_addr = rbuf->addr;
1909 			m_freem(rbuf->fmp);
1910 			rbuf->fmp = NULL;
1911 			if (!eop) {
1912 				/* Discard the entire packet. */
1913 				discard_multidesc = true;
1914 			} else
1915 				discard_multidesc = false;
1916 			goto next_desc;
1917 		}
1918 		discard_multidesc = false;
1919 
1920 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1921 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1922 
1923 		/*
1924 		 * On 82599 which supports a hardware
1925 		 * LRO (called HW RSC), packets need
1926 		 * not be fragmented across sequential
1927 		 * descriptors, rather the next descriptor
1928 		 * is indicated in bits of the descriptor.
1929 		 * This also means that we might process
1930 		 * more than one packet at a time, something
1931 		 * that has never been true before, it
1932 		 * required eliminating global chain pointers
1933 		 * in favor of what we are doing here.  -jfv
1934 		 */
1935 		if (!eop) {
1936 			/*
1937 			 * Figure out the next descriptor
1938 			 * of this frame.
1939 			 */
1940 			if (rxr->hw_rsc == TRUE) {
1941 				rsc = ixgbe_rsc_count(cur);
1942 				rxr->rsc_num += (rsc - 1);
1943 			}
1944 			if (rsc) { /* Get hardware index */
1945 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1946 				    IXGBE_RXDADV_NEXTP_SHIFT);
1947 			} else { /* Just sequential */
1948 				nextp = i + 1;
1949 				if (nextp == adapter->num_rx_desc)
1950 					nextp = 0;
1951 			}
1952 			nbuf = &rxr->rx_buffers[nextp];
1953 			prefetch(nbuf);
1954 		}
1955 		/*
1956 		 * Rather than using the fmp/lmp global pointers
1957 		 * we now keep the head of a packet chain in the
1958 		 * buffer struct and pass this along from one
1959 		 * descriptor to the next, until we get EOP.
1960 		 */
1961 		/*
1962 		 * See if there is a stored chain head that
1963 		 * determines how to treat this fragment.
1964 		 */
1965 		sendmp = rbuf->fmp;
1966 		if (sendmp != NULL) {  /* secondary frag */
1967 			rbuf->buf = newmp;
1968 			rbuf->fmp = NULL;
1969 			mp->m_len = len;
1970 			mp->m_flags &= ~M_PKTHDR;
1971 			sendmp->m_pkthdr.len += mp->m_len;
1972 		} else {
1973 			/*
1974 			 * Optimize.  This might be a small packet,
1975 			 * maybe just a TCP ACK.  Do a fast copy that
1976 			 * is cache aligned into a new mbuf, and
1977 			 * leave the old mbuf+cluster for re-use.
1978 			 */
1979 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1980 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1981 				if (sendmp != NULL) {
1982 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1983 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1984 					    len);
1985 					sendmp->m_len = len;
1986 					rxr->rx_copies.ev_count++;
1987 					rbuf->flags |= IXGBE_RX_COPY;
1988 
1989 					m_freem(newmp);
1990 				}
1991 			}
1992 			if (sendmp == NULL) {
1993 				rbuf->buf = newmp;
1994 				rbuf->fmp = NULL;
1995 				mp->m_len = len;
1996 				sendmp = mp;
1997 			}
1998 
1999 			/* first desc of a non-ps chain */
2000 			sendmp->m_flags |= M_PKTHDR;
2001 			sendmp->m_pkthdr.len = len;
2002 		}
2003 		++processed;
2004 
2005 		/* Pass the head pointer on */
2006 		if (eop == 0) {
2007 			nbuf->fmp = sendmp;
2008 			sendmp = NULL;
2009 			mp->m_next = nbuf->buf;
2010 		} else { /* Sending this frame */
2011 			m_set_rcvif(sendmp, ifp);
2012 			++rxr->packets;
2013 			rxr->rx_packets.ev_count++;
2014 			/* capture data for AIM */
2015 			rxr->bytes += sendmp->m_pkthdr.len;
2016 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
2017 			/* Process vlan info */
2018 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2019 				vtag = le16toh(cur->wb.upper.vlan);
2020 			if (vtag) {
2021 				vlan_set_tag(sendmp, vtag);
2022 			}
2023 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2024 				ixgbe_rx_checksum(staterr, sendmp, ptype,
2025 				   &adapter->stats.pf);
2026 			}
2027 
2028 #if 0 /* FreeBSD */
2029 			/*
2030 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
2031 			 * and never cleared. This means we have RSS hash
2032 			 * available to be used.
2033 			 */
2034 			if (adapter->num_queues > 1) {
2035 				sendmp->m_pkthdr.flowid =
2036 				    le32toh(cur->wb.lower.hi_dword.rss);
2037 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2038 				case IXGBE_RXDADV_RSSTYPE_IPV4:
2039 					M_HASHTYPE_SET(sendmp,
2040 					    M_HASHTYPE_RSS_IPV4);
2041 					break;
2042 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2043 					M_HASHTYPE_SET(sendmp,
2044 					    M_HASHTYPE_RSS_TCP_IPV4);
2045 					break;
2046 				case IXGBE_RXDADV_RSSTYPE_IPV6:
2047 					M_HASHTYPE_SET(sendmp,
2048 					    M_HASHTYPE_RSS_IPV6);
2049 					break;
2050 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2051 					M_HASHTYPE_SET(sendmp,
2052 					    M_HASHTYPE_RSS_TCP_IPV6);
2053 					break;
2054 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2055 					M_HASHTYPE_SET(sendmp,
2056 					    M_HASHTYPE_RSS_IPV6_EX);
2057 					break;
2058 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2059 					M_HASHTYPE_SET(sendmp,
2060 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
2061 					break;
2062 #if __FreeBSD_version > 1100000
2063 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2064 					M_HASHTYPE_SET(sendmp,
2065 					    M_HASHTYPE_RSS_UDP_IPV4);
2066 					break;
2067 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2068 					M_HASHTYPE_SET(sendmp,
2069 					    M_HASHTYPE_RSS_UDP_IPV6);
2070 					break;
2071 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2072 					M_HASHTYPE_SET(sendmp,
2073 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2074 					break;
2075 #endif
2076 				default:
2077 					M_HASHTYPE_SET(sendmp,
2078 					    M_HASHTYPE_OPAQUE_HASH);
2079 				}
2080 			} else {
2081 				sendmp->m_pkthdr.flowid = que->msix;
2082 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2083 			}
2084 #endif
2085 		}
2086 next_desc:
2087 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2088 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2089 
2090 		/* Advance our pointers to the next descriptor. */
2091 		if (++i == rxr->num_desc)
2092 			i = 0;
2093 
2094 		/* Now send to the stack or do LRO */
2095 		if (sendmp != NULL) {
2096 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2097 		}
2098 
2099 		/* Every 8 descriptors we go to refresh mbufs */
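		/*
		 * Refreshing in small batches keeps the hardware fed while
		 * amortizing the descriptor writes and tail update done by
		 * ixgbe_refresh_mbufs(), instead of paying that cost once
		 * per received frame.
		 */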
2100 		if (processed == 8) {
2101 			ixgbe_refresh_mbufs(rxr, i);
2102 			processed = 0;
2103 		}
2104 	}
2105 
2106 	/* Refresh any remaining buf structs */
2107 	if (ixgbe_rx_unrefreshed(rxr))
2108 		ixgbe_refresh_mbufs(rxr, i);
2109 
2110 	rxr->next_to_check = i;
2111 
2112 	IXGBE_RX_UNLOCK(rxr);
2113 
2114 #ifdef LRO
2115 	/*
2116 	 * Flush any outstanding LRO work
2117 	 */
2118 	tcp_lro_flush_all(lro);
2119 #endif /* LRO */
2120 
2121 	/*
2122 	 * Still have cleaning to do?
2123 	 */
2124 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2125 		return (TRUE);
2126 
2127 	return (FALSE);
2128 } /* ixgbe_rxeof */
2129 
2130 
2131 /************************************************************************
2132  * ixgbe_rx_checksum
2133  *
2134  *   Verify that the hardware indicated that the checksum is valid.
2135  *   Inform the stack of the checksum status so that the stack
2136  *   doesn't spend time verifying the checksum.
2137  ************************************************************************/
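/*
 * Illustrative sketch (not part of the driver): how a consumer of the mbuf
 * would typically interpret the flags set below, using the standard
 * M_CSUM_* definitions from <sys/mbuf.h>.
 *
 *	static int
 *	example_ipv4_csum_ok(const struct mbuf *m)
 *	{
 *		int f = m->m_pkthdr.csum_flags;
 *
 *		if ((f & M_CSUM_IPv4) == 0)
 *			return 0;	// hardware did not check it
 *		return (f & M_CSUM_IPv4_BAD) == 0;
 *	}
 */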
2138 static void
2139 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2140     struct ixgbe_hw_stats *stats)
2141 {
2142 	u16  status = (u16)staterr;
2143 	u8   errors = (u8)(staterr >> 24);
2144 #if 0
2145 	bool sctp = false;
2146 
2147 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2148 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2149 		sctp = true;
2150 #endif
2151 
2152 	/* IPv4 checksum */
2153 	if (status & IXGBE_RXD_STAT_IPCS) {
2154 		stats->ipcs.ev_count++;
2155 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2156 			/* IP Checksum Good */
2157 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2158 		} else {
2159 			stats->ipcs_bad.ev_count++;
2160 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2161 		}
2162 	}
2163 	/* TCP/UDP/SCTP checksum */
2164 	if (status & IXGBE_RXD_STAT_L4CS) {
2165 		stats->l4cs.ev_count++;
2166 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2167 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2168 			mp->m_pkthdr.csum_flags |= type;
2169 		} else {
2170 			stats->l4cs_bad.ev_count++;
2171 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2172 		}
2173 	}
2174 } /* ixgbe_rx_checksum */
2175 
2176 /************************************************************************
2177  * ixgbe_dma_malloc
2178  ************************************************************************/
2179 int
2180 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2181 		struct ixgbe_dma_alloc *dma, const int mapflags)
2182 {
2183 	device_t dev = adapter->dev;
2184 	int      r, rsegs;
2185 
2186 	r = ixgbe_dma_tag_create(
2187 	     /*      parent */ adapter->osdep.dmat,
2188 	     /*   alignment */ DBA_ALIGN,
2189 	     /*      bounds */ 0,
2190 	     /*     maxsize */ size,
2191 	     /*   nsegments */ 1,
2192 	     /*  maxsegsize */ size,
2193 	     /*       flags */ BUS_DMA_ALLOCNOW,
2194 			       &dma->dma_tag);
2195 	if (r != 0) {
2196 		aprint_error_dev(dev,
2197 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2198 		    r);
2199 		goto fail_0;
2200 	}
2201 
2202 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2203 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2204 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2205 	if (r != 0) {
2206 		aprint_error_dev(dev,
2207 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2208 		goto fail_1;
2209 	}
2210 
2211 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2212 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2213 	if (r != 0) {
2214 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2215 		    __func__, r);
2216 		goto fail_2;
2217 	}
2218 
2219 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2220 	if (r != 0) {
2221 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2222 		    __func__, r);
2223 		goto fail_3;
2224 	}
2225 
2226 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2227 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2228 	if (r != 0) {
2229 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2230 		    __func__, r);
2231 		goto fail_4;
2232 	}
2233 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2234 	dma->dma_size = size;
2235 	return 0;
2236 fail_4:
2237 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2238 fail_3:
2239 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2240 fail_2:
2241 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2242 fail_1:
2243 	ixgbe_dma_tag_destroy(dma->dma_tag);
2244 fail_0:
2245 
2246 	return (r);
2247 } /* ixgbe_dma_malloc */
2248 
2249 /************************************************************************
2250  * ixgbe_dma_free
2251  ************************************************************************/
2252 void
2253 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2254 {
2255 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2256 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2257 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2258 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2259 	ixgbe_dma_tag_destroy(dma->dma_tag);
2260 } /* ixgbe_dma_free */
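/*
 * Illustrative sketch (not part of the driver): the typical pairing of the
 * two helpers above, mirroring how ixgbe_allocate_queues() below sizes and
 * allocates a descriptor ring.  "example_ring" is hypothetical.
 *
 *	struct ixgbe_dma_alloc example_ring;
 *	int rsize = roundup2(adapter->num_rx_desc *
 *	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
 *
 *	if (ixgbe_dma_malloc(adapter, rsize, &example_ring, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	// ... use example_ring.dma_vaddr / example_ring.dma_paddr ...
 *	ixgbe_dma_free(adapter, &example_ring);
 */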
2261 
2262 
2263 /************************************************************************
2264  * ixgbe_allocate_queues
2265  *
2266  *   Allocate memory for the transmit and receive rings, and then
2267  *   the descriptors associated with each, called only once at attach.
2268  ************************************************************************/
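/*
 * Note: the three top-level malloc() calls below use M_WAITOK, so they do
 * not return NULL; only the per-ring DMA and buffer allocations can fail.
 */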
2269 int
2270 ixgbe_allocate_queues(struct adapter *adapter)
2271 {
2272 	device_t	dev = adapter->dev;
2273 	struct ix_queue	*que;
2274 	struct tx_ring	*txr;
2275 	struct rx_ring	*rxr;
2276 	int             rsize, tsize, error = IXGBE_SUCCESS;
2277 	int             txconf = 0, rxconf = 0;
2278 
2279 	/* First, allocate the top level queue structs */
2280 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2281 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2282 
2283 	/* Second, allocate the TX ring struct memory */
2284 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
2285 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2286 
2287 	/* Third, allocate the RX ring */
2288 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2289 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2290 
2291 	/* For the ring itself */
2292 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2293 	    DBA_ALIGN);
2294 
2295 	/*
2296 	 * Now set up the TX queues; txconf is needed to handle the
2297 	 * possibility that things fail midcourse and we need to
2298 	 * undo the memory gracefully.
2299 	 */
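	/*
	 * (txconf counts the rings that completed this loop; the err_tx_desc
	 * unwind below frees the descriptor DMA of exactly that many rings.)
	 */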
2300 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2301 		/* Set up some basics */
2302 		txr = &adapter->tx_rings[i];
2303 		txr->adapter = adapter;
2304 		txr->txr_interq = NULL;
2305 		/* In case SR-IOV is enabled, align the index properly */
2306 #ifdef PCI_IOV
2307 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2308 		    i);
2309 #else
2310 		txr->me = i;
2311 #endif
2312 		txr->num_desc = adapter->num_tx_desc;
2313 
2314 		/* Initialize the TX side lock */
2315 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2316 
2317 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2318 		    BUS_DMA_NOWAIT)) {
2319 			aprint_error_dev(dev,
2320 			    "Unable to allocate TX Descriptor memory\n");
2321 			error = ENOMEM;
2322 			goto err_tx_desc;
2323 		}
2324 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2325 		bzero((void *)txr->tx_base, tsize);
2326 
2327 		/* Now allocate transmit buffers for the ring */
2328 		if (ixgbe_allocate_transmit_buffers(txr)) {
2329 			aprint_error_dev(dev,
2330 			    "Critical Failure setting up transmit buffers\n");
2331 			error = ENOMEM;
2332 			goto err_tx_desc;
2333 		}
2334 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2335 			/* Allocate a buf ring */
2336 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2337 			if (txr->txr_interq == NULL) {
2338 				aprint_error_dev(dev,
2339 				    "Critical Failure setting up buf ring\n");
2340 				error = ENOMEM;
2341 				goto err_tx_desc;
2342 			}
2343 		}
2344 	}
2345 
2346 	/*
2347 	 * Next the RX queues...
2348 	 */
2349 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2350 	    DBA_ALIGN);
2351 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2352 		rxr = &adapter->rx_rings[i];
2353 		/* Set up some basics */
2354 		rxr->adapter = adapter;
2355 #ifdef PCI_IOV
2356 		/* In case SR-IOV is enabled, align the index properly */
2357 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2358 		    i);
2359 #else
2360 		rxr->me = i;
2361 #endif
2362 		rxr->num_desc = adapter->num_rx_desc;
2363 
2364 		/* Initialize the RX side lock */
2365 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2366 
2367 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2368 		    BUS_DMA_NOWAIT)) {
2369 			aprint_error_dev(dev,
2370 			    "Unable to allocate RX Descriptor memory\n");
2371 			error = ENOMEM;
2372 			goto err_rx_desc;
2373 		}
2374 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2375 		bzero((void *)rxr->rx_base, rsize);
2376 
2377 		/* Allocate receive buffers for the ring */
2378 		if (ixgbe_allocate_receive_buffers(rxr)) {
2379 			aprint_error_dev(dev,
2380 			    "Critical Failure setting up receive buffers\n");
2381 			error = ENOMEM;
2382 			goto err_rx_desc;
2383 		}
2384 	}
2385 
2386 	/*
2387 	 * Finally set up the queue holding structs
2388 	 */
2389 	for (int i = 0; i < adapter->num_queues; i++) {
2390 		que = &adapter->queues[i];
2391 		que->adapter = adapter;
2392 		que->me = i;
2393 		que->txr = &adapter->tx_rings[i];
2394 		que->rxr = &adapter->rx_rings[i];
2395 
2396 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2397 		que->disabled_count = 0;
2398 	}
2399 
2400 	return (0);
2401 
2402 err_rx_desc:
2403 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2404 		ixgbe_dma_free(adapter, &rxr->rxdma);
2405 err_tx_desc:
2406 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2407 		ixgbe_dma_free(adapter, &txr->txdma);
2408 	free(adapter->rx_rings, M_DEVBUF);
2409 	free(adapter->tx_rings, M_DEVBUF);
2410 	free(adapter->queues, M_DEVBUF);
2411 	return (error);
2412 } /* ixgbe_allocate_queues */
2413 
2414 /************************************************************************
2415  * ixgbe_free_queues
2416  *
2417  *   Free descriptors for the transmit and receive rings, and then
2418  *   the memory associated with each.
2419  ************************************************************************/
2420 void
2421 ixgbe_free_queues(struct adapter *adapter)
2422 {
2423 	struct ix_queue *que;
2424 	int i;
2425 
2426 	ixgbe_free_transmit_structures(adapter);
2427 	ixgbe_free_receive_structures(adapter);
2428 	for (i = 0; i < adapter->num_queues; i++) {
2429 		que = &adapter->queues[i];
2430 		mutex_destroy(&que->dc_mtx);
2431 	}
2432 	free(adapter->queues, M_DEVBUF);
2433 } /* ixgbe_free_queues */
2434