xref: /netbsd-src/sys/dev/pci/ixgbe/ix_txrx.c (revision d16b7486a53dcb8072b60ec6fcb4373a2d0c27b7)
1 /* $NetBSD: ix_txrx.c,v 1.100 2022/09/16 03:05:51 knakahara Exp $ */
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2017, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36 
37 /*
38  * Copyright (c) 2011 The NetBSD Foundation, Inc.
39  * All rights reserved.
40  *
41  * This code is derived from software contributed to The NetBSD Foundation
42  * by Coyote Point Systems, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  */
65 
66 #include <sys/cdefs.h>
67 __KERNEL_RCSID(0, "$NetBSD: ix_txrx.c,v 1.100 2022/09/16 03:05:51 knakahara Exp $");
68 
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71 
72 #include "ixgbe.h"
73 
74 /*
75  * HW RSC control:
76  *  This feature only works with
77  *  IPv4, and only on 82599 and later.
78  *  It also causes IP forwarding to
79  *  fail, and that cannot be worked
80  *  around by the stack the way LRO
81  *  can.  For all these reasons it is
82  *  best left off, with no tuneable
83  *  interface; enabling it requires
84  *  recompiling.
85  */
86 static bool ixgbe_rsc_enable = FALSE;
87 
88 /*
89  * For Flow Director: this is the
90  * number of TX packets between samples
91  * taken for the filter pool, meaning
92  * every 20th packet will be probed.
93  *
94  * This feature can be disabled by
95  * setting this to 0.
96  */
97 static int atr_sample_rate = 20;
98 
99 #define IXGBE_M_ADJ(adapter, rxr, mp)					\
100 	if (adapter->max_frame_size <= (rxr->mbuf_sz - ETHER_ALIGN))	\
101 		m_adj(mp, ETHER_ALIGN)
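/*
 * Note: ETHER_ALIGN is 2, so the adjustment above shifts the start of the
 * received frame by two bytes and lands the IP header on a 4-byte boundary.
 * The shift is skipped when a maximum-sized frame would no longer fit in
 * the (mbuf_sz - ETHER_ALIGN) bytes that remain.
 */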
102 
103 /************************************************************************
104  *  Local Function prototypes
105  ************************************************************************/
106 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
107 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
108 static int           ixgbe_setup_receive_ring(struct rx_ring *);
109 static void          ixgbe_free_receive_buffers(struct rx_ring *);
110 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32,
111                                        struct ixgbe_hw_stats *);
112 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
113 static void          ixgbe_drain(struct ifnet *, struct tx_ring *);
114 static int           ixgbe_xmit(struct tx_ring *, struct mbuf *);
115 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
116                                         struct mbuf *, u32 *, u32 *);
117 static int           ixgbe_tso_setup(struct tx_ring *,
118                                      struct mbuf *, u32 *, u32 *);
119 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
120 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
121                                     struct mbuf *, u32);
122 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
123                                       struct ixgbe_dma_alloc *, int);
124 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
125 
126 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
127 
128 /************************************************************************
129  * ixgbe_legacy_start_locked - Transmit entry point
130  *
131  *   Called by the stack to initiate a transmit.
132  *   The driver will remain in this routine as long as there are
133  *   packets to transmit and transmit resources are available.
134  *   In case resources are not available, the stack is notified
135  *   and the packet is requeued.
136  ************************************************************************/
137 int
138 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
139 {
140 	int rc;
141 	struct mbuf    *m_head;
142 	struct adapter *adapter = txr->adapter;
143 
144 	IXGBE_TX_LOCK_ASSERT(txr);
145 
146 	if (adapter->link_active != LINK_STATE_UP) {
147 		/*
148 		 * Discard all packets buffered in the IFQ to avoid
149 		 * sending stale packets when the link comes back up.
150 		 */
151 		ixgbe_drain(ifp, txr);
152 		return (ENETDOWN);
153 	}
154 	if ((ifp->if_flags & IFF_RUNNING) == 0)
155 		return (ENETDOWN);
156 	if (txr->txr_no_space)
157 		return (ENETDOWN);
158 
159 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
160 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
161 			break;
162 
163 		IFQ_POLL(&ifp->if_snd, m_head);
164 		if (m_head == NULL)
165 			break;
166 
167 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
168 			break;
169 		}
170 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
171 		if (rc != 0) {
172 			m_freem(m_head);
173 			continue;
174 		}
175 
176 		/* Send a copy of the frame to the BPF listener */
177 		bpf_mtap(ifp, m_head, BPF_D_OUT);
178 	}
179 
180 	return IXGBE_SUCCESS;
181 } /* ixgbe_legacy_start_locked */
182 
183 /************************************************************************
184  * ixgbe_legacy_start
185  *
186  *   Called by the stack; this always uses the first tx ring,
187  *   and should not be used with multiqueue tx enabled.
188  ************************************************************************/
189 void
190 ixgbe_legacy_start(struct ifnet *ifp)
191 {
192 	struct adapter *adapter = ifp->if_softc;
193 	struct tx_ring *txr = adapter->tx_rings;
194 
195 	if (ifp->if_flags & IFF_RUNNING) {
196 		IXGBE_TX_LOCK(txr);
197 		ixgbe_legacy_start_locked(ifp, txr);
198 		IXGBE_TX_UNLOCK(txr);
199 	}
200 } /* ixgbe_legacy_start */
201 
202 /************************************************************************
203  * ixgbe_mq_start - Multiqueue Transmit Entry Point
204  *
205  *   (if_transmit function)
206  ************************************************************************/
207 int
208 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
209 {
210 	struct adapter	*adapter = ifp->if_softc;
211 	struct tx_ring	*txr;
212 	int		i;
213 #ifdef RSS
214 	uint32_t bucket_id;
215 #endif
216 
217 	/*
218 	 * When doing RSS, map it to the same outbound queue
219 	 * as the incoming flow would be mapped to.
220 	 *
221 	 * If everything is set up correctly, that should be the
222 	 * same bucket the current CPU is associated with.
223 	 */
224 #ifdef RSS
225 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
226 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
227 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
228 		    &bucket_id) == 0)) {
229 			i = bucket_id % adapter->num_queues;
230 #ifdef IXGBE_DEBUG
231 			if (bucket_id > adapter->num_queues)
232 				if_printf(ifp,
233 				    "bucket_id (%d) > num_queues (%d)\n",
234 				    bucket_id, adapter->num_queues);
235 #endif
236 		} else
237 			i = m->m_pkthdr.flowid % adapter->num_queues;
238 	} else
239 #endif /* RSS */
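		/*
		 * Without RSS, pick the TX queue from the current CPU
		 * index so each CPU tends to keep using the same ring.
		 */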
240 		i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
241 
242 	/* Check for a hung queue and pick alternative */
243 	if (((1ULL << i) & adapter->active_queues) == 0)
244 		i = ffs64(adapter->active_queues);
245 
246 	txr = &adapter->tx_rings[i];
247 
248 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
249 		m_freem(m);
250 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
251 		return ENOBUFS;
252 	}
253 #ifdef IXGBE_ALWAYS_TXDEFER
254 	kpreempt_disable();
255 	softint_schedule(txr->txr_si);
256 	kpreempt_enable();
257 #else
258 	if (IXGBE_TX_TRYLOCK(txr)) {
259 		ixgbe_mq_start_locked(ifp, txr);
260 		IXGBE_TX_UNLOCK(txr);
261 	} else {
262 		if (adapter->txrx_use_workqueue) {
263 			u_int *enqueued;
264 
265 			/*
266 			 * This function itself is not called in interrupt
267 			 * context, but it can be called in fast softint
268 			 * context right after receiving forwarded packets,
269 			 * so the workqueue must be protected against being
270 			 * enqueued twice when the machine handles both local
271 			 * and forwarded packets.
272 			 */
273 			enqueued = percpu_getref(adapter->txr_wq_enqueued);
274 			if (*enqueued == 0) {
275 				*enqueued = 1;
276 				percpu_putref(adapter->txr_wq_enqueued);
277 				workqueue_enqueue(adapter->txr_wq,
278 				    &txr->wq_cookie, curcpu());
279 			} else
280 				percpu_putref(adapter->txr_wq_enqueued);
281 		} else {
282 			kpreempt_disable();
283 			softint_schedule(txr->txr_si);
284 			kpreempt_enable();
285 		}
286 	}
287 #endif
288 
289 	return (0);
290 } /* ixgbe_mq_start */
291 
292 /************************************************************************
293  * ixgbe_mq_start_locked
294  ************************************************************************/
295 int
296 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
297 {
298 	struct mbuf    *next;
299 	int            enqueued = 0, err = 0;
300 
301 	if (txr->adapter->link_active != LINK_STATE_UP) {
302 		/*
303 		 * discard all packets buffered in txr_interq to avoid
304 		 * sending old packets at next link up timing.
305 		 */
306 		ixgbe_drain(ifp, txr);
307 		return (ENETDOWN);
308 	}
309 	if ((ifp->if_flags & IFF_RUNNING) == 0)
310 		return (ENETDOWN);
311 	if (txr->txr_no_space)
312 		return (ENETDOWN);
313 
314 	/* Process the queue */
315 	while ((next = pcq_get(txr->txr_interq)) != NULL) {
316 		if ((err = ixgbe_xmit(txr, next)) != 0) {
317 			m_freem(next);
318 			/* All errors are counted in ixgbe_xmit() */
319 			break;
320 		}
321 		enqueued++;
322 #if __FreeBSD_version >= 1100036
323 		/*
324 		 * Since we're looking at the tx ring, we can check
325 		 * to see if we're a VF by examining our tail register
326 		 * address.
327 		 */
328 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
329 		    (next->m_flags & M_MCAST))
330 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
331 #endif
332 		/* Send a copy of the frame to the BPF listener */
333 		bpf_mtap(ifp, next, BPF_D_OUT);
334 		if ((ifp->if_flags & IFF_RUNNING) == 0)
335 			break;
336 	}
337 
338 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
339 		ixgbe_txeof(txr);
340 
341 	return (err);
342 } /* ixgbe_mq_start_locked */
343 
344 /************************************************************************
345  * ixgbe_deferred_mq_start
346  *
347  *   Called from a softint, and (indirectly) from a workqueue, to drain
348  *   transmit packets.
349  ************************************************************************/
350 void
351 ixgbe_deferred_mq_start(void *arg)
352 {
353 	struct tx_ring *txr = arg;
354 	struct adapter *adapter = txr->adapter;
355 	struct ifnet   *ifp = adapter->ifp;
356 
357 	IXGBE_TX_LOCK(txr);
358 	if (pcq_peek(txr->txr_interq) != NULL)
359 		ixgbe_mq_start_locked(ifp, txr);
360 	IXGBE_TX_UNLOCK(txr);
361 } /* ixgbe_deferred_mq_start */
362 
363 /************************************************************************
364  * ixgbe_deferred_mq_start_work
365  *
366  *   Called from a workqueue to drain queued transmit packets.
367  ************************************************************************/
368 void
369 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
370 {
371 	struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
372 	struct adapter *adapter = txr->adapter;
373 	u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
374 	*enqueued = 0;
375 	percpu_putref(adapter->txr_wq_enqueued);
376 
377 	ixgbe_deferred_mq_start(txr);
378 } /* ixgbe_deferred_mq_start_work */
379 
380 /************************************************************************
381  * ixgbe_drain_all
382  ************************************************************************/
383 void
384 ixgbe_drain_all(struct adapter *adapter)
385 {
386 	struct ifnet *ifp = adapter->ifp;
387 	struct ix_queue *que = adapter->queues;
388 
389 	for (int i = 0; i < adapter->num_queues; i++, que++) {
390 		struct tx_ring  *txr = que->txr;
391 
392 		IXGBE_TX_LOCK(txr);
393 		ixgbe_drain(ifp, txr);
394 		IXGBE_TX_UNLOCK(txr);
395 	}
396 }
397 
398 /************************************************************************
399  * ixgbe_xmit
400  *
401  *   Maps the mbufs to tx descriptors, allowing the
402  *   TX engine to transmit the packets.
403  *
404  *   Return 0 on success, positive on failure
405  ************************************************************************/
406 static int
407 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
408 {
409 	struct adapter          *adapter = txr->adapter;
410 	struct ixgbe_tx_buf     *txbuf;
411 	union ixgbe_adv_tx_desc *txd = NULL;
412 	struct ifnet	        *ifp = adapter->ifp;
413 	int                     i, j, error;
414 	int                     first;
415 	u32                     olinfo_status = 0, cmd_type_len;
416 	bool                    remap = TRUE;
417 	bus_dmamap_t            map;
418 
419 	/* Basic descriptor defines */
420 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
421 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
422 
423 	if (vlan_has_tag(m_head))
424 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
425 
426 	/*
427 	 * It is important to capture the first descriptor
428 	 * used, because its buffer will later record the
429 	 * descriptor we tell the hardware to report back on.
430 	 */
431 	first = txr->next_avail_desc;
432 	txbuf = &txr->tx_buffers[first];
433 	map = txbuf->map;
434 
435 	/*
436 	 * Map the packet for DMA.
437 	 */
438 retry:
439 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
440 	    BUS_DMA_NOWAIT);
441 
442 	if (__predict_false(error)) {
443 		struct mbuf *m;
444 
445 		switch (error) {
446 		case EAGAIN:
447 			txr->q_eagain_tx_dma_setup++;
448 			return EAGAIN;
449 		case ENOMEM:
450 			txr->q_enomem_tx_dma_setup++;
451 			return EAGAIN;
452 		case EFBIG:
453 			/* Try it again? - one try */
454 			if (remap == TRUE) {
455 				remap = FALSE;
456 				/*
457 				 * XXX: m_defrag will choke on
458 				 * non-MCLBYTES-sized clusters
459 				 */
460 				txr->q_efbig_tx_dma_setup++;
461 				m = m_defrag(m_head, M_NOWAIT);
462 				if (m == NULL) {
463 					txr->q_mbuf_defrag_failed++;
464 					return ENOBUFS;
465 				}
466 				m_head = m;
467 				goto retry;
468 			} else {
469 				txr->q_efbig2_tx_dma_setup++;
470 				return error;
471 			}
472 		case EINVAL:
473 			txr->q_einval_tx_dma_setup++;
474 			return error;
475 		default:
476 			txr->q_other_tx_dma_setup++;
477 			return error;
478 		}
479 	}
480 
481 	/* Make certain there are enough descriptors */
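	/*
	 * The two extra descriptors presumably leave room for the offload
	 * context descriptor and keep the ring from filling up completely.
	 */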
482 	if (txr->tx_avail < (map->dm_nsegs + 2)) {
483 		txr->txr_no_space = true;
484 		IXGBE_EVC_ADD(&txr->no_desc_avail, 1);
485 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
486 		return EAGAIN;
487 	}
488 
489 	/*
490 	 * Set up the appropriate offload context;
491 	 * this will consume the first descriptor.
492 	 */
493 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
494 	if (__predict_false(error)) {
495 		return (error);
496 	}
497 
498 #ifdef IXGBE_FDIR
499 	/* Do the flow director magic */
500 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
501 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
502 		++txr->atr_count;
503 		if (txr->atr_count >= atr_sample_rate) {
504 			ixgbe_atr(txr, m_head);
505 			txr->atr_count = 0;
506 		}
507 	}
508 #endif
509 
510 	olinfo_status |= IXGBE_ADVTXD_CC;
511 	i = txr->next_avail_desc;
512 	for (j = 0; j < map->dm_nsegs; j++) {
513 		bus_size_t seglen;
514 		uint64_t segaddr;
515 
516 		txbuf = &txr->tx_buffers[i];
517 		txd = &txr->tx_base[i];
518 		seglen = map->dm_segs[j].ds_len;
519 		segaddr = htole64(map->dm_segs[j].ds_addr);
520 
521 		txd->read.buffer_addr = segaddr;
522 		txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
523 		txd->read.olinfo_status = htole32(olinfo_status);
524 
525 		if (++i == txr->num_desc)
526 			i = 0;
527 	}
528 
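	/*
	 * Mark the final descriptor of the frame: EOP ends the packet and
	 * RS asks the hardware to write back completion (DD) status for it.
	 */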
529 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
530 	txr->tx_avail -= map->dm_nsegs;
531 	txr->next_avail_desc = i;
532 
533 	txbuf->m_head = m_head;
534 	/*
535 	 * Here we swap the map so the last descriptor,
536 	 * which gets the completion interrupt, has the
537 	 * real map, and the first descriptor gets the
538 	 * unused map from this descriptor.
539 	 */
540 	txr->tx_buffers[first].map = txbuf->map;
541 	txbuf->map = map;
542 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
543 	    BUS_DMASYNC_PREWRITE);
544 
545 	/* Set the EOP descriptor that will be marked done */
546 	txbuf = &txr->tx_buffers[first];
547 	txbuf->eop = txd;
548 
549 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
550 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
551 	/*
552 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
553 	 * hardware that this frame is available to transmit.
554 	 */
555 	IXGBE_EVC_ADD(&txr->total_packets, 1);
556 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
557 
558 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
559 	if_statadd_ref(nsr, if_obytes, m_head->m_pkthdr.len);
560 	if (m_head->m_flags & M_MCAST)
561 		if_statinc_ref(nsr, if_omcasts);
562 	IF_STAT_PUTREF(ifp);
563 
564 	/* Mark queue as having work */
565 	if (txr->busy == 0)
566 		txr->busy = 1;
567 
568 	return (0);
569 } /* ixgbe_xmit */
570 
571 /************************************************************************
572  * ixgbe_drain
573  ************************************************************************/
574 static void
575 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
576 {
577 	struct mbuf *m;
578 
579 	IXGBE_TX_LOCK_ASSERT(txr);
580 
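	/*
	 * The legacy if_snd queue is shared by the whole interface, so it
	 * is drained only from ring 0 to flush it exactly once.
	 */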
581 	if (txr->me == 0) {
582 		while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
583 			IFQ_DEQUEUE(&ifp->if_snd, m);
584 			m_freem(m);
585 			IF_DROP(&ifp->if_snd);
586 		}
587 	}
588 
589 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
590 		m_freem(m);
591 		IXGBE_EVC_ADD(&txr->pcq_drops, 1);
592 	}
593 }
594 
595 /************************************************************************
596  * ixgbe_allocate_transmit_buffers
597  *
598  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
599  *   the information needed to transmit a packet on the wire. This is
600  *   called only once at attach; setup is done on every reset.
601  ************************************************************************/
602 static int
603 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
604 {
605 	struct adapter      *adapter = txr->adapter;
606 	device_t            dev = adapter->dev;
607 	struct ixgbe_tx_buf *txbuf;
608 	int                 error, i;
609 
610 	/*
611 	 * Setup DMA descriptor areas.
612 	 */
613 	error = ixgbe_dma_tag_create(
614 	         /*      parent */ adapter->osdep.dmat,
615 	         /*   alignment */ 1,
616 	         /*      bounds */ 0,
617 	         /*     maxsize */ IXGBE_TSO_SIZE,
618 	         /*   nsegments */ adapter->num_segs,
619 	         /*  maxsegsize */ PAGE_SIZE,
620 	         /*       flags */ 0,
621 	                           &txr->txtag);
622 	if (error != 0) {
623 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
624 		goto fail;
625 	}
626 
627 	txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
628 	    adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
629 
630 	/* Create the descriptor buffer dma maps */
631 	txbuf = txr->tx_buffers;
632 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
633 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
634 		if (error != 0) {
635 			aprint_error_dev(dev,
636 			    "Unable to create TX DMA map (%d)\n", error);
637 			goto fail;
638 		}
639 	}
640 
641 	return 0;
642 fail:
643 	/* Free everything; handles the case where we failed midway. */
644 #if 0 /* XXX was FreeBSD */
645 	ixgbe_free_transmit_structures(adapter);
646 #else
647 	ixgbe_free_transmit_buffers(txr);
648 #endif
649 	return (error);
650 } /* ixgbe_allocate_transmit_buffers */
651 
652 /************************************************************************
653  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
654  ************************************************************************/
655 static void
656 ixgbe_setup_transmit_ring(struct tx_ring *txr)
657 {
658 	struct adapter        *adapter = txr->adapter;
659 	struct ixgbe_tx_buf   *txbuf;
660 #ifdef DEV_NETMAP
661 	struct netmap_adapter *na = NA(adapter->ifp);
662 	struct netmap_slot    *slot;
663 #endif /* DEV_NETMAP */
664 
665 	/* Clear the old ring contents */
666 	IXGBE_TX_LOCK(txr);
667 
668 #ifdef DEV_NETMAP
669 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
670 		/*
671 		 * (under lock): if in netmap mode, do some consistency
672 		 * checks and set slot to entry 0 of the netmap ring.
673 		 */
674 		slot = netmap_reset(na, NR_TX, txr->me, 0);
675 	}
676 #endif /* DEV_NETMAP */
677 
678 	bzero((void *)txr->tx_base,
679 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
680 	/* Reset indices */
681 	txr->next_avail_desc = 0;
682 	txr->next_to_clean = 0;
683 
684 	/* Free any existing tx buffers. */
685 	txbuf = txr->tx_buffers;
686 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
687 		if (txbuf->m_head != NULL) {
688 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
689 			    0, txbuf->m_head->m_pkthdr.len,
690 			    BUS_DMASYNC_POSTWRITE);
691 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
692 			m_freem(txbuf->m_head);
693 			txbuf->m_head = NULL;
694 		}
695 
696 #ifdef DEV_NETMAP
697 		/*
698 		 * In netmap mode, set the map for the packet buffer.
699 		 * NOTE: Some drivers (not this one) also need to set
700 		 * the physical buffer address in the NIC ring.
701 		 * Slots in the netmap ring (indexed by "si") are
702 		 * kring->nkr_hwofs positions "ahead" wrt the
703 		 * corresponding slot in the NIC ring. In some drivers
704 		 * (not here) nkr_hwofs can be negative. Function
705 		 * netmap_idx_n2k() handles wraparounds properly.
706 		 */
707 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
708 			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
709 			netmap_load_map(na, txr->txtag,
710 			    txbuf->map, NMB(na, slot + si));
711 		}
712 #endif /* DEV_NETMAP */
713 
714 		/* Clear the EOP descriptor pointer */
715 		txbuf->eop = NULL;
716 	}
717 
718 	/* Set the rate at which we sample packets */
719 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
720 		txr->atr_sample = atr_sample_rate;
721 
722 	/* Set number of descriptors available */
723 	txr->tx_avail = adapter->num_tx_desc;
724 
725 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
726 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
727 	IXGBE_TX_UNLOCK(txr);
728 } /* ixgbe_setup_transmit_ring */
729 
730 /************************************************************************
731  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
732  ************************************************************************/
733 int
734 ixgbe_setup_transmit_structures(struct adapter *adapter)
735 {
736 	struct tx_ring *txr = adapter->tx_rings;
737 
738 	for (int i = 0; i < adapter->num_queues; i++, txr++)
739 		ixgbe_setup_transmit_ring(txr);
740 
741 	return (0);
742 } /* ixgbe_setup_transmit_structures */
743 
744 /************************************************************************
745  * ixgbe_free_transmit_structures - Free all transmit rings.
746  ************************************************************************/
747 void
748 ixgbe_free_transmit_structures(struct adapter *adapter)
749 {
750 	struct tx_ring *txr = adapter->tx_rings;
751 
752 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
753 		ixgbe_free_transmit_buffers(txr);
754 		ixgbe_dma_free(adapter, &txr->txdma);
755 		IXGBE_TX_LOCK_DESTROY(txr);
756 	}
757 	free(adapter->tx_rings, M_DEVBUF);
758 } /* ixgbe_free_transmit_structures */
759 
760 /************************************************************************
761  * ixgbe_free_transmit_buffers
762  *
763  *   Free transmit ring related data structures.
764  ************************************************************************/
765 static void
766 ixgbe_free_transmit_buffers(struct tx_ring *txr)
767 {
768 	struct adapter      *adapter = txr->adapter;
769 	struct ixgbe_tx_buf *tx_buffer;
770 	int                 i;
771 
772 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
773 
774 	if (txr->tx_buffers == NULL)
775 		return;
776 
777 	tx_buffer = txr->tx_buffers;
778 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
779 		if (tx_buffer->m_head != NULL) {
780 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
781 			    0, tx_buffer->m_head->m_pkthdr.len,
782 			    BUS_DMASYNC_POSTWRITE);
783 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
784 			m_freem(tx_buffer->m_head);
785 			tx_buffer->m_head = NULL;
786 			if (tx_buffer->map != NULL) {
787 				ixgbe_dmamap_destroy(txr->txtag,
788 				    tx_buffer->map);
789 				tx_buffer->map = NULL;
790 			}
791 		} else if (tx_buffer->map != NULL) {
792 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
793 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
794 			tx_buffer->map = NULL;
795 		}
796 	}
797 	if (txr->txr_interq != NULL) {
798 		struct mbuf *m;
799 
800 		while ((m = pcq_get(txr->txr_interq)) != NULL)
801 			m_freem(m);
802 		pcq_destroy(txr->txr_interq);
803 	}
804 	if (txr->tx_buffers != NULL) {
805 		free(txr->tx_buffers, M_DEVBUF);
806 		txr->tx_buffers = NULL;
807 	}
808 	if (txr->txtag != NULL) {
809 		ixgbe_dma_tag_destroy(txr->txtag);
810 		txr->txtag = NULL;
811 	}
812 } /* ixgbe_free_transmit_buffers */
813 
814 /************************************************************************
815  * ixgbe_tx_ctx_setup
816  *
817  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
818  ************************************************************************/
819 static int
820 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
821     u32 *cmd_type_len, u32 *olinfo_status)
822 {
823 	struct adapter                   *adapter = txr->adapter;
824 	struct ixgbe_adv_tx_context_desc *TXD;
825 	struct ether_vlan_header         *eh;
826 #ifdef INET
827 	struct ip                        *ip;
828 #endif
829 #ifdef INET6
830 	struct ip6_hdr                   *ip6;
831 #endif
832 	int                              ehdrlen, ip_hlen = 0;
833 	int                              offload = TRUE;
834 	int                              ctxd = txr->next_avail_desc;
835 	u32                              vlan_macip_lens = 0;
836 	u32                              type_tucmd_mlhl = 0;
837 	u16                              vtag = 0;
838 	u16                              etype;
839 	u8                               ipproto = 0;
840 	char                             *l3d;
841 
842 
843 	/* First check if TSO is to be used */
844 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
845 		int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
846 
847 		if (rv != 0)
848 			IXGBE_EVC_ADD(&adapter->tso_err, 1);
849 		return rv;
850 	}
851 
852 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
853 		offload = FALSE;
854 
855 	/* Indicate the whole packet as payload when not doing TSO */
856 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
857 
858 	/* Now ready a context descriptor */
859 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
860 
861 	/*
862 	 * In advanced descriptors the vlan tag must
863 	 * be placed into the context descriptor. Hence
864 	 * we need to make one even if not doing offloads.
865 	 */
866 	if (vlan_has_tag(mp)) {
867 		vtag = htole16(vlan_get_tag(mp));
868 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
869 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
870 	           (offload == FALSE))
871 		return (0);
872 
873 	/*
874 	 * Determine where frame payload starts.
875 	 * Jump over vlan headers if already present,
876 	 * helpful for QinQ too.
877 	 */
878 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
879 	eh = mtod(mp, struct ether_vlan_header *);
880 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
881 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
882 		etype = ntohs(eh->evl_proto);
883 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
884 	} else {
885 		etype = ntohs(eh->evl_encap_proto);
886 		ehdrlen = ETHER_HDR_LEN;
887 	}
888 
889 	/* Set the ether header length */
890 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
891 
892 	if (offload == FALSE)
893 		goto no_offloads;
894 
895 	/*
896 	 * If the first mbuf only includes the ethernet header,
897 	 * jump to the next one.
898 	 * XXX: This assumes the stack splits mbufs containing headers
899 	 *      on header boundaries
900 	 * XXX: And assumes the entire IP header is contained in one mbuf
901 	 */
902 	if (mp->m_len == ehdrlen && mp->m_next)
903 		l3d = mtod(mp->m_next, char *);
904 	else
905 		l3d = mtod(mp, char *) + ehdrlen;
906 
907 	switch (etype) {
908 #ifdef INET
909 	case ETHERTYPE_IP:
910 		ip = (struct ip *)(l3d);
911 		ip_hlen = ip->ip_hl << 2;
912 		ipproto = ip->ip_p;
913 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
914 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
915 		    ip->ip_sum == 0);
916 		break;
917 #endif
918 #ifdef INET6
919 	case ETHERTYPE_IPV6:
920 		ip6 = (struct ip6_hdr *)(l3d);
921 		ip_hlen = sizeof(struct ip6_hdr);
922 		ipproto = ip6->ip6_nxt;
923 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
924 		break;
925 #endif
926 	default:
927 		offload = false;
928 		break;
929 	}
930 
931 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
932 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
933 
934 	vlan_macip_lens |= ip_hlen;
935 
936 	/* No support for offloads for non-L4 next headers */
937 	switch (ipproto) {
938 	case IPPROTO_TCP:
939 		if (mp->m_pkthdr.csum_flags &
940 		    (M_CSUM_TCPv4 | M_CSUM_TCPv6))
941 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942 		else
943 			offload = false;
944 		break;
945 	case IPPROTO_UDP:
946 		if (mp->m_pkthdr.csum_flags &
947 		    (M_CSUM_UDPv4 | M_CSUM_UDPv6))
948 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
949 		else
950 			offload = false;
951 		break;
952 	default:
953 		offload = false;
954 		break;
955 	}
956 
957 	if (offload) /* Insert L4 checksum into data descriptors */
958 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
959 
960 no_offloads:
961 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
962 
963 	/* Now copy bits into descriptor */
964 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
965 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
966 	TXD->seqnum_seed = htole32(0);
967 	TXD->mss_l4len_idx = htole32(0);
968 
969 	/* We've consumed the first desc, adjust counters */
970 	if (++ctxd == txr->num_desc)
971 		ctxd = 0;
972 	txr->next_avail_desc = ctxd;
973 	--txr->tx_avail;
974 
975 	return (0);
976 } /* ixgbe_tx_ctx_setup */
977 
978 /************************************************************************
979  * ixgbe_tso_setup
980  *
981  *   Setup work for hardware segmentation offload (TSO) on
982  *   adapters using advanced tx descriptors
983  ************************************************************************/
984 static int
985 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
986     u32 *olinfo_status)
987 {
988 	struct ixgbe_adv_tx_context_desc *TXD;
989 	struct ether_vlan_header         *eh;
990 #ifdef INET6
991 	struct ip6_hdr                   *ip6;
992 #endif
993 #ifdef INET
994 	struct ip                        *ip;
995 #endif
996 	struct tcphdr                    *th;
997 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
998 	u32                              vlan_macip_lens = 0;
999 	u32                              type_tucmd_mlhl = 0;
1000 	u32                              mss_l4len_idx = 0, paylen;
1001 	u16                              vtag = 0, eh_type;
1002 
1003 	/*
1004 	 * Determine where frame payload starts.
1005 	 * Jump over vlan headers if already present
1006 	 */
1007 	eh = mtod(mp, struct ether_vlan_header *);
1008 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1009 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1010 		eh_type = eh->evl_proto;
1011 	} else {
1012 		ehdrlen = ETHER_HDR_LEN;
1013 		eh_type = eh->evl_encap_proto;
1014 	}
1015 
1016 	switch (ntohs(eh_type)) {
1017 #ifdef INET
1018 	case ETHERTYPE_IP:
1019 		ip = (struct ip *)(mp->m_data + ehdrlen);
1020 		if (ip->ip_p != IPPROTO_TCP)
1021 			return (ENXIO);
1022 		ip->ip_sum = 0;
1023 		ip_hlen = ip->ip_hl << 2;
1024 		th = (struct tcphdr *)((char *)ip + ip_hlen);
1025 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1026 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
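		/*
		 * For TSO the TCP checksum field is seeded with the
		 * pseudo-header checksum (excluding the length); the
		 * hardware then completes it for every segment it emits.
		 */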
1027 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1028 		/* Tell transmit desc to also do IPv4 checksum. */
1029 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1030 		break;
1031 #endif
1032 #ifdef INET6
1033 	case ETHERTYPE_IPV6:
1034 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1035 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
1036 		if (ip6->ip6_nxt != IPPROTO_TCP)
1037 			return (ENXIO);
1038 		ip_hlen = sizeof(struct ip6_hdr);
1039 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1040 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1041 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1042 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1043 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1044 		break;
1045 #endif
1046 	default:
1047 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1048 		    __func__, ntohs(eh_type));
1049 		break;
1050 	}
1051 
1052 	ctxd = txr->next_avail_desc;
1053 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1054 
1055 	tcp_hlen = th->th_off << 2;
1056 
1057 	/* This is used in the transmit desc in encap */
1058 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1059 
1060 	/* VLAN MACLEN IPLEN */
1061 	if (vlan_has_tag(mp)) {
1062 		vtag = htole16(vlan_get_tag(mp));
1063 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1064 	}
1065 
1066 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1067 	vlan_macip_lens |= ip_hlen;
1068 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1069 
1070 	/* ADV DTYPE TUCMD */
1071 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1072 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1073 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1074 
1075 	/* MSS L4LEN IDX */
1076 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1077 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1078 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1079 
1080 	TXD->seqnum_seed = htole32(0);
1081 
1082 	if (++ctxd == txr->num_desc)
1083 		ctxd = 0;
1084 
1085 	txr->tx_avail--;
1086 	txr->next_avail_desc = ctxd;
1087 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1088 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1089 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1090 	IXGBE_EVC_ADD(&txr->tso_tx, 1);
1091 
1092 	return (0);
1093 } /* ixgbe_tso_setup */
1094 
1095 
1096 /************************************************************************
1097  * ixgbe_txeof
1098  *
1099  *   Examine each tx_buffer in the used queue. If the hardware is done
1100  *   processing the packet then free associated resources. The
1101  *   tx_buffer is put back on the free queue.
1102  ************************************************************************/
1103 bool
1104 ixgbe_txeof(struct tx_ring *txr)
1105 {
1106 	struct adapter		*adapter = txr->adapter;
1107 	struct ifnet		*ifp = adapter->ifp;
1108 	struct ixgbe_tx_buf	*buf;
1109 	union ixgbe_adv_tx_desc *txd;
1110 	u32			work, processed = 0;
1111 	u32			limit = adapter->tx_process_limit;
1112 
1113 	KASSERT(mutex_owned(&txr->tx_mtx));
1114 
1115 #ifdef DEV_NETMAP
1116 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1117 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1118 		struct netmap_adapter *na = NA(adapter->ifp);
1119 		struct netmap_kring *kring = na->tx_rings[txr->me];
1120 		txd = txr->tx_base;
1121 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1122 		    BUS_DMASYNC_POSTREAD);
1123 		/*
1124 		 * In netmap mode, all the work is done in the context
1125 		 * of the client thread. Interrupt handlers only wake up
1126 		 * clients, which may be sleeping on individual rings
1127 		 * or on a global resource for all rings.
1128 		 * To implement tx interrupt mitigation, we wake up the client
1129 		 * thread roughly every half ring, even if the NIC interrupts
1130 		 * more frequently. This is implemented as follows:
1131 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1132 		 *   the slot that should wake up the thread (nkr_num_slots
1133 		 *   means the user thread should not be woken up);
1134 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1135 		 *   or the slot has the DD bit set.
1136 		 */
1137 		if (kring->nr_kflags < kring->nkr_num_slots &&
1138 		    le32toh(txd[kring->nr_kflags].wb.status) & IXGBE_TXD_STAT_DD) {
1139 			netmap_tx_irq(ifp, txr->me);
1140 		}
1141 		return false;
1142 	}
1143 #endif /* DEV_NETMAP */
1144 
1145 	if (txr->tx_avail == txr->num_desc) {
1146 		txr->busy = 0;
1147 		return false;
1148 	}
1149 
1150 	/* Get work starting point */
1151 	work = txr->next_to_clean;
1152 	buf = &txr->tx_buffers[work];
1153 	txd = &txr->tx_base[work];
1154 	work -= txr->num_desc; /* The distance to ring end */
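	/*
	 * 'work' is kept biased by -num_desc (in unsigned arithmetic), so
	 * it reaches zero exactly when the index wraps past the end of the
	 * ring; the wrap test below is then a simple check for !work.
	 */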
1155 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1156 	    BUS_DMASYNC_POSTREAD);
1157 
1158 	do {
1159 		union ixgbe_adv_tx_desc *eop = buf->eop;
1160 		if (eop == NULL) /* No work */
1161 			break;
1162 
1163 		if ((le32toh(eop->wb.status) & IXGBE_TXD_STAT_DD) == 0)
1164 			break;	/* I/O not complete */
1165 
1166 		if (buf->m_head) {
1167 			txr->bytes += buf->m_head->m_pkthdr.len;
1168 			bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1169 			    0, buf->m_head->m_pkthdr.len,
1170 			    BUS_DMASYNC_POSTWRITE);
1171 			ixgbe_dmamap_unload(txr->txtag, buf->map);
1172 			m_freem(buf->m_head);
1173 			buf->m_head = NULL;
1174 		}
1175 		buf->eop = NULL;
1176 		txr->txr_no_space = false;
1177 		++txr->tx_avail;
1178 
1179 		/* Clean the whole range if the packet spans multiple descriptors */
1180 		while (txd != eop) {
1181 			++txd;
1182 			++buf;
1183 			++work;
1184 			/* wrap the ring? */
1185 			if (__predict_false(!work)) {
1186 				work -= txr->num_desc;
1187 				buf = txr->tx_buffers;
1188 				txd = txr->tx_base;
1189 			}
1190 			if (buf->m_head) {
1191 				txr->bytes +=
1192 				    buf->m_head->m_pkthdr.len;
1193 				bus_dmamap_sync(txr->txtag->dt_dmat,
1194 				    buf->map,
1195 				    0, buf->m_head->m_pkthdr.len,
1196 				    BUS_DMASYNC_POSTWRITE);
1197 				ixgbe_dmamap_unload(txr->txtag,
1198 				    buf->map);
1199 				m_freem(buf->m_head);
1200 				buf->m_head = NULL;
1201 			}
1202 			++txr->tx_avail;
1203 			buf->eop = NULL;
1204 
1205 		}
1206 		++txr->packets;
1207 		++processed;
1208 		if_statinc(ifp, if_opackets);
1209 
1210 		/* Try the next packet */
1211 		++txd;
1212 		++buf;
1213 		++work;
1214 		/* reset with a wrap */
1215 		if (__predict_false(!work)) {
1216 			work -= txr->num_desc;
1217 			buf = txr->tx_buffers;
1218 			txd = txr->tx_base;
1219 		}
1220 		prefetch(txd);
1221 	} while (__predict_true(--limit));
1222 
1223 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1224 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1225 
1226 	work += txr->num_desc;
1227 	txr->next_to_clean = work;
1228 
1229 	/*
1230 	 * Queue hang detection: we know there is work
1231 	 * outstanding, or the first return above would
1232 	 * have been taken, so increment busy if nothing
1233 	 * managed to get cleaned.  The local timer will
1234 	 * then check this counter and mark the queue as
1235 	 * HUNG if it exceeds the maximum attempts.
1236 	 */
1237 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1238 		++txr->busy;
1239 	/*
1240 	 * If anything was cleaned we reset the state to 1;
1241 	 * note this clears HUNG if it was set.
1242 	 */
1243 	if (processed)
1244 		txr->busy = 1;
1245 
1246 	if (txr->tx_avail == txr->num_desc)
1247 		txr->busy = 0;
1248 
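	/*
	 * Return true when the loop stopped because the processing limit
	 * was exhausted, i.e. there may still be descriptors to clean.
	 */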
1249 	return ((limit > 0) ? false : true);
1250 } /* ixgbe_txeof */
1251 
1252 /************************************************************************
1253  * ixgbe_rsc_count
1254  *
1255  *   Used to detect a descriptor that has been merged by Hardware RSC.
1256  ************************************************************************/
1257 static inline u32
1258 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1259 {
1260 	return (le32toh(rx->wb.lower.lo_dword.data) &
1261 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1262 } /* ixgbe_rsc_count */
1263 
1264 /************************************************************************
1265  * ixgbe_setup_hw_rsc
1266  *
1267  *   Initialize the Hardware RSC (LRO) feature on 82599
1268  *   for an RX ring.  It is toggled by the LRO capability
1269  *   even though it is transparent to the stack.
1270  *
1271  *   NOTE: Since this HW feature only works with IPv4 and
1272  *         testing has shown soft LRO to be as effective,
1273  *         this feature will be disabled by default.
1274  ************************************************************************/
1275 static void
1276 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1277 {
1278 	struct	adapter  *adapter = rxr->adapter;
1279 	struct	ixgbe_hw *hw = &adapter->hw;
1280 	u32              rscctrl, rdrxctl;
1281 
1282 	/* If turning LRO/RSC off we need to disable it */
1283 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1284 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1285 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1286 		return;
1287 	}
1288 
1289 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1290 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1291 #ifdef DEV_NETMAP
1292 	/* Always strip CRC unless Netmap disabled it */
1293 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1294 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1295 	    ix_crcstrip)
1296 #endif /* DEV_NETMAP */
1297 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1298 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1299 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1300 
1301 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1302 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1303 	/*
1304 	 * Limit the total number of descriptors that
1305 	 * can be combined, so it does not exceed 64K
1306 	 */
1307 	if (rxr->mbuf_sz == MCLBYTES)
1308 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1309 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1310 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1311 	else if (rxr->mbuf_sz == MJUM9BYTES)
1312 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1313 	else  /* Using 16K cluster */
1314 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1315 
1316 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1317 
1318 	/* Enable TCP header recognition */
1319 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1320 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1321 
1322 	/* Disable RSC for ACK packets */
1323 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1324 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1325 
1326 	rxr->hw_rsc = TRUE;
1327 } /* ixgbe_setup_hw_rsc */
1328 
1329 /************************************************************************
1330  * ixgbe_refresh_mbufs
1331  *
1332  *   Refresh mbuf buffers for RX descriptor rings
1333  *    - now keeps its own state so discards due to resource
1334  *      exhaustion are unnecessary; if an mbuf cannot be obtained
1335  *      it just returns, keeping its placeholder, so it can simply
1336  *      be called again later to retry.
1337  *
1338  *   XXX NetBSD TODO:
1339  *    - The ixgbe_rxeof() function always preallocates mbuf cluster,
1340  *      so the ixgbe_refresh_mbufs() function can be simplified.
1341  *
1342  ************************************************************************/
1343 static void
1344 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1345 {
1346 	struct adapter      *adapter = rxr->adapter;
1347 	struct ixgbe_rx_buf *rxbuf;
1348 	struct mbuf         *mp;
1349 	int                 i, error;
1350 	bool                refreshed = false;
1351 
1352 	i = rxr->next_to_refresh;
1353 	/* next_to_refresh points to the previous one */
1354 	if (++i == rxr->num_desc)
1355 		i = 0;
1356 
1357 	while (i != limit) {
1358 		rxbuf = &rxr->rx_buffers[i];
1359 		if (__predict_false(rxbuf->buf == NULL)) {
1360 			mp = ixgbe_getcl();
1361 			if (mp == NULL) {
1362 				IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1363 				goto update;
1364 			}
1365 			mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1366 			IXGBE_M_ADJ(adapter, rxr, mp);
1367 		} else
1368 			mp = rxbuf->buf;
1369 
1370 		/* If we're dealing with an mbuf that was copied rather
1371 		 * than replaced, there's no need to go through busdma.
1372 		 */
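		/*
		 * (IXGBE_RX_COPY is presumably set by ixgbe_rxeof() when a
		 * small frame was copied into a fresh mbuf and the original
		 * cluster, along with its DMA mapping, was left in place.)
		 */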
1373 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1374 			/* Get the memory mapping */
1375 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1376 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1377 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1378 			if (__predict_false(error != 0)) {
1379 				device_printf(adapter->dev, "Refresh mbufs: "
1380 				    "payload dmamap load failure - %d\n",
1381 				    error);
1382 				m_free(mp);
1383 				rxbuf->buf = NULL;
1384 				goto update;
1385 			}
1386 			rxbuf->buf = mp;
1387 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1388 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1389 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1390 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1391 		} else {
1392 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1393 			rxbuf->flags &= ~IXGBE_RX_COPY;
1394 		}
1395 
1396 		refreshed = true;
1397 		/* next_to_refresh points to the previous one */
1398 		rxr->next_to_refresh = i;
1399 		if (++i == rxr->num_desc)
1400 			i = 0;
1401 	}
1402 
1403 update:
1404 	if (refreshed) /* Update hardware tail index */
1405 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1406 
1407 	return;
1408 } /* ixgbe_refresh_mbufs */
1409 
1410 /************************************************************************
1411  * ixgbe_allocate_receive_buffers
1412  *
1413  *   Allocate memory for rx_buffer structures. Since we use one
1414  *   rx_buffer per received packet, the maximum number of rx_buffer's
1415  *   that we'll need is equal to the number of receive descriptors
1416  *   that we've allocated.
1417  ************************************************************************/
1418 static int
1419 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1420 {
1421 	struct adapter      *adapter = rxr->adapter;
1422 	device_t            dev = adapter->dev;
1423 	struct ixgbe_rx_buf *rxbuf;
1424 	int                 bsize, error;
1425 
1426 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1427 	rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1428 
1429 	error = ixgbe_dma_tag_create(
1430 	         /*      parent */ adapter->osdep.dmat,
1431 	         /*   alignment */ 1,
1432 	         /*      bounds */ 0,
1433 	         /*     maxsize */ MJUM16BYTES,
1434 	         /*   nsegments */ 1,
1435 	         /*  maxsegsize */ MJUM16BYTES,
1436 	         /*       flags */ 0,
1437 	                           &rxr->ptag);
1438 	if (error != 0) {
1439 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1440 		goto fail;
1441 	}
1442 
1443 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1444 		rxbuf = &rxr->rx_buffers[i];
1445 		error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1446 		if (error) {
1447 			aprint_error_dev(dev, "Unable to create RX dma map\n");
1448 			goto fail;
1449 		}
1450 	}
1451 
1452 	return (0);
1453 
1454 fail:
1455 	/* Frees all, but can handle partial completion */
1456 	ixgbe_free_receive_structures(adapter);
1457 
1458 	return (error);
1459 } /* ixgbe_allocate_receive_buffers */
1460 
1461 /************************************************************************
1462  * ixgbe_free_receive_ring
1463  ************************************************************************/
1464 static void
1465 ixgbe_free_receive_ring(struct rx_ring *rxr)
1466 {
1467 	for (int i = 0; i < rxr->num_desc; i++) {
1468 		ixgbe_rx_discard(rxr, i);
1469 	}
1470 } /* ixgbe_free_receive_ring */
1471 
1472 /************************************************************************
1473  * ixgbe_setup_receive_ring
1474  *
1475  *   Initialize a receive ring and its buffers.
1476  ************************************************************************/
1477 static int
1478 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1479 {
1480 	struct adapter        *adapter;
1481 	struct ixgbe_rx_buf   *rxbuf;
1482 #ifdef LRO
1483 	struct ifnet          *ifp;
1484 	struct lro_ctrl       *lro = &rxr->lro;
1485 #endif /* LRO */
1486 #ifdef DEV_NETMAP
1487 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1488 	struct netmap_slot    *slot;
1489 #endif /* DEV_NETMAP */
1490 	int                   rsize, error = 0;
1491 
1492 	adapter = rxr->adapter;
1493 #ifdef LRO
1494 	ifp = adapter->ifp;
1495 #endif /* LRO */
1496 
1497 	/* Clear the ring contents */
1498 	IXGBE_RX_LOCK(rxr);
1499 
1500 #ifdef DEV_NETMAP
1501 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1502 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1503 #endif /* DEV_NETMAP */
1504 
1505 	rsize = roundup2(adapter->num_rx_desc *
1506 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1507 	bzero((void *)rxr->rx_base, rsize);
1508 	/* Cache the size */
1509 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1510 
1511 	/* Free current RX buffer structs and their mbufs */
1512 	ixgbe_free_receive_ring(rxr);
1513 
1514 	/* Now replenish the mbufs */
1515 	for (int j = 0; j != rxr->num_desc; ++j) {
1516 		struct mbuf *mp;
1517 
1518 		rxbuf = &rxr->rx_buffers[j];
1519 
1520 #ifdef DEV_NETMAP
1521 		/*
1522 		 * In netmap mode, fill the map and set the buffer
1523 		 * address in the NIC ring, considering the offset
1524 		 * between the netmap and NIC rings (see comment in
1525 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1526 		 * an mbuf, so end the block with a continue;
1527 		 */
1528 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1529 			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1530 			uint64_t paddr;
1531 			void *addr;
1532 
1533 			addr = PNMB(na, slot + sj, &paddr);
1534 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1535 			/* Update descriptor and the cached value */
1536 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1537 			rxbuf->addr = htole64(paddr);
1538 			continue;
1539 		}
1540 #endif /* DEV_NETMAP */
1541 
1542 		rxbuf->flags = 0;
1543 		rxbuf->buf = ixgbe_getcl();
1544 		if (rxbuf->buf == NULL) {
1545 			IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1546 			error = ENOBUFS;
1547 			goto fail;
1548 		}
1549 		mp = rxbuf->buf;
1550 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1551 		IXGBE_M_ADJ(adapter, rxr, mp);
1552 		/* Get the memory mapping */
1553 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1554 		    mp, BUS_DMA_NOWAIT);
1555 		if (error != 0) {
1556 			/*
1557 			 * Clear this entry for later cleanup in
1558 			 * ixgbe_rx_discard() which is called via
1559 			 * ixgbe_free_receive_ring().
1560 			 */
1561 			m_freem(mp);
1562 			rxbuf->buf = NULL;
1563 			goto fail;
1564 		}
1565 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1566 		    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1567 		/* Update the descriptor and the cached value */
1568 		rxr->rx_base[j].read.pkt_addr =
1569 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1570 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1571 	}
1572 
1573 	/* Setup our descriptor indices */
1574 	rxr->next_to_check = 0;
1575 	rxr->next_to_refresh = adapter->num_rx_desc - 1; /* Fully allocated */
1576 	rxr->lro_enabled = FALSE;
1577 	rxr->discard_multidesc = false;
1578 	IXGBE_EVC_STORE(&rxr->rx_copies, 0);
1579 #if 0 /* NetBSD */
1580 	IXGBE_EVC_STORE(&rxr->rx_bytes, 0);
1581 #if 1	/* Fix inconsistency */
1582 	IXGBE_EVC_STORE(&rxr->rx_packets, 0);
1583 #endif
1584 #endif
1585 	rxr->vtag_strip = FALSE;
1586 
1587 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1588 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1589 
1590 	/*
1591 	 * Now set up the LRO interface
1592 	 */
1593 	if (ixgbe_rsc_enable)
1594 		ixgbe_setup_hw_rsc(rxr);
1595 #ifdef LRO
1596 	else if (ifp->if_capenable & IFCAP_LRO) {
1597 		device_t dev = adapter->dev;
1598 		int err = tcp_lro_init(lro);
1599 		if (err) {
1600 			device_printf(dev, "LRO Initialization failed!\n");
1601 			goto fail;
1602 		}
1603 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1604 		rxr->lro_enabled = TRUE;
1605 		lro->ifp = adapter->ifp;
1606 	}
1607 #endif /* LRO */
1608 
1609 	IXGBE_RX_UNLOCK(rxr);
1610 
1611 	return (0);
1612 
1613 fail:
1614 	ixgbe_free_receive_ring(rxr);
1615 	IXGBE_RX_UNLOCK(rxr);
1616 
1617 	return (error);
1618 } /* ixgbe_setup_receive_ring */
1619 
1620 /************************************************************************
1621  * ixgbe_setup_receive_structures - Initialize all receive rings.
1622  ************************************************************************/
1623 int
1624 ixgbe_setup_receive_structures(struct adapter *adapter)
1625 {
1626 	struct rx_ring *rxr = adapter->rx_rings;
1627 	int            j;
1628 
1629 	INIT_DEBUGOUT("ixgbe_setup_receive_structures");
1630 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1631 		if (ixgbe_setup_receive_ring(rxr))
1632 			goto fail;
1633 
1634 	return (0);
1635 fail:
1636 	/*
1637 	 * Free the RX buffers allocated so far.  We only handle
1638 	 * the rings that completed; the failing ring has already
1639 	 * cleaned up after itself.  Ring 'j' failed, so it is the terminus.
1640 	 */
1641 	for (int i = 0; i < j; ++i) {
1642 		rxr = &adapter->rx_rings[i];
1643 		IXGBE_RX_LOCK(rxr);
1644 		ixgbe_free_receive_ring(rxr);
1645 		IXGBE_RX_UNLOCK(rxr);
1646 	}
1647 
1648 	return (ENOBUFS);
1649 } /* ixgbe_setup_receive_structures */
1650 
1651 
1652 /************************************************************************
1653  * ixgbe_free_receive_structures - Free all receive rings.
1654  ************************************************************************/
1655 void
1656 ixgbe_free_receive_structures(struct adapter *adapter)
1657 {
1658 	struct rx_ring *rxr = adapter->rx_rings;
1659 
1660 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1661 
1662 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1663 		ixgbe_free_receive_buffers(rxr);
1664 #ifdef LRO
1665 		/* Free LRO memory */
1666 		tcp_lro_free(&rxr->lro);
1667 #endif /* LRO */
1668 		/* Free the ring memory as well */
1669 		ixgbe_dma_free(adapter, &rxr->rxdma);
1670 		IXGBE_RX_LOCK_DESTROY(rxr);
1671 	}
1672 
1673 	free(adapter->rx_rings, M_DEVBUF);
1674 } /* ixgbe_free_receive_structures */
1675 
1676 
1677 /************************************************************************
1678  * ixgbe_free_receive_buffers - Free receive ring data structures
1679  ************************************************************************/
1680 static void
1681 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1682 {
1683 	struct adapter      *adapter = rxr->adapter;
1684 	struct ixgbe_rx_buf *rxbuf;
1685 
1686 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1687 
1688 	/* Cleanup any existing buffers */
1689 	if (rxr->rx_buffers != NULL) {
1690 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1691 			rxbuf = &rxr->rx_buffers[i];
1692 			ixgbe_rx_discard(rxr, i);
1693 			if (rxbuf->pmap != NULL) {
1694 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1695 				rxbuf->pmap = NULL;
1696 			}
1697 		}
1698 
1699 		if (rxr->rx_buffers != NULL) {
1700 			free(rxr->rx_buffers, M_DEVBUF);
1701 			rxr->rx_buffers = NULL;
1702 		}
1703 	}
1704 
1705 	if (rxr->ptag != NULL) {
1706 		ixgbe_dma_tag_destroy(rxr->ptag);
1707 		rxr->ptag = NULL;
1708 	}
1709 
1710 	return;
1711 } /* ixgbe_free_receive_buffers */
1712 
1713 /************************************************************************
1714  * ixgbe_rx_input
1715  ************************************************************************/
1716 static __inline void
1717 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1718     u32 ptype)
1719 {
1720 	struct adapter	*adapter = ifp->if_softc;
1721 
1722 #ifdef LRO
1723 	struct ethercom *ec = &adapter->osdep.ec;
1724 
1725 	/*
1726 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1727 	 * has been verified by hardware, and the Ethernet header must not
1728 	 * carry a VLAN tag. For IPv6 we do not yet support extension headers.
1729 	 */
1730 	if (rxr->lro_enabled &&
1731 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1732 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1733 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1734 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1735 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1736 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1737 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1738 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1739 		/*
1740 		 * Send to the stack if:
1741 		 *  - LRO not enabled, or
1742 		 *  - no LRO resources, or
1743 		 *  - lro enqueue fails
1744 		 */
1745 		if (rxr->lro.lro_cnt != 0)
1746 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1747 				return;
1748 	}
1749 #endif /* LRO */
1750 
1751 	if_percpuq_enqueue(adapter->ipq, m);
1752 } /* ixgbe_rx_input */
1753 
1754 /************************************************************************
1755  * ixgbe_rx_discard
1756  ************************************************************************/
1757 static __inline void
1758 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1759 {
1760 	struct ixgbe_rx_buf *rbuf;
1761 
1762 	rbuf = &rxr->rx_buffers[i];
1763 
1764 	/*
1765 	 * With advanced descriptors the writeback clobbers the buffer
1766 	 * addresses, so it's easier to just free the existing mbufs and take
1767 	 * the normal refresh path to get new buffers and mappings.
1768 	 */
1769 
1770 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1771 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1772 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1773 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1774 		m_freem(rbuf->fmp);
1775 		rbuf->fmp = NULL;
1776 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1777 	} else if (rbuf->buf) {
1778 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1779 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1780 		ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1781 		m_free(rbuf->buf);
1782 		rbuf->buf = NULL;
1783 	}
1784 
1785 	rbuf->flags = 0;
1786 
1787 	return;
1788 } /* ixgbe_rx_discard */
1789 
1790 
1791 /************************************************************************
1792  * ixgbe_rxeof
1793  *
1794  *   Executes in interrupt context. It replenishes the
1795  *   mbufs in the descriptor ring and sends data which has
1796  *   been DMA'ed into host memory to the upper layer.
1797  *
1798  *   Return TRUE for more work, FALSE for all clean.
1799  ************************************************************************/
1800 bool
1801 ixgbe_rxeof(struct ix_queue *que)
1802 {
1803 	struct adapter		*adapter = que->adapter;
1804 	struct rx_ring		*rxr = que->rxr;
1805 	struct ifnet		*ifp = adapter->ifp;
1806 #ifdef LRO
1807 	struct lro_ctrl		*lro = &rxr->lro;
1808 #endif /* LRO */
1809 	union ixgbe_adv_rx_desc	*cur;
1810 	struct ixgbe_rx_buf	*rbuf, *nbuf;
1811 	int			i, nextp, processed = 0;
1812 	u32			staterr = 0;
1813 	u32			loopcount = 0, numdesc;
1814 	u32			limit = adapter->rx_process_limit;
1815 	u32			rx_copy_len = adapter->rx_copy_len;
1816 	bool			discard_multidesc = rxr->discard_multidesc;
1817 	bool			wraparound = false;
1818 	unsigned int		syncremain;
1819 #ifdef RSS
1820 	u16			pkt_info;
1821 #endif
1822 
1823 	IXGBE_RX_LOCK(rxr);
1824 
1825 #ifdef DEV_NETMAP
1826 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1827 		/* Same as the txeof routine: wakeup clients on intr. */
1828 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1829 			IXGBE_RX_UNLOCK(rxr);
1830 			return (FALSE);
1831 		}
1832 	}
1833 #endif /* DEV_NETMAP */
1834 
1835 	/* Sync the ring: rx_process_limit descs, or up to the end on wrap */
1836 	if ((rxr->next_to_check + limit) <= rxr->num_desc) {
1837 		/* Non-wraparound */
1838 		numdesc = limit;
1839 		syncremain = 0;
1840 	} else {
1841 		/* Wraparound. First sync up to the end of the ring. */
1842 		numdesc = rxr->num_desc - rxr->next_to_check;
1843 
1844 		/* Remember how much remains to sync after the wrap */
1845 		syncremain = limit - numdesc;
1846 	}
1847 	bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1848 	    rxr->rxdma.dma_map,
1849 	    sizeof(union ixgbe_adv_rx_desc) * rxr->next_to_check,
1850 	    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1851 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
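	/*
	 * Any descriptors past the wrap point (syncremain) are synced inside
	 * the loop below, once the index wraps back to zero.
	 */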
1852 
1853 	/*
1854 	 * The maximum number of loop iterations is rx_process_limit. If
1855 	 * discard_multidesc is true, keep processing so that a broken packet
1856 	 * is not handed to the upper layer.
1857 	 */
1858 	for (i = rxr->next_to_check;
1859 	     (loopcount < limit) || (discard_multidesc == true);) {
1860 
1861 		struct mbuf *sendmp, *mp;
1862 		struct mbuf *newmp;
1863 		u32         rsc, ptype;
1864 		u16         len;
1865 		u16         vtag = 0;
1866 		bool        eop;
1867 		bool        discard = false;
1868 
1869 		if (wraparound) {
1870 			/* Sync the part remaining after the wrap. */
1871 			KASSERT(syncremain != 0);
1872 			numdesc = syncremain;
1873 			wraparound = false;
1874 		} else if (__predict_false(loopcount >= limit)) {
1875 			KASSERT(discard_multidesc == true);
1876 			numdesc = 1;
1877 		} else
1878 			numdesc = 0;
1879 
1880 		if (numdesc != 0)
1881 			bus_dmamap_sync(rxr->rxdma.dma_tag->dt_dmat,
1882 			    rxr->rxdma.dma_map, 0,
1883 			    sizeof(union ixgbe_adv_rx_desc) * numdesc,
1884 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1885 
1886 		cur = &rxr->rx_base[i];
1887 		staterr = le32toh(cur->wb.upper.status_error);
1888 #ifdef RSS
1889 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1890 #endif
1891 
1892 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1893 			break;
1894 
1895 		loopcount++;
1896 		sendmp = newmp = NULL;
1897 		nbuf = NULL;
1898 		rsc = 0;
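		/*
		 * Clearing the status also clears DD, so this descriptor is
		 * not mistaken for a completed one if it is examined again
		 * before being refreshed (see the discard path below).
		 */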
1899 		cur->wb.upper.status_error = 0;
1900 		rbuf = &rxr->rx_buffers[i];
1901 		mp = rbuf->buf;
1902 
1903 		len = le16toh(cur->wb.upper.length);
1904 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1905 		    IXGBE_RXDADV_PKTTYPE_MASK;
1906 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1907 
1908 		/* Make sure bad packets are discarded */
1909 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1910 #if __FreeBSD_version >= 1100036
1911 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1912 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1913 #endif
1914 			IXGBE_EVC_ADD(&rxr->rx_discarded, 1);
1915 			ixgbe_rx_discard(rxr, i);
1916 			discard_multidesc = false;
1917 			goto next_desc;
1918 		}
1919 
1920 		if (__predict_false(discard_multidesc))
1921 			discard = true;
1922 		else {
1923 			/* Pre-alloc new mbuf. */
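			/*
			 * Allocating the replacement up front means that on
			 * failure we can fall into the discard path and keep
			 * reusing the current buffer, so the ring never loses
			 * an mbuf.
			 */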
1924 
1925 			if ((rbuf->fmp == NULL) &&
1926 			    eop && (len <= rx_copy_len)) {
1927 				/* For short packet. See below. */
1928 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1929 				if (__predict_false(sendmp == NULL)) {
1930 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1931 					discard = true;
1932 				}
1933 			} else {
1934 				/* For long packet. */
1935 				newmp = ixgbe_getcl();
1936 				if (__predict_false(newmp == NULL)) {
1937 					IXGBE_EVC_ADD(&rxr->no_mbuf, 1);
1938 					discard = true;
1939 				}
1940 			}
1941 		}
1942 
1943 		if (__predict_false(discard)) {
1944 			/*
1945 			 * Descriptor initialization is already done by the
1946 			 * above code (cur->wb.upper.status_error = 0).
1947 			 * So, we can reuse current rbuf->buf for new packet.
1948 			 *
1949 			 * Rewrite the buffer addr, see comment in
1950 			 * ixgbe_rx_discard().
1951 			 */
1952 			cur->read.pkt_addr = rbuf->addr;
1953 			m_freem(rbuf->fmp);
1954 			rbuf->fmp = NULL;
1955 			if (!eop) {
1956 				/* Discard the entire packet. */
1957 				discard_multidesc = true;
1958 			} else
1959 				discard_multidesc = false;
1960 			goto next_desc;
1961 		}
1962 		discard_multidesc = false;
1963 
1964 		bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1965 		    rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1966 
1967 		/*
1968 		 * On the 82599, which supports a hardware
1969 		 * LRO (called HW RSC), packets need not be
1970 		 * fragmented across sequential descriptors;
1971 		 * instead, the next descriptor is indicated
1972 		 * in bits of the current descriptor. This
1973 		 * also means that we might process more than
1974 		 * one packet at a time, something that was
1975 		 * never true before; it required eliminating
1976 		 * the global chain pointers in favor of what
1977 		 * we are doing here.  -jfv
1978 		 */
1979 		if (!eop) {
1980 			/*
1981 			 * Figure out the next descriptor
1982 			 * of this frame.
1983 			 */
1984 			if (rxr->hw_rsc == TRUE) {
1985 				rsc = ixgbe_rsc_count(cur);
1986 				rxr->rsc_num += (rsc - 1);
1987 			}
1988 			if (rsc) { /* Get hardware index */
1989 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1990 				    IXGBE_RXDADV_NEXTP_SHIFT);
1991 			} else { /* Just sequential */
1992 				nextp = i + 1;
1993 				if (nextp == adapter->num_rx_desc)
1994 					nextp = 0;
1995 			}
1996 			nbuf = &rxr->rx_buffers[nextp];
1997 			prefetch(nbuf);
1998 		}
1999 		/*
2000 		 * Rather than using the fmp/lmp global pointers
2001 		 * we now keep the head of a packet chain in the
2002 		 * buffer struct and pass this along from one
2003 		 * descriptor to the next, until we get EOP.
2004 		 */
2005 		/*
2006 		 * See if there is a stored chain head; if so, this
2007 		 * descriptor holds a secondary fragment.
2008 		 */
2009 		if (rbuf->fmp != NULL) {
2010 			/* Secondary frag */
2011 			sendmp = rbuf->fmp;
2012 
2013 			/* Update new (used in future) mbuf */
2014 			newmp->m_pkthdr.len = newmp->m_len = rxr->mbuf_sz;
2015 			IXGBE_M_ADJ(adapter, rxr, newmp);
2016 			rbuf->buf = newmp;
2017 			rbuf->fmp = NULL;
2018 
2019 			/* For secondary frag */
2020 			mp->m_len = len;
2021 			mp->m_flags &= ~M_PKTHDR;
2022 
2023 			/* For sendmp */
2024 			sendmp->m_pkthdr.len += mp->m_len;
2025 		} else {
2026 			/*
2027 			 * It's the first segment of a multi-descriptor
2028 			 * packet or a single segment which contains a full
2029 			 * packet.
2030 			 */
2031 
2032 			if (eop && (len <= rx_copy_len)) {
2033 				/*
2034 				 * Optimization: this might be a small packet,
2035 				 * perhaps just a TCP ACK. Copy into a new mbuf
2036 				 * and leave the old mbuf+cluster for re-use.
2037 				 */
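				/*
				 * ETHER_ALIGN offsets the payload so that the
				 * IP header following the 14-byte Ethernet
				 * header ends up 32-bit aligned.
				 */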
2038 				sendmp->m_data += ETHER_ALIGN;
2039 				memcpy(mtod(sendmp, void *),
2040 				    mtod(mp, void *), len);
2041 				IXGBE_EVC_ADD(&rxr->rx_copies, 1);
2042 				rbuf->flags |= IXGBE_RX_COPY;
2043 			} else {
2044 				/* For long packet */
2045 
2046 				/* Update new (used in future) mbuf */
2047 				newmp->m_pkthdr.len = newmp->m_len
2048 				    = rxr->mbuf_sz;
2049 				IXGBE_M_ADJ(adapter, rxr, newmp);
2050 				rbuf->buf = newmp;
2051 				rbuf->fmp = NULL;
2052 
2053 				/* For sendmp */
2054 				sendmp = mp;
2055 			}
2056 
2057 			/* First descriptor of a non-packet-split chain */
2058 			sendmp->m_pkthdr.len = sendmp->m_len = len;
2059 		}
2060 		++processed;
2061 
2062 		/* Pass the head pointer on */
2063 		if (eop == 0) {
2064 			nbuf->fmp = sendmp;
2065 			sendmp = NULL;
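			/*
			 * Link this fragment to the mbuf posted at the next
			 * descriptor; the next fragment will land there.
			 */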
2066 			mp->m_next = nbuf->buf;
2067 		} else { /* Sending this frame */
2068 			m_set_rcvif(sendmp, ifp);
2069 			++rxr->packets;
2070 			IXGBE_EVC_ADD(&rxr->rx_packets, 1);
2071 			/* capture data for AIM */
2072 			rxr->bytes += sendmp->m_pkthdr.len;
2073 			IXGBE_EVC_ADD(&rxr->rx_bytes, sendmp->m_pkthdr.len);
2074 			/* Process vlan info */
2075 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
2076 				vtag = le16toh(cur->wb.upper.vlan);
2077 			if (vtag) {
2078 				vlan_set_tag(sendmp, vtag);
2079 			}
2080 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
2081 				ixgbe_rx_checksum(staterr, sendmp, ptype,
2082 				   &adapter->stats.pf);
2083 			}
2084 
2085 #if 0 /* FreeBSD */
2086 			/*
2087 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
2088 			 * and never cleared. This means we have RSS hash
2089 			 * available to be used.
2090 			 */
2091 			if (adapter->num_queues > 1) {
2092 				sendmp->m_pkthdr.flowid =
2093 				    le32toh(cur->wb.lower.hi_dword.rss);
2094 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
2095 				case IXGBE_RXDADV_RSSTYPE_IPV4:
2096 					M_HASHTYPE_SET(sendmp,
2097 					    M_HASHTYPE_RSS_IPV4);
2098 					break;
2099 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2100 					M_HASHTYPE_SET(sendmp,
2101 					    M_HASHTYPE_RSS_TCP_IPV4);
2102 					break;
2103 				case IXGBE_RXDADV_RSSTYPE_IPV6:
2104 					M_HASHTYPE_SET(sendmp,
2105 					    M_HASHTYPE_RSS_IPV6);
2106 					break;
2107 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2108 					M_HASHTYPE_SET(sendmp,
2109 					    M_HASHTYPE_RSS_TCP_IPV6);
2110 					break;
2111 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2112 					M_HASHTYPE_SET(sendmp,
2113 					    M_HASHTYPE_RSS_IPV6_EX);
2114 					break;
2115 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2116 					M_HASHTYPE_SET(sendmp,
2117 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
2118 					break;
2119 #if __FreeBSD_version > 1100000
2120 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2121 					M_HASHTYPE_SET(sendmp,
2122 					    M_HASHTYPE_RSS_UDP_IPV4);
2123 					break;
2124 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2125 					M_HASHTYPE_SET(sendmp,
2126 					    M_HASHTYPE_RSS_UDP_IPV6);
2127 					break;
2128 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2129 					M_HASHTYPE_SET(sendmp,
2130 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
2131 					break;
2132 #endif
2133 				default:
2134 					M_HASHTYPE_SET(sendmp,
2135 					    M_HASHTYPE_OPAQUE_HASH);
2136 				}
2137 			} else {
2138 				sendmp->m_pkthdr.flowid = que->msix;
2139 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2140 			}
2141 #endif
2142 		}
2143 next_desc:
2144 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2145 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2146 
2147 		/* Advance our pointers to the next descriptor. */
2148 		if (++i == rxr->num_desc) {
2149 			wraparound = true;
2150 			i = 0;
2151 		}
2152 		rxr->next_to_check = i;
2153 
2154 		/* Now send to the stack or do LRO */
2155 		if (sendmp != NULL)
2156 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2157 
2158 		/* Every 8 descriptors we go to refresh mbufs */
2159 		if (processed == 8) {
2160 			ixgbe_refresh_mbufs(rxr, i);
2161 			processed = 0;
2162 		}
2163 	}
2164 
2165 	/* Save the current status */
2166 	rxr->discard_multidesc = discard_multidesc;
2167 
2168 	/* Refresh any remaining buf structs */
2169 	if (ixgbe_rx_unrefreshed(rxr))
2170 		ixgbe_refresh_mbufs(rxr, i);
2171 
2172 	IXGBE_RX_UNLOCK(rxr);
2173 
2174 #ifdef LRO
2175 	/*
2176 	 * Flush any outstanding LRO work
2177 	 */
2178 	tcp_lro_flush_all(lro);
2179 #endif /* LRO */
2180 
2181 	/*
2182 	 * Still have cleaning to do?
2183 	 */
2184 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2185 		return (TRUE);
2186 
2187 	return (FALSE);
2188 } /* ixgbe_rxeof */
2189 
2190 
2191 /************************************************************************
2192  * ixgbe_rx_checksum
2193  *
2194  *   Verify that the hardware indicated that the checksum is valid.
2195  *   Inform the stack of the checksum status so that the stack
2196  *   doesn't spend time verifying the checksum again.
2197  ************************************************************************/
2198 static void
2199 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2200     struct ixgbe_hw_stats *stats)
2201 {
2202 	u16  status = (u16)staterr;
2203 	u8   errors = (u8)(staterr >> 24);
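	/*
	 * The status bits tested here live in the low 16 bits of staterr;
	 * the error bits tested here live in its top byte.
	 */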
2204 #if 0
2205 	bool sctp = false;
2206 
2207 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2208 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2209 		sctp = true;
2210 #endif
2211 
2212 	/* IPv4 checksum */
2213 	if (status & IXGBE_RXD_STAT_IPCS) {
2214 		IXGBE_EVC_ADD(&stats->ipcs, 1);
2215 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
2216 			/* IP Checksum Good */
2217 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2218 		} else {
2219 			IXGBE_EVC_ADD(&stats->ipcs_bad, 1);
2220 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2221 		}
2222 	}
2223 	/* TCP/UDP/SCTP checksum */
2224 	if (status & IXGBE_RXD_STAT_L4CS) {
2225 		IXGBE_EVC_ADD(&stats->l4cs, 1);
2226 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2227 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2228 			mp->m_pkthdr.csum_flags |= type;
2229 		} else {
2230 			IXGBE_EVC_ADD(&stats->l4cs_bad, 1);
2231 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2232 		}
2233 	}
2234 } /* ixgbe_rx_checksum */
2235 
2236 /************************************************************************
2237  * ixgbe_dma_malloc
2238  ************************************************************************/
2239 int
2240 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2241 		struct ixgbe_dma_alloc *dma, const int mapflags)
2242 {
2243 	device_t dev = adapter->dev;
2244 	int      r, rsegs;
2245 
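	/*
	 * Allocation sequence: create a DMA tag, allocate the memory, map it
	 * into kernel virtual address space, create a DMA map, and load the
	 * map so dma_paddr can be handed to the hardware.
	 */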
2246 	r = ixgbe_dma_tag_create(
2247 	     /*      parent */ adapter->osdep.dmat,
2248 	     /*   alignment */ DBA_ALIGN,
2249 	     /*      bounds */ 0,
2250 	     /*     maxsize */ size,
2251 	     /*   nsegments */ 1,
2252 	     /*  maxsegsize */ size,
2253 	     /*       flags */ BUS_DMA_ALLOCNOW,
2254 			       &dma->dma_tag);
2255 	if (r != 0) {
2256 		aprint_error_dev(dev,
2257 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2258 		    r);
2259 		goto fail_0;
2260 	}
2261 
2262 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2263 	    dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2264 	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2265 	if (r != 0) {
2266 		aprint_error_dev(dev,
2267 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2268 		goto fail_1;
2269 	}
2270 
2271 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2272 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT);
2273 	if (r != 0) {
2274 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2275 		    __func__, r);
2276 		goto fail_2;
2277 	}
2278 
2279 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2280 	if (r != 0) {
2281 		aprint_error_dev(dev,
2282 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2283 		goto fail_3;
2284 	}
2285 
2286 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2287 	    dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2288 	if (r != 0) {
2289 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2290 		    __func__, r);
2291 		goto fail_4;
2292 	}
2293 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2294 	dma->dma_size = size;
2295 	return 0;
2296 fail_4:
2297 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2298 fail_3:
2299 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2300 fail_2:
2301 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2302 fail_1:
2303 	ixgbe_dma_tag_destroy(dma->dma_tag);
2304 fail_0:
2305 
2306 	return (r);
2307 } /* ixgbe_dma_malloc */
2308 
2309 /************************************************************************
2310  * ixgbe_dma_free
2311  ************************************************************************/
2312 void
2313 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2314 {
2315 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2316 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2317 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2318 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2319 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2320 	ixgbe_dma_tag_destroy(dma->dma_tag);
2321 } /* ixgbe_dma_free */
2322 
2323 
2324 /************************************************************************
2325  * ixgbe_allocate_queues
2326  *
2327  *   Allocate memory for the transmit and receive rings, and then
2328  *   the descriptors associated with each, called only once at attach.
2329  ************************************************************************/
2330 int
2331 ixgbe_allocate_queues(struct adapter *adapter)
2332 {
2333 	device_t	dev = adapter->dev;
2334 	struct ix_queue	*que;
2335 	struct tx_ring	*txr;
2336 	struct rx_ring	*rxr;
2337 	int             rsize, tsize, error = IXGBE_SUCCESS;
2338 	int             txconf = 0, rxconf = 0;
2339 
2340 	/* First, allocate the top level queue structs */
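	/*
	 * The queue, TX ring and RX ring arrays below are allocated with
	 * M_WAITOK | M_ZERO, so the results are not NULL-checked here.
	 */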
2341 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2342 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2343 
2344 	/* Second, allocate the TX ring struct memory */
2345 	adapter->tx_rings = malloc(sizeof(struct tx_ring) *
2346 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2347 
2348 	/* Third, allocate the RX ring */
2349 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2350 	    adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2351 
2352 	/* For the ring itself */
2353 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2354 	    DBA_ALIGN);
2355 
2356 	/*
2357 	 * Now set up the TX queues. txconf is needed to handle the
2358 	 * possibility that things fail midcourse, in which case we
2359 	 * need to undo the memory allocations gracefully.
2360 	 */
2361 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2362 		/* Set up some basics */
2363 		txr = &adapter->tx_rings[i];
2364 		txr->adapter = adapter;
2365 		txr->txr_interq = NULL;
2366 		/* In case SR-IOV is enabled, align the index properly */
2367 #ifdef PCI_IOV
2368 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2369 		    i);
2370 #else
2371 		txr->me = i;
2372 #endif
2373 		txr->num_desc = adapter->num_tx_desc;
2374 
2375 		/* Initialize the TX side lock */
2376 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2377 
2378 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2379 		    BUS_DMA_NOWAIT)) {
2380 			aprint_error_dev(dev,
2381 			    "Unable to allocate TX Descriptor memory\n");
2382 			error = ENOMEM;
2383 			goto err_tx_desc;
2384 		}
2385 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2386 		bzero((void *)txr->tx_base, tsize);
2387 
2388 		/* Now allocate transmit buffers for the ring */
2389 		if (ixgbe_allocate_transmit_buffers(txr)) {
2390 			aprint_error_dev(dev,
2391 			    "Critical Failure setting up transmit buffers\n");
2392 			error = ENOMEM;
2393 			goto err_tx_desc;
2394 		}
2395 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2396 			/* Allocate a buf ring */
2397 			txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2398 			if (txr->txr_interq == NULL) {
2399 				aprint_error_dev(dev,
2400 				    "Critical Failure setting up buf ring\n");
2401 				error = ENOMEM;
2402 				goto err_tx_desc;
2403 			}
2404 		}
2405 	}
2406 
2407 	/*
2408 	 * Next the RX queues...
2409 	 */
2410 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2411 	    DBA_ALIGN);
2412 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2413 		rxr = &adapter->rx_rings[i];
2414 		/* Set up some basics */
2415 		rxr->adapter = adapter;
2416 #ifdef PCI_IOV
2417 		/* In case SR-IOV is enabled, align the index properly */
2418 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2419 		    i);
2420 #else
2421 		rxr->me = i;
2422 #endif
2423 		rxr->num_desc = adapter->num_rx_desc;
2424 
2425 		/* Initialize the RX side lock */
2426 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2427 
2428 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2429 		    BUS_DMA_NOWAIT)) {
2430 			aprint_error_dev(dev,
2431 			    "Unable to allocate RX Descriptor memory\n");
2432 			error = ENOMEM;
2433 			goto err_rx_desc;
2434 		}
2435 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2436 		bzero((void *)rxr->rx_base, rsize);
2437 
2438 		/* Allocate receive buffers for the ring */
2439 		if (ixgbe_allocate_receive_buffers(rxr)) {
2440 			aprint_error_dev(dev,
2441 			    "Critical Failure setting up receive buffers\n");
2442 			error = ENOMEM;
2443 			goto err_rx_desc;
2444 		}
2445 	}
2446 
2447 	/*
2448 	 * Finally set up the queue holding structs
2449 	 */
2450 	for (int i = 0; i < adapter->num_queues; i++) {
2451 		que = &adapter->queues[i];
2452 		que->adapter = adapter;
2453 		que->me = i;
2454 		que->txr = &adapter->tx_rings[i];
2455 		que->rxr = &adapter->rx_rings[i];
2456 
2457 		mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2458 		que->disabled_count = 0;
2459 	}
2460 
2461 	return (0);
2462 
2463 err_rx_desc:
2464 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2465 		ixgbe_dma_free(adapter, &rxr->rxdma);
2466 err_tx_desc:
2467 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2468 		ixgbe_dma_free(adapter, &txr->txdma);
2469 	free(adapter->rx_rings, M_DEVBUF);
2470 	free(adapter->tx_rings, M_DEVBUF);
2471 	free(adapter->queues, M_DEVBUF);
2472 	return (error);
2473 } /* ixgbe_allocate_queues */
2474 
2475 /************************************************************************
2476  * ixgbe_free_queues
2477  *
2478  *   Free descriptors for the transmit and receive rings, and then
2479  *   the memory associated with each.
2480  ************************************************************************/
2481 void
2482 ixgbe_free_queues(struct adapter *adapter)
2483 {
2484 	struct ix_queue *que;
2485 	int i;
2486 
2487 	ixgbe_free_transmit_structures(adapter);
2488 	ixgbe_free_receive_structures(adapter);
2489 	for (i = 0; i < adapter->num_queues; i++) {
2490 		que = &adapter->queues[i];
2491 		mutex_destroy(&que->dc_mtx);
2492 	}
2493 	free(adapter->queues, M_DEVBUF);
2494 } /* ixgbe_free_queues */
2495