xref: /openbsd-src/sys/dev/pci/if_ix.c (revision 7c0ec4b8992567abb1e1536622dc789a9a39d9f1)
1 /*	$OpenBSD: if_ix.c,v 1.217 2024/09/04 07:54:52 mglocker Exp $	*/
2 
3 /******************************************************************************
4 
5   Copyright (c) 2001-2013, Intel Corporation
6   All rights reserved.
7 
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12       this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17 
18    3. Neither the name of the Intel Corporation nor the names of its
19       contributors may be used to endorse or promote products derived from
20       this software without specific prior written permission.
21 
22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32   POSSIBILITY OF SUCH DAMAGE.
33 
34 ******************************************************************************/
35 /* FreeBSD: src/sys/dev/ixgbe/ixgbe.c 251964 Jun 18 21:28:19 2013 UTC */
36 
37 #include <dev/pci/if_ix.h>
38 #include <dev/pci/ixgbe_type.h>
39 
40 /*
41  * Our TCP/IP Stack is unable to handle packets greater than MAXMCLBYTES.
42  * This interface is unable to handle packets greater than IXGBE_TSO_SIZE.
43  */
44 CTASSERT(MAXMCLBYTES <= IXGBE_TSO_SIZE);
45 
46 /*********************************************************************
47  *  Driver version
48  *********************************************************************/
49 /* char ixgbe_driver_version[] = "2.5.13"; */
50 
51 /*********************************************************************
52  *  PCI Device ID Table
53  *
54  *  Used by probe to select devices to load on
55  *********************************************************************/
56 
57 const struct pci_matchid ixgbe_devices[] = {
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598 },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598_BX },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598AF_DUAL },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598AF },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598AT },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598AT2 },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598AT_DUAL },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598EB_CX4 },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598EB_CX4_DUAL },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598EB_XF_LR },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598EB_SFP },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598_SR_DUAL_EM },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82598_DA_DUAL },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_KX4 },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_KX4_MEZZ },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_XAUI },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_COMBO_BP },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_BPLANE_FCOE },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_CX4 },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_T3_LOM },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_EM },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_SF_QP },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_SF2 },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_SFP_FCOE },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599EN_SFP },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82599_QSFP_SF_QP },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X540T },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X540T1 },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550T },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550T1 },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_KX4 },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_KR },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_SFP },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_10G_T },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_X_1G_T },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_KR },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_KR_L },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_SFP_N },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_SFP },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_SGMII },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_SGMII_L },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_10G_T },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_1G_T },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X550EM_A_1G_T_L }
103 };
104 
105 /*********************************************************************
106  *  Function prototypes
107  *********************************************************************/
108 int	ixgbe_probe(struct device *, void *, void *);
109 void	ixgbe_attach(struct device *, struct device *, void *);
110 int	ixgbe_detach(struct device *, int);
111 int	ixgbe_activate(struct device *, int);
112 void	ixgbe_start(struct ifqueue *);
113 int	ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
114 int	ixgbe_rxrinfo(struct ix_softc *, struct if_rxrinfo *);
115 int	ixgbe_get_sffpage(struct ix_softc *, struct if_sffpage *);
116 void	ixgbe_watchdog(struct ifnet *);
117 void	ixgbe_init(void *);
118 void	ixgbe_stop(void *);
119 void	ixgbe_media_status(struct ifnet *, struct ifmediareq *);
120 int	ixgbe_media_change(struct ifnet *);
121 void	ixgbe_identify_hardware(struct ix_softc *);
122 int	ixgbe_allocate_pci_resources(struct ix_softc *);
123 int	ixgbe_allocate_legacy(struct ix_softc *);
124 int	ixgbe_allocate_msix(struct ix_softc *);
125 void	ixgbe_setup_msix(struct ix_softc *);
126 int	ixgbe_allocate_queues(struct ix_softc *);
127 void	ixgbe_free_pci_resources(struct ix_softc *);
128 void	ixgbe_setup_interface(struct ix_softc *);
129 void	ixgbe_config_gpie(struct ix_softc *);
130 void	ixgbe_config_delay_values(struct ix_softc *);
131 void	ixgbe_add_media_types(struct ix_softc *);
132 void	ixgbe_config_link(struct ix_softc *);
133 
134 int	ixgbe_allocate_transmit_buffers(struct ix_txring *);
135 int	ixgbe_setup_transmit_structures(struct ix_softc *);
136 int	ixgbe_setup_transmit_ring(struct ix_txring *);
137 void	ixgbe_initialize_transmit_units(struct ix_softc *);
138 void	ixgbe_free_transmit_structures(struct ix_softc *);
139 void	ixgbe_free_transmit_buffers(struct ix_txring *);
140 
141 int	ixgbe_allocate_receive_buffers(struct ix_rxring *);
142 int	ixgbe_setup_receive_structures(struct ix_softc *);
143 int	ixgbe_setup_receive_ring(struct ix_rxring *);
144 void	ixgbe_initialize_receive_units(struct ix_softc *);
145 void	ixgbe_free_receive_structures(struct ix_softc *);
146 void	ixgbe_free_receive_buffers(struct ix_rxring *);
147 void	ixgbe_initialize_rss_mapping(struct ix_softc *);
148 int	ixgbe_rxfill(struct ix_rxring *);
149 void	ixgbe_rxrefill(void *);
150 
151 int	ixgbe_intr(struct ix_softc *sc);
152 void	ixgbe_enable_intr(struct ix_softc *);
153 void	ixgbe_disable_intr(struct ix_softc *);
154 int	ixgbe_txeof(struct ix_txring *);
155 int	ixgbe_rxeof(struct ix_rxring *);
156 void	ixgbe_rx_offload(uint32_t, uint16_t, struct mbuf *);
157 void	ixgbe_iff(struct ix_softc *);
158 void	ixgbe_map_queue_statistics(struct ix_softc *);
159 void	ixgbe_update_link_status(struct ix_softc *);
160 int	ixgbe_get_buf(struct ix_rxring *, int);
161 int	ixgbe_encap(struct ix_txring *, struct mbuf *);
162 int	ixgbe_dma_malloc(struct ix_softc *, bus_size_t,
163 		    struct ixgbe_dma_alloc *, int);
164 void	ixgbe_dma_free(struct ix_softc *, struct ixgbe_dma_alloc *);
165 static int
166 	ixgbe_tx_ctx_setup(struct ix_txring *, struct mbuf *, uint32_t *,
167 	    uint32_t *);
168 void	ixgbe_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
169 void	ixgbe_configure_ivars(struct ix_softc *);
170 uint8_t	*ixgbe_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
171 
172 void	ixgbe_setup_vlan_hw_support(struct ix_softc *);
173 
174 /* Support for pluggable optic modules */
175 void	ixgbe_handle_mod(struct ix_softc *);
176 void	ixgbe_handle_msf(struct ix_softc *);
177 void	ixgbe_handle_phy(struct ix_softc *);
178 
179 /* Legacy (single vector interrupt handler */
180 int	ixgbe_legacy_intr(void *);
181 void	ixgbe_enable_queue(struct ix_softc *, uint32_t);
182 void	ixgbe_enable_queues(struct ix_softc *);
183 void	ixgbe_disable_queue(struct ix_softc *, uint32_t);
184 
185 /* MSI-X (multiple vectors interrupt handlers)  */
186 int	ixgbe_link_intr(void *);
187 int	ixgbe_queue_intr(void *);
188 
189 #if NKSTAT > 0
190 static void	ix_kstats(struct ix_softc *);
191 static void	ix_rxq_kstats(struct ix_softc *, struct ix_rxring *);
192 static void	ix_txq_kstats(struct ix_softc *, struct ix_txring *);
193 static void	ix_kstats_tick(void *);
194 #endif
195 
196 /*********************************************************************
197  *  OpenBSD Device Interface Entry Points
198  *********************************************************************/
199 
200 struct cfdriver ix_cd = {
201 	NULL, "ix", DV_IFNET
202 };
203 
204 const struct cfattach ix_ca = {
205 	sizeof(struct ix_softc), ixgbe_probe, ixgbe_attach, ixgbe_detach,
206 	ixgbe_activate
207 };
208 
209 int ixgbe_smart_speed = ixgbe_smart_speed_on;
210 int ixgbe_enable_msix = 1;
211 
212 /*********************************************************************
213  *  Device identification routine
214  *
215  *  ixgbe_probe determines if the driver should be loaded on
216  *  adapter based on PCI vendor/device id of the adapter.
217  *
218  *  return 0 on success, positive on failure
219  *********************************************************************/
220 
221 int
222 ixgbe_probe(struct device *parent, void *match, void *aux)
223 {
224 	INIT_DEBUGOUT("ixgbe_probe: begin");
225 
226 	return (pci_matchbyid((struct pci_attach_args *)aux, ixgbe_devices,
227 	    nitems(ixgbe_devices)));
228 }
229 
230 /*********************************************************************
231  *  Device initialization routine
232  *
233  *  The attach entry point is called when the driver is being loaded.
234  *  This routine identifies the type of hardware, allocates all resources
235  *  and initializes the hardware.
236  *
237  *  return 0 on success, positive on failure
238  *********************************************************************/
239 
240 void
241 ixgbe_attach(struct device *parent, struct device *self, void *aux)
242 {
243 	struct pci_attach_args	*pa = (struct pci_attach_args *)aux;
244 	struct ix_softc		*sc = (struct ix_softc *)self;
245 	int			 error = 0;
246 	uint16_t		 csum;
247 	uint32_t			 ctrl_ext;
248 	struct ixgbe_hw		*hw = &sc->hw;
249 
250 	INIT_DEBUGOUT("ixgbe_attach: begin");
251 
252 	sc->osdep.os_sc = sc;
253 	sc->osdep.os_pa = *pa;
254 
255 	rw_init(&sc->sfflock, "ixsff");
256 
257 #if NKSTAT > 0
258 	ix_kstats(sc);
259 #endif
260 
261 	/* Determine hardware revision */
262 	ixgbe_identify_hardware(sc);
263 
264 	/* Indicate to RX setup to use Jumbo Clusters */
265 	sc->num_tx_desc = DEFAULT_TXD;
266 	sc->num_rx_desc = DEFAULT_RXD;
267 
268 	/* Do base PCI setup - map BAR0 */
269 	if (ixgbe_allocate_pci_resources(sc))
270 		goto err_out;
271 
272 	/* Allocate our TX/RX Queues */
273 	if (ixgbe_allocate_queues(sc))
274 		goto err_out;
275 
276 	/* Allocate multicast array memory. */
277 	sc->mta = mallocarray(IXGBE_ETH_LENGTH_OF_ADDRESS,
278 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
279 	if (sc->mta == NULL) {
280 		printf(": Can not allocate multicast setup array\n");
281 		goto err_late;
282 	}
283 
284 	/* Initialize the shared code */
285 	error = ixgbe_init_shared_code(hw);
286 	if (error) {
287 		printf(": Unable to initialize the shared code\n");
288 		goto err_late;
289 	}
290 
291 	/* Make sure we have a good EEPROM before we read from it */
292 	if (sc->hw.eeprom.ops.validate_checksum(&sc->hw, &csum) < 0) {
293 		printf(": The EEPROM Checksum Is Not Valid\n");
294 		goto err_late;
295 	}
296 
297 	error = ixgbe_init_hw(hw);
298 	if (error == IXGBE_ERR_EEPROM_VERSION) {
299 		printf(": This device is a pre-production adapter/"
300 		    "LOM.  Please be aware there may be issues associated "
301 		    "with your hardware.\nIf you are experiencing problems "
302 		    "please contact your Intel or hardware representative "
303 		    "who provided you with this hardware.\n");
304 	} else if (error && (error != IXGBE_ERR_SFP_NOT_PRESENT &&
305 	    error != IXGBE_ERR_SFP_NOT_SUPPORTED)) {
306 		printf(": Hardware Initialization Failure\n");
307 		goto err_late;
308 	}
309 
310 	bcopy(sc->hw.mac.addr, sc->arpcom.ac_enaddr,
311 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
312 
313 	if (sc->sc_intrmap)
314 		error = ixgbe_allocate_msix(sc);
315 	else
316 		error = ixgbe_allocate_legacy(sc);
317 	if (error)
318 		goto err_late;
319 
320 	/* Enable the optics for 82599 SFP+ fiber */
321 	if (sc->hw.mac.ops.enable_tx_laser)
322 		sc->hw.mac.ops.enable_tx_laser(&sc->hw);
323 
324 	/* Enable power to the phy */
325 	if (hw->phy.ops.set_phy_power)
326 		hw->phy.ops.set_phy_power(&sc->hw, TRUE);
327 
328 	/* Setup OS specific network interface */
329 	ixgbe_setup_interface(sc);
330 
331 	/* Get the PCI-E bus info and determine LAN ID */
332 	hw->mac.ops.get_bus_info(hw);
333 
334 	/* Set an initial default flow control value */
335 	sc->fc = ixgbe_fc_full;
336 
337 	/* let hardware know driver is loaded */
338 	ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
339 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
340 	IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
341 
342 	printf(", address %s\n", ether_sprintf(sc->hw.mac.addr));
343 
344 	INIT_DEBUGOUT("ixgbe_attach: end");
345 	return;
346 
347 err_late:
348 	ixgbe_free_transmit_structures(sc);
349 	ixgbe_free_receive_structures(sc);
350 err_out:
351 	ixgbe_free_pci_resources(sc);
352 	free(sc->mta, M_DEVBUF, IXGBE_ETH_LENGTH_OF_ADDRESS *
353 	    MAX_NUM_MULTICAST_ADDRESSES);
354 }
355 
356 /*********************************************************************
357  *  Device removal routine
358  *
359  *  The detach entry point is called when the driver is being removed.
360  *  This routine stops the adapter and deallocates all the resources
361  *  that were allocated for driver operation.
362  *
363  *  return 0 on success, positive on failure
364  *********************************************************************/
365 
366 int
367 ixgbe_detach(struct device *self, int flags)
368 {
369 	struct ix_softc *sc = (struct ix_softc *)self;
370 	struct ifnet *ifp = &sc->arpcom.ac_if;
371 	uint32_t	ctrl_ext;
372 
373 	INIT_DEBUGOUT("ixgbe_detach: begin");
374 
375 	ixgbe_stop(sc);
376 
377 	/* let hardware know driver is unloading */
378 	ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
379 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
380 	IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
381 
382 	ether_ifdetach(ifp);
383 	if_detach(ifp);
384 
385 	ixgbe_free_pci_resources(sc);
386 
387 	ixgbe_free_transmit_structures(sc);
388 	ixgbe_free_receive_structures(sc);
389 	free(sc->mta, M_DEVBUF, IXGBE_ETH_LENGTH_OF_ADDRESS *
390 	    MAX_NUM_MULTICAST_ADDRESSES);
391 
392 	/* XXX kstat */
393 
394 	return (0);
395 }
396 
397 int
398 ixgbe_activate(struct device *self, int act)
399 {
400 	struct ix_softc *sc = (struct ix_softc *)self;
401 	struct ifnet *ifp = &sc->arpcom.ac_if;
402 	struct ixgbe_hw		*hw = &sc->hw;
403 	uint32_t			 ctrl_ext;
404 
405 	switch (act) {
406 	case DVACT_QUIESCE:
407 		if (ifp->if_flags & IFF_RUNNING)
408 			ixgbe_stop(sc);
409 		break;
410 	case DVACT_RESUME:
411 		ixgbe_init_hw(hw);
412 
413 		/* Enable the optics for 82599 SFP+ fiber */
414 		if (sc->hw.mac.ops.enable_tx_laser)
415 			sc->hw.mac.ops.enable_tx_laser(&sc->hw);
416 
417 		/* Enable power to the phy */
418 		if (hw->phy.ops.set_phy_power)
419 			hw->phy.ops.set_phy_power(&sc->hw, TRUE);
420 
421 		/* Get the PCI-E bus info and determine LAN ID */
422 		hw->mac.ops.get_bus_info(hw);
423 
424 		/* let hardware know driver is loaded */
425 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
426 		ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
427 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
428 
429 		if (ifp->if_flags & IFF_UP)
430 			ixgbe_init(sc);
431 		break;
432 	}
433 	return (0);
434 }
435 
436 /*********************************************************************
437  *  Transmit entry point
438  *
439  *  ixgbe_start is called by the stack to initiate a transmit.
440  *  The driver will remain in this routine as long as there are
441  *  packets to transmit and transmit resources are available.
442  *  In case resources are not available stack is notified and
443  *  the packet is requeued.
444  **********************************************************************/
445 
446 void
447 ixgbe_start(struct ifqueue *ifq)
448 {
449 	struct ifnet		*ifp = ifq->ifq_if;
450 	struct ix_softc		*sc = ifp->if_softc;
451 	struct ix_txring	*txr = ifq->ifq_softc;
452 	struct mbuf  		*m_head;
453 	unsigned int		 head, free, used;
454 	int			 post = 0;
455 
456 	if (!sc->link_up)
457 		return;
458 
459 	head = txr->next_avail_desc;
460 	free = txr->next_to_clean;
461 	if (free <= head)
462 		free += sc->num_tx_desc;
463 	free -= head;
464 
465 	membar_consumer();
466 
467 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
468 	    0, txr->txdma.dma_map->dm_mapsize,
469 	    BUS_DMASYNC_POSTWRITE);
470 
471 	for (;;) {
472 		/* Check that we have the minimal number of TX descriptors. */
473 		if (free <= IXGBE_TX_OP_THRESHOLD) {
474 			ifq_set_oactive(ifq);
475 			break;
476 		}
477 
478 		m_head = ifq_dequeue(ifq);
479 		if (m_head == NULL)
480 			break;
481 
482 		used = ixgbe_encap(txr, m_head);
483 		if (used == 0) {
484 			m_freem(m_head);
485 			continue;
486 		}
487 
488 		free -= used;
489 
490 #if NBPFILTER > 0
491 		if (ifp->if_bpf)
492 			bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
493 #endif
494 
495 		/* Set timeout in case hardware has problems transmitting */
496 		txr->watchdog_timer = IXGBE_TX_TIMEOUT;
497 		ifp->if_timer = IXGBE_TX_TIMEOUT;
498 
499 		post = 1;
500 	}
501 
502 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
503 	    0, txr->txdma.dma_map->dm_mapsize,
504 	    BUS_DMASYNC_PREWRITE);
505 
506 	/*
507 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
508 	 * hardware that this frame is available to transmit.
509 	 */
510 	if (post)
511 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->me),
512 		    txr->next_avail_desc);
513 }
514 
515 /*********************************************************************
516  *  Ioctl entry point
517  *
518  *  ixgbe_ioctl is called when the user wants to configure the
519  *  interface.
520  *
521  *  return 0 on success, positive on failure
522  **********************************************************************/
523 
524 int
525 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
526 {
527 	struct ix_softc	*sc = ifp->if_softc;
528 	struct ifreq	*ifr = (struct ifreq *) data;
529 	int		s, error = 0;
530 
531 	s = splnet();
532 
533 	switch (command) {
534 	case SIOCSIFADDR:
535 		IOCTL_DEBUGOUT("ioctl: SIOCxIFADDR (Get/Set Interface Addr)");
536 		ifp->if_flags |= IFF_UP;
537 		if (!(ifp->if_flags & IFF_RUNNING))
538 			ixgbe_init(sc);
539 		break;
540 
541 	case SIOCSIFFLAGS:
542 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
543 		if (ifp->if_flags & IFF_UP) {
544 			if (ifp->if_flags & IFF_RUNNING)
545 				error = ENETRESET;
546 			else
547 				ixgbe_init(sc);
548 		} else {
549 			if (ifp->if_flags & IFF_RUNNING)
550 				ixgbe_stop(sc);
551 		}
552 		break;
553 
554 	case SIOCSIFMEDIA:
555 	case SIOCGIFMEDIA:
556 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
557 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
558 		break;
559 
560 	case SIOCGIFRXR:
561 		error = ixgbe_rxrinfo(sc, (struct if_rxrinfo *)ifr->ifr_data);
562 		break;
563 
564 	case SIOCGIFSFFPAGE:
565 		error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
566 		if (error != 0)
567 			break;
568 
569 		error = ixgbe_get_sffpage(sc, (struct if_sffpage *)data);
570 		rw_exit(&sc->sfflock);
571 		break;
572 
573 	default:
574 		error = ether_ioctl(ifp, &sc->arpcom, command, data);
575 	}
576 
577 	if (error == ENETRESET) {
578 		if (ifp->if_flags & IFF_RUNNING) {
579 			ixgbe_disable_intr(sc);
580 			ixgbe_iff(sc);
581 			ixgbe_enable_intr(sc);
582 			ixgbe_enable_queues(sc);
583 		}
584 		error = 0;
585 	}
586 
587 	splx(s);
588 	return (error);
589 }
590 
591 int
592 ixgbe_get_sffpage(struct ix_softc *sc, struct if_sffpage *sff)
593 {
594 	struct ixgbe_hw *hw = &sc->hw;
595 	uint32_t swfw_mask = hw->phy.phy_semaphore_mask;
596 	uint8_t page;
597 	size_t i;
598 	int error = EIO;
599 
600 	if (hw->phy.type == ixgbe_phy_fw)
601 		return (ENODEV);
602 
603 	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
604 		return (EBUSY); /* XXX */
605 
606 	if (sff->sff_addr == IFSFF_ADDR_EEPROM) {
607 		if (hw->phy.ops.read_i2c_byte_unlocked(hw, 127,
608 		    IFSFF_ADDR_EEPROM, &page))
609 			goto error;
610 		if (page != sff->sff_page &&
611 		    hw->phy.ops.write_i2c_byte_unlocked(hw, 127,
612 		    IFSFF_ADDR_EEPROM, sff->sff_page))
613 			goto error;
614 	}
615 
616 	for (i = 0; i < sizeof(sff->sff_data); i++) {
617 		if (hw->phy.ops.read_i2c_byte_unlocked(hw, i,
618 		    sff->sff_addr, &sff->sff_data[i]))
619 			goto error;
620 	}
621 
622 	if (sff->sff_addr == IFSFF_ADDR_EEPROM) {
623 		if (page != sff->sff_page &&
624 		    hw->phy.ops.write_i2c_byte_unlocked(hw, 127,
625 		    IFSFF_ADDR_EEPROM, page))
626 			goto error;
627 	}
628 
629 	error = 0;
630 error:
631 	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
632 	return (error);
633 }
634 
635 int
636 ixgbe_rxrinfo(struct ix_softc *sc, struct if_rxrinfo *ifri)
637 {
638 	struct if_rxring_info *ifr, ifr1;
639 	struct ix_rxring *rxr;
640 	int error, i;
641 	u_int n = 0;
642 
643 	if (sc->num_queues > 1) {
644 		ifr = mallocarray(sc->num_queues, sizeof(*ifr), M_DEVBUF,
645 		    M_WAITOK | M_ZERO);
646 	} else
647 		ifr = &ifr1;
648 
649 	for (i = 0; i < sc->num_queues; i++) {
650 		rxr = &sc->rx_rings[i];
651 		ifr[n].ifr_size = MCLBYTES;
652 		snprintf(ifr[n].ifr_name, sizeof(ifr[n].ifr_name), "%d", i);
653 		ifr[n].ifr_info = rxr->rx_ring;
654 		n++;
655 	}
656 
657 	error = if_rxr_info_ioctl(ifri, sc->num_queues, ifr);
658 
659 	if (sc->num_queues > 1)
660 		free(ifr, M_DEVBUF, sc->num_queues * sizeof(*ifr));
661 	return (error);
662 }
663 
664 /*********************************************************************
665  *  Watchdog entry point
666  *
667  **********************************************************************/
668 
669 void
670 ixgbe_watchdog(struct ifnet * ifp)
671 {
672 	struct ix_softc *sc = (struct ix_softc *)ifp->if_softc;
673 	struct ix_txring *txr = sc->tx_rings;
674 	struct ixgbe_hw *hw = &sc->hw;
675 	int		tx_hang = FALSE;
676 	int		i;
677 
678 	/*
679 	 * The timer is set to 5 every time ixgbe_start() queues a packet.
680 	 * Anytime all descriptors are clean the timer is set to 0.
681 	 */
682 	for (i = 0; i < sc->num_queues; i++, txr++) {
683 		if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
684 			continue;
685 		else {
686 			tx_hang = TRUE;
687 			break;
688 		}
689 	}
690 	if (tx_hang == FALSE)
691 		return;
692 
693 	/*
694 	 * If we are in this routine because of pause frames, then don't
695 	 * reset the hardware.
696 	 */
697 	if (!(IXGBE_READ_REG(hw, IXGBE_TFCS) & IXGBE_TFCS_TXON)) {
698 		for (i = 0; i < sc->num_queues; i++, txr++)
699 			txr->watchdog_timer = IXGBE_TX_TIMEOUT;
700 		ifp->if_timer = IXGBE_TX_TIMEOUT;
701 		return;
702 	}
703 
704 
705 	printf("%s: Watchdog timeout -- resetting\n", ifp->if_xname);
706 	for (i = 0; i < sc->num_queues; i++, txr++) {
707 		printf("%s: Queue(%d) tdh = %d, hw tdt = %d\n", ifp->if_xname, i,
708 		    IXGBE_READ_REG(hw, IXGBE_TDH(i)),
709 		    IXGBE_READ_REG(hw, IXGBE_TDT(i)));
710 		printf("%s: TX(%d) Next TX to Clean = %d\n", ifp->if_xname,
711 		    i, txr->next_to_clean);
712 	}
713 	ifp->if_flags &= ~IFF_RUNNING;
714 
715 	ixgbe_init(sc);
716 }
717 
718 /*********************************************************************
719  *  Init entry point
720  *
721  *  This routine is used in two ways. It is used by the stack as
722  *  init entry point in network interface structure. It is also used
723  *  by the driver as a hw/sw initialization routine to get to a
724  *  consistent state.
725  *
726  *  return 0 on success, positive on failure
727  **********************************************************************/
728 #define IXGBE_MHADD_MFS_SHIFT 16
729 
730 void
731 ixgbe_init(void *arg)
732 {
733 	struct ix_softc	*sc = (struct ix_softc *)arg;
734 	struct ifnet	*ifp = &sc->arpcom.ac_if;
735 	struct ix_rxring	*rxr = sc->rx_rings;
736 	uint32_t	 k, txdctl, rxdctl, rxctrl, mhadd, itr;
737 	int		 i, s, err;
738 
739 	INIT_DEBUGOUT("ixgbe_init: begin");
740 
741 	s = splnet();
742 
743 	ixgbe_stop(sc);
744 
745 	/* reprogram the RAR[0] in case user changed it. */
746 	ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
747 
748 	/* Get the latest mac address, User can use a LAA */
749 	bcopy(sc->arpcom.ac_enaddr, sc->hw.mac.addr,
750 	      IXGBE_ETH_LENGTH_OF_ADDRESS);
751 	ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, 1);
752 	sc->hw.addr_ctrl.rar_used_count = 1;
753 
754 	/* Prepare transmit descriptors and buffers */
755 	if (ixgbe_setup_transmit_structures(sc)) {
756 		printf("%s: Could not setup transmit structures\n",
757 		    ifp->if_xname);
758 		ixgbe_stop(sc);
759 		splx(s);
760 		return;
761 	}
762 
763 	ixgbe_init_hw(&sc->hw);
764 	ixgbe_initialize_transmit_units(sc);
765 
766 	/* Use 2k clusters, even for jumbo frames */
767 	sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
768 
769 	/* Prepare receive descriptors and buffers */
770 	if (ixgbe_setup_receive_structures(sc)) {
771 		printf("%s: Could not setup receive structures\n",
772 		    ifp->if_xname);
773 		ixgbe_stop(sc);
774 		splx(s);
775 		return;
776 	}
777 
778 	/* Configure RX settings */
779 	ixgbe_initialize_receive_units(sc);
780 
781 	/* Enable SDP & MSIX interrupts based on adapter */
782 	ixgbe_config_gpie(sc);
783 
784 	/* Program promiscuous mode and multicast filters. */
785 	ixgbe_iff(sc);
786 
787 	/* Set MRU size */
788 	mhadd = IXGBE_READ_REG(&sc->hw, IXGBE_MHADD);
789 	mhadd &= ~IXGBE_MHADD_MFS_MASK;
790 	mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
791 	IXGBE_WRITE_REG(&sc->hw, IXGBE_MHADD, mhadd);
792 
793 	/* Now enable all the queues */
794 	for (i = 0; i < sc->num_queues; i++) {
795 		txdctl = IXGBE_READ_REG(&sc->hw, IXGBE_TXDCTL(i));
796 		txdctl |= IXGBE_TXDCTL_ENABLE;
797 		/* Set WTHRESH to 8, burst writeback */
798 		txdctl |= (8 << 16);
799 		/*
800 		 * When the internal queue falls below PTHRESH (16),
801 		 * start prefetching as long as there are at least
802 		 * HTHRESH (1) buffers ready.
803 		 */
804 		txdctl |= (16 << 0) | (1 << 8);
805 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TXDCTL(i), txdctl);
806 	}
807 
808 	for (i = 0; i < sc->num_queues; i++) {
809 		rxdctl = IXGBE_READ_REG(&sc->hw, IXGBE_RXDCTL(i));
810 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
811 			/*
812 			 * PTHRESH = 21
813 			 * HTHRESH = 4
814 			 * WTHRESH = 8
815 			 */
816 			rxdctl &= ~0x3FFFFF;
817 			rxdctl |= 0x080420;
818 		}
819 		rxdctl |= IXGBE_RXDCTL_ENABLE;
820 		IXGBE_WRITE_REG(&sc->hw, IXGBE_RXDCTL(i), rxdctl);
821 		for (k = 0; k < 10; k++) {
822 			if (IXGBE_READ_REG(&sc->hw, IXGBE_RXDCTL(i)) &
823 			    IXGBE_RXDCTL_ENABLE)
824 				break;
825 			else
826 				msec_delay(1);
827 		}
828 		IXGBE_WRITE_FLUSH(&sc->hw);
829 		IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(i), rxr->last_desc_filled);
830 	}
831 
832 	/* Set up VLAN support and filter */
833 	ixgbe_setup_vlan_hw_support(sc);
834 
835 	/* Enable Receive engine */
836 	rxctrl = IXGBE_READ_REG(&sc->hw, IXGBE_RXCTRL);
837 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
838 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
839 	rxctrl |= IXGBE_RXCTRL_RXEN;
840 	sc->hw.mac.ops.enable_rx_dma(&sc->hw, rxctrl);
841 
842 	/* Set up MSI/X routing */
843 	if (sc->sc_intrmap) {
844 		ixgbe_configure_ivars(sc);
845 		/* Set up auto-mask */
846 		if (sc->hw.mac.type == ixgbe_mac_82598EB)
847 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
848 		else {
849 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
850 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
851 		}
852 	} else {  /* Simple settings for Legacy/MSI */
853 		ixgbe_set_ivar(sc, 0, 0, 0);
854 		ixgbe_set_ivar(sc, 0, 0, 1);
855 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
856 	}
857 
858 	/* Check on any SFP devices that need to be kick-started */
859 	if (sc->hw.phy.type == ixgbe_phy_none) {
860 		err = sc->hw.phy.ops.identify(&sc->hw);
861 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
862 			printf("Unsupported SFP+ module type was detected.\n");
863 			splx(s);
864 			return;
865 		}
866 	}
867 
868 	/* Setup interrupt moderation */
869 	itr = (4000000 / IXGBE_INTS_PER_SEC) & 0xff8;
870 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
871 		itr |= IXGBE_EITR_LLI_MOD | IXGBE_EITR_CNT_WDIS;
872 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(0), itr);
873 
874 	if (sc->sc_intrmap) {
875 		/* Set moderation on the Link interrupt */
876 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(sc->linkvec),
877 		    IXGBE_LINK_ITR);
878 	}
879 
880 	/* Enable power to the phy */
881 	if (sc->hw.phy.ops.set_phy_power)
882 		sc->hw.phy.ops.set_phy_power(&sc->hw, TRUE);
883 
884 	/* Config/Enable Link */
885 	ixgbe_config_link(sc);
886 
887 	/* Hardware Packet Buffer & Flow Control setup */
888 	ixgbe_config_delay_values(sc);
889 
890 	/* Initialize the FC settings */
891 	sc->hw.mac.ops.start_hw(&sc->hw);
892 
893 	/* And now turn on interrupts */
894 	ixgbe_enable_intr(sc);
895 	ixgbe_enable_queues(sc);
896 
897 	/* Now inform the stack we're ready */
898 	ifp->if_flags |= IFF_RUNNING;
899 	for (i = 0; i < sc->num_queues; i++)
900 		ifq_clr_oactive(ifp->if_ifqs[i]);
901 
902 #if NKSTAT > 0
903 	ix_kstats_tick(sc);
904 #endif
905 
906 	splx(s);
907 }
908 
909 void
910 ixgbe_config_gpie(struct ix_softc *sc)
911 {
912 	struct ixgbe_hw	*hw = &sc->hw;
913 	uint32_t gpie;
914 
915 	gpie = IXGBE_READ_REG(&sc->hw, IXGBE_GPIE);
916 
917 	/* Fan Failure Interrupt */
918 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
919 		gpie |= IXGBE_SDP1_GPIEN;
920 
921 	if (sc->hw.mac.type == ixgbe_mac_82599EB) {
922 		/* Add for Module detection */
923 		gpie |= IXGBE_SDP2_GPIEN;
924 
925 		/* Media ready */
926 		if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP)
927 			gpie |= IXGBE_SDP1_GPIEN;
928 
929 		/*
930 		 * Set LL interval to max to reduce the number of low latency
931 		 * interrupts hitting the card when the ring is getting full.
932 		 */
933 		gpie |= 0xf << IXGBE_GPIE_LLI_DELAY_SHIFT;
934 	}
935 
936 	if (sc->hw.mac.type == ixgbe_mac_X540 ||
937 	    sc->hw.mac.type == ixgbe_mac_X550EM_x ||
938 	    sc->hw.mac.type == ixgbe_mac_X550EM_a) {
939 		/*
940 		 * Thermal Failure Detection (X540)
941 		 * Link Detection (X552 SFP+, X552/X557-AT)
942 		 */
943 		gpie |= IXGBE_SDP0_GPIEN_X540;
944 
945 		/*
946 		 * Set LL interval to max to reduce the number of low latency
947 		 * interrupts hitting the card when the ring is getting full.
948 		 */
949 		gpie |= 0xf << IXGBE_GPIE_LLI_DELAY_SHIFT;
950 	}
951 
952 	if (sc->sc_intrmap) {
953 		/* Enable Enhanced MSIX mode */
954 		gpie |= IXGBE_GPIE_MSIX_MODE;
955 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
956 		    IXGBE_GPIE_OCD;
957 	}
958 
959 	IXGBE_WRITE_REG(&sc->hw, IXGBE_GPIE, gpie);
960 }
961 
962 /*
963  * Requires sc->max_frame_size to be set.
964  */
965 void
966 ixgbe_config_delay_values(struct ix_softc *sc)
967 {
968 	struct ixgbe_hw *hw = &sc->hw;
969 	uint32_t rxpb, frame, size, tmp;
970 
971 	frame = sc->max_frame_size;
972 
973 	/* Calculate High Water */
974 	switch (hw->mac.type) {
975 	case ixgbe_mac_X540:
976 	case ixgbe_mac_X550:
977 	case ixgbe_mac_X550EM_x:
978 	case ixgbe_mac_X550EM_a:
979 		tmp = IXGBE_DV_X540(frame, frame);
980 		break;
981 	default:
982 		tmp = IXGBE_DV(frame, frame);
983 		break;
984 	}
985 	size = IXGBE_BT2KB(tmp);
986 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
987 	hw->fc.high_water[0] = rxpb - size;
988 
989 	/* Now calculate Low Water */
990 	switch (hw->mac.type) {
991 	case ixgbe_mac_X540:
992 	case ixgbe_mac_X550:
993 	case ixgbe_mac_X550EM_x:
994 	case ixgbe_mac_X550EM_a:
995 		tmp = IXGBE_LOW_DV_X540(frame);
996 		break;
997 	default:
998 		tmp = IXGBE_LOW_DV(frame);
999 		break;
1000 	}
1001 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1002 
1003 	hw->fc.requested_mode = sc->fc;
1004 	hw->fc.pause_time = IXGBE_FC_PAUSE;
1005 	hw->fc.send_xon = TRUE;
1006 }
1007 
1008 /*
1009  * MSIX Interrupt Handlers
1010  */
1011 void
1012 ixgbe_enable_queue(struct ix_softc *sc, uint32_t vector)
1013 {
1014 	uint64_t queue = 1ULL << vector;
1015 	uint32_t mask;
1016 
1017 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
1018 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1019 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, mask);
1020 	} else {
1021 		mask = (queue & 0xFFFFFFFF);
1022 		if (mask)
1023 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS_EX(0), mask);
1024 		mask = (queue >> 32);
1025 		if (mask)
1026 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS_EX(1), mask);
1027 	}
1028 }
1029 
1030 void
1031 ixgbe_enable_queues(struct ix_softc *sc)
1032 {
1033 	struct ix_queue *que;
1034 	int i;
1035 
1036 	for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
1037 		ixgbe_enable_queue(sc, que->msix);
1038 }
1039 
1040 void
1041 ixgbe_disable_queue(struct ix_softc *sc, uint32_t vector)
1042 {
1043 	uint64_t queue = 1ULL << vector;
1044 	uint32_t mask;
1045 
1046 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
1047 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1048 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, mask);
1049 	} else {
1050 		mask = (queue & 0xFFFFFFFF);
1051 		if (mask)
1052 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), mask);
1053 		mask = (queue >> 32);
1054 		if (mask)
1055 			IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), mask);
1056 	}
1057 }
1058 
1059 /*
1060  * MSIX Interrupt Handlers
1061  */
1062 int
1063 ixgbe_link_intr(void *vsc)
1064 {
1065 	struct ix_softc	*sc = (struct ix_softc *)vsc;
1066 
1067 	return ixgbe_intr(sc);
1068 }
1069 
1070 int
1071 ixgbe_queue_intr(void *vque)
1072 {
1073 	struct ix_queue *que = vque;
1074 	struct ix_softc	*sc = que->sc;
1075 	struct ifnet	*ifp = &sc->arpcom.ac_if;
1076 	struct ix_rxring	*rxr = que->rxr;
1077 	struct ix_txring	*txr = que->txr;
1078 
1079 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
1080 		ixgbe_rxeof(rxr);
1081 		ixgbe_txeof(txr);
1082 		ixgbe_rxrefill(rxr);
1083 	}
1084 
1085 	ixgbe_enable_queue(sc, que->msix);
1086 
1087 	return (1);
1088 }
1089 
1090 /*********************************************************************
1091  *
1092  *  Legacy Interrupt Service routine
1093  *
1094  **********************************************************************/
1095 
1096 int
1097 ixgbe_legacy_intr(void *arg)
1098 {
1099 	struct ix_softc	*sc = (struct ix_softc *)arg;
1100 	struct ifnet	*ifp = &sc->arpcom.ac_if;
1101 	struct ix_rxring	*rxr = sc->rx_rings;
1102 	struct ix_txring	*txr = sc->tx_rings;
1103 	int rv;
1104 
1105 	rv = ixgbe_intr(sc);
1106 	if (rv == 0) {
1107 		return (0);
1108 	}
1109 
1110 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
1111 		ixgbe_rxeof(rxr);
1112 		ixgbe_txeof(txr);
1113 		ixgbe_rxrefill(rxr);
1114 	}
1115 
1116 	ixgbe_enable_queues(sc);
1117 	return (rv);
1118 }
1119 
1120 int
1121 ixgbe_intr(struct ix_softc *sc)
1122 {
1123 	struct ifnet	*ifp = &sc->arpcom.ac_if;
1124 	struct ixgbe_hw	*hw = &sc->hw;
1125 	uint32_t	 reg_eicr, mod_mask, msf_mask;
1126 
1127 	if (sc->sc_intrmap) {
1128 		/* Pause other interrupts */
1129 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER);
1130 		/* First get the cause */
1131 		reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1132 		/* Be sure the queue bits are not cleared */
1133 		reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1134 		/* Clear interrupt with write */
1135 		IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1136 	} else {
1137 		reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1138 		if (reg_eicr == 0) {
1139 			ixgbe_enable_intr(sc);
1140 			ixgbe_enable_queues(sc);
1141 			return (0);
1142 		}
1143 	}
1144 
1145 	/* Link status change */
1146 	if (reg_eicr & IXGBE_EICR_LSC) {
1147 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
1148 		KERNEL_LOCK();
1149 		ixgbe_update_link_status(sc);
1150 		KERNEL_UNLOCK();
1151 	}
1152 
1153 	if (hw->mac.type != ixgbe_mac_82598EB) {
1154 		if (reg_eicr & IXGBE_EICR_ECC) {
1155 			printf("%s: CRITICAL: ECC ERROR!! "
1156 			    "Please Reboot!!\n", sc->dev.dv_xname);
1157 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1158 		}
1159 		/* Check for over temp condition */
1160 		if (reg_eicr & IXGBE_EICR_TS) {
1161 			printf("%s: CRITICAL: OVER TEMP!! "
1162 			    "PHY IS SHUT DOWN!!\n", ifp->if_xname);
1163 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1164 		}
1165 	}
1166 
1167 	/* Pluggable optics-related interrupt */
1168 	if (ixgbe_is_sfp(hw)) {
1169 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) {
1170 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
1171 			msf_mask = IXGBE_EICR_GPI_SDP1_X540;
1172 		} else if (hw->mac.type == ixgbe_mac_X540 ||
1173 		    hw->mac.type == ixgbe_mac_X550 ||
1174 		    hw->mac.type == ixgbe_mac_X550EM_x) {
1175 			mod_mask = IXGBE_EICR_GPI_SDP2_X540;
1176 			msf_mask = IXGBE_EICR_GPI_SDP1_X540;
1177 		} else {
1178 			mod_mask = IXGBE_EICR_GPI_SDP2;
1179 			msf_mask = IXGBE_EICR_GPI_SDP1;
1180 		}
1181 		if (reg_eicr & mod_mask) {
1182 			/* Clear the interrupt */
1183 			IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask);
1184 			KERNEL_LOCK();
1185 			ixgbe_handle_mod(sc);
1186 			KERNEL_UNLOCK();
1187 		} else if ((hw->phy.media_type != ixgbe_media_type_copper) &&
1188 		    (reg_eicr & msf_mask)) {
1189 			/* Clear the interrupt */
1190 			IXGBE_WRITE_REG(hw, IXGBE_EICR, msf_mask);
1191 			KERNEL_LOCK();
1192 			ixgbe_handle_msf(sc);
1193 			KERNEL_UNLOCK();
1194 		}
1195 	}
1196 
1197 	/* Check for fan failure */
1198 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1199 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1200 		printf("%s: CRITICAL: FAN FAILURE!! "
1201 		    "REPLACE IMMEDIATELY!!\n", ifp->if_xname);
1202 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1203 	}
1204 
1205 	/* External PHY interrupt */
1206 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
1207 	    (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) {
1208 		/* Clear the interrupt */
1209 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540);
1210 		KERNEL_LOCK();
1211 		ixgbe_handle_phy(sc);
1212 		KERNEL_UNLOCK();
1213 	}
1214 
1215 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC);
1216 
1217 	return (1);
1218 }
1219 
1220 /*********************************************************************
1221  *
1222  *  Media Ioctl callback
1223  *
1224  *  This routine is called whenever the user queries the status of
1225  *  the interface using ifconfig.
1226  *
1227  **********************************************************************/
1228 void
1229 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq *ifmr)
1230 {
1231 	struct ix_softc *sc = ifp->if_softc;
1232 	uint64_t layer;
1233 
1234 	ifmr->ifm_active = IFM_ETHER;
1235 	ifmr->ifm_status = IFM_AVALID;
1236 
1237 	INIT_DEBUGOUT("ixgbe_media_status: begin");
1238 	ixgbe_update_link_status(sc);
1239 
1240 	if (!LINK_STATE_IS_UP(ifp->if_link_state))
1241 		return;
1242 
1243 	ifmr->ifm_status |= IFM_ACTIVE;
1244 	layer = sc->phy_layer;
1245 
1246 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T ||
1247 	    layer & IXGBE_PHYSICAL_LAYER_1000BASE_T ||
1248 	    layer & IXGBE_PHYSICAL_LAYER_100BASE_TX ||
1249 	    layer & IXGBE_PHYSICAL_LAYER_10BASE_T) {
1250 		switch (sc->link_speed) {
1251 		case IXGBE_LINK_SPEED_10GB_FULL:
1252 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1253 			break;
1254 		case IXGBE_LINK_SPEED_1GB_FULL:
1255 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1256 			break;
1257 		case IXGBE_LINK_SPEED_100_FULL:
1258 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1259 			break;
1260 		case IXGBE_LINK_SPEED_10_FULL:
1261 			ifmr->ifm_active |= IFM_10_T | IFM_FDX;
1262 			break;
1263 		}
1264 	}
1265 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU ||
1266 	    layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) {
1267 		switch (sc->link_speed) {
1268 		case IXGBE_LINK_SPEED_10GB_FULL:
1269 			ifmr->ifm_active |= IFM_10G_SFP_CU | IFM_FDX;
1270 			break;
1271 		}
1272 	}
1273 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1274 		switch (sc->link_speed) {
1275 		case IXGBE_LINK_SPEED_10GB_FULL:
1276 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1277 			break;
1278 		case IXGBE_LINK_SPEED_1GB_FULL:
1279 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1280 			break;
1281 		}
1282 	}
1283 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR ||
1284 	    layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1285 		switch (sc->link_speed) {
1286 		case IXGBE_LINK_SPEED_10GB_FULL:
1287 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1288 			break;
1289 		case IXGBE_LINK_SPEED_1GB_FULL:
1290 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1291 			break;
1292 		}
1293 	}
1294 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1295 		switch (sc->link_speed) {
1296 		case IXGBE_LINK_SPEED_10GB_FULL:
1297 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1298 			break;
1299 		}
1300 	}
1301 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1302 		switch (sc->link_speed) {
1303 		case IXGBE_LINK_SPEED_10GB_FULL:
1304 			ifmr->ifm_active |= IFM_10G_KR | IFM_FDX;
1305 			break;
1306 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1307 			ifmr->ifm_active |= IFM_2500_KX | IFM_FDX;
1308 			break;
1309 		case IXGBE_LINK_SPEED_1GB_FULL:
1310 			ifmr->ifm_active |= IFM_1000_KX | IFM_FDX;
1311 			break;
1312 		}
1313 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 ||
1314 	    layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX ||
1315 	    layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
1316 		switch (sc->link_speed) {
1317 		case IXGBE_LINK_SPEED_10GB_FULL:
1318 			ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX;
1319 			break;
1320 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1321 			ifmr->ifm_active |= IFM_2500_KX | IFM_FDX;
1322 			break;
1323 		case IXGBE_LINK_SPEED_1GB_FULL:
1324 			ifmr->ifm_active |= IFM_1000_KX | IFM_FDX;
1325 			break;
1326 		}
1327 	}
1328 
1329 	switch (sc->hw.fc.current_mode) {
1330 	case ixgbe_fc_tx_pause:
1331 		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_TXPAUSE;
1332 		break;
1333 	case ixgbe_fc_rx_pause:
1334 		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_RXPAUSE;
1335 		break;
1336 	case ixgbe_fc_full:
1337 		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_RXPAUSE |
1338 		    IFM_ETH_TXPAUSE;
1339 		break;
1340 	default:
1341 		ifmr->ifm_active &= ~(IFM_FLOW | IFM_ETH_RXPAUSE |
1342 		    IFM_ETH_TXPAUSE);
1343 		break;
1344 	}
1345 }
1346 
1347 /*********************************************************************
1348  *
1349  *  Media Ioctl callback
1350  *
1351  *  This routine is called when the user changes speed/duplex using
1352  *  media/mediopt option with ifconfig.
1353  *
1354  **********************************************************************/
1355 int
1356 ixgbe_media_change(struct ifnet *ifp)
1357 {
1358 	struct ix_softc	*sc = ifp->if_softc;
1359 	struct ixgbe_hw	*hw = &sc->hw;
1360 	struct ifmedia	*ifm = &sc->media;
1361 	ixgbe_link_speed speed = 0;
1362 
1363 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1364 		return (EINVAL);
1365 
1366 	if (hw->phy.media_type == ixgbe_media_type_backplane)
1367 		return (ENODEV);
1368 
1369 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1370 		case IFM_AUTO:
1371 		case IFM_10G_T:
1372 			speed |= IXGBE_LINK_SPEED_100_FULL;
1373 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
1374 			speed |= IXGBE_LINK_SPEED_10GB_FULL;
1375 			break;
1376 		case IFM_10G_SR:
1377 		case IFM_10G_KR:
1378 		case IFM_10G_LR:
1379 		case IFM_10G_LRM:
1380 		case IFM_10G_CX4:
1381 		case IFM_10G_KX4:
1382 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
1383 			speed |= IXGBE_LINK_SPEED_10GB_FULL;
1384 			break;
1385 		case IFM_10G_SFP_CU:
1386 			speed |= IXGBE_LINK_SPEED_10GB_FULL;
1387 			break;
1388 		case IFM_1000_T:
1389 			speed |= IXGBE_LINK_SPEED_100_FULL;
1390 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
1391 			break;
1392 		case IFM_1000_LX:
1393 		case IFM_1000_SX:
1394 		case IFM_1000_CX:
1395 		case IFM_1000_KX:
1396 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
1397 			break;
1398 		case IFM_100_TX:
1399 			speed |= IXGBE_LINK_SPEED_100_FULL;
1400 			break;
1401 		case IFM_10_T:
1402 			speed |= IXGBE_LINK_SPEED_10_FULL;
1403 			break;
1404 		default:
1405 			return (EINVAL);
1406 	}
1407 
1408 	hw->mac.autotry_restart = TRUE;
1409 	hw->mac.ops.setup_link(hw, speed, TRUE);
1410 
1411 	return (0);
1412 }
1413 
1414 /*********************************************************************
1415  *
1416  *  This routine maps the mbufs to tx descriptors, allowing the
1417  *  TX engine to transmit the packets.
1418  *  	- return 0 on success, positive on failure
1419  *
1420  **********************************************************************/
1421 
1422 int
1423 ixgbe_encap(struct ix_txring *txr, struct mbuf *m_head)
1424 {
1425 	struct ix_softc *sc = txr->sc;
1426 	uint32_t	olinfo_status = 0, cmd_type_len;
1427 	int             i, j, ntxc;
1428 	int		first, last = 0;
1429 	bus_dmamap_t	map;
1430 	struct ixgbe_tx_buf *txbuf;
1431 	union ixgbe_adv_tx_desc *txd = NULL;
1432 
1433 	/* Basic descriptor defines */
1434 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1435 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1436 
1437 	/*
1438 	 * Important to capture the first descriptor
1439 	 * used because it will contain the index of
1440 	 * the one we tell the hardware to report back
1441 	 */
1442 	first = txr->next_avail_desc;
1443 	txbuf = &txr->tx_buffers[first];
1444 	map = txbuf->map;
1445 
1446 	/*
1447 	 * Set the appropriate offload context
1448 	 * this will becomes the first descriptor.
1449 	 */
1450 	ntxc = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1451 	if (ntxc == -1)
1452 		goto xmit_fail;
1453 
1454 	/*
1455 	 * Map the packet for DMA.
1456 	 */
1457 	switch (bus_dmamap_load_mbuf(txr->txdma.dma_tag, map,
1458 	    m_head, BUS_DMA_NOWAIT)) {
1459 	case 0:
1460 		break;
1461 	case EFBIG:
1462 		if (m_defrag(m_head, M_NOWAIT) == 0 &&
1463 		    bus_dmamap_load_mbuf(txr->txdma.dma_tag, map,
1464 		     m_head, BUS_DMA_NOWAIT) == 0)
1465 			break;
1466 		/* FALLTHROUGH */
1467 	default:
1468 		return (0);
1469 	}
1470 
1471 	i = txr->next_avail_desc + ntxc;
1472 	if (i >= sc->num_tx_desc)
1473 		i -= sc->num_tx_desc;
1474 
1475 	for (j = 0; j < map->dm_nsegs; j++) {
1476 		txd = &txr->tx_base[i];
1477 
1478 		txd->read.buffer_addr = htole64(map->dm_segs[j].ds_addr);
1479 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
1480 		    cmd_type_len | map->dm_segs[j].ds_len);
1481 		txd->read.olinfo_status = htole32(olinfo_status);
1482 		last = i; /* descriptor that will get completion IRQ */
1483 
1484 		if (++i == sc->num_tx_desc)
1485 			i = 0;
1486 	}
1487 
1488 	txd->read.cmd_type_len |=
1489 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1490 
1491 	bus_dmamap_sync(txr->txdma.dma_tag, map, 0, map->dm_mapsize,
1492 	    BUS_DMASYNC_PREWRITE);
1493 
1494 	/* Set the index of the descriptor that will be marked done */
1495 	txbuf->m_head = m_head;
1496 	txbuf->eop_index = last;
1497 
1498 	membar_producer();
1499 
1500 	txr->next_avail_desc = i;
1501 
1502 	return (ntxc + j);
1503 
1504 xmit_fail:
1505 	bus_dmamap_unload(txr->txdma.dma_tag, txbuf->map);
1506 	return (0);
1507 }
1508 
1509 void
1510 ixgbe_iff(struct ix_softc *sc)
1511 {
1512 	struct ifnet *ifp = &sc->arpcom.ac_if;
1513 	struct arpcom *ac = &sc->arpcom;
1514 	uint32_t	fctrl;
1515 	uint8_t	*mta;
1516 	uint8_t	*update_ptr;
1517 	struct ether_multi *enm;
1518 	struct ether_multistep step;
1519 	int	mcnt = 0;
1520 
1521 	IOCTL_DEBUGOUT("ixgbe_iff: begin");
1522 
1523 	mta = sc->mta;
1524 	bzero(mta, sizeof(uint8_t) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1525 	    MAX_NUM_MULTICAST_ADDRESSES);
1526 
1527 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1528 	fctrl &= ~(IXGBE_FCTRL_MPE | IXGBE_FCTRL_UPE);
1529 	ifp->if_flags &= ~IFF_ALLMULTI;
1530 
1531 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1532 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1533 		ifp->if_flags |= IFF_ALLMULTI;
1534 		fctrl |= IXGBE_FCTRL_MPE;
1535 		if (ifp->if_flags & IFF_PROMISC)
1536 			fctrl |= IXGBE_FCTRL_UPE;
1537 	} else {
1538 		ETHER_FIRST_MULTI(step, &sc->arpcom, enm);
1539 		while (enm != NULL) {
1540 			bcopy(enm->enm_addrlo,
1541 			    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1542 			    IXGBE_ETH_LENGTH_OF_ADDRESS);
1543 			mcnt++;
1544 
1545 			ETHER_NEXT_MULTI(step, enm);
1546 		}
1547 
1548 		update_ptr = mta;
1549 		sc->hw.mac.ops.update_mc_addr_list(&sc->hw, update_ptr, mcnt,
1550 		    ixgbe_mc_array_itr, TRUE);
1551 	}
1552 
1553 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1554 }
1555 
1556 /*
1557  * This is an iterator function now needed by the multicast
1558  * shared code. It simply feeds the shared code routine the
1559  * addresses in the array of ixgbe_iff() one by one.
1560  */
1561 uint8_t *
1562 ixgbe_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1563 {
1564 	uint8_t *addr = *update_ptr;
1565 	uint8_t *newptr;
1566 	*vmdq = 0;
1567 
1568 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1569 	*update_ptr = newptr;
1570 	return addr;
1571 }
1572 
1573 void
1574 ixgbe_update_link_status(struct ix_softc *sc)
1575 {
1576 	struct ifnet	*ifp = &sc->arpcom.ac_if;
1577 	int		link_state = LINK_STATE_DOWN;
1578 
1579 	splassert(IPL_NET);
1580 	KERNEL_ASSERT_LOCKED();
1581 
1582 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
1583 
1584 	ifp->if_baudrate = 0;
1585 	if (sc->link_up) {
1586 		link_state = LINK_STATE_FULL_DUPLEX;
1587 
1588 		switch (sc->link_speed) {
1589 		case IXGBE_LINK_SPEED_UNKNOWN:
1590 			ifp->if_baudrate = 0;
1591 			break;
1592 		case IXGBE_LINK_SPEED_100_FULL:
1593 			ifp->if_baudrate = IF_Mbps(100);
1594 			break;
1595 		case IXGBE_LINK_SPEED_1GB_FULL:
1596 			ifp->if_baudrate = IF_Gbps(1);
1597 			break;
1598 		case IXGBE_LINK_SPEED_10GB_FULL:
1599 			ifp->if_baudrate = IF_Gbps(10);
1600 			break;
1601 		}
1602 
1603 		/* Update any Flow Control changes */
1604 		sc->hw.mac.ops.fc_enable(&sc->hw);
1605 	}
1606 	if (ifp->if_link_state != link_state) {
1607 		ifp->if_link_state = link_state;
1608 		if_link_state_change(ifp);
1609 	}
1610 }
1611 
1612 
1613 /*********************************************************************
1614  *
1615  *  This routine disables all traffic on the adapter by issuing a
1616  *  global reset on the MAC and deallocates TX/RX buffers.
1617  *
1618  **********************************************************************/
1619 
1620 void
1621 ixgbe_stop(void *arg)
1622 {
1623 	struct ix_softc *sc = arg;
1624 	struct ifnet   *ifp = &sc->arpcom.ac_if;
1625 	int i;
1626 
1627 	/* Tell the stack that the interface is no longer active */
1628 	ifp->if_flags &= ~IFF_RUNNING;
1629 
1630 #if NKSTAT > 0
1631 	timeout_del(&sc->sc_kstat_tmo);
1632 #endif
1633 	ifp->if_timer = 0;
1634 
1635 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
1636 	ixgbe_disable_intr(sc);
1637 
1638 	sc->hw.mac.ops.reset_hw(&sc->hw);
1639 	sc->hw.adapter_stopped = FALSE;
1640 	sc->hw.mac.ops.stop_adapter(&sc->hw);
1641 	if (sc->hw.mac.type == ixgbe_mac_82599EB)
1642 		sc->hw.mac.ops.stop_mac_link_on_d3(&sc->hw);
1643 	/* Turn off the laser */
1644 	if (sc->hw.mac.ops.disable_tx_laser)
1645 		sc->hw.mac.ops.disable_tx_laser(&sc->hw);
1646 
1647 	/* reprogram the RAR[0] in case user changed it. */
1648 	ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
1649 
1650 	intr_barrier(sc->tag);
1651 	for (i = 0; i < sc->num_queues; i++) {
1652 		struct ifqueue *ifq = ifp->if_ifqs[i];
1653 		ifq_barrier(ifq);
1654 		ifq_clr_oactive(ifq);
1655 
1656 		if (sc->queues[i].tag != NULL)
1657 			intr_barrier(sc->queues[i].tag);
1658 		timeout_del(&sc->rx_rings[i].rx_refill);
1659 	}
1660 
1661 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1662 
1663 	/* Should we really clear all structures on stop? */
1664 	ixgbe_free_transmit_structures(sc);
1665 	ixgbe_free_receive_structures(sc);
1666 
1667 	ixgbe_update_link_status(sc);
1668 }
1669 
1670 
1671 /*********************************************************************
1672  *
1673  *  Determine hardware revision.
1674  *
1675  **********************************************************************/
1676 void
1677 ixgbe_identify_hardware(struct ix_softc *sc)
1678 {
1679 	struct ixgbe_osdep	*os = &sc->osdep;
1680 	struct pci_attach_args	*pa = &os->os_pa;
1681 	uint32_t		 reg;
1682 
1683 	/* Save off the information about this board */
1684 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1685 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1686 
1687 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1688 	sc->hw.revision_id = PCI_REVISION(reg);
1689 
1690 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1691 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1692 	sc->hw.subsystem_device_id = PCI_PRODUCT(reg);
1693 
1694 	/* We need this here to set the num_segs below */
1695 	ixgbe_set_mac_type(&sc->hw);
1696 
1697 	/* Pick up the 82599 and VF settings */
1698 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
1699 		sc->hw.phy.smart_speed = ixgbe_smart_speed;
1700 	sc->num_segs = IXGBE_82599_SCATTER;
1701 }
1702 
1703 /*********************************************************************
1704  *
1705  *  Setup the Legacy or MSI Interrupt handler
1706  *
1707  **********************************************************************/
1708 int
1709 ixgbe_allocate_legacy(struct ix_softc *sc)
1710 {
1711 	struct ixgbe_osdep	*os = &sc->osdep;
1712 	struct pci_attach_args	*pa = &os->os_pa;
1713 	const char		*intrstr = NULL;
1714 	pci_chipset_tag_t	pc = pa->pa_pc;
1715 	pci_intr_handle_t	ih;
1716 
1717 	/* We allocate a single interrupt resource */
1718 	if (pci_intr_map_msi(pa, &ih) != 0 &&
1719 	    pci_intr_map(pa, &ih) != 0) {
1720 		printf(": couldn't map interrupt\n");
1721 		return (ENXIO);
1722 	}
1723 
1724 #if 0
1725 	/* XXX */
1726 	/* Tasklets for Link, SFP and Multispeed Fiber */
1727 	TASK_INIT(&sc->link_task, 0, ixgbe_handle_link, sc);
1728 	TASK_INIT(&sc->mod_task, 0, ixgbe_handle_mod, sc);
1729 	TASK_INIT(&sc->msf_task, 0, ixgbe_handle_msf, sc);
1730 #endif
1731 
1732 	intrstr = pci_intr_string(pc, ih);
1733 	sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
1734 	    ixgbe_legacy_intr, sc, sc->dev.dv_xname);
1735 	if (sc->tag == NULL) {
1736 		printf(": couldn't establish interrupt");
1737 		if (intrstr != NULL)
1738 			printf(" at %s", intrstr);
1739 		printf("\n");
1740 		return (ENXIO);
1741 	}
1742 	printf(": %s", intrstr);
1743 
1744 	/* For simplicity in the handlers */
1745 	sc->que_mask = IXGBE_EIMS_ENABLE_MASK;
1746 
1747 	return (0);
1748 }
1749 
1750 /*********************************************************************
1751  *
1752  *  Setup the MSI-X Interrupt handlers
1753  *
1754  **********************************************************************/
1755 int
1756 ixgbe_allocate_msix(struct ix_softc *sc)
1757 {
1758 	struct ixgbe_osdep	*os = &sc->osdep;
1759 	struct pci_attach_args	*pa  = &os->os_pa;
1760 	int                      i = 0, error = 0;
1761 	struct ix_queue         *que;
1762 	pci_intr_handle_t	ih;
1763 
1764 	for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++) {
1765 		if (pci_intr_map_msix(pa, i, &ih)) {
1766 			printf("ixgbe_allocate_msix: "
1767 			    "pci_intr_map_msix vec %d failed\n", i);
1768 			error = ENOMEM;
1769 			goto fail;
1770 		}
1771 
1772 		que->tag = pci_intr_establish_cpu(pa->pa_pc, ih,
1773 		    IPL_NET | IPL_MPSAFE, intrmap_cpu(sc->sc_intrmap, i),
1774 		    ixgbe_queue_intr, que, que->name);
1775 		if (que->tag == NULL) {
1776 			printf("ixgbe_allocate_msix: "
1777 			    "pci_intr_establish vec %d failed\n", i);
1778 			error = ENOMEM;
1779 			goto fail;
1780 		}
1781 
1782 		que->msix = i;
1783 	}
1784 
1785 	/* Now the link status/control last MSI-X vector */
1786 	if (pci_intr_map_msix(pa, i, &ih)) {
1787 		printf("ixgbe_allocate_msix: "
1788 		    "pci_intr_map_msix link vector failed\n");
1789 		error = ENOMEM;
1790 		goto fail;
1791 	}
1792 
1793 	sc->tag = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
1794 	    ixgbe_link_intr, sc, sc->dev.dv_xname);
1795 	if (sc->tag == NULL) {
1796 		printf("ixgbe_allocate_msix: "
1797 		    "pci_intr_establish link vector failed\n");
1798 		error = ENOMEM;
1799 		goto fail;
1800 	}
1801 	sc->linkvec = i;
1802 	printf(", %s, %d queue%s", pci_intr_string(pa->pa_pc, ih),
1803 	    i, (i > 1) ? "s" : "");
1804 
1805 	return (0);
1806 fail:
1807 	for (que = sc->queues; i > 0; i--, que++) {
1808 		if (que->tag == NULL)
1809 			continue;
1810 		pci_intr_disestablish(pa->pa_pc, que->tag);
1811 		que->tag = NULL;
1812 	}
1813 
1814 	return (error);
1815 }
1816 
1817 void
1818 ixgbe_setup_msix(struct ix_softc *sc)
1819 {
1820 	struct ixgbe_osdep	*os = &sc->osdep;
1821 	struct pci_attach_args	*pa = &os->os_pa;
1822 	int			 nmsix;
1823 	unsigned int		 maxq;
1824 
1825 	if (!ixgbe_enable_msix)
1826 		return;
1827 
1828 	nmsix = pci_intr_msix_count(pa);
1829 	if (nmsix <= 1)
1830 		return;
1831 
1832 	/* give one vector to events */
1833 	nmsix--;
1834 
1835 	/* XXX the number of queues is limited to what we can keep stats on */
1836 	maxq = (sc->hw.mac.type == ixgbe_mac_82598EB) ? 8 : 16;
1837 
1838 	sc->sc_intrmap = intrmap_create(&sc->dev, nmsix, maxq, 0);
1839 	sc->num_queues = intrmap_count(sc->sc_intrmap);
1840 }
1841 
1842 int
1843 ixgbe_allocate_pci_resources(struct ix_softc *sc)
1844 {
1845 	struct ixgbe_osdep	*os = &sc->osdep;
1846 	struct pci_attach_args	*pa = &os->os_pa;
1847 	int			 val;
1848 
1849 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, PCIR_BAR(0));
1850 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1851 		printf(": mmba is not mem space\n");
1852 		return (ENXIO);
1853 	}
1854 
1855 	if (pci_mapreg_map(pa, PCIR_BAR(0), PCI_MAPREG_MEM_TYPE(val), 0,
1856 	    &os->os_memt, &os->os_memh, &os->os_membase, &os->os_memsize, 0)) {
1857 		printf(": cannot find mem space\n");
1858 		return (ENXIO);
1859 	}
1860 	sc->hw.hw_addr = (uint8_t *)os->os_membase;
1861 
1862 	/* Legacy defaults */
1863 	sc->num_queues = 1;
1864 	sc->hw.back = os;
1865 
1866 	/* Now setup MSI or MSI/X, return us the number of supported vectors. */
1867 	ixgbe_setup_msix(sc);
1868 
1869 	return (0);
1870 }
1871 
1872 void
1873 ixgbe_free_pci_resources(struct ix_softc * sc)
1874 {
1875 	struct ixgbe_osdep	*os = &sc->osdep;
1876 	struct pci_attach_args	*pa = &os->os_pa;
1877 	struct ix_queue *que = sc->queues;
1878 	int i;
1879 
1880 	/* Release all msix queue resources: */
1881 	for (i = 0; i < sc->num_queues; i++, que++) {
1882 		if (que->tag)
1883 			pci_intr_disestablish(pa->pa_pc, que->tag);
1884 		que->tag = NULL;
1885 	}
1886 
1887 	if (sc->tag)
1888 		pci_intr_disestablish(pa->pa_pc, sc->tag);
1889 	sc->tag = NULL;
1890 	if (os->os_membase != 0)
1891 		bus_space_unmap(os->os_memt, os->os_memh, os->os_memsize);
1892 	os->os_membase = 0;
1893 }
1894 
1895 /*********************************************************************
1896  *
1897  *  Setup networking device structure and register an interface.
1898  *
1899  **********************************************************************/
1900 void
1901 ixgbe_setup_interface(struct ix_softc *sc)
1902 {
1903 	struct ifnet   *ifp = &sc->arpcom.ac_if;
1904 	int i;
1905 
1906 	strlcpy(ifp->if_xname, sc->dev.dv_xname, IFNAMSIZ);
1907 	ifp->if_softc = sc;
1908 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1909 	ifp->if_xflags = IFXF_MPSAFE;
1910 	ifp->if_ioctl = ixgbe_ioctl;
1911 	ifp->if_qstart = ixgbe_start;
1912 	ifp->if_timer = 0;
1913 	ifp->if_watchdog = ixgbe_watchdog;
1914 	ifp->if_hardmtu = IXGBE_MAX_FRAME_SIZE -
1915 	    ETHER_HDR_LEN - ETHER_CRC_LEN;
1916 	ifq_init_maxlen(&ifp->if_snd, sc->num_tx_desc - 1);
1917 
1918 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1919 
1920 #if NVLAN > 0
1921 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1922 #endif
1923 
1924 	ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1925 	ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
1926 	ifp->if_capabilities |= IFCAP_CSUM_IPv4;
1927 
1928 	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1929 	if (sc->hw.mac.type != ixgbe_mac_82598EB) {
1930 #ifndef __sparc64__
1931 		ifp->if_xflags |= IFXF_LRO;
1932 #endif
1933 		ifp->if_capabilities |= IFCAP_LRO;
1934 	}
1935 
1936 	/*
1937 	 * Specify the media types supported by this sc and register
1938 	 * callbacks to update media and link information
1939 	 */
1940 	ifmedia_init(&sc->media, IFM_IMASK, ixgbe_media_change,
1941 	    ixgbe_media_status);
1942 	ixgbe_add_media_types(sc);
1943 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1944 
1945 	if_attach(ifp);
1946 	ether_ifattach(ifp);
1947 
1948 	if_attach_queues(ifp, sc->num_queues);
1949 	if_attach_iqueues(ifp, sc->num_queues);
1950 	for (i = 0; i < sc->num_queues; i++) {
1951 		struct ifqueue *ifq = ifp->if_ifqs[i];
1952 		struct ifiqueue *ifiq = ifp->if_iqs[i];
1953 		struct ix_txring *txr = &sc->tx_rings[i];
1954 		struct ix_rxring *rxr = &sc->rx_rings[i];
1955 
1956 		ifq->ifq_softc = txr;
1957 		txr->ifq = ifq;
1958 
1959 		ifiq->ifiq_softc = rxr;
1960 		rxr->ifiq = ifiq;
1961 
1962 #if NKSTAT > 0
1963 		ix_txq_kstats(sc, txr);
1964 		ix_rxq_kstats(sc, rxr);
1965 #endif
1966 	}
1967 
1968 	sc->max_frame_size = IXGBE_MAX_FRAME_SIZE;
1969 }
1970 
1971 void
1972 ixgbe_add_media_types(struct ix_softc *sc)
1973 {
1974 	struct ixgbe_hw	*hw = &sc->hw;
1975 	uint64_t layer;
1976 
1977 	sc->phy_layer = hw->mac.ops.get_supported_physical_layer(hw);
1978 	layer = sc->phy_layer;
1979 
1980 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T)
1981 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL);
1982 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T)
1983 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1984 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)
1985 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
1986 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU ||
1987 	    layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)
1988 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_SFP_CU, 0, NULL);
1989 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1990 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_LR, 0, NULL);
1991 		if (hw->phy.multispeed_fiber)
1992 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_LX, 0,
1993 			    NULL);
1994 	}
1995 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1996 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1997 		if (hw->phy.multispeed_fiber)
1998 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0,
1999 			    NULL);
2000 	} else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)
2001 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2002 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4)
2003 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
2004 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR)
2005 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_KR, 0, NULL);
2006 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4)
2007 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_KX4, 0, NULL);
2008 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)
2009 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_KX, 0, NULL);
2010 	if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX)
2011 		ifmedia_add(&sc->media, IFM_ETHER | IFM_2500_KX, 0, NULL);
2012 
2013 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2014 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0,
2015 		    NULL);
2016 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2017 	}
2018 
2019 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2020 }
2021 
2022 void
2023 ixgbe_config_link(struct ix_softc *sc)
2024 {
2025 	uint32_t	autoneg, err = 0;
2026 	bool		negotiate;
2027 
2028 	if (ixgbe_is_sfp(&sc->hw)) {
2029 		if (sc->hw.phy.multispeed_fiber) {
2030 			sc->hw.mac.ops.setup_sfp(&sc->hw);
2031 			if (sc->hw.mac.ops.enable_tx_laser)
2032 				sc->hw.mac.ops.enable_tx_laser(&sc->hw);
2033 			ixgbe_handle_msf(sc);
2034 		} else
2035 			ixgbe_handle_mod(sc);
2036 	} else {
2037 		if (sc->hw.mac.ops.check_link)
2038 			err = sc->hw.mac.ops.check_link(&sc->hw, &autoneg,
2039 			    &sc->link_up, FALSE);
2040 		if (err)
2041 			return;
2042 		autoneg = sc->hw.phy.autoneg_advertised;
2043 		if ((!autoneg) && (sc->hw.mac.ops.get_link_capabilities))
2044 			err = sc->hw.mac.ops.get_link_capabilities(&sc->hw,
2045 			    &autoneg, &negotiate);
2046 		if (err)
2047 			return;
2048 		if (sc->hw.mac.ops.setup_link)
2049 			sc->hw.mac.ops.setup_link(&sc->hw,
2050 			    autoneg, sc->link_up);
2051 	}
2052 }
2053 
2054 /********************************************************************
2055  * Manage DMA'able memory.
2056   *******************************************************************/
2057 int
2058 ixgbe_dma_malloc(struct ix_softc *sc, bus_size_t size,
2059 		struct ixgbe_dma_alloc *dma, int mapflags)
2060 {
2061 	struct ifnet		*ifp = &sc->arpcom.ac_if;
2062 	struct ixgbe_osdep	*os = &sc->osdep;
2063 	int			 r;
2064 
2065 	dma->dma_tag = os->os_pa.pa_dmat;
2066 	r = bus_dmamap_create(dma->dma_tag, size, 1,
2067 	    size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
2068 	if (r != 0) {
2069 		printf("%s: ixgbe_dma_malloc: bus_dmamap_create failed; "
2070 		       "error %u\n", ifp->if_xname, r);
2071 		goto fail_0;
2072 	}
2073 
2074 	r = bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg,
2075 	    1, &dma->dma_nseg, BUS_DMA_NOWAIT);
2076 	if (r != 0) {
2077 		printf("%s: ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2078 		       "error %u\n", ifp->if_xname, r);
2079 		goto fail_1;
2080 	}
2081 
2082 	r = bus_dmamem_map(dma->dma_tag, &dma->dma_seg, dma->dma_nseg, size,
2083 	    &dma->dma_vaddr, BUS_DMA_NOWAIT);
2084 	if (r != 0) {
2085 		printf("%s: ixgbe_dma_malloc: bus_dmamem_map failed; "
2086 		       "error %u\n", ifp->if_xname, r);
2087 		goto fail_2;
2088 	}
2089 
2090 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2091 	    size, NULL, mapflags | BUS_DMA_NOWAIT);
2092 	if (r != 0) {
2093 		printf("%s: ixgbe_dma_malloc: bus_dmamap_load failed; "
2094 		       "error %u\n", ifp->if_xname, r);
2095 		goto fail_3;
2096 	}
2097 
2098 	dma->dma_size = size;
2099 	return (0);
2100 fail_3:
2101 	bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, size);
2102 fail_2:
2103 	bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
2104 fail_1:
2105 	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
2106 fail_0:
2107 	dma->dma_map = NULL;
2108 	dma->dma_tag = NULL;
2109 	return (r);
2110 }
2111 
2112 void
2113 ixgbe_dma_free(struct ix_softc *sc, struct ixgbe_dma_alloc *dma)
2114 {
2115 	if (dma->dma_tag == NULL)
2116 		return;
2117 
2118 	if (dma->dma_map != NULL) {
2119 		bus_dmamap_sync(dma->dma_tag, dma->dma_map, 0,
2120 		    dma->dma_map->dm_mapsize,
2121 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2122 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2123 		bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, dma->dma_size);
2124 		bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
2125 		bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
2126 		dma->dma_map = NULL;
2127 	}
2128 }
2129 
2130 
2131 /*********************************************************************
2132  *
2133  *  Allocate memory for the transmit and receive rings, and then
2134  *  the descriptors associated with each, called only once at attach.
2135  *
2136  **********************************************************************/
2137 int
2138 ixgbe_allocate_queues(struct ix_softc *sc)
2139 {
2140 	struct ifnet	*ifp = &sc->arpcom.ac_if;
2141 	struct ix_queue *que;
2142 	struct ix_txring *txr;
2143 	struct ix_rxring *rxr;
2144 	int rsize, tsize;
2145 	int txconf = 0, rxconf = 0, i;
2146 
2147 	/* First allocate the top level queue structs */
2148 	if (!(sc->queues = mallocarray(sc->num_queues,
2149 	    sizeof(struct ix_queue), M_DEVBUF, M_NOWAIT | M_ZERO))) {
2150 		printf("%s: Unable to allocate queue memory\n", ifp->if_xname);
2151 		goto fail;
2152 	}
2153 
2154 	/* Then allocate the TX ring struct memory */
2155 	if (!(sc->tx_rings = mallocarray(sc->num_queues,
2156 	    sizeof(struct ix_txring), M_DEVBUF, M_NOWAIT | M_ZERO))) {
2157 		printf("%s: Unable to allocate TX ring memory\n", ifp->if_xname);
2158 		goto fail;
2159 	}
2160 
2161 	/* Next allocate the RX */
2162 	if (!(sc->rx_rings = mallocarray(sc->num_queues,
2163 	    sizeof(struct ix_rxring), M_DEVBUF, M_NOWAIT | M_ZERO))) {
2164 		printf("%s: Unable to allocate RX ring memory\n", ifp->if_xname);
2165 		goto rx_fail;
2166 	}
2167 
2168 	/* For the ring itself */
2169 	tsize = roundup2(sc->num_tx_desc *
2170 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2171 
2172 	/*
2173 	 * Now set up the TX queues, txconf is needed to handle the
2174 	 * possibility that things fail midcourse and we need to
2175 	 * undo memory gracefully
2176 	 */
2177 	for (i = 0; i < sc->num_queues; i++, txconf++) {
2178 		/* Set up some basics */
2179 		txr = &sc->tx_rings[i];
2180 		txr->sc = sc;
2181 		txr->me = i;
2182 
2183 		if (ixgbe_dma_malloc(sc, tsize,
2184 		    &txr->txdma, BUS_DMA_NOWAIT)) {
2185 			printf("%s: Unable to allocate TX Descriptor memory\n",
2186 			    ifp->if_xname);
2187 			goto err_tx_desc;
2188 		}
2189 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2190 		bzero((void *)txr->tx_base, tsize);
2191 	}
2192 
2193 	/*
2194 	 * Next the RX queues...
2195 	 */
2196 	rsize = roundup2(sc->num_rx_desc *
2197 	    sizeof(union ixgbe_adv_rx_desc), 4096);
2198 	for (i = 0; i < sc->num_queues; i++, rxconf++) {
2199 		rxr = &sc->rx_rings[i];
2200 		/* Set up some basics */
2201 		rxr->sc = sc;
2202 		rxr->me = i;
2203 		timeout_set(&rxr->rx_refill, ixgbe_rxrefill, rxr);
2204 
2205 		if (ixgbe_dma_malloc(sc, rsize,
2206 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2207 			printf("%s: Unable to allocate RxDescriptor memory\n",
2208 			    ifp->if_xname);
2209 			goto err_rx_desc;
2210 		}
2211 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2212 		bzero((void *)rxr->rx_base, rsize);
2213 	}
2214 
2215 	/*
2216 	 * Finally set up the queue holding structs
2217 	 */
2218 	for (i = 0; i < sc->num_queues; i++) {
2219 		que = &sc->queues[i];
2220 		que->sc = sc;
2221 		que->txr = &sc->tx_rings[i];
2222 		que->rxr = &sc->rx_rings[i];
2223 		snprintf(que->name, sizeof(que->name), "%s:%d",
2224 		    sc->dev.dv_xname, i);
2225 	}
2226 
2227 	return (0);
2228 
2229 err_rx_desc:
2230 	for (rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
2231 		ixgbe_dma_free(sc, &rxr->rxdma);
2232 err_tx_desc:
2233 	for (txr = sc->tx_rings; txconf > 0; txr++, txconf--)
2234 		ixgbe_dma_free(sc, &txr->txdma);
2235 	free(sc->rx_rings, M_DEVBUF, sc->num_queues * sizeof(struct ix_rxring));
2236 	sc->rx_rings = NULL;
2237 rx_fail:
2238 	free(sc->tx_rings, M_DEVBUF, sc->num_queues * sizeof(struct ix_txring));
2239 	sc->tx_rings = NULL;
2240 fail:
2241 	return (ENOMEM);
2242 }
2243 
2244 /*********************************************************************
2245  *
2246  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2247  *  the information needed to transmit a packet on the wire. This is
2248  *  called only once at attach, setup is done every reset.
2249  *
2250  **********************************************************************/
2251 int
2252 ixgbe_allocate_transmit_buffers(struct ix_txring *txr)
2253 {
2254 	struct ix_softc 	*sc = txr->sc;
2255 	struct ifnet		*ifp = &sc->arpcom.ac_if;
2256 	struct ixgbe_tx_buf	*txbuf;
2257 	int			 error, i;
2258 
2259 	if (!(txr->tx_buffers = mallocarray(sc->num_tx_desc,
2260 	    sizeof(struct ixgbe_tx_buf), M_DEVBUF, M_NOWAIT | M_ZERO))) {
2261 		printf("%s: Unable to allocate tx_buffer memory\n",
2262 		    ifp->if_xname);
2263 		error = ENOMEM;
2264 		goto fail;
2265 	}
2266 	txr->txtag = txr->txdma.dma_tag;
2267 
2268 	/* Create the descriptor buffer dma maps */
2269 	for (i = 0; i < sc->num_tx_desc; i++) {
2270 		txbuf = &txr->tx_buffers[i];
2271 		error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES,
2272 			    sc->num_segs, PAGE_SIZE, 0,
2273 			    BUS_DMA_NOWAIT, &txbuf->map);
2274 
2275 		if (error != 0) {
2276 			printf("%s: Unable to create TX DMA map\n",
2277 			    ifp->if_xname);
2278 			goto fail;
2279 		}
2280 	}
2281 
2282 	return 0;
2283 fail:
2284 	return (error);
2285 }
2286 
2287 /*********************************************************************
2288  *
2289  *  Initialize a transmit ring.
2290  *
2291  **********************************************************************/
2292 int
2293 ixgbe_setup_transmit_ring(struct ix_txring *txr)
2294 {
2295 	struct ix_softc		*sc = txr->sc;
2296 	int			 error;
2297 
2298 	/* Now allocate transmit buffers for the ring */
2299 	if ((error = ixgbe_allocate_transmit_buffers(txr)) != 0)
2300 		return (error);
2301 
2302 	/* Clear the old ring contents */
2303 	bzero((void *)txr->tx_base,
2304 	      (sizeof(union ixgbe_adv_tx_desc)) * sc->num_tx_desc);
2305 
2306 	/* Reset indices */
2307 	txr->next_avail_desc = 0;
2308 	txr->next_to_clean = 0;
2309 
2310 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2311 	    0, txr->txdma.dma_map->dm_mapsize,
2312 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2313 
2314 	return (0);
2315 }
2316 
2317 /*********************************************************************
2318  *
2319  *  Initialize all transmit rings.
2320  *
2321  **********************************************************************/
2322 int
2323 ixgbe_setup_transmit_structures(struct ix_softc *sc)
2324 {
2325 	struct ix_txring *txr = sc->tx_rings;
2326 	int		i, error;
2327 
2328 	for (i = 0; i < sc->num_queues; i++, txr++) {
2329 		if ((error = ixgbe_setup_transmit_ring(txr)) != 0)
2330 			goto fail;
2331 	}
2332 
2333 	return (0);
2334 fail:
2335 	ixgbe_free_transmit_structures(sc);
2336 	return (error);
2337 }
2338 
2339 /*********************************************************************
2340  *
2341  *  Enable transmit unit.
2342  *
2343  **********************************************************************/
2344 void
2345 ixgbe_initialize_transmit_units(struct ix_softc *sc)
2346 {
2347 	struct ifnet	*ifp = &sc->arpcom.ac_if;
2348 	struct ix_txring	*txr;
2349 	struct ixgbe_hw	*hw = &sc->hw;
2350 	int		 i;
2351 	uint64_t	 tdba;
2352 	uint32_t	 txctrl;
2353 	uint32_t	 hlreg;
2354 
2355 	/* Setup the Base and Length of the Tx Descriptor Ring */
2356 
2357 	for (i = 0; i < sc->num_queues; i++) {
2358 		txr = &sc->tx_rings[i];
2359 
2360 		/* Setup descriptor base address */
2361 		tdba = txr->txdma.dma_map->dm_segs[0].ds_addr;
2362 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
2363 		       (tdba & 0x00000000ffffffffULL));
2364 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
2365 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2366 		    sc->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
2367 
2368 		/* Setup the HW Tx Head and Tail descriptor pointers */
2369 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2370 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2371 
2372 		/* Setup Transmit Descriptor Cmd Settings */
2373 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
2374 		txr->queue_status = IXGBE_QUEUE_IDLE;
2375 		txr->watchdog_timer = 0;
2376 
2377 		/* Disable Head Writeback */
2378 		switch (hw->mac.type) {
2379 		case ixgbe_mac_82598EB:
2380 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2381 			break;
2382 		case ixgbe_mac_82599EB:
2383 		case ixgbe_mac_X540:
2384 		default:
2385 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2386 			break;
2387 		}
2388 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2389 		switch (hw->mac.type) {
2390 		case ixgbe_mac_82598EB:
2391 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2392 			break;
2393 		case ixgbe_mac_82599EB:
2394 		case ixgbe_mac_X540:
2395 		default:
2396 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2397 			break;
2398 		}
2399 	}
2400 	ifp->if_timer = 0;
2401 
2402 	if (hw->mac.type != ixgbe_mac_82598EB) {
2403 		uint32_t dmatxctl, rttdcs;
2404 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2405 		dmatxctl |= IXGBE_DMATXCTL_TE;
2406 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2407 		/* Disable arbiter to set MTQC */
2408 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2409 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2410 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2411 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2412 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2413 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2414 	}
2415 
2416 	/* Enable TCP/UDP padding when using TSO */
2417 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2418 	hlreg |= IXGBE_HLREG0_TXPADEN;
2419 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2420 }
2421 
2422 /*********************************************************************
2423  *
2424  *  Free all transmit rings.
2425  *
2426  **********************************************************************/
2427 void
2428 ixgbe_free_transmit_structures(struct ix_softc *sc)
2429 {
2430 	struct ix_txring *txr = sc->tx_rings;
2431 	int		i;
2432 
2433 	for (i = 0; i < sc->num_queues; i++, txr++)
2434 		ixgbe_free_transmit_buffers(txr);
2435 }
2436 
2437 /*********************************************************************
2438  *
2439  *  Free transmit ring related data structures.
2440  *
2441  **********************************************************************/
2442 void
2443 ixgbe_free_transmit_buffers(struct ix_txring *txr)
2444 {
2445 	struct ix_softc *sc = txr->sc;
2446 	struct ixgbe_tx_buf *tx_buffer;
2447 	int             i;
2448 
2449 	INIT_DEBUGOUT("free_transmit_ring: begin");
2450 
2451 	if (txr->tx_buffers == NULL)
2452 		return;
2453 
2454 	tx_buffer = txr->tx_buffers;
2455 	for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) {
2456 		if (tx_buffer->map != NULL && tx_buffer->map->dm_nsegs > 0) {
2457 			bus_dmamap_sync(txr->txdma.dma_tag, tx_buffer->map,
2458 			    0, tx_buffer->map->dm_mapsize,
2459 			    BUS_DMASYNC_POSTWRITE);
2460 			bus_dmamap_unload(txr->txdma.dma_tag,
2461 			    tx_buffer->map);
2462 		}
2463 		if (tx_buffer->m_head != NULL) {
2464 			m_freem(tx_buffer->m_head);
2465 			tx_buffer->m_head = NULL;
2466 		}
2467 		if (tx_buffer->map != NULL) {
2468 			bus_dmamap_destroy(txr->txdma.dma_tag,
2469 			    tx_buffer->map);
2470 			tx_buffer->map = NULL;
2471 		}
2472 	}
2473 
2474 	if (txr->tx_buffers != NULL)
2475 		free(txr->tx_buffers, M_DEVBUF,
2476 		    sc->num_tx_desc * sizeof(struct ixgbe_tx_buf));
2477 	txr->tx_buffers = NULL;
2478 	txr->txtag = NULL;
2479 }
2480 
2481 /*********************************************************************
2482  *
2483  *  Advanced Context Descriptor setup for VLAN or CSUM
2484  *
2485  **********************************************************************/
2486 
2487 static inline int
2488 ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
2489     uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len,
2490     uint32_t *mss_l4len_idx)
2491 {
2492 	struct ether_extracted ext;
2493 	int offload = 0;
2494 
2495 	ether_extract_headers(mp, &ext);
2496 
2497 	*vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
2498 
2499 	if (ext.ip4) {
2500 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
2501 			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2502 			offload = 1;
2503 		}
2504 
2505 		*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2506 #ifdef INET6
2507 	} else if (ext.ip6) {
2508 		*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
2509 #endif
2510 	} else {
2511 		if (mp->m_pkthdr.csum_flags & M_TCP_TSO)
2512 			tcpstat_inc(tcps_outbadtso);
2513 		return offload;
2514 	}
2515 
2516 	*vlan_macip_lens |= ext.iphlen;
2517 
2518 	if (ext.tcp) {
2519 		*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2520 		if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
2521 			*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2522 			offload = 1;
2523 		}
2524 	} else if (ext.udp) {
2525 		*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2526 		if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
2527 			*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2528 			offload = 1;
2529 		}
2530 	}
2531 
2532 	if (mp->m_pkthdr.csum_flags & M_TCP_TSO) {
2533 		if (ext.tcp && mp->m_pkthdr.ph_mss > 0) {
2534 			uint32_t hdrlen, thlen, paylen, outlen;
2535 
2536 			thlen = ext.tcphlen;
2537 
2538 			outlen = mp->m_pkthdr.ph_mss;
2539 			*mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT;
2540 			*mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT;
2541 
2542 			hdrlen = sizeof(*ext.eh) + ext.iphlen + thlen;
2543 			paylen = mp->m_pkthdr.len - hdrlen;
2544 			CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK
2545 			    << IXGBE_ADVTXD_PAYLEN_SHIFT);
2546 			*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2547 
2548 			*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2549 			offload = 1;
2550 
2551 			tcpstat_add(tcps_outpkttso,
2552 			    (paylen + outlen - 1) / outlen);
2553 		} else
2554 			tcpstat_inc(tcps_outbadtso);
2555 	}
2556 
2557 	return offload;
2558 }
2559 
2560 static int
2561 ixgbe_tx_ctx_setup(struct ix_txring *txr, struct mbuf *mp,
2562     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2563 {
2564 	struct ixgbe_adv_tx_context_desc *TXD;
2565 	struct ixgbe_tx_buf *tx_buffer;
2566 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2567 	uint32_t mss_l4len_idx = 0;
2568 	int	ctxd = txr->next_avail_desc;
2569 	int	offload = 0;
2570 
2571 	/* Indicate the whole packet as payload when not doing TSO */
2572 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2573 
2574 #if NVLAN > 0
2575 	if (ISSET(mp->m_flags, M_VLANTAG)) {
2576 		uint32_t vtag = mp->m_pkthdr.ether_vtag;
2577 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
2578 		*cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
2579 		offload |= 1;
2580 	}
2581 #endif
2582 
2583 	offload |= ixgbe_tx_offload(mp, &vlan_macip_lens, &type_tucmd_mlhl,
2584 	    olinfo_status, cmd_type_len, &mss_l4len_idx);
2585 
2586 	if (!offload)
2587 		return (0);
2588 
2589 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2590 	tx_buffer = &txr->tx_buffers[ctxd];
2591 
2592 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2593 
2594 	/* Now copy bits into descriptor */
2595 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2596 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2597 	TXD->seqnum_seed = htole32(0);
2598 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2599 
2600 	tx_buffer->m_head = NULL;
2601 	tx_buffer->eop_index = -1;
2602 
2603 	return (1);
2604 }
2605 
2606 /**********************************************************************
2607  *
2608  *  Examine each tx_buffer in the used queue. If the hardware is done
2609  *  processing the packet then free associated resources. The
2610  *  tx_buffer is put back on the free queue.
2611  *
2612  **********************************************************************/
2613 int
2614 ixgbe_txeof(struct ix_txring *txr)
2615 {
2616 	struct ix_softc			*sc = txr->sc;
2617 	struct ifqueue			*ifq = txr->ifq;
2618 	struct ifnet			*ifp = &sc->arpcom.ac_if;
2619 	unsigned int			 head, tail, last;
2620 	struct ixgbe_tx_buf		*tx_buffer;
2621 	struct ixgbe_legacy_tx_desc	*tx_desc;
2622 
2623 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
2624 		return FALSE;
2625 
2626 	head = txr->next_avail_desc;
2627 	tail = txr->next_to_clean;
2628 
2629 	membar_consumer();
2630 
2631 	if (head == tail)
2632 		return (FALSE);
2633 
2634 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2635 	    0, txr->txdma.dma_map->dm_mapsize,
2636 	    BUS_DMASYNC_POSTREAD);
2637 
2638 	for (;;) {
2639 		tx_buffer = &txr->tx_buffers[tail];
2640 		last = tx_buffer->eop_index;
2641 		tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
2642 
2643 		if (!ISSET(tx_desc->upper.fields.status, IXGBE_TXD_STAT_DD))
2644 			break;
2645 
2646 		bus_dmamap_sync(txr->txdma.dma_tag, tx_buffer->map,
2647 		    0, tx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
2648 		bus_dmamap_unload(txr->txdma.dma_tag, tx_buffer->map);
2649 		m_freem(tx_buffer->m_head);
2650 
2651 		tx_buffer->m_head = NULL;
2652 		tx_buffer->eop_index = -1;
2653 
2654 		tail = last + 1;
2655 		if (tail == sc->num_tx_desc)
2656 			tail = 0;
2657 		if (head == tail) {
2658 			/* All clean, turn off the timer */
2659 			ifp->if_timer = 0;
2660 			break;
2661 		}
2662 	}
2663 
2664 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2665 	    0, txr->txdma.dma_map->dm_mapsize,
2666 	    BUS_DMASYNC_PREREAD);
2667 
2668 	membar_producer();
2669 
2670 	txr->next_to_clean = tail;
2671 
2672 	if (ifq_is_oactive(ifq))
2673 		ifq_restart(ifq);
2674 
2675 	return TRUE;
2676 }
2677 
2678 /*********************************************************************
2679  *
2680  *  Get a buffer from system mbuf buffer pool.
2681  *
2682  **********************************************************************/
2683 int
2684 ixgbe_get_buf(struct ix_rxring *rxr, int i)
2685 {
2686 	struct ix_softc		*sc = rxr->sc;
2687 	struct ixgbe_rx_buf	*rxbuf;
2688 	struct mbuf		*mp;
2689 	int			error;
2690 	union ixgbe_adv_rx_desc	*rxdesc;
2691 
2692 	rxbuf = &rxr->rx_buffers[i];
2693 	rxdesc = &rxr->rx_base[i];
2694 	if (rxbuf->buf) {
2695 		printf("%s: ixgbe_get_buf: slot %d already has an mbuf\n",
2696 		    sc->dev.dv_xname, i);
2697 		return (ENOBUFS);
2698 	}
2699 
2700 	/* needed in any case so preallocate since this one will fail for sure */
2701 	mp = MCLGETL(NULL, M_DONTWAIT, sc->rx_mbuf_sz);
2702 	if (!mp)
2703 		return (ENOBUFS);
2704 
2705 	mp->m_data += (mp->m_ext.ext_size - sc->rx_mbuf_sz);
2706 	mp->m_len = mp->m_pkthdr.len = sc->rx_mbuf_sz;
2707 
2708 	error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, rxbuf->map,
2709 	    mp, BUS_DMA_NOWAIT);
2710 	if (error) {
2711 		m_freem(mp);
2712 		return (error);
2713 	}
2714 
2715 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxbuf->map,
2716 	    0, rxbuf->map->dm_mapsize, BUS_DMASYNC_PREREAD);
2717 	rxbuf->buf = mp;
2718 
2719 	rxdesc->read.pkt_addr = htole64(rxbuf->map->dm_segs[0].ds_addr);
2720 
2721 	return (0);
2722 }
2723 
2724 /*********************************************************************
2725  *
2726  *  Allocate memory for rx_buffer structures. Since we use one
2727  *  rx_buffer per received packet, the maximum number of rx_buffer's
2728  *  that we'll need is equal to the number of receive descriptors
2729  *  that we've allocated.
2730  *
2731  **********************************************************************/
2732 int
2733 ixgbe_allocate_receive_buffers(struct ix_rxring *rxr)
2734 {
2735 	struct ix_softc		*sc = rxr->sc;
2736 	struct ifnet		*ifp = &sc->arpcom.ac_if;
2737 	struct ixgbe_rx_buf 	*rxbuf;
2738 	int			i, error;
2739 
2740 	if (!(rxr->rx_buffers = mallocarray(sc->num_rx_desc,
2741 	    sizeof(struct ixgbe_rx_buf), M_DEVBUF, M_NOWAIT | M_ZERO))) {
2742 		printf("%s: Unable to allocate rx_buffer memory\n",
2743 		    ifp->if_xname);
2744 		error = ENOMEM;
2745 		goto fail;
2746 	}
2747 
2748 	rxbuf = rxr->rx_buffers;
2749 	for (i = 0; i < sc->num_rx_desc; i++, rxbuf++) {
2750 		error = bus_dmamap_create(rxr->rxdma.dma_tag, 16 * 1024, 1,
2751 		    16 * 1024, 0, BUS_DMA_NOWAIT, &rxbuf->map);
2752 		if (error) {
2753 			printf("%s: Unable to create Pack DMA map\n",
2754 			    ifp->if_xname);
2755 			goto fail;
2756 		}
2757 	}
2758 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 0,
2759 	    rxr->rxdma.dma_map->dm_mapsize,
2760 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2761 
2762 	return (0);
2763 
2764 fail:
2765 	return (error);
2766 }
2767 
2768 /*********************************************************************
2769  *
2770  *  Initialize a receive ring and its buffers.
2771  *
2772  **********************************************************************/
2773 int
2774 ixgbe_setup_receive_ring(struct ix_rxring *rxr)
2775 {
2776 	struct ix_softc		*sc = rxr->sc;
2777 	struct ifnet		*ifp = &sc->arpcom.ac_if;
2778 	int			 rsize, error;
2779 
2780 	rsize = roundup2(sc->num_rx_desc *
2781 	    sizeof(union ixgbe_adv_rx_desc), 4096);
2782 	/* Clear the ring contents */
2783 	bzero((void *)rxr->rx_base, rsize);
2784 
2785 	if ((error = ixgbe_allocate_receive_buffers(rxr)) != 0)
2786 		return (error);
2787 
2788 	/* Setup our descriptor indices */
2789 	rxr->next_to_check = 0;
2790 	rxr->last_desc_filled = sc->num_rx_desc - 1;
2791 
2792 	if_rxr_init(&rxr->rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
2793 	    sc->num_rx_desc - 1);
2794 
2795 	ixgbe_rxfill(rxr);
2796 	if (if_rxr_inuse(&rxr->rx_ring) == 0) {
2797 		printf("%s: unable to fill any rx descriptors\n",
2798 		    sc->dev.dv_xname);
2799 		return (ENOBUFS);
2800 	}
2801 
2802 	return (0);
2803 }
2804 
2805 int
2806 ixgbe_rxfill(struct ix_rxring *rxr)
2807 {
2808 	struct ix_softc *sc = rxr->sc;
2809 	int		 post = 0;
2810 	u_int		 slots;
2811 	int		 i;
2812 
2813 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2814 	    0, rxr->rxdma.dma_map->dm_mapsize,
2815 	    BUS_DMASYNC_POSTWRITE);
2816 
2817 	i = rxr->last_desc_filled;
2818 	for (slots = if_rxr_get(&rxr->rx_ring, sc->num_rx_desc);
2819 	    slots > 0; slots--) {
2820 		if (++i == sc->num_rx_desc)
2821 			i = 0;
2822 
2823 		if (ixgbe_get_buf(rxr, i) != 0)
2824 			break;
2825 
2826 		rxr->last_desc_filled = i;
2827 		post = 1;
2828 	}
2829 
2830 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2831 	    0, rxr->rxdma.dma_map->dm_mapsize,
2832 	    BUS_DMASYNC_PREWRITE);
2833 
2834 	if_rxr_put(&rxr->rx_ring, slots);
2835 
2836 	return (post);
2837 }
2838 
2839 void
2840 ixgbe_rxrefill(void *xrxr)
2841 {
2842 	struct ix_rxring *rxr = xrxr;
2843 	struct ix_softc *sc = rxr->sc;
2844 
2845 	if (ixgbe_rxfill(rxr)) {
2846 		/* Advance the Rx Queue "Tail Pointer" */
2847 		IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(rxr->me),
2848 		    rxr->last_desc_filled);
2849 	} else if (if_rxr_inuse(&rxr->rx_ring) == 0)
2850 		timeout_add(&rxr->rx_refill, 1);
2851 
2852 }
2853 
2854 /*********************************************************************
2855  *
2856  *  Initialize all receive rings.
2857  *
2858  **********************************************************************/
2859 int
2860 ixgbe_setup_receive_structures(struct ix_softc *sc)
2861 {
2862 	struct ix_rxring *rxr = sc->rx_rings;
2863 	int i;
2864 
2865 	for (i = 0; i < sc->num_queues; i++, rxr++)
2866 		if (ixgbe_setup_receive_ring(rxr))
2867 			goto fail;
2868 
2869 	return (0);
2870 fail:
2871 	ixgbe_free_receive_structures(sc);
2872 	return (ENOBUFS);
2873 }
2874 
2875 /*********************************************************************
2876  *
2877  *  Setup receive registers and features.
2878  *
2879  **********************************************************************/
2880 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2881 
2882 void
2883 ixgbe_initialize_receive_units(struct ix_softc *sc)
2884 {
2885 	struct ifnet	*ifp = &sc->arpcom.ac_if;
2886 	struct ix_rxring	*rxr = sc->rx_rings;
2887 	struct ixgbe_hw	*hw = &sc->hw;
2888 	uint32_t	bufsz, fctrl, srrctl, rxcsum, rdrxctl;
2889 	uint32_t	hlreg;
2890 	int		i;
2891 
2892 	/*
2893 	 * Make sure receives are disabled while
2894 	 * setting up the descriptor ring
2895 	 */
2896 	ixgbe_disable_rx(hw);
2897 
2898 	/* Enable broadcasts */
2899 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2900 	fctrl |= IXGBE_FCTRL_BAM;
2901 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2902 		fctrl |= IXGBE_FCTRL_DPF;
2903 		fctrl |= IXGBE_FCTRL_PMCF;
2904 	}
2905 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2906 
2907 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2908 	/* Always enable jumbo frame reception */
2909 	hlreg |= IXGBE_HLREG0_JUMBOEN;
2910 	/* Always enable CRC stripping */
2911 	hlreg |= IXGBE_HLREG0_RXCRCSTRP;
2912 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2913 
2914 	if (ISSET(ifp->if_xflags, IFXF_LRO)) {
2915 		rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2916 
2917 		/* This field has to be set to zero. */
2918 		rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2919 
2920 		/* RSC Coalescing on ACK Change */
2921 		rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2922 		rdrxctl |= IXGBE_RDRXCTL_FCOE_WRFIX;
2923 
2924 		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2925 	}
2926 
2927 	bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2928 
2929 	for (i = 0; i < sc->num_queues; i++, rxr++) {
2930 		uint64_t rdba = rxr->rxdma.dma_map->dm_segs[0].ds_addr;
2931 
2932 		/* Setup the Base and Length of the Rx Descriptor Ring */
2933 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
2934 			       (rdba & 0x00000000ffffffffULL));
2935 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
2936 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2937 		    sc->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
2938 
2939 		/* Set up the SRRCTL register */
2940 		srrctl = bufsz | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2941 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2942 
2943 		if (ISSET(ifp->if_xflags, IFXF_LRO)) {
2944 			rdrxctl = IXGBE_READ_REG(&sc->hw, IXGBE_RSCCTL(i));
2945 
2946 			/* Enable Receive Side Coalescing */
2947 			rdrxctl |= IXGBE_RSCCTL_RSCEN;
2948 			rdrxctl |= IXGBE_RSCCTL_MAXDESC_16;
2949 
2950 			IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(i), rdrxctl);
2951 		}
2952 
2953 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2954 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2955 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2956 	}
2957 
2958 	if (sc->hw.mac.type != ixgbe_mac_82598EB) {
2959 		uint32_t psrtype = IXGBE_PSRTYPE_TCPHDR |
2960 			      IXGBE_PSRTYPE_UDPHDR |
2961 			      IXGBE_PSRTYPE_IPV4HDR |
2962 			      IXGBE_PSRTYPE_IPV6HDR;
2963 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
2964 	}
2965 
2966 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2967 	rxcsum &= ~IXGBE_RXCSUM_PCSD;
2968 
2969 	ixgbe_initialize_rss_mapping(sc);
2970 
2971 	/* Setup RSS */
2972 	if (sc->num_queues > 1) {
2973 		/* RSS and RX IPP Checksum are mutually exclusive */
2974 		rxcsum |= IXGBE_RXCSUM_PCSD;
2975 	}
2976 
2977 	/* Map QPRC/QPRDC/QPTC on a per queue basis */
2978 	ixgbe_map_queue_statistics(sc);
2979 
2980 	/* This is useful for calculating UDP/IP fragment checksums */
2981 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
2982 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
2983 
2984 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2985 }
2986 
2987 void
2988 ixgbe_initialize_rss_mapping(struct ix_softc *sc)
2989 {
2990 	struct ixgbe_hw	*hw = &sc->hw;
2991 	uint32_t reta = 0, mrqc, rss_key[10];
2992 	int i, j, queue_id, table_size, index_mult;
2993 
2994 	/* set up random bits */
2995 	stoeplitz_to_key(&rss_key, sizeof(rss_key));
2996 
2997 	/* Set multiplier for RETA setup and table size based on MAC */
2998 	index_mult = 0x1;
2999 	table_size = 128;
3000 	switch (sc->hw.mac.type) {
3001 	case ixgbe_mac_82598EB:
3002 		index_mult = 0x11;
3003 		break;
3004 	case ixgbe_mac_X550:
3005 	case ixgbe_mac_X550EM_x:
3006 	case ixgbe_mac_X550EM_a:
3007 		table_size = 512;
3008 		break;
3009 	default:
3010 		break;
3011 	}
3012 
3013 	/* Set up the redirection table */
3014 	for (i = 0, j = 0; i < table_size; i++, j++) {
3015 		if (j == sc->num_queues) j = 0;
3016 		queue_id = (j * index_mult);
3017 		/*
3018 		 * The low 8 bits are for hash value (n+0);
3019 		 * The next 8 bits are for hash value (n+1), etc.
3020 		 */
3021 		reta = reta >> 8;
3022 		reta = reta | ( ((uint32_t) queue_id) << 24);
3023 		if ((i & 3) == 3) {
3024 			if (i < 128)
3025 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
3026 			else
3027 				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
3028 				    reta);
3029 			reta = 0;
3030 		}
3031 	}
3032 
3033 	/* Now fill our hash function seeds */
3034 	for (i = 0; i < 10; i++)
3035 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
3036 
3037 	/*
3038 	 * Disable UDP - IP fragments aren't currently being handled
3039 	 * and so we end up with a mix of 2-tuple and 4-tuple
3040 	 * traffic.
3041 	 */
3042 	mrqc = IXGBE_MRQC_RSSEN
3043 	     | IXGBE_MRQC_RSS_FIELD_IPV4
3044 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
3045 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
3046 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
3047 	     | IXGBE_MRQC_RSS_FIELD_IPV6
3048 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
3049 	;
3050 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3051 }
3052 
3053 /*********************************************************************
3054  *
3055  *  Free all receive rings.
3056  *
3057  **********************************************************************/
3058 void
3059 ixgbe_free_receive_structures(struct ix_softc *sc)
3060 {
3061 	struct ix_rxring *rxr;
3062 	int		i;
3063 
3064 	for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
3065 		if_rxr_init(&rxr->rx_ring, 0, 0);
3066 
3067 	for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
3068 		ixgbe_free_receive_buffers(rxr);
3069 }
3070 
3071 /*********************************************************************
3072  *
3073  *  Free receive ring data structures
3074  *
3075  **********************************************************************/
3076 void
3077 ixgbe_free_receive_buffers(struct ix_rxring *rxr)
3078 {
3079 	struct ix_softc		*sc;
3080 	struct ixgbe_rx_buf	*rxbuf;
3081 	int			 i;
3082 
3083 	sc = rxr->sc;
3084 	if (rxr->rx_buffers != NULL) {
3085 		for (i = 0; i < sc->num_rx_desc; i++) {
3086 			rxbuf = &rxr->rx_buffers[i];
3087 			if (rxbuf->buf != NULL) {
3088 				bus_dmamap_sync(rxr->rxdma.dma_tag, rxbuf->map,
3089 				    0, rxbuf->map->dm_mapsize,
3090 				    BUS_DMASYNC_POSTREAD);
3091 				bus_dmamap_unload(rxr->rxdma.dma_tag,
3092 				    rxbuf->map);
3093 				m_freem(rxbuf->buf);
3094 				rxbuf->buf = NULL;
3095 			}
3096 			if (rxbuf->map != NULL) {
3097 				bus_dmamap_destroy(rxr->rxdma.dma_tag,
3098 				    rxbuf->map);
3099 				rxbuf->map = NULL;
3100 			}
3101 		}
3102 		free(rxr->rx_buffers, M_DEVBUF,
3103 		    sc->num_rx_desc * sizeof(struct ixgbe_rx_buf));
3104 		rxr->rx_buffers = NULL;
3105 	}
3106 }
3107 
3108 /*********************************************************************
3109  *
3110  *  This routine executes in interrupt context. It replenishes
3111  *  the mbufs in the descriptor and sends data which has been
3112  *  dma'ed into host memory to upper layer.
3113  *
3114  *********************************************************************/
3115 int
3116 ixgbe_rxeof(struct ix_rxring *rxr)
3117 {
3118 	struct ix_softc 	*sc = rxr->sc;
3119 	struct ifnet   		*ifp = &sc->arpcom.ac_if;
3120 	struct mbuf_list	 ml = MBUF_LIST_INITIALIZER();
3121 	struct mbuf    		*mp, *sendmp;
3122 	uint8_t		    	 eop = 0;
3123 	uint16_t		 len, vtag;
3124 	uint32_t		 staterr = 0;
3125 	struct ixgbe_rx_buf	*rxbuf, *nxbuf;
3126 	union ixgbe_adv_rx_desc	*rxdesc;
3127 	size_t			 dsize = sizeof(union ixgbe_adv_rx_desc);
3128 	int			 i, nextp, rsccnt;
3129 
3130 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
3131 		return FALSE;
3132 
3133 	i = rxr->next_to_check;
3134 	while (if_rxr_inuse(&rxr->rx_ring) > 0) {
3135 		uint32_t hash;
3136 		uint16_t hashtype;
3137 
3138 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3139 		    dsize * i, dsize, BUS_DMASYNC_POSTREAD);
3140 
3141 		rxdesc = &rxr->rx_base[i];
3142 		staterr = letoh32(rxdesc->wb.upper.status_error);
3143 		if (!ISSET(staterr, IXGBE_RXD_STAT_DD)) {
3144 			bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3145 			    dsize * i, dsize,
3146 			    BUS_DMASYNC_PREREAD);
3147 			break;
3148 		}
3149 
3150 		/* Zero out the receive descriptors status  */
3151 		rxdesc->wb.upper.status_error = 0;
3152 		rxbuf = &rxr->rx_buffers[i];
3153 
3154 		/* pull the mbuf off the ring */
3155 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxbuf->map, 0,
3156 		    rxbuf->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
3157 		bus_dmamap_unload(rxr->rxdma.dma_tag, rxbuf->map);
3158 
3159 		mp = rxbuf->buf;
3160 		len = letoh16(rxdesc->wb.upper.length);
3161 		vtag = letoh16(rxdesc->wb.upper.vlan);
3162 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
3163 		hash = lemtoh32(&rxdesc->wb.lower.hi_dword.rss);
3164 		hashtype =
3165 		    lemtoh16(&rxdesc->wb.lower.lo_dword.hs_rss.pkt_info) &
3166 		    IXGBE_RXDADV_RSSTYPE_MASK;
3167 		rsccnt = lemtoh32(&rxdesc->wb.lower.lo_dword.data) &
3168 		    IXGBE_RXDADV_RSCCNT_MASK;
3169 		rsccnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
3170 
3171 		if (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) {
3172 			if (rxbuf->fmp) {
3173 				m_freem(rxbuf->fmp);
3174 			} else {
3175 				m_freem(mp);
3176 			}
3177 			rxbuf->fmp = NULL;
3178 			rxbuf->buf = NULL;
3179 			goto next_desc;
3180 		}
3181 
3182 		if (mp == NULL) {
3183 			panic("%s: ixgbe_rxeof: NULL mbuf in slot %d "
3184 			    "(nrx %d, filled %d)", sc->dev.dv_xname,
3185 			    i, if_rxr_inuse(&rxr->rx_ring),
3186 			    rxr->last_desc_filled);
3187 		}
3188 
3189 		if (!eop) {
3190 			/*
3191 			 * Figure out the next descriptor of this frame.
3192 			 */
3193 			if (rsccnt) {
3194 				nextp = staterr & IXGBE_RXDADV_NEXTP_MASK;
3195 				nextp >>= IXGBE_RXDADV_NEXTP_SHIFT;
3196 			} else {
3197 				nextp = i + 1;
3198 			}
3199 			if (nextp == sc->num_rx_desc)
3200 				nextp = 0;
3201 			nxbuf = &rxr->rx_buffers[nextp];
3202 			/* prefetch(nxbuf); */
3203 		}
3204 
3205 		/*
3206 		 * Rather than using the fmp/lmp global pointers
3207 		 * we now keep the head of a packet chain in the
3208 		 * buffer struct and pass this along from one
3209 		 * descriptor to the next, until we get EOP.
3210 		 */
3211 		mp->m_len = len;
3212 		/*
3213 		 * See if there is a stored head
3214 		 * that determines what we are
3215 		 */
3216 		sendmp = rxbuf->fmp;
3217 		rxbuf->buf = rxbuf->fmp = NULL;
3218 
3219 		if (sendmp == NULL) {
3220 			/* first desc of a non-ps chain */
3221 			sendmp = mp;
3222 			sendmp->m_pkthdr.len = 0;
3223 			sendmp->m_pkthdr.ph_mss = 0;
3224 		} else {
3225 			mp->m_flags &= ~M_PKTHDR;
3226 		}
3227 		sendmp->m_pkthdr.len += mp->m_len;
3228 		/*
3229 		 * This function iterates over interleaved descriptors.
3230 		 * Thus, we reuse ph_mss as global segment counter per
3231 		 * TCP connection, instead of introducing a new variable
3232 		 * in m_pkthdr.
3233 		 */
3234 		if (rsccnt)
3235 			sendmp->m_pkthdr.ph_mss += rsccnt - 1;
3236 
3237 		/* Pass the head pointer on */
3238 		if (eop == 0) {
3239 			nxbuf->fmp = sendmp;
3240 			sendmp = NULL;
3241 			mp->m_next = nxbuf->buf;
3242 		} else { /* Sending this frame? */
3243 			ixgbe_rx_offload(staterr, vtag, sendmp);
3244 
3245 			if (hashtype != IXGBE_RXDADV_RSSTYPE_NONE) {
3246 				sendmp->m_pkthdr.ph_flowid = hash;
3247 				SET(sendmp->m_pkthdr.csum_flags, M_FLOWID);
3248 			}
3249 
3250 			ml_enqueue(&ml, sendmp);
3251 		}
3252 next_desc:
3253 		if_rxr_put(&rxr->rx_ring, 1);
3254 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3255 		    dsize * i, dsize,
3256 		    BUS_DMASYNC_PREREAD);
3257 
3258 		/* Advance our pointers to the next descriptor. */
3259 		if (++i == sc->num_rx_desc)
3260 			i = 0;
3261 	}
3262 	rxr->next_to_check = i;
3263 
3264 	if (ifiq_input(rxr->ifiq, &ml))
3265 		if_rxr_livelocked(&rxr->rx_ring);
3266 
3267 	if (!(staterr & IXGBE_RXD_STAT_DD))
3268 		return FALSE;
3269 
3270 	return TRUE;
3271 }
3272 
3273 /*********************************************************************
3274  *
3275  *  Check VLAN indication from hardware and inform the stack about the
3276  *  annotated TAG.
3277  *
3278  *  Verify that the hardware indicated that the checksum is valid.
3279  *  Inform the stack about the status of checksum so that stack
3280  *  doesn't spend time verifying the checksum.
3281  *
3282  *  Propagate TCP LRO packet from hardware to the stack with MSS annotation.
3283  *
3284  *********************************************************************/
3285 void
3286 ixgbe_rx_offload(uint32_t staterr, uint16_t vtag, struct mbuf *m)
3287 {
3288 	uint16_t status = (uint16_t) staterr;
3289 	uint8_t  errors = (uint8_t) (staterr >> 24);
3290 	int16_t  pkts;
3291 
3292 	/*
3293 	 * VLAN Offload
3294 	 */
3295 
3296 #if NVLAN > 0
3297 	if (ISSET(staterr, IXGBE_RXD_STAT_VP)) {
3298 		m->m_pkthdr.ether_vtag = vtag;
3299 		SET(m->m_flags, M_VLANTAG);
3300 	}
3301 #endif
3302 
3303 	/*
3304 	 * Checksum Offload
3305 	 */
3306 
3307 	if (ISSET(status, IXGBE_RXD_STAT_IPCS)) {
3308 		if (ISSET(errors, IXGBE_RXD_ERR_IPE))
3309 			SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_BAD);
3310 		else
3311 			SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK);
3312 	}
3313 	if (ISSET(status, IXGBE_RXD_STAT_L4CS) &&
3314 	    !ISSET(status, IXGBE_RXD_STAT_UDPCS)) {
3315 		if (ISSET(errors, IXGBE_RXD_ERR_TCPE)) {
3316 			/* on some hardware IPv6 + TCP + Bad is broken */
3317 			if (ISSET(status, IXGBE_RXD_STAT_IPCS))
3318 				SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_BAD);
3319 		} else
3320 			SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK);
3321 	}
3322 	if (ISSET(status, IXGBE_RXD_STAT_L4CS) &&
3323 	    ISSET(status, IXGBE_RXD_STAT_UDPCS)) {
3324 		if (ISSET(errors, IXGBE_RXD_ERR_TCPE))
3325 			SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_IN_BAD);
3326 		else
3327 			SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_IN_OK);
3328 	}
3329 
3330 	/*
3331 	 * TCP Large Receive Offload
3332 	 */
3333 
3334 	pkts = m->m_pkthdr.ph_mss;
3335 	m->m_pkthdr.ph_mss = 0;
3336 
3337 	if (pkts > 1) {
3338 		struct ether_extracted ext;
3339 		uint32_t paylen;
3340 
3341 		/*
3342 		 * Calculate the payload size:
3343 		 *
3344 		 * The packet length returned by the NIC (m->m_pkthdr.len)
3345 		 * can contain padding, which we don't want to count in to the
3346 		 * payload size.  Therefore, we calculate the real payload size
3347 		 * based on the total ip length field (ext.iplen).
3348 		 */
3349 		ether_extract_headers(m, &ext);
3350 		paylen = ext.iplen;
3351 		if (ext.ip4 || ext.ip6)
3352 			paylen -= ext.iphlen;
3353 		if (ext.tcp) {
3354 			paylen -= ext.tcphlen;
3355 			tcpstat_inc(tcps_inhwlro);
3356 			tcpstat_add(tcps_inpktlro, pkts);
3357 		} else {
3358 			tcpstat_inc(tcps_inbadlro);
3359 		}
3360 
3361 		/*
3362 		 * If we gonna forward this packet, we have to mark it as TSO,
3363 		 * set a correct mss, and recalculate the TCP checksum.
3364 		 */
3365 		if (ext.tcp && paylen >= pkts) {
3366 			SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
3367 			m->m_pkthdr.ph_mss = paylen / pkts;
3368 		}
3369 		if (ext.tcp && ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK))
3370 			SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
3371 	}
3372 }
3373 
3374 void
3375 ixgbe_setup_vlan_hw_support(struct ix_softc *sc)
3376 {
3377 	uint32_t	ctrl;
3378 	int		i;
3379 
3380 	/*
3381 	 * A soft reset zero's out the VFTA, so
3382 	 * we need to repopulate it now.
3383 	 */
3384 	for (i = 0; i < IXGBE_VFTA_SIZE; i++) {
3385 		if (sc->shadow_vfta[i] != 0)
3386 			IXGBE_WRITE_REG(&sc->hw, IXGBE_VFTA(i),
3387 			    sc->shadow_vfta[i]);
3388 	}
3389 
3390 	ctrl = IXGBE_READ_REG(&sc->hw, IXGBE_VLNCTRL);
3391 #if 0
3392 	/* Enable the Filter Table if enabled */
3393 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3394 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
3395 		ctrl |= IXGBE_VLNCTRL_VFE;
3396 	}
3397 #endif
3398 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
3399 		ctrl |= IXGBE_VLNCTRL_VME;
3400 	IXGBE_WRITE_REG(&sc->hw, IXGBE_VLNCTRL, ctrl);
3401 
3402 	/* On 82599 the VLAN enable is per/queue in RXDCTL */
3403 	if (sc->hw.mac.type != ixgbe_mac_82598EB) {
3404 		for (i = 0; i < sc->num_queues; i++) {
3405 			ctrl = IXGBE_READ_REG(&sc->hw, IXGBE_RXDCTL(i));
3406 			ctrl |= IXGBE_RXDCTL_VME;
3407 			IXGBE_WRITE_REG(&sc->hw, IXGBE_RXDCTL(i), ctrl);
3408 		}
3409 	}
3410 }
3411 
3412 void
3413 ixgbe_enable_intr(struct ix_softc *sc)
3414 {
3415 	struct ixgbe_hw *hw = &sc->hw;
3416 	uint32_t	mask, fwsm;
3417 
3418 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
3419 	/* Enable Fan Failure detection */
3420 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
3421 		    mask |= IXGBE_EIMS_GPI_SDP1;
3422 
3423 	switch (sc->hw.mac.type) {
3424 	case ixgbe_mac_82599EB:
3425 		mask |= IXGBE_EIMS_ECC;
3426 		/* Temperature sensor on some adapters */
3427 		mask |= IXGBE_EIMS_GPI_SDP0;
3428 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
3429 		mask |= IXGBE_EIMS_GPI_SDP1;
3430 		mask |= IXGBE_EIMS_GPI_SDP2;
3431 		break;
3432 	case ixgbe_mac_X540:
3433 		mask |= IXGBE_EIMS_ECC;
3434 		/* Detect if Thermal Sensor is enabled */
3435 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
3436 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
3437 			mask |= IXGBE_EIMS_TS;
3438 		break;
3439 	case ixgbe_mac_X550:
3440 	case ixgbe_mac_X550EM_x:
3441 	case ixgbe_mac_X550EM_a:
3442 		mask |= IXGBE_EIMS_ECC;
3443 		/* MAC thermal sensor is automatically enabled */
3444 		mask |= IXGBE_EIMS_TS;
3445 		/* Some devices use SDP0 for important information */
3446 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3447 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
3448 			mask |= IXGBE_EIMS_GPI_SDP0_X540;
3449 	default:
3450 		break;
3451 	}
3452 
3453 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
3454 
3455 	/* With MSI-X we use auto clear */
3456 	if (sc->sc_intrmap) {
3457 		mask = IXGBE_EIMS_ENABLE_MASK;
3458 		/* Don't autoclear Link */
3459 		mask &= ~IXGBE_EIMS_OTHER;
3460 		mask &= ~IXGBE_EIMS_LSC;
3461 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
3462 	}
3463 
3464 	IXGBE_WRITE_FLUSH(hw);
3465 }
3466 
3467 void
3468 ixgbe_disable_intr(struct ix_softc *sc)
3469 {
3470 	if (sc->sc_intrmap)
3471 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3472 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3473 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3474 	} else {
3475 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3476 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3477 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3478 	}
3479 	IXGBE_WRITE_FLUSH(&sc->hw);
3480 }
3481 
3482 uint16_t
3483 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3484 {
3485 	struct pci_attach_args	*pa;
3486 	uint32_t value;
3487 	int high = 0;
3488 
3489 	if (reg & 0x2) {
3490 		high = 1;
3491 		reg &= ~0x2;
3492 	}
3493 	pa = &((struct ixgbe_osdep *)hw->back)->os_pa;
3494 	value = pci_conf_read(pa->pa_pc, pa->pa_tag, reg);
3495 
3496 	if (high)
3497 		value >>= 16;
3498 
3499 	return (value & 0xffff);
3500 }
3501 
3502 void
3503 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3504 {
3505 	struct pci_attach_args	*pa;
3506 	uint32_t rv;
3507 	int high = 0;
3508 
3509 	/* Need to do read/mask/write... because 16 vs 32 bit!!! */
3510 	if (reg & 0x2) {
3511 		high = 1;
3512 		reg &= ~0x2;
3513 	}
3514 	pa = &((struct ixgbe_osdep *)hw->back)->os_pa;
3515 	rv = pci_conf_read(pa->pa_pc, pa->pa_tag, reg);
3516 	if (!high)
3517 		rv = (rv & 0xffff0000) | value;
3518 	else
3519 		rv = (rv & 0xffff) | ((uint32_t)value << 16);
3520 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg, rv);
3521 }
3522 
3523 /*
3524  * Setup the correct IVAR register for a particular MSIX interrupt
3525  *   (yes this is all very magic and confusing :)
3526  *  - entry is the register array entry
3527  *  - vector is the MSIX vector for this queue
3528  *  - type is RX/TX/MISC
3529  */
3530 void
3531 ixgbe_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector, int8_t type)
3532 {
3533 	struct ixgbe_hw *hw = &sc->hw;
3534 	uint32_t ivar, index;
3535 
3536 	vector |= IXGBE_IVAR_ALLOC_VAL;
3537 
3538 	switch (hw->mac.type) {
3539 
3540 	case ixgbe_mac_82598EB:
3541 		if (type == -1)
3542 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3543 		else
3544 			entry += (type * 64);
3545 		index = (entry >> 2) & 0x1F;
3546 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3547 		ivar &= ~((uint32_t)0xFF << (8 * (entry & 0x3)));
3548 		ivar |= ((uint32_t)vector << (8 * (entry & 0x3)));
3549 		IXGBE_WRITE_REG(&sc->hw, IXGBE_IVAR(index), ivar);
3550 		break;
3551 
3552 	case ixgbe_mac_82599EB:
3553 	case ixgbe_mac_X540:
3554 	case ixgbe_mac_X550:
3555 	case ixgbe_mac_X550EM_x:
3556 	case ixgbe_mac_X550EM_a:
3557 		if (type == -1) { /* MISC IVAR */
3558 			index = (entry & 1) * 8;
3559 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3560 			ivar &= ~((uint32_t)0xFF << index);
3561 			ivar |= ((uint32_t)vector << index);
3562 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3563 		} else {	/* RX/TX IVARS */
3564 			index = (16 * (entry & 1)) + (8 * type);
3565 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3566 			ivar &= ~((uint32_t)0xFF << index);
3567 			ivar |= ((uint32_t)vector << index);
3568 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3569 		}
3570 
3571 	default:
3572 		break;
3573 	}
3574 }
3575 
3576 void
3577 ixgbe_configure_ivars(struct ix_softc *sc)
3578 {
3579 	struct ix_queue *que = sc->queues;
3580 	uint32_t newitr;
3581 	int i;
3582 
3583 	newitr = (4000000 / IXGBE_INTS_PER_SEC) & 0x0FF8;
3584 
3585 	for (i = 0; i < sc->num_queues; i++, que++) {
3586 		/* First the RX queue entry */
3587 		ixgbe_set_ivar(sc, i, que->msix, 0);
3588 		/* ... and the TX */
3589 		ixgbe_set_ivar(sc, i, que->msix, 1);
3590 		/* Set an Initial EITR value */
3591 		IXGBE_WRITE_REG(&sc->hw,
3592 		    IXGBE_EITR(que->msix), newitr);
3593 	}
3594 
3595 	/* For the Link interrupt */
3596 	ixgbe_set_ivar(sc, 1, sc->linkvec, -1);
3597 }
3598 
3599 /*
3600  * SFP module interrupts handler
3601  */
3602 void
3603 ixgbe_handle_mod(struct ix_softc *sc)
3604 {
3605 	struct ixgbe_hw *hw = &sc->hw;
3606 	uint32_t err;
3607 
3608 	err = hw->phy.ops.identify_sfp(hw);
3609 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3610 		printf("%s: Unsupported SFP+ module type was detected!\n",
3611 		    sc->dev.dv_xname);
3612 		return;
3613 	}
3614 	err = hw->mac.ops.setup_sfp(hw);
3615 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3616 		printf("%s: Setup failure - unsupported SFP+ module type!\n",
3617 		    sc->dev.dv_xname);
3618 		return;
3619 	}
3620 
3621 	ixgbe_handle_msf(sc);
3622 }
3623 
3624 
3625 /*
3626  * MSF (multispeed fiber) interrupts handler
3627  */
3628 void
3629 ixgbe_handle_msf(struct ix_softc *sc)
3630 {
3631 	struct ixgbe_hw *hw = &sc->hw;
3632 	uint32_t autoneg;
3633 	bool negotiate;
3634 
3635 	autoneg = hw->phy.autoneg_advertised;
3636 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) {
3637 		if (hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate))
3638 			return;
3639 	}
3640 	if (hw->mac.ops.setup_link)
3641 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3642 
3643 	ifmedia_delete_instance(&sc->media, IFM_INST_ANY);
3644 	ixgbe_add_media_types(sc);
3645 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
3646 }
3647 
3648 /*
3649  * External PHY interrupts handler
3650  */
3651 void
3652 ixgbe_handle_phy(struct ix_softc *sc)
3653 {
3654 	struct ixgbe_hw *hw = &sc->hw;
3655 	int error;
3656 
3657 	error = hw->phy.ops.handle_lasi(hw);
3658 	if (error == IXGBE_ERR_OVERTEMP)
3659 		printf("%s: CRITICAL: EXTERNAL PHY OVER TEMP!! "
3660 		    " PHY will downshift to lower power state!\n",
3661 		    sc->dev.dv_xname);
3662 	else if (error)
3663 		printf("%s: Error handling LASI interrupt: %d\n",
3664 		    sc->dev.dv_xname, error);
3665 
3666 }
3667 
3668 #if NKSTAT > 0
3669 enum ix_counter_idx {
3670 	ix_counter_crcerrs,
3671 	ix_counter_lxontxc,
3672 	ix_counter_lxonrxc,
3673 	ix_counter_lxofftxc,
3674 	ix_counter_lxoffrxc,
3675 	ix_counter_prc64,
3676 	ix_counter_prc127,
3677 	ix_counter_prc255,
3678 	ix_counter_prc511,
3679 	ix_counter_prc1023,
3680 	ix_counter_prc1522,
3681 	ix_counter_gptc,
3682 	ix_counter_gorc,
3683 	ix_counter_gotc,
3684 	ix_counter_ruc,
3685 	ix_counter_rfc,
3686 	ix_counter_roc,
3687 	ix_counter_rjc,
3688 	ix_counter_tor,
3689 	ix_counter_tpr,
3690 	ix_counter_tpt,
3691 	ix_counter_gprc,
3692 	ix_counter_bprc,
3693 	ix_counter_mprc,
3694 	ix_counter_ptc64,
3695 	ix_counter_ptc127,
3696 	ix_counter_ptc255,
3697 	ix_counter_ptc511,
3698 	ix_counter_ptc1023,
3699 	ix_counter_ptc1522,
3700 	ix_counter_mptc,
3701 	ix_counter_bptc,
3702 
3703 	ix_counter_num,
3704 };
3705 
3706 CTASSERT(KSTAT_KV_U_PACKETS <= 0xff);
3707 CTASSERT(KSTAT_KV_U_BYTES <= 0xff);
3708 
3709 struct ix_counter {
3710 	char			 name[KSTAT_KV_NAMELEN];
3711 	uint32_t		 reg;
3712 	uint8_t			 width;
3713 	uint8_t			 unit;
3714 };
3715 
3716 static const struct ix_counter ix_counters[ix_counter_num] = {
3717 	[ix_counter_crcerrs] = {	"crc errs",	IXGBE_CRCERRS,	32,
3718 					    KSTAT_KV_U_PACKETS },
3719 	[ix_counter_lxontxc] = {	"tx link xon",	IXGBE_LXONTXC,	32,
3720 					    KSTAT_KV_U_PACKETS },
3721 	[ix_counter_lxonrxc] = {	"rx link xon",	0,		32,
3722 					    KSTAT_KV_U_PACKETS },
3723 	[ix_counter_lxofftxc] = {	"tx link xoff",	IXGBE_LXOFFTXC,	32,
3724 					    KSTAT_KV_U_PACKETS },
3725 	[ix_counter_lxoffrxc] = {	"rx link xoff",	0,		32,
3726 					    KSTAT_KV_U_PACKETS },
3727 	[ix_counter_prc64] = {		"rx 64B",	IXGBE_PRC64,	32,
3728 					    KSTAT_KV_U_PACKETS },
3729 	[ix_counter_prc127] = {		"rx 65-127B",	IXGBE_PRC127,	32,
3730 					    KSTAT_KV_U_PACKETS },
3731 	[ix_counter_prc255] = {		"rx 128-255B",	IXGBE_PRC255,	32,
3732 					    KSTAT_KV_U_PACKETS },
3733 	[ix_counter_prc511] = {		"rx 256-511B",	IXGBE_PRC511,	32,
3734 					    KSTAT_KV_U_PACKETS },
3735 	[ix_counter_prc1023] = {	"rx 512-1023B",	IXGBE_PRC1023,	32,
3736 					    KSTAT_KV_U_PACKETS },
3737 	[ix_counter_prc1522] = {	"rx 1024-maxB",	IXGBE_PRC1522,	32,
3738 					    KSTAT_KV_U_PACKETS },
3739 	[ix_counter_gptc] = {		"tx good",	IXGBE_GPTC,	32,
3740 					    KSTAT_KV_U_PACKETS },
3741 	[ix_counter_gorc] = {		"rx good",	IXGBE_GORCL,	36,
3742 					    KSTAT_KV_U_BYTES },
3743 	[ix_counter_gotc] = {		"tx good",	IXGBE_GOTCL,	36,
3744 					    KSTAT_KV_U_BYTES },
3745 	[ix_counter_ruc] = {		"rx undersize",	IXGBE_RUC,	32,
3746 					    KSTAT_KV_U_PACKETS },
3747 	[ix_counter_rfc] = {		"rx fragment",	IXGBE_RFC,	32,
3748 					    KSTAT_KV_U_PACKETS },
3749 	[ix_counter_roc] = {		"rx oversize",	IXGBE_ROC,	32,
3750 					    KSTAT_KV_U_PACKETS },
3751 	[ix_counter_rjc] = {		"rx jabber",	IXGBE_RJC,	32,
3752 					    KSTAT_KV_U_PACKETS },
3753 	[ix_counter_tor] = {		"rx total",	IXGBE_TORL,	36,
3754 					    KSTAT_KV_U_BYTES },
3755 	[ix_counter_tpr] = {		"rx total",	IXGBE_TPR,	32,
3756 					    KSTAT_KV_U_PACKETS },
3757 	[ix_counter_tpt] = {		"tx total",	IXGBE_TPT,	32,
3758 					    KSTAT_KV_U_PACKETS },
3759 	[ix_counter_gprc] = {		"rx good",	IXGBE_GPRC,	32,
3760 					    KSTAT_KV_U_PACKETS },
3761 	[ix_counter_bprc] = {		"rx bcast",	IXGBE_BPRC,	32,
3762 					    KSTAT_KV_U_PACKETS },
3763 	[ix_counter_mprc] = {		"rx mcast",	IXGBE_MPRC,	32,
3764 					    KSTAT_KV_U_PACKETS },
3765 	[ix_counter_ptc64] = {		"tx 64B",	IXGBE_PTC64,	32,
3766 					    KSTAT_KV_U_PACKETS },
3767 	[ix_counter_ptc127] = {		"tx 65-127B",	IXGBE_PTC127,	32,
3768 					    KSTAT_KV_U_PACKETS },
3769 	[ix_counter_ptc255] = {		"tx 128-255B",	IXGBE_PTC255,	32,
3770 					    KSTAT_KV_U_PACKETS },
3771 	[ix_counter_ptc511] = {		"tx 256-511B",	IXGBE_PTC511,	32,
3772 					    KSTAT_KV_U_PACKETS },
3773 	[ix_counter_ptc1023] = {	"tx 512-1023B",	IXGBE_PTC1023,	32,
3774 					    KSTAT_KV_U_PACKETS },
3775 	[ix_counter_ptc1522] = {	"tx 1024-maxB",	IXGBE_PTC1522,	32,
3776 					    KSTAT_KV_U_PACKETS },
3777 	[ix_counter_mptc] = {		"tx mcast",	IXGBE_MPTC,	32,
3778 					    KSTAT_KV_U_PACKETS },
3779 	[ix_counter_bptc] = {		"tx bcast",	IXGBE_BPTC,	32,
3780 					    KSTAT_KV_U_PACKETS },
3781 };
3782 
3783 struct ix_rxq_kstats {
3784 	struct kstat_kv	qprc;
3785 	struct kstat_kv	qbrc;
3786 	struct kstat_kv	qprdc;
3787 };
3788 
3789 static const struct ix_rxq_kstats ix_rxq_kstats_tpl = {
3790 	KSTAT_KV_UNIT_INITIALIZER("packets",
3791 	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
3792 	KSTAT_KV_UNIT_INITIALIZER("bytes",
3793 	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
3794 	KSTAT_KV_UNIT_INITIALIZER("qdrops",
3795 	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
3796 };
3797 
3798 struct ix_txq_kstats {
3799 	struct kstat_kv	qptc;
3800 	struct kstat_kv	qbtc;
3801 };
3802 
3803 static const struct ix_txq_kstats ix_txq_kstats_tpl = {
3804 	KSTAT_KV_UNIT_INITIALIZER("packets",
3805 	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
3806 	KSTAT_KV_UNIT_INITIALIZER("bytes",
3807 	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
3808 };
3809 
3810 static int	ix_kstats_read(struct kstat *ks);
3811 static int	ix_rxq_kstats_read(struct kstat *ks);
3812 static int	ix_txq_kstats_read(struct kstat *ks);
3813 
3814 static void
3815 ix_kstats(struct ix_softc *sc)
3816 {
3817 	struct kstat *ks;
3818 	struct kstat_kv *kvs;
3819 	unsigned int i;
3820 
3821 	mtx_init(&sc->sc_kstat_mtx, IPL_SOFTCLOCK);
3822 	timeout_set(&sc->sc_kstat_tmo, ix_kstats_tick, sc);
3823 
3824 	ks = kstat_create(sc->dev.dv_xname, 0, "ix-stats", 0,
3825 	    KSTAT_T_KV, 0);
3826 	if (ks == NULL)
3827 		return;
3828 
3829 	kvs = mallocarray(nitems(ix_counters), sizeof(*kvs),
3830 	    M_DEVBUF, M_WAITOK|M_ZERO);
3831 
3832 	for (i = 0; i < nitems(ix_counters); i++) {
3833 		const struct ix_counter *ixc = &ix_counters[i];
3834 
3835 		kstat_kv_unit_init(&kvs[i], ixc->name,
3836 		    KSTAT_KV_T_COUNTER64, ixc->unit);
3837 	}
3838 
3839 	kstat_set_mutex(ks, &sc->sc_kstat_mtx);
3840 	ks->ks_softc = sc;
3841 	ks->ks_data = kvs;
3842 	ks->ks_datalen = nitems(ix_counters) * sizeof(*kvs);
3843 	ks->ks_read = ix_kstats_read;
3844 
3845 	sc->sc_kstat = ks;
3846 	kstat_install(ks);
3847 }
3848 
3849 static void
3850 ix_rxq_kstats(struct ix_softc *sc, struct ix_rxring *rxr)
3851 {
3852 	struct ix_rxq_kstats *stats;
3853 	struct kstat *ks;
3854 
3855 	ks = kstat_create(sc->dev.dv_xname, 0, "ix-rxq", rxr->me,
3856 	    KSTAT_T_KV, 0);
3857 	if (ks == NULL)
3858 		return;
3859 
3860 	stats = malloc(sizeof(*stats), M_DEVBUF, M_WAITOK|M_ZERO);
3861 	*stats = ix_rxq_kstats_tpl;
3862 
3863 	kstat_set_mutex(ks, &sc->sc_kstat_mtx);
3864 	ks->ks_softc = rxr;
3865 	ks->ks_data = stats;
3866 	ks->ks_datalen = sizeof(*stats);
3867 	ks->ks_read = ix_rxq_kstats_read;
3868 
3869 	rxr->kstat = ks;
3870 	kstat_install(ks);
3871 }
3872 
3873 static void
3874 ix_txq_kstats(struct ix_softc *sc, struct ix_txring *txr)
3875 {
3876 	struct ix_txq_kstats *stats;
3877 	struct kstat *ks;
3878 
3879 	ks = kstat_create(sc->dev.dv_xname, 0, "ix-txq", txr->me,
3880 	    KSTAT_T_KV, 0);
3881 	if (ks == NULL)
3882 		return;
3883 
3884 	stats = malloc(sizeof(*stats), M_DEVBUF, M_WAITOK|M_ZERO);
3885 	*stats = ix_txq_kstats_tpl;
3886 
3887 	kstat_set_mutex(ks, &sc->sc_kstat_mtx);
3888 	ks->ks_softc = txr;
3889 	ks->ks_data = stats;
3890 	ks->ks_datalen = sizeof(*stats);
3891 	ks->ks_read = ix_txq_kstats_read;
3892 
3893 	txr->kstat = ks;
3894 	kstat_install(ks);
3895 }
3896 
3897 /**********************************************************************
3898  *
3899  *  Update the board statistics counters.
3900  *
3901  **********************************************************************/
3902 
3903 static void
3904 ix_kstats_tick(void *arg)
3905 {
3906 	struct ix_softc *sc = arg;
3907 	int i;
3908 
3909 	timeout_add_sec(&sc->sc_kstat_tmo, 1);
3910 
3911 	mtx_enter(&sc->sc_kstat_mtx);
3912 	ix_kstats_read(sc->sc_kstat);
3913 	for (i = 0; i < sc->num_queues; i++) {
3914 		ix_rxq_kstats_read(sc->rx_rings[i].kstat);
3915 		ix_txq_kstats_read(sc->tx_rings[i].kstat);
3916 	}
3917 	mtx_leave(&sc->sc_kstat_mtx);
3918 }
3919 
3920 static uint64_t
3921 ix_read36(struct ixgbe_hw *hw, bus_size_t loreg, bus_size_t hireg)
3922 {
3923 	uint64_t lo, hi;
3924 
3925 	lo = IXGBE_READ_REG(hw, loreg);
3926 	hi = IXGBE_READ_REG(hw, hireg);
3927 
3928 	return (((hi & 0xf) << 32) | lo);
3929 }
3930 
3931 static int
3932 ix_kstats_read(struct kstat *ks)
3933 {
3934 	struct ix_softc *sc = ks->ks_softc;
3935 	struct kstat_kv *kvs = ks->ks_data;
3936 	struct ixgbe_hw	*hw = &sc->hw;
3937 	unsigned int i;
3938 
3939 	for (i = 0; i < nitems(ix_counters); i++) {
3940 		const struct ix_counter *ixc = &ix_counters[i];
3941 		uint32_t reg = ixc->reg;
3942 		uint64_t v;
3943 
3944 		if (reg == 0)
3945 			continue;
3946 
3947 		if (ixc->width > 32) {
3948 			if (sc->hw.mac.type == ixgbe_mac_82598EB)
3949 				v = IXGBE_READ_REG(hw, reg + 4);
3950 			else
3951 				v = ix_read36(hw, reg, reg + 4);
3952 		} else
3953 			v = IXGBE_READ_REG(hw, reg);
3954 
3955 		kstat_kv_u64(&kvs[i]) += v;
3956 	}
3957 
3958 	/* handle the exceptions */
3959 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3960 		kstat_kv_u64(&kvs[ix_counter_lxonrxc]) +=
3961 		    IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3962 		kstat_kv_u64(&kvs[ix_counter_lxoffrxc]) +=
3963 		    IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3964 	} else {
3965 		kstat_kv_u64(&kvs[ix_counter_lxonrxc]) +=
3966 		    IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3967 		kstat_kv_u64(&kvs[ix_counter_lxoffrxc]) +=
3968 		    IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3969 	}
3970 
3971 	getnanouptime(&ks->ks_updated);
3972 
3973 	return (0);
3974 }
3975 
3976 int
3977 ix_rxq_kstats_read(struct kstat *ks)
3978 {
3979 	struct ix_rxq_kstats *stats = ks->ks_data;
3980 	struct ix_rxring *rxr = ks->ks_softc;
3981 	struct ix_softc *sc = rxr->sc;
3982 	struct ixgbe_hw	*hw = &sc->hw;
3983 	uint32_t i = rxr->me;
3984 
3985 	kstat_kv_u64(&stats->qprc) += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3986 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3987 		kstat_kv_u64(&stats->qprdc) +=
3988 		    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3989 		kstat_kv_u64(&stats->qbrc) +=
3990 		    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3991 	} else {
3992 		kstat_kv_u64(&stats->qprdc) +=
3993 		    IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3994 		kstat_kv_u64(&stats->qbrc) +=
3995 		    ix_read36(hw, IXGBE_QBRC_L(i), IXGBE_QBRC_H(i));
3996 	}
3997 
3998 	getnanouptime(&ks->ks_updated);
3999 
4000 	return (0);
4001 }
4002 
4003 int
4004 ix_txq_kstats_read(struct kstat *ks)
4005 {
4006 	struct ix_txq_kstats *stats = ks->ks_data;
4007 	struct ix_txring *txr = ks->ks_softc;
4008 	struct ix_softc *sc = txr->sc;
4009 	struct ixgbe_hw	*hw = &sc->hw;
4010 	uint32_t i = txr->me;
4011 
4012 	kstat_kv_u64(&stats->qptc) += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
4013 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4014 		kstat_kv_u64(&stats->qbtc) +=
4015 		    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
4016 	} else {
4017 		kstat_kv_u64(&stats->qbtc) +=
4018 		    ix_read36(hw, IXGBE_QBTC_L(i), IXGBE_QBTC_H(i));
4019 	}
4020 
4021 	getnanouptime(&ks->ks_updated);
4022 
4023 	return (0);
4024 }
4025 #endif /* NKVSTAT > 0 */
4026 
4027 void
4028 ixgbe_map_queue_statistics(struct ix_softc *sc)
4029 {
4030 	int i;
4031 	uint32_t r;
4032 
4033 	for (i = 0; i < 32; i++) {
4034 		/*
4035 		 * Queues 0-15 are mapped 1:1
4036 		 * Queue 0 -> Counter 0
4037 		 * Queue 1 -> Counter 1
4038 		 * Queue 2 -> Counter 2....
4039 		 * Queues 16-127 are mapped to Counter 0
4040 		 */
4041 		if (i < 4) {
4042 			r = (i * 4 + 0);
4043 			r |= (i * 4 + 1) << 8;
4044 			r |= (i * 4 + 2) << 16;
4045 			r |= (i * 4 + 3) << 24;
4046 		} else
4047 			r = 0;
4048 
4049 		IXGBE_WRITE_REG(&sc->hw, IXGBE_RQSMR(i), r);
4050 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TQSM(i), r);
4051 	}
4052 }
4053