xref: /netbsd-src/sys/dev/pci/igc/if_igc.c (revision 972ad69cba10dfb4a4ac1c1f00f31906419718dc)
1 /*	$NetBSD: if_igc.c,v 1.17 2024/11/24 11:07:03 mlelstv Exp $	*/
2 /*	$OpenBSD: if_igc.c,v 1.13 2023/04/28 10:18:57 bluhm Exp $	*/
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
7  * All rights reserved.
8  * Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: if_igc.c,v 1.17 2024/11/24 11:07:03 mlelstv Exp $");
34 
35 #ifdef _KERNEL_OPT
36 #include "opt_if_igc.h"
37 #if 0 /* notyet */
38 #include "vlan.h"
39 #endif
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/bus.h>
45 #include <sys/cpu.h>
46 #include <sys/device.h>
47 #include <sys/endian.h>
48 #include <sys/intr.h>
49 #include <sys/interrupt.h>
50 #include <sys/kernel.h>
51 #include <sys/kmem.h>
52 #include <sys/mbuf.h>
53 #include <sys/mutex.h>
54 #include <sys/socket.h>
55 #include <sys/workqueue.h>
56 #include <sys/xcall.h>
57 
58 #include <net/bpf.h>
59 #include <net/if.h>
60 #include <net/if_ether.h>
61 #include <net/if_media.h>
62 #include <net/if_vlanvar.h>
63 #include <net/rss_config.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip6.h>
68 #include <netinet/tcp.h>
69 
70 #include <dev/pci/pcivar.h>
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcidevs.h>
73 
74 #include <dev/pci/igc/if_igc.h>
75 #include <dev/pci/igc/igc_evcnt.h>
76 #include <dev/pci/igc/igc_hw.h>
77 #include <dev/mii/miivar.h>
78 
/* Priority constant used for this driver's workqueue(s). */
#define IGC_WORKQUEUE_PRI	PRI_SOFTNET

/*
 * Default RX/TX processing limits.  Each one can be overridden at build
 * time (they are only defined here when not already defined, e.g. via
 * opt_if_igc.h).
 * NOTE(review): the *_INTR_* pair presumably bounds work done directly in
 * the interrupt handler and the other pair bounds deferred processing;
 * confirm against the code that consumes them.
 */
#ifndef IGC_RX_INTR_PROCESS_LIMIT_DEFAULT
#define IGC_RX_INTR_PROCESS_LIMIT_DEFAULT	0
#endif
#ifndef IGC_TX_INTR_PROCESS_LIMIT_DEFAULT
#define IGC_TX_INTR_PROCESS_LIMIT_DEFAULT	0
#endif

#ifndef IGC_RX_PROCESS_LIMIT_DEFAULT
#define IGC_RX_PROCESS_LIMIT_DEFAULT		256
#endif
#ifndef IGC_TX_PROCESS_LIMIT_DEFAULT
#define IGC_TX_PROCESS_LIMIT_DEFAULT		256
#endif

/*
 * Store x, converted from host to little-endian byte order, into the
 * 32-bit (htolem32) or 64-bit (htolem64) object that p points to.
 * p is cast to the matching integer pointer type and dereferenced as-is.
 */
#define	htolem32(p, x)	(*((uint32_t *)(p)) = htole32(x))
#define	htolem64(p, x)	(*((uint64_t *)(p)) = htole64(x))
97 
/*
 * Table of PCI vendor/product pairs handled by this driver, together
 * with the human-readable name printed at attach time.  igc_lookup()
 * walks the table until it reaches the entry whose igcp_name is NULL,
 * so the all-zero sentinel must stay last.
 */
static const struct igc_product {
	pci_vendor_id_t		igcp_vendor;	/* PCI vendor id */
	pci_product_id_t	igcp_product;	/* PCI product id */
	const char		*igcp_name;	/* descriptive device name */
} igc_products[] = {
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_IT,
	    "Intel(R) Ethernet Controller I225-IT(2)" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_LM,
	    "Intel(R) Ethernet Controller I226-LM" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_V,
	    "Intel(R) Ethernet Controller I226-V" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_IT,
	    "Intel(R) Ethernet Controller I226-IT" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I221_V,
	    "Intel(R) Ethernet Controller I221-V" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_BLANK_NVM,
	    "Intel(R) Ethernet Controller I226(blankNVM)" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_LM,
	    "Intel(R) Ethernet Controller I225-LM" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_V,
	    "Intel(R) Ethernet Controller I225-V" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I220_V,
	    "Intel(R) Ethernet Controller I220-V" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_I,
	    "Intel(R) Ethernet Controller I225-I" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_BLANK_NVM,
	    "Intel(R) Ethernet Controller I225(blankNVM)" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_K,
	    "Intel(R) Ethernet Controller I225-K" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_K2,
	    "Intel(R) Ethernet Controller I225-K(2)" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_K,
	    "Intel(R) Ethernet Controller I226-K" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_LMVP,
	    "Intel(R) Ethernet Controller I225-LMvP(2)" },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_LMVP,
	    "Intel(R) Ethernet Controller I226-LMvP" },
	/* End-of-table sentinel (NULL name). */
	{ 0, 0, NULL },
};
137 
/*
 * Debug message categories.  Each is a single bit tested against
 * igc_debug_flags by DPRINTF().
 */
#define	IGC_DF_CFG	0x1
#define	IGC_DF_TX	0x2
#define	IGC_DF_RX	0x4
#define	IGC_DF_MISC	0x8

/*
 * Bitmask of enabled debug categories.  The initial value comes from the
 * IGC_DEBUG_FLAGS build option when it is defined, otherwise all
 * categories start disabled.
 */
#ifdef IGC_DEBUG_FLAGS
int igc_debug_flags = IGC_DEBUG_FLAGS;
#else
int igc_debug_flags = 0;
#endif

/*
 * DPRINTF(flag, fmt, ...): print a debug message, prefixed with the
 * calling function name and source line, when category IGC_DF_<flag>
 * is set in igc_debug_flags.  `flag' is the bare suffix (CFG, TX, RX,
 * MISC); it is token-pasted onto IGC_DF_.
 */
#define	DPRINTF(flag, fmt, args...)		do {			\
	if (igc_debug_flags & (IGC_DF_ ## flag))			\
		printf("%s: %d: " fmt, __func__, __LINE__, ##args);	\
    } while (0)
153 
/*********************************************************************
 *  Function Prototypes
 *
 *  Every function below is file-local (static); the definitions
 *  follow later in this file.
 *********************************************************************/
/* Autoconfiguration entry points (referenced by CFATTACH_DECL3_NEW). */
static int	igc_match(device_t, cfdata_t, void *);
static void	igc_attach(device_t, device_t, void *);
static int	igc_detach(device_t, int);

static void	igc_identify_hardware(struct igc_softc *);
static int	igc_adjust_nqueues(struct igc_softc *);
static int	igc_allocate_pci_resources(struct igc_softc *);
static int	igc_allocate_interrupts(struct igc_softc *);
static int	igc_allocate_queues(struct igc_softc *);
static void	igc_free_pci_resources(struct igc_softc *);
static void	igc_free_interrupts(struct igc_softc *);
static void	igc_free_queues(struct igc_softc *);
static void	igc_reset(struct igc_softc *);
static void	igc_init_dmac(struct igc_softc *, uint32_t);
static int	igc_setup_interrupts(struct igc_softc *);
static void	igc_attach_counters(struct igc_softc *sc);
static void	igc_detach_counters(struct igc_softc *sc);
static void	igc_update_counters(struct igc_softc *sc);
static void	igc_clear_counters(struct igc_softc *sc);
static int	igc_setup_msix(struct igc_softc *);
static int	igc_setup_msi(struct igc_softc *);
static int	igc_setup_intx(struct igc_softc *);
static int	igc_dma_malloc(struct igc_softc *, bus_size_t,
		    struct igc_dma_alloc *);
static void	igc_dma_free(struct igc_softc *, struct igc_dma_alloc *);
static void	igc_setup_interface(struct igc_softc *);

static int	igc_init(struct ifnet *);
static int	igc_init_locked(struct igc_softc *);
static void	igc_start(struct ifnet *);
static int	igc_transmit(struct ifnet *, struct mbuf *);
static void	igc_tx_common_locked(struct ifnet *, struct tx_ring *, int);
static bool	igc_txeof(struct tx_ring *, u_int);
static void	igc_intr_barrier(struct igc_softc *);
static void	igc_stop(struct ifnet *, int);
static void	igc_stop_locked(struct igc_softc *);
static int	igc_ioctl(struct ifnet *, u_long, void *);
#ifdef IF_RXR
static int	igc_rxrinfo(struct igc_softc *, struct if_rxrinfo *);
#endif
static void	igc_rxfill(struct rx_ring *);
static void	igc_rxrefill(struct rx_ring *, int);
static bool	igc_rxeof(struct rx_ring *, u_int);
static int	igc_rx_checksum(struct igc_queue *, uint64_t, uint32_t,
		    uint32_t);
static void	igc_watchdog(struct ifnet *);
static void	igc_tick(void *);
static void	igc_media_status(struct ifnet *, struct ifmediareq *);
static int	igc_media_change(struct ifnet *);
static int	igc_ifflags_cb(struct ethercom *);
static void	igc_set_filter(struct igc_softc *);
static void	igc_update_link_status(struct igc_softc *);
static int	igc_get_buf(struct rx_ring *, int, bool);
static int	igc_tx_ctx_setup(struct tx_ring *, struct mbuf *, int,
		    uint32_t *, uint32_t *);
static int	igc_tso_setup(struct tx_ring *, struct mbuf *, int,
		    uint32_t *, uint32_t *);

static void	igc_configure_queues(struct igc_softc *);
static void	igc_set_queues(struct igc_softc *, uint32_t, uint32_t, int);
static void	igc_enable_queue(struct igc_softc *, uint32_t);
static void	igc_enable_intr(struct igc_softc *);
static void	igc_disable_intr(struct igc_softc *);
static int	igc_intr_link(void *);
static int	igc_intr_queue(void *);
static int	igc_intr(void *);
static void	igc_handle_queue(void *);
static void	igc_handle_queue_work(struct work *, void *);
static void	igc_sched_handle_queue(struct igc_softc *, struct igc_queue *);
static void	igc_barrier_handle_queue(struct igc_softc *);

static int	igc_allocate_transmit_buffers(struct tx_ring *);
static int	igc_setup_transmit_structures(struct igc_softc *);
static int	igc_setup_transmit_ring(struct tx_ring *);
static void	igc_initialize_transmit_unit(struct igc_softc *);
static void	igc_free_transmit_structures(struct igc_softc *);
static void	igc_free_transmit_buffers(struct tx_ring *);
static void	igc_withdraw_transmit_packets(struct tx_ring *, bool);
static int	igc_allocate_receive_buffers(struct rx_ring *);
static int	igc_setup_receive_structures(struct igc_softc *);
static int	igc_setup_receive_ring(struct rx_ring *);
static void	igc_initialize_receive_unit(struct igc_softc *);
static void	igc_free_receive_structures(struct igc_softc *);
static void	igc_free_receive_buffers(struct rx_ring *);
static void	igc_clear_receive_status(struct rx_ring *);
static void	igc_initialize_rss_mapping(struct igc_softc *);

static void	igc_get_hw_control(struct igc_softc *);
static void	igc_release_hw_control(struct igc_softc *);
static int	igc_is_valid_ether_addr(uint8_t *);
static void	igc_print_devinfo(struct igc_softc *);

/*
 * Autoconfiguration glue for the "igc" driver: the softc size plus the
 * match, attach and detach routines.  The remaining hooks are passed as
 * NULL and the flags argument as 0.
 */
CFATTACH_DECL3_NEW(igc, sizeof(struct igc_softc),
    igc_match, igc_attach, igc_detach, NULL, NULL, NULL, 0);
251 
252 static inline int
253 igc_txdesc_incr(struct igc_softc *sc, int id)
254 {
255 
256 	if (++id == sc->num_tx_desc)
257 		id = 0;
258 	return id;
259 }
260 
261 static inline int __unused
262 igc_txdesc_decr(struct igc_softc *sc, int id)
263 {
264 
265 	if (--id < 0)
266 		id = sc->num_tx_desc - 1;
267 	return id;
268 }
269 
270 static inline void
271 igc_txdesc_sync(struct tx_ring *txr, int id, int ops)
272 {
273 
274 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
275 	    id * sizeof(union igc_adv_tx_desc), sizeof(union igc_adv_tx_desc),
276 	    ops);
277 }
278 
279 static inline int
280 igc_rxdesc_incr(struct igc_softc *sc, int id)
281 {
282 
283 	if (++id == sc->num_rx_desc)
284 		id = 0;
285 	return id;
286 }
287 
288 static inline int
289 igc_rxdesc_decr(struct igc_softc *sc, int id)
290 {
291 
292 	if (--id < 0)
293 		id = sc->num_rx_desc - 1;
294 	return id;
295 }
296 
297 static inline void
298 igc_rxdesc_sync(struct rx_ring *rxr, int id, int ops)
299 {
300 
301 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
302 	    id * sizeof(union igc_adv_rx_desc), sizeof(union igc_adv_rx_desc),
303 	    ops);
304 }
305 
306 static const struct igc_product *
307 igc_lookup(const struct pci_attach_args *pa)
308 {
309 	const struct igc_product *igcp;
310 
311 	for (igcp = igc_products; igcp->igcp_name != NULL; igcp++) {
312 		if (PCI_VENDOR(pa->pa_id) == igcp->igcp_vendor &&
313 		    PCI_PRODUCT(pa->pa_id) == igcp->igcp_product)
314 			return igcp;
315 	}
316 	return NULL;
317 }
318 
319 /*********************************************************************
320  *  Device identification routine
321  *
322  *  igc_match determines if the driver should be loaded on
323  *  adapter based on PCI vendor/device id of the adapter.
324  *
325  *  return 0 on success, positive on failure
326  *********************************************************************/
327 static int
328 igc_match(device_t parent, cfdata_t match, void *aux)
329 {
330 	struct pci_attach_args *pa = aux;
331 
332 	if (igc_lookup(pa) != NULL)
333 		return 1;
334 
335 	return 0;
336 }
337 
338 /*********************************************************************
339  *  Device initialization routine
340  *
341  *  The attach entry point is called when the driver is being loaded.
342  *  This routine identifies the type of hardware, allocates all resources
343  *  and initializes the hardware.
344  *
345  *  return 0 on success, positive on failure
346  *********************************************************************/
347 static void
348 igc_attach(device_t parent, device_t self, void *aux)
349 {
350 	struct pci_attach_args *pa = aux;
351 	struct igc_softc *sc = device_private(self);
352 	struct igc_hw *hw = &sc->hw;
353 
354 	const struct igc_product *igcp = igc_lookup(pa);
355 	KASSERT(igcp != NULL);
356 
357 	sc->sc_dev = self;
358 	callout_init(&sc->sc_tick_ch, CALLOUT_MPSAFE);
359 	callout_setfunc(&sc->sc_tick_ch, igc_tick, sc);
360 	sc->sc_core_stopping = false;
361 
362 	sc->osdep.os_sc = sc;
363 	sc->osdep.os_pa = *pa;
364 #ifndef __aarch64__
365 	/*
366 	 * XXX PR port-arm/57643
367 	 * 64-bit DMA does not work at least for LX2K with 32/64GB memory.
368 	 * smmu(4) support may be required.
369 	 */
370 	if (pci_dma64_available(pa)) {
371 		aprint_verbose(", 64-bit DMA");
372 		sc->osdep.os_dmat = pa->pa_dmat64;
373 	} else
374 #endif
375 	{
376 		aprint_verbose(", 32-bit DMA");
377 		sc->osdep.os_dmat = pa->pa_dmat;
378 	}
379 
380 	pci_aprint_devinfo_fancy(pa, "Ethernet controller", igcp->igcp_name, 1);
381 
382 	/* Determine hardware and mac info */
383 	igc_identify_hardware(sc);
384 
385 	sc->num_tx_desc = IGC_DEFAULT_TXD;
386 	sc->num_rx_desc = IGC_DEFAULT_RXD;
387 
388 	 /* Setup PCI resources */
389 	if (igc_allocate_pci_resources(sc)) {
390 		aprint_error_dev(sc->sc_dev,
391 		    "unable to allocate PCI resources\n");
392 		goto err_pci;
393 	}
394 
395 	if (igc_allocate_interrupts(sc)) {
396 		aprint_error_dev(sc->sc_dev, "unable to allocate interrupts\n");
397 		goto err_pci;
398 	}
399 
400 	/* Allocate TX/RX queues */
401 	if (igc_allocate_queues(sc)) {
402 		aprint_error_dev(sc->sc_dev, "unable to allocate queues\n");
403 		goto err_alloc_intr;
404 	}
405 
406 	/* Do shared code initialization */
407 	if (igc_setup_init_funcs(hw, true)) {
408 		aprint_error_dev(sc->sc_dev, "unable to initialize\n");
409 		goto err_alloc_intr;
410 	}
411 
412 	hw->mac.autoneg = DO_AUTO_NEG;
413 	hw->phy.autoneg_wait_to_complete = false;
414 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
415 
416 	/* Copper options. */
417 	if (hw->phy.media_type == igc_media_type_copper)
418 		hw->phy.mdix = AUTO_ALL_MODES;
419 
420 	/* Set the max frame size. */
421 	sc->hw.mac.max_frame_size = 9234;
422 
423 	/* Allocate multicast array memory. */
424 	sc->mta = kmem_alloc(IGC_MTA_LEN, KM_SLEEP);
425 
426 	/* Check SOL/IDER usage. */
427 	if (igc_check_reset_block(hw)) {
428 		aprint_error_dev(sc->sc_dev,
429 		    "PHY reset is blocked due to SOL/IDER session\n");
430 	}
431 
432 	/* Disable Energy Efficient Ethernet. */
433 	sc->hw.dev_spec._i225.eee_disable = true;
434 
435 	igc_reset_hw(hw);
436 
437 	/* Make sure we have a good EEPROM before we read from it. */
438 	if (igc_validate_nvm_checksum(hw) < 0) {
439 		/*
440 		 * Some PCI-E parts fail the first check due to
441 		 * the link being in sleep state, call it again,
442 		 * if it fails a second time its a real issue.
443 		 */
444 		if (igc_validate_nvm_checksum(hw) < 0) {
445 			aprint_error_dev(sc->sc_dev,
446 			    "EEPROM checksum invalid\n");
447 			goto err_late;
448 		}
449 	}
450 
451 	/* Copy the permanent MAC address out of the EEPROM. */
452 	if (igc_read_mac_addr(hw) < 0) {
453 		aprint_error_dev(sc->sc_dev,
454 		    "unable to read MAC address from EEPROM\n");
455 		goto err_late;
456 	}
457 
458 	if (!igc_is_valid_ether_addr(hw->mac.addr)) {
459 		aprint_error_dev(sc->sc_dev, "invalid MAC address\n");
460 		goto err_late;
461 	}
462 
463 	if (igc_setup_interrupts(sc))
464 		goto err_late;
465 
466 	/* Attach counters. */
467 	igc_attach_counters(sc);
468 
469 	/* Setup OS specific network interface. */
470 	igc_setup_interface(sc);
471 
472 	igc_print_devinfo(sc);
473 
474 	igc_reset(sc);
475 	hw->mac.get_link_status = true;
476 	igc_update_link_status(sc);
477 
478 	/* The driver can now take control from firmware. */
479 	igc_get_hw_control(sc);
480 
481 	aprint_normal_dev(sc->sc_dev, "Ethernet address %s\n",
482 	    ether_sprintf(sc->hw.mac.addr));
483 
484 	if (pmf_device_register(self, NULL, NULL))
485 		pmf_class_network_register(self, &sc->sc_ec.ec_if);
486 	else
487 		aprint_error_dev(self, "couldn't establish power handler\n");
488 
489 	return;
490 
491  err_late:
492 	igc_release_hw_control(sc);
493  err_alloc_intr:
494 	igc_free_interrupts(sc);
495  err_pci:
496 	igc_free_pci_resources(sc);
497 	kmem_free(sc->mta, IGC_MTA_LEN);
498 }
499 
500 /*********************************************************************
501  *  Device removal routine
502  *
503  *  The detach entry point is called when the driver is being removed.
504  *  This routine stops the adapter and deallocates all the resources
505  *  that were allocated for driver operation.
506  *
507  *  return 0 on success, positive on failure
508  *********************************************************************/
509 static int
510 igc_detach(device_t self, int flags)
511 {
512 	struct igc_softc *sc = device_private(self);
513 	struct ifnet *ifp = &sc->sc_ec.ec_if;
514 
515 	mutex_enter(&sc->sc_core_lock);
516 	igc_stop_locked(sc);
517 	mutex_exit(&sc->sc_core_lock);
518 
519 	igc_detach_counters(sc);
520 
521 	igc_free_queues(sc);
522 
523 	igc_phy_hw_reset(&sc->hw);
524 	igc_release_hw_control(sc);
525 
526 	ether_ifdetach(ifp);
527 	if_detach(ifp);
528 	ifmedia_fini(&sc->media);
529 
530 	igc_free_interrupts(sc);
531 	igc_free_pci_resources(sc);
532 	kmem_free(sc->mta, IGC_MTA_LEN);
533 
534 	mutex_destroy(&sc->sc_core_lock);
535 
536 	return 0;
537 }
538 
539 static void
540 igc_identify_hardware(struct igc_softc *sc)
541 {
542 	struct igc_osdep *os = &sc->osdep;
543 	struct pci_attach_args *pa = &os->os_pa;
544 
545 	/* Save off the information about this board. */
546 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
547 
548 	/* Do shared code init and setup. */
549 	if (igc_set_mac_type(&sc->hw)) {
550 		aprint_error_dev(sc->sc_dev, "unable to identify hardware\n");
551 		return;
552 	}
553 }
554 
555 static int
556 igc_allocate_pci_resources(struct igc_softc *sc)
557 {
558 	struct igc_osdep *os = &sc->osdep;
559 	struct pci_attach_args *pa = &os->os_pa;
560 
561 	/*
562 	 * Enable bus mastering and memory-mapped I/O for sure.
563 	 */
564 	pcireg_t csr =
565 	    pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
566 	csr |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
567 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, csr);
568 
569 	const pcireg_t memtype =
570 	    pci_mapreg_type(pa->pa_pc, pa->pa_tag, IGC_PCIREG);
571 	if (pci_mapreg_map(pa, IGC_PCIREG, memtype, 0, &os->os_memt,
572 	    &os->os_memh, &os->os_membase, &os->os_memsize)) {
573 		aprint_error_dev(sc->sc_dev, "unable to map registers\n");
574 		return ENXIO;
575 	}
576 
577 	sc->hw.hw_addr = os->os_membase;
578 	sc->hw.back = os;
579 
580 	return 0;
581 }
582 
583 static int __unused
584 igc_adjust_nqueues(struct igc_softc *sc)
585 {
586 	struct pci_attach_args *pa = &sc->osdep.os_pa;
587 	int nqueues = MIN(IGC_MAX_NQUEUES, ncpu);
588 
589 	const int nmsix = pci_msix_count(pa->pa_pc, pa->pa_tag);
590 	if (nmsix <= 1)
591 		nqueues = 1;
592 	else if (nmsix < nqueues + 1)
593 		nqueues = nmsix - 1;
594 
595 	return nqueues;
596 }
597 
598 static int
599 igc_allocate_interrupts(struct igc_softc *sc)
600 {
601 	struct pci_attach_args *pa = &sc->osdep.os_pa;
602 	int error;
603 
604 #ifndef IGC_DISABLE_MSIX
605 	const int nqueues = igc_adjust_nqueues(sc);
606 	if (nqueues > 1) {
607 		sc->sc_nintrs = nqueues + 1;
608 		error = pci_msix_alloc_exact(pa, &sc->sc_intrs, sc->sc_nintrs);
609 		if (!error) {
610 			sc->sc_nqueues = nqueues;
611 			sc->sc_intr_type = PCI_INTR_TYPE_MSIX;
612 			return 0;
613 		}
614 	}
615 #endif
616 
617 	/* fallback to MSI */
618 	sc->sc_nintrs = sc->sc_nqueues = 1;
619 
620 #ifndef IGC_DISABLE_MSI
621 	error = pci_msi_alloc_exact(pa, &sc->sc_intrs, sc->sc_nintrs);
622 	if (!error) {
623 		sc->sc_intr_type = PCI_INTR_TYPE_MSI;
624 		return 0;
625 	}
626 #endif
627 
628 	/* fallback to INTx */
629 
630 	error = pci_intx_alloc(pa, &sc->sc_intrs);
631 	if (!error) {
632 		sc->sc_intr_type = PCI_INTR_TYPE_INTX;
633 		return 0;
634 	}
635 
636 	return error;
637 }
638 
639 static int
640 igc_allocate_queues(struct igc_softc *sc)
641 {
642 	device_t dev = sc->sc_dev;
643 	int rxconf = 0, txconf = 0;
644 
645 	/* Allocate the top level queue structs. */
646 	sc->queues =
647 	    kmem_zalloc(sc->sc_nqueues * sizeof(struct igc_queue), KM_SLEEP);
648 
649 	/* Allocate the TX ring. */
650 	sc->tx_rings =
651 	    kmem_zalloc(sc->sc_nqueues * sizeof(struct tx_ring), KM_SLEEP);
652 
653 	/* Allocate the RX ring. */
654 	sc->rx_rings =
655 	    kmem_zalloc(sc->sc_nqueues * sizeof(struct rx_ring), KM_SLEEP);
656 
657 	/* Set up the TX queues. */
658 	for (int iq = 0; iq < sc->sc_nqueues; iq++, txconf++) {
659 		struct tx_ring *txr = &sc->tx_rings[iq];
660 		const int tsize = roundup2(
661 		    sc->num_tx_desc * sizeof(union igc_adv_tx_desc),
662 		    IGC_DBA_ALIGN);
663 
664 		txr->sc = sc;
665 		txr->txr_igcq = &sc->queues[iq];
666 		txr->me = iq;
667 		if (igc_dma_malloc(sc, tsize, &txr->txdma)) {
668 			aprint_error_dev(dev,
669 			    "unable to allocate TX descriptor\n");
670 			goto fail;
671 		}
672 		txr->tx_base = (union igc_adv_tx_desc *)txr->txdma.dma_vaddr;
673 		memset(txr->tx_base, 0, tsize);
674 	}
675 
676 	/* Prepare transmit descriptors and buffers. */
677 	if (igc_setup_transmit_structures(sc)) {
678 		aprint_error_dev(dev, "unable to setup transmit structures\n");
679 		goto fail;
680 	}
681 
682 	/* Set up the RX queues. */
683 	for (int iq = 0; iq < sc->sc_nqueues; iq++, rxconf++) {
684 		struct rx_ring *rxr = &sc->rx_rings[iq];
685 		const int rsize = roundup2(
686 		    sc->num_rx_desc * sizeof(union igc_adv_rx_desc),
687 		    IGC_DBA_ALIGN);
688 
689 		rxr->sc = sc;
690 		rxr->rxr_igcq = &sc->queues[iq];
691 		rxr->me = iq;
692 #ifdef OPENBSD
693 		timeout_set(&rxr->rx_refill, igc_rxrefill, rxr);
694 #endif
695 		if (igc_dma_malloc(sc, rsize, &rxr->rxdma)) {
696 			aprint_error_dev(dev,
697 			    "unable to allocate RX descriptor\n");
698 			goto fail;
699 		}
700 		rxr->rx_base = (union igc_adv_rx_desc *)rxr->rxdma.dma_vaddr;
701 		memset(rxr->rx_base, 0, rsize);
702 	}
703 
704 	sc->rx_mbuf_sz = MCLBYTES;
705 	/* Prepare receive descriptors and buffers. */
706 	if (igc_setup_receive_structures(sc)) {
707 		aprint_error_dev(sc->sc_dev,
708 		    "unable to setup receive structures\n");
709 		goto fail;
710 	}
711 
712 	/* Set up the queue holding structs. */
713 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
714 		struct igc_queue *q = &sc->queues[iq];
715 
716 		q->sc = sc;
717 		q->txr = &sc->tx_rings[iq];
718 		q->rxr = &sc->rx_rings[iq];
719 	}
720 
721 	return 0;
722 
723  fail:
724 	for (struct rx_ring *rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
725 		igc_dma_free(sc, &rxr->rxdma);
726 	for (struct tx_ring *txr = sc->tx_rings; txconf > 0; txr++, txconf--)
727 		igc_dma_free(sc, &txr->txdma);
728 
729 	kmem_free(sc->rx_rings, sc->sc_nqueues * sizeof(struct rx_ring));
730 	sc->rx_rings = NULL;
731 	kmem_free(sc->tx_rings, sc->sc_nqueues * sizeof(struct tx_ring));
732 	sc->tx_rings = NULL;
733 	kmem_free(sc->queues, sc->sc_nqueues * sizeof(struct igc_queue));
734 	sc->queues = NULL;
735 
736 	return ENOMEM;
737 }
738 
739 static void
740 igc_free_pci_resources(struct igc_softc *sc)
741 {
742 	struct igc_osdep *os = &sc->osdep;
743 
744 	if (os->os_membase != 0)
745 		bus_space_unmap(os->os_memt, os->os_memh, os->os_memsize);
746 	os->os_membase = 0;
747 }
748 
749 static void
750 igc_free_interrupts(struct igc_softc *sc)
751 {
752 	struct pci_attach_args *pa = &sc->osdep.os_pa;
753 	pci_chipset_tag_t pc = pa->pa_pc;
754 
755 	for (int i = 0; i < sc->sc_nintrs; i++) {
756 		if (sc->sc_ihs[i] != NULL) {
757 			pci_intr_disestablish(pc, sc->sc_ihs[i]);
758 			sc->sc_ihs[i] = NULL;
759 		}
760 	}
761 	pci_intr_release(pc, sc->sc_intrs, sc->sc_nintrs);
762 }
763 
764 static void
765 igc_free_queues(struct igc_softc *sc)
766 {
767 
768 	igc_free_receive_structures(sc);
769 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
770 		struct rx_ring *rxr = &sc->rx_rings[iq];
771 
772 		igc_dma_free(sc, &rxr->rxdma);
773 	}
774 
775 	igc_free_transmit_structures(sc);
776 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
777 		struct tx_ring *txr = &sc->tx_rings[iq];
778 
779 		igc_dma_free(sc, &txr->txdma);
780 	}
781 
782 	kmem_free(sc->rx_rings, sc->sc_nqueues * sizeof(struct rx_ring));
783 	kmem_free(sc->tx_rings, sc->sc_nqueues * sizeof(struct tx_ring));
784 	kmem_free(sc->queues, sc->sc_nqueues * sizeof(struct igc_queue));
785 }
786 
787 /*********************************************************************
788  *
789  *  Initialize the hardware to a configuration as specified by the
790  *  adapter structure.
791  *
792  **********************************************************************/
793 static void
794 igc_reset(struct igc_softc *sc)
795 {
796 	struct igc_hw *hw = &sc->hw;
797 
798 	/* Let the firmware know the OS is in control */
799 	igc_get_hw_control(sc);
800 
801 	/*
802 	 * Packet Buffer Allocation (PBA)
803 	 * Writing PBA sets the receive portion of the buffer
804 	 * the remainder is used for the transmit buffer.
805 	 */
806 	const uint32_t pba = IGC_PBA_34K;
807 
808 	/*
809 	 * These parameters control the automatic generation (Tx) and
810 	 * response (Rx) to Ethernet PAUSE frames.
811 	 * - High water mark should allow for at least two frames to be
812 	 *   received after sending an XOFF.
813 	 * - Low water mark works best when it is very near the high water mark.
814 	 *   This allows the receiver to restart by sending XON when it has
815 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
816 	 *   restart after one full frame is pulled from the buffer. There
817 	 *   could be several smaller frames in the buffer and if so they will
818 	 *   not trigger the XON until their total number reduces the buffer
819 	 *   by 1500.
820 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
821 	 */
822 	const uint16_t rx_buffer_size = (pba & 0xffff) << 10;
823 
824 	hw->fc.high_water = rx_buffer_size -
825 	    roundup2(sc->hw.mac.max_frame_size, 1024);
826 	/* 16-byte granularity */
827 	hw->fc.low_water = hw->fc.high_water - 16;
828 
829 	if (sc->fc) /* locally set flow control value? */
830 		hw->fc.requested_mode = sc->fc;
831 	else
832 		hw->fc.requested_mode = igc_fc_full;
833 
834 	hw->fc.pause_time = IGC_FC_PAUSE_TIME;
835 
836 	hw->fc.send_xon = true;
837 
838 	/* Issue a global reset */
839 	igc_reset_hw(hw);
840 	IGC_WRITE_REG(hw, IGC_WUC, 0);
841 
842 	/* and a re-init */
843 	if (igc_init_hw(hw) < 0) {
844 		aprint_error_dev(sc->sc_dev, "unable to reset hardware\n");
845 		return;
846 	}
847 
848 	/* Setup DMA Coalescing */
849 	igc_init_dmac(sc, pba);
850 
851 	IGC_WRITE_REG(hw, IGC_VET, ETHERTYPE_VLAN);
852 	igc_get_phy_info(hw);
853 	igc_check_for_link(hw);
854 }
855 
/*********************************************************************
 *
 *  Initialize the DMA Coalescing feature
 *
 *  sc  - driver softc; sc->dmac selects the behaviour: 0 disables
 *        DMA coalescing, any other value enables it and is used to
 *        derive the watchdog timer field written to IGC_DMACR.
 *  pba - packet buffer allocation value (igc_reset() passes
 *        IGC_PBA_34K); used to derive the coalescing thresholds.
 *
 **********************************************************************/
static void
igc_init_dmac(struct igc_softc *sc, uint32_t pba)
{
	struct igc_hw *hw = &sc->hw;
	const uint16_t max_frame_size = sc->hw.mac.max_frame_size;
	uint32_t reg, status;

	if (sc->dmac == 0) { /* Disabling it */
		/*
		 * Note: this writes every bit except DMAC_EN as 1 rather
		 * than doing a read-modify-write of the register.
		 */
		reg = ~IGC_DMACR_DMAC_EN;	/* XXXRO */
		IGC_WRITE_REG(hw, IGC_DMACR, reg);
		DPRINTF(MISC, "DMA coalescing disabled\n");
		return;
	} else {
		device_printf(sc->sc_dev, "DMA coalescing enabled\n");
	}

	/* Set starting threshold */
	IGC_WRITE_REG(hw, IGC_DMCTXTH, 0);

	/*
	 * Receive threshold for coalescing: 64 * pba minus one max-sized
	 * frame in 16-byte units, but never below 64 * (pba - 6).
	 */
	uint16_t hwm = 64 * pba - max_frame_size / 16;
	if (hwm < 64 * (pba - 6))
		hwm = 64 * (pba - 6);
	reg = IGC_READ_REG(hw, IGC_FCRTC);
	reg &= ~IGC_FCRTC_RTH_COAL_MASK;
	reg |= (hwm << IGC_FCRTC_RTH_COAL_SHIFT) & IGC_FCRTC_RTH_COAL_MASK;
	IGC_WRITE_REG(hw, IGC_FCRTC, reg);

	/*
	 * DMA coalescing threshold: pba minus one max-sized frame in
	 * 512-byte units, but never below pba - 10.
	 */
	uint32_t dmac = pba - max_frame_size / 512;
	if (dmac < pba - 10)
		dmac = pba - 10;
	reg = IGC_READ_REG(hw, IGC_DMACR);
	reg &= ~IGC_DMACR_DMACTHR_MASK;
	reg |= (dmac << IGC_DMACR_DMACTHR_SHIFT) & IGC_DMACR_DMACTHR_MASK;

	/* transition to L0x or L1 if available..*/
	reg |= IGC_DMACR_DMAC_EN | IGC_DMACR_DMAC_LX_MASK;

	/*
	 * Check if status is 2.5Gb backplane connection before
	 * configuring the watchdog timer.  The timer field counts in
	 * 12.8usec intervals on a 2.5Gb connection (sc->dmac * 5 / 64 is
	 * sc->dmac / 12.8) and in 32usec intervals otherwise
	 * (sc->dmac / 32).
	 */
	status = IGC_READ_REG(hw, IGC_STATUS);
	if ((status & IGC_STATUS_2P5_SKU) &&
	    !(status & IGC_STATUS_2P5_SKU_OVER))
		reg |= (sc->dmac * 5) >> 6;
	else
		reg |= sc->dmac >> 5;

	IGC_WRITE_REG(hw, IGC_DMACR, reg);

	IGC_WRITE_REG(hw, IGC_DMCRTRH, 0);

	/* Set the interval before transition */
	reg = IGC_READ_REG(hw, IGC_DMCTLX);
	reg |= IGC_DMCTLX_DCFLUSH_DIS;

	/*
	 * In a 2.5Gb connection the TTLX unit is 0.4 usec, so a 4 usec
	 * delay is 4 / 0.4 = 10 = 0xA units; otherwise the value 0x4 is
	 * used for the same 4 usec delay.
	 */
	status = IGC_READ_REG(hw, IGC_STATUS);
	if ((status & IGC_STATUS_2P5_SKU) &&
	    !(status & IGC_STATUS_2P5_SKU_OVER))
		reg |= 0xA;
	else
		reg |= 0x4;

	IGC_WRITE_REG(hw, IGC_DMCTLX, reg);

	/* free space in tx packet buffer to wake from DMA coal */
	IGC_WRITE_REG(hw, IGC_DMCTXTH,
	    (IGC_TXPBSIZE - (2 * max_frame_size)) >> 6);

	/* make low power state decision controlled by DMA coal */
	reg = IGC_READ_REG(hw, IGC_PCIEMISC);
	reg &= ~IGC_PCIEMISC_LX_DECISION;
	IGC_WRITE_REG(hw, IGC_PCIEMISC, reg);
}
941 
942 static int
943 igc_setup_interrupts(struct igc_softc *sc)
944 {
945 	int error;
946 
947 	switch (sc->sc_intr_type) {
948 	case PCI_INTR_TYPE_MSIX:
949 		error = igc_setup_msix(sc);
950 		break;
951 	case PCI_INTR_TYPE_MSI:
952 		error = igc_setup_msi(sc);
953 		break;
954 	case PCI_INTR_TYPE_INTX:
955 		error = igc_setup_intx(sc);
956 		break;
957 	default:
958 		panic("%s: invalid interrupt type: %d",
959 		    device_xname(sc->sc_dev), sc->sc_intr_type);
960 	}
961 
962 	return error;
963 }
964 
/*
 * Allocate and attach all evcnt(9) counters of the device: the global,
 * driver, per-queue and MAC statistics groups.  The per-queue scratch
 * arrays for driver counters are allocated here as well.
 *
 * Compiles to an empty function unless IGC_EVENT_COUNTERS is defined.
 */
static void
igc_attach_counters(struct igc_softc *sc)
{
#ifdef IGC_EVENT_COUNTERS
	const char *xname = device_xname(sc->sc_dev);
	int i, qid;

	/* Global counters */
	sc->sc_global_evcnts = kmem_zalloc(
	    IGC_GLOBAL_COUNTERS * sizeof(sc->sc_global_evcnts[0]), KM_SLEEP);
	for (i = 0; i < IGC_GLOBAL_COUNTERS; i++) {
		evcnt_attach_dynamic(&sc->sc_global_evcnts[i],
		    igc_global_counters[i].type, NULL,
		    xname, igc_global_counters[i].name);
	}

	/* Driver counters: the attached, device-wide totals ... */
	sc->sc_driver_evcnts = kmem_zalloc(
	    IGC_DRIVER_COUNTERS * sizeof(sc->sc_driver_evcnts[0]), KM_SLEEP);
	for (i = 0; i < IGC_DRIVER_COUNTERS; i++) {
		evcnt_attach_dynamic(&sc->sc_driver_evcnts[i],
		    igc_driver_counters[i].type, NULL,
		    xname, igc_driver_counters[i].name);
	}

	/* ... and the per-queue arrays, which are not attached to evcnt. */
	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		struct igc_queue *queue = &sc->queues[qid];

		queue->igcq_driver_counters = kmem_zalloc(
		    IGC_DRIVER_COUNTERS *
		    sizeof(queue->igcq_driver_counters[0]), KM_SLEEP);
	}

	/* Queue counters, grouped under "<xname> q<N>" */
	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		struct igc_queue *queue = &sc->queues[qid];

		snprintf(queue->igcq_queue_evname,
		    sizeof(queue->igcq_queue_evname),
		    "%s q%d", xname, qid);

		queue->igcq_queue_evcnts = kmem_zalloc(
		    IGC_QUEUE_COUNTERS * sizeof(queue->igcq_queue_evcnts[0]),
		    KM_SLEEP);

		for (i = 0; i < IGC_QUEUE_COUNTERS; i++) {
			evcnt_attach_dynamic(&queue->igcq_queue_evcnts[i],
			    igc_queue_counters[i].type, NULL,
			    queue->igcq_queue_evname,
			    igc_queue_counters[i].name);
		}
	}

	/* MAC counters, grouped under "<xname> Mac Statistics" */
	snprintf(sc->sc_mac_evname, sizeof(sc->sc_mac_evname),
	    "%s Mac Statistics", xname);

	sc->sc_mac_evcnts = kmem_zalloc(
	    IGC_MAC_COUNTERS * sizeof(sc->sc_mac_evcnts[0]), KM_SLEEP);
	for (i = 0; i < IGC_MAC_COUNTERS; i++) {
		evcnt_attach_dynamic(&sc->sc_mac_evcnts[i], EVCNT_TYPE_MISC,
		    NULL, sc->sc_mac_evname, igc_mac_counters[i].name);
	}
#endif
}
1029 
/*
 * Detach every event counter attached by igc_attach_counters() and release
 * the arrays backing them.  Compiles to an empty function unless
 * IGC_EVENT_COUNTERS is defined.
 *
 * Every kmem_free() size is spelled "count * sizeof(array[0])", i.e. the
 * same expression igc_attach_counters() passes to kmem_zalloc().  Using
 * sizeof(array) here would yield the size of the pointer rather than of an
 * element and hand kmem_free() a size that does not match the allocation.
 */
static void
igc_detach_counters(struct igc_softc *sc)
{
#ifdef IGC_EVENT_COUNTERS

	/* Global counters */
	for (int cnt = 0; cnt < IGC_GLOBAL_COUNTERS; cnt++)
		evcnt_detach(&sc->sc_global_evcnts[cnt]);

	kmem_free(sc->sc_global_evcnts,
	    IGC_GLOBAL_COUNTERS * sizeof(sc->sc_global_evcnts[0]));

	/* Driver counters: per-queue arrays first, then the attached totals */
	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
		struct igc_queue *q = &sc->queues[iq];

		kmem_free(q->igcq_driver_counters,
		    IGC_DRIVER_COUNTERS * sizeof(q->igcq_driver_counters[0]));
	}

	for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++)
		evcnt_detach(&sc->sc_driver_evcnts[cnt]);

	kmem_free(sc->sc_driver_evcnts,
	    IGC_DRIVER_COUNTERS * sizeof(sc->sc_driver_evcnts[0]));

	/* Queue counters */
	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
		struct igc_queue *q = &sc->queues[iq];

		for (int cnt = 0; cnt < IGC_QUEUE_COUNTERS; cnt++)
			evcnt_detach(&q->igcq_queue_evcnts[cnt]);

		kmem_free(q->igcq_queue_evcnts,
		    IGC_QUEUE_COUNTERS * sizeof(q->igcq_queue_evcnts[0]));
	}

	/* MAC statistics */
	for (int cnt = 0; cnt < IGC_MAC_COUNTERS; cnt++)
		evcnt_detach(&sc->sc_mac_evcnts[cnt]);

	kmem_free(sc->sc_mac_evcnts,
	    IGC_MAC_COUNTERS * sizeof(sc->sc_mac_evcnts[0]));
#endif
}
1075 
1076 /*
1077  * XXX
1078  * FreeBSD uses 4-byte-wise read for 64-bit counters, while Linux just
1079  * drops hi words.
1080  */
1081 static inline uint64_t __unused
1082 igc_read_mac_counter(struct igc_hw *hw, bus_size_t reg, bool is64)
1083 {
1084 	uint64_t val;
1085 
1086 	val = IGC_READ_REG(hw, reg);
1087 	if (is64)
1088 		val += ((uint64_t)IGC_READ_REG(hw, reg + 4)) << 32;
1089 	return val;
1090 }
1091 
/*
 * Refresh the event counters:
 *  - per-queue driver counters are summed, reset to zero, and the sums are
 *    added to the device-wide driver counters;
 *  - every MAC statistics register is read and added to its counter;
 *  - the MPC value and the per-queue RQDPC values are accumulated and
 *    reported to the interface as if_iqdrops.
 *
 * Compiles to an empty function unless IGC_EVENT_COUNTERS is defined.
 */
static void
igc_update_counters(struct igc_softc *sc)
{
#ifdef IGC_EVENT_COUNTERS
	struct igc_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->sc_ec.ec_if;
	uint64_t totals[IGC_DRIVER_COUNTERS];
	uint64_t dropped = 0;
	int i, qid;

	/* Global counters: nop */

	/* Driver counters */
	for (i = 0; i < IGC_DRIVER_COUNTERS; i++)
		totals[i] = 0;

	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		struct igc_queue *queue = &sc->queues[qid];

		for (i = 0; i < IGC_DRIVER_COUNTERS; i++) {
			totals[i] += IGC_QUEUE_DRIVER_COUNTER_VAL(queue, i);
			IGC_QUEUE_DRIVER_COUNTER_STORE(queue, i, 0);
		}
	}

	for (i = 0; i < IGC_DRIVER_COUNTERS; i++)
		IGC_DRIVER_COUNTER_ADD(sc, i, totals[i]);

	/* Queue counters: nop */

	/* Mac statistics */
	for (i = 0; i < IGC_MAC_COUNTERS; i++) {
		const bus_size_t reg = igc_mac_counters[i].reg;
		const uint64_t delta = igc_read_mac_counter(hw, reg,
		    igc_mac_counters[i].is64);

		IGC_MAC_COUNTER_ADD(sc, i, delta);

		/* XXX Count MPC to iqdrops. */
		if (reg == IGC_MPC)
			dropped += delta;
	}

	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		/* XXX RQDPC should be visible via evcnt(9). */
		const uint32_t rqdpc = IGC_READ_REG(hw, IGC_RQDPC(qid));

		/* RQDPC is not cleared on read. */
		if (rqdpc != 0)
			IGC_WRITE_REG(hw, IGC_RQDPC(qid), 0);
		dropped += rqdpc;
	}

	if (dropped != 0)
		if_statadd(ifp, if_iqdrops, dropped);
#endif
}
1150 
/*
 * Reset every event counter kept by the driver to zero: global, driver
 * (per-queue and device-wide), queue and MAC statistics.
 *
 * Each MAC statistics register is also read once with the result
 * discarded (presumably the hardware counters clear on read -- confirm
 * against the datasheet).
 *
 * Compiles to an empty function unless IGC_EVENT_COUNTERS is defined.
 */
static void
igc_clear_counters(struct igc_softc *sc)
{
#ifdef IGC_EVENT_COUNTERS
	struct igc_hw *hw = &sc->hw;
	int i, qid;

	/* Global counters */
	for (i = 0; i < IGC_GLOBAL_COUNTERS; i++)
		IGC_GLOBAL_COUNTER_STORE(sc, i, 0);

	/* Driver counters */
	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		struct igc_queue *queue = &sc->queues[qid];

		for (i = 0; i < IGC_DRIVER_COUNTERS; i++)
			IGC_QUEUE_DRIVER_COUNTER_STORE(queue, i, 0);
	}

	for (i = 0; i < IGC_DRIVER_COUNTERS; i++)
		IGC_DRIVER_COUNTER_STORE(sc, i, 0);

	/* Queue counters */
	for (qid = 0; qid < sc->sc_nqueues; qid++) {
		struct igc_queue *queue = &sc->queues[qid];

		for (i = 0; i < IGC_QUEUE_COUNTERS; i++)
			IGC_QUEUE_COUNTER_STORE(queue, i, 0);
	}

	/* Mac statistics */
	for (i = 0; i < IGC_MAC_COUNTERS; i++) {
		(void)igc_read_mac_counter(hw, igc_mac_counters[i].reg,
		    igc_mac_counters[i].is64);
		IGC_MAC_COUNTER_STORE(sc, i, 0);
	}
#endif
}
1189 
1190 static int
1191 igc_setup_msix(struct igc_softc *sc)
1192 {
1193 	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1194 	device_t dev = sc->sc_dev;
1195 	pci_intr_handle_t *intrs;
1196 	void **ihs;
1197 	const char *intrstr;
1198 	char intrbuf[PCI_INTRSTR_LEN];
1199 	char xnamebuf[MAX(32, MAXCOMLEN)];
1200 	int iq, error;
1201 
1202 	for (iq = 0, intrs = sc->sc_intrs, ihs = sc->sc_ihs;
1203 	    iq < sc->sc_nqueues; iq++, intrs++, ihs++) {
1204 		struct igc_queue *q = &sc->queues[iq];
1205 
1206 		snprintf(xnamebuf, sizeof(xnamebuf), "%s: txrx %d",
1207 		    device_xname(dev), iq);
1208 
1209 		intrstr = pci_intr_string(pc, *intrs, intrbuf, sizeof(intrbuf));
1210 
1211 		pci_intr_setattr(pc, intrs, PCI_INTR_MPSAFE, true);
1212 		*ihs = pci_intr_establish_xname(pc, *intrs, IPL_NET,
1213 		    igc_intr_queue, q, xnamebuf);
1214 		if (*ihs == NULL) {
1215 			aprint_error_dev(dev,
1216 			    "unable to establish txrx interrupt at %s\n",
1217 			    intrstr);
1218 			return ENOBUFS;
1219 		}
1220 		aprint_normal_dev(dev, "txrx interrupting at %s\n", intrstr);
1221 
1222 		kcpuset_t *affinity;
1223 		kcpuset_create(&affinity, true);
1224 		kcpuset_set(affinity, iq % ncpu);
1225 		error = interrupt_distribute(*ihs, affinity, NULL);
1226 		if (error) {
1227 			aprint_normal_dev(dev,
1228 			    "%s: unable to change affinity, use default CPU\n",
1229 			    intrstr);
1230 		}
1231 		kcpuset_destroy(affinity);
1232 
1233 		q->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1234 		    igc_handle_queue, q);
1235 		if (q->igcq_si == NULL) {
1236 			aprint_error_dev(dev,
1237 			    "%s: unable to establish softint\n", intrstr);
1238 			return ENOBUFS;
1239 		}
1240 
1241 		q->msix = iq;
1242 		q->eims = 1 << iq;
1243 	}
1244 
1245 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(dev));
1246 	error = workqueue_create(&sc->sc_queue_wq, xnamebuf,
1247 	    igc_handle_queue_work, sc, IGC_WORKQUEUE_PRI, IPL_NET,
1248 	    WQ_PERCPU | WQ_MPSAFE);
1249 	if (error) {
1250 		aprint_error_dev(dev, "workqueue_create failed\n");
1251 		return ENOBUFS;
1252 	}
1253 	sc->sc_txrx_workqueue = false;
1254 
1255 	intrstr = pci_intr_string(pc, *intrs, intrbuf, sizeof(intrbuf));
1256 	snprintf(xnamebuf, sizeof(xnamebuf), "%s: link", device_xname(dev));
1257 	pci_intr_setattr(pc, intrs, PCI_INTR_MPSAFE, true);
1258 	*ihs = pci_intr_establish_xname(pc, *intrs, IPL_NET,
1259 	    igc_intr_link, sc, xnamebuf);
1260 	if (*ihs == NULL) {
1261 		aprint_error_dev(dev,
1262 		    "unable to establish link interrupt at %s\n", intrstr);
1263 		return ENOBUFS;
1264 	}
1265 	aprint_normal_dev(dev, "link interrupting at %s\n", intrstr);
1266 	/* use later in igc_configure_queues() */
1267 	sc->linkvec = iq;
1268 
1269 	return 0;
1270 }
1271 
1272 static int
1273 igc_setup_msi(struct igc_softc *sc)
1274 {
1275 	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1276 	device_t dev = sc->sc_dev;
1277 	pci_intr_handle_t *intr = sc->sc_intrs;
1278 	void **ihs = sc->sc_ihs;
1279 	const char *intrstr;
1280 	char intrbuf[PCI_INTRSTR_LEN];
1281 	char xnamebuf[MAX(32, MAXCOMLEN)];
1282 	int error;
1283 
1284 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1285 
1286 	snprintf(xnamebuf, sizeof(xnamebuf), "%s: msi", device_xname(dev));
1287 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1288 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1289 	    igc_intr, sc, xnamebuf);
1290 	if (*ihs == NULL) {
1291 		aprint_error_dev(dev,
1292 		    "unable to establish interrupt at %s\n", intrstr);
1293 		return ENOBUFS;
1294 	}
1295 	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
1296 
1297 	struct igc_queue *iq = sc->queues;
1298 	iq->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1299 	    igc_handle_queue, iq);
1300 	if (iq->igcq_si == NULL) {
1301 		aprint_error_dev(dev,
1302 		    "%s: unable to establish softint\n", intrstr);
1303 		return ENOBUFS;
1304 	}
1305 
1306 	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(dev));
1307 	error = workqueue_create(&sc->sc_queue_wq, xnamebuf,
1308 	    igc_handle_queue_work, sc, IGC_WORKQUEUE_PRI, IPL_NET,
1309 	    WQ_PERCPU | WQ_MPSAFE);
1310 	if (error) {
1311 		aprint_error_dev(dev, "workqueue_create failed\n");
1312 		return ENOBUFS;
1313 	}
1314 	sc->sc_txrx_workqueue = false;
1315 
1316 	sc->queues[0].msix = 0;
1317 	sc->linkvec = 0;
1318 
1319 	return 0;
1320 }
1321 
1322 static int
1323 igc_setup_intx(struct igc_softc *sc)
1324 {
1325 	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1326 	device_t dev = sc->sc_dev;
1327 	pci_intr_handle_t *intr = sc->sc_intrs;
1328 	void **ihs = sc->sc_ihs;
1329 	const char *intrstr;
1330 	char intrbuf[PCI_INTRSTR_LEN];
1331 	char xnamebuf[32];
1332 
1333 	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1334 
1335 	snprintf(xnamebuf, sizeof(xnamebuf), "%s:intx", device_xname(dev));
1336 	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1337 	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1338 	    igc_intr, sc, xnamebuf);
1339 	if (*ihs == NULL) {
1340 		aprint_error_dev(dev,
1341 		    "unable to establish interrupt at %s\n", intrstr);
1342 		return ENOBUFS;
1343 	}
1344 	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
1345 
1346 	struct igc_queue *iq = sc->queues;
1347 	iq->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1348 	    igc_handle_queue, iq);
1349 	if (iq->igcq_si == NULL) {
1350 		aprint_error_dev(dev,
1351 		    "%s: unable to establish softint\n", intrstr);
1352 		return ENOBUFS;
1353 	}
1354 
1355 	/* create workqueue? */
1356 	sc->sc_txrx_workqueue = false;
1357 
1358 	sc->queues[0].msix = 0;
1359 	sc->linkvec = 0;
1360 
1361 	return 0;
1362 }
1363 
1364 static int
1365 igc_dma_malloc(struct igc_softc *sc, bus_size_t size, struct igc_dma_alloc *dma)
1366 {
1367 	struct igc_osdep *os = &sc->osdep;
1368 
1369 	dma->dma_tag = os->os_dmat;
1370 
1371 	if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0,
1372 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map))
1373 		return 1;
1374 	if (bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg,
1375 	    1, &dma->dma_nseg, BUS_DMA_WAITOK))
1376 		goto destroy;
1377 	/*
1378 	 * XXXRO
1379 	 *
1380 	 * Coherent mapping for descriptors is required for now.
1381 	 *
1382 	 * Both TX and RX descriptors are 16-byte length, which is shorter
1383 	 * than dcache lines on modern CPUs. Therefore, sync for a descriptor
1384 	 * may overwrite DMA read for descriptors in the same cache line.
1385 	 *
1386 	 * Can't we avoid this by use cache-line-aligned descriptors at once?
1387 	 */
1388 	if (bus_dmamem_map(dma->dma_tag, &dma->dma_seg, dma->dma_nseg, size,
1389 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT /* XXXRO */))
1390 		goto free;
1391 	if (bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1392 	    NULL, BUS_DMA_WAITOK))
1393 		goto unmap;
1394 
1395 	dma->dma_size = size;
1396 
1397 	return 0;
1398  unmap:
1399 	bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, size);
1400  free:
1401 	bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
1402  destroy:
1403 	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1404 	dma->dma_map = NULL;
1405 	dma->dma_tag = NULL;
1406 	return 1;
1407 }
1408 
1409 static void
1410 igc_dma_free(struct igc_softc *sc, struct igc_dma_alloc *dma)
1411 {
1412 
1413 	if (dma->dma_tag == NULL)
1414 		return;
1415 
1416 	if (dma->dma_map != NULL) {
1417 		bus_dmamap_sync(dma->dma_tag, dma->dma_map, 0,
1418 		    dma->dma_map->dm_mapsize,
1419 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1420 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1421 		bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, dma->dma_size);
1422 		bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
1423 		bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1424 		dma->dma_map = NULL;
1425 	}
1426 }
1427 
1428 /*********************************************************************
1429  *
1430  *  Setup networking device structure and register an interface.
1431  *
1432  **********************************************************************/
1433 static void
1434 igc_setup_interface(struct igc_softc *sc)
1435 {
1436 	struct ifnet *ifp = &sc->sc_ec.ec_if;
1437 
1438 	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), sizeof(ifp->if_xname));
1439 	ifp->if_softc = sc;
1440 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1441 	ifp->if_extflags = IFEF_MPSAFE;
1442 	ifp->if_ioctl = igc_ioctl;
1443 	ifp->if_start = igc_start;
1444 	if (sc->sc_nqueues > 1)
1445 		ifp->if_transmit = igc_transmit;
1446 	ifp->if_watchdog = igc_watchdog;
1447 	ifp->if_init = igc_init;
1448 	ifp->if_stop = igc_stop;
1449 
1450 #if 0 /* notyet */
1451 	ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6;
1452 #endif
1453 
1454 	ifp->if_capabilities |=
1455 	    IFCAP_CSUM_IPv4_Tx  | IFCAP_CSUM_IPv4_Rx  |
1456 	    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1457 	    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1458 	    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1459 	    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1460 
1461 	ifp->if_capenable = 0;
1462 
1463 	sc->sc_ec.ec_capabilities |=
1464 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
1465 
1466 	IFQ_SET_MAXLEN(&ifp->if_snd, sc->num_tx_desc - 1);
1467 	IFQ_SET_READY(&ifp->if_snd);
1468 
1469 #if NVLAN > 0
1470 	sc->sc_ec.ec_capabilities |=  ETHERCAP_VLAN_HWTAGGING;
1471 #endif
1472 
1473 	mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_NET);
1474 
1475 	/* Initialize ifmedia structures. */
1476 	sc->sc_ec.ec_ifmedia = &sc->media;
1477 	ifmedia_init_with_lock(&sc->media, IFM_IMASK, igc_media_change,
1478 	    igc_media_status, &sc->sc_core_lock);
1479 	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1480 	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
1481 	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
1482 	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
1483 	ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1484 	ifmedia_add(&sc->media, IFM_ETHER | IFM_2500_T | IFM_FDX, 0, NULL);
1485 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1486 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1487 
1488 	sc->sc_rx_intr_process_limit = IGC_RX_INTR_PROCESS_LIMIT_DEFAULT;
1489 	sc->sc_tx_intr_process_limit = IGC_TX_INTR_PROCESS_LIMIT_DEFAULT;
1490 	sc->sc_rx_process_limit = IGC_RX_PROCESS_LIMIT_DEFAULT;
1491 	sc->sc_tx_process_limit = IGC_TX_PROCESS_LIMIT_DEFAULT;
1492 
1493 	if_initialize(ifp);
1494 	sc->sc_ipq = if_percpuq_create(ifp);
1495 	if_deferred_start_init(ifp, NULL);
1496 	ether_ifattach(ifp, sc->hw.mac.addr);
1497 	ether_set_ifflags_cb(&sc->sc_ec, igc_ifflags_cb);
1498 	if_register(ifp);
1499 }
1500 
1501 static int
1502 igc_init(struct ifnet *ifp)
1503 {
1504 	struct igc_softc *sc = ifp->if_softc;
1505 	int error;
1506 
1507 	mutex_enter(&sc->sc_core_lock);
1508 	error = igc_init_locked(sc);
1509 	mutex_exit(&sc->sc_core_lock);
1510 
1511 	return error;
1512 }
1513 
1514 static int
1515 igc_init_locked(struct igc_softc *sc)
1516 {
1517 	struct ethercom *ec = &sc->sc_ec;
1518 	struct ifnet *ifp = &ec->ec_if;
1519 
1520 	DPRINTF(CFG, "called\n");
1521 
1522 	KASSERT(mutex_owned(&sc->sc_core_lock));
1523 
1524 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1525 		igc_stop_locked(sc);
1526 
1527 	/* Put the address into the receive address array. */
1528 	igc_rar_set(&sc->hw, sc->hw.mac.addr, 0);
1529 
1530 	/* Initialize the hardware. */
1531 	igc_reset(sc);
1532 	igc_update_link_status(sc);
1533 
1534 	/* Setup VLAN support, basic and offload if available. */
1535 	IGC_WRITE_REG(&sc->hw, IGC_VET, ETHERTYPE_VLAN);
1536 
1537 	igc_initialize_transmit_unit(sc);
1538 	igc_initialize_receive_unit(sc);
1539 
1540 	if (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) {
1541 		uint32_t ctrl = IGC_READ_REG(&sc->hw, IGC_CTRL);
1542 		ctrl |= IGC_CTRL_VME;
1543 		IGC_WRITE_REG(&sc->hw, IGC_CTRL, ctrl);
1544 	}
1545 
1546 	/* Setup multicast table. */
1547 	igc_set_filter(sc);
1548 
1549 	igc_clear_hw_cntrs_base_generic(&sc->hw);
1550 
1551 	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX)
1552 		igc_configure_queues(sc);
1553 
1554 	/* This clears any pending interrupts */
1555 	IGC_READ_REG(&sc->hw, IGC_ICR);
1556 	IGC_WRITE_REG(&sc->hw, IGC_ICS, IGC_ICS_LSC);
1557 
1558 	/* The driver can now take control from firmware. */
1559 	igc_get_hw_control(sc);
1560 
1561 	/* Set Energy Efficient Ethernet. */
1562 	igc_set_eee_i225(&sc->hw, true, true, true);
1563 
1564 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1565 		struct rx_ring *rxr = &sc->rx_rings[iq];
1566 
1567 		mutex_enter(&rxr->rxr_lock);
1568 		igc_rxfill(rxr);
1569 		mutex_exit(&rxr->rxr_lock);
1570 	}
1571 
1572 	sc->sc_core_stopping = false;
1573 
1574 	ifp->if_flags |= IFF_RUNNING;
1575 
1576 	/* Save last flags for the callback */
1577 	sc->sc_if_flags = ifp->if_flags;
1578 
1579 	callout_schedule(&sc->sc_tick_ch, hz);
1580 
1581 	igc_enable_intr(sc);
1582 
1583 	return 0;
1584 }
1585 
1586 static inline int
1587 igc_load_mbuf(struct igc_queue *q, bus_dma_tag_t dmat, bus_dmamap_t map,
1588     struct mbuf *m)
1589 {
1590 	int error;
1591 
1592 	error = bus_dmamap_load_mbuf(dmat, map, m,
1593 	    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1594 
1595 	if (__predict_false(error == EFBIG)) {
1596 		IGC_DRIVER_EVENT(q, txdma_efbig, 1);
1597 		m = m_defrag(m, M_NOWAIT);
1598 		if (__predict_false(m == NULL)) {
1599 			IGC_DRIVER_EVENT(q, txdma_defrag, 1);
1600 			return ENOBUFS;
1601 		}
1602 		error = bus_dmamap_load_mbuf(dmat, map, m,
1603 		    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1604 	}
1605 
1606 	switch (error) {
1607 	case 0:
1608 		break;
1609 	case ENOMEM:
1610 		IGC_DRIVER_EVENT(q, txdma_enomem, 1);
1611 		break;
1612 	case EINVAL:
1613 		IGC_DRIVER_EVENT(q, txdma_einval, 1);
1614 		break;
1615 	case EAGAIN:
1616 		IGC_DRIVER_EVENT(q, txdma_eagain, 1);
1617 		break;
1618 	default:
1619 		IGC_DRIVER_EVENT(q, txdma_other, 1);
1620 		break;
1621 	}
1622 
1623 	return error;
1624 }
1625 
1626 #define IGC_TX_START	1
1627 #define IGC_TX_TRANSMIT	2
1628 
1629 static void
1630 igc_start(struct ifnet *ifp)
1631 {
1632 	struct igc_softc *sc = ifp->if_softc;
1633 
1634 	if (__predict_false(!sc->link_active)) {
1635 		IFQ_PURGE(&ifp->if_snd);
1636 		return;
1637 	}
1638 
1639 	struct tx_ring *txr = &sc->tx_rings[0]; /* queue 0 */
1640 	mutex_enter(&txr->txr_lock);
1641 	igc_tx_common_locked(ifp, txr, IGC_TX_START);
1642 	mutex_exit(&txr->txr_lock);
1643 }
1644 
1645 static inline u_int
1646 igc_select_txqueue(struct igc_softc *sc, struct mbuf *m __unused)
1647 {
1648 	const u_int cpuid = cpu_index(curcpu());
1649 
1650 	return cpuid % sc->sc_nqueues;
1651 }
1652 
1653 static int
1654 igc_transmit(struct ifnet *ifp, struct mbuf *m)
1655 {
1656 	struct igc_softc *sc = ifp->if_softc;
1657 	const u_int qid = igc_select_txqueue(sc, m);
1658 	struct tx_ring *txr = &sc->tx_rings[qid];
1659 	struct igc_queue *q = txr->txr_igcq;
1660 
1661 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
1662 		IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
1663 		m_freem(m);
1664 		return ENOBUFS;
1665 	}
1666 
1667 	mutex_enter(&txr->txr_lock);
1668 	igc_tx_common_locked(ifp, txr, IGC_TX_TRANSMIT);
1669 	mutex_exit(&txr->txr_lock);
1670 
1671 	return 0;
1672 }
1673 
1674 static void
1675 igc_tx_common_locked(struct ifnet *ifp, struct tx_ring *txr, int caller)
1676 {
1677 	struct igc_softc *sc = ifp->if_softc;
1678 	struct igc_queue *q = txr->txr_igcq;
1679 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1680 	int prod, free, last = -1;
1681 	bool post = false;
1682 
1683 	prod = txr->next_avail_desc;
1684 	free = txr->next_to_clean;
1685 	if (free <= prod)
1686 		free += sc->num_tx_desc;
1687 	free -= prod;
1688 
1689 	DPRINTF(TX, "%s: begin: msix %d prod %d n2c %d free %d\n",
1690 	    caller == IGC_TX_TRANSMIT ? "transmit" : "start",
1691 	    txr->me, prod, txr->next_to_clean, free);
1692 
1693 	for (;;) {
1694 		struct mbuf *m;
1695 
1696 		if (__predict_false(free <= IGC_MAX_SCATTER)) {
1697 			IGC_QUEUE_EVENT(q, tx_no_desc, 1);
1698 			break;
1699 		}
1700 
1701 		if (caller == IGC_TX_TRANSMIT)
1702 			m = pcq_get(txr->txr_interq);
1703 		else
1704 			IFQ_DEQUEUE(&ifp->if_snd, m);
1705 		if (__predict_false(m == NULL))
1706 			break;
1707 
1708 		struct igc_tx_buf *txbuf = &txr->tx_buffers[prod];
1709 		bus_dmamap_t map = txbuf->map;
1710 
1711 		if (__predict_false(
1712 		    igc_load_mbuf(q, txr->txdma.dma_tag, map, m))) {
1713 			if (caller == IGC_TX_TRANSMIT)
1714 				IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
1715 			m_freem(m);
1716 			if_statinc_ref(ifp, nsr, if_oerrors);
1717 			continue;
1718 		}
1719 
1720 		bus_dmamap_sync(txr->txdma.dma_tag, map, 0,
1721 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
1722 
1723 		uint32_t ctx_cmd_type_len = 0, olinfo_status = 0;
1724 		if (igc_tx_ctx_setup(txr, m, prod, &ctx_cmd_type_len,
1725 		    &olinfo_status)) {
1726 			IGC_QUEUE_EVENT(q, tx_ctx, 1);
1727 			/* Consume the first descriptor */
1728 			prod = igc_txdesc_incr(sc, prod);
1729 			free--;
1730 		}
1731 		for (int i = 0; i < map->dm_nsegs; i++) {
1732 			union igc_adv_tx_desc *txdesc = &txr->tx_base[prod];
1733 
1734 			uint32_t cmd_type_len = ctx_cmd_type_len |
1735 			    IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DTYP_DATA |
1736 			    IGC_ADVTXD_DCMD_DEXT | map->dm_segs[i].ds_len;
1737 			if (i == map->dm_nsegs - 1) {
1738 				cmd_type_len |=
1739 				    IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS;
1740 			}
1741 
1742 			igc_txdesc_sync(txr, prod,
1743 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1744 			htolem64(&txdesc->read.buffer_addr,
1745 			    map->dm_segs[i].ds_addr);
1746 			htolem32(&txdesc->read.cmd_type_len, cmd_type_len);
1747 			htolem32(&txdesc->read.olinfo_status, olinfo_status);
1748 			igc_txdesc_sync(txr, prod,
1749 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1750 
1751 			last = prod;
1752 			prod = igc_txdesc_incr(sc, prod);
1753 		}
1754 
1755 		txbuf->m_head = m;
1756 		txbuf->eop_index = last;
1757 
1758 		bpf_mtap(ifp, m, BPF_D_OUT);
1759 
1760 		if_statadd_ref(ifp, nsr, if_obytes, m->m_pkthdr.len);
1761 		if (m->m_flags & M_MCAST)
1762 			if_statinc_ref(ifp, nsr, if_omcasts);
1763 		IGC_QUEUE_EVENT(q, tx_packets, 1);
1764 		IGC_QUEUE_EVENT(q, tx_bytes, m->m_pkthdr.len);
1765 
1766 		free -= map->dm_nsegs;
1767 		post = true;
1768 	}
1769 
1770 	if (post) {
1771 		txr->next_avail_desc = prod;
1772 		IGC_WRITE_REG(&sc->hw, IGC_TDT(txr->me), prod);
1773 	}
1774 
1775 	DPRINTF(TX, "%s: done : msix %d prod %d n2c %d free %d\n",
1776 	    caller == IGC_TX_TRANSMIT ? "transmit" : "start",
1777 	    txr->me, prod, txr->next_to_clean, free);
1778 
1779 	IF_STAT_PUTREF(ifp);
1780 }
1781 
1782 static bool
1783 igc_txeof(struct tx_ring *txr, u_int limit)
1784 {
1785 	struct igc_softc *sc = txr->sc;
1786 	struct ifnet *ifp = &sc->sc_ec.ec_if;
1787 	int cons, prod;
1788 	bool more = false;
1789 
1790 	prod = txr->next_avail_desc;
1791 	cons = txr->next_to_clean;
1792 
1793 	if (cons == prod) {
1794 		DPRINTF(TX, "false: msix %d cons %d prod %d\n",
1795 		    txr->me, cons, prod);
1796 		return false;
1797 	}
1798 
1799 	do {
1800 		struct igc_tx_buf *txbuf = &txr->tx_buffers[cons];
1801 		const int last = txbuf->eop_index;
1802 
1803 		membar_consumer();	/* XXXRO necessary? */
1804 
1805 		KASSERT(last != -1);
1806 		union igc_adv_tx_desc *txdesc = &txr->tx_base[last];
1807 		igc_txdesc_sync(txr, last, BUS_DMASYNC_POSTREAD);
1808 		const uint32_t status = le32toh(txdesc->wb.status);
1809 		igc_txdesc_sync(txr, last, BUS_DMASYNC_PREREAD);
1810 
1811 		if (!(status & IGC_TXD_STAT_DD))
1812 			break;
1813 
1814 		if (limit-- == 0) {
1815 			more = true;
1816 			DPRINTF(TX, "pending TX "
1817 			    "msix %d cons %d last %d prod %d "
1818 			    "status 0x%08x\n",
1819 			    txr->me, cons, last, prod, status);
1820 			break;
1821 		}
1822 
1823 		DPRINTF(TX, "handled TX "
1824 		    "msix %d cons %d last %d prod %d "
1825 		    "status 0x%08x\n",
1826 		    txr->me, cons, last, prod, status);
1827 
1828 		if_statinc(ifp, if_opackets);
1829 
1830 		bus_dmamap_t map = txbuf->map;
1831 		bus_dmamap_sync(txr->txdma.dma_tag, map, 0, map->dm_mapsize,
1832 		    BUS_DMASYNC_POSTWRITE);
1833 		bus_dmamap_unload(txr->txdma.dma_tag, map);
1834 		m_freem(txbuf->m_head);
1835 
1836 		txbuf->m_head = NULL;
1837 		txbuf->eop_index = -1;
1838 
1839 		cons = igc_txdesc_incr(sc, last);
1840 	} while (cons != prod);
1841 
1842 	txr->next_to_clean = cons;
1843 
1844 	return more;
1845 }
1846 
1847 static void
1848 igc_intr_barrier(struct igc_softc *sc __unused)
1849 {
1850 
1851 	xc_barrier(0);
1852 }
1853 
1854 static void
1855 igc_stop(struct ifnet *ifp, int disable)
1856 {
1857 	struct igc_softc *sc = ifp->if_softc;
1858 
1859 	mutex_enter(&sc->sc_core_lock);
1860 	igc_stop_locked(sc);
1861 	mutex_exit(&sc->sc_core_lock);
1862 }
1863 
1864 /*********************************************************************
1865  *
1866  *  This routine disables all traffic on the adapter by issuing a
1867  *  global reset on the MAC.
1868  *
1869  **********************************************************************/
1870 static void
1871 igc_stop_locked(struct igc_softc *sc)
1872 {
1873 	struct ifnet *ifp = &sc->sc_ec.ec_if;
1874 
1875 	DPRINTF(CFG, "called\n");
1876 
1877 	KASSERT(mutex_owned(&sc->sc_core_lock));
1878 
1879 	/*
1880 	 * If stopping processing has already started, do nothing.
1881 	 */
1882 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1883 		return;
1884 
1885 	/* Tell the stack that the interface is no longer active. */
1886 	ifp->if_flags &= ~IFF_RUNNING;
1887 
1888 	/*
1889 	 * igc_handle_queue() can enable interrupts, so wait for completion of
1890 	 * last igc_handle_queue() after unset IFF_RUNNING.
1891 	 */
1892 	mutex_exit(&sc->sc_core_lock);
1893 	igc_barrier_handle_queue(sc);
1894 	mutex_enter(&sc->sc_core_lock);
1895 
1896 	sc->sc_core_stopping = true;
1897 
1898 	igc_disable_intr(sc);
1899 
1900 	callout_halt(&sc->sc_tick_ch, &sc->sc_core_lock);
1901 
1902 	igc_reset_hw(&sc->hw);
1903 	IGC_WRITE_REG(&sc->hw, IGC_WUC, 0);
1904 
1905 	/*
1906 	 * Wait for completion of interrupt handlers.
1907 	 */
1908 	mutex_exit(&sc->sc_core_lock);
1909 	igc_intr_barrier(sc);
1910 	mutex_enter(&sc->sc_core_lock);
1911 
1912 	igc_update_link_status(sc);
1913 
1914 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1915 		struct tx_ring *txr = &sc->tx_rings[iq];
1916 
1917 		igc_withdraw_transmit_packets(txr, false);
1918 	}
1919 
1920 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1921 		struct rx_ring *rxr = &sc->rx_rings[iq];
1922 
1923 		igc_clear_receive_status(rxr);
1924 	}
1925 
1926 	/* Save last flags for the callback */
1927 	sc->sc_if_flags = ifp->if_flags;
1928 }
1929 
1930 /*********************************************************************
1931  *  Ioctl entry point
1932  *
1933  *  igc_ioctl is called when the user wants to configure the
1934  *  interface.
1935  *
1936  *  return 0 on success, positive on failure
1937  **********************************************************************/
1938 static int
1939 igc_ioctl(struct ifnet * ifp, u_long cmd, void *data)
1940 {
1941 	struct igc_softc *sc __unused = ifp->if_softc;
1942 	int s;
1943 	int error;
1944 
1945 	DPRINTF(CFG, "cmd 0x%016lx\n", cmd);
1946 
1947 	switch (cmd) {
1948 	case SIOCADDMULTI:
1949 	case SIOCDELMULTI:
1950 		break;
1951 	default:
1952 		KASSERT(IFNET_LOCKED(ifp));
1953 	}
1954 
1955 	if (cmd == SIOCZIFDATA) {
1956 		mutex_enter(&sc->sc_core_lock);
1957 		igc_clear_counters(sc);
1958 		mutex_exit(&sc->sc_core_lock);
1959 	}
1960 
1961 	switch (cmd) {
1962 #ifdef IF_RXR
1963 	case SIOCGIFRXR:
1964 		s = splnet();
1965 		error = igc_rxrinfo(sc, (struct if_rxrinfo *)ifr->ifr_data);
1966 		splx(s);
1967 		break;
1968 #endif
1969 	default:
1970 		s = splnet();
1971 		error = ether_ioctl(ifp, cmd, data);
1972 		splx(s);
1973 		break;
1974 	}
1975 
1976 	if (error != ENETRESET)
1977 		return error;
1978 
1979 	error = 0;
1980 
1981 	if (cmd == SIOCSIFCAP)
1982 		error = if_init(ifp);
1983 	else if ((cmd == SIOCADDMULTI) || (cmd == SIOCDELMULTI)) {
1984 		mutex_enter(&sc->sc_core_lock);
1985 		if (sc->sc_if_flags & IFF_RUNNING) {
1986 			/*
1987 			 * Multicast list has changed; set the hardware filter
1988 			 * accordingly.
1989 			 */
1990 			igc_disable_intr(sc);
1991 			igc_set_filter(sc);
1992 			igc_enable_intr(sc);
1993 		}
1994 		mutex_exit(&sc->sc_core_lock);
1995 	}
1996 
1997 	return error;
1998 }
1999 
#ifdef IF_RXR
/*
 * Report per-queue RX ring information through if_rxr_info_ioctl().
 *
 * One if_rxring_info entry is filled in per queue.  With a single queue a
 * stack entry is used; otherwise a temporary array is allocated and freed
 * again before returning.
 *
 * Returns the result of if_rxr_info_ioctl().  Only built when IF_RXR is
 * defined.
 */
static int
igc_rxrinfo(struct igc_softc *sc, struct if_rxrinfo *ifri)
{
	struct if_rxring_info single, *ifr;
	const bool multi = sc->sc_nqueues > 1;
	int rv;

	if (multi) {
		ifr = kmem_zalloc(sc->sc_nqueues * sizeof(*ifr), KM_SLEEP);
	} else {
		memset(&single, 0, sizeof(single));
		ifr = &single;
	}

	for (int qid = 0; qid < sc->sc_nqueues; qid++) {
		struct if_rxring_info *ent = &ifr[qid];

		ent->ifr_size = MCLBYTES;
		snprintf(ent->ifr_name, sizeof(ent->ifr_name), "%d", qid);
		ent->ifr_info = sc->rx_rings[qid].rx_ring;
	}

	rv = if_rxr_info_ioctl(ifri, sc->sc_nqueues, ifr);
	if (multi)
		kmem_free(ifr, sc->sc_nqueues * sizeof(*ifr));

	return rv;
}
#endif
2029 
2030 static void
2031 igc_rxfill(struct rx_ring *rxr)
2032 {
2033 	struct igc_softc *sc = rxr->sc;
2034 	int id;
2035 
2036 	for (id = 0; id < sc->num_rx_desc; id++) {
2037 		if (igc_get_buf(rxr, id, false)) {
2038 			panic("%s: msix=%d i=%d\n", __func__, rxr->me, id);
2039 		}
2040 	}
2041 
2042 	id = sc->num_rx_desc - 1;
2043 	rxr->last_desc_filled = id;
2044 	IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), id);
2045 	rxr->next_to_check = 0;
2046 }
2047 
2048 static void
2049 igc_rxrefill(struct rx_ring *rxr, int end)
2050 {
2051 	struct igc_softc *sc = rxr->sc;
2052 	int id;
2053 
2054 	for (id = rxr->next_to_check; id != end; id = igc_rxdesc_incr(sc, id)) {
2055 		if (igc_get_buf(rxr, id, true)) {
2056 			/* XXXRO */
2057 			panic("%s: msix=%d id=%d\n", __func__, rxr->me, id);
2058 		}
2059 	}
2060 
2061 	id = igc_rxdesc_decr(sc, id);
2062 	DPRINTF(RX, "%s RDT %d id %d\n",
2063 	    rxr->last_desc_filled == id ? "same" : "diff",
2064 	    rxr->last_desc_filled, id);
2065 	rxr->last_desc_filled = id;
2066 	IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), id);
2067 }
2068 
2069 /*********************************************************************
2070  *
2071  *  This routine executes in interrupt context. It replenishes
2072  *  the mbufs in the descriptor and sends data which has been
2073  *  dma'ed into host memory to upper layer.
2074  *
2075  *********************************************************************/
2076 static bool
2077 igc_rxeof(struct rx_ring *rxr, u_int limit)
2078 {
2079 	struct igc_softc *sc = rxr->sc;
2080 	struct igc_queue *q = rxr->rxr_igcq;
2081 	struct ifnet *ifp = &sc->sc_ec.ec_if;
2082 	int id;
2083 	bool more = false;
2084 
2085 	id = rxr->next_to_check;
2086 	for (;;) {
2087 		union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
2088 		struct igc_rx_buf *rxbuf, *nxbuf;
2089 		struct mbuf *mp, *m;
2090 
2091 		igc_rxdesc_sync(rxr, id,
2092 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2093 
2094 		const uint32_t staterr = le32toh(rxdesc->wb.upper.status_error);
2095 
2096 		if (!ISSET(staterr, IGC_RXD_STAT_DD)) {
2097 			igc_rxdesc_sync(rxr, id,
2098 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2099 			break;
2100 		}
2101 
2102 		if (limit-- == 0) {
2103 			igc_rxdesc_sync(rxr, id,
2104 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2105 			DPRINTF(RX, "more=true\n");
2106 			more = true;
2107 			break;
2108 		}
2109 
2110 		/* Zero out the receive descriptors status. */
2111 		rxdesc->wb.upper.status_error = 0;
2112 
2113 		/* Pull the mbuf off the ring. */
2114 		rxbuf = &rxr->rx_buffers[id];
2115 		bus_dmamap_t map = rxbuf->map;
2116 		bus_dmamap_sync(rxr->rxdma.dma_tag, map,
2117 		    0, map->dm_mapsize, BUS_DMASYNC_POSTREAD);
2118 		bus_dmamap_unload(rxr->rxdma.dma_tag, map);
2119 
2120 		mp = rxbuf->buf;
2121 		rxbuf->buf = NULL;
2122 
2123 		const bool eop = staterr & IGC_RXD_STAT_EOP;
2124 		const uint16_t len = le16toh(rxdesc->wb.upper.length);
2125 
2126 #if NVLAN > 0
2127 		const uint16_t vtag = le16toh(rxdesc->wb.upper.vlan);
2128 #endif
2129 
2130 		const uint32_t ptype = le32toh(rxdesc->wb.lower.lo_dword.data) &
2131 		    IGC_PKTTYPE_MASK;
2132 
2133 		const uint32_t hash __unused =
2134 		    le32toh(rxdesc->wb.lower.hi_dword.rss);
2135 		const uint16_t hashtype __unused =
2136 		    le16toh(rxdesc->wb.lower.lo_dword.hs_rss.pkt_info) &
2137 		    IGC_RXDADV_RSSTYPE_MASK;
2138 
2139 		igc_rxdesc_sync(rxr, id,
2140 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2141 
2142 		if (__predict_false(staterr & IGC_RXDEXT_STATERR_RXE)) {
2143 			m_freem(rxbuf->fmp);
2144 			rxbuf->fmp = NULL;
2145 
2146 			m_freem(mp);
2147 			m = NULL;
2148 
2149 			if_statinc(ifp, if_ierrors);
2150 			IGC_QUEUE_EVENT(q, rx_discard, 1);
2151 
2152 			DPRINTF(RX, "ierrors++\n");
2153 
2154 			goto next_desc;
2155 		}
2156 
2157 		if (__predict_false(mp == NULL)) {
2158 			panic("%s: igc_rxeof: NULL mbuf in slot %d "
2159 			    "(filled %d)", device_xname(sc->sc_dev),
2160 			    id, rxr->last_desc_filled);
2161 		}
2162 
2163 		if (!eop) {
2164 			/*
2165 			 * Figure out the next descriptor of this frame.
2166 			 */
2167 			int nextp = igc_rxdesc_incr(sc, id);
2168 
2169 			nxbuf = &rxr->rx_buffers[nextp];
2170 			/*
2171 			 * TODO prefetch(nxbuf);
2172 			 */
2173 		}
2174 
2175 		mp->m_len = len;
2176 
2177 		m = rxbuf->fmp;
2178 		rxbuf->fmp = NULL;
2179 
2180 		if (m != NULL) {
2181 			m->m_pkthdr.len += mp->m_len;
2182 		} else {
2183 			m = mp;
2184 			m->m_pkthdr.len = mp->m_len;
2185 #if NVLAN > 0
2186 			if (staterr & IGC_RXD_STAT_VP)
2187 				vlan_set_tag(m, vtag);
2188 #endif
2189 		}
2190 
2191 		/* Pass the head pointer on */
2192 		if (!eop) {
2193 			nxbuf->fmp = m;
2194 			m = NULL;
2195 			mp->m_next = nxbuf->buf;
2196 		} else {
2197 			m_set_rcvif(m, ifp);
2198 
2199 			m->m_pkthdr.csum_flags = igc_rx_checksum(q,
2200 			    ifp->if_capenable, staterr, ptype);
2201 
2202 #ifdef notyet
2203 			if (hashtype != IGC_RXDADV_RSSTYPE_NONE) {
2204 				m->m_pkthdr.ph_flowid = hash;
2205 				SET(m->m_pkthdr.csum_flags, M_FLOWID);
2206 			}
2207 			ml_enqueue(&ml, m);
2208 #endif
2209 
2210 			if_percpuq_enqueue(sc->sc_ipq, m);
2211 
2212 			if_statinc(ifp, if_ipackets);
2213 			IGC_QUEUE_EVENT(q, rx_packets, 1);
2214 			IGC_QUEUE_EVENT(q, rx_bytes, m->m_pkthdr.len);
2215 		}
2216  next_desc:
2217 		/* Advance our pointers to the next descriptor. */
2218 		id = igc_rxdesc_incr(sc, id);
2219 	}
2220 
2221 	DPRINTF(RX, "fill queue[%d]\n", rxr->me);
2222 	igc_rxrefill(rxr, id);
2223 
2224 	DPRINTF(RX, "%s n2c %d id %d\n",
2225 	    rxr->next_to_check == id ? "same" : "diff",
2226 	    rxr->next_to_check, id);
2227 	rxr->next_to_check = id;
2228 
2229 #ifdef OPENBSD
2230 	if (!(staterr & IGC_RXD_STAT_DD))
2231 		return 0;
2232 #endif
2233 
2234 	return more;
2235 }
2236 
2237 /*********************************************************************
2238  *
2239  *  Verify that the hardware indicated that the checksum is valid.
2240  *  Inform the stack about the status of checksum so that stack
2241  *  doesn't spend time verifying the checksum.
2242  *
2243  *********************************************************************/
2244 static int
2245 igc_rx_checksum(struct igc_queue *q, uint64_t capenable, uint32_t staterr,
2246     uint32_t ptype)
2247 {
2248 	const uint16_t status = (uint16_t)staterr;
2249 	const uint8_t errors = (uint8_t)(staterr >> 24);
2250 	int flags = 0;
2251 
2252 	if ((status & IGC_RXD_STAT_IPCS) != 0 &&
2253 	    (capenable & IFCAP_CSUM_IPv4_Rx) != 0) {
2254 		IGC_DRIVER_EVENT(q, rx_ipcs, 1);
2255 		flags |= M_CSUM_IPv4;
2256 		if (__predict_false((errors & IGC_RXD_ERR_IPE) != 0)) {
2257 			IGC_DRIVER_EVENT(q, rx_ipcs_bad, 1);
2258 			flags |= M_CSUM_IPv4_BAD;
2259 		}
2260 	}
2261 
2262 	if ((status & IGC_RXD_STAT_TCPCS) != 0) {
2263 		IGC_DRIVER_EVENT(q, rx_tcpcs, 1);
2264 		if ((capenable & IFCAP_CSUM_TCPv4_Rx) != 0)
2265 			flags |= M_CSUM_TCPv4;
2266 		if ((capenable & IFCAP_CSUM_TCPv6_Rx) != 0)
2267 			flags |= M_CSUM_TCPv6;
2268 	}
2269 
2270 	if ((status & IGC_RXD_STAT_UDPCS) != 0) {
2271 		IGC_DRIVER_EVENT(q, rx_udpcs, 1);
2272 		if ((capenable & IFCAP_CSUM_UDPv4_Rx) != 0)
2273 			flags |= M_CSUM_UDPv4;
2274 		if ((capenable & IFCAP_CSUM_UDPv6_Rx) != 0)
2275 			flags |= M_CSUM_UDPv6;
2276 	}
2277 
2278 	if (__predict_false((errors & IGC_RXD_ERR_TCPE) != 0)) {
2279 		IGC_DRIVER_EVENT(q, rx_l4cs_bad, 1);
2280 		if ((flags & ~M_CSUM_IPv4) != 0)
2281 			flags |= M_CSUM_TCP_UDP_BAD;
2282 	}
2283 
2284 	return flags;
2285 }
2286 
/*
 * Watchdog hook.  Currently a stub that does nothing.
 */
static void
igc_watchdog(struct ifnet *ifp)
{
	/* Nothing to do. */
}
2291 
2292 static void
2293 igc_tick(void *arg)
2294 {
2295 	struct igc_softc *sc = arg;
2296 
2297 	mutex_enter(&sc->sc_core_lock);
2298 
2299 	if (__predict_false(sc->sc_core_stopping)) {
2300 		mutex_exit(&sc->sc_core_lock);
2301 		return;
2302 	}
2303 
2304 	/* XXX watchdog */
2305 	if (0) {
2306 		IGC_GLOBAL_EVENT(sc, watchdog, 1);
2307 	}
2308 
2309 	igc_update_counters(sc);
2310 
2311 	mutex_exit(&sc->sc_core_lock);
2312 
2313 	callout_schedule(&sc->sc_tick_ch, hz);
2314 }
2315 
2316 /*********************************************************************
2317  *
2318  *  Media Ioctl callback
2319  *
2320  *  This routine is called whenever the user queries the status of
2321  *  the interface using ifconfig.
2322  *
2323  **********************************************************************/
2324 static void
2325 igc_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2326 {
2327 	struct igc_softc *sc = ifp->if_softc;
2328 	struct igc_hw *hw = &sc->hw;
2329 
2330 	igc_update_link_status(sc);
2331 
2332 	ifmr->ifm_status = IFM_AVALID;
2333 	ifmr->ifm_active = IFM_ETHER;
2334 
2335 	if (!sc->link_active) {
2336 		ifmr->ifm_active |= IFM_NONE;
2337 		return;
2338 	}
2339 
2340 	ifmr->ifm_status |= IFM_ACTIVE;
2341 
2342 	switch (sc->link_speed) {
2343 	case 10:
2344 		ifmr->ifm_active |= IFM_10_T;
2345 		break;
2346 	case 100:
2347 		ifmr->ifm_active |= IFM_100_TX;
2348 		break;
2349 	case 1000:
2350 		ifmr->ifm_active |= IFM_1000_T;
2351 		break;
2352 	case 2500:
2353 		ifmr->ifm_active |= IFM_2500_T;
2354 		break;
2355 	}
2356 
2357 	if (sc->link_duplex == FULL_DUPLEX)
2358 		ifmr->ifm_active |= IFM_FDX;
2359 	else
2360 		ifmr->ifm_active |= IFM_HDX;
2361 
2362 	switch (hw->fc.current_mode) {
2363 	case igc_fc_tx_pause:
2364 		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_TXPAUSE;
2365 		break;
2366 	case igc_fc_rx_pause:
2367 		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_RXPAUSE;
2368 		break;
2369 	case igc_fc_full:
2370 		ifmr->ifm_active |= IFM_FLOW |
2371 		    IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE;
2372 		break;
2373 	case igc_fc_none:
2374 	default:
2375 		break;
2376 	}
2377 }
2378 
2379 /*********************************************************************
2380  *
2381  *  Media Ioctl callback
2382  *
2383  *  This routine is called when the user changes speed/duplex using
2384  *  media/mediopt option with ifconfig.
2385  *
2386  **********************************************************************/
2387 static int
2388 igc_media_change(struct ifnet *ifp)
2389 {
2390 	struct igc_softc *sc = ifp->if_softc;
2391 	struct ifmedia *ifm = &sc->media;
2392 
2393 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2394 		return EINVAL;
2395 
2396 	sc->hw.mac.autoneg = DO_AUTO_NEG;
2397 
2398 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
2399 	case IFM_AUTO:
2400 		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
2401 		break;
2402 	case IFM_2500_T:
2403 		sc->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
2404 		break;
2405 	case IFM_1000_T:
2406 		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
2407 		break;
2408 	case IFM_100_TX:
2409 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
2410 			sc->hw.phy.autoneg_advertised = ADVERTISE_100_FULL;
2411 		else
2412 			sc->hw.phy.autoneg_advertised = ADVERTISE_100_HALF;
2413 		break;
2414 	case IFM_10_T:
2415 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
2416 			sc->hw.phy.autoneg_advertised = ADVERTISE_10_FULL;
2417 		else
2418 			sc->hw.phy.autoneg_advertised = ADVERTISE_10_HALF;
2419 		break;
2420 	default:
2421 		return EINVAL;
2422 	}
2423 
2424 	igc_init_locked(sc);
2425 
2426 	return 0;
2427 }
2428 
2429 static int
2430 igc_ifflags_cb(struct ethercom *ec)
2431 {
2432 	struct ifnet *ifp = &ec->ec_if;
2433 	struct igc_softc *sc = ifp->if_softc;
2434 	int rc = 0;
2435 	u_short iffchange;
2436 	bool needreset = false;
2437 
2438 	DPRINTF(CFG, "called\n");
2439 
2440 	KASSERT(IFNET_LOCKED(ifp));
2441 
2442 	mutex_enter(&sc->sc_core_lock);
2443 
2444 	/*
2445 	 * Check for if_flags.
2446 	 * Main usage is to prevent linkdown when opening bpf.
2447 	 */
2448 	iffchange = ifp->if_flags ^ sc->sc_if_flags;
2449 	sc->sc_if_flags = ifp->if_flags;
2450 	if ((iffchange & ~(IFF_CANTCHANGE | IFF_DEBUG)) != 0) {
2451 		needreset = true;
2452 		goto ec;
2453 	}
2454 
2455 	/* iff related updates */
2456 	if ((iffchange & IFF_PROMISC) != 0)
2457 		igc_set_filter(sc);
2458 
2459 #ifdef notyet
2460 	igc_set_vlan(sc);
2461 #endif
2462 
2463 ec:
2464 #ifdef notyet
2465 	/* Check for ec_capenable. */
2466 	ecchange = ec->ec_capenable ^ sc->sc_ec_capenable;
2467 	sc->sc_ec_capenable = ec->ec_capenable;
2468 	if ((ecchange & ~ETHERCAP_SOMETHING) != 0) {
2469 		needreset = true;
2470 		goto out;
2471 	}
2472 #endif
2473 	if (needreset)
2474 		rc = ENETRESET;
2475 
2476 	mutex_exit(&sc->sc_core_lock);
2477 
2478 	return rc;
2479 }
2480 
2481 static void
2482 igc_set_filter(struct igc_softc *sc)
2483 {
2484 	struct ethercom *ec = &sc->sc_ec;
2485 	uint32_t rctl;
2486 
2487 	rctl = IGC_READ_REG(&sc->hw, IGC_RCTL);
2488 	rctl &= ~(IGC_RCTL_BAM |IGC_RCTL_UPE | IGC_RCTL_MPE);
2489 
2490 	if ((sc->sc_if_flags & IFF_BROADCAST) != 0)
2491 		rctl |= IGC_RCTL_BAM;
2492 	if ((sc->sc_if_flags & IFF_PROMISC) != 0) {
2493 		DPRINTF(CFG, "promisc\n");
2494 		rctl |= IGC_RCTL_UPE;
2495 		ETHER_LOCK(ec);
2496  allmulti:
2497 		ec->ec_flags |= ETHER_F_ALLMULTI;
2498 		ETHER_UNLOCK(ec);
2499 		rctl |= IGC_RCTL_MPE;
2500 	} else {
2501 		struct ether_multistep step;
2502 		struct ether_multi *enm;
2503 		int mcnt = 0;
2504 
2505 		memset(sc->mta, 0, IGC_MTA_LEN);
2506 
2507 		ETHER_LOCK(ec);
2508 		ETHER_FIRST_MULTI(step, ec, enm);
2509 		while (enm != NULL) {
2510 			if (((memcmp(enm->enm_addrlo, enm->enm_addrhi,
2511 					ETHER_ADDR_LEN)) != 0) ||
2512 			    (mcnt >= MAX_NUM_MULTICAST_ADDRESSES)) {
2513 				/*
2514 				 * We must listen to a range of multicast
2515 				 * addresses. For now, just accept all
2516 				 * multicasts, rather than trying to set only
2517 				 * those filter bits needed to match the range.
2518 				 * (At this time, the only use of address
2519 				 * ranges is for IP multicast routing, for
2520 				 * which the range is big enough to require all
2521 				 * bits set.)
2522 				 */
2523 				goto allmulti;
2524 			}
2525 			DPRINTF(CFG, "%d: %s\n", mcnt,
2526 			    ether_sprintf(enm->enm_addrlo));
2527 			memcpy(&sc->mta[mcnt * ETHER_ADDR_LEN],
2528 			    enm->enm_addrlo, ETHER_ADDR_LEN);
2529 
2530 			mcnt++;
2531 			ETHER_NEXT_MULTI(step, enm);
2532 		}
2533 		ec->ec_flags &= ~ETHER_F_ALLMULTI;
2534 		ETHER_UNLOCK(ec);
2535 
2536 		DPRINTF(CFG, "hw filter\n");
2537 		igc_update_mc_addr_list(&sc->hw, sc->mta, mcnt);
2538 	}
2539 
2540 	IGC_WRITE_REG(&sc->hw, IGC_RCTL, rctl);
2541 }
2542 
2543 static void
2544 igc_update_link_status(struct igc_softc *sc)
2545 {
2546 	struct ifnet *ifp = &sc->sc_ec.ec_if;
2547 	struct igc_hw *hw = &sc->hw;
2548 
2549 	if (hw->mac.get_link_status == true)
2550 		igc_check_for_link(hw);
2551 
2552 	if (IGC_READ_REG(&sc->hw, IGC_STATUS) & IGC_STATUS_LU) {
2553 		if (sc->link_active == 0) {
2554 			igc_get_speed_and_duplex(hw, &sc->link_speed,
2555 			    &sc->link_duplex);
2556 			sc->link_active = 1;
2557 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
2558 			if_link_state_change(ifp, LINK_STATE_UP);
2559 		}
2560 	} else {
2561 		if (sc->link_active == 1) {
2562 			ifp->if_baudrate = sc->link_speed = 0;
2563 			sc->link_duplex = 0;
2564 			sc->link_active = 0;
2565 			if_link_state_change(ifp, LINK_STATE_DOWN);
2566 		}
2567 	}
2568 }
2569 
2570 /*********************************************************************
2571  *
2572  *  Get a buffer from system mbuf buffer pool.
2573  *
2574  **********************************************************************/
2575 static int
2576 igc_get_buf(struct rx_ring *rxr, int id, bool strict)
2577 {
2578 	struct igc_softc *sc = rxr->sc;
2579 	struct igc_queue *q = rxr->rxr_igcq;
2580 	struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
2581 	bus_dmamap_t map = rxbuf->map;
2582 	struct mbuf *m;
2583 	int error;
2584 
2585 	if (__predict_false(rxbuf->buf)) {
2586 		if (strict) {
2587 			DPRINTF(RX, "slot %d already has an mbuf\n", id);
2588 			return EINVAL;
2589 		}
2590 		return 0;
2591 	}
2592 
2593 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2594 	if (__predict_false(m == NULL)) {
2595  enobuf:
2596 		IGC_QUEUE_EVENT(q, rx_no_mbuf, 1);
2597 		return ENOBUFS;
2598 	}
2599 	MCLAIM(m, &sc->sc_ec.ec_rx_mowner);
2600 
2601 	MCLGET(m, M_DONTWAIT);
2602 	if (__predict_false(!(m->m_flags & M_EXT))) {
2603 		m_freem(m);
2604 		goto enobuf;
2605 	}
2606 
2607 	m->m_len = m->m_pkthdr.len = sc->rx_mbuf_sz;
2608 
2609 	error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, map, m,
2610 	    BUS_DMA_READ | BUS_DMA_NOWAIT);
2611 	if (error) {
2612 		m_freem(m);
2613 		return error;
2614 	}
2615 
2616 	bus_dmamap_sync(rxr->rxdma.dma_tag, map, 0,
2617 	    map->dm_mapsize, BUS_DMASYNC_PREREAD);
2618 	rxbuf->buf = m;
2619 
2620 	union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
2621 	igc_rxdesc_sync(rxr, id, BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
2622 	rxdesc->read.pkt_addr = htole64(map->dm_segs[0].ds_addr);
2623 	igc_rxdesc_sync(rxr, id, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
2624 
2625 	return 0;
2626 }
2627 
2628 static void
2629 igc_configure_queues(struct igc_softc *sc)
2630 {
2631 	struct igc_hw *hw = &sc->hw;
2632 	uint32_t ivar;
2633 
2634 	/* First turn on RSS capability */
2635 	IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE | IGC_GPIE_EIAME |
2636 	    IGC_GPIE_PBA | IGC_GPIE_NSICR);
2637 
2638 	/* Set the starting interrupt rate */
2639 	uint32_t newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
2640 	newitr |= IGC_EITR_CNT_IGNR;
2641 
2642 	/* Turn on MSI-X */
2643 	uint32_t newmask = 0;
2644 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2645 		struct igc_queue *q = &sc->queues[iq];
2646 
2647 		/* RX entries */
2648 		igc_set_queues(sc, iq, q->msix, 0);
2649 		/* TX entries */
2650 		igc_set_queues(sc, iq, q->msix, 1);
2651 		newmask |= q->eims;
2652 		IGC_WRITE_REG(hw, IGC_EITR(q->msix), newitr);
2653 	}
2654 	sc->msix_queuesmask = newmask;
2655 
2656 #if 1
2657 	ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, 0);
2658 	DPRINTF(CFG, "ivar(0)=0x%x\n", ivar);
2659 	ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, 1);
2660 	DPRINTF(CFG, "ivar(1)=0x%x\n", ivar);
2661 #endif
2662 
2663 	/* And for the link interrupt */
2664 	ivar = (sc->linkvec | IGC_IVAR_VALID) << 8;
2665 	sc->msix_linkmask = 1 << sc->linkvec;
2666 	IGC_WRITE_REG(hw, IGC_IVAR_MISC, ivar);
2667 }
2668 
2669 static void
2670 igc_set_queues(struct igc_softc *sc, uint32_t entry, uint32_t vector, int type)
2671 {
2672 	struct igc_hw *hw = &sc->hw;
2673 	const uint32_t index = entry >> 1;
2674 	uint32_t ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, index);
2675 
2676 	if (type) {
2677 		if (entry & 1) {
2678 			ivar &= 0x00FFFFFF;
2679 			ivar |= (vector | IGC_IVAR_VALID) << 24;
2680 		} else {
2681 			ivar &= 0xFFFF00FF;
2682 			ivar |= (vector | IGC_IVAR_VALID) << 8;
2683 		}
2684 	} else {
2685 		if (entry & 1) {
2686 			ivar &= 0xFF00FFFF;
2687 			ivar |= (vector | IGC_IVAR_VALID) << 16;
2688 		} else {
2689 			ivar &= 0xFFFFFF00;
2690 			ivar |= vector | IGC_IVAR_VALID;
2691 		}
2692 	}
2693 	IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, index, ivar);
2694 }
2695 
2696 static void
2697 igc_enable_queue(struct igc_softc *sc, uint32_t eims)
2698 {
2699 	IGC_WRITE_REG(&sc->hw, IGC_EIMS, eims);
2700 }
2701 
2702 static void
2703 igc_enable_intr(struct igc_softc *sc)
2704 {
2705 	struct igc_hw *hw = &sc->hw;
2706 
2707 	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX) {
2708 		const uint32_t mask = sc->msix_queuesmask | sc->msix_linkmask;
2709 
2710 		IGC_WRITE_REG(hw, IGC_EIAC, mask);
2711 		IGC_WRITE_REG(hw, IGC_EIAM, mask);
2712 		IGC_WRITE_REG(hw, IGC_EIMS, mask);
2713 		IGC_WRITE_REG(hw, IGC_IMS, IGC_IMS_LSC);
2714 	} else {
2715 		IGC_WRITE_REG(hw, IGC_IMS, IMS_ENABLE_MASK);
2716 	}
2717 	IGC_WRITE_FLUSH(hw);
2718 }
2719 
2720 static void
2721 igc_disable_intr(struct igc_softc *sc)
2722 {
2723 	struct igc_hw *hw = &sc->hw;
2724 
2725 	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX) {
2726 		IGC_WRITE_REG(hw, IGC_EIMC, 0xffffffff);
2727 		IGC_WRITE_REG(hw, IGC_EIAC, 0);
2728 	}
2729 	IGC_WRITE_REG(hw, IGC_IMC, 0xffffffff);
2730 	IGC_WRITE_FLUSH(hw);
2731 }
2732 
2733 static int
2734 igc_intr_link(void *arg)
2735 {
2736 	struct igc_softc *sc = (struct igc_softc *)arg;
2737 	const uint32_t reg_icr = IGC_READ_REG(&sc->hw, IGC_ICR);
2738 
2739 	IGC_GLOBAL_EVENT(sc, link, 1);
2740 
2741 	if (reg_icr & IGC_ICR_LSC) {
2742 		mutex_enter(&sc->sc_core_lock);
2743 		sc->hw.mac.get_link_status = true;
2744 		igc_update_link_status(sc);
2745 		mutex_exit(&sc->sc_core_lock);
2746 	}
2747 
2748 	IGC_WRITE_REG(&sc->hw, IGC_IMS, IGC_IMS_LSC);
2749 	IGC_WRITE_REG(&sc->hw, IGC_EIMS, sc->msix_linkmask);
2750 
2751 	return 1;
2752 }
2753 
2754 static int
2755 igc_intr_queue(void *arg)
2756 {
2757 	struct igc_queue *iq = arg;
2758 	struct igc_softc *sc = iq->sc;
2759 	struct ifnet *ifp = &sc->sc_ec.ec_if;
2760 	struct rx_ring *rxr = iq->rxr;
2761 	struct tx_ring *txr = iq->txr;
2762 	const u_int txlimit = sc->sc_tx_intr_process_limit,
2763 		    rxlimit = sc->sc_rx_intr_process_limit;
2764 	bool txmore, rxmore;
2765 
2766 	IGC_QUEUE_EVENT(iq, irqs, 1);
2767 
2768 	if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING)))
2769 		return 0;
2770 
2771 	mutex_enter(&txr->txr_lock);
2772 	txmore = igc_txeof(txr, txlimit);
2773 	mutex_exit(&txr->txr_lock);
2774 	mutex_enter(&rxr->rxr_lock);
2775 	rxmore = igc_rxeof(rxr, rxlimit);
2776 	mutex_exit(&rxr->rxr_lock);
2777 
2778 	if (txmore || rxmore) {
2779 		IGC_QUEUE_EVENT(iq, req, 1);
2780 		igc_sched_handle_queue(sc, iq);
2781 	} else {
2782 		igc_enable_queue(sc, iq->eims);
2783 	}
2784 
2785 	return 1;
2786 }
2787 
2788 static int
2789 igc_intr(void *arg)
2790 {
2791 	struct igc_softc *sc = arg;
2792 	struct ifnet *ifp = &sc->sc_ec.ec_if;
2793 	struct igc_queue *iq = &sc->queues[0];
2794 	struct rx_ring *rxr = iq->rxr;
2795 	struct tx_ring *txr = iq->txr;
2796 	const u_int txlimit = sc->sc_tx_intr_process_limit,
2797 		    rxlimit = sc->sc_rx_intr_process_limit;
2798 	bool txmore, rxmore;
2799 
2800 	if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING)))
2801 		return 0;
2802 
2803 	const uint32_t reg_icr = IGC_READ_REG(&sc->hw, IGC_ICR);
2804 	DPRINTF(MISC, "reg_icr=0x%x\n", reg_icr);
2805 
2806 	/* Definitely not our interrupt. */
2807 	if (reg_icr == 0x0) {
2808 		DPRINTF(MISC, "not for me\n");
2809 		return 0;
2810 	}
2811 
2812 	IGC_QUEUE_EVENT(iq, irqs, 1);
2813 
2814 	/* Hot eject? */
2815 	if (__predict_false(reg_icr == 0xffffffff)) {
2816 		DPRINTF(MISC, "hot eject\n");
2817 		return 0;
2818 	}
2819 
2820 	if (__predict_false(!(reg_icr & IGC_ICR_INT_ASSERTED))) {
2821 		DPRINTF(MISC, "not set IGC_ICR_INT_ASSERTED");
2822 		return 0;
2823 	}
2824 
2825 	/*
2826 	 * Only MSI-X interrupts have one-shot behavior by taking advantage
2827 	 * of the EIAC register.  Thus, explicitly disable interrupts.  This
2828 	 * also works around the MSI message reordering errata on certain
2829 	 * systems.
2830 	 */
2831 	igc_disable_intr(sc);
2832 
2833 	mutex_enter(&txr->txr_lock);
2834 	txmore = igc_txeof(txr, txlimit);
2835 	mutex_exit(&txr->txr_lock);
2836 	mutex_enter(&rxr->rxr_lock);
2837 	rxmore = igc_rxeof(rxr, rxlimit);
2838 	mutex_exit(&rxr->rxr_lock);
2839 
2840 	/* Link status change */
2841 	// XXXX FreeBSD checks IGC_ICR_RXSEQ
2842 	if (__predict_false(reg_icr & IGC_ICR_LSC)) {
2843 		IGC_GLOBAL_EVENT(sc, link, 1);
2844 		mutex_enter(&sc->sc_core_lock);
2845 		sc->hw.mac.get_link_status = true;
2846 		igc_update_link_status(sc);
2847 		mutex_exit(&sc->sc_core_lock);
2848 	}
2849 
2850 	if (txmore || rxmore) {
2851 		IGC_QUEUE_EVENT(iq, req, 1);
2852 		igc_sched_handle_queue(sc, iq);
2853 	} else {
2854 		igc_enable_intr(sc);
2855 	}
2856 
2857 	return 1;
2858 }
2859 
2860 static void
2861 igc_handle_queue(void *arg)
2862 {
2863 	struct igc_queue *iq = arg;
2864 	struct igc_softc *sc = iq->sc;
2865 	struct tx_ring *txr = iq->txr;
2866 	struct rx_ring *rxr = iq->rxr;
2867 	const u_int txlimit = sc->sc_tx_process_limit,
2868 		    rxlimit = sc->sc_rx_process_limit;
2869 	bool txmore, rxmore;
2870 
2871 	IGC_QUEUE_EVENT(iq, handleq, 1);
2872 
2873 	mutex_enter(&txr->txr_lock);
2874 	txmore = igc_txeof(txr, txlimit);
2875 	/* for ALTQ, dequeue from if_snd */
2876 	if (txr->me == 0) {
2877 		struct ifnet *ifp = &sc->sc_ec.ec_if;
2878 
2879 		igc_tx_common_locked(ifp, txr, IGC_TX_START);
2880 	}
2881 	mutex_exit(&txr->txr_lock);
2882 
2883 	mutex_enter(&rxr->rxr_lock);
2884 	rxmore = igc_rxeof(rxr, rxlimit);
2885 	mutex_exit(&rxr->rxr_lock);
2886 
2887 	if (txmore || rxmore) {
2888 		igc_sched_handle_queue(sc, iq);
2889 	} else {
2890 		if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX)
2891 			igc_enable_queue(sc, iq->eims);
2892 		else
2893 			igc_enable_intr(sc);
2894 	}
2895 }
2896 
2897 static void
2898 igc_handle_queue_work(struct work *wk, void *context)
2899 {
2900 	struct igc_queue *iq =
2901 	    container_of(wk, struct igc_queue, igcq_wq_cookie);
2902 
2903 	igc_handle_queue(iq);
2904 }
2905 
2906 static void
2907 igc_sched_handle_queue(struct igc_softc *sc, struct igc_queue *iq)
2908 {
2909 
2910 	if (iq->igcq_workqueue) {
2911 		/* XXXRO notyet */
2912 		workqueue_enqueue(sc->sc_queue_wq, &iq->igcq_wq_cookie,
2913 		    curcpu());
2914 	} else {
2915 		softint_schedule(iq->igcq_si);
2916 	}
2917 }
2918 
2919 static void
2920 igc_barrier_handle_queue(struct igc_softc *sc)
2921 {
2922 
2923 	if (sc->sc_txrx_workqueue) {
2924 		for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2925 			struct igc_queue *q = &sc->queues[iq];
2926 
2927 			workqueue_wait(sc->sc_queue_wq, &q->igcq_wq_cookie);
2928 		}
2929 	} else {
2930 		xc_barrier(0);
2931 	}
2932 }
2933 
2934 /*********************************************************************
2935  *
2936  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2937  *  the information needed to transmit a packet on the wire.
2938  *
2939  **********************************************************************/
2940 static int
2941 igc_allocate_transmit_buffers(struct tx_ring *txr)
2942 {
2943 	struct igc_softc *sc = txr->sc;
2944 	int error;
2945 
2946 	txr->tx_buffers =
2947 	    kmem_zalloc(sc->num_tx_desc * sizeof(struct igc_tx_buf), KM_SLEEP);
2948 	txr->txtag = txr->txdma.dma_tag;
2949 
2950 	/* Create the descriptor buffer dma maps. */
2951 	for (int id = 0; id < sc->num_tx_desc; id++) {
2952 		struct igc_tx_buf *txbuf = &txr->tx_buffers[id];
2953 
2954 		error = bus_dmamap_create(txr->txdma.dma_tag,
2955 		    round_page(IGC_TSO_SIZE + sizeof(struct ether_vlan_header)),
2956 		    IGC_MAX_SCATTER, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
2957 		if (error != 0) {
2958 			aprint_error_dev(sc->sc_dev,
2959 			    "unable to create TX DMA map\n");
2960 			goto fail;
2961 		}
2962 
2963 		txbuf->eop_index = -1;
2964 	}
2965 
2966 	return 0;
2967  fail:
2968 	return error;
2969 }
2970 
2971 
2972 /*********************************************************************
2973  *
2974  *  Allocate and initialize transmit structures.
2975  *
2976  **********************************************************************/
2977 static int
2978 igc_setup_transmit_structures(struct igc_softc *sc)
2979 {
2980 
2981 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2982 		struct tx_ring *txr = &sc->tx_rings[iq];
2983 
2984 		if (igc_setup_transmit_ring(txr))
2985 			goto fail;
2986 	}
2987 
2988 	return 0;
2989  fail:
2990 	igc_free_transmit_structures(sc);
2991 	return ENOBUFS;
2992 }
2993 
2994 /*********************************************************************
2995  *
2996  *  Initialize a transmit ring.
2997  *
2998  **********************************************************************/
2999 static int
3000 igc_setup_transmit_ring(struct tx_ring *txr)
3001 {
3002 	struct igc_softc *sc = txr->sc;
3003 
3004 	/* Now allocate transmit buffers for the ring. */
3005 	if (igc_allocate_transmit_buffers(txr))
3006 		return ENOMEM;
3007 
3008 	/* Clear the old ring contents */
3009 	memset(txr->tx_base, 0,
3010 	    sizeof(union igc_adv_tx_desc) * sc->num_tx_desc);
3011 
3012 	/* Reset indices. */
3013 	txr->next_avail_desc = 0;
3014 	txr->next_to_clean = 0;
3015 
3016 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 0,
3017 	    txr->txdma.dma_map->dm_mapsize,
3018 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3019 
3020 	txr->txr_interq = pcq_create(sc->num_tx_desc, KM_SLEEP);
3021 
3022 	mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET);
3023 
3024 	return 0;
3025 }
3026 
3027 /*********************************************************************
3028  *
3029  *  Enable transmit unit.
3030  *
3031  **********************************************************************/
3032 static void
3033 igc_initialize_transmit_unit(struct igc_softc *sc)
3034 {
3035 	struct ifnet *ifp = &sc->sc_ec.ec_if;
3036 	struct igc_hw *hw = &sc->hw;
3037 
3038 	/* Setup the Base and Length of the TX descriptor ring. */
3039 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3040 		struct tx_ring *txr = &sc->tx_rings[iq];
3041 		const uint64_t bus_addr =
3042 		    txr->txdma.dma_map->dm_segs[0].ds_addr;
3043 
3044 		/* Base and len of TX ring */
3045 		IGC_WRITE_REG(hw, IGC_TDLEN(iq),
3046 		    sc->num_tx_desc * sizeof(union igc_adv_tx_desc));
3047 		IGC_WRITE_REG(hw, IGC_TDBAH(iq), (uint32_t)(bus_addr >> 32));
3048 		IGC_WRITE_REG(hw, IGC_TDBAL(iq), (uint32_t)bus_addr);
3049 
3050 		/* Init the HEAD/TAIL indices */
3051 		IGC_WRITE_REG(hw, IGC_TDT(iq), 0 /* XXX txr->next_avail_desc */);
3052 		IGC_WRITE_REG(hw, IGC_TDH(iq), 0);
3053 
3054 		txr->watchdog_timer = 0;
3055 
3056 		uint32_t txdctl = 0;	/* Clear txdctl */
3057 		txdctl |= 0x1f;		/* PTHRESH */
3058 		txdctl |= 1 << 8;	/* HTHRESH */
3059 		txdctl |= 1 << 16;	/* WTHRESH */
3060 		txdctl |= 1 << 22;	/* Reserved bit 22 must always be 1 */
3061 		txdctl |= IGC_TXDCTL_GRAN;
3062 		txdctl |= 1 << 25;	/* LWTHRESH */
3063 
3064 		IGC_WRITE_REG(hw, IGC_TXDCTL(iq), txdctl);
3065 	}
3066 	ifp->if_timer = 0;
3067 
3068 	/* Program the Transmit Control Register */
3069 	uint32_t tctl = IGC_READ_REG(&sc->hw, IGC_TCTL);
3070 	tctl &= ~IGC_TCTL_CT;
3071 	tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
3072 	    (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
3073 
3074 	/* This write will effectively turn on the transmit unit. */
3075 	IGC_WRITE_REG(&sc->hw, IGC_TCTL, tctl);
3076 }
3077 
3078 /*********************************************************************
3079  *
3080  *  Free all transmit rings.
3081  *
3082  **********************************************************************/
3083 static void
3084 igc_free_transmit_structures(struct igc_softc *sc)
3085 {
3086 
3087 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3088 		struct tx_ring *txr = &sc->tx_rings[iq];
3089 
3090 		igc_free_transmit_buffers(txr);
3091 	}
3092 }
3093 
3094 /*********************************************************************
3095  *
3096  *  Free transmit ring related data structures.
3097  *
3098  **********************************************************************/
3099 static void
3100 igc_free_transmit_buffers(struct tx_ring *txr)
3101 {
3102 	struct igc_softc *sc = txr->sc;
3103 
3104 	if (txr->tx_buffers == NULL)
3105 		return;
3106 
3107 	igc_withdraw_transmit_packets(txr, true);
3108 
3109 	kmem_free(txr->tx_buffers,
3110 	    sc->num_tx_desc * sizeof(struct igc_tx_buf));
3111 	txr->tx_buffers = NULL;
3112 	txr->txtag = NULL;
3113 
3114 	pcq_destroy(txr->txr_interq);
3115 	mutex_destroy(&txr->txr_lock);
3116 }
3117 
3118 /*********************************************************************
3119  *
3120  *  Withdraw transmit packets.
3121  *
3122  **********************************************************************/
3123 static void
3124 igc_withdraw_transmit_packets(struct tx_ring *txr, bool destroy)
3125 {
3126 	struct igc_softc *sc = txr->sc;
3127 	struct igc_queue *q = txr->txr_igcq;
3128 
3129 	mutex_enter(&txr->txr_lock);
3130 
3131 	for (int id = 0; id < sc->num_tx_desc; id++) {
3132 		union igc_adv_tx_desc *txdesc = &txr->tx_base[id];
3133 
3134 		igc_txdesc_sync(txr, id,
3135 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3136 		txdesc->read.buffer_addr = 0;
3137 		txdesc->read.cmd_type_len = 0;
3138 		txdesc->read.olinfo_status = 0;
3139 		igc_txdesc_sync(txr, id,
3140 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3141 
3142 		struct igc_tx_buf *txbuf = &txr->tx_buffers[id];
3143 		bus_dmamap_t map = txbuf->map;
3144 
3145 		if (map != NULL && map->dm_nsegs > 0) {
3146 			bus_dmamap_sync(txr->txdma.dma_tag, map,
3147 			    0, map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
3148 			bus_dmamap_unload(txr->txdma.dma_tag, map);
3149 		}
3150 		m_freem(txbuf->m_head);
3151 		txbuf->m_head = NULL;
3152 		if (map != NULL && destroy) {
3153 			bus_dmamap_destroy(txr->txdma.dma_tag, map);
3154 			txbuf->map = NULL;
3155 		}
3156 		txbuf->eop_index = -1;
3157 
3158 		txr->next_avail_desc = 0;
3159 		txr->next_to_clean = 0;
3160 	}
3161 
3162 	struct mbuf *m;
3163 	while ((m = pcq_get(txr->txr_interq)) != NULL) {
3164 		IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
3165 		m_freem(m);
3166 	}
3167 
3168 	mutex_exit(&txr->txr_lock);
3169 }
3170 
3171 
/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 *  txr:		transmit ring the packet is queued on.
 *  mp:			packet; its pkthdr csum_flags/csum_data select the
 *			offloads requested.
 *  prod:		index into txr->tx_base where a context descriptor
 *			is written if one is needed.
 *  cmd_type_len:	only forwarded to igc_tso_setup() here.
 *  olinfo_status:	updated in place with the payload length and the
 *			POPTS checksum-insertion bits.
 *
 *  Returns 1 when a context descriptor was written at `prod', 0 when
 *  none was needed.  TSO requests are delegated to igc_tso_setup()
 *  and its return value is passed through.
 *
 *  Note that for non-TSO packets the payload length is ORed into
 *  *olinfo_status even when the function returns 0.
 *
 **********************************************************************/

static int
igc_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, int prod,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
	struct ether_vlan_header *evl;
	uint32_t type_tucmd_mlhl = 0;
	uint32_t vlan_macip_lens = 0;
	uint32_t ehlen, iphlen;
	uint16_t ehtype;
	/* Set to 1 as soon as something requires a context descriptor. */
	int off = 0;

	const int csum_flags = mp->m_pkthdr.csum_flags;

	/* First check if TSO is to be used */
	if ((csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) != 0) {
		return igc_tso_setup(txr, mp, prod, cmd_type_len,
		    olinfo_status);
	}

	/* Which address family, if any, has a checksum offload requested. */
	const bool v4 = (csum_flags &
	    (M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0;
	const bool v6 = (csum_flags & (M_CSUM_UDPv6 | M_CSUM_TCPv6)) != 0;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IGC_ADVTXD_PAYLEN_SHIFT;

	/*
	 * In advanced descriptors the vlan tag must
	 * be placed into the context descriptor. Hence
	 * we need to make one even if not doing offloads.
	 *
	 * NOTE(review): the "vlan.h" include at the top of the file is
	 * under "#if 0", so NVLAN is presumably undefined and this block
	 * compiled out -- confirm.
	 *
	 * When the block is compiled in, the trailing "else" binds to the
	 * "if (!v4 && !v6)" that follows the #endif: the early return is
	 * then taken only for untagged packets without checksum offload.
	 */
#if NVLAN > 0
	if (vlan_has_tag(mp)) {
		vlan_macip_lens |= (uint32_t)vlan_get_tag(mp)
		    << IGC_ADVTXD_VLAN_SHIFT;
		off = 1;
	} else
#endif
	if (!v4 && !v6)
		return 0;

	/* Work out the L2 header length and the real ethertype. */
	KASSERT(mp->m_len >= sizeof(struct ether_header));
	evl = mtod(mp, struct ether_vlan_header *);
	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		/* In-band 802.1Q tag: the ethertype follows the tag. */
		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		ehtype = evl->evl_proto;
	} else {
		ehlen = ETHER_HDR_LEN;
		ehtype = evl->evl_encap_proto;
	}

	vlan_macip_lens |= ehlen << IGC_ADVTXD_MACLEN_SHIFT;

#ifdef IGC_DEBUG
	/*
	 * For checksum offloading, L3 headers are not mandatory.
	 * We use these only for consistency checks.
	 */
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint8_t ipproto;
	char *l3d;

	/*
	 * Locate the start of the L3 header: in the next mbuf when the
	 * first one holds exactly the L2 header, otherwise right behind
	 * the L2 header in the first mbuf.
	 */
	if (mp->m_len == ehlen && mp->m_next != NULL)
		l3d = mtod(mp->m_next, char *);
	else
		l3d = mtod(mp, char *) + ehlen;
#endif

	/* ehtype is still in network byte order at this point. */
	switch (ntohs(ehtype)) {
	case ETHERTYPE_IP:
		/* The IP header length comes from the stack via csum_data. */
		iphlen = M_CSUM_DATA_IPv4_IPHL(mp->m_pkthdr.csum_data);
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;

		if ((csum_flags & M_CSUM_IPv4) != 0) {
			/* Request IP header checksum insertion. */
			*olinfo_status |= IGC_TXD_POPTS_IXSM << 8;
			off = 1;
		}
#ifdef IGC_DEBUG
		KASSERT(!v6);
		ip = (void *)l3d;
		ipproto = ip->ip_p;
		KASSERT(iphlen == ip->ip_hl << 2);
		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
		    ip->ip_sum == 0);
#endif
		break;
	case ETHERTYPE_IPV6:
		iphlen = M_CSUM_DATA_IPv6_IPHL(mp->m_pkthdr.csum_data);
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
#ifdef IGC_DEBUG
		KASSERT(!v4);
		ip6 = (void *)l3d;
		ipproto = ip6->ip6_nxt;	/* XXX */
		KASSERT(iphlen == sizeof(struct ip6_hdr));
#endif
		break;
	default:
		/*
		 * Unknown L3 protocol. Clear L3 header length and proceed for
		 * LAN as done by Linux driver.
		 */
		iphlen = 0;
#ifdef IGC_DEBUG
		KASSERT(!v4 && !v6);
		ipproto = 0;
#endif
		break;
	}

	vlan_macip_lens |= iphlen;

	const bool tcp = (csum_flags & (M_CSUM_TCPv4 | M_CSUM_TCPv6)) != 0;
	const bool udp = (csum_flags & (M_CSUM_UDPv4 | M_CSUM_UDPv6)) != 0;

	/* L4 checksum insertion: TCP takes precedence if both are flagged. */
	if (tcp) {
#ifdef IGC_DEBUG
		KASSERTMSG(ipproto == IPPROTO_TCP, "ipproto = %d", ipproto);
#endif
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
		*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
		off = 1;
	} else if (udp) {
#ifdef IGC_DEBUG
		KASSERTMSG(ipproto == IPPROTO_UDP, "ipproto = %d", ipproto);
#endif
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
		*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
		off = 1;
	}

	/* Nothing asked for a context descriptor after all. */
	if (off == 0)
		return 0;

	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	/* Now ready a context descriptor */
	struct igc_adv_tx_context_desc *txdesc =
	    (struct igc_adv_tx_context_desc *)&txr->tx_base[prod];

	/* Now copy bits into descriptor */
	igc_txdesc_sync(txr, prod,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	htolem32(&txdesc->vlan_macip_lens, vlan_macip_lens);
	htolem32(&txdesc->type_tucmd_mlhl, type_tucmd_mlhl);
	htolem32(&txdesc->seqnum_seed, 0);
	htolem32(&txdesc->mss_l4len_idx, 0);
	igc_txdesc_sync(txr, prod,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return 1;
}
3331 
/*********************************************************************
 *
 *  Advanced Context Descriptor setup for TSO
 *
 *  XXX XXXRO
 *	Not working. Some packets are sent with correct csums, but
 *	others aren't. th->th_sum may be adjusted.
 *
 *  As compiled today ("#if 1" below) this is a stub that returns 0
 *  without touching any of its arguments; the real implementation is
 *  kept in the "#else" branch for future work.
 *
 *  The disabled implementation writes a TSO context descriptor at
 *  txr->tx_base[prod], sets the TSE command bit in *cmd_type_len,
 *  ORs the TCP payload length and TXSM into *olinfo_status, and
 *  returns 1.
 *
 **********************************************************************/

static int
igc_tso_setup(struct tx_ring *txr, struct mbuf *mp, int prod,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
#if 1 /* notyet */
	/* TSO is not enabled: report that no context descriptor was made. */
	return 0;
#else
	struct ether_vlan_header *evl;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	uint32_t type_tucmd_mlhl = 0;
	uint32_t vlan_macip_lens = 0;
	uint32_t mss_l4len_idx = 0;
	uint32_t ehlen, iphlen, tcphlen, paylen;
	uint16_t ehtype;

	/*
	 * In advanced descriptors the vlan tag must
	 * be placed into the context descriptor. Hence
	 * we need to make one even if not doing offloads.
	 */
#if NVLAN > 0
	if (vlan_has_tag(mp)) {
		vlan_macip_lens |= (uint32_t)vlan_get_tag(mp)
		    << IGC_ADVTXD_VLAN_SHIFT;
	}
#endif

	/* Work out the L2 header length and the real ethertype. */
	KASSERT(mp->m_len >= sizeof(struct ether_header));
	evl = mtod(mp, struct ether_vlan_header *);
	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		ehtype = evl->evl_proto;
	} else {
		ehlen = ETHER_HDR_LEN;
		ehtype = evl->evl_encap_proto;
	}

	vlan_macip_lens |= ehlen << IGC_ADVTXD_MACLEN_SHIFT;

	/*
	 * The L3 and TCP headers are modified in place below; the
	 * KASSERTs require both to live in the first mbuf.
	 */
	switch (ntohs(ehtype)) {
	case ETHERTYPE_IP:
		iphlen = M_CSUM_DATA_IPv4_IPHL(mp->m_pkthdr.csum_data);
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
		*olinfo_status |= IGC_TXD_POPTS_IXSM << 8;

		KASSERT(mp->m_len >= ehlen + sizeof(*ip));
		ip = (void *)(mtod(mp, char *) + ehlen);
		/* Length field is cleared before handing the header over. */
		ip->ip_len = 0;
		KASSERT(iphlen == ip->ip_hl << 2);
		KASSERT(ip->ip_sum == 0);
		KASSERT(ip->ip_p == IPPROTO_TCP);

		KASSERT(mp->m_len >= ehlen + iphlen + sizeof(*th));
		th = (void *)((char *)ip + iphlen);
		/* Seed th_sum with the pseudo-header sum (no length). */
		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
		break;
	case ETHERTYPE_IPV6:
		iphlen = M_CSUM_DATA_IPv6_IPHL(mp->m_pkthdr.csum_data);
		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;

		KASSERT(mp->m_len >= ehlen + sizeof(*ip6));
		ip6 = (void *)(mtod(mp, char *) + ehlen);
		ip6->ip6_plen = 0;
		KASSERT(iphlen == sizeof(struct ip6_hdr));
		KASSERT(ip6->ip6_nxt == IPPROTO_TCP);

		KASSERT(mp->m_len >= ehlen + iphlen + sizeof(*th));
		th = (void *)((char *)ip6 + iphlen);
		/* NOTE(review): tcphlen/paylen are recomputed after the switch. */
		tcphlen = th->th_off << 2;
		paylen = mp->m_pkthdr.len - ehlen - iphlen - tcphlen;
		th->th_sum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, 0,
		    htonl(IPPROTO_TCP));
		break;
	default:
		panic("%s", __func__);
	}

	/* TCP header length in bytes, and what is left as TCP payload. */
	tcphlen = th->th_off << 2;
	paylen = mp->m_pkthdr.len - ehlen - iphlen - tcphlen;

	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;

	mss_l4len_idx |= mp->m_pkthdr.segsz << IGC_ADVTXD_MSS_SHIFT;
	mss_l4len_idx |= tcphlen << IGC_ADVTXD_L4LEN_SHIFT;

	/* Now ready a context descriptor */
	struct igc_adv_tx_context_desc *txdesc =
	    (struct igc_adv_tx_context_desc *)&txr->tx_base[prod];

	/* Now copy bits into descriptor */
	igc_txdesc_sync(txr, prod,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	htolem32(&txdesc->vlan_macip_lens, vlan_macip_lens);
	htolem32(&txdesc->type_tucmd_mlhl, type_tucmd_mlhl);
	htolem32(&txdesc->seqnum_seed, 0);
	htolem32(&txdesc->mss_l4len_idx, mss_l4len_idx);
	igc_txdesc_sync(txr, prod,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	*cmd_type_len |= IGC_ADVTXD_DCMD_TSE;
	*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IGC_ADVTXD_PAYLEN_SHIFT;

	return 1;
#endif /* notyet */
}
3455 
3456 /*********************************************************************
3457  *
3458  *  Allocate memory for rx_buffer structures. Since we use one
3459  *  rx_buffer per received packet, the maximum number of rx_buffer's
3460  *  that we'll need is equal to the number of receive descriptors
3461  *  that we've allocated.
3462  *
3463  **********************************************************************/
3464 static int
3465 igc_allocate_receive_buffers(struct rx_ring *rxr)
3466 {
3467 	struct igc_softc *sc = rxr->sc;
3468 	int error;
3469 
3470 	rxr->rx_buffers =
3471 	    kmem_zalloc(sc->num_rx_desc * sizeof(struct igc_rx_buf), KM_SLEEP);
3472 
3473 	for (int id = 0; id < sc->num_rx_desc; id++) {
3474 		struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
3475 
3476 		error = bus_dmamap_create(rxr->rxdma.dma_tag, MCLBYTES, 1,
3477 		    MCLBYTES, 0, BUS_DMA_WAITOK, &rxbuf->map);
3478 		if (error) {
3479 			aprint_error_dev(sc->sc_dev,
3480 			    "unable to create RX DMA map\n");
3481 			goto fail;
3482 		}
3483 	}
3484 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 0,
3485 	    rxr->rxdma.dma_map->dm_mapsize,
3486 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3487 
3488 	return 0;
3489  fail:
3490 	return error;
3491 }
3492 
3493 /*********************************************************************
3494  *
3495  *  Allocate and initialize receive structures.
3496  *
3497  **********************************************************************/
3498 static int
3499 igc_setup_receive_structures(struct igc_softc *sc)
3500 {
3501 
3502 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3503 		struct rx_ring *rxr = &sc->rx_rings[iq];
3504 
3505 		if (igc_setup_receive_ring(rxr))
3506 			goto fail;
3507 	}
3508 
3509 	return 0;
3510  fail:
3511 	igc_free_receive_structures(sc);
3512 	return ENOBUFS;
3513 }
3514 
3515 /*********************************************************************
3516  *
3517  *  Initialize a receive ring and its buffers.
3518  *
3519  **********************************************************************/
3520 static int
3521 igc_setup_receive_ring(struct rx_ring *rxr)
3522 {
3523 	struct igc_softc *sc = rxr->sc;
3524 	const int rsize = roundup2(
3525 	    sc->num_rx_desc * sizeof(union igc_adv_rx_desc), IGC_DBA_ALIGN);
3526 
3527 	/* Clear the ring contents. */
3528 	memset(rxr->rx_base, 0, rsize);
3529 
3530 	if (igc_allocate_receive_buffers(rxr))
3531 		return ENOMEM;
3532 
3533 	/* Setup our descriptor indices. */
3534 	rxr->next_to_check = 0;
3535 	rxr->last_desc_filled = 0;
3536 
3537 	mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET);
3538 
3539 	return 0;
3540 }
3541 
3542 /*********************************************************************
3543  *
3544  *  Enable receive unit.
3545  *
3546  **********************************************************************/
3547 static void
3548 igc_initialize_receive_unit(struct igc_softc *sc)
3549 {
3550 	struct ifnet *ifp = &sc->sc_ec.ec_if;
3551 	struct igc_hw *hw = &sc->hw;
3552 	uint32_t rctl, rxcsum, srrctl;
3553 
3554 	DPRINTF(RX, "called\n");
3555 
3556 	/*
3557 	 * Make sure receives are disabled while setting
3558 	 * up the descriptor ring.
3559 	 */
3560 	rctl = IGC_READ_REG(hw, IGC_RCTL);
3561 	IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
3562 
3563 	/* Setup the Receive Control Register */
3564 	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
3565 	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
3566 	    IGC_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
3567 
3568 #if 1
3569 	/* Do not store bad packets */
3570 	rctl &= ~IGC_RCTL_SBP;
3571 #else
3572 	/* for debug */
3573 	rctl |= IGC_RCTL_SBP;
3574 #endif
3575 
3576 	/* Enable Long Packet receive */
3577 	if (sc->hw.mac.max_frame_size > ETHER_MAX_LEN)
3578 		rctl |= IGC_RCTL_LPE;
3579 	else
3580 		rctl &= ~IGC_RCTL_LPE;
3581 
3582 	/* Strip the CRC */
3583 	rctl |= IGC_RCTL_SECRC;
3584 
3585 	/*
3586 	 * Set the interrupt throttling rate. Value is calculated
3587 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3588 	 *
3589 	 * XXX Sync with Linux, especially for jumbo MTU or TSO.
3590 	 * XXX Shouldn't be here?
3591 	 */
3592 	IGC_WRITE_REG(hw, IGC_ITR, DEFAULT_ITR);
3593 
3594 	rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
3595 	rxcsum &= ~(IGC_RXCSUM_IPOFL | IGC_RXCSUM_TUOFL | IGC_RXCSUM_PCSD);
3596 	if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
3597 		rxcsum |= IGC_RXCSUM_IPOFL;
3598 	if (ifp->if_capenable & (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
3599 				 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
3600 		rxcsum |= IGC_RXCSUM_TUOFL;
3601 	if (sc->sc_nqueues > 1)
3602 		rxcsum |= IGC_RXCSUM_PCSD;
3603 	IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
3604 
3605 	if (sc->sc_nqueues > 1)
3606 		igc_initialize_rss_mapping(sc);
3607 
3608 	srrctl = 0;
3609 #if 0
3610 	srrctl |= 4096 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
3611 	rctl |= IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX;
3612 #else
3613 	srrctl |= 2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
3614 	rctl |= IGC_RCTL_SZ_2048;
3615 #endif
3616 
3617 	/*
3618 	 * If TX flow control is disabled and there's > 1 queue defined,
3619 	 * enable DROP.
3620 	 *
3621 	 * This drops frames rather than hanging the RX MAC for all queues.
3622 	 */
3623 	if (sc->sc_nqueues > 1 &&
3624 	    (sc->fc == igc_fc_none || sc->fc == igc_fc_rx_pause))
3625 		srrctl |= IGC_SRRCTL_DROP_EN;
3626 
3627 	/* Setup the Base and Length of the RX descriptor rings. */
3628 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3629 		struct rx_ring *rxr = &sc->rx_rings[iq];
3630 		const uint64_t bus_addr =
3631 		    rxr->rxdma.dma_map->dm_segs[0].ds_addr;
3632 
3633 		IGC_WRITE_REG(hw, IGC_RXDCTL(iq), 0);
3634 
3635 		srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
3636 
3637 		IGC_WRITE_REG(hw, IGC_RDLEN(iq),
3638 		    sc->num_rx_desc * sizeof(union igc_adv_rx_desc));
3639 		IGC_WRITE_REG(hw, IGC_RDBAH(iq), (uint32_t)(bus_addr >> 32));
3640 		IGC_WRITE_REG(hw, IGC_RDBAL(iq), (uint32_t)bus_addr);
3641 		IGC_WRITE_REG(hw, IGC_SRRCTL(iq), srrctl);
3642 
3643 		/* Setup the Head and Tail Descriptor Pointers */
3644 		IGC_WRITE_REG(hw, IGC_RDH(iq), 0);
3645 		IGC_WRITE_REG(hw, IGC_RDT(iq), 0 /* XXX rxr->last_desc_filled */);
3646 
3647 		/* Enable this Queue */
3648 		uint32_t rxdctl = IGC_READ_REG(hw, IGC_RXDCTL(iq));
3649 		rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
3650 		rxdctl &= 0xFFF00000;
3651 		rxdctl |= IGC_RX_PTHRESH;
3652 		rxdctl |= IGC_RX_HTHRESH << 8;
3653 		rxdctl |= IGC_RX_WTHRESH << 16;
3654 		IGC_WRITE_REG(hw, IGC_RXDCTL(iq), rxdctl);
3655 	}
3656 
3657 	/* Make sure VLAN Filters are off */
3658 	rctl &= ~IGC_RCTL_VFE;
3659 
3660 	/* Write out the settings */
3661 	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
3662 }
3663 
3664 /*********************************************************************
3665  *
3666  *  Free all receive rings.
3667  *
3668  **********************************************************************/
3669 static void
3670 igc_free_receive_structures(struct igc_softc *sc)
3671 {
3672 
3673 	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3674 		struct rx_ring *rxr = &sc->rx_rings[iq];
3675 
3676 		igc_free_receive_buffers(rxr);
3677 	}
3678 }
3679 
3680 /*********************************************************************
3681  *
3682  *  Free receive ring data structures
3683  *
3684  **********************************************************************/
3685 static void
3686 igc_free_receive_buffers(struct rx_ring *rxr)
3687 {
3688 	struct igc_softc *sc = rxr->sc;
3689 
3690 	if (rxr->rx_buffers != NULL) {
3691 		for (int id = 0; id < sc->num_rx_desc; id++) {
3692 			struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
3693 			bus_dmamap_t map = rxbuf->map;
3694 
3695 			if (rxbuf->buf != NULL) {
3696 				bus_dmamap_sync(rxr->rxdma.dma_tag, map,
3697 				    0, map->dm_mapsize, BUS_DMASYNC_POSTREAD);
3698 				bus_dmamap_unload(rxr->rxdma.dma_tag, map);
3699 				m_freem(rxbuf->buf);
3700 				rxbuf->buf = NULL;
3701 			}
3702 			bus_dmamap_destroy(rxr->rxdma.dma_tag, map);
3703 			rxbuf->map = NULL;
3704 		}
3705 		kmem_free(rxr->rx_buffers,
3706 		    sc->num_rx_desc * sizeof(struct igc_rx_buf));
3707 		rxr->rx_buffers = NULL;
3708 	}
3709 
3710 	mutex_destroy(&rxr->rxr_lock);
3711 }
3712 
3713 /*********************************************************************
3714  *
3715  * Clear status registers in all RX descriptors.
3716  *
3717  **********************************************************************/
3718 static void
3719 igc_clear_receive_status(struct rx_ring *rxr)
3720 {
3721 	struct igc_softc *sc = rxr->sc;
3722 
3723 	mutex_enter(&rxr->rxr_lock);
3724 
3725 	for (int id = 0; id < sc->num_rx_desc; id++) {
3726 		union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
3727 
3728 		igc_rxdesc_sync(rxr, id,
3729 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3730 		rxdesc->wb.upper.status_error = 0;
3731 		igc_rxdesc_sync(rxr, id,
3732 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3733 	}
3734 
3735 	mutex_exit(&rxr->rxr_lock);
3736 }
3737 
3738 /*
3739  * Initialise the RSS mapping for NICs that support multiple transmit/
3740  * receive rings.
3741  */
3742 static void
3743 igc_initialize_rss_mapping(struct igc_softc *sc)
3744 {
3745 	struct igc_hw *hw = &sc->hw;
3746 
3747 	/*
3748 	 * The redirection table controls which destination
3749 	 * queue each bucket redirects traffic to.
3750 	 * Each DWORD represents four queues, with the LSB
3751 	 * being the first queue in the DWORD.
3752 	 *
3753 	 * This just allocates buckets to queues using round-robin
3754 	 * allocation.
3755 	 *
3756 	 * NOTE: It Just Happens to line up with the default
3757 	 * RSS allocation method.
3758 	 */
3759 
3760 	/* Warning FM follows */
3761 	uint32_t reta = 0;
3762 	for (int i = 0; i < 128; i++) {
3763 		const int shift = 0; /* XXXRO */
3764 		int queue_id = i % sc->sc_nqueues;
3765 		/* Adjust if required */
3766 		queue_id <<= shift;
3767 
3768 		/*
3769 		 * The low 8 bits are for hash value (n+0);
3770 		 * The next 8 bits are for hash value (n+1), etc.
3771 		 */
3772 		reta >>= 8;
3773 		reta |= ((uint32_t)queue_id) << 24;
3774 		if ((i & 3) == 3) {
3775 			IGC_WRITE_REG(hw, IGC_RETA(i >> 2), reta);
3776 			reta = 0;
3777 		}
3778 	}
3779 
3780 	/*
3781 	 * MRQC: Multiple Receive Queues Command
3782 	 * Set queuing to RSS control, number depends on the device.
3783 	 */
3784 
3785 	/* Set up random bits */
3786 	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3787 	rss_getkey((uint8_t *)rss_key);
3788 
3789 	/* Now fill our hash function seeds */
3790 	for (int i = 0; i < __arraycount(rss_key); i++)
3791 		IGC_WRITE_REG_ARRAY(hw, IGC_RSSRK(0), i, rss_key[i]);
3792 
3793 	/*
3794 	 * Configure the RSS fields to hash upon.
3795 	 */
3796 	uint32_t mrqc = IGC_MRQC_ENABLE_RSS_4Q;
3797 	mrqc |= IGC_MRQC_RSS_FIELD_IPV4 | IGC_MRQC_RSS_FIELD_IPV4_TCP;
3798 	mrqc |= IGC_MRQC_RSS_FIELD_IPV6 | IGC_MRQC_RSS_FIELD_IPV6_TCP;
3799 	mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
3800 
3801 	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
3802 }
3803 
3804 /*
3805  * igc_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
3806  * For ASF and Pass Through versions of f/w this means
3807  * that the driver is loaded. For AMT version type f/w
3808  * this means that the network i/f is open.
3809  */
3810 static void
3811 igc_get_hw_control(struct igc_softc *sc)
3812 {
3813 	const uint32_t ctrl_ext = IGC_READ_REG(&sc->hw, IGC_CTRL_EXT);
3814 
3815 	IGC_WRITE_REG(&sc->hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
3816 }
3817 
3818 /*
3819  * igc_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
3820  * For ASF and Pass Through versions of f/w this means that
3821  * the driver is no longer loaded. For AMT versions of the
3822  * f/w this means that the network i/f is closed.
3823  */
3824 static void
3825 igc_release_hw_control(struct igc_softc *sc)
3826 {
3827 	const uint32_t ctrl_ext = IGC_READ_REG(&sc->hw, IGC_CTRL_EXT);
3828 
3829 	IGC_WRITE_REG(&sc->hw, IGC_CTRL_EXT, ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
3830 }
3831 
/*
 * Check whether addr can be used as a station address.
 *
 * Returns 0 when the low bit of the first octet is set (a group, i.e.
 * multicast or broadcast, address) or when all ETHER_ADDR_LEN octets
 * are zero; returns 1 otherwise.
 */
static int
igc_is_valid_ether_addr(uint8_t *addr)
{
	uint8_t seen = 0;

	/* Group addresses are rejected without looking any further. */
	if ((addr[0] & 1) != 0)
		return 0;

	/* OR all octets together: the result is 0 only for 00:00:00:00:00:00. */
	for (int i = 0; i < ETHER_ADDR_LEN; i++)
		seen |= addr[i];

	return seen != 0;
}
3842 
3843 static void
3844 igc_print_devinfo(struct igc_softc *sc)
3845 {
3846 	device_t dev = sc->sc_dev;
3847 	struct igc_hw *hw = &sc->hw;
3848 	struct igc_phy_info *phy = &hw->phy;
3849 	u_int oui, model, rev;
3850 	uint16_t id1, id2, nvm_ver, phy_ver, etk_lo, etk_hi;
3851 	char descr[MII_MAX_DESCR_LEN];
3852 
3853 	/* Print PHY Info */
3854 	id1 = phy->id >> 16;
3855 	/* The revision field in phy->id is cleard and it's in phy->revision */
3856 	id2 = (phy->id & 0xfff0) | phy->revision;
3857 	oui = MII_OUI(id1, id2);
3858 	model = MII_MODEL(id2);
3859 	rev = MII_REV(id2);
3860 	mii_get_descr(descr, sizeof(descr), oui, model);
3861 	if (descr[0])
3862 		aprint_normal_dev(dev, "PHY: %s, rev. %d",
3863 		    descr, rev);
3864 	else
3865 		aprint_normal_dev(dev,
3866 		    "PHY OUI 0x%06x, model 0x%04x, rev. %d",
3867 		    oui, model, rev);
3868 
3869 	/* PHY FW version */
3870 	phy->ops.read_reg(hw, 0x1e, &phy_ver);
3871 	aprint_normal(", PHY FW version 0x%04hx\n", phy_ver);
3872 
3873 	/* NVM version */
3874 	hw->nvm.ops.read(hw, NVM_VERSION, 1, &nvm_ver);
3875 
3876 	/* EtrackID */
3877 	hw->nvm.ops.read(hw, NVM_ETKID_LO, 1, &etk_lo);
3878 	hw->nvm.ops.read(hw, NVM_ETKID_HI, 1, &etk_hi);
3879 
3880 	aprint_normal_dev(dev,
3881 	    "NVM image version %x.%02x, EtrackID %04hx%04hx\n",
3882 	    (nvm_ver & NVM_VERSION_MAJOR) >> NVM_VERSION_MAJOR_SHIFT,
3883 	    nvm_ver & NVM_VERSION_MINOR, etk_hi, etk_lo);
3884 }
3885