xref: /dflybsd-src/sys/dev/netif/ix/if_ix.c (revision 381fa6da48f61b44a92fbaf73b2f65e40c705e14)
1 /*
2  * Copyright (c) 2001-2013, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ix.h"
33 
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/interrupt.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/proc.h>
42 #include <sys/rman.h>
43 #include <sys/serialize.h>
44 #include <sys/serialize2.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49 
50 #include <net/bpf.h>
51 #include <net/ethernet.h>
52 #include <net/if.h>
53 #include <net/if_arp.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/ifq_var.h>
57 #include <net/toeplitz.h>
58 #include <net/toeplitz2.h>
59 #include <net/vlan/if_vlan_var.h>
60 #include <net/vlan/if_vlan_ether.h>
61 #include <net/if_poll.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 
67 #include <bus/pci/pcivar.h>
68 #include <bus/pci/pcireg.h>
69 
70 #include <dev/netif/ix/ixgbe_api.h>
71 #include <dev/netif/ix/if_ix.h>
72 
73 #ifdef IX_RSS_DEBUG
74 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
75 do { \
76 	if (sc->rss_debug >= lvl) \
77 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
78 } while (0)
79 #else	/* !IX_RSS_DEBUG */
80 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
81 #endif	/* IX_RSS_DEBUG */
82 
83 #define IX_NAME			"Intel(R) PRO/10GbE "
84 #define IX_DEVICE(id) \
85 	{ IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
86 #define IX_DEVICE_NULL		{ 0, 0, NULL }
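/*
 * For example, IX_DEVICE(82598AT) expands to
 * { IXGBE_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE 82598AT" }.
 */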
87 
88 static struct ix_device {
89 	uint16_t	vid;
90 	uint16_t	did;
91 	const char	*desc;
92 } ix_devices[] = {
93 	IX_DEVICE(82598AF_DUAL_PORT),
94 	IX_DEVICE(82598AF_SINGLE_PORT),
95 	IX_DEVICE(82598EB_CX4),
96 	IX_DEVICE(82598AT),
97 	IX_DEVICE(82598AT2),
98 	IX_DEVICE(82598),
99 	IX_DEVICE(82598_DA_DUAL_PORT),
100 	IX_DEVICE(82598_CX4_DUAL_PORT),
101 	IX_DEVICE(82598EB_XF_LR),
102 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
103 	IX_DEVICE(82598EB_SFP_LOM),
104 	IX_DEVICE(82599_KX4),
105 	IX_DEVICE(82599_KX4_MEZZ),
106 	IX_DEVICE(82599_SFP),
107 	IX_DEVICE(82599_XAUI_LOM),
108 	IX_DEVICE(82599_CX4),
109 	IX_DEVICE(82599_T3_LOM),
110 	IX_DEVICE(82599_COMBO_BACKPLANE),
111 	IX_DEVICE(82599_BACKPLANE_FCOE),
112 	IX_DEVICE(82599_SFP_SF2),
113 	IX_DEVICE(82599_SFP_FCOE),
114 	IX_DEVICE(82599EN_SFP),
115 	IX_DEVICE(82599_SFP_SF_QP),
116 	IX_DEVICE(X540T),
117 
118 	/* required last entry */
119 	IX_DEVICE_NULL
120 };
121 
122 static int	ix_probe(device_t);
123 static int	ix_attach(device_t);
124 static int	ix_detach(device_t);
125 static int	ix_shutdown(device_t);
126 
127 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
128 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
129 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
130 #ifdef INVARIANTS
131 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
132 		    boolean_t);
133 #endif
134 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
135 static void	ix_watchdog(struct ifaltq_subque *);
136 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
137 static void	ix_init(void *);
138 static void	ix_stop(struct ix_softc *);
139 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
140 static int	ix_media_change(struct ifnet *);
141 static void	ix_timer(void *);
142 
143 static void	ix_add_sysctl(struct ix_softc *);
144 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
145 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
146 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
147 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
148 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
149 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
150 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
151 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
152 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
153 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
154 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
155 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
156 static int	ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS);
157 #ifdef foo
158 static int	ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
159 #endif
160 #if 0
161 static void     ix_add_hw_stats(struct ix_softc *);
162 #endif
163 
164 static void	ix_slot_info(struct ix_softc *);
165 static int	ix_alloc_rings(struct ix_softc *);
166 static void	ix_free_rings(struct ix_softc *);
167 static void	ix_setup_ifp(struct ix_softc *);
168 static void	ix_setup_serialize(struct ix_softc *);
169 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
170 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
171 static void	ix_update_stats(struct ix_softc *);
172 
173 static void	ix_set_promisc(struct ix_softc *);
174 static void	ix_set_multi(struct ix_softc *);
175 static void	ix_set_vlan(struct ix_softc *);
176 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
177 
178 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
179 static void	ix_init_tx_ring(struct ix_tx_ring *);
180 static void	ix_free_tx_ring(struct ix_tx_ring *);
181 static int	ix_create_tx_ring(struct ix_tx_ring *);
182 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
183 static void	ix_init_tx_unit(struct ix_softc *);
184 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
185 		    uint16_t *, int *);
186 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
187 		    const struct mbuf *, uint32_t *, uint32_t *);
188 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
189 		    const struct mbuf *, uint32_t *, uint32_t *);
190 static void	ix_txeof(struct ix_tx_ring *, int);
191 
192 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
193 static int	ix_init_rx_ring(struct ix_rx_ring *);
194 static void	ix_free_rx_ring(struct ix_rx_ring *);
195 static int	ix_create_rx_ring(struct ix_rx_ring *);
196 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
197 static void	ix_init_rx_unit(struct ix_softc *);
198 #if 0
199 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
200 #endif
201 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
202 static void	ix_rxeof(struct ix_rx_ring *);
203 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
204 static void	ix_enable_rx_drop(struct ix_softc *);
205 static void	ix_disable_rx_drop(struct ix_softc *);
206 
207 static void	ix_alloc_msix(struct ix_softc *);
208 static void	ix_free_msix(struct ix_softc *, boolean_t);
209 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
210 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
211 static void	ix_setup_msix_eims(const struct ix_softc *, int,
212 		    uint32_t *, uint32_t *);
213 static int	ix_alloc_intr(struct ix_softc *);
214 static void	ix_free_intr(struct ix_softc *);
215 static int	ix_setup_intr(struct ix_softc *);
216 static void	ix_teardown_intr(struct ix_softc *, int);
217 static void	ix_enable_intr(struct ix_softc *);
218 static void	ix_disable_intr(struct ix_softc *);
219 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
220 static void	ix_set_eitr(struct ix_softc *, int, int);
221 static void	ix_intr_status(struct ix_softc *, uint32_t);
222 static void	ix_intr(void *);
223 static void	ix_msix_rxtx(void *);
224 static void	ix_msix_rx(void *);
225 static void	ix_msix_tx(void *);
226 static void	ix_msix_status(void *);
227 
228 static void	ix_config_link(struct ix_softc *);
229 static boolean_t ix_sfp_probe(struct ix_softc *);
230 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
231 static void	ix_setup_optics(struct ix_softc *);
232 static void	ix_update_link_status(struct ix_softc *);
233 static void	ix_handle_link(struct ix_softc *);
234 static void	ix_handle_mod(struct ix_softc *);
235 static void	ix_handle_msf(struct ix_softc *);
236 
237 /* XXX Shared code structure requires this for the moment */
238 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
239 
240 static device_method_t ix_methods[] = {
241 	/* Device interface */
242 	DEVMETHOD(device_probe,		ix_probe),
243 	DEVMETHOD(device_attach,	ix_attach),
244 	DEVMETHOD(device_detach,	ix_detach),
245 	DEVMETHOD(device_shutdown,	ix_shutdown),
246 	DEVMETHOD_END
247 };
248 
249 static driver_t ix_driver = {
250 	"ix",
251 	ix_methods,
252 	sizeof(struct ix_softc)
253 };
254 
255 static devclass_t ix_devclass;
256 
257 DECLARE_DUMMY_MODULE(if_ix);
258 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
259 
260 static int	ix_msi_enable = 1;
261 static int	ix_msix_enable = 1;
262 static int	ix_msix_agg_rxtx = 1;
263 static int	ix_rxr = 0;
264 static int	ix_txr = 0;
265 static int	ix_txd = IX_PERF_TXD;
266 static int	ix_rxd = IX_PERF_RXD;
267 static int	ix_unsupported_sfp = 0;
268 
269 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
270 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
271 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
272 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
273 TUNABLE_INT("hw.ix.txr", &ix_txr);
274 TUNABLE_INT("hw.ix.txd", &ix_txd);
275 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
276 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
277 
278 /*
279  * Smart speed setting, default to on.  This only works
280  * as a compile-time option right now, since it is applied during
281  * attach; set this to 'ixgbe_smart_speed_off' to disable.
282  */
283 static const enum ixgbe_smart_speed ix_smart_speed =
284     ixgbe_smart_speed_on;
285 
286 static int
287 ix_probe(device_t dev)
288 {
289 	const struct ix_device *d;
290 	uint16_t vid, did;
291 
292 	vid = pci_get_vendor(dev);
293 	did = pci_get_device(dev);
294 
295 	for (d = ix_devices; d->desc != NULL; ++d) {
296 		if (vid == d->vid && did == d->did) {
297 			device_set_desc(dev, d->desc);
298 			return 0;
299 		}
300 	}
301 	return ENXIO;
302 }
303 
304 static int
305 ix_attach(device_t dev)
306 {
307 	struct ix_softc *sc = device_get_softc(dev);
308 	struct ixgbe_hw *hw;
309 	int error, ring_cnt_max;
310 	uint16_t csum;
311 	uint32_t ctrl_ext;
312 
313 	sc->dev = sc->osdep.dev = dev;
314 	hw = &sc->hw;
315 
316 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
317 	    device_get_unit(dev));
318 	ifmedia_init(&sc->media, IFM_IMASK,
319 	    ix_media_change, ix_media_status);
320 
321 	/* Save frame size */
322 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
323 
324 	callout_init_mp(&sc->timer);
325 	lwkt_serialize_init(&sc->main_serialize);
326 
327 	/*
328 	 * Save off the information about this board
329 	 */
330 	hw->vendor_id = pci_get_vendor(dev);
331 	hw->device_id = pci_get_device(dev);
332 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
333 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
334 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
335 
336 	ixgbe_set_mac_type(hw);
337 
338 	/* Pick up the 82599 and VF settings */
339 	if (hw->mac.type != ixgbe_mac_82598EB)
340 		hw->phy.smart_speed = ix_smart_speed;
341 
342 	/* Enable bus mastering */
343 	pci_enable_busmaster(dev);
344 
345 	/*
346 	 * Allocate IO memory
347 	 */
348 	sc->mem_rid = PCIR_BAR(0);
349 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
350 	    &sc->mem_rid, RF_ACTIVE);
351 	if (sc->mem_res == NULL) {
352 		device_printf(dev, "Unable to allocate bus resource: memory\n");
353 		error = ENXIO;
354 		goto failed;
355 	}
356 
357 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
358 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
359 
360 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
361 	sc->hw.back = &sc->osdep;
362 
363 	/*
364 	 * Configure total supported RX/TX ring count
365 	 */
366 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
367 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
368 	sc->rx_ring_inuse = sc->rx_ring_cnt;
369 
370 	switch (hw->mac.type) {
371 	case ixgbe_mac_82598EB:
372 		ring_cnt_max = IX_MAX_TXRING_82598;
373 		break;
374 
375 	case ixgbe_mac_82599EB:
376 		ring_cnt_max = IX_MAX_TXRING_82599;
377 		break;
378 
379 	case ixgbe_mac_X540:
380 		ring_cnt_max = IX_MAX_TXRING_X540;
381 		break;
382 
383 	default:
384 		ring_cnt_max = 1;
385 		break;
386 	}
387 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
388 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
389 	sc->tx_ring_inuse = sc->tx_ring_cnt;
390 
391 	/* Allocate TX/RX rings */
392 	error = ix_alloc_rings(sc);
393 	if (error)
394 		goto failed;
395 
396 	/* Allocate interrupt */
397 	error = ix_alloc_intr(sc);
398 	if (error)
399 		goto failed;
400 
401 	/* Set up serializers */
402 	ix_setup_serialize(sc);
403 
404 	/* Allocate multicast array memory. */
405 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
406 	    M_DEVBUF, M_WAITOK);
407 
408 	/* Initialize the shared code */
409 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
410 	error = ixgbe_init_shared_code(hw);
411 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
412 		/*
413 		 * No optics in this port; ask timer routine
414 		 * to probe for later insertion.
415 		 */
416 		sc->sfp_probe = TRUE;
417 		error = 0;
418 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
419 		device_printf(dev, "Unsupported SFP+ module detected!\n");
420 		error = EIO;
421 		goto failed;
422 	} else if (error) {
423 		device_printf(dev, "Unable to initialize the shared code\n");
424 		error = EIO;
425 		goto failed;
426 	}
427 
428 	/* Make sure we have a good EEPROM before we read from it */
429 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
430 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
431 		error = EIO;
432 		goto failed;
433 	}
434 
435 	error = ixgbe_init_hw(hw);
436 	if (error == IXGBE_ERR_EEPROM_VERSION) {
437 		device_printf(dev, "Pre-production device detected\n");
438 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
439 		device_printf(dev, "Unsupported SFP+ Module\n");
440 		error = EIO;
441 		goto failed;
442 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
443 		device_printf(dev, "No SFP+ Module found\n");
444 	}
445 
446 	/* Detect and set physical type */
447 	ix_setup_optics(sc);
448 
449 	/* Setup OS specific network interface */
450 	ix_setup_ifp(sc);
451 
452 	/* Add sysctl tree */
453 	ix_add_sysctl(sc);
454 
455 	error = ix_setup_intr(sc);
456 	if (error) {
457 		ether_ifdetach(&sc->arpcom.ac_if);
458 		goto failed;
459 	}
460 
461 	/* Initialize statistics */
462 	ix_update_stats(sc);
463 
464 	/*
465 	 * Check PCIE slot type/speed/width
466 	 */
467 	ix_slot_info(sc);
468 
469 	/* Set an initial default flow control value */
470 	sc->fc = ixgbe_fc_full;
471 
472 	/* Let hardware know driver is loaded */
473 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
474 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
475 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
476 
477 	return 0;
478 failed:
479 	ix_detach(dev);
480 	return error;
481 }
482 
483 static int
484 ix_detach(device_t dev)
485 {
486 	struct ix_softc *sc = device_get_softc(dev);
487 
488 	if (device_is_attached(dev)) {
489 		struct ifnet *ifp = &sc->arpcom.ac_if;
490 		uint32_t ctrl_ext;
491 
492 		ifnet_serialize_all(ifp);
493 
494 		ix_stop(sc);
495 		ix_teardown_intr(sc, sc->intr_cnt);
496 
497 		ifnet_deserialize_all(ifp);
498 
499 		callout_terminate(&sc->timer);
500 		ether_ifdetach(ifp);
501 
502 		/* Let hardware know driver is unloading */
503 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
504 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
505 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
506 	}
507 
508 	ifmedia_removeall(&sc->media);
509 	bus_generic_detach(dev);
510 
511 	if (sc->sysctl_tree != NULL)
512 		sysctl_ctx_free(&sc->sysctl_ctx);
513 
514 	ix_free_intr(sc);
515 
516 	if (sc->msix_mem_res != NULL) {
517 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
518 		    sc->msix_mem_res);
519 	}
520 	if (sc->mem_res != NULL) {
521 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
522 		    sc->mem_res);
523 	}
524 
525 	ix_free_rings(sc);
526 
527 	if (sc->mta != NULL)
528 		kfree(sc->mta, M_DEVBUF);
529 	if (sc->serializes != NULL)
530 		kfree(sc->serializes, M_DEVBUF);
531 
532 	return 0;
533 }
534 
535 static int
536 ix_shutdown(device_t dev)
537 {
538 	struct ix_softc *sc = device_get_softc(dev);
539 	struct ifnet *ifp = &sc->arpcom.ac_if;
540 
541 	ifnet_serialize_all(ifp);
542 	ix_stop(sc);
543 	ifnet_deserialize_all(ifp);
544 
545 	return 0;
546 }
547 
548 static void
549 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
550 {
551 	struct ix_softc *sc = ifp->if_softc;
552 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
553 	int idx = -1;
554 	uint16_t nsegs;
555 
556 	KKASSERT(txr->tx_ifsq == ifsq);
557 	ASSERT_SERIALIZED(&txr->tx_serialize);
558 
559 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
560 		return;
561 
562 	if (!sc->link_active) {
563 		ifsq_purge(ifsq);
564 		return;
565 	}
566 
567 	while (!ifsq_is_empty(ifsq)) {
568 		struct mbuf *m_head;
569 
570 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
571 			ifsq_set_oactive(ifsq);
572 			txr->tx_watchdog.wd_timer = 5;
573 			break;
574 		}
575 
576 		m_head = ifsq_dequeue(ifsq);
577 		if (m_head == NULL)
578 			break;
579 
580 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
581 			IFNET_STAT_INC(ifp, oerrors, 1);
582 			continue;
583 		}
584 
585 		if (nsegs >= txr->tx_wreg_nsegs) {
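		/*
		 * Batch the TDT (tail) register writes: the hardware is
		 * kicked only after at least tx_wreg_nsegs segments have
		 * been queued; any remainder is flushed after the loop.
		 */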
586 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
587 			nsegs = 0;
588 			idx = -1;
589 		}
590 
591 		ETHER_BPF_MTAP(ifp, m_head);
592 	}
593 	if (idx >= 0)
594 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
595 }
596 
597 static int
598 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
599 {
600 	struct ix_softc *sc = ifp->if_softc;
601 	struct ifreq *ifr = (struct ifreq *) data;
602 	int error = 0, mask, reinit;
603 
604 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
605 
606 	switch (command) {
607 	case SIOCSIFMTU:
608 		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
609 			error = EINVAL;
610 		} else {
611 			ifp->if_mtu = ifr->ifr_mtu;
612 			sc->max_frame_size =
613 			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
614 			ix_init(sc);
615 		}
616 		break;
617 
618 	case SIOCSIFFLAGS:
619 		if (ifp->if_flags & IFF_UP) {
620 			if (ifp->if_flags & IFF_RUNNING) {
621 				if ((ifp->if_flags ^ sc->if_flags) &
622 				    (IFF_PROMISC | IFF_ALLMULTI))
623 					ix_set_promisc(sc);
624 			} else {
625 				ix_init(sc);
626 			}
627 		} else if (ifp->if_flags & IFF_RUNNING) {
628 			ix_stop(sc);
629 		}
630 		sc->if_flags = ifp->if_flags;
631 		break;
632 
633 	case SIOCADDMULTI:
634 	case SIOCDELMULTI:
635 		if (ifp->if_flags & IFF_RUNNING) {
636 			ix_disable_intr(sc);
637 			ix_set_multi(sc);
638 			ix_enable_intr(sc);
639 		}
640 		break;
641 
642 	case SIOCSIFMEDIA:
643 	case SIOCGIFMEDIA:
644 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
645 		break;
646 
647 	case SIOCSIFCAP:
648 		reinit = 0;
649 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
650 		if (mask & IFCAP_RXCSUM) {
651 			ifp->if_capenable ^= IFCAP_RXCSUM;
652 			reinit = 1;
653 		}
654 		if (mask & IFCAP_VLAN_HWTAGGING) {
655 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
656 			reinit = 1;
657 		}
658 		if (mask & IFCAP_TXCSUM) {
659 			ifp->if_capenable ^= IFCAP_TXCSUM;
660 			if (ifp->if_capenable & IFCAP_TXCSUM)
661 				ifp->if_hwassist |= CSUM_OFFLOAD;
662 			else
663 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
664 		}
665 		if (mask & IFCAP_TSO) {
666 			ifp->if_capenable ^= IFCAP_TSO;
667 			if (ifp->if_capenable & IFCAP_TSO)
668 				ifp->if_hwassist |= CSUM_TSO;
669 			else
670 				ifp->if_hwassist &= ~CSUM_TSO;
671 		}
672 		if (mask & IFCAP_RSS)
673 			ifp->if_capenable ^= IFCAP_RSS;
674 		if (reinit && (ifp->if_flags & IFF_RUNNING))
675 			ix_init(sc);
676 		break;
677 
678 #if 0
679 	case SIOCGI2C:
680 	{
681 		struct ixgbe_i2c_req	i2c;
682 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
683 		if (error)
684 			break;
685 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
686 			error = EINVAL;
687 			break;
688 		}
689 		sc->hw.phy.ops.read_i2c_byte(&sc->hw, i2c.offset,
690 		    i2c.dev_addr, i2c.data);
691 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
692 		break;
693 	}
694 #endif
695 
696 	default:
697 		error = ether_ioctl(ifp, command, data);
698 		break;
699 	}
700 	return error;
701 }
702 
703 #define IXGBE_MHADD_MFS_SHIFT 16
704 
705 static void
706 ix_init(void *xsc)
707 {
708 	struct ix_softc *sc = xsc;
709 	struct ifnet *ifp = &sc->arpcom.ac_if;
710 	struct ixgbe_hw *hw = &sc->hw;
711 	uint32_t rxpb, frame, size, tmp;
712 	uint32_t gpie, rxctrl;
713 	int i, error;
714 
715 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
716 
717 	ix_stop(sc);
718 
719 	/* Configure # of used RX/TX rings */
720 	ix_set_ring_inuse(sc, FALSE);
721 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
722 
723 	/* Get the latest mac address, User can use a LAA */
724 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
725 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
726 	hw->addr_ctrl.rar_used_count = 1;
727 
728 	/* Prepare transmit descriptors and buffers */
729 	for (i = 0; i < sc->tx_ring_inuse; ++i)
730 		ix_init_tx_ring(&sc->tx_rings[i]);
731 
732 	ixgbe_init_hw(hw);
733 	ix_init_tx_unit(sc);
734 
735 	/* Setup Multicast table */
736 	ix_set_multi(sc);
737 
738 	/* Prepare receive descriptors and buffers */
739 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
740 		error = ix_init_rx_ring(&sc->rx_rings[i]);
741 		if (error) {
742 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
743 			ix_stop(sc);
744 			return;
745 		}
746 	}
747 
748 	/* Configure RX settings */
749 	ix_init_rx_unit(sc);
750 
751 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
752 
753 	/* Enable Fan Failure Interrupt */
754 	gpie |= IXGBE_SDP1_GPIEN;
755 
756 	/* Add for Module detection */
757 	if (hw->mac.type == ixgbe_mac_82599EB)
758 		gpie |= IXGBE_SDP2_GPIEN;
759 
760 	/* Thermal Failure Detection */
761 	if (hw->mac.type == ixgbe_mac_X540)
762 		gpie |= IXGBE_SDP0_GPIEN;
763 
764 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
765 		/* Enable Enhanced MSIX mode */
766 		gpie |= IXGBE_GPIE_MSIX_MODE;
767 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
768 		    IXGBE_GPIE_OCD;
769 	}
770 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
771 
772 	/* Set MTU size */
773 	if (ifp->if_mtu > ETHERMTU) {
774 		uint32_t mhadd;
775 
776 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
777 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
778 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
779 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
780 	}
781 
782 	/*
783 	 * Enable TX rings
784 	 */
785 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
786 		uint32_t txdctl;
787 
788 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
789 		txdctl |= IXGBE_TXDCTL_ENABLE;
790 
791 		/*
792 		 * Set WTHRESH to 0, since TX head write-back is used
793 		 */
794 		txdctl &= ~(0x7f << 16);
795 
796 		/*
797 		 * When the internal queue falls below PTHRESH (32),
798 		 * start prefetching as long as there are at least
799 		 * HTHRESH (1) buffers ready. The values are taken
800 		 * from the Intel linux driver 3.8.21.
801 		 * Prefetching enables tx line rate even with 1 queue.
802 		 */
803 		txdctl |= (32 << 0) | (1 << 8);
804 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
805 	}
806 
807 	/*
808 	 * Enable RX rings
809 	 */
810 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
811 		uint32_t rxdctl;
812 		int k;
813 
814 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
815 		if (hw->mac.type == ixgbe_mac_82598EB) {
816 			/*
817 			 * PTHRESH = 21
818 			 * HTHRESH = 4
819 			 * WTHRESH = 8
820 			 */
821 			rxdctl &= ~0x3FFFFF;
822 			rxdctl |= 0x080420;
823 		}
824 		rxdctl |= IXGBE_RXDCTL_ENABLE;
825 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
826 		for (k = 0; k < 10; ++k) {
827 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
828 			    IXGBE_RXDCTL_ENABLE)
829 				break;
830 			else
831 				msec_delay(1);
832 		}
833 		wmb();
834 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
835 		    sc->rx_rings[0].rx_ndesc - 1);
836 	}
837 
838 	/* Set up VLAN support and filter */
839 	ix_set_vlan(sc);
840 
841 	/* Enable Receive engine */
842 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
843 	if (hw->mac.type == ixgbe_mac_82598EB)
844 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
845 	rxctrl |= IXGBE_RXCTRL_RXEN;
846 	ixgbe_enable_rx_dma(hw, rxctrl);
847 
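	/*
	 * Map each ring to its interrupt vector through the IVAR
	 * registers.  The last argument of ix_set_ivar() selects the
	 * entry type: 0 for an RX queue, 1 for a TX queue and -1 for
	 * the "other" (status) cause.
	 */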
848 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
849 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
850 
851 		if (txr->tx_intr_vec >= 0) {
852 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
853 		} else {
854 			/*
855 			 * An unconfigured TX interrupt vector can only
856 			 * happen with MSI-X.
857 			 */
858 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
859 			    ("TX intr vector is not set"));
860 			KASSERT(i < sc->rx_ring_inuse,
861 			    ("invalid TX ring %d, no piggyback RX ring", i));
862 			KASSERT(sc->rx_rings[i].rx_txr == txr,
863 			    ("RX ring %d piggybacked TX ring mismatch", i));
864 			if (bootverbose)
865 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
866 		}
867 	}
868 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
869 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
870 
871 		KKASSERT(rxr->rx_intr_vec >= 0);
872 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
873 		if (rxr->rx_txr != NULL) {
874 			/*
875 			 * Piggyback the TX ring interrupt onto the RX
876 			 * ring interrupt vector.
877 			 */
878 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
879 			    ("piggybacked TX ring configured intr vector"));
880 			KASSERT(rxr->rx_txr->tx_idx == i,
881 			    ("RX ring %d piggybacked TX ring %u",
882 			     i, rxr->rx_txr->tx_idx));
883 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
884 			if (bootverbose) {
885 				if_printf(ifp, "IVAR RX ring %d piggybacks "
886 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
887 			}
888 		}
889 	}
890 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
891 		/* Set up the status MSI-X vector; it uses fixed entry 1 */
892 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
893 
894 		/* Set up auto-mask for TX and RX rings */
895 		if (hw->mac.type == ixgbe_mac_82598EB) {
896 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
897 		} else {
898 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
899 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
900 		}
901 	} else {
902 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
903 	}
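	/*
	 * Program the per-vector EITR (interrupt throttle) registers
	 * with the configured interrupt rates.
	 */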
904 	for (i = 0; i < sc->intr_cnt; ++i)
905 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
906 
907 	/*
908 	 * Check on any SFP devices that need to be kick-started
909 	 */
910 	if (hw->phy.type == ixgbe_phy_none) {
911 		error = hw->phy.ops.identify(hw);
912 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
913 			if_printf(ifp,
914 			    "Unsupported SFP+ module type was detected.\n");
915 			/* XXX stop */
916 			return;
917 		}
918 	}
919 
920 	/* Config/Enable Link */
921 	ix_config_link(sc);
922 
923 	/*
924 	 * Hardware Packet Buffer & Flow Control setup
925 	 */
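	/*
	 * Note: the IXGBE_DV*() macros below compute the delay value
	 * (in bit times) needed to absorb in-flight traffic for the
	 * given frame size, and IXGBE_BT2KB() converts bit times to KB
	 * so the result can be weighed against the RX packet buffer
	 * size read from RXPBSIZE.
	 */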
926 	frame = sc->max_frame_size;
927 
928 	/* Calculate High Water */
929 	if (hw->mac.type == ixgbe_mac_X540)
930 		tmp = IXGBE_DV_X540(frame, frame);
931 	else
932 		tmp = IXGBE_DV(frame, frame);
933 	size = IXGBE_BT2KB(tmp);
934 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
935 	hw->fc.high_water[0] = rxpb - size;
936 
937 	/* Now calculate Low Water */
938 	if (hw->mac.type == ixgbe_mac_X540)
939 		tmp = IXGBE_LOW_DV_X540(frame);
940 	else
941 		tmp = IXGBE_LOW_DV(frame);
942 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
943 
944 	hw->fc.requested_mode = sc->fc;
945 	hw->fc.pause_time = IX_FC_PAUSE;
946 	hw->fc.send_xon = TRUE;
947 
948 	/* Initialize the FC settings */
949 	ixgbe_start_hw(hw);
950 
951 	/* And now turn on interrupts */
952 	ix_enable_intr(sc);
953 
954 	ifp->if_flags |= IFF_RUNNING;
955 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
956 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
957 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
958 	}
959 
960 	ix_set_timer_cpuid(sc, FALSE);
961 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
962 }
963 
964 static void
965 ix_intr(void *xsc)
966 {
967 	struct ix_softc *sc = xsc;
968 	struct ixgbe_hw	*hw = &sc->hw;
969 	uint32_t eicr;
970 
971 	ASSERT_SERIALIZED(&sc->main_serialize);
972 
973 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
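	/*
	 * Reading EICR clears the pending interrupt causes.  A zero
	 * value most likely means the interrupt was not ours (e.g. a
	 * shared line); just re-enable the interrupt mask and return.
	 */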
974 	if (eicr == 0) {
975 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
976 		return;
977 	}
978 
979 	if (eicr & IX_RX0_INTR_MASK) {
980 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
981 
982 		lwkt_serialize_enter(&rxr->rx_serialize);
983 		ix_rxeof(rxr);
984 		lwkt_serialize_exit(&rxr->rx_serialize);
985 	}
986 	if (eicr & IX_RX1_INTR_MASK) {
987 		struct ix_rx_ring *rxr;
988 
989 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
990 		rxr = &sc->rx_rings[1];
991 
992 		lwkt_serialize_enter(&rxr->rx_serialize);
993 		ix_rxeof(rxr);
994 		lwkt_serialize_exit(&rxr->rx_serialize);
995 	}
996 
997 	if (eicr & IX_TX_INTR_MASK) {
998 		struct ix_tx_ring *txr = &sc->tx_rings[0];
999 
1000 		lwkt_serialize_enter(&txr->tx_serialize);
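		/*
		 * *tx_hdr holds the head write-back value DMA'd by the
		 * hardware (see the TDWBAL/TDWBAH setup in
		 * ix_init_tx_unit()); it indicates how far the TX ring
		 * has been consumed.
		 */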
1001 		ix_txeof(txr, *(txr->tx_hdr));
1002 		if (!ifsq_is_empty(txr->tx_ifsq))
1003 			ifsq_devstart(txr->tx_ifsq);
1004 		lwkt_serialize_exit(&txr->tx_serialize);
1005 	}
1006 
1007 	if (__predict_false(eicr & IX_EICR_STATUS))
1008 		ix_intr_status(sc, eicr);
1009 
1010 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1011 }
1012 
1013 static void
1014 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1015 {
1016 	struct ix_softc *sc = ifp->if_softc;
1017 
1018 	ix_update_link_status(sc);
1019 
1020 	ifmr->ifm_status = IFM_AVALID;
1021 	ifmr->ifm_active = IFM_ETHER;
1022 
1023 	if (!sc->link_active)
1024 		return;
1025 
1026 	ifmr->ifm_status |= IFM_ACTIVE;
1027 
1028 	switch (sc->link_speed) {
1029 	case IXGBE_LINK_SPEED_100_FULL:
1030 		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1031 		break;
1032 	case IXGBE_LINK_SPEED_1GB_FULL:
1033 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1034 		break;
1035 	case IXGBE_LINK_SPEED_10GB_FULL:
1036 		ifmr->ifm_active |= sc->optics | IFM_FDX;
1037 		break;
1038 	}
1039 }
1040 
1041 static int
1042 ix_media_change(struct ifnet *ifp)
1043 {
1044 	struct ix_softc *sc = ifp->if_softc;
1045 	struct ifmedia *ifm = &sc->media;
1046 
1047 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1048 		return EINVAL;
1049 
1050 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1051 	case IFM_AUTO:
1052 		sc->hw.phy.autoneg_advertised =
1053 		    IXGBE_LINK_SPEED_100_FULL |
1054 		    IXGBE_LINK_SPEED_1GB_FULL |
1055 		    IXGBE_LINK_SPEED_10GB_FULL;
1056 		break;
1057 	default:
1058 		if_printf(ifp, "Only auto media type\n");
1059 		return EINVAL;
1060 	}
1061 	return 0;
1062 }
1063 
1064 static __inline int
1065 ix_tso_pullup(struct mbuf **mp)
1066 {
1067 	int hoff, iphlen, thoff;
1068 	struct mbuf *m;
1069 
1070 	m = *mp;
1071 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1072 
1073 	iphlen = m->m_pkthdr.csum_iphlen;
1074 	thoff = m->m_pkthdr.csum_thlen;
1075 	hoff = m->m_pkthdr.csum_lhlen;
1076 
1077 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1078 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1079 	KASSERT(hoff > 0, ("invalid ether hlen"));
1080 
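	/*
	 * TSO setup expects the complete Ethernet/IP/TCP headers to be
	 * contiguous in the leading mbuf; pull them up if the chain
	 * splits them across mbufs.
	 */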
1081 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1082 		m = m_pullup(m, hoff + iphlen + thoff);
1083 		if (m == NULL) {
1084 			*mp = NULL;
1085 			return ENOBUFS;
1086 		}
1087 		*mp = m;
1088 	}
1089 	return 0;
1090 }
1091 
1092 static int
1093 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1094     uint16_t *segs_used, int *idx)
1095 {
1096 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1097 	int i, j, error, nsegs, first, maxsegs;
1098 	struct mbuf *m_head = *m_headp;
1099 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1100 	bus_dmamap_t map;
1101 	struct ix_tx_buf *txbuf;
1102 	union ixgbe_adv_tx_desc *txd = NULL;
1103 
1104 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1105 		error = ix_tso_pullup(m_headp);
1106 		if (__predict_false(error))
1107 			return error;
1108 		m_head = *m_headp;
1109 	}
1110 
1111 	/* Basic descriptor defines */
1112 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1113 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1114 
1115 	if (m_head->m_flags & M_VLANTAG)
1116 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1117 
1118 	/*
1119 	 * It is important to capture the first descriptor used,
1120 	 * because it will contain the index of the one we tell
1121 	 * the hardware to report back.
1122 	 */
1123 	first = txr->tx_next_avail;
1124 	txbuf = &txr->tx_buf[first];
1125 	map = txbuf->map;
1126 
1127 	/*
1128 	 * Map the packet for DMA.
1129 	 */
1130 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1131 	if (maxsegs > IX_MAX_SCATTER)
1132 		maxsegs = IX_MAX_SCATTER;
1133 
1134 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1135 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1136 	if (__predict_false(error)) {
1137 		m_freem(*m_headp);
1138 		*m_headp = NULL;
1139 		return error;
1140 	}
1141 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1142 
1143 	m_head = *m_headp;
1144 
1145 	/*
1146 	 * Set up the appropriate offload context if requested;
1147 	 * this may consume one TX descriptor.
1148 	 */
1149 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1150 		(*segs_used)++;
1151 		txr->tx_nsegs++;
1152 	}
1153 
1154 	*segs_used += nsegs;
1155 	txr->tx_nsegs += nsegs;
1156 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1157 		/*
1158 		 * Report Status (RS) is turned on every intr_nsegs
1159 		 * descriptors (roughly).
1160 		 */
1161 		txr->tx_nsegs = 0;
1162 		cmd_rs = IXGBE_TXD_CMD_RS;
1163 	}
1164 
1165 	i = txr->tx_next_avail;
1166 	for (j = 0; j < nsegs; j++) {
1167 		bus_size_t seglen;
1168 		bus_addr_t segaddr;
1169 
1170 		txbuf = &txr->tx_buf[i];
1171 		txd = &txr->tx_base[i];
1172 		seglen = segs[j].ds_len;
1173 		segaddr = htole64(segs[j].ds_addr);
1174 
1175 		txd->read.buffer_addr = segaddr;
1176 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1177 		    cmd_type_len |seglen);
1178 		txd->read.olinfo_status = htole32(olinfo_status);
1179 
1180 		if (++i == txr->tx_ndesc)
1181 			i = 0;
1182 	}
1183 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1184 
1185 	txr->tx_avail -= nsegs;
1186 	txr->tx_next_avail = i;
1187 
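	/*
	 * The loaded dmamap (taken from the first slot) stays with the
	 * buffer that owns the mbuf (the last descriptor used), while
	 * that buffer's unused map is parked back in the first slot.
	 * This keeps the mbuf and its map together for ix_txeof().
	 */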
1188 	txbuf->m_head = m_head;
1189 	txr->tx_buf[first].map = txbuf->map;
1190 	txbuf->map = map;
1191 
1192 	/*
1193 	 * Defer TDT updating until enough descriptors are set up.
1194 	 */
1195 	*idx = i;
1196 
1197 	return 0;
1198 }
1199 
1200 static void
1201 ix_set_promisc(struct ix_softc *sc)
1202 {
1203 	struct ifnet *ifp = &sc->arpcom.ac_if;
1204 	uint32_t reg_rctl;
1205 	int mcnt = 0;
1206 
1207 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1208 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1209 	if (ifp->if_flags & IFF_ALLMULTI) {
1210 		mcnt = IX_MAX_MCASTADDR;
1211 	} else {
1212 		struct ifmultiaddr *ifma;
1213 
1214 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1215 			if (ifma->ifma_addr->sa_family != AF_LINK)
1216 				continue;
1217 			if (mcnt == IX_MAX_MCASTADDR)
1218 				break;
1219 			mcnt++;
1220 		}
1221 	}
1222 	if (mcnt < IX_MAX_MCASTADDR)
1223 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1224 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1225 
1226 	if (ifp->if_flags & IFF_PROMISC) {
1227 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1228 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1229 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1230 		reg_rctl |= IXGBE_FCTRL_MPE;
1231 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1232 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1233 	}
1234 }
1235 
1236 static void
1237 ix_set_multi(struct ix_softc *sc)
1238 {
1239 	struct ifnet *ifp = &sc->arpcom.ac_if;
1240 	struct ifmultiaddr *ifma;
1241 	uint32_t fctrl;
1242 	uint8_t	*mta;
1243 	int mcnt = 0;
1244 
1245 	mta = sc->mta;
1246 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1247 
1248 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1249 		if (ifma->ifma_addr->sa_family != AF_LINK)
1250 			continue;
1251 		if (mcnt == IX_MAX_MCASTADDR)
1252 			break;
1253 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1254 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1255 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1256 		mcnt++;
1257 	}
1258 
1259 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1260 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1261 	if (ifp->if_flags & IFF_PROMISC) {
1262 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1263 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1264 		fctrl |= IXGBE_FCTRL_MPE;
1265 		fctrl &= ~IXGBE_FCTRL_UPE;
1266 	} else {
1267 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1268 	}
1269 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1270 
1271 	if (mcnt < IX_MAX_MCASTADDR) {
1272 		ixgbe_update_mc_addr_list(&sc->hw,
1273 		    mta, mcnt, ix_mc_array_itr, TRUE);
1274 	}
1275 }
1276 
1277 /*
1278  * This is an iterator function required by the multicast
1279  * shared code.  It feeds the addresses gathered by
1280  * ix_set_multi() to the shared code routine one at a time.
1281  */
1282 static uint8_t *
1283 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1284 {
1285 	uint8_t *addr = *update_ptr;
1286 	uint8_t *newptr;
1287 	*vmdq = 0;
1288 
1289 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1290 	*update_ptr = newptr;
1291 	return addr;
1292 }
1293 
1294 static void
1295 ix_timer(void *arg)
1296 {
1297 	struct ix_softc *sc = arg;
1298 
1299 	lwkt_serialize_enter(&sc->main_serialize);
1300 
1301 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1302 		lwkt_serialize_exit(&sc->main_serialize);
1303 		return;
1304 	}
1305 
1306 	/* Check for pluggable optics */
1307 	if (sc->sfp_probe) {
1308 		if (!ix_sfp_probe(sc))
1309 			goto done; /* Nothing to do */
1310 	}
1311 
1312 	ix_update_link_status(sc);
1313 	ix_update_stats(sc);
1314 
1315 done:
1316 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1317 	lwkt_serialize_exit(&sc->main_serialize);
1318 }
1319 
1320 static void
1321 ix_update_link_status(struct ix_softc *sc)
1322 {
1323 	struct ifnet *ifp = &sc->arpcom.ac_if;
1324 
1325 	if (sc->link_up) {
1326 		if (sc->link_active == FALSE) {
1327 			if (bootverbose) {
1328 				if_printf(ifp, "Link is up %d Gbps %s\n",
1329 				    sc->link_speed == 128 ? 10 : 1,
1330 				    "Full Duplex");
1331 			}
1332 			sc->link_active = TRUE;
1333 
1334 			/* Update any Flow Control changes */
1335 			ixgbe_fc_enable(&sc->hw);
1336 
1337 			ifp->if_link_state = LINK_STATE_UP;
1338 			if_link_state_change(ifp);
1339 		}
1340 	} else { /* Link down */
1341 		if (sc->link_active == TRUE) {
1342 			if (bootverbose)
1343 				if_printf(ifp, "Link is Down\n");
1344 			ifp->if_link_state = LINK_STATE_DOWN;
1345 			if_link_state_change(ifp);
1346 
1347 			sc->link_active = FALSE;
1348 		}
1349 	}
1350 }
1351 
1352 static void
1353 ix_stop(struct ix_softc *sc)
1354 {
1355 	struct ixgbe_hw *hw = &sc->hw;
1356 	struct ifnet *ifp = &sc->arpcom.ac_if;
1357 	int i;
1358 
1359 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1360 
1361 	ix_disable_intr(sc);
1362 	callout_stop(&sc->timer);
1363 
1364 	ifp->if_flags &= ~IFF_RUNNING;
1365 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1366 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1367 		ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog);
1368 	}
1369 
1370 	ixgbe_reset_hw(hw);
1371 	hw->adapter_stopped = FALSE;
1372 	ixgbe_stop_adapter(hw);
1373 	if (hw->mac.type == ixgbe_mac_82599EB)
1374 		ixgbe_stop_mac_link_on_d3_82599(hw);
1375 	/* Turn off the laser - a no-op with no optics */
1376 	ixgbe_disable_tx_laser(hw);
1377 
1378 	/* Update the stack */
1379 	sc->link_up = FALSE;
1380 	ix_update_link_status(sc);
1381 
1382 	/* Reprogram the RAR[0] in case user changed it. */
1383 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1384 
1385 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1386 		ix_free_tx_ring(&sc->tx_rings[i]);
1387 
1388 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1389 		ix_free_rx_ring(&sc->rx_rings[i]);
1390 }
1391 
1392 static void
1393 ix_setup_optics(struct ix_softc *sc)
1394 {
1395 	struct ixgbe_hw *hw = &sc->hw;
1396 	int layer;
1397 
1398 	layer = ixgbe_get_supported_physical_layer(hw);
1399 
1400 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1401 		sc->optics = IFM_10G_T;
1402 		return;
1403 	}
1404 
1405 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1406 		sc->optics = IFM_1000_T;
1407 		return;
1408 	}
1409 
1410 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1411 		sc->optics = IFM_1000_SX;
1412 		return;
1413 	}
1414 
1415 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1416 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1417 		sc->optics = IFM_10G_LR;
1418 		return;
1419 	}
1420 
1421 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1422 		sc->optics = IFM_10G_SR;
1423 		return;
1424 	}
1425 
1426 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1427 		sc->optics = IFM_10G_TWINAX;
1428 		return;
1429 	}
1430 
1431 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1432 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1433 		sc->optics = IFM_10G_CX4;
1434 		return;
1435 	}
1436 
1437 	/* If we get here just set the default */
1438 	sc->optics = IFM_ETHER | IFM_AUTO;
1439 }
1440 
1441 static void
1442 ix_setup_ifp(struct ix_softc *sc)
1443 {
1444 	struct ixgbe_hw *hw = &sc->hw;
1445 	struct ifnet *ifp = &sc->arpcom.ac_if;
1446 	int i;
1447 
1448 	ifp->if_baudrate = IF_Gbps(10UL);
1449 
1450 	ifp->if_softc = sc;
1451 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1452 	ifp->if_init = ix_init;
1453 	ifp->if_ioctl = ix_ioctl;
1454 	ifp->if_start = ix_start;
1455 	ifp->if_serialize = ix_serialize;
1456 	ifp->if_deserialize = ix_deserialize;
1457 	ifp->if_tryserialize = ix_tryserialize;
1458 #ifdef INVARIANTS
1459 	ifp->if_serialize_assert = ix_serialize_assert;
1460 #endif
1461 
1462 	/* Increase TSO burst length */
1463 	ifp->if_tsolen = (8 * ETHERMTU);
1464 
1465 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1466 	ifq_set_ready(&ifp->if_snd);
1467 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1468 
1469 	ifp->if_mapsubq = ifq_mapsubq_mask;
1470 	ifq_set_subq_mask(&ifp->if_snd, 0);
1471 
1472 	ether_ifattach(ifp, hw->mac.addr, NULL);
1473 
1474 	ifp->if_capabilities =
1475 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1476 	if (IX_ENABLE_HWRSS(sc))
1477 		ifp->if_capabilities |= IFCAP_RSS;
1478 	ifp->if_capenable = ifp->if_capabilities;
1479 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1480 
1481 	/*
1482 	 * Tell the upper layer(s) we support long frames.
1483 	 */
1484 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1485 
1486 	/* Setup TX rings and subqueues */
1487 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1488 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1489 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1490 
1491 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1492 		ifsq_set_priv(ifsq, txr);
1493 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1494 		txr->tx_ifsq = ifsq;
1495 
1496 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1497 	}
1498 
1499 	/*
1500 	 * Specify the media types supported by this adapter and register
1501 	 * callbacks to update media and link information
1502 	 */
1503 	ifmedia_add(&sc->media, IFM_ETHER | sc->optics, 0, NULL);
1504 	ifmedia_set(&sc->media, IFM_ETHER | sc->optics);
1505 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1506 		ifmedia_add(&sc->media,
1507 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1508 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1509 	}
1510 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1511 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1512 }
1513 
1514 static boolean_t
1515 ix_is_sfp(const struct ixgbe_hw *hw)
1516 {
1517 	switch (hw->phy.type) {
1518 	case ixgbe_phy_sfp_avago:
1519 	case ixgbe_phy_sfp_ftl:
1520 	case ixgbe_phy_sfp_intel:
1521 	case ixgbe_phy_sfp_unknown:
1522 	case ixgbe_phy_sfp_passive_tyco:
1523 	case ixgbe_phy_sfp_passive_unknown:
1524 		return TRUE;
1525 	default:
1526 		return FALSE;
1527 	}
1528 }
1529 
1530 static void
1531 ix_config_link(struct ix_softc *sc)
1532 {
1533 	struct ixgbe_hw *hw = &sc->hw;
1534 	boolean_t sfp;
1535 
1536 	sfp = ix_is_sfp(hw);
1537 	if (sfp) {
1538 		if (hw->phy.multispeed_fiber) {
1539 			hw->mac.ops.setup_sfp(hw);
1540 			ixgbe_enable_tx_laser(hw);
1541 			ix_handle_msf(sc);
1542 		} else {
1543 			ix_handle_mod(sc);
1544 		}
1545 	} else {
1546 		uint32_t autoneg, err = 0;
1547 
1548 		if (hw->mac.ops.check_link != NULL) {
1549 			err = ixgbe_check_link(hw, &sc->link_speed,
1550 			    &sc->link_up, FALSE);
1551 			if (err)
1552 				return;
1553 		}
1554 
1555 		autoneg = hw->phy.autoneg_advertised;
1556 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1557 			bool negotiate;
1558 
1559 			err = hw->mac.ops.get_link_capabilities(hw,
1560 			    &autoneg, &negotiate);
1561 			if (err)
1562 				return;
1563 		}
1564 
1565 		if (hw->mac.ops.setup_link != NULL) {
1566 			err = hw->mac.ops.setup_link(hw,
1567 			    autoneg, sc->link_up);
1568 			if (err)
1569 				return;
1570 		}
1571 	}
1572 }
1573 
1574 static int
1575 ix_alloc_rings(struct ix_softc *sc)
1576 {
1577 	int error, i;
1578 
1579 	/*
1580 	 * Create top level busdma tag
1581 	 */
1582 	error = bus_dma_tag_create(NULL, 1, 0,
1583 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1584 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1585 	    &sc->parent_tag);
1586 	if (error) {
1587 		device_printf(sc->dev, "could not create top level DMA tag\n");
1588 		return error;
1589 	}
1590 
1591 	/*
1592 	 * Allocate TX descriptor rings and buffers
1593 	 */
1594 	sc->tx_rings = kmalloc_cachealign(
1595 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1596 	    M_DEVBUF, M_WAITOK | M_ZERO);
1597 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1598 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1599 
1600 		txr->tx_sc = sc;
1601 		txr->tx_idx = i;
1602 		txr->tx_intr_vec = -1;
1603 		lwkt_serialize_init(&txr->tx_serialize);
1604 
1605 		error = ix_create_tx_ring(txr);
1606 		if (error)
1607 			return error;
1608 	}
1609 
1610 	/*
1611 	 * Allocate RX descriptor rings and buffers
1612 	 */
1613 	sc->rx_rings = kmalloc_cachealign(
1614 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1615 	    M_DEVBUF, M_WAITOK | M_ZERO);
1616 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1617 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1618 
1619 		rxr->rx_sc = sc;
1620 		rxr->rx_idx = i;
1621 		rxr->rx_intr_vec = -1;
1622 		lwkt_serialize_init(&rxr->rx_serialize);
1623 
1624 		error = ix_create_rx_ring(rxr);
1625 		if (error)
1626 			return error;
1627 	}
1628 
1629 	return 0;
1630 }
1631 
1632 static int
1633 ix_create_tx_ring(struct ix_tx_ring *txr)
1634 {
1635 	int error, i, tsize, ntxd;
1636 
1637 	/*
1638 	 * Validate the number of transmit descriptors.  It must not exceed
1639 	 * the hardware maximum and must be a multiple of IX_DBA_ALIGN.
1640 	 */
1641 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1642 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1643 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1644 		device_printf(txr->tx_sc->dev,
1645 		    "Using %d TX descriptors instead of %d!\n",
1646 		    IX_DEF_TXD, ntxd);
1647 		txr->tx_ndesc = IX_DEF_TXD;
1648 	} else {
1649 		txr->tx_ndesc = ntxd;
1650 	}
1651 
1652 	/*
1653 	 * Allocate TX head write-back buffer
1654 	 */
1655 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1656 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1657 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1658 	if (txr->tx_hdr == NULL) {
1659 		device_printf(txr->tx_sc->dev,
1660 		    "Unable to allocate TX head write-back buffer\n");
1661 		return ENOMEM;
1662 	}
1663 
1664 	/*
1665 	 * Allocate TX descriptor ring
1666 	 */
1667 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1668 	    IX_DBA_ALIGN);
1669 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1670 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1671 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1672 	if (txr->tx_base == NULL) {
1673 		device_printf(txr->tx_sc->dev,
1674 		    "Unable to allocate TX Descriptor memory\n");
1675 		return ENOMEM;
1676 	}
1677 
1678 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1679 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1680 
1681 	/*
1682 	 * Create DMA tag for TX buffers
1683 	 */
1684 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1685 	    1, 0,		/* alignment, bounds */
1686 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1687 	    BUS_SPACE_MAXADDR,	/* highaddr */
1688 	    NULL, NULL,		/* filter, filterarg */
1689 	    IX_TSO_SIZE,	/* maxsize */
1690 	    IX_MAX_SCATTER,	/* nsegments */
1691 	    PAGE_SIZE,		/* maxsegsize */
1692 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1693 	    BUS_DMA_ONEBPAGE,	/* flags */
1694 	    &txr->tx_tag);
1695 	if (error) {
1696 		device_printf(txr->tx_sc->dev,
1697 		    "Unable to allocate TX DMA tag\n");
1698 		kfree(txr->tx_buf, M_DEVBUF);
1699 		txr->tx_buf = NULL;
1700 		return error;
1701 	}
1702 
1703 	/*
1704 	 * Create DMA maps for TX buffers
1705 	 */
1706 	for (i = 0; i < txr->tx_ndesc; ++i) {
1707 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1708 
1709 		error = bus_dmamap_create(txr->tx_tag,
1710 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1711 		if (error) {
1712 			device_printf(txr->tx_sc->dev,
1713 			    "Unable to create TX DMA map\n");
1714 			ix_destroy_tx_ring(txr, i);
1715 			return error;
1716 		}
1717 	}
1718 
1719 	/*
1720 	 * Initialize various watermarks
1721 	 */
1722 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1723 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1724 
1725 	return 0;
1726 }
1727 
1728 static void
1729 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1730 {
1731 	int i;
1732 
1733 	if (txr->tx_hdr != NULL) {
1734 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1735 		bus_dmamem_free(txr->tx_hdr_dtag,
1736 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1737 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1738 		txr->tx_hdr = NULL;
1739 	}
1740 
1741 	if (txr->tx_base != NULL) {
1742 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1743 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1744 		    txr->tx_base_map);
1745 		bus_dma_tag_destroy(txr->tx_base_dtag);
1746 		txr->tx_base = NULL;
1747 	}
1748 
1749 	if (txr->tx_buf == NULL)
1750 		return;
1751 
1752 	for (i = 0; i < ndesc; ++i) {
1753 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1754 
1755 		KKASSERT(txbuf->m_head == NULL);
1756 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1757 	}
1758 	bus_dma_tag_destroy(txr->tx_tag);
1759 
1760 	kfree(txr->tx_buf, M_DEVBUF);
1761 	txr->tx_buf = NULL;
1762 }
1763 
1764 static void
1765 ix_init_tx_ring(struct ix_tx_ring *txr)
1766 {
1767 	/* Clear the old ring contents */
1768 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1769 
1770 	/* Clear TX head write-back buffer */
1771 	*(txr->tx_hdr) = 0;
1772 
1773 	/* Reset indices */
1774 	txr->tx_next_avail = 0;
1775 	txr->tx_next_clean = 0;
1776 	txr->tx_nsegs = 0;
1777 
1778 	/* Set number of descriptors available */
1779 	txr->tx_avail = txr->tx_ndesc;
1780 }
1781 
1782 static void
1783 ix_init_tx_unit(struct ix_softc *sc)
1784 {
1785 	struct ixgbe_hw	*hw = &sc->hw;
1786 	int i;
1787 
1788 	/*
1789 	 * Setup the Base and Length of the Tx Descriptor Ring
1790 	 */
1791 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1792 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1793 		uint64_t tdba = txr->tx_base_paddr;
1794 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1795 		uint32_t txctrl;
1796 
1797 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1798 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1799 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1800 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1801 
1802 		/* Setup the HW Tx Head and Tail descriptor pointers */
1803 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1804 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1805 
1806 		/* Disable TX head write-back relax ordering */
1807 		switch (hw->mac.type) {
1808 		case ixgbe_mac_82598EB:
1809 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1810 			break;
1811 		case ixgbe_mac_82599EB:
1812 		case ixgbe_mac_X540:
1813 		default:
1814 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1815 			break;
1816 		}
1817 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1818 		switch (hw->mac.type) {
1819 		case ixgbe_mac_82598EB:
1820 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1821 			break;
1822 		case ixgbe_mac_82599EB:
1823 		case ixgbe_mac_X540:
1824 		default:
1825 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1826 			break;
1827 		}
1828 
1829 		/* Enable TX head write-back */
1830 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1831 		    (uint32_t)(hdr_paddr >> 32));
1832 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1833 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
1834 	}
1835 
1836 	if (hw->mac.type != ixgbe_mac_82598EB) {
1837 		uint32_t dmatxctl, rttdcs;
1838 
1839 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1840 		dmatxctl |= IXGBE_DMATXCTL_TE;
1841 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1842 
1843 		/* Disable arbiter to set MTQC */
1844 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1845 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
1846 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1847 
1848 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1849 
1850 		/* Re-enable arbiter */
1851 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1852 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1853 	}
1854 }
1855 
1856 static int
1857 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1858     uint32_t *cmd_type_len, uint32_t *olinfo_status)
1859 {
1860 	struct ixgbe_adv_tx_context_desc *TXD;
1861 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1862 	int ehdrlen, ip_hlen = 0, ctxd;
1863 	boolean_t offload = TRUE;
1864 
1865 	/* First check if TSO is to be used */
1866 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
1867 		return ix_tso_ctx_setup(txr, mp,
1868 		    cmd_type_len, olinfo_status);
1869 	}
1870 
1871 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
1872 		offload = FALSE;
1873 
1874 	/* Indicate the whole packet as payload when not doing TSO */
1875 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
1876 
1877 	/*
1878 	 * In advanced descriptors the vlan tag must be placed into the
1879 	 * context descriptor.  Hence we need to make one even if not
1880 	 * doing checksum offloads.
1881 	 */
1882 	if (mp->m_flags & M_VLANTAG) {
1883 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1884 		    IXGBE_ADVTXD_VLAN_SHIFT;
1885 	} else if (!offload) {
1886 		/* No TX descriptor is consumed */
1887 		return 0;
1888 	}
1889 
1890 	/* Set the ether header length */
1891 	ehdrlen = mp->m_pkthdr.csum_lhlen;
1892 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1893 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1894 
1895 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
1896 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1897 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1898 		ip_hlen = mp->m_pkthdr.csum_iphlen;
1899 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1900 	}
1901 	vlan_macip_lens |= ip_hlen;
1902 
1903 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1904 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
1905 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1906 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
1907 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
1908 
1909 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1910 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1911 
1912 	/* Now ready a context descriptor */
1913 	ctxd = txr->tx_next_avail;
1914 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1915 
1916 	/* Now copy bits into descriptor */
1917 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1918 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1919 	TXD->seqnum_seed = htole32(0);
1920 	TXD->mss_l4len_idx = htole32(0);
1921 
1922 	/* We've consumed the first desc, adjust counters */
1923 	if (++ctxd == txr->tx_ndesc)
1924 		ctxd = 0;
1925 	txr->tx_next_avail = ctxd;
1926 	--txr->tx_avail;
1927 
1928 	/* One TX descriptor is consumed */
1929 	return 1;
1930 }
1931 
1932 static int
1933 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1934     uint32_t *cmd_type_len, uint32_t *olinfo_status)
1935 {
1936 	struct ixgbe_adv_tx_context_desc *TXD;
1937 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1938 	uint32_t mss_l4len_idx = 0, paylen;
1939 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
1940 
1941 	ehdrlen = mp->m_pkthdr.csum_lhlen;
1942 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1943 
1944 	ip_hlen = mp->m_pkthdr.csum_iphlen;
1945 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1946 
1947 	tcp_hlen = mp->m_pkthdr.csum_thlen;
1948 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
1949 
1950 	ctxd = txr->tx_next_avail;
1951 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
1952 
1953 	if (mp->m_flags & M_VLANTAG) {
1954 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1955 		    IXGBE_ADVTXD_VLAN_SHIFT;
1956 	}
1957 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1958 	vlan_macip_lens |= ip_hlen;
1959 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1960 
1961 	/* ADV DTYPE TUCMD */
1962 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1963 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1964 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1965 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1966 
1967 	/* MSS L4LEN IDX */
1968 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
1969 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1970 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1971 
1972 	TXD->seqnum_seed = htole32(0);
1973 
1974 	if (++ctxd == txr->tx_ndesc)
1975 		ctxd = 0;
1976 
1977 	txr->tx_avail--;
1978 	txr->tx_next_avail = ctxd;
1979 
1980 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1981 
1982 	/* This is used in the transmit data descriptor built in the encap path */
1983 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
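	/*
	 * e.g. a 9054-byte TSO packet with 14-byte Ethernet, 20-byte IP and
	 * 20-byte TCP headers and an MSS of 1448 gives paylen 9000; the
	 * hardware then emits 7 segments (6 x 1448 + 312), regenerating the
	 * headers for each.
	 */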
1984 
1985 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1986 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1987 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1988 
1989 	/* One TX descriptor is consumed */
1990 	return 1;
1991 }
1992 
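/*
 * Reclaim completed TX descriptors.  'hdr' is the consumed-head index the
 * hardware wrote back to the ring's header write-back area (enabled via
 * IXGBE_TDWBAL_HEAD_WB_ENABLE above).
 */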
1993 static void
1994 ix_txeof(struct ix_tx_ring *txr, int hdr)
1995 {
1996 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
1997 	int first, avail;
1998 
1999 	if (txr->tx_avail == txr->tx_ndesc)
2000 		return;
2001 
2002 	first = txr->tx_next_clean;
2003 	if (first == hdr)
2004 		return;
2005 
2006 	avail = txr->tx_avail;
2007 	while (first != hdr) {
2008 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2009 
2010 		++avail;
2011 		if (txbuf->m_head) {
2012 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2013 			m_freem(txbuf->m_head);
2014 			txbuf->m_head = NULL;
2015 			IFNET_STAT_INC(ifp, opackets, 1);
2016 		}
2017 		if (++first == txr->tx_ndesc)
2018 			first = 0;
2019 	}
2020 	txr->tx_next_clean = first;
2021 	txr->tx_avail = avail;
2022 
2023 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2024 		ifsq_clr_oactive(txr->tx_ifsq);
2025 		txr->tx_watchdog.wd_timer = 0;
2026 	}
2027 }
2028 
2029 static int
2030 ix_create_rx_ring(struct ix_rx_ring *rxr)
2031 {
2032 	int i, rsize, error, nrxd;
2033 
2034 	/*
2035 	 * Validate number of receive descriptors.  It must not exceed
2036 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2037 	 */
2038 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2039 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2040 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2041 		device_printf(rxr->rx_sc->dev,
2042 		    "Using %d RX descriptors instead of %d!\n",
2043 		    IX_DEF_RXD, nrxd);
2044 		rxr->rx_ndesc = IX_DEF_RXD;
2045 	} else {
2046 		rxr->rx_ndesc = nrxd;
2047 	}
2048 
2049 	/*
2050 	 * Allocate RX descriptor ring
2051 	 */
2052 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2053 	    IX_DBA_ALIGN);
2054 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2055 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2056 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2057 	if (rxr->rx_base == NULL) {
2058 		device_printf(rxr->rx_sc->dev,
2059 		    "Unable to allocate TX Descriptor memory\n");
2060 		return ENOMEM;
2061 	}
2062 
2063 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2064 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2065 
2066 	/*
2067 	 * Create DMA tag for RX buffers
2068 	 */
2069 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2070 	    1, 0,		/* alignment, bounds */
2071 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2072 	    BUS_SPACE_MAXADDR,	/* highaddr */
2073 	    NULL, NULL,		/* filter, filterarg */
2074 	    PAGE_SIZE,		/* maxsize */
2075 	    1,			/* nsegments */
2076 	    PAGE_SIZE,		/* maxsegsize */
2077 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2078 	    &rxr->rx_tag);
2079 	if (error) {
2080 		device_printf(rxr->rx_sc->dev,
2081 		    "Unable to create RX DMA tag\n");
2082 		kfree(rxr->rx_buf, M_DEVBUF);
2083 		rxr->rx_buf = NULL;
2084 		return error;
2085 	}
2086 
2087 	/*
2088 	 * Create spare DMA map for RX buffers
2089 	 */
2090 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2091 	    &rxr->rx_sparemap);
2092 	if (error) {
2093 		device_printf(rxr->rx_sc->dev,
2094 		    "Unable to create spare RX DMA map\n");
2095 		bus_dma_tag_destroy(rxr->rx_tag);
2096 		kfree(rxr->rx_buf, M_DEVBUF);
2097 		rxr->rx_buf = NULL;
2098 		return error;
2099 	}
2100 
2101 	/*
2102 	 * Create DMA maps for RX buffers
2103 	 */
2104 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2105 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2106 
2107 		error = bus_dmamap_create(rxr->rx_tag,
2108 		    BUS_DMA_WAITOK, &rxbuf->map);
2109 		if (error) {
2110 			device_printf(rxr->rx_sc->dev,
2111 			    "Unable to create RX dma map\n");
2112 			ix_destroy_rx_ring(rxr, i);
2113 			return error;
2114 		}
2115 	}
2116 
2117 	/*
2118 	 * Initialize various watermarks
2119 	 */
2120 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2121 
2122 	return 0;
2123 }
2124 
2125 static void
2126 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2127 {
2128 	int i;
2129 
2130 	if (rxr->rx_base != NULL) {
2131 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2132 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2133 		    rxr->rx_base_map);
2134 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2135 		rxr->rx_base = NULL;
2136 	}
2137 
2138 	if (rxr->rx_buf == NULL)
2139 		return;
2140 
2141 	for (i = 0; i < ndesc; ++i) {
2142 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2143 
2144 		KKASSERT(rxbuf->m_head == NULL);
2145 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2146 	}
2147 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2148 	bus_dma_tag_destroy(rxr->rx_tag);
2149 
2150 	kfree(rxr->rx_buf, M_DEVBUF);
2151 	rxr->rx_buf = NULL;
2152 }
2153 
2154 /*
2155 ** Used to detect a descriptor that has
2156 ** been merged by Hardware RSC.
2157 */
2158 static __inline uint32_t
2159 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2160 {
2161 	return (le32toh(rx->wb.lower.lo_dword.data) &
2162 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2163 }
2164 
2165 #if 0
2166 /*********************************************************************
2167  *
2168  *  Initialize the Hardware RSC (LRO) feature on 82599
2169  *  for an RX ring.  It is toggled by the LRO capability
2170  *  even though it is transparent to the stack.
2171  *
2172  *  NOTE: since this HW feature only works with IPv4 and
2173  *        our testing has shown soft LRO to be as effective,
2174  *        it is disabled by default.
2175  *
2176  **********************************************************************/
2177 static void
2178 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2179 {
2180 	struct	ix_softc 	*sc = rxr->rx_sc;
2181 	struct	ixgbe_hw	*hw = &sc->hw;
2182 	uint32_t			rscctrl, rdrxctl;
2183 
2184 #if 0
2185 	/* If turning LRO/RSC off we need to disable it */
2186 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2187 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2188 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2189 		return;
2190 	}
2191 #endif
2192 
2193 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2194 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2195 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2196 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2197 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2198 
2199 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2200 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2201 	/*
2202 	** Limit the total number of descriptors that
2203 	** can be combined, so it does not exceed 64K
2204 	*/
2205 	if (rxr->mbuf_sz == MCLBYTES)
2206 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2207 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2208 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2209 	else if (rxr->mbuf_sz == MJUM9BYTES)
2210 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2211 	else  /* Using 16K cluster */
2212 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2213 
2214 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2215 
2216 	/* Enable TCP header recognition */
2217 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2218 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2219 	    IXGBE_PSRTYPE_TCPHDR));
2220 
2221 	/* Disable RSC for ACK packets */
2222 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2223 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2224 
2225 	rxr->hw_rsc = TRUE;
2226 }
2227 #endif
2228 
2229 static int
2230 ix_init_rx_ring(struct ix_rx_ring *rxr)
2231 {
2232 	int i;
2233 
2234 	/* Clear the ring contents */
2235 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2236 
2237 	/* XXX we need JUMPAGESIZE for RSC too */
2238 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2239 		rxr->rx_mbuf_sz = MCLBYTES;
2240 	else
2241 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2242 
2243 	/* Now replenish the mbufs */
2244 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2245 		int error;
2246 
2247 		error = ix_newbuf(rxr, i, TRUE);
2248 		if (error)
2249 			return error;
2250 	}
2251 
2252 	/* Setup our descriptor indices */
2253 	rxr->rx_next_check = 0;
2254 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2255 
2256 #if 0
2257 	/*
2258 	** Now set up the LRO interface:
2259 	*/
2260 	if (ixgbe_rsc_enable)
2261 		ix_setup_hw_rsc(rxr);
2262 #endif
2263 
2264 	return 0;
2265 }
2266 
2267 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2268 
2269 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2270 
2271 static void
2272 ix_init_rx_unit(struct ix_softc *sc)
2273 {
2274 	struct ixgbe_hw	*hw = &sc->hw;
2275 	struct ifnet *ifp = &sc->arpcom.ac_if;
2276 	uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2277 	int i;
2278 
2279 	/*
2280 	 * Make sure receives are disabled while setting up the descriptor ring
2281 	 */
2282 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2283 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2284 
2285 	/* Enable broadcasts */
2286 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2287 	fctrl |= IXGBE_FCTRL_BAM;
2288 	fctrl |= IXGBE_FCTRL_DPF;
2289 	fctrl |= IXGBE_FCTRL_PMCF;
2290 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2291 
2292 	/* Set for Jumbo Frames? */
2293 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2294 	if (ifp->if_mtu > ETHERMTU)
2295 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2296 	else
2297 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2298 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2299 
2300 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2301 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2302 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
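	/*
	 * SRRCTL.BSIZEPKT is expressed in 1KB units (the shift is 10), so a
	 * 2KB cluster yields 2 and a 4KB page-sized jumbo cluster yields 4.
	 */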
2303 
2304 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2305 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2306 		uint64_t rdba = rxr->rx_base_paddr;
2307 		uint32_t srrctl;
2308 
2309 		/* Setup the Base and Length of the Rx Descriptor Ring */
2310 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2311 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2312 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2313 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2314 
2315 		/*
2316 		 * Set up the SRRCTL register
2317 		 */
2318 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2319 
2320 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2321 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2322 		srrctl |= bufsz;
2323 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2324 		if (sc->rx_ring_inuse > 1) {
2325 			/* See the comment near ix_enable_rx_drop() */
2326 			switch (sc->fc) {
2327 			case ixgbe_fc_rx_pause:
2328 			case ixgbe_fc_tx_pause:
2329 			case ixgbe_fc_full:
2330 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2331 				if (i == 0 && bootverbose) {
2332 					if_printf(ifp, "flow control %d, "
2333 					    "disable RX drop\n", sc->fc);
2334 				}
2335 				break;
2336 
2337 			case ixgbe_fc_none:
2338 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2339 				if (i == 0 && bootverbose) {
2340 					if_printf(ifp, "flow control %d, "
2341 					    "enable RX drop\n", sc->fc);
2342 				}
2343 				break;
2344 
2345 			default:
2346 				break;
2347 			}
2348 		}
2349 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2350 
2351 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2352 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2353 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2354 	}
2355 
2356 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2357 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2358 
2359 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2360 
2361 	/*
2362 	 * Setup RSS
2363 	 */
2364 	if (IX_ENABLE_HWRSS(sc)) {
2365 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2366 		int j, r;
2367 
2368 		/*
2369 		 * NOTE:
2370 		 * When we reach here, RSS has already been disabled
2371 		 * in ix_stop(), so we could safely configure RSS key
2372 		 * and redirect table.
2373 		 */
2374 
2375 		/*
2376 		 * Configure RSS key
2377 		 */
2378 		toeplitz_get_key(key, sizeof(key));
2379 		for (i = 0; i < IX_NRSSRK; ++i) {
2380 			uint32_t rssrk;
2381 
2382 			rssrk = IX_RSSRK_VAL(key, i);
2383 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2384 			    i, rssrk);
2385 
2386 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2387 		}
2388 
2389 		/*
2390 		 * Configure RSS redirect table in the following fashion:
2391 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2392 		 */
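		/*
		 * e.g. with 2 RX rings in use, the IX_NRETA * IX_RETA_SIZE
		 * table entries are filled with 0,1,0,1,...; the hardware
		 * indexes the table with the low bits of the RSS hash and
		 * the stored value selects the receiving ring.
		 */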
2393 		r = 0;
2394 		for (j = 0; j < IX_NRETA; ++j) {
2395 			uint32_t reta = 0;
2396 
2397 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2398 				uint32_t q;
2399 
2400 				q = r % sc->rx_ring_inuse;
2401 				reta |= q << (8 * i);
2402 				++r;
2403 			}
2404 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2405 			IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2406 		}
2407 
2408 		/*
2409 		 * Enable multiple receive queues.
2410 		 * Enable IPv4 RSS standard hash functions.
2411 		 */
2412 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2413 		    IXGBE_MRQC_RSSEN |
2414 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2415 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2416 
2417 		/*
2418 		 * NOTE:
2419 		 * PCSD must be enabled to enable multiple
2420 		 * receive queues.
2421 		 */
2422 		rxcsum |= IXGBE_RXCSUM_PCSD;
2423 	}
2424 
2425 	if (ifp->if_capenable & IFCAP_RXCSUM)
2426 		rxcsum |= IXGBE_RXCSUM_PCSD;
2427 
2428 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2429 }
2430 
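/*
 * Hand refilled RX descriptors back to the hardware by moving the tail
 * register to the descriptor just before the next one to be checked.
 */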
2431 static __inline void
2432 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2433 {
2434 	if (--i < 0)
2435 		i = rxr->rx_ndesc - 1;
2436 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2437 }
2438 
2439 static __inline void
2440 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2441 {
2442 	if ((ptype &
2443 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2444 		/* Not IPv4 */
2445 		return;
2446 	}
2447 
2448 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2449 	    IXGBE_RXD_STAT_IPCS)
2450 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2451 
2452 	if ((ptype &
2453 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2454 		/*
2455 		 * - Neither TCP nor UDP
2456 		 * - IPv4 fragment
2457 		 */
2458 		return;
2459 	}
2460 
2461 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2462 	    IXGBE_RXD_STAT_L4CS) {
2463 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2464 		    CSUM_FRAG_NOT_CHECKED;
2465 		mp->m_pkthdr.csum_data = htons(0xffff);
2466 	}
2467 }
2468 
2469 static __inline struct pktinfo *
2470 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2471     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2472 {
2473 	switch (hashtype) {
2474 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2475 		pi->pi_netisr = NETISR_IP;
2476 		pi->pi_flags = 0;
2477 		pi->pi_l3proto = IPPROTO_TCP;
2478 		break;
2479 
2480 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2481 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2482 			/* Not UDP, or an IPv4 fragment */
2483 			return NULL;
2484 		}
2485 		pi->pi_netisr = NETISR_IP;
2486 		pi->pi_flags = 0;
2487 		pi->pi_l3proto = IPPROTO_UDP;
2488 		break;
2489 
2490 	default:
2491 		return NULL;
2492 	}
2493 
2494 	m->m_flags |= M_HASH;
2495 	m->m_pkthdr.hash = toeplitz_hash(hash);
2496 	return pi;
2497 }
2498 
2499 static __inline void
2500 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2501 {
2502 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2503 	rxd->wb.upper.status_error = 0;
2504 }
2505 
2506 static void
2507 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2508 {
2509 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2510 
2511 	/*
2512 	 * XXX discard may not be correct
2513 	 */
2514 	if (eop) {
2515 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2516 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2517 	} else {
2518 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2519 	}
2520 	if (rxbuf->fmp != NULL) {
2521 		m_freem(rxbuf->fmp);
2522 		rxbuf->fmp = NULL;
2523 		rxbuf->lmp = NULL;
2524 	}
2525 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2526 }
2527 
2528 static void
2529 ix_rxeof(struct ix_rx_ring *rxr)
2530 {
2531 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2532 	int i, nsegs = 0;
2533 
2534 	i = rxr->rx_next_check;
2535 	for (;;) {
2536 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2537 		union ixgbe_adv_rx_desc	*cur;
2538 		struct mbuf *sendmp = NULL, *mp;
2539 		struct pktinfo *pi = NULL, pi0;
2540 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2541 		uint16_t len;
2542 		boolean_t eop;
2543 
2544 		cur = &rxr->rx_base[i];
2545 		staterr = le32toh(cur->wb.upper.status_error);
2546 
2547 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2548 			break;
2549 		++nsegs;
2550 
2551 		rxbuf = &rxr->rx_buf[i];
2552 		mp = rxbuf->m_head;
2553 
2554 		len = le16toh(cur->wb.upper.length);
2555 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2556 		    IXGBE_RXDADV_PKTTYPE_MASK;
2557 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2558 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2559 		    IXGBE_RXDADV_RSSTYPE_MASK;
2560 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2561 
2562 		/*
2563 		 * Make sure bad packets are discarded
2564 		 */
2565 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2566 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2567 			ix_rx_discard(rxr, i, eop);
2568 			goto next_desc;
2569 		}
2570 
2571 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2572 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2573 			ix_rx_discard(rxr, i, eop);
2574 			goto next_desc;
2575 		}
2576 
2577 		/*
2578 		 * On the 82599, which supports hardware LRO (RSC), the
2579 		 * fragments of a frame need not sit in sequential
2580 		 * descriptors; instead, the index of the next descriptor
2581 		 * is encoded in the descriptor itself.  This also means
2582 		 * we may process more than one frame at a time, which is
2583 		 * why the packet chain pointers are kept per RX buffer
2584 		 * (fmp/lmp below) instead of in the global variables the
2585 		 * driver used previously.
2586 		 */
2587 		if (!eop) {
2588 			int nextp;
2589 
2590 			/*
2591 			 * Figure out the next descriptor
2592 			 * of this frame.
2593 			 */
2594 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2595 				rsc = ix_rsc_count(cur);
2596 			if (rsc) { /* Get hardware index */
2597 				nextp = ((staterr &
2598 				    IXGBE_RXDADV_NEXTP_MASK) >>
2599 				    IXGBE_RXDADV_NEXTP_SHIFT);
2600 			} else { /* Just sequential */
2601 				nextp = i + 1;
2602 				if (nextp == rxr->rx_ndesc)
2603 					nextp = 0;
2604 			}
2605 			nbuf = &rxr->rx_buf[nextp];
2606 			prefetch(nbuf);
2607 		}
2608 		mp->m_len = len;
2609 
2610 		/*
2611 		 * Rather than using the fmp/lmp global pointers
2612 		 * we now keep the head of a packet chain in the
2613 		 * buffer struct and pass this along from one
2614 		 * descriptor to the next, until we get EOP.
2615 		 */
2616 		if (rxbuf->fmp == NULL) {
2617 			mp->m_pkthdr.len = len;
2618 			rxbuf->fmp = mp;
2619 			rxbuf->lmp = mp;
2620 		} else {
2621 			rxbuf->fmp->m_pkthdr.len += len;
2622 			rxbuf->lmp->m_next = mp;
2623 			rxbuf->lmp = mp;
2624 		}
2625 
2626 		if (nbuf != NULL) {
2627 			/*
2628 			 * Not the last fragment of this frame,
2629 			 * pass this fragment list on
2630 			 */
2631 			nbuf->fmp = rxbuf->fmp;
2632 			nbuf->lmp = rxbuf->lmp;
2633 		} else {
2634 			/*
2635 			 * Send this frame
2636 			 */
2637 			sendmp = rxbuf->fmp;
2638 
2639 			sendmp->m_pkthdr.rcvif = ifp;
2640 			IFNET_STAT_INC(ifp, ipackets, 1);
2641 #ifdef IX_RSS_DEBUG
2642 			rxr->rx_pkts++;
2643 #endif
2644 
2645 			/* Process vlan info */
2646 			if (staterr & IXGBE_RXD_STAT_VP) {
2647 				sendmp->m_pkthdr.ether_vlantag =
2648 				    le16toh(cur->wb.upper.vlan);
2649 				sendmp->m_flags |= M_VLANTAG;
2650 			}
2651 			if (ifp->if_capenable & IFCAP_RXCSUM)
2652 				ix_rxcsum(staterr, sendmp, ptype);
2653 			if (ifp->if_capenable & IFCAP_RSS) {
2654 				pi = ix_rssinfo(sendmp, &pi0,
2655 				    hash, hashtype, ptype);
2656 			}
2657 		}
2658 		rxbuf->fmp = NULL;
2659 		rxbuf->lmp = NULL;
2660 next_desc:
2661 		/* Advance our pointers to the next descriptor. */
2662 		if (++i == rxr->rx_ndesc)
2663 			i = 0;
2664 
2665 		if (sendmp != NULL)
2666 			ether_input_pkt(ifp, sendmp, pi);
2667 
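		/*
		 * Return refilled descriptors to the hardware in batches of
		 * rx_wreg_nsegs to limit the number of tail register writes.
		 */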
2668 		if (nsegs >= rxr->rx_wreg_nsegs) {
2669 			ix_rx_refresh(rxr, i);
2670 			nsegs = 0;
2671 		}
2672 	}
2673 	rxr->rx_next_check = i;
2674 
2675 	if (nsegs > 0)
2676 		ix_rx_refresh(rxr, i);
2677 }
2678 
2679 static void
2680 ix_set_vlan(struct ix_softc *sc)
2681 {
2682 	struct ixgbe_hw *hw = &sc->hw;
2683 	uint32_t ctrl;
2684 
2685 	if (hw->mac.type == ixgbe_mac_82598EB) {
2686 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2687 		ctrl |= IXGBE_VLNCTRL_VME;
2688 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2689 	} else {
2690 		int i;
2691 
2692 		/*
2693 		 * On 82599 and later chips the VLAN enable is
2694 		 * per queue in RXDCTL
2695 		 */
2696 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2697 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2698 			ctrl |= IXGBE_RXDCTL_VME;
2699 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2700 		}
2701 	}
2702 }
2703 
2704 static void
2705 ix_enable_intr(struct ix_softc *sc)
2706 {
2707 	struct ixgbe_hw	*hw = &sc->hw;
2708 	uint32_t fwsm;
2709 	int i;
2710 
2711 	for (i = 0; i < sc->intr_cnt; ++i)
2712 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2713 
2714 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2715 
2716 	/* Enable Fan Failure detection */
2717 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2718 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2719 
2720 	switch (sc->hw.mac.type) {
2721 	case ixgbe_mac_82599EB:
2722 		sc->intr_mask |= IXGBE_EIMS_ECC;
2723 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2724 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2725 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2726 		break;
2727 
2728 	case ixgbe_mac_X540:
2729 		sc->intr_mask |= IXGBE_EIMS_ECC;
2730 		/* Detect if Thermal Sensor is enabled */
2731 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2732 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2733 			sc->intr_mask |= IXGBE_EIMS_TS;
2734 		/* FALL THROUGH */
2735 	default:
2736 		break;
2737 	}
2738 
2739 	/* With MSI-X we use auto clear for RX and TX rings */
2740 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2741 		/*
2742 		 * There are no EIAC1/EIAC2 for newer chips; the related
2743 		 * bits for TX and RX rings > 16 are always auto clear.
2744 		 *
2745 		 * XXX which bits?  There are _no_ documented EICR1 and
2746 		 * EICR2 at all; only EICR.
2747 		 */
2748 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2749 	} else {
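		/*
		 * Single shared vector (MSI or legacy): unmask the TX and
		 * RX ring 0 cause bits, plus RX ring 1 when two RSS RX
		 * rings are in use.
		 */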
2750 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2751 
2752 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2753 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2754 			sc->intr_mask |= IX_RX1_INTR_MASK;
2755 	}
2756 
2757 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2758 
2759 	/*
2760 	 * Enable RX and TX rings for MSI-X
2761 	 */
2762 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2763 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2764 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2765 
2766 			if (txr->tx_intr_vec >= 0) {
2767 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2768 				    txr->tx_eims_val);
2769 			}
2770 		}
2771 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2772 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2773 
2774 			KKASSERT(rxr->rx_intr_vec >= 0);
2775 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2776 		}
2777 	}
2778 
2779 	IXGBE_WRITE_FLUSH(hw);
2780 }
2781 
2782 static void
2783 ix_disable_intr(struct ix_softc *sc)
2784 {
2785 	int i;
2786 
2787 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2788 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2789 
2790 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2791 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2792 	} else {
2793 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2794 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2795 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2796 	}
2797 	IXGBE_WRITE_FLUSH(&sc->hw);
2798 
2799 	for (i = 0; i < sc->intr_cnt; ++i)
2800 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2801 }
2802 
2803 uint16_t
2804 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2805 {
2806 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2807 	    reg, 2);
2808 }
2809 
2810 void
2811 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2812 {
2813 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2814 	    reg, value, 2);
2815 }
2816 
2817 static void
2818 ix_slot_info(struct ix_softc *sc)
2819 {
2820 	struct ixgbe_hw *hw = &sc->hw;
2821 	device_t dev = sc->dev;
2822 	struct ixgbe_mac_info *mac = &hw->mac;
2823 	uint16_t link;
2824 	uint32_t offset;
2825 
2826 	/* For most devices simply call the shared code routine */
2827 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2828 		ixgbe_get_bus_info(hw);
2829 		goto display;
2830 	}
2831 
2832 	/*
2833 	 * For the Quad port adapter we need to parse back
2834 	 * up the PCI tree to find the speed of the expansion
2835 	 * slot into which this adapter is plugged. A bit more work.
2836 	 */
2837 	dev = device_get_parent(device_get_parent(dev));
2838 #ifdef IXGBE_DEBUG
2839 	device_printf(dev, "parent pcib = %x,%x,%x\n",
2840 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2841 #endif
2842 	dev = device_get_parent(device_get_parent(dev));
2843 #ifdef IXGBE_DEBUG
2844 	device_printf(dev, "slot pcib = %x,%x,%x\n",
2845 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2846 #endif
2847 	/* Now get the PCI Express Capabilities offset */
2848 	offset = pci_get_pciecap_ptr(dev);
2849 	/* ...and read the Link Status Register */
2850 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2851 	switch (link & IXGBE_PCI_LINK_WIDTH) {
2852 	case IXGBE_PCI_LINK_WIDTH_1:
2853 		hw->bus.width = ixgbe_bus_width_pcie_x1;
2854 		break;
2855 	case IXGBE_PCI_LINK_WIDTH_2:
2856 		hw->bus.width = ixgbe_bus_width_pcie_x2;
2857 		break;
2858 	case IXGBE_PCI_LINK_WIDTH_4:
2859 		hw->bus.width = ixgbe_bus_width_pcie_x4;
2860 		break;
2861 	case IXGBE_PCI_LINK_WIDTH_8:
2862 		hw->bus.width = ixgbe_bus_width_pcie_x8;
2863 		break;
2864 	default:
2865 		hw->bus.width = ixgbe_bus_width_unknown;
2866 		break;
2867 	}
2868 
2869 	switch (link & IXGBE_PCI_LINK_SPEED) {
2870 	case IXGBE_PCI_LINK_SPEED_2500:
2871 		hw->bus.speed = ixgbe_bus_speed_2500;
2872 		break;
2873 	case IXGBE_PCI_LINK_SPEED_5000:
2874 		hw->bus.speed = ixgbe_bus_speed_5000;
2875 		break;
2876 	case IXGBE_PCI_LINK_SPEED_8000:
2877 		hw->bus.speed = ixgbe_bus_speed_8000;
2878 		break;
2879 	default:
2880 		hw->bus.speed = ixgbe_bus_speed_unknown;
2881 		break;
2882 	}
2883 
2884 	mac->ops.set_lan_id(hw);
2885 
2886 display:
2887 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
2888 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
2889 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
2890 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
2891 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
2892 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
2893 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
2894 
2895 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
2896 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
2897 	    hw->bus.speed == ixgbe_bus_speed_2500) {
2898 		device_printf(dev, "For optimal performance a x8 "
2899 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
2900 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
2901 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
2902 	    hw->bus.speed < ixgbe_bus_speed_8000) {
2903 		device_printf(dev, "For optimal performance a x8 "
2904 		    "PCIE Gen3 slot is required.\n");
2905 	}
2906 }
2907 
2908 /*
2909  * Setup the correct IVAR register for a particular MSI-X interrupt.
2910  *
2911  * - entry is the queue index (or the misc cause index when type is -1)
2912  * - vector is the MSI-X vector to assign to that cause
2913  * - type is 0 for an RX queue, 1 for a TX queue, or -1 for the
2914  *   "other causes" (link, SFP module, etc.) interrupt
2915  */
2916 static void
2917 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
2918     int8_t type)
2919 {
2920 	struct ixgbe_hw *hw = &sc->hw;
2921 	uint32_t ivar, index;
2922 
2923 	vector |= IXGBE_IVAR_ALLOC_VAL;
2924 
2925 	switch (hw->mac.type) {
2926 	case ixgbe_mac_82598EB:
2927 		if (type == -1)
2928 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
2929 		else
2930 			entry += (type * 64);
2931 		index = (entry >> 2) & 0x1F;
2932 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
2933 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
2934 		ivar |= (vector << (8 * (entry & 0x3)));
2935 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
2936 		break;
2937 
2938 	case ixgbe_mac_82599EB:
2939 	case ixgbe_mac_X540:
2940 		if (type == -1) { /* MISC IVAR */
2941 			index = (entry & 1) * 8;
2942 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
2943 			ivar &= ~(0xFF << index);
2944 			ivar |= (vector << index);
2945 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
2946 		} else {	/* RX/TX IVARS */
2947 			index = (16 * (entry & 1)) + (8 * type);
2948 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
2949 			ivar &= ~(0xFF << index);
2950 			ivar |= (vector << index);
2951 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
2952 		}
2953 		break;
2954 	default:
2955 		break;
2956 	}
2957 }
2958 
2959 static boolean_t
2960 ix_sfp_probe(struct ix_softc *sc)
2961 {
2962 	struct ixgbe_hw	*hw = &sc->hw;
2963 
2964 	if (hw->phy.type == ixgbe_phy_nl &&
2965 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
2966 		int32_t ret;
2967 
2968 		ret = hw->phy.ops.identify_sfp(hw);
2969 		if (ret)
2970 			return FALSE;
2971 
2972 		ret = hw->phy.ops.reset(hw);
2973 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
2974 			if_printf(&sc->arpcom.ac_if,
2975 			     "Unsupported SFP+ module detected!  "
2976 			     "Reload driver with supported module.\n");
2977 			sc->sfp_probe = FALSE;
2978 			return FALSE;
2979 		}
2980 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
2981 
2982 		/* We now have supported optics */
2983 		sc->sfp_probe = FALSE;
2984 		/* Set the optics type so system reports correctly */
2985 		ix_setup_optics(sc);
2986 
2987 		return TRUE;
2988 	}
2989 	return FALSE;
2990 }
2991 
2992 static void
2993 ix_handle_link(struct ix_softc *sc)
2994 {
2995 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
2996 	ix_update_link_status(sc);
2997 }
2998 
2999 /*
3000  * Handling SFP module
3001  */
3002 static void
3003 ix_handle_mod(struct ix_softc *sc)
3004 {
3005 	struct ixgbe_hw *hw = &sc->hw;
3006 	uint32_t err;
3007 
3008 	err = hw->phy.ops.identify_sfp(hw);
3009 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3010 		if_printf(&sc->arpcom.ac_if,
3011 		    "Unsupported SFP+ module type was detected.\n");
3012 		return;
3013 	}
3014 	err = hw->mac.ops.setup_sfp(hw);
3015 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3016 		if_printf(&sc->arpcom.ac_if,
3017 		    "Setup failure - unsupported SFP+ module type.\n");
3018 		return;
3019 	}
3020 	ix_handle_msf(sc);
3021 }
3022 
3023 /*
3024  * Handling MSF (multispeed fiber)
3025  */
3026 static void
3027 ix_handle_msf(struct ix_softc *sc)
3028 {
3029 	struct ixgbe_hw *hw = &sc->hw;
3030 	uint32_t autoneg;
3031 
3032 	autoneg = hw->phy.autoneg_advertised;
3033 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3034 		bool negotiate;
3035 
3036 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3037 	}
3038 	if (hw->mac.ops.setup_link != NULL)
3039 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3040 }
3041 
3042 static void
3043 ix_update_stats(struct ix_softc *sc)
3044 {
3045 	struct ifnet *ifp = &sc->arpcom.ac_if;
3046 	struct ixgbe_hw *hw = &sc->hw;
3047 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3048 	uint64_t total_missed_rx = 0;
3049 	int i;
3050 
3051 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3052 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3053 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3054 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3055 
3056 	/*
3057 	 * Note: These are for the 8 possible traffic classes, which
3058 	 * are unused in the current implementation, so only index 0 should
3059 	 * contain real data.
3060 	 */
3061 	for (i = 0; i < 8; i++) {
3062 		uint32_t mp;
3063 
3064 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3065 		/* missed_rx tallies misses for the gprc workaround */
3066 		missed_rx += mp;
3067 		/* global total per queue */
3068 		sc->stats.mpc[i] += mp;
3069 
3070 		/* Running comprehensive total for stats display */
3071 		total_missed_rx += sc->stats.mpc[i];
3072 
3073 		if (hw->mac.type == ixgbe_mac_82598EB) {
3074 			sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3075 			sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3076 			sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3077 			sc->stats.pxonrxc[i] +=
3078 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3079 		} else {
3080 			sc->stats.pxonrxc[i] +=
3081 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3082 		}
3083 		sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3084 		sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3085 		sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3086 		sc->stats.pxon2offc[i] +=
3087 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3088 	}
3089 	for (i = 0; i < 16; i++) {
3090 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3091 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3092 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3093 	}
3094 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3095 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3096 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3097 
3098 	/* Hardware workaround: gprc also counts missed packets, so subtract them */
3099 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3100 	sc->stats.gprc -= missed_rx;
3101 
3102 	if (hw->mac.type != ixgbe_mac_82598EB) {
3103 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3104 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3105 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3106 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3107 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3108 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3109 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3110 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3111 	} else {
3112 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3113 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3114 		/* 82598 only has a counter in the high register */
3115 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3116 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3117 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3118 	}
3119 
3120 	/*
3121 	 * Workaround: the mprc hardware counter incorrectly includes
3122 	 * broadcasts, so subtract those (82598 only).
3123 	 */
3124 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3125 	sc->stats.bprc += bprc;
3126 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3127 	if (hw->mac.type == ixgbe_mac_82598EB)
3128 		sc->stats.mprc -= bprc;
3129 
3130 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3131 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3132 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3133 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3134 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3135 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3136 
3137 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3138 	sc->stats.lxontxc += lxon;
3139 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3140 	sc->stats.lxofftxc += lxoff;
3141 	total = lxon + lxoff;
3142 
3143 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3144 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3145 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3146 	sc->stats.gptc -= total;
3147 	sc->stats.mptc -= total;
3148 	sc->stats.ptc64 -= total;
3149 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3150 
3151 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3152 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3153 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3154 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3155 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3156 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3157 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3158 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3159 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3160 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3161 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3162 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3163 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3164 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3165 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3166 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3167 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3168 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3169 	/* Only read FCoE counters on 82599 and later */
3170 	if (hw->mac.type != ixgbe_mac_82598EB) {
3171 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3172 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3173 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3174 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3175 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3176 	}
3177 
3178 	/* Rx Errors */
3179 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3180 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3181 }
3182 
3183 #if 0
3184 /*
3185  * Add sysctl variables, one per statistic, to the system.
3186  */
3187 static void
3188 ix_add_hw_stats(struct ix_softc *sc)
3189 {
3190 
3191 	device_t dev = sc->dev;
3192 
3193 	struct ix_tx_ring *txr = sc->tx_rings;
3194 	struct ix_rx_ring *rxr = sc->rx_rings;
3195 
3196 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3197 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3198 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3199 	struct ixgbe_hw_stats *stats = &sc->stats;
3200 
3201 	struct sysctl_oid *stat_node, *queue_node;
3202 	struct sysctl_oid_list *stat_list, *queue_list;
3203 
3204 #define QUEUE_NAME_LEN 32
3205 	char namebuf[QUEUE_NAME_LEN];
3206 
3207 	/* MAC stats get their own sub node */
3208 
3209 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3210 				    CTLFLAG_RD, NULL, "MAC Statistics");
3211 	stat_list = SYSCTL_CHILDREN(stat_node);
3212 
3213 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3214 			CTLFLAG_RD, &stats->crcerrs,
3215 			"CRC Errors");
3216 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3217 			CTLFLAG_RD, &stats->illerrc,
3218 			"Illegal Byte Errors");
3219 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3220 			CTLFLAG_RD, &stats->errbc,
3221 			"Byte Errors");
3222 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3223 			CTLFLAG_RD, &stats->mspdc,
3224 			"MAC Short Packets Discarded");
3225 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3226 			CTLFLAG_RD, &stats->mlfc,
3227 			"MAC Local Faults");
3228 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3229 			CTLFLAG_RD, &stats->mrfc,
3230 			"MAC Remote Faults");
3231 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3232 			CTLFLAG_RD, &stats->rlec,
3233 			"Receive Length Errors");
3234 
3235 	/* Flow Control stats */
3236 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3237 			CTLFLAG_RD, &stats->lxontxc,
3238 			"Link XON Transmitted");
3239 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3240 			CTLFLAG_RD, &stats->lxonrxc,
3241 			"Link XON Received");
3242 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3243 			CTLFLAG_RD, &stats->lxofftxc,
3244 			"Link XOFF Transmitted");
3245 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3246 			CTLFLAG_RD, &stats->lxoffrxc,
3247 			"Link XOFF Received");
3248 
3249 	/* Packet Reception Stats */
3250 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3251 			CTLFLAG_RD, &stats->tor,
3252 			"Total Octets Received");
3253 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3254 			CTLFLAG_RD, &stats->gorc,
3255 			"Good Octets Received");
3256 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3257 			CTLFLAG_RD, &stats->tpr,
3258 			"Total Packets Received");
3259 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3260 			CTLFLAG_RD, &stats->gprc,
3261 			"Good Packets Received");
3262 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3263 			CTLFLAG_RD, &stats->mprc,
3264 			"Multicast Packets Received");
3265 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3266 			CTLFLAG_RD, &stats->bprc,
3267 			"Broadcast Packets Received");
3268 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3269 			CTLFLAG_RD, &stats->prc64,
3270 			"64 byte frames received ");
3271 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3272 			CTLFLAG_RD, &stats->prc127,
3273 			"65-127 byte frames received");
3274 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3275 			CTLFLAG_RD, &stats->prc255,
3276 			"128-255 byte frames received");
3277 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3278 			CTLFLAG_RD, &stats->prc511,
3279 			"256-511 byte frames received");
3280 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3281 			CTLFLAG_RD, &stats->prc1023,
3282 			"512-1023 byte frames received");
3283 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3284 			CTLFLAG_RD, &stats->prc1522,
3285 			"1023-1522 byte frames received");
3286 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3287 			CTLFLAG_RD, &stats->ruc,
3288 			"Receive Undersized");
3289 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3290 			CTLFLAG_RD, &stats->rfc,
3291 			"Fragmented Packets Received ");
3292 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3293 			CTLFLAG_RD, &stats->roc,
3294 			"Oversized Packets Received");
3295 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3296 			CTLFLAG_RD, &stats->rjc,
3297 			"Received Jabber");
3298 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3299 			CTLFLAG_RD, &stats->mngprc,
3300 			"Management Packets Received");
3301 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3302 			CTLFLAG_RD, &stats->mngpdc,
3303 			"Management Packets Dropped");
3304 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3305 			CTLFLAG_RD, &stats->xec,
3306 			"Checksum Errors");
3307 
3308 	/* Packet Transmission Stats */
3309 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3310 			CTLFLAG_RD, &stats->gotc,
3311 			"Good Octets Transmitted");
3312 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3313 			CTLFLAG_RD, &stats->tpt,
3314 			"Total Packets Transmitted");
3315 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3316 			CTLFLAG_RD, &stats->gptc,
3317 			"Good Packets Transmitted");
3318 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3319 			CTLFLAG_RD, &stats->bptc,
3320 			"Broadcast Packets Transmitted");
3321 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3322 			CTLFLAG_RD, &stats->mptc,
3323 			"Multicast Packets Transmitted");
3324 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3325 			CTLFLAG_RD, &stats->mngptc,
3326 			"Management Packets Transmitted");
3327 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3328 			CTLFLAG_RD, &stats->ptc64,
3329 			"64 byte frames transmitted ");
3330 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3331 			CTLFLAG_RD, &stats->ptc127,
3332 			"65-127 byte frames transmitted");
3333 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3334 			CTLFLAG_RD, &stats->ptc255,
3335 			"128-255 byte frames transmitted");
3336 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3337 			CTLFLAG_RD, &stats->ptc511,
3338 			"256-511 byte frames transmitted");
3339 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3340 			CTLFLAG_RD, &stats->ptc1023,
3341 			"512-1023 byte frames transmitted");
3342 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3343 			CTLFLAG_RD, &stats->ptc1522,
3344 			"1024-1522 byte frames transmitted");
3345 }
3346 #endif
3347 
3348 /*
3349  * Enable the hardware to drop packets when the buffer is full.
3350  * This is useful when multiple RX rings are used, so that no
3351  * single RX ring being full stalls the entire RX engine.  We
3352  * only enable this when multiple RX rings are used and when
3353  * flow control is disabled.
3354  */
3355 static void
3356 ix_enable_rx_drop(struct ix_softc *sc)
3357 {
3358 	struct ixgbe_hw *hw = &sc->hw;
3359 	int i;
3360 
3361 	if (bootverbose) {
3362 		if_printf(&sc->arpcom.ac_if,
3363 		    "flow control %d, enable RX drop\n", sc->fc);
3364 	}
3365 
3366 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3367 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3368 
3369 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3370 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3371 	}
3372 }
3373 
3374 static void
3375 ix_disable_rx_drop(struct ix_softc *sc)
3376 {
3377 	struct ixgbe_hw *hw = &sc->hw;
3378 	int i;
3379 
3380 	if (bootverbose) {
3381 		if_printf(&sc->arpcom.ac_if,
3382 		    "flow control %d, disable RX drop\n", sc->fc);
3383 	}
3384 
3385 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3386 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3387 
3388 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3389 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3390 	}
3391 }
3392 
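/*
 * Sysctl handler for the flow control setting.  It accepts the ixgbe_fc_*
 * values (rx_pause, tx_pause, full, none) and, when more than one RX ring
 * is active, toggles per-ring RX drop to match (see ix_enable_rx_drop()).
 */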
3393 static int
3394 ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS)
3395 {
3396 	struct ix_softc *sc = (struct ix_softc *)arg1;
3397 	struct ifnet *ifp = &sc->arpcom.ac_if;
3398 	int error, fc;
3399 
3400 	fc = sc->fc;
3401 	error = sysctl_handle_int(oidp, &fc, 0, req);
3402 	if (error || req->newptr == NULL)
3403 		return error;
3404 
3405 	switch (fc) {
3406 	case ixgbe_fc_rx_pause:
3407 	case ixgbe_fc_tx_pause:
3408 	case ixgbe_fc_full:
3409 	case ixgbe_fc_none:
3410 		break;
3411 	default:
3412 		return EINVAL;
3413 	}
3414 
3415 	ifnet_serialize_all(ifp);
3416 
3417 	/* Don't bother if it's not changed */
3418 	if (sc->fc == fc)
3419 		goto done;
3420 	sc->fc = fc;
3421 
3422 	/* Don't do anything if the interface is not up yet */
3423 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3424 		goto done;
3425 
3426 	if (sc->rx_ring_inuse > 1) {
3427 		switch (sc->fc) {
3428 		case ixgbe_fc_rx_pause:
3429 		case ixgbe_fc_tx_pause:
3430 		case ixgbe_fc_full:
3431 			ix_disable_rx_drop(sc);
3432 			break;
3433 
3434 		case ixgbe_fc_none:
3435 			ix_enable_rx_drop(sc);
3436 			break;
3437 
3438 		default:
3439 			panic("leading fc check mismatch");
3440 		}
3441 	}
3442 
3443 	sc->hw.fc.requested_mode = sc->fc;
3444 	/* Don't autoneg if forcing a value */
3445 	sc->hw.fc.disable_fc_autoneg = TRUE;
3446 	ixgbe_fc_enable(&sc->hw);
3447 
3448 done:
3449 	ifnet_deserialize_all(ifp);
3450 	return error;
3451 }
3452 
3453 #ifdef foo
3454 /* XXX not working properly w/ 82599 connected w/ DAC */
3455 /* XXX only work after the interface is up */
3456 static int
3457 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3458 {
3459 	struct ix_softc *sc = (struct ix_softc *)arg1;
3460 	struct ifnet *ifp = &sc->arpcom.ac_if;
3461 	struct ixgbe_hw *hw = &sc->hw;
3462 	ixgbe_link_speed speed;
3463 	int error, advspeed;
3464 
3465 	advspeed = sc->advspeed;
3466 	error = sysctl_handle_int(oidp, &advspeed, 0, req);
3467 	if (error || req->newptr == NULL)
3468 		return error;
3469 
3470 	if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3471 	    hw->phy.multispeed_fiber))
3472 		return EOPNOTSUPP;
3473 	if (hw->mac.ops.setup_link == NULL)
3474 		return EOPNOTSUPP;
3475 
3476 	switch (advspeed) {
3477 	case 0:	/* auto */
3478 		speed = IXGBE_LINK_SPEED_UNKNOWN;
3479 		break;
3480 
3481 	case 1:	/* 1Gb */
3482 		speed = IXGBE_LINK_SPEED_1GB_FULL;
3483 		break;
3484 
3485 	case 2:	/* 100Mb */
3486 		speed = IXGBE_LINK_SPEED_100_FULL;
3487 		break;
3488 
3489 	case 3:	/* 1Gb/10Gb */
3490 		speed = IXGBE_LINK_SPEED_1GB_FULL |
3491 		    IXGBE_LINK_SPEED_10GB_FULL;
3492 		break;
3493 
3494 	default:
3495 		return EINVAL;
3496 	}
3497 
3498 	ifnet_serialize_all(ifp);
3499 
3500 	if (sc->advspeed == advspeed) /* no change */
3501 		goto done;
3502 
3503 	if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3504 	    hw->mac.type != ixgbe_mac_X540) {
3505 		error = EOPNOTSUPP;
3506 		goto done;
3507 	}
3508 
3509 	sc->advspeed = advspeed;
3510 
3511 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3512 		goto done;
3513 
3514 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3515 		ix_config_link(sc);
3516 	} else {
3517 		hw->mac.autotry_restart = TRUE;
3518 		hw->mac.ops.setup_link(hw, speed, sc->link_up);
3519 	}
3520 
3521 done:
3522 	ifnet_deserialize_all(ifp);
3523 	return error;
3524 }
3525 #endif
3526 
3527 static void
3528 ix_setup_serialize(struct ix_softc *sc)
3529 {
3530 	int i = 0, j;
3531 
3532 	/* Main + RX + TX */
3533 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3534 	sc->serializes =
3535 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3536 	        M_DEVBUF, M_WAITOK | M_ZERO);
3537 
3538 	/*
3539 	 * Setup serializes
3540 	 *
3541 	 * NOTE: Order is critical
3542 	 */
3543 
3544 	KKASSERT(i < sc->nserialize);
3545 	sc->serializes[i++] = &sc->main_serialize;
3546 
3547 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3548 		KKASSERT(i < sc->nserialize);
3549 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3550 	}
3551 
3552 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3553 		KKASSERT(i < sc->nserialize);
3554 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3555 	}
3556 
3557 	KKASSERT(i == sc->nserialize);
3558 }
3559 
3560 static int
3561 ix_alloc_intr(struct ix_softc *sc)
3562 {
3563 	struct ix_intr_data *intr;
3564 	u_int intr_flags;
3565 
3566 	ix_alloc_msix(sc);
3567 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3568 		ix_set_ring_inuse(sc, FALSE);
3569 		return 0;
3570 	}
3571 
3572 	if (sc->intr_data != NULL)
3573 		kfree(sc->intr_data, M_DEVBUF);
3574 
3575 	sc->intr_cnt = 1;
3576 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3577 	    M_WAITOK | M_ZERO);
3578 	intr = &sc->intr_data[0];
3579 
3580 	/*
3581 	 * Allocate MSI/legacy interrupt resource
3582 	 */
3583 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3584 	    &intr->intr_rid, &intr_flags);
3585 
3586 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3587 	    &intr->intr_rid, intr_flags);
3588 	if (intr->intr_res == NULL) {
3589 		device_printf(sc->dev, "Unable to allocate bus resource: "
3590 		    "interrupt\n");
3591 		return ENXIO;
3592 	}
3593 
3594 	intr->intr_serialize = &sc->main_serialize;
3595 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3596 	intr->intr_func = ix_intr;
3597 	intr->intr_funcarg = sc;
3598 	intr->intr_rate = IX_INTR_RATE;
3599 	intr->intr_use = IX_INTR_USE_RXTX;
3600 
3601 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3602 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3603 
3604 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3605 
3606 	ix_set_ring_inuse(sc, FALSE);
3607 
3608 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3609 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3610 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3611 
3612 	return 0;
3613 }
3614 
3615 static void
3616 ix_free_intr(struct ix_softc *sc)
3617 {
3618 	if (sc->intr_data == NULL)
3619 		return;
3620 
3621 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3622 		struct ix_intr_data *intr = &sc->intr_data[0];
3623 
3624 		KKASSERT(sc->intr_cnt == 1);
3625 		if (intr->intr_res != NULL) {
3626 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3627 			    intr->intr_rid, intr->intr_res);
3628 		}
3629 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3630 			pci_release_msi(sc->dev);
3631 
3632 		kfree(sc->intr_data, M_DEVBUF);
3633 	} else {
3634 		ix_free_msix(sc, TRUE);
3635 	}
3636 }
3637 
3638 static void
3639 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3640 {
3641 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3642 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3643 	if (bootverbose) {
3644 		if_printf(&sc->arpcom.ac_if,
3645 		    "RX rings %d/%d, TX rings %d/%d\n",
3646 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3647 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3648 	}
3649 }
3650 
3651 static int
3652 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3653 {
3654 	if (!IX_ENABLE_HWRSS(sc))
3655 		return 1;
3656 
3657 	if (polling)
3658 		return sc->rx_ring_cnt;
3659 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3660 		return IX_MIN_RXRING_RSS;
3661 	else
3662 		return sc->rx_ring_msix;
3663 }
3664 
3665 static int
3666 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3667 {
3668 	if (!IX_ENABLE_HWTSS(sc))
3669 		return 1;
3670 
3671 	if (polling)
3672 		return sc->tx_ring_cnt;
3673 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3674 		return 1;
3675 	else
3676 		return sc->tx_ring_msix;
3677 }
3678 
3679 static int
3680 ix_setup_intr(struct ix_softc *sc)
3681 {
3682 	int i;
3683 
3684 	for (i = 0; i < sc->intr_cnt; ++i) {
3685 		struct ix_intr_data *intr = &sc->intr_data[i];
3686 		int error;
3687 
3688 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3689 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3690 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3691 		if (error) {
3692 			device_printf(sc->dev, "can't setup intr %d\n", i);
3693 			ix_teardown_intr(sc, i);
3694 			return error;
3695 		}
3696 	}
3697 	return 0;
3698 }
3699 
3700 static void
3701 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3702 {
3703 	int i;
3704 
3705 	if (sc->intr_data == NULL)
3706 		return;
3707 
3708 	for (i = 0; i < intr_cnt; ++i) {
3709 		struct ix_intr_data *intr = &sc->intr_data[i];
3710 
3711 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3712 	}
3713 }
3714 
3715 static void
3716 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3717 {
3718 	struct ix_softc *sc = ifp->if_softc;
3719 
3720 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3721 }
3722 
3723 static void
3724 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3725 {
3726 	struct ix_softc *sc = ifp->if_softc;
3727 
3728 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3729 }
3730 
3731 static int
3732 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3733 {
3734 	struct ix_softc *sc = ifp->if_softc;
3735 
3736 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3737 }
3738 
3739 #ifdef INVARIANTS
3740 
3741 static void
3742 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3743     boolean_t serialized)
3744 {
3745 	struct ix_softc *sc = ifp->if_softc;
3746 
3747 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3748 	    serialized);
3749 }
3750 
3751 #endif	/* INVARIANTS */
3752 
3753 static void
3754 ix_free_rings(struct ix_softc *sc)
3755 {
3756 	int i;
3757 
3758 	if (sc->tx_rings != NULL) {
3759 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3760 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3761 
3762 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3763 		}
3764 		kfree(sc->tx_rings, M_DEVBUF);
3765 	}
3766 
3767 	if (sc->rx_rings != NULL) {
3768 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3769 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3770 
3771 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3772 		}
3773 		kfree(sc->rx_rings, M_DEVBUF);
3774 	}
3775 
3776 	if (sc->parent_tag != NULL)
3777 		bus_dma_tag_destroy(sc->parent_tag);
3778 }
3779 
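/*
 * Per-subqueue TX watchdog.  The check is skipped while the MAC is
 * paused by flow control (TFCS.TXOFF); otherwise the ring state is
 * logged and the interface is reinitialized.
 */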
3780 static void
3781 ix_watchdog(struct ifaltq_subque *ifsq)
3782 {
3783 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3784 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3785 	struct ix_softc *sc = ifp->if_softc;
3786 	int i;
3787 
3788 	KKASSERT(txr->tx_ifsq == ifsq);
3789 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3790 
3791 	/*
3792 	 * If the interface has been paused then don't do the watchdog check
3793 	 */
3794 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3795 		txr->tx_watchdog.wd_timer = 5;
3796 		return;
3797 	}
3798 
3799 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3800 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3801 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3802 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3803 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3804 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3805 
3806 	ix_init(sc);
3807 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3808 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3809 }
3810 
3811 static void
3812 ix_free_tx_ring(struct ix_tx_ring *txr)
3813 {
3814 	int i;
3815 
3816 	for (i = 0; i < txr->tx_ndesc; ++i) {
3817 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3818 
3819 		if (txbuf->m_head != NULL) {
3820 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3821 			m_freem(txbuf->m_head);
3822 			txbuf->m_head = NULL;
3823 		}
3824 	}
3825 }
3826 
3827 static void
3828 ix_free_rx_ring(struct ix_rx_ring *rxr)
3829 {
3830 	int i;
3831 
3832 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3833 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3834 
3835 		if (rxbuf->fmp != NULL) {
3836 			m_freem(rxbuf->fmp);
3837 			rxbuf->fmp = NULL;
3838 			rxbuf->lmp = NULL;
3839 		} else {
3840 			KKASSERT(rxbuf->lmp == NULL);
3841 		}
3842 		if (rxbuf->m_head != NULL) {
3843 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3844 			m_freem(rxbuf->m_head);
3845 			rxbuf->m_head = NULL;
3846 		}
3847 	}
3848 }
3849 
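/*
 * Allocate a receive mbuf for slot i and DMA-load it through the
 * spare map.  On success the slot's map and the spare map are
 * swapped and the RX descriptor is rewritten to point at the new
 * buffer.
 */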
3850 static int
3851 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3852 {
3853 	struct mbuf *m;
3854 	bus_dma_segment_t seg;
3855 	bus_dmamap_t map;
3856 	struct ix_rx_buf *rxbuf;
3857 	int flags, error, nseg;
3858 
3859 	flags = MB_DONTWAIT;
3860 	if (__predict_false(wait))
3861 		flags = MB_WAIT;
3862 
3863 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3864 	if (m == NULL) {
3865 		if (wait) {
3866 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3867 			    "Unable to allocate RX mbuf\n");
3868 		}
3869 		return ENOBUFS;
3870 	}
3871 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3872 
3873 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3874 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3875 	if (error) {
3876 		m_freem(m);
3877 		if (wait) {
3878 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3879 			    "Unable to load RX mbuf\n");
3880 		}
3881 		return error;
3882 	}
3883 
3884 	rxbuf = &rxr->rx_buf[i];
3885 	if (rxbuf->m_head != NULL)
3886 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3887 
3888 	map = rxbuf->map;
3889 	rxbuf->map = rxr->rx_sparemap;
3890 	rxr->rx_sparemap = map;
3891 
3892 	rxbuf->m_head = m;
3893 	rxbuf->paddr = seg.ds_addr;
3894 
3895 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3896 	return 0;
3897 }
3898 
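/*
 * Create the per-device sysctl tree and attach the ring and
 * descriptor counts, the register write batching and TX interrupt
 * coalescing knobs, the per-use interrupt rates and the flow
 * control setting.
 */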
3899 static void
3900 ix_add_sysctl(struct ix_softc *sc)
3901 {
3902 #ifdef IX_RSS_DEBUG
3903 	char node[32];
3904 	int i;
3905 #endif
3906 
3907 	sysctl_ctx_init(&sc->sysctl_ctx);
3908 	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
3909 	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
3910 	    device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
3911 	if (sc->sysctl_tree == NULL) {
3912 		device_printf(sc->dev, "can't add sysctl node\n");
3913 		return;
3914 	}
3915 
3916 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3917 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
3918 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3919 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
3920 	    "# of RX rings used");
3921 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3922 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
3923 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3924 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
3925 	    "# of TX rings used");
3926 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3927 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
3928 	    sc, 0, ix_sysctl_rxd, "I",
3929 	    "# of RX descs");
3930 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3931 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
3932 	    sc, 0, ix_sysctl_txd, "I",
3933 	    "# of TX descs");
3934 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3935 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
3936 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
3937 	    "# of segments sent before write to hardware register");
3938 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3939 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
3940 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
3941 	    "# of received segments sent before write to hardware register");
3942 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3943 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
3944 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
3945 	    "# of segments per TX interrupt");
3946 
3947 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
3948 do { \
3949 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
3950 	    ix_sysctl_##name, #use " interrupt rate"); \
3951 } while (0)
3952 
3953 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
3954 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
3955 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
3956 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
3957 
3958 #undef IX_ADD_INTR_RATE_SYSCTL
3959 
3960 #ifdef IX_RSS_DEBUG
3961 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3962 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
3963 	    "RSS debug level");
3964 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3965 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
3966 		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
3967 		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
3968 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
3969 	}
3970 #endif
3971 
3972 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3973 	    OID_AUTO, "flowctrl", CTLTYPE_INT | CTLFLAG_RW,
3974 	    sc, 0, ix_sysctl_flowctrl, "I",
3975 	    "flow control, 0 - off, 1 - rx pause, 2 - tx pause, 3 - full");
3976 
3977 #ifdef foo
3978 	/*
3979 	 * Allow a kind of speed control by forcing the autoneg
3980 	 * advertised speed list to only a certain value, this
3981 	 * supports 1G on 82599 devices, and 100Mb on X540.
3982 	 */
3983 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3984 	    OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
3985 	    sc, 0, ix_sysctl_advspeed, "I",
3986 	    "advertised link speed, "
3987 	    "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
3988 #endif
3989 
3990 #if 0
3991 	ix_add_hw_stats(sc);
3992 #endif
3994 }
3995 
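/*
 * The sysctl handlers below share one pattern: report the value of
 * ring 0 and, when a new value is written, propagate it to every
 * ring while the interface is serialized.
 */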
3996 static int
3997 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
3998 {
3999 	struct ix_softc *sc = (void *)arg1;
4000 	struct ifnet *ifp = &sc->arpcom.ac_if;
4001 	int error, nsegs, i;
4002 
4003 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4004 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4005 	if (error || req->newptr == NULL)
4006 		return error;
4007 	if (nsegs < 0)
4008 		return EINVAL;
4009 
4010 	ifnet_serialize_all(ifp);
4011 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4012 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4013 	ifnet_deserialize_all(ifp);
4014 
4015 	return 0;
4016 }
4017 
4018 static int
4019 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4020 {
4021 	struct ix_softc *sc = (void *)arg1;
4022 	struct ifnet *ifp = &sc->arpcom.ac_if;
4023 	int error, nsegs, i;
4024 
4025 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4026 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4027 	if (error || req->newptr == NULL)
4028 		return error;
4029 	if (nsegs < 0)
4030 		return EINVAL;
4031 
4032 	ifnet_serialize_all(ifp);
4033 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4034 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4035 	ifnet_deserialize_all(ifp);
4036 
4037 	return 0;
4038 }
4039 
4040 static int
4041 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4042 {
4043 	struct ix_softc *sc = (void *)arg1;
4044 	int txd;
4045 
4046 	txd = sc->tx_rings[0].tx_ndesc;
4047 	return sysctl_handle_int(oidp, &txd, 0, req);
4048 }
4049 
4050 static int
4051 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4052 {
4053 	struct ix_softc *sc = (void *)arg1;
4054 	int rxd;
4055 
4056 	rxd = sc->rx_rings[0].rx_ndesc;
4057 	return sysctl_handle_int(oidp, &rxd, 0, req);
4058 }
4059 
4060 static int
4061 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4062 {
4063 	struct ix_softc *sc = (void *)arg1;
4064 	struct ifnet *ifp = &sc->arpcom.ac_if;
4065 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4066 	int error, nsegs;
4067 
4068 	nsegs = txr->tx_intr_nsegs;
4069 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4070 	if (error || req->newptr == NULL)
4071 		return error;
4072 	if (nsegs < 0)
4073 		return EINVAL;
4074 
4075 	ifnet_serialize_all(ifp);
4076 
4077 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4078 		error = EINVAL;
4079 	} else {
4080 		int i;
4081 
4082 		error = 0;
4083 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4084 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4085 	}
4086 
4087 	ifnet_deserialize_all(ifp);
4088 
4089 	return error;
4090 }
4091 
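/*
 * Program the EITR (interrupt throttling) register for a vector:
 * the requested rate (interrupts/sec) is turned into an interval
 * value (1000000000 / 256 / rate) and clamped to the limits of the
 * MAC in use before being merged back into the register.
 */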
4092 static void
4093 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4094 {
4095 	uint32_t eitr, eitr_intvl;
4096 
4097 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4098 	eitr_intvl = 1000000000 / 256 / rate;
4099 
4100 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4101 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4102 		if (eitr_intvl == 0)
4103 			eitr_intvl = 1;
4104 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4105 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4106 	} else {
4107 		eitr &= ~IX_EITR_INTVL_MASK;
4108 
4109 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4110 		if (eitr_intvl == 0)
4111 			eitr_intvl = IX_EITR_INTVL_MIN;
4112 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4113 			eitr_intvl = IX_EITR_INTVL_MAX;
4114 	}
4115 	eitr |= eitr_intvl;
4116 
4117 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4118 }
4119 
4120 static int
4121 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4122 {
4123 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4124 }
4125 
4126 static int
4127 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4128 {
4129 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4130 }
4131 
4132 static int
4133 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4134 {
4135 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4136 }
4137 
4138 static int
4139 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4140 {
4141 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4142 }
4143 
4144 static int
4145 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4146 {
4147 	struct ix_softc *sc = (void *)arg1;
4148 	struct ifnet *ifp = &sc->arpcom.ac_if;
4149 	int error, rate, i;
4150 
4151 	rate = 0;
4152 	for (i = 0; i < sc->intr_cnt; ++i) {
4153 		if (sc->intr_data[i].intr_use == use) {
4154 			rate = sc->intr_data[i].intr_rate;
4155 			break;
4156 		}
4157 	}
4158 
4159 	error = sysctl_handle_int(oidp, &rate, 0, req);
4160 	if (error || req->newptr == NULL)
4161 		return error;
4162 	if (rate <= 0)
4163 		return EINVAL;
4164 
4165 	ifnet_serialize_all(ifp);
4166 
4167 	for (i = 0; i < sc->intr_cnt; ++i) {
4168 		if (sc->intr_data[i].intr_use == use) {
4169 			sc->intr_data[i].intr_rate = rate;
4170 			if (ifp->if_flags & IFF_RUNNING)
4171 				ix_set_eitr(sc, i, rate);
4172 		}
4173 	}
4174 
4175 	ifnet_deserialize_all(ifp);
4176 
4177 	return error;
4178 }
4179 
4180 static void
4181 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4182     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4183 {
4184 	int i;
4185 
4186 	for (i = 0; i < sc->intr_cnt; ++i) {
4187 		if (sc->intr_data[i].intr_use == use) {
4188 			SYSCTL_ADD_PROC(&sc->sysctl_ctx,
4189 			    SYSCTL_CHILDREN(sc->sysctl_tree),
4190 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4191 			    sc, 0, handler, "I", desc);
4192 			break;
4193 		}
4194 	}
4195 }
4196 
4197 static void
4198 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4199 {
4200 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4201 		sc->timer_cpuid = 0; /* XXX fixed */
4202 	else
4203 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4204 }
4205 
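/*
 * MSI-X allocation strategy: round the usable vector count down to a
 * power of two for the RX/TX rings, always keep one extra vector for
 * link/status, and either give RX and TX independent vectors or
 * aggregate one RX/TX ring pair per vector, depending on how many
 * vectors the device offers and the "msix.agg_rxtx" tunable.
 */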
4206 static void
4207 ix_alloc_msix(struct ix_softc *sc)
4208 {
4209 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4210 	struct ix_intr_data *intr;
4211 	int i, x, error;
4212 	int offset, offset_def, agg_rxtx, ring_max;
4213 	boolean_t aggregate, setup = FALSE;
4214 
4215 	msix_enable = ix_msix_enable;
4216 	/*
4217 	 * Don't enable MSI-X on 82598 by default, see:
4218 	 * 82598 specification update errata #38
4219 	 */
4220 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4221 		msix_enable = 0;
4222 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4223 	if (!msix_enable)
4224 		return;
4225 
4226 	msix_cnt = pci_msix_count(sc->dev);
4227 #ifdef IX_MSIX_DEBUG
4228 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4229 #endif
4230 	if (msix_cnt <= 1) {
4231 		/* A single MSI-X vector is no better than MSI/legacy */
4232 		return;
4233 	}
4234 
4235 	i = 0;
4236 	while ((1 << (i + 1)) <= msix_cnt)
4237 		++i;
4238 	msix_cnt2 = 1 << i;
4239 
4240 	if (bootverbose) {
4241 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4242 		    msix_cnt2, msix_cnt);
4243 	}
4244 
4245 	KKASSERT(msix_cnt >= msix_cnt2);
4246 	if (msix_cnt == msix_cnt2) {
4247 		/* We need at least one MSI-X for link status */
4248 		msix_cnt2 >>= 1;
4249 		if (msix_cnt2 <= 1) {
4250 			/* One MSI-X for RX/TX does not make sense */
4251 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4252 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4253 			return;
4254 		}
4255 		KKASSERT(msix_cnt > msix_cnt2);
4256 
4257 		if (bootverbose) {
4258 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4259 			    msix_cnt2, msix_cnt);
4260 		}
4261 	}
4262 
4263 	/*
4264 	 * Make sure that we do not exceed the limits of the interrupt
4265 	 * related registers (EIMS, etc.).
4266 	 *
4267 	 * NOTE: msix_cnt > msix_cnt2 when we reach here.
4268 	 */
4269 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4270 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4271 			msix_cnt2 = IX_MAX_MSIX_82598;
4272 	} else {
4273 		if (msix_cnt2 > IX_MAX_MSIX)
4274 			msix_cnt2 = IX_MAX_MSIX;
4275 	}
4276 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4277 
4278 	if (bootverbose) {
4279 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4280 		    msix_cnt2, msix_cnt);
4281 	}
4282 
4283 	sc->rx_ring_msix = sc->rx_ring_cnt;
4284 	if (sc->rx_ring_msix > msix_cnt2)
4285 		sc->rx_ring_msix = msix_cnt2;
4286 
4287 	sc->tx_ring_msix = sc->tx_ring_cnt;
4288 	if (sc->tx_ring_msix > msix_cnt2)
4289 		sc->tx_ring_msix = msix_cnt2;
4290 
4291 	ring_max = sc->rx_ring_msix;
4292 	if (ring_max < sc->tx_ring_msix)
4293 		ring_max = sc->tx_ring_msix;
4294 
4295 	/* Allow user to force independent RX/TX MSI-X handling */
4296 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4297 	    ix_msix_agg_rxtx);
4298 
4299 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4300 		/*
4301 		 * Independent TX/RX MSI-X
4302 		 */
4303 		aggregate = FALSE;
4304 		if (bootverbose)
4305 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4306 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4307 	} else {
4308 		/*
4309 		 * Aggregate TX/RX MSI-X
4310 		 */
4311 		aggregate = TRUE;
4312 		if (bootverbose)
4313 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4314 		alloc_cnt = msix_cnt2;
4315 		if (alloc_cnt > ring_max)
4316 			alloc_cnt = ring_max;
4317 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4318 		    alloc_cnt >= sc->tx_ring_msix);
4319 	}
4320 	++alloc_cnt;	/* For status */
4321 
4322 	if (bootverbose) {
4323 		device_printf(sc->dev, "MSI-X alloc %d, "
4324 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4325 		    sc->rx_ring_msix, sc->tx_ring_msix);
4326 	}
4327 
4328 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4329 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4330 	    &sc->msix_mem_rid, RF_ACTIVE);
4331 	if (sc->msix_mem_res == NULL) {
4332 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4333 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4334 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4335 		if (sc->msix_mem_res == NULL) {
4336 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4337 			return;
4338 		}
4339 	}
4340 
4341 	sc->intr_cnt = alloc_cnt;
4342 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4343 	    M_DEVBUF, M_WAITOK | M_ZERO);
4344 	for (x = 0; x < sc->intr_cnt; ++x) {
4345 		intr = &sc->intr_data[x];
4346 		intr->intr_rid = -1;
4347 		intr->intr_rate = IX_INTR_RATE;
4348 	}
4349 
4350 	x = 0;
4351 	if (!aggregate) {
4352 		/*
4353 		 * RX rings
4354 		 */
4355 		if (sc->rx_ring_msix == ncpus2) {
4356 			offset = 0;
4357 		} else {
4358 			offset_def = (sc->rx_ring_msix *
4359 			    device_get_unit(sc->dev)) % ncpus2;
4360 
4361 			offset = device_getenv_int(sc->dev,
4362 			    "msix.rxoff", offset_def);
4363 			if (offset >= ncpus2 ||
4364 			    offset % sc->rx_ring_msix != 0) {
4365 				device_printf(sc->dev,
4366 				    "invalid msix.rxoff %d, use %d\n",
4367 				    offset, offset_def);
4368 				offset = offset_def;
4369 			}
4370 		}
4371 		ix_conf_rx_msix(sc, 0, &x, offset);
4372 
4373 		/*
4374 		 * TX rings
4375 		 */
4376 		if (sc->tx_ring_msix == ncpus2) {
4377 			offset = 0;
4378 		} else {
4379 			offset_def = (sc->tx_ring_msix *
4380 			    device_get_unit(sc->dev)) % ncpus2;
4381 
4382 			offset = device_getenv_int(sc->dev,
4383 			    "msix.txoff", offset_def);
4384 			if (offset >= ncpus2 ||
4385 			    offset % sc->tx_ring_msix != 0) {
4386 				device_printf(sc->dev,
4387 				    "invalid msix.txoff %d, use %d\n",
4388 				    offset, offset_def);
4389 				offset = offset_def;
4390 			}
4391 		}
4392 		ix_conf_tx_msix(sc, 0, &x, offset);
4393 	} else {
4394 		int ring_agg;
4395 
4396 		ring_agg = sc->rx_ring_msix;
4397 		if (ring_agg > sc->tx_ring_msix)
4398 			ring_agg = sc->tx_ring_msix;
4399 
4400 		if (ring_max == ncpus2) {
4401 			offset = 0;
4402 		} else {
4403 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4404 			    ncpus2;
4405 
4406 			offset = device_getenv_int(sc->dev, "msix.off",
4407 			    offset_def);
4408 			if (offset >= ncpus2 || offset % ring_max != 0) {
4409 				device_printf(sc->dev,
4410 				    "invalid msix.off %d, use %d\n",
4411 				    offset, offset_def);
4412 				offset = offset_def;
4413 			}
4414 		}
4415 
4416 		for (i = 0; i < ring_agg; ++i) {
4417 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4418 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4419 
4420 			KKASSERT(x < sc->intr_cnt);
4421 			rxr->rx_intr_vec = x;
4422 			ix_setup_msix_eims(sc, x,
4423 			    &rxr->rx_eims, &rxr->rx_eims_val);
4424 			rxr->rx_txr = txr;
4425 			/* NOTE: Leave TX ring's intr_vec negative */
4426 
4427 			intr = &sc->intr_data[x++];
4428 
4429 			intr->intr_serialize = &rxr->rx_serialize;
4430 			intr->intr_func = ix_msix_rxtx;
4431 			intr->intr_funcarg = rxr;
4432 			intr->intr_use = IX_INTR_USE_RXTX;
4433 
4434 			intr->intr_cpuid = i + offset;
4435 			KKASSERT(intr->intr_cpuid < ncpus2);
4436 			txr->tx_intr_cpuid = intr->intr_cpuid;
4437 
4438 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4439 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4440 			intr->intr_desc = intr->intr_desc0;
4441 		}
4442 
4443 		if (ring_agg != ring_max) {
4444 			if (ring_max == sc->tx_ring_msix)
4445 				ix_conf_tx_msix(sc, i, &x, offset);
4446 			else
4447 				ix_conf_rx_msix(sc, i, &x, offset);
4448 		}
4449 	}
4450 
4451 	/*
4452 	 * Status MSI-X
4453 	 */
4454 	KKASSERT(x < sc->intr_cnt);
4455 	sc->sts_msix_vec = x;
4456 
4457 	intr = &sc->intr_data[x++];
4458 
4459 	intr->intr_serialize = &sc->main_serialize;
4460 	intr->intr_func = ix_msix_status;
4461 	intr->intr_funcarg = sc;
4462 	intr->intr_cpuid = 0;
4463 	intr->intr_use = IX_INTR_USE_STATUS;
4464 
4465 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4466 	    device_get_nameunit(sc->dev));
4467 	intr->intr_desc = intr->intr_desc0;
4468 
4469 	KKASSERT(x == sc->intr_cnt);
4470 
4471 	error = pci_setup_msix(sc->dev);
4472 	if (error) {
4473 		device_printf(sc->dev, "Setup MSI-X failed\n");
4474 		goto back;
4475 	}
4476 	setup = TRUE;
4477 
4478 	for (i = 0; i < sc->intr_cnt; ++i) {
4479 		intr = &sc->intr_data[i];
4480 
4481 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4482 		    intr->intr_cpuid);
4483 		if (error) {
4484 			device_printf(sc->dev,
4485 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4486 			    intr->intr_cpuid);
4487 			goto back;
4488 		}
4489 
4490 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4491 		    &intr->intr_rid, RF_ACTIVE);
4492 		if (intr->intr_res == NULL) {
4493 			device_printf(sc->dev,
4494 			    "Unable to allocate MSI-X %d resource\n", i);
4495 			error = ENOMEM;
4496 			goto back;
4497 		}
4498 	}
4499 
4500 	pci_enable_msix(sc->dev);
4501 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4502 back:
4503 	if (error)
4504 		ix_free_msix(sc, setup);
4505 }
4506 
4507 static void
4508 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4509 {
4510 	int i;
4511 
4512 	KKASSERT(sc->intr_cnt > 1);
4513 
4514 	for (i = 0; i < sc->intr_cnt; ++i) {
4515 		struct ix_intr_data *intr = &sc->intr_data[i];
4516 
4517 		if (intr->intr_res != NULL) {
4518 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4519 			    intr->intr_rid, intr->intr_res);
4520 		}
4521 		if (intr->intr_rid >= 0)
4522 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4523 	}
4524 	if (setup)
4525 		pci_teardown_msix(sc->dev);
4526 
4527 	sc->intr_cnt = 0;
4528 	kfree(sc->intr_data, M_DEVBUF);
4529 	sc->intr_data = NULL;
4530 }
4531 
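/*
 * Assign MSI-X vectors and target CPUs to the RX rings starting at
 * ring i; *x0 is the first vector to use and is advanced past the
 * vectors consumed.  ix_conf_tx_msix() below does the same for the
 * TX rings.
 */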
4532 static void
4533 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4534 {
4535 	int x = *x0;
4536 
4537 	for (; i < sc->rx_ring_msix; ++i) {
4538 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4539 		struct ix_intr_data *intr;
4540 
4541 		KKASSERT(x < sc->intr_cnt);
4542 		rxr->rx_intr_vec = x;
4543 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4544 
4545 		intr = &sc->intr_data[x++];
4546 
4547 		intr->intr_serialize = &rxr->rx_serialize;
4548 		intr->intr_func = ix_msix_rx;
4549 		intr->intr_funcarg = rxr;
4550 		intr->intr_rate = IX_MSIX_RX_RATE;
4551 		intr->intr_use = IX_INTR_USE_RX;
4552 
4553 		intr->intr_cpuid = i + offset;
4554 		KKASSERT(intr->intr_cpuid < ncpus2);
4555 
4556 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4557 		    device_get_nameunit(sc->dev), i);
4558 		intr->intr_desc = intr->intr_desc0;
4559 	}
4560 	*x0 = x;
4561 }
4562 
4563 static void
4564 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4565 {
4566 	int x = *x0;
4567 
4568 	for (; i < sc->tx_ring_msix; ++i) {
4569 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4570 		struct ix_intr_data *intr;
4571 
4572 		KKASSERT(x < sc->intr_cnt);
4573 		txr->tx_intr_vec = x;
4574 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4575 
4576 		intr = &sc->intr_data[x++];
4577 
4578 		intr->intr_serialize = &txr->tx_serialize;
4579 		intr->intr_func = ix_msix_tx;
4580 		intr->intr_funcarg = txr;
4581 		intr->intr_rate = IX_MSIX_TX_RATE;
4582 		intr->intr_use = IX_INTR_USE_TX;
4583 
4584 		intr->intr_cpuid = i + offset;
4585 		KKASSERT(intr->intr_cpuid < ncpus2);
4586 		txr->tx_intr_cpuid = intr->intr_cpuid;
4587 
4588 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4589 		    device_get_nameunit(sc->dev), i);
4590 		intr->intr_desc = intr->intr_desc0;
4591 	}
4592 	*x0 = x;
4593 }
4594 
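/*
 * Per-ring MSI-X handlers: drain the ring(s), then re-enable the
 * vector by writing its bit back into the EIMS register.
 */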
4595 static void
4596 ix_msix_rx(void *xrxr)
4597 {
4598 	struct ix_rx_ring *rxr = xrxr;
4599 
4600 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4601 
4602 	ix_rxeof(rxr);
4603 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4604 }
4605 
4606 static void
4607 ix_msix_tx(void *xtxr)
4608 {
4609 	struct ix_tx_ring *txr = xtxr;
4610 
4611 	ASSERT_SERIALIZED(&txr->tx_serialize);
4612 
4613 	ix_txeof(txr, *(txr->tx_hdr));
4614 	if (!ifsq_is_empty(txr->tx_ifsq))
4615 		ifsq_devstart(txr->tx_ifsq);
4616 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4617 }
4618 
4619 static void
4620 ix_msix_rxtx(void *xrxr)
4621 {
4622 	struct ix_rx_ring *rxr = xrxr;
4623 	struct ix_tx_ring *txr;
4624 	int hdr;
4625 
4626 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4627 
4628 	ix_rxeof(rxr);
4629 
4630 	/*
4631 	 * NOTE:
4632 	 * Since tx_next_clean is only changed by ix_txeof(),
4633 	 * which is called only from the interrupt handler, the
4634 	 * check without holding the TX serializer is MPSAFE.
4635 	 */
4636 	txr = rxr->rx_txr;
4637 	hdr = *(txr->tx_hdr);
4638 	if (hdr != txr->tx_next_clean) {
4639 		lwkt_serialize_enter(&txr->tx_serialize);
4640 		ix_txeof(txr, hdr);
4641 		if (!ifsq_is_empty(txr->tx_ifsq))
4642 			ifsq_devstart(txr->tx_ifsq);
4643 		lwkt_serialize_exit(&txr->tx_serialize);
4644 	}
4645 
4646 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4647 }
4648 
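/*
 * Handle the non-ring interrupt causes in EICR: link state changes,
 * ECC errors, SFP module and multispeed fiber events, fan failure on
 * the 82598AT and the X540 over-temperature condition.
 */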
4649 static void
4650 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4651 {
4652 	struct ixgbe_hw *hw = &sc->hw;
4653 
4654 	/* Link status change */
4655 	if (eicr & IXGBE_EICR_LSC)
4656 		ix_handle_link(sc);
4657 
4658 	if (hw->mac.type != ixgbe_mac_82598EB) {
4659 		if (eicr & IXGBE_EICR_ECC)
4660 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4661 		else if (eicr & IXGBE_EICR_GPI_SDP1)
4662 			ix_handle_msf(sc);
4663 		else if (eicr & IXGBE_EICR_GPI_SDP2)
4664 			ix_handle_mod(sc);
4665 	}
4666 
4667 	/* Check for fan failure */
4668 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4669 	    (eicr & IXGBE_EICR_GPI_SDP1))
4670 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4671 
4672 	/* Check for over temp condition */
4673 	if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) {
4674 		if_printf(&sc->arpcom.ac_if, "OVER TEMP!!  "
4675 		    "PHY IS SHUT DOWN!!  Reboot\n");
4676 	}
4677 }
4678 
4679 static void
4680 ix_msix_status(void *xsc)
4681 {
4682 	struct ix_softc *sc = xsc;
4683 	uint32_t eicr;
4684 
4685 	ASSERT_SERIALIZED(&sc->main_serialize);
4686 
4687 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4688 	ix_intr_status(sc, eicr);
4689 
4690 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4691 }
4692 
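/*
 * Map an MSI-X vector number to the EIMS register and bit used to
 * re-enable it: vectors 0-31 are covered by EIMS/EIMS_EX(0), vectors
 * 32 and above by EIMS_EX(1), which does not exist on the 82598.
 */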
4693 static void
4694 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4695     uint32_t *eims, uint32_t *eims_val)
4696 {
4697 	if (x < 32) {
4698 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4699 			KASSERT(x < IX_MAX_MSIX_82598,
4700 			    ("%s: invalid vector %d for 82598",
4701 			     device_get_nameunit(sc->dev), x));
4702 			*eims = IXGBE_EIMS;
4703 		} else {
4704 			*eims = IXGBE_EIMS_EX(0);
4705 		}
4706 		*eims_val = 1 << x;
4707 	} else {
4708 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4709 		    device_get_nameunit(sc->dev), x));
4710 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4711 		    ("%s: invalid vector %d for 82598",
4712 		     device_get_nameunit(sc->dev), x));
4713 		*eims = IXGBE_EIMS_EX(1);
4714 		*eims_val = 1 << (x - 32);
4715 	}
4716 }
4717