xref: /dflybsd-src/sys/dev/netif/ix/if_ix.c (revision 0cf7fc2c82ff74133aba14fda9c476d564ce3506)
1 /*
2  * Copyright (c) 2001-2013, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #ifdef IX_RSS_DEBUG
75 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
76 do { \
77 	if (sc->rss_debug >= lvl) \
78 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
79 } while (0)
80 #else	/* !IX_RSS_DEBUG */
81 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
82 #endif	/* IX_RSS_DEBUG */
83 
84 #define IX_NAME			"Intel(R) PRO/10GbE "
85 #define IX_DEVICE(id) \
86 	{ IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
87 #define IX_DEVICE_NULL		{ 0, 0, NULL }
88 
89 static struct ix_device {
90 	uint16_t	vid;
91 	uint16_t	did;
92 	const char	*desc;
93 } ix_devices[] = {
94 	IX_DEVICE(82598AF_DUAL_PORT),
95 	IX_DEVICE(82598AF_SINGLE_PORT),
96 	IX_DEVICE(82598EB_CX4),
97 	IX_DEVICE(82598AT),
98 	IX_DEVICE(82598AT2),
99 	IX_DEVICE(82598),
100 	IX_DEVICE(82598_DA_DUAL_PORT),
101 	IX_DEVICE(82598_CX4_DUAL_PORT),
102 	IX_DEVICE(82598EB_XF_LR),
103 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
104 	IX_DEVICE(82598EB_SFP_LOM),
105 	IX_DEVICE(82599_KX4),
106 	IX_DEVICE(82599_KX4_MEZZ),
107 	IX_DEVICE(82599_SFP),
108 	IX_DEVICE(82599_XAUI_LOM),
109 	IX_DEVICE(82599_CX4),
110 	IX_DEVICE(82599_T3_LOM),
111 	IX_DEVICE(82599_COMBO_BACKPLANE),
112 	IX_DEVICE(82599_BACKPLANE_FCOE),
113 	IX_DEVICE(82599_SFP_SF2),
114 	IX_DEVICE(82599_SFP_FCOE),
115 	IX_DEVICE(82599EN_SFP),
116 	IX_DEVICE(82599_SFP_SF_QP),
117 	IX_DEVICE(X540T),
118 
119 	/* required last entry */
120 	IX_DEVICE_NULL
121 };
122 
123 static int	ix_probe(device_t);
124 static int	ix_attach(device_t);
125 static int	ix_detach(device_t);
126 static int	ix_shutdown(device_t);
127 
128 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
129 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
130 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
131 #ifdef INVARIANTS
132 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
133 		    boolean_t);
134 #endif
135 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
136 static void	ix_watchdog(struct ifaltq_subque *);
137 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
138 static void	ix_init(void *);
139 static void	ix_stop(struct ix_softc *);
140 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
141 static int	ix_media_change(struct ifnet *);
142 static void	ix_timer(void *);
143 #ifdef IFPOLL_ENABLE
144 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
145 static void	ix_npoll_rx(struct ifnet *, void *, int);
146 static void	ix_npoll_tx(struct ifnet *, void *, int);
147 static void	ix_npoll_status(struct ifnet *);
148 #endif
149 
150 static void	ix_add_sysctl(struct ix_softc *);
151 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
152 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
153 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
154 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
155 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
156 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
157 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
158 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
159 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
160 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
161 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
163 #ifdef foo
164 static int	ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
165 #endif
166 #if 0
167 static void     ix_add_hw_stats(struct ix_softc *);
168 #endif
169 #ifdef IFPOLL_ENABLE
170 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
171 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
172 #endif
173 
174 static void	ix_slot_info(struct ix_softc *);
175 static int	ix_alloc_rings(struct ix_softc *);
176 static void	ix_free_rings(struct ix_softc *);
177 static void	ix_setup_ifp(struct ix_softc *);
178 static void	ix_setup_serialize(struct ix_softc *);
179 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
180 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
181 static void	ix_update_stats(struct ix_softc *);
182 
183 static void	ix_set_promisc(struct ix_softc *);
184 static void	ix_set_multi(struct ix_softc *);
185 static void	ix_set_vlan(struct ix_softc *);
186 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
187 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
188 static const char *ix_ifmedia2str(int);
189 static const char *ix_fc2str(enum ixgbe_fc_mode);
190 
191 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
192 static void	ix_init_tx_ring(struct ix_tx_ring *);
193 static void	ix_free_tx_ring(struct ix_tx_ring *);
194 static int	ix_create_tx_ring(struct ix_tx_ring *);
195 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
196 static void	ix_init_tx_unit(struct ix_softc *);
197 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
198 		    uint16_t *, int *);
199 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
200 		    const struct mbuf *, uint32_t *, uint32_t *);
201 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
202 		    const struct mbuf *, uint32_t *, uint32_t *);
203 static void	ix_txeof(struct ix_tx_ring *, int);
204 
205 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
206 static int	ix_init_rx_ring(struct ix_rx_ring *);
207 static void	ix_free_rx_ring(struct ix_rx_ring *);
208 static int	ix_create_rx_ring(struct ix_rx_ring *);
209 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
210 static void	ix_init_rx_unit(struct ix_softc *);
211 #if 0
212 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
213 #endif
214 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
215 static void	ix_rxeof(struct ix_rx_ring *, int);
216 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
217 static void	ix_enable_rx_drop(struct ix_softc *);
218 static void	ix_disable_rx_drop(struct ix_softc *);
219 
220 static void	ix_alloc_msix(struct ix_softc *);
221 static void	ix_free_msix(struct ix_softc *, boolean_t);
222 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
223 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
224 static void	ix_setup_msix_eims(const struct ix_softc *, int,
225 		    uint32_t *, uint32_t *);
226 static int	ix_alloc_intr(struct ix_softc *);
227 static void	ix_free_intr(struct ix_softc *);
228 static int	ix_setup_intr(struct ix_softc *);
229 static void	ix_teardown_intr(struct ix_softc *, int);
230 static void	ix_enable_intr(struct ix_softc *);
231 static void	ix_disable_intr(struct ix_softc *);
232 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
233 static void	ix_set_eitr(struct ix_softc *, int, int);
234 static void	ix_intr_status(struct ix_softc *, uint32_t);
235 static void	ix_intr(void *);
236 static void	ix_msix_rxtx(void *);
237 static void	ix_msix_rx(void *);
238 static void	ix_msix_tx(void *);
239 static void	ix_msix_status(void *);
240 
241 static void	ix_config_link(struct ix_softc *);
242 static boolean_t ix_sfp_probe(struct ix_softc *);
243 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
244 static void	ix_setup_optics(struct ix_softc *);
245 static void	ix_update_link_status(struct ix_softc *);
246 static void	ix_handle_link(struct ix_softc *);
247 static void	ix_handle_mod(struct ix_softc *);
248 static void	ix_handle_msf(struct ix_softc *);
249 
250 /* XXX Shared code structure requires this for the moment */
251 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
252 
253 static device_method_t ix_methods[] = {
254 	/* Device interface */
255 	DEVMETHOD(device_probe,		ix_probe),
256 	DEVMETHOD(device_attach,	ix_attach),
257 	DEVMETHOD(device_detach,	ix_detach),
258 	DEVMETHOD(device_shutdown,	ix_shutdown),
259 	DEVMETHOD_END
260 };
261 
262 static driver_t ix_driver = {
263 	"ix",
264 	ix_methods,
265 	sizeof(struct ix_softc)
266 };
267 
268 static devclass_t ix_devclass;
269 
270 DECLARE_DUMMY_MODULE(if_ix);
271 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
272 
273 static int	ix_msi_enable = 1;
274 static int	ix_msix_enable = 1;
275 static int	ix_msix_agg_rxtx = 1;
276 static int	ix_rxr = 0;
277 static int	ix_txr = 0;
278 static int	ix_txd = IX_PERF_TXD;
279 static int	ix_rxd = IX_PERF_RXD;
280 static int	ix_unsupported_sfp = 0;
281 
282 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;
283 
284 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
285 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
286 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
287 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
288 TUNABLE_INT("hw.ix.txr", &ix_txr);
289 TUNABLE_INT("hw.ix.txd", &ix_txd);
290 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
291 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
292 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
293 
294 /*
295  * Smart speed setting, default to on.  This only works
296  * as a compile-time option right now, since it is applied during
297  * attach; set this to 'ixgbe_smart_speed_off' to disable.
298  */
299 static const enum ixgbe_smart_speed ix_smart_speed =
300     ixgbe_smart_speed_on;
301 
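/*
 * Match the PCI vendor/device ID against the supported device table
 * and set the device description.
 */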
302 static int
303 ix_probe(device_t dev)
304 {
305 	const struct ix_device *d;
306 	uint16_t vid, did;
307 
308 	vid = pci_get_vendor(dev);
309 	did = pci_get_device(dev);
310 
311 	for (d = ix_devices; d->desc != NULL; ++d) {
312 		if (vid == d->vid && did == d->did) {
313 			device_set_desc(dev, d->desc);
314 			return 0;
315 		}
316 	}
317 	return ENXIO;
318 }
319 
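/*
 * Attach the adapter: map registers, size and allocate the TX/RX rings,
 * set up interrupts and sysctls, and register the network interface.
 */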
320 static int
321 ix_attach(device_t dev)
322 {
323 	struct ix_softc *sc = device_get_softc(dev);
324 	struct ixgbe_hw *hw;
325 	int error, ring_cnt_max;
326 	uint16_t csum;
327 	uint32_t ctrl_ext;
328 #ifdef IFPOLL_ENABLE
329 	int offset, offset_def;
330 #endif
331 	char flowctrl[IFM_ETH_FC_STRLEN];
332 
333 	sc->dev = sc->osdep.dev = dev;
334 	hw = &sc->hw;
335 
336 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
337 	    device_get_unit(dev));
338 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
339 	    ix_media_change, ix_media_status);
340 
341 	/* Save frame size */
342 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
343 
344 	callout_init_mp(&sc->timer);
345 	lwkt_serialize_init(&sc->main_serialize);
346 
347 	/*
348 	 * Save off the information about this board
349 	 */
350 	hw->vendor_id = pci_get_vendor(dev);
351 	hw->device_id = pci_get_device(dev);
352 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
353 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
354 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
355 
356 	ixgbe_set_mac_type(hw);
357 
358 	/* Pick up the 82599 and VF settings */
359 	if (hw->mac.type != ixgbe_mac_82598EB)
360 		hw->phy.smart_speed = ix_smart_speed;
361 
362 	/* Enable bus mastering */
363 	pci_enable_busmaster(dev);
364 
365 	/*
366 	 * Allocate IO memory
367 	 */
368 	sc->mem_rid = PCIR_BAR(0);
369 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
370 	    &sc->mem_rid, RF_ACTIVE);
371 	if (sc->mem_res == NULL) {
372 		device_printf(dev, "Unable to allocate bus resource: memory\n");
373 		error = ENXIO;
374 		goto failed;
375 	}
376 
377 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
378 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
379 
380 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
381 	sc->hw.back = &sc->osdep;
382 
383 	/*
384 	 * Configure total supported RX/TX ring count
385 	 */
386 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
387 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
388 	sc->rx_ring_inuse = sc->rx_ring_cnt;
389 
390 	switch (hw->mac.type) {
391 	case ixgbe_mac_82598EB:
392 		ring_cnt_max = IX_MAX_TXRING_82598;
393 		break;
394 
395 	case ixgbe_mac_82599EB:
396 		ring_cnt_max = IX_MAX_TXRING_82599;
397 		break;
398 
399 	case ixgbe_mac_X540:
400 		ring_cnt_max = IX_MAX_TXRING_X540;
401 		break;
402 
403 	default:
404 		ring_cnt_max = 1;
405 		break;
406 	}
407 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
408 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
409 	sc->tx_ring_inuse = sc->tx_ring_cnt;
410 
411 	/* Allocate TX/RX rings */
412 	error = ix_alloc_rings(sc);
413 	if (error)
414 		goto failed;
415 
416 #ifdef IFPOLL_ENABLE
417 	/*
418 	 * NPOLLING RX CPU offset
419 	 */
420 	if (sc->rx_ring_cnt == ncpus2) {
421 		offset = 0;
422 	} else {
423 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
424 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
425 		if (offset >= ncpus2 ||
426 		    offset % sc->rx_ring_cnt != 0) {
427 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
428 			    offset, offset_def);
429 			offset = offset_def;
430 		}
431 	}
432 	sc->rx_npoll_off = offset;
433 
434 	/*
435 	 * NPOLLING TX CPU offset
436 	 */
437 	if (sc->tx_ring_cnt == ncpus2) {
438 		offset = 0;
439 	} else {
440 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
441 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
442 		if (offset >= ncpus2 ||
443 		    offset % sc->tx_ring_cnt != 0) {
444 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
445 			    offset, offset_def);
446 			offset = offset_def;
447 		}
448 	}
449 	sc->tx_npoll_off = offset;
450 #endif
451 
452 	/* Allocate interrupt */
453 	error = ix_alloc_intr(sc);
454 	if (error)
455 		goto failed;
456 
457 	/* Setup serializes */
458 	ix_setup_serialize(sc);
459 
460 	/* Allocate multicast array memory. */
461 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
462 	    M_DEVBUF, M_WAITOK);
463 
464 	/* Initialize the shared code */
465 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
466 	error = ixgbe_init_shared_code(hw);
467 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
468 		/*
469 		 * No optics in this port; ask the timer routine
470 		 * to probe for a module inserted later.
471 		 */
472 		sc->sfp_probe = TRUE;
473 		error = 0;
474 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
475 		device_printf(dev, "Unsupported SFP+ module detected!\n");
476 		error = EIO;
477 		goto failed;
478 	} else if (error) {
479 		device_printf(dev, "Unable to initialize the shared code\n");
480 		error = EIO;
481 		goto failed;
482 	}
483 
484 	/* Make sure we have a good EEPROM before we read from it */
485 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
486 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
487 		error = EIO;
488 		goto failed;
489 	}
490 
491 	error = ixgbe_init_hw(hw);
492 	if (error == IXGBE_ERR_EEPROM_VERSION) {
493 		device_printf(dev, "Pre-production device detected\n");
494 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
495 		device_printf(dev, "Unsupported SFP+ Module\n");
496 		error = EIO;
497 		goto failed;
498 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
499 		device_printf(dev, "No SFP+ Module found\n");
500 	}
501 
502 	/* Detect and set physical type */
503 	ix_setup_optics(sc);
504 
505 	/* Get default flow control settings */
506 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
507 	    ix_flowctrl);
508 	sc->ifm_flowctrl = ifmedia_str2ethfc(flowctrl);
509 
510 	/* Setup OS specific network interface */
511 	ix_setup_ifp(sc);
512 
513 	/* Add sysctl tree */
514 	ix_add_sysctl(sc);
515 
516 	error = ix_setup_intr(sc);
517 	if (error) {
518 		ether_ifdetach(&sc->arpcom.ac_if);
519 		goto failed;
520 	}
521 
522 	/* Initialize statistics */
523 	ix_update_stats(sc);
524 
525 	/*
526 	 * Check PCIE slot type/speed/width
527 	 */
528 	ix_slot_info(sc);
529 
530 	/* Let hardware know driver is loaded */
531 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
532 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
533 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
534 
535 	return 0;
536 failed:
537 	ix_detach(dev);
538 	return error;
539 }
540 
541 static int
542 ix_detach(device_t dev)
543 {
544 	struct ix_softc *sc = device_get_softc(dev);
545 
546 	if (device_is_attached(dev)) {
547 		struct ifnet *ifp = &sc->arpcom.ac_if;
548 		uint32_t ctrl_ext;
549 
550 		ifnet_serialize_all(ifp);
551 
552 		ix_stop(sc);
553 		ix_teardown_intr(sc, sc->intr_cnt);
554 
555 		ifnet_deserialize_all(ifp);
556 
557 		callout_terminate(&sc->timer);
558 		ether_ifdetach(ifp);
559 
560 		/* Let hardware know driver is unloading */
561 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
562 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
563 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
564 	}
565 
566 	ifmedia_removeall(&sc->media);
567 	bus_generic_detach(dev);
568 
569 	ix_free_intr(sc);
570 
571 	if (sc->msix_mem_res != NULL) {
572 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
573 		    sc->msix_mem_res);
574 	}
575 	if (sc->mem_res != NULL) {
576 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
577 		    sc->mem_res);
578 	}
579 
580 	ix_free_rings(sc);
581 
582 	if (sc->mta != NULL)
583 		kfree(sc->mta, M_DEVBUF);
584 	if (sc->serializes != NULL)
585 		kfree(sc->serializes, M_DEVBUF);
586 
587 	return 0;
588 }
589 
590 static int
591 ix_shutdown(device_t dev)
592 {
593 	struct ix_softc *sc = device_get_softc(dev);
594 	struct ifnet *ifp = &sc->arpcom.ac_if;
595 
596 	ifnet_serialize_all(ifp);
597 	ix_stop(sc);
598 	ifnet_deserialize_all(ifp);
599 
600 	return 0;
601 }
602 
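/*
 * if_start handler for one TX subqueue: encapsulate queued frames onto
 * the TX ring and update the tail register (TDT) in batches.
 */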
603 static void
604 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
605 {
606 	struct ix_softc *sc = ifp->if_softc;
607 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
608 	int idx = -1;
609 	uint16_t nsegs;
610 
611 	KKASSERT(txr->tx_ifsq == ifsq);
612 	ASSERT_SERIALIZED(&txr->tx_serialize);
613 
614 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
615 		return;
616 
617 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
618 		ifsq_purge(ifsq);
619 		return;
620 	}
621 
622 	while (!ifsq_is_empty(ifsq)) {
623 		struct mbuf *m_head;
624 
625 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
626 			ifsq_set_oactive(ifsq);
627 			txr->tx_watchdog.wd_timer = 5;
628 			break;
629 		}
630 
631 		m_head = ifsq_dequeue(ifsq);
632 		if (m_head == NULL)
633 			break;
634 
635 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
636 			IFNET_STAT_INC(ifp, oerrors, 1);
637 			continue;
638 		}
639 
640 		/*
641 		 * TX interrupts are aggressively aggregated, so increasing
642 		 * opackets at TX interrupt time would make the opackets
643 		 * statistics vastly inaccurate; do the opackets increment
644 		 * here instead.
645 		 */
646 		IFNET_STAT_INC(ifp, opackets, 1);
647 
648 		if (nsegs >= txr->tx_wreg_nsegs) {
649 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
650 			nsegs = 0;
651 			idx = -1;
652 		}
653 
654 		ETHER_BPF_MTAP(ifp, m_head);
655 	}
656 	if (idx >= 0)
657 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
658 }
659 
660 static int
661 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
662 {
663 	struct ix_softc *sc = ifp->if_softc;
664 	struct ifreq *ifr = (struct ifreq *) data;
665 	int error = 0, mask, reinit;
666 
667 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
668 
669 	switch (command) {
670 	case SIOCSIFMTU:
671 		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
672 			error = EINVAL;
673 		} else {
674 			ifp->if_mtu = ifr->ifr_mtu;
675 			sc->max_frame_size =
676 			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
677 			ix_init(sc);
678 		}
679 		break;
680 
681 	case SIOCSIFFLAGS:
682 		if (ifp->if_flags & IFF_UP) {
683 			if (ifp->if_flags & IFF_RUNNING) {
684 				if ((ifp->if_flags ^ sc->if_flags) &
685 				    (IFF_PROMISC | IFF_ALLMULTI))
686 					ix_set_promisc(sc);
687 			} else {
688 				ix_init(sc);
689 			}
690 		} else if (ifp->if_flags & IFF_RUNNING) {
691 			ix_stop(sc);
692 		}
693 		sc->if_flags = ifp->if_flags;
694 		break;
695 
696 	case SIOCADDMULTI:
697 	case SIOCDELMULTI:
698 		if (ifp->if_flags & IFF_RUNNING) {
699 			ix_disable_intr(sc);
700 			ix_set_multi(sc);
701 #ifdef IFPOLL_ENABLE
702 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
703 #endif
704 				ix_enable_intr(sc);
705 		}
706 		break;
707 
708 	case SIOCSIFMEDIA:
709 	case SIOCGIFMEDIA:
710 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
711 		break;
712 
713 	case SIOCSIFCAP:
714 		reinit = 0;
715 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
716 		if (mask & IFCAP_RXCSUM) {
717 			ifp->if_capenable ^= IFCAP_RXCSUM;
718 			reinit = 1;
719 		}
720 		if (mask & IFCAP_VLAN_HWTAGGING) {
721 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
722 			reinit = 1;
723 		}
724 		if (mask & IFCAP_TXCSUM) {
725 			ifp->if_capenable ^= IFCAP_TXCSUM;
726 			if (ifp->if_capenable & IFCAP_TXCSUM)
727 				ifp->if_hwassist |= CSUM_OFFLOAD;
728 			else
729 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
730 		}
731 		if (mask & IFCAP_TSO) {
732 			ifp->if_capenable ^= IFCAP_TSO;
733 			if (ifp->if_capenable & IFCAP_TSO)
734 				ifp->if_hwassist |= CSUM_TSO;
735 			else
736 				ifp->if_hwassist &= ~CSUM_TSO;
737 		}
738 		if (mask & IFCAP_RSS)
739 			ifp->if_capenable ^= IFCAP_RSS;
740 		if (reinit && (ifp->if_flags & IFF_RUNNING))
741 			ix_init(sc);
742 		break;
743 
744 #if 0
745 	case SIOCGI2C:
746 	{
747 		struct ixgbe_i2c_req	i2c;
748 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
749 		if (error)
750 			break;
751 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
752 			error = EINVAL;
753 			break;
754 		}
755 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
756 		    i2c.dev_addr, i2c.data);
757 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
758 		break;
759 	}
760 #endif
761 
762 	default:
763 		error = ether_ioctl(ifp, command, data);
764 		break;
765 	}
766 	return error;
767 }
768 
769 #define IXGBE_MHADD_MFS_SHIFT 16
770 
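/*
 * (Re)initialize the adapter: program the MAC address, TX/RX units, VLAN
 * and flow control settings, then enable interrupts (unless polling) and
 * mark the interface running.
 */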
771 static void
772 ix_init(void *xsc)
773 {
774 	struct ix_softc *sc = xsc;
775 	struct ifnet *ifp = &sc->arpcom.ac_if;
776 	struct ixgbe_hw *hw = &sc->hw;
777 	uint32_t rxpb, frame, size, tmp;
778 	uint32_t gpie, rxctrl;
779 	int i, error;
780 	boolean_t polling;
781 
782 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
783 
784 	ix_stop(sc);
785 
786 	polling = FALSE;
787 #ifdef IFPOLL_ENABLE
788 	if (ifp->if_flags & IFF_NPOLLING)
789 		polling = TRUE;
790 #endif
791 
792 	/* Configure # of used RX/TX rings */
793 	ix_set_ring_inuse(sc, polling);
794 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
795 
796 	/* Get the latest MAC address; the user may have set a LAA */
797 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
798 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
799 	hw->addr_ctrl.rar_used_count = 1;
800 
801 	/* Prepare transmit descriptors and buffers */
802 	for (i = 0; i < sc->tx_ring_inuse; ++i)
803 		ix_init_tx_ring(&sc->tx_rings[i]);
804 
805 	ixgbe_init_hw(hw);
806 	ix_init_tx_unit(sc);
807 
808 	/* Setup Multicast table */
809 	ix_set_multi(sc);
810 
811 	/* Prepare receive descriptors and buffers */
812 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
813 		error = ix_init_rx_ring(&sc->rx_rings[i]);
814 		if (error) {
815 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
816 			ix_stop(sc);
817 			return;
818 		}
819 	}
820 
821 	/* Configure RX settings */
822 	ix_init_rx_unit(sc);
823 
824 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
825 
826 	/* Enable Fan Failure Interrupt */
827 	gpie |= IXGBE_SDP1_GPIEN;
828 
829 	/* Enable SFP+ module detection */
830 	if (hw->mac.type == ixgbe_mac_82599EB)
831 		gpie |= IXGBE_SDP2_GPIEN;
832 
833 	/* Thermal Failure Detection */
834 	if (hw->mac.type == ixgbe_mac_X540)
835 		gpie |= IXGBE_SDP0_GPIEN;
836 
837 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
838 		/* Enable Enhanced MSIX mode */
839 		gpie |= IXGBE_GPIE_MSIX_MODE;
840 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
841 		    IXGBE_GPIE_OCD;
842 	}
843 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
844 
845 	/* Set MTU size */
846 	if (ifp->if_mtu > ETHERMTU) {
847 		uint32_t mhadd;
848 
849 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
850 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
851 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
852 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
853 	}
854 
855 	/*
856 	 * Enable TX rings
857 	 */
858 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
859 		uint32_t txdctl;
860 
861 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
862 		txdctl |= IXGBE_TXDCTL_ENABLE;
863 
864 		/*
865 		 * Set WTHRESH to 0, since TX head write-back is used
866 		 */
867 		txdctl &= ~(0x7f << 16);
868 
869 		/*
870 		 * When the internal queue falls below PTHRESH (32),
871 		 * start prefetching as long as there are at least
872 		 * HTHRESH (1) buffers ready. The values are taken
873 		 * from the Intel linux driver 3.8.21.
874 		 * Prefetching enables tx line rate even with 1 queue.
875 		 */
876 		txdctl |= (32 << 0) | (1 << 8);
877 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
878 	}
879 
880 	/*
881 	 * Enable RX rings
882 	 */
883 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
884 		uint32_t rxdctl;
885 		int k;
886 
887 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
888 		if (hw->mac.type == ixgbe_mac_82598EB) {
889 			/*
890 			 * PTHRESH = 21
891 			 * HTHRESH = 4
892 			 * WTHRESH = 8
893 			 */
894 			rxdctl &= ~0x3FFFFF;
895 			rxdctl |= 0x080420;
896 		}
897 		rxdctl |= IXGBE_RXDCTL_ENABLE;
898 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
899 		for (k = 0; k < 10; ++k) {
900 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
901 			    IXGBE_RXDCTL_ENABLE)
902 				break;
903 			else
904 				msec_delay(1);
905 		}
906 		wmb();
907 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
908 		    sc->rx_rings[0].rx_ndesc - 1);
909 	}
910 
911 	/* Set up VLAN support and filter */
912 	ix_set_vlan(sc);
913 
914 	/* Enable Receive engine */
915 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
916 	if (hw->mac.type == ixgbe_mac_82598EB)
917 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
918 	rxctrl |= IXGBE_RXCTRL_RXEN;
919 	ixgbe_enable_rx_dma(hw, rxctrl);
920 
921 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
922 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
923 
924 		if (txr->tx_intr_vec >= 0) {
925 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
926 		} else {
927 			/*
928 			 * An unconfigured TX interrupt vector can only
929 			 * happen with MSI-X.
930 			 */
931 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
932 			    ("TX intr vector is not set"));
933 			KASSERT(i < sc->rx_ring_inuse,
934 			    ("invalid TX ring %d, no piggyback RX ring", i));
935 			KASSERT(sc->rx_rings[i].rx_txr == txr,
936 			    ("RX ring %d piggybacked TX ring mismatch", i));
937 			if (bootverbose)
938 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
939 		}
940 	}
941 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
942 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
943 
944 		KKASSERT(rxr->rx_intr_vec >= 0);
945 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
946 		if (rxr->rx_txr != NULL) {
947 			/*
948 			 * Piggyback the TX ring interrupt onto the RX
949 			 * ring interrupt vector.
950 			 */
951 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
952 			    ("piggybacked TX ring configured intr vector"));
953 			KASSERT(rxr->rx_txr->tx_idx == i,
954 			    ("RX ring %d piggybacked TX ring %u",
955 			     i, rxr->rx_txr->tx_idx));
956 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
957 			if (bootverbose) {
958 				if_printf(ifp, "IVAR RX ring %d piggybacks "
959 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
960 			}
961 		}
962 	}
963 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
964 		/* Set up status MSI-X vector; it is using fixed entry 1 */
965 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
966 
967 		/* Set up auto-mask for TX and RX rings */
968 		if (hw->mac.type == ixgbe_mac_82598EB) {
969 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
970 		} else {
971 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
972 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
973 		}
974 	} else {
975 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
976 	}
977 	for (i = 0; i < sc->intr_cnt; ++i)
978 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
979 
980 	/*
981 	 * Check on any SFP devices that need to be kick-started
982 	 */
983 	if (hw->phy.type == ixgbe_phy_none) {
984 		error = hw->phy.ops.identify(hw);
985 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
986 			if_printf(ifp,
987 			    "Unsupported SFP+ module type was detected.\n");
988 			/* XXX stop */
989 			return;
990 		}
991 	}
992 
993 	/* Config/Enable Link */
994 	ix_config_link(sc);
995 
996 	/*
997 	 * Hardware Packet Buffer & Flow Control setup
998 	 */
999 	frame = sc->max_frame_size;
1000 
1001 	/* Calculate High Water */
1002 	if (hw->mac.type == ixgbe_mac_X540)
1003 		tmp = IXGBE_DV_X540(frame, frame);
1004 	else
1005 		tmp = IXGBE_DV(frame, frame);
1006 	size = IXGBE_BT2KB(tmp);
1007 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1008 	hw->fc.high_water[0] = rxpb - size;
1009 
1010 	/* Now calculate Low Water */
1011 	if (hw->mac.type == ixgbe_mac_X540)
1012 		tmp = IXGBE_LOW_DV_X540(frame);
1013 	else
1014 		tmp = IXGBE_LOW_DV(frame);
1015 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1016 
1017 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_flowctrl);
1018 	if (sc->ifm_flowctrl & IFM_ETH_FORCEPAUSE)
1019 		hw->fc.disable_fc_autoneg = TRUE;
1020 	else
1021 		hw->fc.disable_fc_autoneg = FALSE;
1022 	hw->fc.pause_time = IX_FC_PAUSE;
1023 	hw->fc.send_xon = TRUE;
1024 
1025 	/* Initialize the FC settings */
1026 	ixgbe_start_hw(hw);
1027 
1028 	/*
1029 	 * Only enable interrupts if we are not polling; make sure
1030 	 * they are off otherwise.
1031 	 */
1032 	if (polling)
1033 		ix_disable_intr(sc);
1034 	else
1035 		ix_enable_intr(sc);
1036 
1037 	ifp->if_flags |= IFF_RUNNING;
1038 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1039 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1040 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1041 	}
1042 
1043 	ix_set_timer_cpuid(sc, polling);
1044 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1045 }
1046 
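/*
 * Legacy/MSI interrupt handler: service RX ring 0/1 and TX ring 0,
 * handle status events, then re-enable interrupts through EIMS.
 */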
1047 static void
1048 ix_intr(void *xsc)
1049 {
1050 	struct ix_softc *sc = xsc;
1051 	struct ixgbe_hw	*hw = &sc->hw;
1052 	uint32_t eicr;
1053 
1054 	ASSERT_SERIALIZED(&sc->main_serialize);
1055 
1056 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1057 	if (eicr == 0) {
1058 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1059 		return;
1060 	}
1061 
1062 	if (eicr & IX_RX0_INTR_MASK) {
1063 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1064 
1065 		lwkt_serialize_enter(&rxr->rx_serialize);
1066 		ix_rxeof(rxr, -1);
1067 		lwkt_serialize_exit(&rxr->rx_serialize);
1068 	}
1069 	if (eicr & IX_RX1_INTR_MASK) {
1070 		struct ix_rx_ring *rxr;
1071 
1072 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1073 		rxr = &sc->rx_rings[1];
1074 
1075 		lwkt_serialize_enter(&rxr->rx_serialize);
1076 		ix_rxeof(rxr, -1);
1077 		lwkt_serialize_exit(&rxr->rx_serialize);
1078 	}
1079 
1080 	if (eicr & IX_TX_INTR_MASK) {
1081 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1082 
1083 		lwkt_serialize_enter(&txr->tx_serialize);
1084 		ix_txeof(txr, *(txr->tx_hdr));
1085 		if (!ifsq_is_empty(txr->tx_ifsq))
1086 			ifsq_devstart(txr->tx_ifsq);
1087 		lwkt_serialize_exit(&txr->tx_serialize);
1088 	}
1089 
1090 	if (__predict_false(eicr & IX_EICR_STATUS))
1091 		ix_intr_status(sc, eicr);
1092 
1093 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1094 }
1095 
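/*
 * Report the current media status: link state, speed and the
 * negotiated flow control mode.
 */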
1096 static void
1097 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1098 {
1099 	struct ix_softc *sc = ifp->if_softc;
1100 
1101 	ix_update_link_status(sc);
1102 
1103 	ifmr->ifm_status = IFM_AVALID;
1104 	ifmr->ifm_active = IFM_ETHER;
1105 
1106 	if (!sc->link_active) {
1107 		ifmr->ifm_active |= IFM_NONE;
1108 		return;
1109 	}
1110 
1111 	ifmr->ifm_status |= IFM_ACTIVE;
1112 
1113 	switch (sc->link_speed) {
1114 	case IXGBE_LINK_SPEED_100_FULL:
1115 		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1116 		break;
1117 	case IXGBE_LINK_SPEED_1GB_FULL:
1118 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1119 		break;
1120 	case IXGBE_LINK_SPEED_10GB_FULL:
1121 		ifmr->ifm_active |= sc->optics | IFM_FDX;
1122 		break;
1123 	default:
1124 		ifmr->ifm_active |= IFM_NONE;
1125 		return;
1126 	}
1127 
1128 	if (sc->ifm_flowctrl & IFM_ETH_FORCEPAUSE)
1129 		ifmr->ifm_active |= IFM_ETH_FORCEPAUSE;
1130 
1131 	switch (sc->hw.fc.current_mode) {
1132 	case ixgbe_fc_full:
1133 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1134 		break;
1135 	case ixgbe_fc_rx_pause:
1136 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1137 		break;
1138 	case ixgbe_fc_tx_pause:
1139 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1140 		break;
1141 	default:
1142 		break;
1143 	}
1144 }
1145 
1146 static int
1147 ix_media_change(struct ifnet *ifp)
1148 {
1149 	struct ix_softc *sc = ifp->if_softc;
1150 	struct ifmedia *ifm = &sc->media;
1151 
1152 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1153 		return EINVAL;
1154 
1155 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1156 	case IFM_AUTO:
1157 		sc->hw.phy.autoneg_advertised =
1158 		    IXGBE_LINK_SPEED_100_FULL |
1159 		    IXGBE_LINK_SPEED_1GB_FULL |
1160 		    IXGBE_LINK_SPEED_10GB_FULL;
1161 		break;
1162 	default:
1163 		if_printf(ifp, "Only auto media type\n");
1164 		return EINVAL;
1165 	}
1166 	sc->ifm_flowctrl = ifm->ifm_media & IFM_ETH_FCMASK;
1167 
1168 	if (ifp->if_flags & IFF_RUNNING)
1169 		ix_init(sc);
1170 	return 0;
1171 }
1172 
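/*
 * Make sure the Ethernet/IP/TCP headers of a TSO frame are contiguous
 * in the first mbuf.
 */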
1173 static __inline int
1174 ix_tso_pullup(struct mbuf **mp)
1175 {
1176 	int hoff, iphlen, thoff;
1177 	struct mbuf *m;
1178 
1179 	m = *mp;
1180 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1181 
1182 	iphlen = m->m_pkthdr.csum_iphlen;
1183 	thoff = m->m_pkthdr.csum_thlen;
1184 	hoff = m->m_pkthdr.csum_lhlen;
1185 
1186 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1187 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1188 	KASSERT(hoff > 0, ("invalid ether hlen"));
1189 
1190 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1191 		m = m_pullup(m, hoff + iphlen + thoff);
1192 		if (m == NULL) {
1193 			*mp = NULL;
1194 			return ENOBUFS;
1195 		}
1196 		*mp = m;
1197 	}
1198 	return 0;
1199 }
1200 
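/*
 * DMA-map an mbuf chain and fill TX descriptors for it; the TDT update
 * is deferred to the caller through *idx.
 */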
1201 static int
1202 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1203     uint16_t *segs_used, int *idx)
1204 {
1205 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1206 	int i, j, error, nsegs, first, maxsegs;
1207 	struct mbuf *m_head = *m_headp;
1208 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1209 	bus_dmamap_t map;
1210 	struct ix_tx_buf *txbuf;
1211 	union ixgbe_adv_tx_desc *txd = NULL;
1212 
1213 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1214 		error = ix_tso_pullup(m_headp);
1215 		if (__predict_false(error))
1216 			return error;
1217 		m_head = *m_headp;
1218 	}
1219 
1220 	/* Basic descriptor defines */
1221 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1222 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1223 
1224 	if (m_head->m_flags & M_VLANTAG)
1225 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1226 
1227 	/*
1228 	 * It is important to capture the first descriptor used,
1229 	 * because it will contain the index of the one we tell
1230 	 * the hardware to report back.
1231 	 */
1232 	first = txr->tx_next_avail;
1233 	txbuf = &txr->tx_buf[first];
1234 	map = txbuf->map;
1235 
1236 	/*
1237 	 * Map the packet for DMA.
1238 	 */
1239 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1240 	if (maxsegs > IX_MAX_SCATTER)
1241 		maxsegs = IX_MAX_SCATTER;
1242 
1243 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1244 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1245 	if (__predict_false(error)) {
1246 		m_freem(*m_headp);
1247 		*m_headp = NULL;
1248 		return error;
1249 	}
1250 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1251 
1252 	m_head = *m_headp;
1253 
1254 	/*
1255 	 * Set up the appropriate offload context if requested,
1256 	 * this may consume one TX descriptor.
1257 	 */
1258 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1259 		(*segs_used)++;
1260 		txr->tx_nsegs++;
1261 	}
1262 
1263 	*segs_used += nsegs;
1264 	txr->tx_nsegs += nsegs;
1265 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1266 		/*
1267 		 * Report Status (RS) is turned on every intr_nsegs
1268 		 * descriptors (roughly).
1269 		 */
1270 		txr->tx_nsegs = 0;
1271 		cmd_rs = IXGBE_TXD_CMD_RS;
1272 	}
1273 
1274 	i = txr->tx_next_avail;
1275 	for (j = 0; j < nsegs; j++) {
1276 		bus_size_t seglen;
1277 		bus_addr_t segaddr;
1278 
1279 		txbuf = &txr->tx_buf[i];
1280 		txd = &txr->tx_base[i];
1281 		seglen = segs[j].ds_len;
1282 		segaddr = htole64(segs[j].ds_addr);
1283 
1284 		txd->read.buffer_addr = segaddr;
1285 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1286 		    cmd_type_len | seglen);
1287 		txd->read.olinfo_status = htole32(olinfo_status);
1288 
1289 		if (++i == txr->tx_ndesc)
1290 			i = 0;
1291 	}
1292 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1293 
1294 	txr->tx_avail -= nsegs;
1295 	txr->tx_next_avail = i;
1296 
1297 	txbuf->m_head = m_head;
1298 	txr->tx_buf[first].map = txbuf->map;
1299 	txbuf->map = map;
1300 
1301 	/*
1302 	 * Defer TDT updating until enough descriptors are set up
1303 	 */
1304 	*idx = i;
1305 
1306 	return 0;
1307 }
1308 
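/*
 * Update the unicast/multicast promiscuous bits in FCTRL to match the
 * interface flags.
 */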
1309 static void
1310 ix_set_promisc(struct ix_softc *sc)
1311 {
1312 	struct ifnet *ifp = &sc->arpcom.ac_if;
1313 	uint32_t reg_rctl;
1314 	int mcnt = 0;
1315 
1316 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1317 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1318 	if (ifp->if_flags & IFF_ALLMULTI) {
1319 		mcnt = IX_MAX_MCASTADDR;
1320 	} else {
1321 		struct ifmultiaddr *ifma;
1322 
1323 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1324 			if (ifma->ifma_addr->sa_family != AF_LINK)
1325 				continue;
1326 			if (mcnt == IX_MAX_MCASTADDR)
1327 				break;
1328 			mcnt++;
1329 		}
1330 	}
1331 	if (mcnt < IX_MAX_MCASTADDR)
1332 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1333 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1334 
1335 	if (ifp->if_flags & IFF_PROMISC) {
1336 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1337 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1338 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1339 		reg_rctl |= IXGBE_FCTRL_MPE;
1340 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1341 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1342 	}
1343 }
1344 
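/*
 * Rebuild the multicast address array and program it into the hardware
 * through the shared code.
 */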
1345 static void
1346 ix_set_multi(struct ix_softc *sc)
1347 {
1348 	struct ifnet *ifp = &sc->arpcom.ac_if;
1349 	struct ifmultiaddr *ifma;
1350 	uint32_t fctrl;
1351 	uint8_t	*mta;
1352 	int mcnt = 0;
1353 
1354 	mta = sc->mta;
1355 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1356 
1357 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1358 		if (ifma->ifma_addr->sa_family != AF_LINK)
1359 			continue;
1360 		if (mcnt == IX_MAX_MCASTADDR)
1361 			break;
1362 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1363 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1364 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1365 		mcnt++;
1366 	}
1367 
1368 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1369 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1370 	if (ifp->if_flags & IFF_PROMISC) {
1371 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1372 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1373 		fctrl |= IXGBE_FCTRL_MPE;
1374 		fctrl &= ~IXGBE_FCTRL_UPE;
1375 	} else {
1376 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1377 	}
1378 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1379 
1380 	if (mcnt < IX_MAX_MCASTADDR) {
1381 		ixgbe_update_mc_addr_list(&sc->hw,
1382 		    mta, mcnt, ix_mc_array_itr, TRUE);
1383 	}
1384 }
1385 
1386 /*
1387  * This is an iterator function needed by the shared multicast
1388  * code.  It feeds the shared code routine the addresses in the
1389  * array built by ix_set_multi(), one at a time.
1390  */
1391 static uint8_t *
1392 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1393 {
1394 	uint8_t *addr = *update_ptr;
1395 	uint8_t *newptr;
1396 	*vmdq = 0;
1397 
1398 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1399 	*update_ptr = newptr;
1400 	return addr;
1401 }
1402 
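/*
 * Per-second timer: probe for hot-plugged SFP+ optics and refresh the
 * link state and statistics.
 */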
1403 static void
1404 ix_timer(void *arg)
1405 {
1406 	struct ix_softc *sc = arg;
1407 
1408 	lwkt_serialize_enter(&sc->main_serialize);
1409 
1410 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1411 		lwkt_serialize_exit(&sc->main_serialize);
1412 		return;
1413 	}
1414 
1415 	/* Check for pluggable optics */
1416 	if (sc->sfp_probe) {
1417 		if (!ix_sfp_probe(sc))
1418 			goto done; /* Nothing to do */
1419 	}
1420 
1421 	ix_update_link_status(sc);
1422 	ix_update_stats(sc);
1423 
1424 done:
1425 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1426 	lwkt_serialize_exit(&sc->main_serialize);
1427 }
1428 
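/*
 * Synchronize the software link state with the hardware, adjust the RX
 * drop policy on flow control changes, and notify the stack.
 */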
1429 static void
1430 ix_update_link_status(struct ix_softc *sc)
1431 {
1432 	struct ifnet *ifp = &sc->arpcom.ac_if;
1433 
1434 	if (sc->link_up) {
1435 		if (sc->link_active == FALSE) {
1436 			if (bootverbose) {
1437 				if_printf(ifp, "Link is up %d Gbps %s\n",
1438 				    sc->link_speed == 128 ? 10 : 1,
1439 				    "Full Duplex");
1440 			}
1441 
1442 			/*
1443 			 * Update any Flow Control changes
1444 			 */
1445 			ixgbe_fc_enable(&sc->hw);
1446 			/* MUST be done after ixgbe_fc_enable() */
1447 			if (sc->rx_ring_inuse > 1) {
1448 				switch (sc->hw.fc.current_mode) {
1449 				case ixgbe_fc_rx_pause:
1450 				case ixgbe_fc_tx_pause:
1451 				case ixgbe_fc_full:
1452 					ix_disable_rx_drop(sc);
1453 					break;
1454 
1455 				case ixgbe_fc_none:
1456 					ix_enable_rx_drop(sc);
1457 					break;
1458 
1459 				default:
1460 					break;
1461 				}
1462 			}
1463 
1464 			sc->link_active = TRUE;
1465 
1466 			ifp->if_link_state = LINK_STATE_UP;
1467 			if_link_state_change(ifp);
1468 		}
1469 	} else { /* Link down */
1470 		if (sc->link_active == TRUE) {
1471 			if (bootverbose)
1472 				if_printf(ifp, "Link is Down\n");
1473 			ifp->if_link_state = LINK_STATE_DOWN;
1474 			if_link_state_change(ifp);
1475 
1476 			sc->link_active = FALSE;
1477 		}
1478 	}
1479 }
1480 
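/*
 * Stop the adapter: disable interrupts, reset and stop the MAC, mark
 * the link down, and free TX/RX buffers.
 */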
1481 static void
1482 ix_stop(struct ix_softc *sc)
1483 {
1484 	struct ixgbe_hw *hw = &sc->hw;
1485 	struct ifnet *ifp = &sc->arpcom.ac_if;
1486 	int i;
1487 
1488 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1489 
1490 	ix_disable_intr(sc);
1491 	callout_stop(&sc->timer);
1492 
1493 	ifp->if_flags &= ~IFF_RUNNING;
1494 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1495 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1496 
1497 		ifsq_clr_oactive(txr->tx_ifsq);
1498 		ifsq_watchdog_stop(&txr->tx_watchdog);
1499 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1500 	}
1501 
1502 	ixgbe_reset_hw(hw);
1503 	hw->adapter_stopped = FALSE;
1504 	ixgbe_stop_adapter(hw);
1505 	if (hw->mac.type == ixgbe_mac_82599EB)
1506 		ixgbe_stop_mac_link_on_d3_82599(hw);
1507 	/* Turn off the laser - noop with no optics */
1508 	ixgbe_disable_tx_laser(hw);
1509 
1510 	/* Update the stack */
1511 	sc->link_up = FALSE;
1512 	ix_update_link_status(sc);
1513 
1514 	/* Reprogram the RAR[0] in case user changed it. */
1515 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1516 
1517 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1518 		ix_free_tx_ring(&sc->tx_rings[i]);
1519 
1520 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1521 		ix_free_rx_ring(&sc->rx_rings[i]);
1522 }
1523 
1524 static void
1525 ix_setup_optics(struct ix_softc *sc)
1526 {
1527 	struct ixgbe_hw *hw = &sc->hw;
1528 	int layer;
1529 
1530 	layer = ixgbe_get_supported_physical_layer(hw);
1531 
1532 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1533 		sc->optics = IFM_10G_T;
1534 		return;
1535 	}
1536 
1537 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1538 		sc->optics = IFM_1000_T;
1539 		return;
1540 	}
1541 
1542 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1543 		sc->optics = IFM_1000_SX;
1544 		return;
1545 	}
1546 
1547 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1548 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1549 		sc->optics = IFM_10G_LR;
1550 		return;
1551 	}
1552 
1553 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1554 		sc->optics = IFM_10G_SR;
1555 		return;
1556 	}
1557 
1558 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1559 		sc->optics = IFM_10G_TWINAX;
1560 		return;
1561 	}
1562 
1563 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1564 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1565 		sc->optics = IFM_10G_CX4;
1566 		return;
1567 	}
1568 
1569 	/*
1570 	 * If we get here just set the default.
1571 	 * XXX this probably is wrong.
1572 	 */
1573 	sc->optics = IFM_AUTO;
1574 }
1575 
1576 static void
1577 ix_setup_ifp(struct ix_softc *sc)
1578 {
1579 	struct ixgbe_hw *hw = &sc->hw;
1580 	struct ifnet *ifp = &sc->arpcom.ac_if;
1581 	int i;
1582 
1583 	ifp->if_baudrate = IF_Gbps(10UL);
1584 
1585 	ifp->if_softc = sc;
1586 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1587 	ifp->if_init = ix_init;
1588 	ifp->if_ioctl = ix_ioctl;
1589 	ifp->if_start = ix_start;
1590 	ifp->if_serialize = ix_serialize;
1591 	ifp->if_deserialize = ix_deserialize;
1592 	ifp->if_tryserialize = ix_tryserialize;
1593 #ifdef INVARIANTS
1594 	ifp->if_serialize_assert = ix_serialize_assert;
1595 #endif
1596 #ifdef IFPOLL_ENABLE
1597 	ifp->if_npoll = ix_npoll;
1598 #endif
1599 
1600 	/* Increase TSO burst length */
1601 	ifp->if_tsolen = (8 * ETHERMTU);
1602 
1603 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1604 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1605 
1606 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1607 	ifq_set_ready(&ifp->if_snd);
1608 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1609 
1610 	ifp->if_mapsubq = ifq_mapsubq_mask;
1611 	ifq_set_subq_mask(&ifp->if_snd, 0);
1612 
1613 	ether_ifattach(ifp, hw->mac.addr, NULL);
1614 
1615 	ifp->if_capabilities =
1616 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1617 	if (IX_ENABLE_HWRSS(sc))
1618 		ifp->if_capabilities |= IFCAP_RSS;
1619 	ifp->if_capenable = ifp->if_capabilities;
1620 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1621 
1622 	/*
1623 	 * Tell the upper layer(s) we support long frames.
1624 	 */
1625 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1626 
1627 	/* Setup TX rings and subqueues */
1628 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1629 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1630 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1631 
1632 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1633 		ifsq_set_priv(ifsq, txr);
1634 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1635 		txr->tx_ifsq = ifsq;
1636 
1637 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1638 	}
1639 
1640 	/*
1641 	 * Specify the media types supported by this adapter and register
1642 	 * callbacks to update media and link information
1643 	 */
1644 	ifmedia_add(&sc->media, IFM_ETHER | sc->optics | IFM_FDX, 0, NULL);
1645 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1646 		if (sc->optics != IFM_1000_T) {
1647 			ifmedia_add(&sc->media,
1648 			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1649 		}
1650 	}
1651 	if (sc->optics != IFM_AUTO)
1652 		ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1653 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO | sc->ifm_flowctrl);
1654 }
1655 
1656 static boolean_t
1657 ix_is_sfp(const struct ixgbe_hw *hw)
1658 {
1659 	switch (hw->phy.type) {
1660 	case ixgbe_phy_sfp_avago:
1661 	case ixgbe_phy_sfp_ftl:
1662 	case ixgbe_phy_sfp_intel:
1663 	case ixgbe_phy_sfp_unknown:
1664 	case ixgbe_phy_sfp_passive_tyco:
1665 	case ixgbe_phy_sfp_passive_unknown:
1666 		return TRUE;
1667 	default:
1668 		return FALSE;
1669 	}
1670 }
1671 
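/*
 * Bring up the link: handle SFP+ module and multispeed fiber setup, or
 * run link setup/autonegotiation for other PHYs.
 */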
1672 static void
1673 ix_config_link(struct ix_softc *sc)
1674 {
1675 	struct ixgbe_hw *hw = &sc->hw;
1676 	boolean_t sfp;
1677 
1678 	sfp = ix_is_sfp(hw);
1679 	if (sfp) {
1680 		if (hw->phy.multispeed_fiber) {
1681 			hw->mac.ops.setup_sfp(hw);
1682 			ixgbe_enable_tx_laser(hw);
1683 			ix_handle_msf(sc);
1684 		} else {
1685 			ix_handle_mod(sc);
1686 		}
1687 	} else {
1688 		uint32_t autoneg, err = 0;
1689 
1690 		if (hw->mac.ops.check_link != NULL) {
1691 			err = ixgbe_check_link(hw, &sc->link_speed,
1692 			    &sc->link_up, FALSE);
1693 			if (err)
1694 				return;
1695 		}
1696 
1697 		autoneg = hw->phy.autoneg_advertised;
1698 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1699 			bool negotiate;
1700 
1701 			err = hw->mac.ops.get_link_capabilities(hw,
1702 			    &autoneg, &negotiate);
1703 			if (err)
1704 				return;
1705 		}
1706 
1707 		if (hw->mac.ops.setup_link != NULL) {
1708 			err = hw->mac.ops.setup_link(hw,
1709 			    autoneg, sc->link_up);
1710 			if (err)
1711 				return;
1712 		}
1713 	}
1714 }
1715 
1716 static int
1717 ix_alloc_rings(struct ix_softc *sc)
1718 {
1719 	int error, i;
1720 
1721 	/*
1722 	 * Create top level busdma tag
1723 	 */
1724 	error = bus_dma_tag_create(NULL, 1, 0,
1725 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1726 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1727 	    &sc->parent_tag);
1728 	if (error) {
1729 		device_printf(sc->dev, "could not create top level DMA tag\n");
1730 		return error;
1731 	}
1732 
1733 	/*
1734 	 * Allocate TX descriptor rings and buffers
1735 	 */
1736 	sc->tx_rings = kmalloc_cachealign(
1737 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1738 	    M_DEVBUF, M_WAITOK | M_ZERO);
1739 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1740 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1741 
1742 		txr->tx_sc = sc;
1743 		txr->tx_idx = i;
1744 		txr->tx_intr_vec = -1;
1745 		lwkt_serialize_init(&txr->tx_serialize);
1746 
1747 		error = ix_create_tx_ring(txr);
1748 		if (error)
1749 			return error;
1750 	}
1751 
1752 	/*
1753 	 * Allocate RX descriptor rings and buffers
1754 	 */
1755 	sc->rx_rings = kmalloc_cachealign(
1756 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1757 	    M_DEVBUF, M_WAITOK | M_ZERO);
1758 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1759 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1760 
1761 		rxr->rx_sc = sc;
1762 		rxr->rx_idx = i;
1763 		rxr->rx_intr_vec = -1;
1764 		lwkt_serialize_init(&rxr->rx_serialize);
1765 
1766 		error = ix_create_rx_ring(rxr);
1767 		if (error)
1768 			return error;
1769 	}
1770 
1771 	return 0;
1772 }
1773 
1774 static int
1775 ix_create_tx_ring(struct ix_tx_ring *txr)
1776 {
1777 	int error, i, tsize, ntxd;
1778 
1779 	/*
1780 	 * Validate number of transmit descriptors.  It must not exceed
1781 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
1782 	 */
1783 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1784 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1785 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1786 		device_printf(txr->tx_sc->dev,
1787 		    "Using %d TX descriptors instead of %d!\n",
1788 		    IX_DEF_TXD, ntxd);
1789 		txr->tx_ndesc = IX_DEF_TXD;
1790 	} else {
1791 		txr->tx_ndesc = ntxd;
1792 	}
1793 
1794 	/*
1795 	 * Allocate TX head write-back buffer
1796 	 */
1797 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1798 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1799 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1800 	if (txr->tx_hdr == NULL) {
1801 		device_printf(txr->tx_sc->dev,
1802 		    "Unable to allocate TX head write-back buffer\n");
1803 		return ENOMEM;
1804 	}
1805 
1806 	/*
1807 	 * Allocate TX descriptor ring
1808 	 */
1809 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1810 	    IX_DBA_ALIGN);
1811 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1812 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1813 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1814 	if (txr->tx_base == NULL) {
1815 		device_printf(txr->tx_sc->dev,
1816 		    "Unable to allocate TX Descriptor memory\n");
1817 		return ENOMEM;
1818 	}
1819 
1820 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1821 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1822 
1823 	/*
1824 	 * Create DMA tag for TX buffers
1825 	 */
1826 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1827 	    1, 0,		/* alignment, bounds */
1828 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1829 	    BUS_SPACE_MAXADDR,	/* highaddr */
1830 	    NULL, NULL,		/* filter, filterarg */
1831 	    IX_TSO_SIZE,	/* maxsize */
1832 	    IX_MAX_SCATTER,	/* nsegments */
1833 	    PAGE_SIZE,		/* maxsegsize */
1834 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1835 	    BUS_DMA_ONEBPAGE,	/* flags */
1836 	    &txr->tx_tag);
1837 	if (error) {
1838 		device_printf(txr->tx_sc->dev,
1839 		    "Unable to allocate TX DMA tag\n");
1840 		kfree(txr->tx_buf, M_DEVBUF);
1841 		txr->tx_buf = NULL;
1842 		return error;
1843 	}
1844 
1845 	/*
1846 	 * Create DMA maps for TX buffers
1847 	 */
1848 	for (i = 0; i < txr->tx_ndesc; ++i) {
1849 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1850 
1851 		error = bus_dmamap_create(txr->tx_tag,
1852 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1853 		if (error) {
1854 			device_printf(txr->tx_sc->dev,
1855 			    "Unable to create TX DMA map\n");
1856 			ix_destroy_tx_ring(txr, i);
1857 			return error;
1858 		}
1859 	}
1860 
1861 	/*
1862 	 * Initialize various watermarks
1863 	 */
1864 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1865 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1866 
1867 	return 0;
1868 }
1869 
1870 static void
1871 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1872 {
1873 	int i;
1874 
1875 	if (txr->tx_hdr != NULL) {
1876 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1877 		bus_dmamem_free(txr->tx_hdr_dtag,
1878 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1879 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1880 		txr->tx_hdr = NULL;
1881 	}
1882 
1883 	if (txr->tx_base != NULL) {
1884 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1885 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1886 		    txr->tx_base_map);
1887 		bus_dma_tag_destroy(txr->tx_base_dtag);
1888 		txr->tx_base = NULL;
1889 	}
1890 
1891 	if (txr->tx_buf == NULL)
1892 		return;
1893 
1894 	for (i = 0; i < ndesc; ++i) {
1895 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1896 
1897 		KKASSERT(txbuf->m_head == NULL);
1898 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1899 	}
1900 	bus_dma_tag_destroy(txr->tx_tag);
1901 
1902 	kfree(txr->tx_buf, M_DEVBUF);
1903 	txr->tx_buf = NULL;
1904 }
1905 
1906 static void
1907 ix_init_tx_ring(struct ix_tx_ring *txr)
1908 {
1909 	/* Clear the old ring contents */
1910 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1911 
1912 	/* Clear TX head write-back buffer */
1913 	*(txr->tx_hdr) = 0;
1914 
1915 	/* Reset indices */
1916 	txr->tx_next_avail = 0;
1917 	txr->tx_next_clean = 0;
1918 	txr->tx_nsegs = 0;
1919 
1920 	/* Set number of descriptors available */
1921 	txr->tx_avail = txr->tx_ndesc;
1922 
1923 	/* Enable this TX ring */
1924 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1925 }
1926 
1927 static void
1928 ix_init_tx_unit(struct ix_softc *sc)
1929 {
1930 	struct ixgbe_hw	*hw = &sc->hw;
1931 	int i;
1932 
1933 	/*
1934 	 * Setup the Base and Length of the Tx Descriptor Ring
1935 	 */
1936 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1937 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1938 		uint64_t tdba = txr->tx_base_paddr;
1939 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1940 		uint32_t txctrl;
1941 
1942 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1943 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1944 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1945 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1946 
1947 		/* Setup the HW Tx Head and Tail descriptor pointers */
1948 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1949 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1950 
1951 		/* Disable TX head write-back relax ordering */
1952 		switch (hw->mac.type) {
1953 		case ixgbe_mac_82598EB:
1954 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1955 			break;
1956 		case ixgbe_mac_82599EB:
1957 		case ixgbe_mac_X540:
1958 		default:
1959 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1960 			break;
1961 		}
1962 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1963 		switch (hw->mac.type) {
1964 		case ixgbe_mac_82598EB:
1965 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1966 			break;
1967 		case ixgbe_mac_82599EB:
1968 		case ixgbe_mac_X540:
1969 		default:
1970 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1971 			break;
1972 		}
1973 
1974 		/* Enable TX head write-back */
1975 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1976 		    (uint32_t)(hdr_paddr >> 32));
1977 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1978 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
1979 	}
1980 
1981 	if (hw->mac.type != ixgbe_mac_82598EB) {
1982 		uint32_t dmatxctl, rttdcs;
1983 
1984 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1985 		dmatxctl |= IXGBE_DMATXCTL_TE;
1986 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1987 
1988 		/* Disable arbiter to set MTQC */
1989 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1990 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
1991 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1992 
1993 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1994 
1995 		/* Re-enable arbiter */
1996 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1997 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1998 	}
1999 }
2000 
2001 static int
2002 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2003     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2004 {
2005 	struct ixgbe_adv_tx_context_desc *TXD;
2006 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2007 	int ehdrlen, ip_hlen = 0, ctxd;
2008 	boolean_t offload = TRUE;
2009 
2010 	/* First check if TSO is to be used */
2011 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2012 		return ix_tso_ctx_setup(txr, mp,
2013 		    cmd_type_len, olinfo_status);
2014 	}
2015 
2016 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2017 		offload = FALSE;
2018 
2019 	/* Indicate the whole packet as payload when not doing TSO */
2020 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2021 
2022 	/*
2023 	 * In advanced descriptors the vlan tag must be placed into the
2024 	 * context descriptor.  Hence we need to make one even if not
2025 	 * doing checksum offloads.
2026 	 */
2027 	if (mp->m_flags & M_VLANTAG) {
2028 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2029 		    IXGBE_ADVTXD_VLAN_SHIFT;
2030 	} else if (!offload) {
2031 		/* No TX descriptor is consumed */
2032 		return 0;
2033 	}
2034 
2035 	/* Set the ether header length */
2036 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2037 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2038 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2039 
2040 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2041 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2042 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2043 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2044 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2045 	}
2046 	vlan_macip_lens |= ip_hlen;
2047 
2048 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2049 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2050 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2051 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2052 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2053 
2054 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2055 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2056 
2057 	/* Now ready a context descriptor */
2058 	ctxd = txr->tx_next_avail;
2059 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2060 
2061 	/* Now copy bits into descriptor */
2062 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2063 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2064 	TXD->seqnum_seed = htole32(0);
2065 	TXD->mss_l4len_idx = htole32(0);
2066 
2067 	/* We've consumed the first desc, adjust counters */
2068 	if (++ctxd == txr->tx_ndesc)
2069 		ctxd = 0;
2070 	txr->tx_next_avail = ctxd;
2071 	--txr->tx_avail;
2072 
2073 	/* One TX descriptor is consumed */
2074 	return 1;
2075 }
2076 
2077 static int
2078 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2079     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2080 {
2081 	struct ixgbe_adv_tx_context_desc *TXD;
2082 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2083 	uint32_t mss_l4len_idx = 0, paylen;
2084 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2085 
2086 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2087 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2088 
2089 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2090 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2091 
2092 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2093 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2094 
2095 	ctxd = txr->tx_next_avail;
2096 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2097 
2098 	if (mp->m_flags & M_VLANTAG) {
2099 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2100 		    IXGBE_ADVTXD_VLAN_SHIFT;
2101 	}
2102 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2103 	vlan_macip_lens |= ip_hlen;
2104 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2105 
2106 	/* ADV DTYPE TUCMD */
2107 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2108 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2109 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2110 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2111 
2112 	/* MSS L4LEN IDX */
2113 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2114 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2115 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2116 
2117 	TXD->seqnum_seed = htole32(0);
2118 
2119 	if (++ctxd == txr->tx_ndesc)
2120 		ctxd = 0;
2121 
2122 	txr->tx_avail--;
2123 	txr->tx_next_avail = ctxd;
2124 
2125 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2126 
2127 	/* This is used in the transmit desc in encap */
2128 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2129 
2130 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2131 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2132 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2133 
2134 	/* One TX descriptor is consumed */
2135 	return 1;
2136 }
2137 
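/*
 * NOTE: "hdr" is the TX head index most recently reported by the hardware.
 * With head write-back enabled (IXGBE_TDWBAL_HEAD_WB_ENABLE set in
 * ix_init_tx_unit()), the caller is presumably expected to read it from
 * the coherent write-back buffer, e.g. hdr = *txr->tx_hdr, rather than
 * from the TDH register; this is a sketch of the intended usage.
 */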
2138 static void
2139 ix_txeof(struct ix_tx_ring *txr, int hdr)
2140 {
2141 	int first, avail;
2142 
2143 	if (txr->tx_avail == txr->tx_ndesc)
2144 		return;
2145 
2146 	first = txr->tx_next_clean;
2147 	if (first == hdr)
2148 		return;
2149 
2150 	avail = txr->tx_avail;
2151 	while (first != hdr) {
2152 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2153 
2154 		++avail;
2155 		if (txbuf->m_head) {
2156 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2157 			m_freem(txbuf->m_head);
2158 			txbuf->m_head = NULL;
2159 		}
2160 		if (++first == txr->tx_ndesc)
2161 			first = 0;
2162 	}
2163 	txr->tx_next_clean = first;
2164 	txr->tx_avail = avail;
2165 
2166 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2167 		ifsq_clr_oactive(txr->tx_ifsq);
2168 		txr->tx_watchdog.wd_timer = 0;
2169 	}
2170 }
2171 
2172 static int
2173 ix_create_rx_ring(struct ix_rx_ring *rxr)
2174 {
2175 	int i, rsize, error, nrxd;
2176 
2177 	/*
2178 	 * Validate number of receive descriptors.  It must not exceed
2179 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
2180 	 */
2181 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2182 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2183 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2184 		device_printf(rxr->rx_sc->dev,
2185 		    "Using %d RX descriptors instead of %d!\n",
2186 		    IX_DEF_RXD, nrxd);
2187 		rxr->rx_ndesc = IX_DEF_RXD;
2188 	} else {
2189 		rxr->rx_ndesc = nrxd;
2190 	}
2191 
2192 	/*
2193 	 * Allocate RX descriptor ring
2194 	 */
2195 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2196 	    IX_DBA_ALIGN);
2197 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2198 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2199 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2200 	if (rxr->rx_base == NULL) {
2201 		device_printf(rxr->rx_sc->dev,
2202 		    "Unable to allocate RX Descriptor memory\n");
2203 		return ENOMEM;
2204 	}
2205 
2206 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2207 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2208 
2209 	/*
2210 	 * Create DMA tag for RX buffers
2211 	 */
2212 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2213 	    1, 0,		/* alignment, bounds */
2214 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2215 	    BUS_SPACE_MAXADDR,	/* highaddr */
2216 	    NULL, NULL,		/* filter, filterarg */
2217 	    PAGE_SIZE,		/* maxsize */
2218 	    1,			/* nsegments */
2219 	    PAGE_SIZE,		/* maxsegsize */
2220 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2221 	    &rxr->rx_tag);
2222 	if (error) {
2223 		device_printf(rxr->rx_sc->dev,
2224 		    "Unable to create RX DMA tag\n");
2225 		kfree(rxr->rx_buf, M_DEVBUF);
2226 		rxr->rx_buf = NULL;
2227 		return error;
2228 	}
2229 
2230 	/*
2231 	 * Create spare DMA map for RX buffers
2232 	 */
2233 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2234 	    &rxr->rx_sparemap);
2235 	if (error) {
2236 		device_printf(rxr->rx_sc->dev,
2237 		    "Unable to create spare RX DMA map\n");
2238 		bus_dma_tag_destroy(rxr->rx_tag);
2239 		kfree(rxr->rx_buf, M_DEVBUF);
2240 		rxr->rx_buf = NULL;
2241 		return error;
2242 	}
2243 
2244 	/*
2245 	 * Create DMA maps for RX buffers
2246 	 */
2247 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2248 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2249 
2250 		error = bus_dmamap_create(rxr->rx_tag,
2251 		    BUS_DMA_WAITOK, &rxbuf->map);
2252 		if (error) {
2253 			device_printf(rxr->rx_sc->dev,
2254 			    "Unable to create RX DMA map\n");
2255 			ix_destroy_rx_ring(rxr, i);
2256 			return error;
2257 		}
2258 	}
2259 
2260 	/*
2261 	 * Initialize various watermarks
2262 	 */
2263 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2264 
2265 	return 0;
2266 }
2267 
2268 static void
2269 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2270 {
2271 	int i;
2272 
2273 	if (rxr->rx_base != NULL) {
2274 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2275 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2276 		    rxr->rx_base_map);
2277 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2278 		rxr->rx_base = NULL;
2279 	}
2280 
2281 	if (rxr->rx_buf == NULL)
2282 		return;
2283 
2284 	for (i = 0; i < ndesc; ++i) {
2285 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2286 
2287 		KKASSERT(rxbuf->m_head == NULL);
2288 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2289 	}
2290 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2291 	bus_dma_tag_destroy(rxr->rx_tag);
2292 
2293 	kfree(rxr->rx_buf, M_DEVBUF);
2294 	rxr->rx_buf = NULL;
2295 }
2296 
2297 /*
2298 ** Used to detect a descriptor that has
2299 ** been merged by Hardware RSC.
2300 */
2301 static __inline uint32_t
2302 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2303 {
2304 	return (le32toh(rx->wb.lower.lo_dword.data) &
2305 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2306 }
2307 
2308 #if 0
2309 /*********************************************************************
2310  *
2311  *  Initialize Hardware RSC (LRO) feature on 82599
2312  *  for an RX ring, this is toggled by the LRO capability
2313  *  even though it is transparent to the stack.
2314  *
2315  *  NOTE: since this HW feature only works with IPV4 and
2316  *        our testing has shown soft LRO to be as effective
2317  *        I have decided to disable this by default.
2318  *
2319  **********************************************************************/
2320 static void
2321 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2322 {
2323 	struct	ix_softc 	*sc = rxr->rx_sc;
2324 	struct	ixgbe_hw	*hw = &sc->hw;
2325 	uint32_t			rscctrl, rdrxctl;
2326 
2327 #if 0
2328 	/* If turning LRO/RSC off we need to disable it */
2329 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2330 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2331 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2332 		return;
2333 	}
2334 #endif
2335 
2336 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2337 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2338 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2339 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2340 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2341 
2342 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2343 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2344 	/*
2345 	** Limit the total number of descriptors that
2346 	** can be combined, so it does not exceed 64K
2347 	*/
2348 	if (rxr->mbuf_sz == MCLBYTES)
2349 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2350 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2351 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2352 	else if (rxr->mbuf_sz == MJUM9BYTES)
2353 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2354 	else  /* Using 16K cluster */
2355 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2356 
2357 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2358 
2359 	/* Enable TCP header recognition */
2360 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2361 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2362 	    IXGBE_PSRTYPE_TCPHDR));
2363 
2364 	/* Disable RSC for ACK packets */
2365 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2366 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2367 
2368 	rxr->hw_rsc = TRUE;
2369 }
2370 #endif
2371 
2372 static int
2373 ix_init_rx_ring(struct ix_rx_ring *rxr)
2374 {
2375 	int i;
2376 
2377 	/* Clear the ring contents */
2378 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2379 
2380 	/* XXX we need JUMPAGESIZE for RSC too */
2381 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2382 		rxr->rx_mbuf_sz = MCLBYTES;
2383 	else
2384 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2385 
2386 	/* Now replenish the mbufs */
2387 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2388 		int error;
2389 
2390 		error = ix_newbuf(rxr, i, TRUE);
2391 		if (error)
2392 			return error;
2393 	}
2394 
2395 	/* Setup our descriptor indices */
2396 	rxr->rx_next_check = 0;
2397 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2398 
2399 #if 0
2400 	/*
2401 	** Now set up the LRO interface:
2402 	*/
2403 	if (ixgbe_rsc_enable)
2404 		ix_setup_hw_rsc(rxr);
2405 #endif
2406 
2407 	return 0;
2408 }
2409 
2410 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2411 
2412 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2413 
2414 static void
2415 ix_init_rx_unit(struct ix_softc *sc)
2416 {
2417 	struct ixgbe_hw	*hw = &sc->hw;
2418 	struct ifnet *ifp = &sc->arpcom.ac_if;
2419 	uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2420 	int i;
2421 
2422 	/*
2423 	 * Make sure receives are disabled while setting up the descriptor ring
2424 	 */
2425 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2426 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2427 
2428 	/* Enable broadcasts */
2429 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2430 	fctrl |= IXGBE_FCTRL_BAM;
2431 	fctrl |= IXGBE_FCTRL_DPF;
2432 	fctrl |= IXGBE_FCTRL_PMCF;
2433 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2434 
2435 	/* Set for Jumbo Frames? */
2436 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2437 	if (ifp->if_mtu > ETHERMTU)
2438 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2439 	else
2440 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2441 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2442 
2443 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2444 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2445 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
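	/*
	 * Example of the above conversion, assuming
	 * IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 (the SRRCTL packet buffer size
	 * is expressed in 1KB units): a 2KB cluster gives
	 * (2048 + 1023) >> 10 = 2, while a page-sized (typically 4KB)
	 * cluster gives (4096 + 1023) >> 10 = 4.
	 */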
2446 
2447 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2448 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2449 		uint64_t rdba = rxr->rx_base_paddr;
2450 		uint32_t srrctl;
2451 
2452 		/* Setup the Base and Length of the Rx Descriptor Ring */
2453 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2454 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2455 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2456 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2457 
2458 		/*
2459 		 * Set up the SRRCTL register
2460 		 */
2461 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2462 
2463 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2464 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2465 		srrctl |= bufsz;
2466 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2467 		if (sc->rx_ring_inuse > 1) {
2468 			/* See the comment near ix_enable_rx_drop() */
2469 			if (sc->ifm_flowctrl &
2470 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2471 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2472 				if (i == 0 && bootverbose) {
2473 					if_printf(ifp, "flow control %s, "
2474 					    "disable RX drop\n",
2475 					    ix_ifmedia2str(sc->ifm_flowctrl));
2476 				}
2477 			} else {
2478 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2479 				if (i == 0 && bootverbose) {
2480 					if_printf(ifp, "flow control %s, "
2481 					    "enable RX drop\n",
2482 					    ix_ifmedia2str(sc->ifm_flowctrl));
2483 				}
2484 			}
2485 		}
2486 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2487 
2488 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2489 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2490 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2491 	}
2492 
2493 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2494 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2495 
2496 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2497 
2498 	/*
2499 	 * Setup RSS
2500 	 */
2501 	if (IX_ENABLE_HWRSS(sc)) {
2502 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2503 		int j, r;
2504 
2505 		/*
2506 		 * NOTE:
2507 		 * When we reach here, RSS has already been disabled
2508 		 * in ix_stop(), so we could safely configure RSS key
2509 		 * and redirect table.
2510 		 */
2511 
2512 		/*
2513 		 * Configure RSS key
2514 		 */
2515 		toeplitz_get_key(key, sizeof(key));
2516 		for (i = 0; i < IX_NRSSRK; ++i) {
2517 			uint32_t rssrk;
2518 
2519 			rssrk = IX_RSSRK_VAL(key, i);
2520 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2521 			    i, rssrk);
2522 
2523 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2524 		}
2525 
2526 		/*
2527 		 * Configure the RSS redirect table in the following fashion:
2528 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2529 		 */
2530 		r = 0;
2531 		for (j = 0; j < IX_NRETA; ++j) {
2532 			uint32_t reta = 0;
2533 
2534 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2535 				uint32_t q;
2536 
2537 				q = r % sc->rx_ring_inuse;
2538 				reta |= q << (8 * i);
2539 				++r;
2540 			}
2541 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2542 			IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2543 		}
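		/*
		 * Worked example, assuming the usual 128-entry redirect
		 * table (IX_NRETA * IX_RETA_SIZE entries, 4 per register):
		 * with rx_ring_inuse == 2 the entries alternate 0,1,0,1,...
		 * so each RETA register is written as 0x01000100 and the
		 * lowest bit of the masked hash selects the RX ring.
		 */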
2544 
2545 		/*
2546 		 * Enable multiple receive queues.
2547 		 * Enable IPv4 RSS standard hash functions.
2548 		 */
2549 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2550 		    IXGBE_MRQC_RSSEN |
2551 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2552 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2553 
2554 		/*
2555 		 * NOTE:
2556 		 * PCSD must be enabled to enable multiple
2557 		 * receive queues.
2558 		 */
2559 		rxcsum |= IXGBE_RXCSUM_PCSD;
2560 	}
2561 
2562 	if (ifp->if_capenable & IFCAP_RXCSUM)
2563 		rxcsum |= IXGBE_RXCSUM_PCSD;
2564 
2565 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2566 }
2567 
2568 static __inline void
2569 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2570 {
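	/*
	 * "i" is the index of the next descriptor to be checked by
	 * ix_rxeof(); the last descriptor handed back to the hardware is
	 * the one just before it, so step back by one (with wrap-around)
	 * before advancing the ring's tail register.
	 */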
2571 	if (--i < 0)
2572 		i = rxr->rx_ndesc - 1;
2573 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2574 }
2575 
2576 static __inline void
2577 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2578 {
2579 	if ((ptype &
2580 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2581 		/* Not IPv4 */
2582 		return;
2583 	}
2584 
2585 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2586 	    IXGBE_RXD_STAT_IPCS)
2587 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2588 
2589 	if ((ptype &
2590 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2591 		/*
2592 		 * - Neither TCP nor UDP
2593 		 * - IPv4 fragment
2594 		 */
2595 		return;
2596 	}
2597 
2598 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2599 	    IXGBE_RXD_STAT_L4CS) {
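		/*
		 * CSUM_DATA_VALID plus CSUM_PSEUDO_HDR with csum_data set
		 * to 0xffff tells the stack that the L4 checksum has
		 * already been verified in full, so it is not recomputed.
		 */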
2600 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2601 		    CSUM_FRAG_NOT_CHECKED;
2602 		mp->m_pkthdr.csum_data = htons(0xffff);
2603 	}
2604 }
2605 
2606 static __inline struct pktinfo *
2607 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2608     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2609 {
2610 	switch (hashtype) {
2611 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2612 		pi->pi_netisr = NETISR_IP;
2613 		pi->pi_flags = 0;
2614 		pi->pi_l3proto = IPPROTO_TCP;
2615 		break;
2616 
2617 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2618 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2619 			/* Not UDP or is fragment */
2620 			return NULL;
2621 		}
2622 		pi->pi_netisr = NETISR_IP;
2623 		pi->pi_flags = 0;
2624 		pi->pi_l3proto = IPPROTO_UDP;
2625 		break;
2626 
2627 	default:
2628 		return NULL;
2629 	}
2630 
2631 	m->m_flags |= M_HASH;
2632 	m->m_pkthdr.hash = toeplitz_hash(hash);
2633 	return pi;
2634 }
2635 
2636 static __inline void
2637 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2638 {
2639 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2640 	rxd->wb.upper.status_error = 0;
2641 }
2642 
2643 static void
2644 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2645 {
2646 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2647 
2648 	/*
2649 	 * XXX discard may not be correct
2650 	 */
2651 	if (eop) {
2652 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2653 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2654 	} else {
2655 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2656 	}
2657 	if (rxbuf->fmp != NULL) {
2658 		m_freem(rxbuf->fmp);
2659 		rxbuf->fmp = NULL;
2660 		rxbuf->lmp = NULL;
2661 	}
2662 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2663 }
2664 
2665 static void
2666 ix_rxeof(struct ix_rx_ring *rxr, int count)
2667 {
2668 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2669 	int i, nsegs = 0, cpuid = mycpuid;
2670 
2671 	i = rxr->rx_next_check;
2672 	while (count != 0) {
2673 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2674 		union ixgbe_adv_rx_desc	*cur;
2675 		struct mbuf *sendmp = NULL, *mp;
2676 		struct pktinfo *pi = NULL, pi0;
2677 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2678 		uint16_t len;
2679 		boolean_t eop;
2680 
2681 		cur = &rxr->rx_base[i];
2682 		staterr = le32toh(cur->wb.upper.status_error);
2683 
2684 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2685 			break;
2686 		++nsegs;
2687 
2688 		rxbuf = &rxr->rx_buf[i];
2689 		mp = rxbuf->m_head;
2690 
2691 		len = le16toh(cur->wb.upper.length);
2692 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2693 		    IXGBE_RXDADV_PKTTYPE_MASK;
2694 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2695 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2696 		    IXGBE_RXDADV_RSSTYPE_MASK;
2697 
2698 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2699 		if (eop)
2700 			--count;
2701 
2702 		/*
2703 		 * Make sure bad packets are discarded
2704 		 */
2705 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2706 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2707 			ix_rx_discard(rxr, i, eop);
2708 			goto next_desc;
2709 		}
2710 
2711 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2712 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2713 			ix_rx_discard(rxr, i, eop);
2714 			goto next_desc;
2715 		}
2716 
2717 		/*
2718 		 * On the 82599, which supports hardware LRO (RSC), a
2719 		 * packet's fragments need not occupy sequential
2720 		 * descriptors; instead the index of the next descriptor
2721 		 * is encoded in bits of the current descriptor.  This
2722 		 * also means that we may be assembling more than one
2723 		 * packet at a time, which is why the per-buffer chain
2724 		 * pointers used below replace the old global fmp/lmp
2725 		 * pointers.
2726 		 */
2727 		if (!eop) {
2728 			int nextp;
2729 
2730 			/*
2731 			 * Figure out the next descriptor
2732 			 * of this frame.
2733 			 */
2734 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2735 				rsc = ix_rsc_count(cur);
2736 			if (rsc) { /* Get hardware index */
2737 				nextp = ((staterr &
2738 				    IXGBE_RXDADV_NEXTP_MASK) >>
2739 				    IXGBE_RXDADV_NEXTP_SHIFT);
2740 			} else { /* Just sequential */
2741 				nextp = i + 1;
2742 				if (nextp == rxr->rx_ndesc)
2743 					nextp = 0;
2744 			}
2745 			nbuf = &rxr->rx_buf[nextp];
2746 			prefetch(nbuf);
2747 		}
2748 		mp->m_len = len;
2749 
2750 		/*
2751 		 * Rather than using the fmp/lmp global pointers
2752 		 * we now keep the head of a packet chain in the
2753 		 * buffer struct and pass this along from one
2754 		 * descriptor to the next, until we get EOP.
2755 		 */
2756 		if (rxbuf->fmp == NULL) {
2757 			mp->m_pkthdr.len = len;
2758 			rxbuf->fmp = mp;
2759 			rxbuf->lmp = mp;
2760 		} else {
2761 			rxbuf->fmp->m_pkthdr.len += len;
2762 			rxbuf->lmp->m_next = mp;
2763 			rxbuf->lmp = mp;
2764 		}
2765 
2766 		if (nbuf != NULL) {
2767 			/*
2768 			 * Not the last fragment of this frame,
2769 			 * pass this fragment list on
2770 			 */
2771 			nbuf->fmp = rxbuf->fmp;
2772 			nbuf->lmp = rxbuf->lmp;
2773 		} else {
2774 			/*
2775 			 * Send this frame
2776 			 */
2777 			sendmp = rxbuf->fmp;
2778 
2779 			sendmp->m_pkthdr.rcvif = ifp;
2780 			IFNET_STAT_INC(ifp, ipackets, 1);
2781 #ifdef IX_RSS_DEBUG
2782 			rxr->rx_pkts++;
2783 #endif
2784 
2785 			/* Process vlan info */
2786 			if (staterr & IXGBE_RXD_STAT_VP) {
2787 				sendmp->m_pkthdr.ether_vlantag =
2788 				    le16toh(cur->wb.upper.vlan);
2789 				sendmp->m_flags |= M_VLANTAG;
2790 			}
2791 			if (ifp->if_capenable & IFCAP_RXCSUM)
2792 				ix_rxcsum(staterr, sendmp, ptype);
2793 			if (ifp->if_capenable & IFCAP_RSS) {
2794 				pi = ix_rssinfo(sendmp, &pi0,
2795 				    hash, hashtype, ptype);
2796 			}
2797 		}
2798 		rxbuf->fmp = NULL;
2799 		rxbuf->lmp = NULL;
2800 next_desc:
2801 		/* Advance our pointers to the next descriptor. */
2802 		if (++i == rxr->rx_ndesc)
2803 			i = 0;
2804 
2805 		if (sendmp != NULL)
2806 			ifp->if_input(ifp, sendmp, pi, cpuid);
2807 
2808 		if (nsegs >= rxr->rx_wreg_nsegs) {
2809 			ix_rx_refresh(rxr, i);
2810 			nsegs = 0;
2811 		}
2812 	}
2813 	rxr->rx_next_check = i;
2814 
2815 	if (nsegs > 0)
2816 		ix_rx_refresh(rxr, i);
2817 }
2818 
2819 static void
2820 ix_set_vlan(struct ix_softc *sc)
2821 {
2822 	struct ixgbe_hw *hw = &sc->hw;
2823 	uint32_t ctrl;
2824 
2825 	if (hw->mac.type == ixgbe_mac_82598EB) {
2826 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2827 		ctrl |= IXGBE_VLNCTRL_VME;
2828 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2829 	} else {
2830 		int i;
2831 
2832 		/*
2833 		 * On 82599 and later chips the VLAN enable is
2834 		 * per queue in RXDCTL
2835 		 */
2836 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2837 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2838 			ctrl |= IXGBE_RXDCTL_VME;
2839 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2840 		}
2841 	}
2842 }
2843 
2844 static void
2845 ix_enable_intr(struct ix_softc *sc)
2846 {
2847 	struct ixgbe_hw	*hw = &sc->hw;
2848 	uint32_t fwsm;
2849 	int i;
2850 
2851 	for (i = 0; i < sc->intr_cnt; ++i)
2852 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2853 
2854 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2855 
2856 	/* Enable Fan Failure detection */
2857 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2858 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2859 
2860 	switch (sc->hw.mac.type) {
2861 	case ixgbe_mac_82599EB:
2862 		sc->intr_mask |= IXGBE_EIMS_ECC;
2863 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2864 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2865 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2866 		break;
2867 
2868 	case ixgbe_mac_X540:
2869 		sc->intr_mask |= IXGBE_EIMS_ECC;
2870 		/* Detect if Thermal Sensor is enabled */
2871 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2872 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2873 			sc->intr_mask |= IXGBE_EIMS_TS;
2874 		/* FALL THROUGH */
2875 	default:
2876 		break;
2877 	}
2878 
2879 	/* With MSI-X we use auto clear for RX and TX rings */
2880 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2881 		/*
2882 		 * There are no EIAC1/EIAC2 for newer chips; the related
2883 		 * bits for TX and RX rings > 16 are always auto clear.
2884 		 *
2885 		 * XXX which bits?  There are _no_ documented EICR1 and
2886 		 * EICR2 at all; only EICR.
2887 		 */
2888 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2889 	} else {
2890 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2891 
2892 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2893 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2894 			sc->intr_mask |= IX_RX1_INTR_MASK;
2895 	}
2896 
2897 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2898 
2899 	/*
2900 	 * Enable RX and TX rings for MSI-X
2901 	 */
2902 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2903 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2904 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2905 
2906 			if (txr->tx_intr_vec >= 0) {
2907 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2908 				    txr->tx_eims_val);
2909 			}
2910 		}
2911 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2912 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2913 
2914 			KKASSERT(rxr->rx_intr_vec >= 0);
2915 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2916 		}
2917 	}
2918 
2919 	IXGBE_WRITE_FLUSH(hw);
2920 }
2921 
2922 static void
2923 ix_disable_intr(struct ix_softc *sc)
2924 {
2925 	int i;
2926 
2927 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2928 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2929 
2930 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2931 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2932 	} else {
2933 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2934 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2935 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2936 	}
2937 	IXGBE_WRITE_FLUSH(&sc->hw);
2938 
2939 	for (i = 0; i < sc->intr_cnt; ++i)
2940 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2941 }
2942 
2943 uint16_t
2944 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2945 {
2946 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2947 	    reg, 2);
2948 }
2949 
2950 void
2951 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2952 {
2953 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2954 	    reg, value, 2);
2955 }
2956 
2957 static void
2958 ix_slot_info(struct ix_softc *sc)
2959 {
2960 	struct ixgbe_hw *hw = &sc->hw;
2961 	device_t dev = sc->dev;
2962 	struct ixgbe_mac_info *mac = &hw->mac;
2963 	uint16_t link;
2964 	uint32_t offset;
2965 
2966 	/* For most devices simply call the shared code routine */
2967 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2968 		ixgbe_get_bus_info(hw);
2969 		goto display;
2970 	}
2971 
2972 	/*
2973 	 * For the Quad port adapter we need to parse back
2974 	 * up the PCI tree to find the speed of the expansion
2975 	 * slot into which this adapter is plugged. A bit more work.
2976 	 */
2977 	dev = device_get_parent(device_get_parent(dev));
2978 #ifdef IXGBE_DEBUG
2979 	device_printf(dev, "parent pcib = %x,%x,%x\n",
2980 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2981 #endif
2982 	dev = device_get_parent(device_get_parent(dev));
2983 #ifdef IXGBE_DEBUG
2984 	device_printf(dev, "slot pcib = %x,%x,%x\n",
2985 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2986 #endif
2987 	/* Now get the PCI Express Capabilities offset */
2988 	offset = pci_get_pciecap_ptr(dev);
2989 	/* ...and read the Link Status Register */
2990 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2991 	switch (link & IXGBE_PCI_LINK_WIDTH) {
2992 	case IXGBE_PCI_LINK_WIDTH_1:
2993 		hw->bus.width = ixgbe_bus_width_pcie_x1;
2994 		break;
2995 	case IXGBE_PCI_LINK_WIDTH_2:
2996 		hw->bus.width = ixgbe_bus_width_pcie_x2;
2997 		break;
2998 	case IXGBE_PCI_LINK_WIDTH_4:
2999 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3000 		break;
3001 	case IXGBE_PCI_LINK_WIDTH_8:
3002 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3003 		break;
3004 	default:
3005 		hw->bus.width = ixgbe_bus_width_unknown;
3006 		break;
3007 	}
3008 
3009 	switch (link & IXGBE_PCI_LINK_SPEED) {
3010 	case IXGBE_PCI_LINK_SPEED_2500:
3011 		hw->bus.speed = ixgbe_bus_speed_2500;
3012 		break;
3013 	case IXGBE_PCI_LINK_SPEED_5000:
3014 		hw->bus.speed = ixgbe_bus_speed_5000;
3015 		break;
3016 	case IXGBE_PCI_LINK_SPEED_8000:
3017 		hw->bus.speed = ixgbe_bus_speed_8000;
3018 		break;
3019 	default:
3020 		hw->bus.speed = ixgbe_bus_speed_unknown;
3021 		break;
3022 	}
3023 
3024 	mac->ops.set_lan_id(hw);
3025 
3026 display:
3027 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3028 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3029 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3030 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3031 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3032 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3033 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3034 
3035 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3036 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3037 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3038 		device_printf(dev, "For optimal performance a x8 "
3039 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
3040 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3041 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3042 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3043 		device_printf(dev, "For optimal performance a x8 "
3044 		    "PCIE Gen3 slot is required.\n");
3045 	}
3046 }
3047 
3048 /*
3049  * TODO comment is incorrect
3050  *
3051  * Setup the correct IVAR register for a particular MSIX interrupt
3052  * - entry is the register array entry
3053  * - vector is the MSIX vector for this queue
3054  * - type is RX/TX/MISC
3055  */
3056 static void
3057 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3058     int8_t type)
3059 {
3060 	struct ixgbe_hw *hw = &sc->hw;
3061 	uint32_t ivar, index;
3062 
3063 	vector |= IXGBE_IVAR_ALLOC_VAL;
3064 
3065 	switch (hw->mac.type) {
3066 	case ixgbe_mac_82598EB:
3067 		if (type == -1)
3068 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3069 		else
3070 			entry += (type * 64);
3071 		index = (entry >> 2) & 0x1F;
3072 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3073 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3074 		ivar |= (vector << (8 * (entry & 0x3)));
3075 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3076 		break;
3077 
3078 	case ixgbe_mac_82599EB:
3079 	case ixgbe_mac_X540:
3080 		if (type == -1) { /* MISC IVAR */
3081 			index = (entry & 1) * 8;
3082 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3083 			ivar &= ~(0xFF << index);
3084 			ivar |= (vector << index);
3085 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3086 		} else {	/* RX/TX IVARS */
3087 			index = (16 * (entry & 1)) + (8 * type);
3088 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3089 			ivar &= ~(0xFF << index);
3090 			ivar |= (vector << index);
3091 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3092 		}
3093 		break;
3094 	default:
3095 		break;
3096 	}
3097 }
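/*
 * Sketch of the 82599/X540 IVAR arithmetic used above (derived from the
 * code, not from the hardware documentation): each IVAR register carries
 * the byte-sized vector entries of two consecutive "entry" values, split
 * into 8-bit slots selected by "type".  For example, entry 5 with type 1
 * yields index = (16 * (5 & 1)) + (8 * 1) = 24, so the vector is placed
 * in bits 31:24 of IXGBE_IVAR(5 >> 1) = IXGBE_IVAR(2).
 */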
3098 
3099 static boolean_t
3100 ix_sfp_probe(struct ix_softc *sc)
3101 {
3102 	struct ixgbe_hw	*hw = &sc->hw;
3103 
3104 	if (hw->phy.type == ixgbe_phy_nl &&
3105 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3106 		int32_t ret;
3107 
3108 		ret = hw->phy.ops.identify_sfp(hw);
3109 		if (ret)
3110 			return FALSE;
3111 
3112 		ret = hw->phy.ops.reset(hw);
3113 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3114 			if_printf(&sc->arpcom.ac_if,
3115 			     "Unsupported SFP+ module detected!  "
3116 			     "Reload driver with supported module.\n");
3117 			sc->sfp_probe = FALSE;
3118 			return FALSE;
3119 		}
3120 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3121 
3122 		/* We now have supported optics */
3123 		sc->sfp_probe = FALSE;
3124 		/* Set the optics type so system reports correctly */
3125 		ix_setup_optics(sc);
3126 
3127 		return TRUE;
3128 	}
3129 	return FALSE;
3130 }
3131 
3132 static void
3133 ix_handle_link(struct ix_softc *sc)
3134 {
3135 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3136 	ix_update_link_status(sc);
3137 }
3138 
3139 /*
3140  * Handling SFP module
3141  */
3142 static void
3143 ix_handle_mod(struct ix_softc *sc)
3144 {
3145 	struct ixgbe_hw *hw = &sc->hw;
3146 	uint32_t err;
3147 
3148 	err = hw->phy.ops.identify_sfp(hw);
3149 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3150 		if_printf(&sc->arpcom.ac_if,
3151 		    "Unsupported SFP+ module type was detected.\n");
3152 		return;
3153 	}
3154 	err = hw->mac.ops.setup_sfp(hw);
3155 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3156 		if_printf(&sc->arpcom.ac_if,
3157 		    "Setup failure - unsupported SFP+ module type.\n");
3158 		return;
3159 	}
3160 	ix_handle_msf(sc);
3161 }
3162 
3163 /*
3164  * Handling MSF (multispeed fiber)
3165  */
3166 static void
3167 ix_handle_msf(struct ix_softc *sc)
3168 {
3169 	struct ixgbe_hw *hw = &sc->hw;
3170 	uint32_t autoneg;
3171 
3172 	autoneg = hw->phy.autoneg_advertised;
3173 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3174 		bool negotiate;
3175 
3176 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3177 	}
3178 	if (hw->mac.ops.setup_link != NULL)
3179 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3180 }
3181 
3182 static void
3183 ix_update_stats(struct ix_softc *sc)
3184 {
3185 	struct ifnet *ifp = &sc->arpcom.ac_if;
3186 	struct ixgbe_hw *hw = &sc->hw;
3187 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3188 	uint64_t total_missed_rx = 0;
3189 	int i;
3190 
3191 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3192 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3193 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3194 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3195 
3196 	/*
3197 	 * Note: These are for the 8 possible traffic classes; the
3198 	 * current implementation does not use them, so only class 0
3199 	 * should report real data.
3200 	 */
3201 	for (i = 0; i < 8; i++) {
3202 		uint32_t mp;
3203 
3204 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3205 		/* missed_rx tallies misses for the gprc workaround */
3206 		missed_rx += mp;
3207 		/* global total per queue */
3208 		sc->stats.mpc[i] += mp;
3209 
3210 		/* Running comprehensive total for stats display */
3211 		total_missed_rx += sc->stats.mpc[i];
3212 
3213 		if (hw->mac.type == ixgbe_mac_82598EB) {
3214 			sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3215 			sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3216 			sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3217 			sc->stats.pxonrxc[i] +=
3218 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3219 		} else {
3220 			sc->stats.pxonrxc[i] +=
3221 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3222 		}
3223 		sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3224 		sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3225 		sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3226 		sc->stats.pxon2offc[i] +=
3227 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3228 	}
3229 	for (i = 0; i < 16; i++) {
3230 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3231 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3232 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3233 	}
3234 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3235 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3236 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3237 
3238 	/* Hardware workaround, gprc counts missed packets */
3239 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3240 	sc->stats.gprc -= missed_rx;
3241 
3242 	if (hw->mac.type != ixgbe_mac_82598EB) {
3243 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3244 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3245 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3246 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3247 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3248 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3249 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3250 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3251 	} else {
3252 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3253 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3254 		/* 82598 only has a counter in the high register */
3255 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3256 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3257 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3258 	}
3259 
3260 	/*
3261 	 * Workaround: mprc hardware is incorrectly counting
3262 	 * broadcasts, so for now we subtract those.
3263 	 */
3264 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3265 	sc->stats.bprc += bprc;
3266 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3267 	if (hw->mac.type == ixgbe_mac_82598EB)
3268 		sc->stats.mprc -= bprc;
3269 
3270 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3271 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3272 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3273 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3274 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3275 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3276 
3277 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3278 	sc->stats.lxontxc += lxon;
3279 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3280 	sc->stats.lxofftxc += lxoff;
3281 	total = lxon + lxoff;
3282 
3283 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3284 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3285 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3286 	sc->stats.gptc -= total;
3287 	sc->stats.mptc -= total;
3288 	sc->stats.ptc64 -= total;
3289 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3290 
3291 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3292 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3293 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3294 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3295 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3296 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3297 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3298 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3299 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3300 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3301 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3302 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3303 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3304 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3305 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3306 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3307 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3308 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3309 	/* Only read FCOE on 82599 and newer */
3310 	if (hw->mac.type != ixgbe_mac_82598EB) {
3311 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3312 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3313 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3314 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3315 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3316 	}
3317 
3318 	/* Rx Errors */
3319 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3320 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3321 }
3322 
3323 #if 0
3324 /*
3325  * Add sysctl variables, one per statistic, to the system.
3326  */
3327 static void
3328 ix_add_hw_stats(struct ix_softc *sc)
3329 {
3330 
3331 	device_t dev = sc->dev;
3332 
3333 	struct ix_tx_ring *txr = sc->tx_rings;
3334 	struct ix_rx_ring *rxr = sc->rx_rings;
3335 
3336 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3337 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3338 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3339 	struct ixgbe_hw_stats *stats = &sc->stats;
3340 
3341 	struct sysctl_oid *stat_node, *queue_node;
3342 	struct sysctl_oid_list *stat_list, *queue_list;
3343 
3344 #define QUEUE_NAME_LEN 32
3345 	char namebuf[QUEUE_NAME_LEN];
3346 
3347 	/* MAC stats get their own sub node */
3348 
3349 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3350 				    CTLFLAG_RD, NULL, "MAC Statistics");
3351 	stat_list = SYSCTL_CHILDREN(stat_node);
3352 
3353 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3354 			CTLFLAG_RD, &stats->crcerrs,
3355 			"CRC Errors");
3356 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3357 			CTLFLAG_RD, &stats->illerrc,
3358 			"Illegal Byte Errors");
3359 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3360 			CTLFLAG_RD, &stats->errbc,
3361 			"Byte Errors");
3362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3363 			CTLFLAG_RD, &stats->mspdc,
3364 			"MAC Short Packets Discarded");
3365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3366 			CTLFLAG_RD, &stats->mlfc,
3367 			"MAC Local Faults");
3368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3369 			CTLFLAG_RD, &stats->mrfc,
3370 			"MAC Remote Faults");
3371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3372 			CTLFLAG_RD, &stats->rlec,
3373 			"Receive Length Errors");
3374 
3375 	/* Flow Control stats */
3376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3377 			CTLFLAG_RD, &stats->lxontxc,
3378 			"Link XON Transmitted");
3379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3380 			CTLFLAG_RD, &stats->lxonrxc,
3381 			"Link XON Received");
3382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3383 			CTLFLAG_RD, &stats->lxofftxc,
3384 			"Link XOFF Transmitted");
3385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3386 			CTLFLAG_RD, &stats->lxoffrxc,
3387 			"Link XOFF Received");
3388 
3389 	/* Packet Reception Stats */
3390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3391 			CTLFLAG_RD, &stats->tor,
3392 			"Total Octets Received");
3393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3394 			CTLFLAG_RD, &stats->gorc,
3395 			"Good Octets Received");
3396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3397 			CTLFLAG_RD, &stats->tpr,
3398 			"Total Packets Received");
3399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3400 			CTLFLAG_RD, &stats->gprc,
3401 			"Good Packets Received");
3402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3403 			CTLFLAG_RD, &stats->mprc,
3404 			"Multicast Packets Received");
3405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3406 			CTLFLAG_RD, &stats->bprc,
3407 			"Broadcast Packets Received");
3408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3409 			CTLFLAG_RD, &stats->prc64,
3410 			"64 byte frames received ");
3411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3412 			CTLFLAG_RD, &stats->prc127,
3413 			"65-127 byte frames received");
3414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3415 			CTLFLAG_RD, &stats->prc255,
3416 			"128-255 byte frames received");
3417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3418 			CTLFLAG_RD, &stats->prc511,
3419 			"256-511 byte frames received");
3420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3421 			CTLFLAG_RD, &stats->prc1023,
3422 			"512-1023 byte frames received");
3423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3424 			CTLFLAG_RD, &stats->prc1522,
3425 			"1024-1522 byte frames received");
3426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3427 			CTLFLAG_RD, &stats->ruc,
3428 			"Receive Undersized");
3429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3430 			CTLFLAG_RD, &stats->rfc,
3431 			"Fragmented Packets Received ");
3432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3433 			CTLFLAG_RD, &stats->roc,
3434 			"Oversized Packets Received");
3435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3436 			CTLFLAG_RD, &stats->rjc,
3437 			"Received Jabber");
3438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3439 			CTLFLAG_RD, &stats->mngprc,
3440 			"Management Packets Received");
3441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3442 			CTLFLAG_RD, &stats->mngpdc,
3443 			"Management Packets Dropped");
3444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3445 			CTLFLAG_RD, &stats->xec,
3446 			"Checksum Errors");
3447 
3448 	/* Packet Transmission Stats */
3449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3450 			CTLFLAG_RD, &stats->gotc,
3451 			"Good Octets Transmitted");
3452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3453 			CTLFLAG_RD, &stats->tpt,
3454 			"Total Packets Transmitted");
3455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3456 			CTLFLAG_RD, &stats->gptc,
3457 			"Good Packets Transmitted");
3458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3459 			CTLFLAG_RD, &stats->bptc,
3460 			"Broadcast Packets Transmitted");
3461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3462 			CTLFLAG_RD, &stats->mptc,
3463 			"Multicast Packets Transmitted");
3464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3465 			CTLFLAG_RD, &stats->mngptc,
3466 			"Management Packets Transmitted");
3467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3468 			CTLFLAG_RD, &stats->ptc64,
3469 			"64 byte frames transmitted ");
3470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3471 			CTLFLAG_RD, &stats->ptc127,
3472 			"65-127 byte frames transmitted");
3473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3474 			CTLFLAG_RD, &stats->ptc255,
3475 			"128-255 byte frames transmitted");
3476 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3477 			CTLFLAG_RD, &stats->ptc511,
3478 			"256-511 byte frames transmitted");
3479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3480 			CTLFLAG_RD, &stats->ptc1023,
3481 			"512-1023 byte frames transmitted");
3482 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3483 			CTLFLAG_RD, &stats->ptc1522,
3484 			"1024-1522 byte frames transmitted");
3485 }
3486 #endif
3487 
3488 /*
3489  * Enable the hardware to drop packets when the buffer is full.
3490  * This is useful when multiple RX rings are used, so that no
3491  * single RX ring being full stalls the entire RX engine.  We
3492  * only enable this when multiple RX rings are used and when
3493  * flow control is disabled.
3494  */
3495 static void
3496 ix_enable_rx_drop(struct ix_softc *sc)
3497 {
3498 	struct ixgbe_hw *hw = &sc->hw;
3499 	int i;
3500 
3501 	if (bootverbose) {
3502 		if_printf(&sc->arpcom.ac_if,
3503 		    "flow control %s, enable RX drop\n",
3504 		    ix_fc2str(sc->hw.fc.current_mode));
3505 	}
3506 
3507 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3508 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3509 
3510 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3511 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3512 	}
3513 }
3514 
3515 static void
3516 ix_disable_rx_drop(struct ix_softc *sc)
3517 {
3518 	struct ixgbe_hw *hw = &sc->hw;
3519 	int i;
3520 
3521 	if (bootverbose) {
3522 		if_printf(&sc->arpcom.ac_if,
3523 		    "flow control %s, disable RX drop\n",
3524 		    ix_fc2str(sc->hw.fc.current_mode));
3525 	}
3526 
3527 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3528 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3529 
3530 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3531 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3532 	}
3533 }
3534 
3535 #ifdef foo
3536 /* XXX not working properly w/ 82599 connected w/ DAC */
3537 /* XXX only works after the interface is up */
3538 static int
3539 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3540 {
3541 	struct ix_softc *sc = (struct ix_softc *)arg1;
3542 	struct ifnet *ifp = &sc->arpcom.ac_if;
3543 	struct ixgbe_hw *hw = &sc->hw;
3544 	ixgbe_link_speed speed;
3545 	int error, advspeed;
3546 
3547 	advspeed = sc->advspeed;
3548 	error = sysctl_handle_int(oidp, &advspeed, 0, req);
3549 	if (error || req->newptr == NULL)
3550 		return error;
3551 
3552 	if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3553 	    hw->phy.multispeed_fiber))
3554 		return EOPNOTSUPP;
3555 	if (hw->mac.ops.setup_link == NULL)
3556 		return EOPNOTSUPP;
3557 
3558 	switch (advspeed) {
3559 	case 0:	/* auto */
3560 		speed = IXGBE_LINK_SPEED_UNKNOWN;
3561 		break;
3562 
3563 	case 1:	/* 1Gb */
3564 		speed = IXGBE_LINK_SPEED_1GB_FULL;
3565 		break;
3566 
3567 	case 2:	/* 100Mb */
3568 		speed = IXGBE_LINK_SPEED_100_FULL;
3569 		break;
3570 
3571 	case 3:	/* 1Gb/10Gb */
3572 		speed = IXGBE_LINK_SPEED_1GB_FULL |
3573 		    IXGBE_LINK_SPEED_10GB_FULL;
3574 		break;
3575 
3576 	default:
3577 		return EINVAL;
3578 	}
3579 
3580 	ifnet_serialize_all(ifp);
3581 
3582 	if (sc->advspeed == advspeed) /* no change */
3583 		goto done;
3584 
3585 	if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3586 	    hw->mac.type != ixgbe_mac_X540) {
3587 		error = EOPNOTSUPP;
3588 		goto done;
3589 	}
3590 
3591 	sc->advspeed = advspeed;
3592 
3593 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3594 		goto done;
3595 
3596 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3597 		ix_config_link(sc);
3598 	} else {
3599 		hw->mac.autotry_restart = TRUE;
3600 		hw->mac.ops.setup_link(hw, speed, sc->link_up);
3601 	}
3602 
3603 done:
3604 	ifnet_deserialize_all(ifp);
3605 	return error;
3606 }
3607 #endif
3608 
3609 static void
3610 ix_setup_serialize(struct ix_softc *sc)
3611 {
3612 	int i = 0, j;
3613 
3614 	/* Main + RX + TX */
3615 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3616 	sc->serializes =
3617 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3618 	        M_DEVBUF, M_WAITOK | M_ZERO);
3619 
3620 	/*
3621 	 * Setup serializes
3622 	 *
3623 	 * NOTE: Order is critical
3624 	 */
3625 
3626 	KKASSERT(i < sc->nserialize);
3627 	sc->serializes[i++] = &sc->main_serialize;
3628 
3629 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3630 		KKASSERT(i < sc->nserialize);
3631 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3632 	}
3633 
3634 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3635 		KKASSERT(i < sc->nserialize);
3636 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3637 	}
3638 
3639 	KKASSERT(i == sc->nserialize);
3640 }
3641 
3642 static int
3643 ix_alloc_intr(struct ix_softc *sc)
3644 {
3645 	struct ix_intr_data *intr;
3646 	u_int intr_flags;
3647 
3648 	ix_alloc_msix(sc);
3649 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3650 		ix_set_ring_inuse(sc, FALSE);
3651 		return 0;
3652 	}
3653 
3654 	if (sc->intr_data != NULL)
3655 		kfree(sc->intr_data, M_DEVBUF);
3656 
3657 	sc->intr_cnt = 1;
3658 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3659 	    M_WAITOK | M_ZERO);
3660 	intr = &sc->intr_data[0];
3661 
3662 	/*
3663 	 * Allocate MSI/legacy interrupt resource
3664 	 */
3665 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3666 	    &intr->intr_rid, &intr_flags);
3667 
3668 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3669 	    &intr->intr_rid, intr_flags);
3670 	if (intr->intr_res == NULL) {
3671 		device_printf(sc->dev, "Unable to allocate bus resource: "
3672 		    "interrupt\n");
3673 		return ENXIO;
3674 	}
3675 
3676 	intr->intr_serialize = &sc->main_serialize;
3677 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3678 	intr->intr_func = ix_intr;
3679 	intr->intr_funcarg = sc;
3680 	intr->intr_rate = IX_INTR_RATE;
3681 	intr->intr_use = IX_INTR_USE_RXTX;
3682 
3683 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3684 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3685 
3686 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3687 
3688 	ix_set_ring_inuse(sc, FALSE);
3689 
3690 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3691 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3692 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3693 
3694 	return 0;
3695 }
3696 
3697 static void
3698 ix_free_intr(struct ix_softc *sc)
3699 {
3700 	if (sc->intr_data == NULL)
3701 		return;
3702 
3703 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3704 		struct ix_intr_data *intr = &sc->intr_data[0];
3705 
3706 		KKASSERT(sc->intr_cnt == 1);
3707 		if (intr->intr_res != NULL) {
3708 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3709 			    intr->intr_rid, intr->intr_res);
3710 		}
3711 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3712 			pci_release_msi(sc->dev);
3713 
3714 		kfree(sc->intr_data, M_DEVBUF);
3715 	} else {
3716 		ix_free_msix(sc, TRUE);
3717 	}
3718 }
3719 
3720 static void
3721 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3722 {
3723 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3724 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3725 	if (bootverbose) {
3726 		if_printf(&sc->arpcom.ac_if,
3727 		    "RX rings %d/%d, TX rings %d/%d\n",
3728 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3729 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3730 	}
3731 }
3732 
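/*
 * Pick how many RX rings to actually use: one without hardware RSS, all
 * rings when polling, IX_MIN_RXRING_RSS with non-MSI-X interrupts, and
 * otherwise the negotiated MSI-X ring count.  ix_get_txring_inuse() below
 * follows the same pattern for TX rings.
 */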
3733 static int
3734 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3735 {
3736 	if (!IX_ENABLE_HWRSS(sc))
3737 		return 1;
3738 
3739 	if (polling)
3740 		return sc->rx_ring_cnt;
3741 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3742 		return IX_MIN_RXRING_RSS;
3743 	else
3744 		return sc->rx_ring_msix;
3745 }
3746 
3747 static int
3748 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3749 {
3750 	if (!IX_ENABLE_HWTSS(sc))
3751 		return 1;
3752 
3753 	if (polling)
3754 		return sc->tx_ring_cnt;
3755 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3756 		return 1;
3757 	else
3758 		return sc->tx_ring_msix;
3759 }
3760 
3761 static int
3762 ix_setup_intr(struct ix_softc *sc)
3763 {
3764 	int i;
3765 
3766 	for (i = 0; i < sc->intr_cnt; ++i) {
3767 		struct ix_intr_data *intr = &sc->intr_data[i];
3768 		int error;
3769 
3770 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3771 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3772 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3773 		if (error) {
3774 			device_printf(sc->dev, "can't set up intr %d\n", i);
3775 			ix_teardown_intr(sc, i);
3776 			return error;
3777 		}
3778 	}
3779 	return 0;
3780 }
3781 
3782 static void
3783 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3784 {
3785 	int i;
3786 
3787 	if (sc->intr_data == NULL)
3788 		return;
3789 
3790 	for (i = 0; i < intr_cnt; ++i) {
3791 		struct ix_intr_data *intr = &sc->intr_data[i];
3792 
3793 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3794 	}
3795 }
3796 
3797 static void
3798 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3799 {
3800 	struct ix_softc *sc = ifp->if_softc;
3801 
3802 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3803 }
3804 
3805 static void
3806 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3807 {
3808 	struct ix_softc *sc = ifp->if_softc;
3809 
3810 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3811 }
3812 
3813 static int
3814 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3815 {
3816 	struct ix_softc *sc = ifp->if_softc;
3817 
3818 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3819 }
3820 
3821 #ifdef INVARIANTS
3822 
3823 static void
3824 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3825     boolean_t serialized)
3826 {
3827 	struct ix_softc *sc = ifp->if_softc;
3828 
3829 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3830 	    serialized);
3831 }
3832 
3833 #endif	/* INVARIANTS */
3834 
3835 static void
3836 ix_free_rings(struct ix_softc *sc)
3837 {
3838 	int i;
3839 
3840 	if (sc->tx_rings != NULL) {
3841 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3842 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3843 
3844 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3845 		}
3846 		kfree(sc->tx_rings, M_DEVBUF);
3847 	}
3848 
3849 	if (sc->rx_rings != NULL) {
3850 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3851 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3852 
3853 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3854 		}
3855 		kfree(sc->rx_rings, M_DEVBUF);
3856 	}
3857 
3858 	if (sc->parent_tag != NULL)
3859 		bus_dma_tag_destroy(sc->parent_tag);
3860 }
3861 
3862 static void
3863 ix_watchdog(struct ifaltq_subque *ifsq)
3864 {
3865 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3866 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3867 	struct ix_softc *sc = ifp->if_softc;
3868 	int i;
3869 
3870 	KKASSERT(txr->tx_ifsq == ifsq);
3871 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3872 
3873 	/*
3874 	 * If the interface has been paused then don't do the watchdog check
3875 	 */
3876 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3877 		txr->tx_watchdog.wd_timer = 5;
3878 		return;
3879 	}
3880 
3881 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3882 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3883 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3884 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3885 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3886 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3887 
3888 	ix_init(sc);
3889 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3890 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3891 }
3892 
3893 static void
3894 ix_free_tx_ring(struct ix_tx_ring *txr)
3895 {
3896 	int i;
3897 
3898 	for (i = 0; i < txr->tx_ndesc; ++i) {
3899 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3900 
3901 		if (txbuf->m_head != NULL) {
3902 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3903 			m_freem(txbuf->m_head);
3904 			txbuf->m_head = NULL;
3905 		}
3906 	}
3907 }
3908 
3909 static void
3910 ix_free_rx_ring(struct ix_rx_ring *rxr)
3911 {
3912 	int i;
3913 
3914 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3915 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3916 
3917 		if (rxbuf->fmp != NULL) {
3918 			m_freem(rxbuf->fmp);
3919 			rxbuf->fmp = NULL;
3920 			rxbuf->lmp = NULL;
3921 		} else {
3922 			KKASSERT(rxbuf->lmp == NULL);
3923 		}
3924 		if (rxbuf->m_head != NULL) {
3925 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3926 			m_freem(rxbuf->m_head);
3927 			rxbuf->m_head = NULL;
3928 		}
3929 	}
3930 }
3931 
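/*
 * Refill RX descriptor 'i' with a fresh mbuf.  The mbuf is loaded into the
 * ring's spare DMA map first; only on success are the spare map and the
 * buffer's own map swapped, so a failed allocation or DMA load leaves the
 * existing descriptor and mbuf untouched.
 */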
3932 static int
3933 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3934 {
3935 	struct mbuf *m;
3936 	bus_dma_segment_t seg;
3937 	bus_dmamap_t map;
3938 	struct ix_rx_buf *rxbuf;
3939 	int flags, error, nseg;
3940 
3941 	flags = M_NOWAIT;
3942 	if (__predict_false(wait))
3943 		flags = M_WAITOK;
3944 
3945 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3946 	if (m == NULL) {
3947 		if (wait) {
3948 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3949 			    "Unable to allocate RX mbuf\n");
3950 		}
3951 		return ENOBUFS;
3952 	}
3953 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3954 
3955 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3956 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3957 	if (error) {
3958 		m_freem(m);
3959 		if (wait) {
3960 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3961 			    "Unable to load RX mbuf\n");
3962 		}
3963 		return error;
3964 	}
3965 
3966 	rxbuf = &rxr->rx_buf[i];
3967 	if (rxbuf->m_head != NULL)
3968 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3969 
3970 	map = rxbuf->map;
3971 	rxbuf->map = rxr->rx_sparemap;
3972 	rxr->rx_sparemap = map;
3973 
3974 	rxbuf->m_head = m;
3975 	rxbuf->paddr = seg.ds_addr;
3976 
3977 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3978 	return 0;
3979 }
3980 
3981 static void
3982 ix_add_sysctl(struct ix_softc *sc)
3983 {
3984 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3985 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
3986 #ifdef IX_RSS_DEBUG
3987 	char node[32];
3988 	int i;
3989 #endif
3990 
3991 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3992 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
3993 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3994 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
3995 	    "# of RX rings used");
3996 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3997 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
3998 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3999 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4000 	    "# of TX rings used");
4001 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4002 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4003 	    sc, 0, ix_sysctl_rxd, "I",
4004 	    "# of RX descs");
4005 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4006 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4007 	    sc, 0, ix_sysctl_txd, "I",
4008 	    "# of TX descs");
4009 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4010 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4011 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4012 	    "# of segments sent before write to hardware register");
4013 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4014 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4015 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4016 	    "# of received segments processed before write to hardware register");
4017 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4018 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4019 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4020 	    "# of segments per TX interrupt");
4021 
4022 #ifdef IFPOLL_ENABLE
4023 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4024 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4025 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4026 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4027 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4028 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4029 #endif
4030 
4031 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4032 do { \
4033 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4034 	    ix_sysctl_##name, #use " interrupt rate"); \
4035 } while (0)
4036 
4037 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4038 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4039 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4040 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4041 
4042 #undef IX_ADD_INTR_RATE_SYSCTL
4043 
4044 #ifdef IX_RSS_DEBUG
4045 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4046 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4047 	    "RSS debug level");
4048 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4049 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4050 		SYSCTL_ADD_ULONG(ctx,
4051 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4052 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4053 	}
4054 #endif
4055 
4056 #ifdef foo
4057 	/*
4058 	 * Allow a kind of speed control by forcing the autoneg
4059 	 * advertised speed list to only a certain value; this
4060 	 * supports 1G on 82599 devices and 100Mb on X540.
4061 	 */
4062 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4063 	    OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
4064 	    sc, 0, ix_sysctl_advspeed, "I",
4065 	    "advertised link speed, "
4066 	    "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
4067 #endif
4068 
4069 #if 0
4070 	ix_add_hw_stats(sc);
4071 #endif
4072 
4073 }
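/*
 * The nodes above hang off the device's sysctl tree.  Assuming that tree
 * resolves to dev.ix.<unit> (the usual device_get_sysctl_tree() layout),
 * the tunables can be inspected or adjusted at runtime, for example:
 *
 *	sysctl dev.ix.0.rxr_inuse		# read-only: RX rings in use
 *	sysctl dev.ix.0.tx_intr_nsegs=64	# segments per TX interrupt
 *
 * The paths and the value 64 are illustrative only; check the actual tree
 * and descriptor limits on a given system.
 */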
4074 
4075 static int
4076 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4077 {
4078 	struct ix_softc *sc = (void *)arg1;
4079 	struct ifnet *ifp = &sc->arpcom.ac_if;
4080 	int error, nsegs, i;
4081 
4082 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4083 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4084 	if (error || req->newptr == NULL)
4085 		return error;
4086 	if (nsegs < 0)
4087 		return EINVAL;
4088 
4089 	ifnet_serialize_all(ifp);
4090 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4091 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4092 	ifnet_deserialize_all(ifp);
4093 
4094 	return 0;
4095 }
4096 
4097 static int
4098 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4099 {
4100 	struct ix_softc *sc = (void *)arg1;
4101 	struct ifnet *ifp = &sc->arpcom.ac_if;
4102 	int error, nsegs, i;
4103 
4104 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4105 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4106 	if (error || req->newptr == NULL)
4107 		return error;
4108 	if (nsegs < 0)
4109 		return EINVAL;
4110 
4111 	ifnet_serialize_all(ifp);
4112 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4113 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4114 	ifnet_deserialize_all(ifp);
4115 
4116 	return 0;
4117 }
4118 
4119 static int
4120 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4121 {
4122 	struct ix_softc *sc = (void *)arg1;
4123 	int txd;
4124 
4125 	txd = sc->tx_rings[0].tx_ndesc;
4126 	return sysctl_handle_int(oidp, &txd, 0, req);
4127 }
4128 
4129 static int
4130 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4131 {
4132 	struct ix_softc *sc = (void *)arg1;
4133 	int rxd;
4134 
4135 	rxd = sc->rx_rings[0].rx_ndesc;
4136 	return sysctl_handle_int(oidp, &rxd, 0, req);
4137 }
4138 
4139 static int
4140 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4141 {
4142 	struct ix_softc *sc = (void *)arg1;
4143 	struct ifnet *ifp = &sc->arpcom.ac_if;
4144 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4145 	int error, nsegs;
4146 
4147 	nsegs = txr->tx_intr_nsegs;
4148 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4149 	if (error || req->newptr == NULL)
4150 		return error;
4151 	if (nsegs < 0)
4152 		return EINVAL;
4153 
4154 	ifnet_serialize_all(ifp);
4155 
4156 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4157 		error = EINVAL;
4158 	} else {
4159 		int i;
4160 
4161 		error = 0;
4162 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4163 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4164 	}
4165 
4166 	ifnet_deserialize_all(ifp);
4167 
4168 	return error;
4169 }
4170 
4171 static void
4172 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4173 {
4174 	uint32_t eitr, eitr_intvl;
4175 
4176 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
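	/*
	 * Convert the requested rate (interrupts/second) into an EITR
	 * interval expressed in 256ns units: 10^9 ns / 256 ns-per-unit /
	 * rate.  The result is then clamped to the valid range of the
	 * MAC's interval field below.
	 */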
4177 	eitr_intvl = 1000000000 / 256 / rate;
4178 
4179 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4180 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4181 		if (eitr_intvl == 0)
4182 			eitr_intvl = 1;
4183 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4184 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4185 	} else {
4186 		eitr &= ~IX_EITR_INTVL_MASK;
4187 
4188 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4189 		if (eitr_intvl == 0)
4190 			eitr_intvl = IX_EITR_INTVL_MIN;
4191 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4192 			eitr_intvl = IX_EITR_INTVL_MAX;
4193 	}
4194 	eitr |= eitr_intvl;
4195 
4196 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4197 }
4198 
4199 static int
4200 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4201 {
4202 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4203 }
4204 
4205 static int
4206 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4207 {
4208 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4209 }
4210 
4211 static int
4212 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4213 {
4214 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4215 }
4216 
4217 static int
4218 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4219 {
4220 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4221 }
4222 
4223 static int
4224 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4225 {
4226 	struct ix_softc *sc = (void *)arg1;
4227 	struct ifnet *ifp = &sc->arpcom.ac_if;
4228 	int error, rate, i;
4229 
4230 	rate = 0;
4231 	for (i = 0; i < sc->intr_cnt; ++i) {
4232 		if (sc->intr_data[i].intr_use == use) {
4233 			rate = sc->intr_data[i].intr_rate;
4234 			break;
4235 		}
4236 	}
4237 
4238 	error = sysctl_handle_int(oidp, &rate, 0, req);
4239 	if (error || req->newptr == NULL)
4240 		return error;
4241 	if (rate <= 0)
4242 		return EINVAL;
4243 
4244 	ifnet_serialize_all(ifp);
4245 
4246 	for (i = 0; i < sc->intr_cnt; ++i) {
4247 		if (sc->intr_data[i].intr_use == use) {
4248 			sc->intr_data[i].intr_rate = rate;
4249 			if (ifp->if_flags & IFF_RUNNING)
4250 				ix_set_eitr(sc, i, rate);
4251 		}
4252 	}
4253 
4254 	ifnet_deserialize_all(ifp);
4255 
4256 	return error;
4257 }
4258 
4259 static void
4260 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4261     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4262 {
4263 	int i;
4264 
4265 	for (i = 0; i < sc->intr_cnt; ++i) {
4266 		if (sc->intr_data[i].intr_use == use) {
4267 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4268 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4269 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4270 			    sc, 0, handler, "I", desc);
4271 			break;
4272 		}
4273 	}
4274 }
4275 
4276 static void
4277 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4278 {
4279 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4280 		sc->timer_cpuid = 0; /* XXX fixed */
4281 	else
4282 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4283 }
4284 
4285 static void
4286 ix_alloc_msix(struct ix_softc *sc)
4287 {
4288 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4289 	struct ix_intr_data *intr;
4290 	int i, x, error;
4291 	int offset, offset_def, agg_rxtx, ring_max;
4292 	boolean_t aggregate, setup = FALSE;
4293 
4294 	msix_enable = ix_msix_enable;
4295 	/*
4296 	 * Don't enable MSI-X on 82598 by default, see:
4297 	 * 82598 specification update errata #38
4298 	 */
4299 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4300 		msix_enable = 0;
4301 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4302 	if (!msix_enable)
4303 		return;
4304 
4305 	msix_cnt = pci_msix_count(sc->dev);
4306 #ifdef IX_MSIX_DEBUG
4307 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4308 #endif
4309 	if (msix_cnt <= 1) {
4310 		/* A single MSI-X vector does not make sense */
4311 		return;
4312 	}
4313 
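	/*
	 * Round the advertised MSI-X count down to the largest power of
	 * two; this becomes the budget for RX/TX ring vectors, while the
	 * raw count is kept for the fixups below.
	 */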
4314 	i = 0;
4315 	while ((1 << (i + 1)) <= msix_cnt)
4316 		++i;
4317 	msix_cnt2 = 1 << i;
4318 
4319 	if (bootverbose) {
4320 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4321 		    msix_cnt2, msix_cnt);
4322 	}
4323 
4324 	KKASSERT(msix_cnt >= msix_cnt2);
4325 	if (msix_cnt == msix_cnt2) {
4326 		/* We need at least one MSI-X for link status */
4327 		msix_cnt2 >>= 1;
4328 		if (msix_cnt2 <= 1) {
4329 			/* A single MSI-X vector for RX/TX does not make sense */
4330 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4331 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4332 			return;
4333 		}
4334 		KKASSERT(msix_cnt > msix_cnt2);
4335 
4336 		if (bootverbose) {
4337 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4338 			    msix_cnt2, msix_cnt);
4339 		}
4340 	}
4341 
4342 	/*
4343 	 * Make sure that we don't break interrupt related registers
4344 	 * (EIMS, etc) limitation.
4345 	 *
4346 	 * NOTE: msix_cnt > msix_cnt2, when we reach here
4347 	 */
4348 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4349 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4350 			msix_cnt2 = IX_MAX_MSIX_82598;
4351 	} else {
4352 		if (msix_cnt2 > IX_MAX_MSIX)
4353 			msix_cnt2 = IX_MAX_MSIX;
4354 	}
4355 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4356 
4357 	if (bootverbose) {
4358 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4359 		    msix_cnt2, msix_cnt);
4360 	}
4361 
4362 	sc->rx_ring_msix = sc->rx_ring_cnt;
4363 	if (sc->rx_ring_msix > msix_cnt2)
4364 		sc->rx_ring_msix = msix_cnt2;
4365 
4366 	sc->tx_ring_msix = sc->tx_ring_cnt;
4367 	if (sc->tx_ring_msix > msix_cnt2)
4368 		sc->tx_ring_msix = msix_cnt2;
4369 
4370 	ring_max = sc->rx_ring_msix;
4371 	if (ring_max < sc->tx_ring_msix)
4372 		ring_max = sc->tx_ring_msix;
4373 
4374 	/* Allow user to force independent RX/TX MSI-X handling */
4375 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4376 	    ix_msix_agg_rxtx);
4377 
4378 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4379 		/*
4380 		 * Independent TX/RX MSI-X
4381 		 */
4382 		aggregate = FALSE;
4383 		if (bootverbose)
4384 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4385 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4386 	} else {
4387 		/*
4388 		 * Aggregate TX/RX MSI-X
4389 		 */
4390 		aggregate = TRUE;
4391 		if (bootverbose)
4392 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4393 		alloc_cnt = msix_cnt2;
4394 		if (alloc_cnt > ring_max)
4395 			alloc_cnt = ring_max;
4396 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4397 		    alloc_cnt >= sc->tx_ring_msix);
4398 	}
4399 	++alloc_cnt;	/* For status */
4400 
4401 	if (bootverbose) {
4402 		device_printf(sc->dev, "MSI-X alloc %d, "
4403 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4404 		    sc->rx_ring_msix, sc->tx_ring_msix);
4405 	}
4406 
4407 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4408 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4409 	    &sc->msix_mem_rid, RF_ACTIVE);
4410 	if (sc->msix_mem_res == NULL) {
4411 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4412 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4413 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4414 		if (sc->msix_mem_res == NULL) {
4415 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4416 			return;
4417 		}
4418 	}
4419 
4420 	sc->intr_cnt = alloc_cnt;
4421 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4422 	    M_DEVBUF, M_WAITOK | M_ZERO);
4423 	for (x = 0; x < sc->intr_cnt; ++x) {
4424 		intr = &sc->intr_data[x];
4425 		intr->intr_rid = -1;
4426 		intr->intr_rate = IX_INTR_RATE;
4427 	}
4428 
4429 	x = 0;
4430 	if (!aggregate) {
4431 		/*
4432 		 * RX rings
4433 		 */
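		/*
		 * Spread vectors of multiple adapters across CPUs: the
		 * default offset is (ring count * unit number) % ncpus2 and
		 * may be overridden by the msix.rxoff tunable, but it must
		 * stay below ncpus2 and be a multiple of the ring count.
		 * The TX and aggregate paths below do the same with
		 * msix.txoff and msix.off.
		 */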
4434 		if (sc->rx_ring_msix == ncpus2) {
4435 			offset = 0;
4436 		} else {
4437 			offset_def = (sc->rx_ring_msix *
4438 			    device_get_unit(sc->dev)) % ncpus2;
4439 
4440 			offset = device_getenv_int(sc->dev,
4441 			    "msix.rxoff", offset_def);
4442 			if (offset >= ncpus2 ||
4443 			    offset % sc->rx_ring_msix != 0) {
4444 				device_printf(sc->dev,
4445 				    "invalid msix.rxoff %d, use %d\n",
4446 				    offset, offset_def);
4447 				offset = offset_def;
4448 			}
4449 		}
4450 		ix_conf_rx_msix(sc, 0, &x, offset);
4451 
4452 		/*
4453 		 * TX rings
4454 		 */
4455 		if (sc->tx_ring_msix == ncpus2) {
4456 			offset = 0;
4457 		} else {
4458 			offset_def = (sc->tx_ring_msix *
4459 			    device_get_unit(sc->dev)) % ncpus2;
4460 
4461 			offset = device_getenv_int(sc->dev,
4462 			    "msix.txoff", offset_def);
4463 			if (offset >= ncpus2 ||
4464 			    offset % sc->tx_ring_msix != 0) {
4465 				device_printf(sc->dev,
4466 				    "invalid msix.txoff %d, use %d\n",
4467 				    offset, offset_def);
4468 				offset = offset_def;
4469 			}
4470 		}
4471 		ix_conf_tx_msix(sc, 0, &x, offset);
4472 	} else {
4473 		int ring_agg;
4474 
4475 		ring_agg = sc->rx_ring_msix;
4476 		if (ring_agg > sc->tx_ring_msix)
4477 			ring_agg = sc->tx_ring_msix;
4478 
4479 		if (ring_max == ncpus2) {
4480 			offset = 0;
4481 		} else {
4482 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4483 			    ncpus2;
4484 
4485 			offset = device_getenv_int(sc->dev, "msix.off",
4486 			    offset_def);
4487 			if (offset >= ncpus2 || offset % ring_max != 0) {
4488 				device_printf(sc->dev,
4489 				    "invalid msix.off %d, use %d\n",
4490 				    offset, offset_def);
4491 				offset = offset_def;
4492 			}
4493 		}
4494 
4495 		for (i = 0; i < ring_agg; ++i) {
4496 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4497 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4498 
4499 			KKASSERT(x < sc->intr_cnt);
4500 			rxr->rx_intr_vec = x;
4501 			ix_setup_msix_eims(sc, x,
4502 			    &rxr->rx_eims, &rxr->rx_eims_val);
4503 			rxr->rx_txr = txr;
4504 			/* NOTE: Leave TX ring's intr_vec negative */
4505 
4506 			intr = &sc->intr_data[x++];
4507 
4508 			intr->intr_serialize = &rxr->rx_serialize;
4509 			intr->intr_func = ix_msix_rxtx;
4510 			intr->intr_funcarg = rxr;
4511 			intr->intr_use = IX_INTR_USE_RXTX;
4512 
4513 			intr->intr_cpuid = i + offset;
4514 			KKASSERT(intr->intr_cpuid < ncpus2);
4515 			txr->tx_intr_cpuid = intr->intr_cpuid;
4516 
4517 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4518 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4519 			intr->intr_desc = intr->intr_desc0;
4520 		}
4521 
4522 		if (ring_agg != ring_max) {
4523 			if (ring_max == sc->tx_ring_msix)
4524 				ix_conf_tx_msix(sc, i, &x, offset);
4525 			else
4526 				ix_conf_rx_msix(sc, i, &x, offset);
4527 		}
4528 	}
4529 
4530 	/*
4531 	 * Status MSI-X
4532 	 */
4533 	KKASSERT(x < sc->intr_cnt);
4534 	sc->sts_msix_vec = x;
4535 
4536 	intr = &sc->intr_data[x++];
4537 
4538 	intr->intr_serialize = &sc->main_serialize;
4539 	intr->intr_func = ix_msix_status;
4540 	intr->intr_funcarg = sc;
4541 	intr->intr_cpuid = 0;
4542 	intr->intr_use = IX_INTR_USE_STATUS;
4543 
4544 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4545 	    device_get_nameunit(sc->dev));
4546 	intr->intr_desc = intr->intr_desc0;
4547 
4548 	KKASSERT(x == sc->intr_cnt);
4549 
4550 	error = pci_setup_msix(sc->dev);
4551 	if (error) {
4552 		device_printf(sc->dev, "Setup MSI-X failed\n");
4553 		goto back;
4554 	}
4555 	setup = TRUE;
4556 
4557 	for (i = 0; i < sc->intr_cnt; ++i) {
4558 		intr = &sc->intr_data[i];
4559 
4560 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4561 		    intr->intr_cpuid);
4562 		if (error) {
4563 			device_printf(sc->dev,
4564 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4565 			    intr->intr_cpuid);
4566 			goto back;
4567 		}
4568 
4569 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4570 		    &intr->intr_rid, RF_ACTIVE);
4571 		if (intr->intr_res == NULL) {
4572 			device_printf(sc->dev,
4573 			    "Unable to allocate MSI-X %d resource\n", i);
4574 			error = ENOMEM;
4575 			goto back;
4576 		}
4577 	}
4578 
4579 	pci_enable_msix(sc->dev);
4580 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4581 back:
4582 	if (error)
4583 		ix_free_msix(sc, setup);
4584 }
4585 
4586 static void
4587 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4588 {
4589 	int i;
4590 
4591 	KKASSERT(sc->intr_cnt > 1);
4592 
4593 	for (i = 0; i < sc->intr_cnt; ++i) {
4594 		struct ix_intr_data *intr = &sc->intr_data[i];
4595 
4596 		if (intr->intr_res != NULL) {
4597 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4598 			    intr->intr_rid, intr->intr_res);
4599 		}
4600 		if (intr->intr_rid >= 0)
4601 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4602 	}
4603 	if (setup)
4604 		pci_teardown_msix(sc->dev);
4605 
4606 	sc->intr_cnt = 0;
4607 	kfree(sc->intr_data, M_DEVBUF);
4608 	sc->intr_data = NULL;
4609 }
4610 
4611 static void
4612 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4613 {
4614 	int x = *x0;
4615 
4616 	for (; i < sc->rx_ring_msix; ++i) {
4617 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4618 		struct ix_intr_data *intr;
4619 
4620 		KKASSERT(x < sc->intr_cnt);
4621 		rxr->rx_intr_vec = x;
4622 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4623 
4624 		intr = &sc->intr_data[x++];
4625 
4626 		intr->intr_serialize = &rxr->rx_serialize;
4627 		intr->intr_func = ix_msix_rx;
4628 		intr->intr_funcarg = rxr;
4629 		intr->intr_rate = IX_MSIX_RX_RATE;
4630 		intr->intr_use = IX_INTR_USE_RX;
4631 
4632 		intr->intr_cpuid = i + offset;
4633 		KKASSERT(intr->intr_cpuid < ncpus2);
4634 
4635 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4636 		    device_get_nameunit(sc->dev), i);
4637 		intr->intr_desc = intr->intr_desc0;
4638 	}
4639 	*x0 = x;
4640 }
4641 
4642 static void
4643 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4644 {
4645 	int x = *x0;
4646 
4647 	for (; i < sc->tx_ring_msix; ++i) {
4648 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4649 		struct ix_intr_data *intr;
4650 
4651 		KKASSERT(x < sc->intr_cnt);
4652 		txr->tx_intr_vec = x;
4653 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4654 
4655 		intr = &sc->intr_data[x++];
4656 
4657 		intr->intr_serialize = &txr->tx_serialize;
4658 		intr->intr_func = ix_msix_tx;
4659 		intr->intr_funcarg = txr;
4660 		intr->intr_rate = IX_MSIX_TX_RATE;
4661 		intr->intr_use = IX_INTR_USE_TX;
4662 
4663 		intr->intr_cpuid = i + offset;
4664 		KKASSERT(intr->intr_cpuid < ncpus2);
4665 		txr->tx_intr_cpuid = intr->intr_cpuid;
4666 
4667 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4668 		    device_get_nameunit(sc->dev), i);
4669 		intr->intr_desc = intr->intr_desc0;
4670 	}
4671 	*x0 = x;
4672 }
4673 
4674 static void
4675 ix_msix_rx(void *xrxr)
4676 {
4677 	struct ix_rx_ring *rxr = xrxr;
4678 
4679 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4680 
4681 	ix_rxeof(rxr, -1);
4682 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4683 }
4684 
4685 static void
4686 ix_msix_tx(void *xtxr)
4687 {
4688 	struct ix_tx_ring *txr = xtxr;
4689 
4690 	ASSERT_SERIALIZED(&txr->tx_serialize);
4691 
4692 	ix_txeof(txr, *(txr->tx_hdr));
4693 	if (!ifsq_is_empty(txr->tx_ifsq))
4694 		ifsq_devstart(txr->tx_ifsq);
4695 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4696 }
4697 
4698 static void
4699 ix_msix_rxtx(void *xrxr)
4700 {
4701 	struct ix_rx_ring *rxr = xrxr;
4702 	struct ix_tx_ring *txr;
4703 	int hdr;
4704 
4705 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4706 
4707 	ix_rxeof(rxr, -1);
4708 
4709 	/*
4710 	 * NOTE:
4711 	 * Since tx_next_clean is only changed by ix_txeof(),
4712 	 * which is called only in interrupt handler, the
4713 	 * check w/o holding tx serializer is MPSAFE.
4714 	 */
4715 	txr = rxr->rx_txr;
4716 	hdr = *(txr->tx_hdr);
4717 	if (hdr != txr->tx_next_clean) {
4718 		lwkt_serialize_enter(&txr->tx_serialize);
4719 		ix_txeof(txr, hdr);
4720 		if (!ifsq_is_empty(txr->tx_ifsq))
4721 			ifsq_devstart(txr->tx_ifsq);
4722 		lwkt_serialize_exit(&txr->tx_serialize);
4723 	}
4724 
4725 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4726 }
4727 
4728 static void
4729 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4730 {
4731 	struct ixgbe_hw *hw = &sc->hw;
4732 
4733 	/* Link status change */
4734 	if (eicr & IXGBE_EICR_LSC)
4735 		ix_handle_link(sc);
4736 
4737 	if (hw->mac.type != ixgbe_mac_82598EB) {
4738 		if (eicr & IXGBE_EICR_ECC)
4739 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4740 		else if (eicr & IXGBE_EICR_GPI_SDP1)
4741 			ix_handle_msf(sc);
4742 		else if (eicr & IXGBE_EICR_GPI_SDP2)
4743 			ix_handle_mod(sc);
4744 	}
4745 
4746 	/* Check for fan failure */
4747 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4748 	    (eicr & IXGBE_EICR_GPI_SDP1))
4749 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4750 
4751 	/* Check for over temp condition */
4752 	if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) {
4753 		if_printf(&sc->arpcom.ac_if, "OVER TEMP!!  "
4754 		    "PHY IS SHUT DOWN!!  Reboot\n");
4755 	}
4756 }
4757 
4758 static void
4759 ix_msix_status(void *xsc)
4760 {
4761 	struct ix_softc *sc = xsc;
4762 	uint32_t eicr;
4763 
4764 	ASSERT_SERIALIZED(&sc->main_serialize);
4765 
4766 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4767 	ix_intr_status(sc, eicr);
4768 
4769 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4770 }
4771 
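/*
 * Map MSI-X vector 'x' to the EIMS register and bit used to re-enable it
 * from the ring interrupt handlers.  Vectors 0-31 live in EIMS (82598) or
 * EIMS_EX(0) (later MACs); vectors 32 and up exist only on the later MACs
 * and live in EIMS_EX(1), e.g. vector 35 maps to EIMS_EX(1) bit 3.
 */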
4772 static void
4773 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4774     uint32_t *eims, uint32_t *eims_val)
4775 {
4776 	if (x < 32) {
4777 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4778 			KASSERT(x < IX_MAX_MSIX_82598,
4779 			    ("%s: invalid vector %d for 82598",
4780 			     device_get_nameunit(sc->dev), x));
4781 			*eims = IXGBE_EIMS;
4782 		} else {
4783 			*eims = IXGBE_EIMS_EX(0);
4784 		}
4785 		*eims_val = 1 << x;
4786 	} else {
4787 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4788 		    device_get_nameunit(sc->dev), x));
4789 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4790 		    ("%s: invalid vector %d for 82598",
4791 		     device_get_nameunit(sc->dev), x));
4792 		*eims = IXGBE_EIMS_EX(1);
4793 		*eims_val = 1 << (x - 32);
4794 	}
4795 }
4796 
4797 #ifdef IFPOLL_ENABLE
4798 
4799 static void
4800 ix_npoll_status(struct ifnet *ifp)
4801 {
4802 	struct ix_softc *sc = ifp->if_softc;
4803 	uint32_t eicr;
4804 
4805 	ASSERT_SERIALIZED(&sc->main_serialize);
4806 
4807 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4808 	ix_intr_status(sc, eicr);
4809 }
4810 
4811 static void
4812 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4813 {
4814 	struct ix_tx_ring *txr = arg;
4815 
4816 	ASSERT_SERIALIZED(&txr->tx_serialize);
4817 
4818 	ix_txeof(txr, *(txr->tx_hdr));
4819 	if (!ifsq_is_empty(txr->tx_ifsq))
4820 		ifsq_devstart(txr->tx_ifsq);
4821 }
4822 
4823 static void
4824 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4825 {
4826 	struct ix_rx_ring *rxr = arg;
4827 
4828 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4829 
4830 	ix_rxeof(rxr, cycle);
4831 }
4832 
4833 static void
4834 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4835 {
4836 	struct ix_softc *sc = ifp->if_softc;
4837 	int i, txr_cnt, rxr_cnt;
4838 
4839 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4840 
4841 	if (info) {
4842 		int off;
4843 
4844 		info->ifpi_status.status_func = ix_npoll_status;
4845 		info->ifpi_status.serializer = &sc->main_serialize;
4846 
4847 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4848 		off = sc->tx_npoll_off;
4849 		for (i = 0; i < txr_cnt; ++i) {
4850 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4851 			int idx = i + off;
4852 
4853 			KKASSERT(idx < ncpus2);
4854 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4855 			info->ifpi_tx[idx].arg = txr;
4856 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4857 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4858 		}
4859 
4860 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4861 		off = sc->rx_npoll_off;
4862 		for (i = 0; i < rxr_cnt; ++i) {
4863 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4864 			int idx = i + off;
4865 
4866 			KKASSERT(idx < ncpus2);
4867 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4868 			info->ifpi_rx[idx].arg = rxr;
4869 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4870 		}
4871 
4872 		if (ifp->if_flags & IFF_RUNNING) {
4873 			if (rxr_cnt == sc->rx_ring_inuse &&
4874 			    txr_cnt == sc->tx_ring_inuse) {
4875 				ix_set_timer_cpuid(sc, TRUE);
4876 				ix_disable_intr(sc);
4877 			} else {
4878 				ix_init(sc);
4879 			}
4880 		}
4881 	} else {
4882 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4883 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4884 
4885 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4886 		}
4887 
4888 		if (ifp->if_flags & IFF_RUNNING) {
4889 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4890 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4891 
4892 			if (rxr_cnt == sc->rx_ring_inuse &&
4893 			    txr_cnt == sc->tx_ring_inuse) {
4894 				ix_set_timer_cpuid(sc, FALSE);
4895 				ix_enable_intr(sc);
4896 			} else {
4897 				ix_init(sc);
4898 			}
4899 		}
4900 	}
4901 }
4902 
4903 static int
4904 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4905 {
4906 	struct ix_softc *sc = (void *)arg1;
4907 	struct ifnet *ifp = &sc->arpcom.ac_if;
4908 	int error, off;
4909 
4910 	off = sc->rx_npoll_off;
4911 	error = sysctl_handle_int(oidp, &off, 0, req);
4912 	if (error || req->newptr == NULL)
4913 		return error;
4914 	if (off < 0)
4915 		return EINVAL;
4916 
4917 	ifnet_serialize_all(ifp);
4918 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4919 		error = EINVAL;
4920 	} else {
4921 		error = 0;
4922 		sc->rx_npoll_off = off;
4923 	}
4924 	ifnet_deserialize_all(ifp);
4925 
4926 	return error;
4927 }
4928 
4929 static int
4930 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4931 {
4932 	struct ix_softc *sc = (void *)arg1;
4933 	struct ifnet *ifp = &sc->arpcom.ac_if;
4934 	int error, off;
4935 
4936 	off = sc->tx_npoll_off;
4937 	error = sysctl_handle_int(oidp, &off, 0, req);
4938 	if (error || req->newptr == NULL)
4939 		return error;
4940 	if (off < 0)
4941 		return EINVAL;
4942 
4943 	ifnet_serialize_all(ifp);
4944 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4945 		error = EINVAL;
4946 	} else {
4947 		error = 0;
4948 		sc->tx_npoll_off = off;
4949 	}
4950 	ifnet_deserialize_all(ifp);
4951 
4952 	return error;
4953 }
4954 
4955 #endif /* IFPOLL_ENABLE */
4956 
4957 static enum ixgbe_fc_mode
4958 ix_ifmedia2fc(int ifm)
4959 {
4960 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4961 
4962 	switch (fc_opt) {
4963 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4964 		return ixgbe_fc_full;
4965 
4966 	case IFM_ETH_RXPAUSE:
4967 		return ixgbe_fc_rx_pause;
4968 
4969 	case IFM_ETH_TXPAUSE:
4970 		return ixgbe_fc_tx_pause;
4971 
4972 	default:
4973 		return ixgbe_fc_none;
4974 	}
4975 }
4976 
4977 static const char *
4978 ix_ifmedia2str(int ifm)
4979 {
4980 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4981 
4982 	switch (fc_opt) {
4983 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4984 		return IFM_ETH_FC_FULL;
4985 
4986 	case IFM_ETH_RXPAUSE:
4987 		return IFM_ETH_FC_RXPAUSE;
4988 
4989 	case IFM_ETH_TXPAUSE:
4990 		return IFM_ETH_FC_TXPAUSE;
4991 
4992 	default:
4993 		return IFM_ETH_FC_NONE;
4994 	}
4995 }
4996 
4997 static const char *
4998 ix_fc2str(enum ixgbe_fc_mode fc)
4999 {
5000 	switch (fc) {
5001 	case ixgbe_fc_full:
5002 		return IFM_ETH_FC_FULL;
5003 
5004 	case ixgbe_fc_rx_pause:
5005 		return IFM_ETH_FC_RXPAUSE;
5006 
5007 	case ixgbe_fc_tx_pause:
5008 		return IFM_ETH_FC_TXPAUSE;
5009 
5010 	default:
5011 		return IFM_ETH_FC_NONE;
5012 	}
5013 }
5014